docs: update README.md and doc-test

This commit is contained in:
PoiScript 2019-06-28 00:13:05 +08:00
parent 21aba13d71
commit 5a9e085b61
5 changed files with 298 additions and 226 deletions

212
README.md
View file

@ -1,34 +1,15 @@
# Orgize
Orgize is an Emacs Org-mode parser written in pure Rust. It behaves like a pull
parser (returning an iterator of events), but not exactly.
A Rust library for parsing orgmode files.
Besides, orgize also provides some mechanism for exporting org-mode files to
various formats, e.g. HTML.
## Parse
## Usage
```toml
[dependencies]
orgize = "0.1.0"
```
To parse an orgmode string, simply invoke the `Org::parse` function:
```rust
// Rust 2015 only
extern crate orgize;
```
use orgize::Org;
## Example
### Using Parser
Orgize parser acts like an event-based parser, which means it returns an
`Iterator` of `Event`s.
```rust
use orgize::Parser;
let parser = Parser::new(r#"* Title 1
let org = Org::parse(r#"* Title 1
*Section 1*
** Title 2
_Section 2_
@ -36,21 +17,97 @@ _Section 2_
/Section 3/
* Title 4
=Section 4="#);
```
for event in parser {
## Iter
The `Org::iter` function returns an iterator of `Event`s, each of which is
a simple wrapper around an `Element`.
```rust
for event in org.iter() {
// handling the event
}
```
### Using Render
**Note**: whether or not an element is a container, it will appear twice in a loop:
once as `Event::Start(element)` and once as `Event::End(element)`.
You can use the built-in `HtmlRender` to generate html string directly:
## Render html
You can call the `Org::html_default` function to generate html directly, which
uses the `DefaultHtmlHandler` internally:
```rust
use orgize::export::HtmlRender;
use std::io::{Cursor, Result};
let mut writer = Vec::new();
org.html_default(&mut writer).unwrap();
fn main() -> Result<()> {
assert_eq!(
String::from_utf8(writer).unwrap(),
"<main><h1>Title 1</h1><section><p><b>Section 1</b></p></section>\
<h2>Title 2</h2><section><p><u>Section 2</u></p></section>\
<h1>Title 3</h1><section><p><i>Section 3</i></p></section>\
<h1>Title 4</h1><section><p><code>Section 4</code></p></section></main>"
);
```
## Render html with custom HtmlHandler
To customize HTML rendering, simply implement the `HtmlHandler` trait and pass
it to the `Org::html` function.
The following code demonstrates how to add an id to every headline and return
your own error type while rendering.
```rust
#[derive(Debug)]
enum MyError {
IO(IOError),
Heading,
Utf8(FromUtf8Error),
}
// From<std::io::Error> trait is required for custom error type
impl From<IOError> for MyError {
fn from(err: IOError) -> Self {
MyError::IO(err)
}
}
impl From<FromUtf8Error> for MyError {
fn from(err: FromUtf8Error) -> Self {
MyError::Utf8(err)
}
}
struct MyHtmlHandler;
impl HtmlHandler<MyError> for MyHtmlHandler {
fn start<W: Write>(&mut self, mut w: W, element: &Element<'_>) -> Result<(), MyError> {
let mut default_handler = DefaultHtmlHandler;
match element {
Element::Headline { headline, .. } => {
if headline.level > 6 {
return Err(MyError::Heading);
} else {
let slugify = slugify!(headline.title);
write!(
w,
"<h{0}><a id=\"{1}\" href=\"#{1}\">{2}</a></h{0}>",
headline.level,
slugify,
Escape(headline.title),
)?;
}
}
// fallthrough to default handler
_ => default_handler.start(w, element)?,
}
Ok(())
}
}
fn main() -> Result<(), MyError> {
let contents = r"* Title 1
*Section 1*
** Title 2
@ -60,96 +117,33 @@ _Section 2_
* Title 4
=Section 4=";
let mut cursor = Cursor::new(Vec::new());
let mut render = HtmlRender::default(&mut cursor, &contents);
render.render()?;
let mut writer = Vec::new();
Org::parse(&contents).html(&mut writer, MyHtmlHandler)?;
assert_eq!(
String::from_utf8(cursor.into_inner()).unwrap(),
"<h1>Title 1</h1><section><p><b>Section 1</b></p></section>\
<h2>Title 2</h2><section><p><u>Section 2</u></p></section>\
<h1>Title 3</h1><section><p><i>Section 3</i></p></section>\
<h1>Title 4</h1><section><p><code>Section 4</code></p></section>"
String::from_utf8(writer)?,
"<main><h1><a id=\"title-1\" href=\"#title-1\">Title 1</a></h1><section><p><b>Section 1</b></p></section>\
<h2><a id=\"title-2\" href=\"#title-2\">Title 2</a></h2><section><p><u>Section 2</u></p></section>\
<h1><a id=\"title-3\" href=\"#title-3\">Title 3</a></h1><section><p><i>Section 3</i></p></section>\
<h1><a id=\"title-4\" href=\"#title-4\">Title 4</a></h1><section><p><code>Section 4</code></p></section></main>"
);
Ok(())
}
```
### Custom HtmlHandler
**Note**: as mentioned above, each element will appear twice while iterating,
and the handler will silently ignore all end events from non-container elements.
You can create your own handler by implementing `HtmlHandler` trait and passing
it to the `HtmlRender`.
So if you want to change how a non-container element renders, just redefine the start
function and leave the end function untouched.
The following example demonstrates how to add an anchor for every headline and
use your own error type.
## Serde
The `Org` struct already implements serde's `Serialize` trait, which means you can
freely serialize it into any format that serde supports, such as JSON:
```rust
use orgize::{export::*, headline::Headline};
use slugify::slugify;
use std::io::{Cursor, Error as IOError, Write};
use std::string::FromUtf8Error;
// custom error type
#[derive(Debug)]
enum Error {
IO(IOError),
Headline,
Utf8(FromUtf8Error),
}
// From<std::io::Error> trait is required for custom error type
impl From<IOError> for Error {
fn from(err: IOError) -> Error {
Error::IO(err)
}
}
struct CustomHtmlHandler;
impl<W: Write> HtmlHandler<W, Error> for CustomHtmlHandler {
fn headline_beg(&mut self, w: &mut W, hdl: Headline) -> Result<(), Error> {
if hdl.level > 6 {
Err(Error::Headline)
} else {
write!(
w,
r##"<h{}><a class="anchor" href="#{}">"##,
hdl.level,
slugify!(hdl.title),
)?;
self.escape(w, hdl.title)?;
Ok(write!(w, "</a></h{}>", hdl.level)?)
}
}
}
fn main() -> Result<(), Error> {
let contents = r"* Title 1
*Section 1*
** Title 2
_Section 2_
* Title 3
/Section 3/
* Title 4
=Section 4=";
let mut cursor = Cursor::new(Vec::new());
let mut render = HtmlRender::new(CustomHtmlHandler, &mut cursor, &contents);
render.render()?;
assert_eq!(
String::from_utf8(cursor.into_inner()).map_err(Error::Utf8)?,
"<h1><a class=\"anchor\" href=\"#title-1\">Title 1</a></h1><section><p><b>Section 1</b></p></section>\
<h2><a class=\"anchor\" href=\"#title-2\">Title 2</a></h2><section><p><u>Section 2</u></p></section>\
<h1><a class=\"anchor\" href=\"#title-3\">Title 3</a></h1><section><p><i>Section 3</i></p></section>\
<h1><a class=\"anchor\" href=\"#title-4\">Title 4</a></h1><section><p><code>Section 4</code></p></section>"
);
Ok(())
}
println!("{}", to_string(&org).unwrap());
```
## License

View file

@ -81,12 +81,12 @@ fn parse() {
);
assert_eq!(
Keyword::parse("#+ATTR_LATEX: :width 5cm"),
Keyword::parse("#+ATTR_LATEX: :width 5cm\n"),
Some((
"ATTR_LATEX",
None,
":width 5cm",
"#+ATTR_LATEX: :width 5cm".len()
"#+ATTR_LATEX: :width 5cm\n".len()
))
);

View file

@ -1,3 +1,5 @@
//! Org-mode elements module
mod block;
mod clock;
mod cookie;
@ -47,6 +49,12 @@ pub use self::{
use indextree::NodeId;
/// Org-mode element enum
///
/// Generally, each variant contains an element struct and
/// a set of properties which indicate the position of the
/// element in the original string.
///
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[cfg_attr(feature = "serde", serde(tag = "type"))]

View file

@ -1,16 +1,15 @@
//! A Rust library for parsing orgmode files.
//!
//! # Using Parser
//! # Parse
//!
//! Orgize parser acts like an event-based parser, which means it
//! returns an `Iterator` of [`Event`]s.
//! To parse an orgmode string, simply invoke the [`Org::parse`] function:
//!
//! [`Event`]: enum.Event.html
//! [`Org::parse`]: org/struct.Org.html#method.parse
//!
//! ```rust
//! use orgize::Parser;
//! use orgize::Org;
//!
//! let parser = Parser::new(r#"* Title 1
//! let org = Org::parse(r#"* Title 1
//! *Section 1*
//! ** Title 2
//! _Section 2_
@ -18,23 +17,140 @@
//! /Section 3/
//! * Title 4
//! =Section 4="#);
//! ```
//!
//! for event in parser {
//! # Iter
//!
//! The [`Org::iter`] function returns an iterator of [`Event`]s, each of which is
//! a simple wrapper around an [`Element`].
//!
//! [`Org::iter`]: org/struct.Org.html#method.iter
//! [`Event`]: iter/enum.Event.html
//! [`Element`]: elements/enum.Element.html
//!
//! ```rust
//! # use orgize::Org;
//! #
//! # let org = Org::parse(r#"* Title 1
//! # *Section 1*
//! # ** Title 2
//! # _Section 2_
//! # * Title 3
//! # /Section 3/
//! # * Title 4
//! # =Section 4="#);
//! #
//! for event in org.iter() {
//! // handling the event
//! }
//! ```
//!
//! # Using Render
//! **Note**: whether or not an element is a container, it will appear twice in a loop:
//! once as [`Event::Start(element)`] and once as [`Event::End(element)`].
//!
//! You can use the built-in [`HtmlRender`] to generate html string directly:
//! [`Event::Start(element)`]: iter/enum.Event.html#variant.Start
//! [`Event::End(element)`]: iter/enum.Event.html#variant.End
//!
//! [`HtmlRender`]: export/struct.HtmlRender.html
//! # Render html
//!
//! You can call the [`Org::html_default`] function to generate html directly, which
//! uses the [`DefaultHtmlHandler`] internally:
//!
//! [`Org::html_default`]: org/struct.Org.html#method.html_default
//! [`DefaultHtmlHandler`]: export/html/struct.DefaultHtmlHandler.html
//!
//! ```rust
//! use orgize::export::HtmlRender;
//! use std::io::{Cursor, Result};
//! # use orgize::Org;
//! #
//! # let org = Org::parse(r#"* Title 1
//! # *Section 1*
//! # ** Title 2
//! # _Section 2_
//! # * Title 3
//! # /Section 3/
//! # * Title 4
//! # =Section 4="#);
//! #
//! let mut writer = Vec::new();
//! org.html_default(&mut writer).unwrap();
//!
//! fn main() -> Result<()> {
//! assert_eq!(
//! String::from_utf8(writer).unwrap(),
//! "<main><h1>Title 1</h1><section><p><b>Section 1</b></p></section>\
//! <h2>Title 2</h2><section><p><u>Section 2</u></p></section>\
//! <h1>Title 3</h1><section><p><i>Section 3</i></p></section>\
//! <h1>Title 4</h1><section><p><code>Section 4</code></p></section></main>"
//! );
//! ```
//!
//! # Render html with custom HtmlHandler
//!
//! To customize HTML rendering, simply implement the [`HtmlHandler`] trait and pass
//! it to the [`Org::html`] function.
//!
//! [`HtmlHandler`]: export/html/trait.HtmlHandler.html
//! [`Org::html`]: org/struct.Org.html#method.html
//!
//! The following code demonstrates how to add an id to every headline and return
//! your own error type while rendering.
//!
//! ```rust
//! # use std::convert::From;
//! # use std::io::{Error as IOError, Write};
//! # use std::string::FromUtf8Error;
//! #
//! # use orgize::export::{html::Escape, DefaultHtmlHandler, HtmlHandler};
//! # use orgize::{Element, Org};
//! # use slugify::slugify;
//! #
//! #[derive(Debug)]
//! enum MyError {
//! IO(IOError),
//! Heading,
//! Utf8(FromUtf8Error),
//! }
//!
//! // From<std::io::Error> trait is required for custom error type
//! impl From<IOError> for MyError {
//! fn from(err: IOError) -> Self {
//! MyError::IO(err)
//! }
//! }
//!
//! impl From<FromUtf8Error> for MyError {
//! fn from(err: FromUtf8Error) -> Self {
//! MyError::Utf8(err)
//! }
//! }
//!
//! struct MyHtmlHandler;
//!
//! impl HtmlHandler<MyError> for MyHtmlHandler {
//! fn start<W: Write>(&mut self, mut w: W, element: &Element<'_>) -> Result<(), MyError> {
//! let mut default_handler = DefaultHtmlHandler;
//! match element {
//! Element::Headline { headline, .. } => {
//! if headline.level > 6 {
//! return Err(MyError::Heading);
//! } else {
//! let slugify = slugify!(headline.title);
//! write!(
//! w,
//! "<h{0}><a id=\"{1}\" href=\"#{1}\">{2}</a></h{0}>",
//! headline.level,
//! slugify,
//! Escape(headline.title),
//! )?;
//! }
//! }
//! // fallthrough to default handler
//! _ => default_handler.start(w, element)?,
//! }
//! Ok(())
//! }
//! }
//!
//! fn main() -> Result<(), MyError> {
//! let contents = r"* Title 1
//! *Section 1*
//! ** Title 2
@ -44,99 +160,45 @@
//! * Title 4
//! =Section 4=";
//!
//! let mut cursor = Cursor::new(Vec::new());
//! let mut render = HtmlRender::default(&mut cursor, &contents);
//!
//! render.render()?;
//!
//! let mut writer = Vec::new();
//! Org::parse(&contents).html(&mut writer, MyHtmlHandler)?;
//! assert_eq!(
//! String::from_utf8(cursor.into_inner()).unwrap(),
//! "<h1>Title 1</h1><section><p><b>Section 1</b></p></section>\
//! <h2>Title 2</h2><section><p><u>Section 2</u></p></section>\
//! <h1>Title 3</h1><section><p><i>Section 3</i></p></section>\
//! <h1>Title 4</h1><section><p><code>Section 4</code></p></section>"
//! String::from_utf8(writer)?,
//! "<main><h1><a id=\"title-1\" href=\"#title-1\">Title 1</a></h1><section><p><b>Section 1</b></p></section>\
//! <h2><a id=\"title-2\" href=\"#title-2\">Title 2</a></h2><section><p><u>Section 2</u></p></section>\
//! <h1><a id=\"title-3\" href=\"#title-3\">Title 3</a></h1><section><p><i>Section 3</i></p></section>\
//! <h1><a id=\"title-4\" href=\"#title-4\">Title 4</a></h1><section><p><code>Section 4</code></p></section></main>"
//! );
//!
//! Ok(())
//! }
//! ```
//!
//! # Custom HtmlHandler
//! **Note**: as mentioned above, each element will appear twice while iterating,
//! and the handler will silently ignore all end events from non-container elements.
//!
//! You can create your own handler by implementing [`HtmlHandler`] trait and passing
//! it to the [`HtmlRender`].
//! So if you want to change how a non-container element renders, just redefine the start
//! function and leave the end function untouched.
//!
//! The following example demonstrates how to add an anchor for every headline and use
//! your own error type.
//! # Serde
//!
//! [`HtmlHandler`]: export/trait.HtmlHandler.html
//! [`HtmlRender`]: export/struct.HtmlRender.html
//! The `Org` struct already implements serde's `Serialize` trait, which means you can
//! freely serialize it into any format that serde supports, such as JSON:
//!
//! ```rust
//! use orgize::{export::*, headline::Headline};
//! use slugify::slugify;
//! use std::io::{Cursor, Error as IOError, Write};
//! use std::string::FromUtf8Error;
//! use serde_json::to_string;
//! # use orgize::Org;
//! #
//! # let org = Org::parse(r#"* Title 1
//! # *Section 1*
//! # ** Title 2
//! # _Section 2_
//! # * Title 3
//! # /Section 3/
//! # * Title 4
//! # =Section 4="#);
//!
//! // custom error type
//! #[derive(Debug)]
//! enum Error {
//! IO(IOError),
//! Headline,
//! Utf8(FromUtf8Error),
//! }
//!
//! // From<std::io::Error> trait is required for custom error type
//! impl From<IOError> for Error {
//! fn from(err: IOError) -> Error {
//! Error::IO(err)
//! }
//! }
//!
//! struct CustomHtmlHandler;
//!
//! impl<W: Write> HtmlHandler<W, Error> for CustomHtmlHandler {
//! fn headline_beg(&mut self, w: &mut W, hdl: Headline) -> Result<(), Error> {
//! if hdl.level > 6 {
//! Err(Error::Headline)
//! } else {
//! write!(
//! w,
//! r##"<h{}><a class="anchor" href="#{}">"##,
//! hdl.level,
//! slugify!(hdl.title),
//! )?;
//! self.escape(w, hdl.title)?;
//! Ok(write!(w, "</a></h{}>", hdl.level)?)
//! }
//! }
//! }
//!
//! fn main() -> Result<(), Error> {
//! let contents = r"* Title 1
//! *Section 1*
//! ** Title 2
//! _Section 2_
//! * Title 3
//! /Section 3/
//! * Title 4
//! =Section 4=";
//!
//! let mut cursor = Cursor::new(Vec::new());
//! let mut render = HtmlRender::new(CustomHtmlHandler, &mut cursor, &contents);
//!
//! render.render()?;
//!
//! assert_eq!(
//! String::from_utf8(cursor.into_inner()).map_err(Error::Utf8)?,
//! "<h1><a class=\"anchor\" href=\"#title-1\">Title 1</a></h1><section><p><b>Section 1</b></p></section>\
//! <h2><a class=\"anchor\" href=\"#title-2\">Title 2</a></h2><section><p><u>Section 2</u></p></section>\
//! <h1><a class=\"anchor\" href=\"#title-3\">Title 3</a></h1><section><p><i>Section 3</i></p></section>\
//! <h1><a class=\"anchor\" href=\"#title-4\">Title 4</a></h1><section><p><code>Section 4</code></p></section>"
//! );
//!
//! Ok(())
//! }
//! println!("{}", to_string(&org).unwrap());
//! ```
pub mod elements;

View file

@ -1,6 +1,6 @@
use indextree::{Arena, NodeId};
use jetscii::bytes;
use memchr::{memchr, memchr_iter, memrchr_iter};
use memchr::{memchr, memchr_iter};
use std::io::{Error, Write};
use crate::elements::*;
@ -80,13 +80,11 @@ impl<'a> Org<'a> {
if begin < end {
let off = Headline::find_level(&self.text[begin..end], std::usize::MAX);
if off != 0 {
let (contents_begin, contents_end) =
skip_empty_lines(&self.text[begin..begin + off]);
let section = Element::Section {
begin,
end: begin + off,
contents_begin: begin + contents_begin,
contents_end: begin + contents_end,
contents_begin: begin,
contents_end: begin + off,
};
let new_node = self.arena.new_node(section);
node.append(new_node, &mut self.arena).unwrap();
@ -236,7 +234,7 @@ impl<'a> Org<'a> {
if let Some((ty, off)) = self.parse_element(begin, end) {
let new_node = self.arena.new_node(ty);
node.append(new_node, &mut self.arena).unwrap();
pos += off;
pos += off + skip_empty_lines(&text[off..]);
}
let mut last_end = pos;
@ -247,12 +245,17 @@ impl<'a> Org<'a> {
.iter()
.all(u8::is_ascii_whitespace)
{
let (end, _) = skip_empty_lines(&text[pos + i..]);
let end = skip_empty_lines(&text[pos + i..]);
let new_node = self.arena.new_node(Element::Paragraph {
begin: begin + last_end,
end: begin + pos + i + end,
contents_begin: begin + last_end,
contents_end: begin + pos,
contents_end: begin
+ if text.as_bytes()[pos - 1] == b'\n' {
pos - 1
} else {
pos
},
});
node.append(new_node, &mut self.arena).unwrap();
pos += i + end;
@ -263,13 +266,18 @@ impl<'a> Org<'a> {
begin: begin + last_end,
end: begin + pos,
contents_begin: begin + last_end,
contents_end: begin + pos,
contents_end: begin
+ if text.as_bytes()[pos - 1] == b'\n' {
pos - 1
} else {
pos
},
});
node.append(new_node, &mut self.arena).unwrap();
}
let new_node = self.arena.new_node(ty);
node.append(new_node, &mut self.arena).unwrap();
pos += off;
pos += off + skip_empty_lines(&text[pos + off..]);
last_end = pos;
} else {
pos += i + 1;
@ -755,9 +763,8 @@ impl<'a> Org<'a> {
}
}
fn skip_empty_lines(text: &str) -> (usize, usize) {
fn skip_empty_lines(text: &str) -> usize {
let mut i = 0;
let mut j = text.len();
for pos in memchr_iter(b'\n', text.as_bytes()) {
if text.as_bytes()[i..pos].iter().all(u8::is_ascii_whitespace) {
i = pos + 1;
@ -765,14 +772,15 @@ fn skip_empty_lines(text: &str) -> (usize, usize) {
break;
}
}
for pos in memrchr_iter(b'\n', text.as_bytes()) {
if text.as_bytes()[pos..j].iter().all(u8::is_ascii_whitespace) {
j = pos;
} else {
break;
}
i
}
(i, j)
// Checks that `skip_empty_lines` returns the byte length of the leading run of
// whitespace-only lines, and 0 when the first line already has content.
#[test]
fn test_skip_empty_lines() {
// No leading blank line: nothing is skipped, even if the first line
// starts with whitespace.
assert_eq!(skip_empty_lines("foo"), 0);
assert_eq!(skip_empty_lines(" foo"), 0);
// One or more leading blank (whitespace-only) lines are skipped in full,
// including their terminating newlines.
assert_eq!(skip_empty_lines(" \nfoo\n"), " \n".len());
assert_eq!(skip_empty_lines(" \n\n\nfoo\n"), " \n\n\n".len());
assert_eq!(skip_empty_lines(" \n \n\nfoo\n"), " \n \n\n".len());
// Indentation on the first non-blank line is not counted as skipped.
assert_eq!(skip_empty_lines(" \n \n\n foo\n"), " \n \n\n".len());
}