docs: update README.md and doc-test

This commit is contained in:
PoiScript 2019-06-28 00:13:05 +08:00
parent 21aba13d71
commit 5a9e085b61
5 changed files with 298 additions and 226 deletions

212
README.md
View file

@ -1,34 +1,15 @@
# Orgize # Orgize
Orgize is an Emacs Org-mode parser written in pure Rust. It behaves like a pull A Rust library for parsing orgmode files.
parser (returning an iterator of events) but not exactly.
Besides, orgize also provides some mechanism for exporting org-mode files to ## Parse
various formats, e.g. HTML.
## Usage To parse an orgmode string, simply invoke the `Org::parse` function:
```toml
[dependencies]
orgize = "0.1.0"
```
```rust ```rust
// Rust 2015 only use orgize::Org;
extern crate orgize;
```
## Example let org = Org::parse(r#"* Title 1
### Using Parser
Orgize parser acts like a event-based parser, which means it returns an
`Iterator` of `Event` s.
```rust
use orgize::Parser;
let parser = Parser::new(r#"* Title 1
*Section 1* *Section 1*
** Title 2 ** Title 2
_Section 2_ _Section 2_
@ -36,21 +17,97 @@ _Section 2_
/Section 3/ /Section 3/
* Title 4 * Title 4
=Section 4="#); =Section 4="#);
```
for event in parser { ## Iter
`Org::iter` function will return an iterator of `Event`s, which is
a simple wrapper of `Element`.
```rust
for event in org.iter() {
// handling the event // handling the event
} }
``` ```
### Using Render **Note**: whether an element is a container or not, it will appear two times in a loop.
One as `Event::Start(element)`, one as `Event::End(element)`.
You can use the built-in `HtmlRender` to generate html string directly: ## Render html
You can call the `Org::html_default` function to generate html directly, which
uses the `DefaultHtmlHandler` internally:
```rust ```rust
use orgize::export::HtmlRender; let mut writer = Vec::new();
use std::io::{Cursor, Result}; org.html_default(&mut writer).unwrap();
fn main() -> Result<()> { assert_eq!(
String::from_utf8(writer).unwrap(),
"<main><h1>Title 1</h1><section><p><b>Section 1</b></p></section>\
<h2>Title 2</h2><section><p><u>Section 2</u></p></section>\
<h1>Title 3</h1><section><p><i>Section 3</i></p></section>\
<h1>Title 4</h1><section><p><code>Section 4</code></p></section></main>"
);
```
## Render html with custom HtmlHandler
To customize html rendering, simply implement the `HtmlHandler` trait and pass
it to the `Org::html` function.
The following code demonstrates how to add an id for every headline and return
your own error type while rendering.
```rust
#[derive(Debug)]
enum MyError {
IO(IOError),
Heading,
Utf8(FromUtf8Error),
}
// From<std::io::Error> trait is required for custom error type
impl From<IOError> for MyError {
fn from(err: IOError) -> Self {
MyError::IO(err)
}
}
impl From<FromUtf8Error> for MyError {
fn from(err: FromUtf8Error) -> Self {
MyError::Utf8(err)
}
}
struct MyHtmlHandler;
impl HtmlHandler<MyError> for MyHtmlHandler {
fn start<W: Write>(&mut self, mut w: W, element: &Element<'_>) -> Result<(), MyError> {
let mut default_handler = DefaultHtmlHandler;
match element {
Element::Headline { headline, .. } => {
if headline.level > 6 {
return Err(MyError::Heading);
} else {
let slugify = slugify!(headline.title);
write!(
w,
"<h{0}><a id=\"{1}\" href=\"#{1}\">{2}</a></h{0}>",
headline.level,
slugify,
Escape(headline.title),
)?;
}
}
// fallthrough to default handler
_ => default_handler.start(w, element)?,
}
Ok(())
}
}
fn main() -> Result<(), MyError> {
let contents = r"* Title 1 let contents = r"* Title 1
*Section 1* *Section 1*
** Title 2 ** Title 2
@ -60,96 +117,33 @@ _Section 2_
* Title 4 * Title 4
=Section 4="; =Section 4=";
let mut cursor = Cursor::new(Vec::new()); let mut writer = Vec::new();
let mut render = HtmlRender::default(&mut cursor, &contents); Org::parse(&contents).html(&mut writer, MyHtmlHandler)?;
render.render()?;
assert_eq!( assert_eq!(
String::from_utf8(cursor.into_inner()).unwrap(), String::from_utf8(writer)?,
"<h1>Title 1</h1><section><p><b>Section 1</b></p></section>\ "<main><h1><a id=\"title-1\" href=\"#title-1\">Title 1</a></h1><section><p><b>Section 1</b></p></section>\
<h2>Title 2</h2><section><p><u>Section 2</u></p></section>\ <h2><a id=\"title-2\" href=\"#title-2\">Title 2</a></h2><section><p><u>Section 2</u></p></section>\
<h1>Title 3</h1><section><p><i>Section 3</i></p></section>\ <h1><a id=\"title-3\" href=\"#title-3\">Title 3</a></h1><section><p><i>Section 3</i></p></section>\
<h1>Title 4</h1><section><p><code>Section 4</code></p></section>" <h1><a id=\"title-4\" href=\"#title-4\">Title 4</a></h1><section><p><code>Section 4</code></p></section></main>"
); );
Ok(()) Ok(())
} }
``` ```
### Custom HtmlHandler **Note**: as mentioned above, each element will appear two times while iterating.
And the handler will silently ignore all end events from non-container elements.
You can create your own handler by implementing `HtmlHandler` trait and passing So if you want to change how a non-container element renders, just redefine the start
it to the `HtmlRender`. function and leave the end function untouched.
The following example demonstrates how to add an anchor for every headline and ## Serde
use your own error type.
`Org` struct has already implemented serde's `Serialize` trait. It means you can
freely serialize it into any format that serde supports such as json:
```rust ```rust
use orgize::{export::*, headline::Headline}; println!("{}", to_string(&org).unwrap());
use slugify::slugify;
use std::io::{Cursor, Error as IOError, Write};
use std::string::FromUtf8Error;
// custom error type
#[derive(Debug)]
enum Error {
IO(IOError),
Headline,
Utf8(FromUtf8Error),
}
// From<std::io::Error> trait is required for custom error type
impl From<IOError> for Error {
fn from(err: IOError) -> Error {
Error::IO(err)
}
}
struct CustomHtmlHandler;
impl<W: Write> HtmlHandler<W, Error> for CustomHtmlHandler {
fn headline_beg(&mut self, w: &mut W, hdl: Headline) -> Result<(), Error> {
if hdl.level > 6 {
Err(Error::Headline)
} else {
write!(
w,
r##"<h{}><a class="anchor" href="#{}">"##,
hdl.level,
slugify!(hdl.title),
)?;
self.escape(w, hdl.title)?;
Ok(write!(w, "</a></h{}>", hdl.level)?)
}
}
}
fn main() -> Result<(), Error> {
let contents = r"* Title 1
*Section 1*
** Title 2
_Section 2_
* Title 3
/Section 3/
* Title 4
=Section 4=";
let mut cursor = Cursor::new(Vec::new());
let mut render = HtmlRender::new(CustomHtmlHandler, &mut cursor, &contents);
render.render()?;
assert_eq!(
String::from_utf8(cursor.into_inner()).map_err(Error::Utf8)?,
"<h1><a class=\"anchor\" href=\"#title-1\">Title 1</a></h1><section><p><b>Section 1</b></p></section>\
<h2><a class=\"anchor\" href=\"#title-2\">Title 2</a></h2><section><p><u>Section 2</u></p></section>\
<h1><a class=\"anchor\" href=\"#title-3\">Title 3</a></h1><section><p><i>Section 3</i></p></section>\
<h1><a class=\"anchor\" href=\"#title-4\">Title 4</a></h1><section><p><code>Section 4</code></p></section>"
);
Ok(())
}
``` ```
## License ## License

View file

@ -81,12 +81,12 @@ fn parse() {
); );
assert_eq!( assert_eq!(
Keyword::parse("#+ATTR_LATEX: :width 5cm"), Keyword::parse("#+ATTR_LATEX: :width 5cm\n"),
Some(( Some((
"ATTR_LATEX", "ATTR_LATEX",
None, None,
":width 5cm", ":width 5cm",
"#+ATTR_LATEX: :width 5cm".len() "#+ATTR_LATEX: :width 5cm\n".len()
)) ))
); );

View file

@ -1,3 +1,5 @@
//! Org-mode elements module
mod block; mod block;
mod clock; mod clock;
mod cookie; mod cookie;
@ -47,6 +49,12 @@ pub use self::{
use indextree::NodeId; use indextree::NodeId;
/// Org-mode element enum
///
/// Generally, each variant contains an element struct and
/// a set of properties which indicate the position of the
/// element in the original string.
///
#[derive(Debug)] #[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))] #[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[cfg_attr(feature = "serde", serde(tag = "type"))] #[cfg_attr(feature = "serde", serde(tag = "type"))]

View file

@ -1,16 +1,15 @@
//! A Rust library for parsing orgmode files. //! A Rust library for parsing orgmode files.
//! //!
//! # Using Parser //! # Parse
//! //!
//! Orgize parser acts like an event-based parser, which means it //! To parse an orgmode string, simply invoke the [`Org::parse`] function:
//! returns an `Iterator` of [`Event`] s.
//! //!
//! [`Event`]: enum.Event.html //! [`Org::parse`]: org/struct.Org.html#method.parse
//! //!
//! ```rust //! ```rust
//! use orgize::Parser; //! use orgize::Org;
//! //!
//! let parser = Parser::new(r#"* Title 1 //! let org = Org::parse(r#"* Title 1
//! *Section 1* //! *Section 1*
//! ** Title 2 //! ** Title 2
//! _Section 2_ //! _Section 2_
@ -18,23 +17,140 @@
//! /Section 3/ //! /Section 3/
//! * Title 4 //! * Title 4
//! =Section 4="#); //! =Section 4="#);
//! ```
//! //!
//! for event in parser { //! # Iter
//!
//! [`Org::iter`] function will return an iterator of [`Event`]s, which is
//! a simple wrapper of [`Element`].
//!
//! [`Org::iter`]: org/struct.Org.html#method.iter
//! [`Event`]: iter/enum.Event.html
//! [`Element`]: elements/enum.Element.html
//!
//! ```rust
//! # use orgize::Org;
//! #
//! # let org = Org::parse(r#"* Title 1
//! # *Section 1*
//! # ** Title 2
//! # _Section 2_
//! # * Title 3
//! # /Section 3/
//! # * Title 4
//! # =Section 4="#);
//! #
//! for event in org.iter() {
//! // handling the event //! // handling the event
//! } //! }
//! ``` //! ```
//! //!
//! # Using Render //! **Note**: whether an element is a container or not, it will appear two times in a loop.
//! One as [`Event::Start(element)`], one as [`Event::End(element)`].
//! //!
//! You can use the built-in [`HtmlRender`] to generate html string directly: //! [`Event::Start(element)`]: iter/enum.Event.html#variant.Start
//! [`Event::End(element)`]: iter/enum.Event.html#variant.End
//! //!
//! [`HtmlRender`]: export/struct.HtmlRender.html //! # Render html
//!
//! You can call the [`Org::html_default`] function to generate html directly, which
//! uses the [`DefaultHtmlHandler`] internally:
//!
//! [`Org::html_default`]: org/struct.Org.html#method.html_default
//! [`DefaultHtmlHandler`]: export/html/struct.DefaultHtmlHandler.html
//! //!
//! ```rust //! ```rust
//! use orgize::export::HtmlRender; //! # use orgize::Org;
//! use std::io::{Cursor, Result}; //! #
//! # let org = Org::parse(r#"* Title 1
//! # *Section 1*
//! # ** Title 2
//! # _Section 2_
//! # * Title 3
//! # /Section 3/
//! # * Title 4
//! # =Section 4="#);
//! #
//! let mut writer = Vec::new();
//! org.html_default(&mut writer).unwrap();
//! //!
//! fn main() -> Result<()> { //! assert_eq!(
//! String::from_utf8(writer).unwrap(),
//! "<main><h1>Title 1</h1><section><p><b>Section 1</b></p></section>\
//! <h2>Title 2</h2><section><p><u>Section 2</u></p></section>\
//! <h1>Title 3</h1><section><p><i>Section 3</i></p></section>\
//! <h1>Title 4</h1><section><p><code>Section 4</code></p></section></main>"
//! );
//! ```
//!
//! # Render html with custom HtmlHandler
//!
//! To customize html rendering, simply implement the [`HtmlHandler`] trait and pass
//! it to the [`Org::html`] function.
//!
//! [`HtmlHandler`]: export/html/trait.HtmlHandler.html
//! [`Org::html`]: org/struct.Org.html#method.html
//!
//! The following code demonstrates how to add an id for every headline and return
//! your own error type while rendering.
//!
//! ```rust
//! # use std::convert::From;
//! # use std::io::{Error as IOError, Write};
//! # use std::string::FromUtf8Error;
//! #
//! # use orgize::export::{html::Escape, DefaultHtmlHandler, HtmlHandler};
//! # use orgize::{Element, Org};
//! # use slugify::slugify;
//! #
//! #[derive(Debug)]
//! enum MyError {
//! IO(IOError),
//! Heading,
//! Utf8(FromUtf8Error),
//! }
//!
//! // From<std::io::Error> trait is required for custom error type
//! impl From<IOError> for MyError {
//! fn from(err: IOError) -> Self {
//! MyError::IO(err)
//! }
//! }
//!
//! impl From<FromUtf8Error> for MyError {
//! fn from(err: FromUtf8Error) -> Self {
//! MyError::Utf8(err)
//! }
//! }
//!
//! struct MyHtmlHandler;
//!
//! impl HtmlHandler<MyError> for MyHtmlHandler {
//! fn start<W: Write>(&mut self, mut w: W, element: &Element<'_>) -> Result<(), MyError> {
//! let mut default_handler = DefaultHtmlHandler;
//! match element {
//! Element::Headline { headline, .. } => {
//! if headline.level > 6 {
//! return Err(MyError::Heading);
//! } else {
//! let slugify = slugify!(headline.title);
//! write!(
//! w,
//! "<h{0}><a id=\"{1}\" href=\"#{1}\">{2}</a></h{0}>",
//! headline.level,
//! slugify,
//! Escape(headline.title),
//! )?;
//! }
//! }
//! // fallthrough to default handler
//! _ => default_handler.start(w, element)?,
//! }
//! Ok(())
//! }
//! }
//!
//! fn main() -> Result<(), MyError> {
//! let contents = r"* Title 1 //! let contents = r"* Title 1
//! *Section 1* //! *Section 1*
//! ** Title 2 //! ** Title 2
@ -44,99 +160,45 @@
//! * Title 4 //! * Title 4
//! =Section 4="; //! =Section 4=";
//! //!
//! let mut cursor = Cursor::new(Vec::new()); //! let mut writer = Vec::new();
//! let mut render = HtmlRender::default(&mut cursor, &contents); //! Org::parse(&contents).html(&mut writer, MyHtmlHandler)?;
//!
//! render.render()?;
//!
//! assert_eq!( //! assert_eq!(
//! String::from_utf8(cursor.into_inner()).unwrap(), //! String::from_utf8(writer)?,
//! "<h1>Title 1</h1><section><p><b>Section 1</b></p></section>\ //! "<main><h1><a id=\"title-1\" href=\"#title-1\">Title 1</a></h1><section><p><b>Section 1</b></p></section>\
//! <h2>Title 2</h2><section><p><u>Section 2</u></p></section>\ //! <h2><a id=\"title-2\" href=\"#title-2\">Title 2</a></h2><section><p><u>Section 2</u></p></section>\
//! <h1>Title 3</h1><section><p><i>Section 3</i></p></section>\ //! <h1><a id=\"title-3\" href=\"#title-3\">Title 3</a></h1><section><p><i>Section 3</i></p></section>\
//! <h1>Title 4</h1><section><p><code>Section 4</code></p></section>" //! <h1><a id=\"title-4\" href=\"#title-4\">Title 4</a></h1><section><p><code>Section 4</code></p></section></main>"
//! ); //! );
//! //!
//! Ok(()) //! Ok(())
//! } //! }
//! ``` //! ```
//! //!
//! # Custom HtmlHandler //! **Note**: as mentioned above, each element will appear two times while iterating.
//! And the handler will silently ignore all end events from non-container elements.
//! //!
//! You can create your own handler by implementing [`HtmlHandler`] trait and passing //! So if you want to change how a non-container element renders, just redefine the start
//! it to the [`HtmlRender`]. //! function and leave the end function untouched.
//! //!
//! The following example demonstrates how to add an anchor for every headline and use //! # Serde
//! your own error type.
//! //!
//! [`HtmlHandler`]: export/trait.HtmlHandler.html //! `Org` struct has already implemented serde's `Serialize` trait. It means you can
//! [`HtmlRender`]: export/struct.HtmlRender.html //! freely serialize it into any format that serde supports such as json:
//! //!
//! ```rust //! ```rust
//! use orgize::{export::*, headline::Headline}; //! use serde_json::to_string;
//! use slugify::slugify; //! # use orgize::Org;
//! use std::io::{Cursor, Error as IOError, Write}; //! #
//! use std::string::FromUtf8Error; //! # let org = Org::parse(r#"* Title 1
//! # *Section 1*
//! # ** Title 2
//! # _Section 2_
//! # * Title 3
//! # /Section 3/
//! # * Title 4
//! # =Section 4="#);
//! //!
//! // custom error type //! println!("{}", to_string(&org).unwrap());
//! #[derive(Debug)]
//! enum Error {
//! IO(IOError),
//! Headline,
//! Utf8(FromUtf8Error),
//! }
//!
//! // From<std::io::Error> trait is required for custom error type
//! impl From<IOError> for Error {
//! fn from(err: IOError) -> Error {
//! Error::IO(err)
//! }
//! }
//!
//! struct CustomHtmlHandler;
//!
//! impl<W: Write> HtmlHandler<W, Error> for CustomHtmlHandler {
//! fn headline_beg(&mut self, w: &mut W, hdl: Headline) -> Result<(), Error> {
//! if hdl.level > 6 {
//! Err(Error::Headline)
//! } else {
//! write!(
//! w,
//! r##"<h{}><a class="anchor" href="#{}">"##,
//! hdl.level,
//! slugify!(hdl.title),
//! )?;
//! self.escape(w, hdl.title)?;
//! Ok(write!(w, "</a></h{}>", hdl.level)?)
//! }
//! }
//! }
//!
//! fn main() -> Result<(), Error> {
//! let contents = r"* Title 1
//! *Section 1*
//! ** Title 2
//! _Section 2_
//! * Title 3
//! /Section 3/
//! * Title 4
//! =Section 4=";
//!
//! let mut cursor = Cursor::new(Vec::new());
//! let mut render = HtmlRender::new(CustomHtmlHandler, &mut cursor, &contents);
//!
//! render.render()?;
//!
//! assert_eq!(
//! String::from_utf8(cursor.into_inner()).map_err(Error::Utf8)?,
//! "<h1><a class=\"anchor\" href=\"#title-1\">Title 1</a></h1><section><p><b>Section 1</b></p></section>\
//! <h2><a class=\"anchor\" href=\"#title-2\">Title 2</a></h2><section><p><u>Section 2</u></p></section>\
//! <h1><a class=\"anchor\" href=\"#title-3\">Title 3</a></h1><section><p><i>Section 3</i></p></section>\
//! <h1><a class=\"anchor\" href=\"#title-4\">Title 4</a></h1><section><p><code>Section 4</code></p></section>"
//! );
//!
//! Ok(())
//! }
//! ``` //! ```
pub mod elements; pub mod elements;

View file

@ -1,6 +1,6 @@
use indextree::{Arena, NodeId}; use indextree::{Arena, NodeId};
use jetscii::bytes; use jetscii::bytes;
use memchr::{memchr, memchr_iter, memrchr_iter}; use memchr::{memchr, memchr_iter};
use std::io::{Error, Write}; use std::io::{Error, Write};
use crate::elements::*; use crate::elements::*;
@ -80,13 +80,11 @@ impl<'a> Org<'a> {
if begin < end { if begin < end {
let off = Headline::find_level(&self.text[begin..end], std::usize::MAX); let off = Headline::find_level(&self.text[begin..end], std::usize::MAX);
if off != 0 { if off != 0 {
let (contents_begin, contents_end) =
skip_empty_lines(&self.text[begin..begin + off]);
let section = Element::Section { let section = Element::Section {
begin, begin,
end: begin + off, end: begin + off,
contents_begin: begin + contents_begin, contents_begin: begin,
contents_end: begin + contents_end, contents_end: begin + off,
}; };
let new_node = self.arena.new_node(section); let new_node = self.arena.new_node(section);
node.append(new_node, &mut self.arena).unwrap(); node.append(new_node, &mut self.arena).unwrap();
@ -236,7 +234,7 @@ impl<'a> Org<'a> {
if let Some((ty, off)) = self.parse_element(begin, end) { if let Some((ty, off)) = self.parse_element(begin, end) {
let new_node = self.arena.new_node(ty); let new_node = self.arena.new_node(ty);
node.append(new_node, &mut self.arena).unwrap(); node.append(new_node, &mut self.arena).unwrap();
pos += off; pos += off + skip_empty_lines(&text[off..]);
} }
let mut last_end = pos; let mut last_end = pos;
@ -247,12 +245,17 @@ impl<'a> Org<'a> {
.iter() .iter()
.all(u8::is_ascii_whitespace) .all(u8::is_ascii_whitespace)
{ {
let (end, _) = skip_empty_lines(&text[pos + i..]); let end = skip_empty_lines(&text[pos + i..]);
let new_node = self.arena.new_node(Element::Paragraph { let new_node = self.arena.new_node(Element::Paragraph {
begin: begin + last_end, begin: begin + last_end,
end: begin + pos + i + end, end: begin + pos + i + end,
contents_begin: begin + last_end, contents_begin: begin + last_end,
contents_end: begin + pos, contents_end: begin
+ if text.as_bytes()[pos - 1] == b'\n' {
pos - 1
} else {
pos
},
}); });
node.append(new_node, &mut self.arena).unwrap(); node.append(new_node, &mut self.arena).unwrap();
pos += i + end; pos += i + end;
@ -263,13 +266,18 @@ impl<'a> Org<'a> {
begin: begin + last_end, begin: begin + last_end,
end: begin + pos, end: begin + pos,
contents_begin: begin + last_end, contents_begin: begin + last_end,
contents_end: begin + pos, contents_end: begin
+ if text.as_bytes()[pos - 1] == b'\n' {
pos - 1
} else {
pos
},
}); });
node.append(new_node, &mut self.arena).unwrap(); node.append(new_node, &mut self.arena).unwrap();
} }
let new_node = self.arena.new_node(ty); let new_node = self.arena.new_node(ty);
node.append(new_node, &mut self.arena).unwrap(); node.append(new_node, &mut self.arena).unwrap();
pos += off; pos += off + skip_empty_lines(&text[pos + off..]);
last_end = pos; last_end = pos;
} else { } else {
pos += i + 1; pos += i + 1;
@ -755,9 +763,8 @@ impl<'a> Org<'a> {
} }
} }
fn skip_empty_lines(text: &str) -> (usize, usize) { fn skip_empty_lines(text: &str) -> usize {
let mut i = 0; let mut i = 0;
let mut j = text.len();
for pos in memchr_iter(b'\n', text.as_bytes()) { for pos in memchr_iter(b'\n', text.as_bytes()) {
if text.as_bytes()[i..pos].iter().all(u8::is_ascii_whitespace) { if text.as_bytes()[i..pos].iter().all(u8::is_ascii_whitespace) {
i = pos + 1; i = pos + 1;
@ -765,14 +772,15 @@ fn skip_empty_lines(text: &str) -> (usize, usize) {
break; break;
} }
} }
i
for pos in memrchr_iter(b'\n', text.as_bytes()) { }
if text.as_bytes()[pos..j].iter().all(u8::is_ascii_whitespace) {
j = pos; #[test]
} else { fn test_skip_empty_lines() {
break; assert_eq!(skip_empty_lines("foo"), 0);
} assert_eq!(skip_empty_lines(" foo"), 0);
} assert_eq!(skip_empty_lines(" \nfoo\n"), " \n".len());
assert_eq!(skip_empty_lines(" \n\n\nfoo\n"), " \n\n\n".len());
(i, j) assert_eq!(skip_empty_lines(" \n \n\nfoo\n"), " \n \n\n".len());
assert_eq!(skip_empty_lines(" \n \n\n foo\n"), " \n \n\n".len());
} }