diff --git a/README.md b/README.md
index 3cfcb84..4846c72 100644
--- a/README.md
+++ b/README.md
@@ -1,34 +1,15 @@
# Orgize
-Orgize is a Emacs Org-mode parser written by pure Rust. It behaves like a pull
-parser (returning an iterator of events) but not exactly.
+A Rust library for parsing orgmode files.
-Besides, orgize also provides some mechanism for exporting org-mode files to
-various formats, e.g. HTML.
+## Parse
-## Usage
-
-```toml
-[dependencies]
-orgize = "0.1.0"
-```
+To parse an orgmode string, simply invoke the `Org::parse` function:
```rust
-// Rust 2015 only
-extern crate orgize;
-```
+use orgize::Org;
-## Example
-
-### Using Parser
-
-Orgize parser acts like a event-based parser, which means it returns an
-`Iterator` of `Event` s.
-
-```rust
-use orgize::Parser;
-
-let parser = Parser::new(r#"* Title 1
+let org = Org::parse(r#"* Title 1
*Section 1*
** Title 2
_Section 2_
@@ -36,21 +17,97 @@ _Section 2_
/Section 3/
* Title 4
=Section 4="#);
+```
-for event in parser {
+## Iter
+
+The `Org::iter` function returns an iterator of `Event`s, each of which is
+a simple wrapper around an `Element`.
+
+```rust
+for event in org.iter() {
// handling the event
}
```
-### Using Render
+**Note**: whether or not an element is a container, it will appear twice in a loop:
+once as `Event::Start(element)` and once as `Event::End(element)`.
-You can use the built-in `HtmlRender` to generate html string directly:
+## Render html
+
+You can call the `Org::html_default` function to generate html directly, which
+uses the `DefaultHtmlHandler` internally:
```rust
-use orgize::export::HtmlRender;
-use std::io::{Cursor, Result};
+let mut writer = Vec::new();
+org.html_default(&mut writer).unwrap();
-fn main() -> Result<()> {
+assert_eq!(
+ String::from_utf8(writer).unwrap(),
+ "Title 1
\
+ Title 2
\
+ Title 3
\
+ Title 4
"
+);
+```
+
+## Render html with custom HtmlHandler
+
+To customize html rendering, simply implement the `HtmlHandler` trait and pass
+it to the `Org::html` function.
+
+The following code demonstrates how to add an id to every headline and return
+your own error type while rendering.
+
+```rust
+#[derive(Debug)]
+enum MyError {
+ IO(IOError),
+ Heading,
+ Utf8(FromUtf8Error),
+}
+
+// From trait is required for custom error type
+impl From<IOError> for MyError {
+ fn from(err: IOError) -> Self {
+ MyError::IO(err)
+ }
+}
+
+impl From<FromUtf8Error> for MyError {
+ fn from(err: FromUtf8Error) -> Self {
+ MyError::Utf8(err)
+ }
+}
+
+struct MyHtmlHandler;
+
+impl HtmlHandler<MyError> for MyHtmlHandler {
+ fn start<W: Write>(&mut self, mut w: W, element: &Element<'_>) -> Result<(), MyError> {
+ let mut default_handler = DefaultHtmlHandler;
+ match element {
+ Element::Headline { headline, .. } => {
+ if headline.level > 6 {
+ return Err(MyError::Heading);
+ } else {
+ let slugify = slugify!(headline.title);
+ write!(
+ w,
+ "{2}",
+ headline.level,
+ slugify,
+ Escape(headline.title),
+ )?;
+ }
+ }
+ // fallthrough to default handler
+ _ => default_handler.start(w, element)?,
+ }
+ Ok(())
+ }
+}
+
+fn main() -> Result<(), MyError> {
let contents = r"* Title 1
*Section 1*
** Title 2
@@ -60,96 +117,33 @@ _Section 2_
* Title 4
=Section 4=";
- let mut cursor = Cursor::new(Vec::new());
- let mut render = HtmlRender::default(&mut cursor, &contents);
-
- render.render()?;
-
+ let mut writer = Vec::new();
+ Org::parse(&contents).html(&mut writer, MyHtmlHandler)?;
assert_eq!(
- String::from_utf8(cursor.into_inner()).unwrap(),
- "Title 1
\
- Title 2
\
- Title 3
\
- Title 4
"
+ String::from_utf8(writer)?,
+ "\
+ \
+ \
+ "
);
Ok(())
}
```
-### Custom HtmlHandler
+**Note**: as mentioned above, each element appears twice while iterating,
+and the handler silently ignores all end events from non-container elements.
-You can create your own handler by implementing `HtmlHandler` trait and passing
-it to the `HtmlRender`.
+So if you want to change how a non-container element renders, just redefine the start
+function and leave the end function untouched.
-The following example demonstrates how to add an anchor for every headline and
-use your own error type.
+## Serde
+
+The `Org` struct already implements serde's `Serialize` trait, which means you can
+freely serialize it into any format that serde supports, such as json:
```rust
-use orgize::{export::*, headline::Headline};
-use slugify::slugify;
-use std::io::{Cursor, Error as IOError, Write};
-use std::string::FromUtf8Error;
-
-// custom error type
-#[derive(Debug)]
-enum Error {
- IO(IOError),
- Headline,
- Utf8(FromUtf8Error),
-}
-
-// From trait is required for custom error type
-impl From for Error {
- fn from(err: IOError) -> Error {
- Error::IO(err)
- }
-}
-
-struct CustomHtmlHandler;
-
-impl HtmlHandler for CustomHtmlHandler {
- fn headline_beg(&mut self, w: &mut W, hdl: Headline) -> Result<(), Error> {
- if hdl.level > 6 {
- Err(Error::Headline)
- } else {
- write!(
- w,
- r##""##,
- hdl.level,
- slugify!(hdl.title),
- )?;
- self.escape(w, hdl.title)?;
- Ok(write!(w, "", hdl.level)?)
- }
- }
-}
-
-fn main() -> Result<(), Error> {
- let contents = r"* Title 1
-*Section 1*
-** Title 2
-_Section 2_
-* Title 3
-/Section 3/
-* Title 4
-=Section 4=";
-
- let mut cursor = Cursor::new(Vec::new());
- let mut render = HtmlRender::new(CustomHtmlHandler, &mut cursor, &contents);
-
- render.render()?;
-
- assert_eq!(
- String::from_utf8(cursor.into_inner()).map_err(Error::Utf8)?,
- "\
- \
- \
- "
- );
-
- Ok(())
-}
+println!("{}", to_string(&org).unwrap());
```
## License
diff --git a/src/elements/keyword.rs b/src/elements/keyword.rs
index 89033cf..a232548 100644
--- a/src/elements/keyword.rs
+++ b/src/elements/keyword.rs
@@ -81,12 +81,12 @@ fn parse() {
);
assert_eq!(
- Keyword::parse("#+ATTR_LATEX: :width 5cm"),
+ Keyword::parse("#+ATTR_LATEX: :width 5cm\n"),
Some((
"ATTR_LATEX",
None,
":width 5cm",
- "#+ATTR_LATEX: :width 5cm".len()
+ "#+ATTR_LATEX: :width 5cm\n".len()
))
);
diff --git a/src/elements/mod.rs b/src/elements/mod.rs
index 03eb75a..c5b89dd 100644
--- a/src/elements/mod.rs
+++ b/src/elements/mod.rs
@@ -1,3 +1,5 @@
+//! Org-mode elements module
+
mod block;
mod clock;
mod cookie;
@@ -47,6 +49,12 @@ pub use self::{
use indextree::NodeId;
+/// Org-mode element enum
+///
+/// Generally, each variant contains an element struct and
+/// a set of properties which indicate the position of the
+/// element in the original string.
+///
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[cfg_attr(feature = "serde", serde(tag = "type"))]
diff --git a/src/lib.rs b/src/lib.rs
index 866a7ba..82bd890 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,16 +1,15 @@
//! A Rust library for parsing orgmode files.
//!
-//! # Using Parser
+//! # Parse
//!
-//! Orgize parser acts like a event-based parser, which means it
-//! returns an `Iterator` of [`Event`] s.
+//! To parse an orgmode string, simply invoke the [`Org::parse`] function:
//!
-//! [`Event`]: enum.Event.html
+//! [`Org::parse`]: org/struct.Org.html#method.parse
//!
//! ```rust
-//! use orgize::Parser;
+//! use orgize::Org;
//!
-//! let parser = Parser::new(r#"* Title 1
+//! let org = Org::parse(r#"* Title 1
//! *Section 1*
//! ** Title 2
//! _Section 2_
@@ -18,23 +17,140 @@
//! /Section 3/
//! * Title 4
//! =Section 4="#);
+//! ```
//!
-//! for event in parser {
+//! # Iter
+//!
+//! The [`Org::iter`] function returns an iterator of [`Event`]s, each of which is
+//! a simple wrapper around an [`Element`].
+//!
+//! [`Org::iter`]: org/struct.Org.html#method.iter
+//! [`Event`]: iter/enum.Event.html
+//! [`Element`]: elements/enum.Element.html
+//!
+//! ```rust
+//! # use orgize::Org;
+//! #
+//! # let org = Org::parse(r#"* Title 1
+//! # *Section 1*
+//! # ** Title 2
+//! # _Section 2_
+//! # * Title 3
+//! # /Section 3/
+//! # * Title 4
+//! # =Section 4="#);
+//! #
+//! for event in org.iter() {
//! // handling the event
//! }
//! ```
//!
-//! # Using Render
+//! **Note**: whether or not an element is a container, it will appear twice in a loop:
+//! once as [`Event::Start(element)`] and once as [`Event::End(element)`].
//!
-//! You can use the built-in [`HtmlRender`] to generate html string directly:
+//! [`Event::Start(element)`]: iter/enum.Event.html#variant.Start
+//! [`Event::End(element)`]: iter/enum.Event.html#variant.End
//!
-//! [`HtmlRender`]: export/struct.HtmlRender.html
+//! # Render html
+//!
+//! You can call the [`Org::html_default`] function to generate html directly, which
+//! uses the [`DefaultHtmlHandler`] internally:
+//!
+//! [`Org::html_default`]: org/struct.Org.html#method.html_default
+//! [`DefaultHtmlHandler`]: export/html/struct.DefaultHtmlHandler.html
//!
//! ```rust
-//! use orgize::export::HtmlRender;
-//! use std::io::{Cursor, Result};
+//! # use orgize::Org;
+//! #
+//! # let org = Org::parse(r#"* Title 1
+//! # *Section 1*
+//! # ** Title 2
+//! # _Section 2_
+//! # * Title 3
+//! # /Section 3/
+//! # * Title 4
+//! # =Section 4="#);
+//! #
+//! let mut writer = Vec::new();
+//! org.html_default(&mut writer).unwrap();
//!
-//! fn main() -> Result<()> {
+//! assert_eq!(
+//! String::from_utf8(writer).unwrap(),
+//! "Title 1
\
+//! Title 2
\
+//! Title 3
\
+//! Title 4
"
+//! );
+//! ```
+//!
+//! # Render html with custom HtmlHandler
+//!
+//! To customize html rendering, simply implement the [`HtmlHandler`] trait and pass
+//! it to the [`Org::html`] function.
+//!
+//! [`HtmlHandler`]: export/html/trait.HtmlHandler.html
+//! [`Org::html`]: org/struct.Org.html#method.html
+//!
+//! The following code demonstrates how to add an id to every headline and return
+//! your own error type while rendering.
+//!
+//! ```rust
+//! # use std::convert::From;
+//! # use std::io::{Error as IOError, Write};
+//! # use std::string::FromUtf8Error;
+//! #
+//! # use orgize::export::{html::Escape, DefaultHtmlHandler, HtmlHandler};
+//! # use orgize::{Element, Org};
+//! # use slugify::slugify;
+//! #
+//! #[derive(Debug)]
+//! enum MyError {
+//! IO(IOError),
+//! Heading,
+//! Utf8(FromUtf8Error),
+//! }
+//!
+//! // From trait is required for custom error type
+//! impl From<IOError> for MyError {
+//! fn from(err: IOError) -> Self {
+//! MyError::IO(err)
+//! }
+//! }
+//!
+//! impl From<FromUtf8Error> for MyError {
+//! fn from(err: FromUtf8Error) -> Self {
+//! MyError::Utf8(err)
+//! }
+//! }
+//!
+//! struct MyHtmlHandler;
+//!
+//! impl HtmlHandler<MyError> for MyHtmlHandler {
+//! fn start<W: Write>(&mut self, mut w: W, element: &Element<'_>) -> Result<(), MyError> {
+//! let mut default_handler = DefaultHtmlHandler;
+//! match element {
+//! Element::Headline { headline, .. } => {
+//! if headline.level > 6 {
+//! return Err(MyError::Heading);
+//! } else {
+//! let slugify = slugify!(headline.title);
+//! write!(
+//! w,
+//! "{2}",
+//! headline.level,
+//! slugify,
+//! Escape(headline.title),
+//! )?;
+//! }
+//! }
+//! // fallthrough to default handler
+//! _ => default_handler.start(w, element)?,
+//! }
+//! Ok(())
+//! }
+//! }
+//!
+//! fn main() -> Result<(), MyError> {
//! let contents = r"* Title 1
//! *Section 1*
//! ** Title 2
@@ -44,99 +160,45 @@
//! * Title 4
//! =Section 4=";
//!
-//! let mut cursor = Cursor::new(Vec::new());
-//! let mut render = HtmlRender::default(&mut cursor, &contents);
-//!
-//! render.render()?;
-//!
+//! let mut writer = Vec::new();
+//! Org::parse(&contents).html(&mut writer, MyHtmlHandler)?;
//! assert_eq!(
-//! String::from_utf8(cursor.into_inner()).unwrap(),
-//! "Title 1
\
-//! Title 2
\
-//! Title 3
\
-//! Title 4
"
+//! String::from_utf8(writer)?,
+//! "\
+//! \
+//! \
+//! "
//! );
//!
//! Ok(())
//! }
//! ```
//!
-//! # Custom HtmlHandler
+//! **Note**: as mentioned above, each element appears twice while iterating,
+//! and the handler silently ignores all end events from non-container elements.
//!
-//! You can create your own handler by implementing [`HtmlHandler`] trait and passing
-//! it to the [`HtmlRender`].
+//! So if you want to change how a non-container element renders, just redefine the start
+//! function and leave the end function untouched.
//!
-//! The following example demonstrates how to add an anchor for every headline and use
-//! your own error type.
+//! # Serde
//!
-//! [`HtmlHandler`]: export/trait.HtmlHandler.html
-//! [`HtmlRender`]: export/struct.HtmlRender.html
+//! The `Org` struct already implements serde's `Serialize` trait, which means you can
+//! freely serialize it into any format that serde supports, such as json:
//!
//! ```rust
-//! use orgize::{export::*, headline::Headline};
-//! use slugify::slugify;
-//! use std::io::{Cursor, Error as IOError, Write};
-//! use std::string::FromUtf8Error;
+//! use serde_json::to_string;
+//! # use orgize::Org;
+//! #
+//! # let org = Org::parse(r#"* Title 1
+//! # *Section 1*
+//! # ** Title 2
+//! # _Section 2_
+//! # * Title 3
+//! # /Section 3/
+//! # * Title 4
+//! # =Section 4="#);
//!
-//! // custom error type
-//! #[derive(Debug)]
-//! enum Error {
-//! IO(IOError),
-//! Headline,
-//! Utf8(FromUtf8Error),
-//! }
-//!
-//! // From trait is required for custom error type
-//! impl From for Error {
-//! fn from(err: IOError) -> Error {
-//! Error::IO(err)
-//! }
-//! }
-//!
-//! struct CustomHtmlHandler;
-//!
-//! impl HtmlHandler for CustomHtmlHandler {
-//! fn headline_beg(&mut self, w: &mut W, hdl: Headline) -> Result<(), Error> {
-//! if hdl.level > 6 {
-//! Err(Error::Headline)
-//! } else {
-//! write!(
-//! w,
-//! r##""##,
-//! hdl.level,
-//! slugify!(hdl.title),
-//! )?;
-//! self.escape(w, hdl.title)?;
-//! Ok(write!(w, "", hdl.level)?)
-//! }
-//! }
-//! }
-//!
-//! fn main() -> Result<(), Error> {
-//! let contents = r"* Title 1
-//! *Section 1*
-//! ** Title 2
-//! _Section 2_
-//! * Title 3
-//! /Section 3/
-//! * Title 4
-//! =Section 4=";
-//!
-//! let mut cursor = Cursor::new(Vec::new());
-//! let mut render = HtmlRender::new(CustomHtmlHandler, &mut cursor, &contents);
-//!
-//! render.render()?;
-//!
-//! assert_eq!(
-//! String::from_utf8(cursor.into_inner()).map_err(Error::Utf8)?,
-//! "\
-//! \
-//! \
-//! "
-//! );
-//!
-//! Ok(())
-//! }
+//! println!("{}", to_string(&org).unwrap());
//! ```
pub mod elements;
diff --git a/src/org.rs b/src/org.rs
index ecae501..51870ad 100644
--- a/src/org.rs
+++ b/src/org.rs
@@ -1,6 +1,6 @@
use indextree::{Arena, NodeId};
use jetscii::bytes;
-use memchr::{memchr, memchr_iter, memrchr_iter};
+use memchr::{memchr, memchr_iter};
use std::io::{Error, Write};
use crate::elements::*;
@@ -80,13 +80,11 @@ impl<'a> Org<'a> {
if begin < end {
let off = Headline::find_level(&self.text[begin..end], std::usize::MAX);
if off != 0 {
- let (contents_begin, contents_end) =
- skip_empty_lines(&self.text[begin..begin + off]);
let section = Element::Section {
begin,
end: begin + off,
- contents_begin: begin + contents_begin,
- contents_end: begin + contents_end,
+ contents_begin: begin,
+ contents_end: begin + off,
};
let new_node = self.arena.new_node(section);
node.append(new_node, &mut self.arena).unwrap();
@@ -236,7 +234,7 @@ impl<'a> Org<'a> {
if let Some((ty, off)) = self.parse_element(begin, end) {
let new_node = self.arena.new_node(ty);
node.append(new_node, &mut self.arena).unwrap();
- pos += off;
+ pos += off + skip_empty_lines(&text[off..]);
}
let mut last_end = pos;
@@ -247,12 +245,17 @@ impl<'a> Org<'a> {
.iter()
.all(u8::is_ascii_whitespace)
{
- let (end, _) = skip_empty_lines(&text[pos + i..]);
+ let end = skip_empty_lines(&text[pos + i..]);
let new_node = self.arena.new_node(Element::Paragraph {
begin: begin + last_end,
end: begin + pos + i + end,
contents_begin: begin + last_end,
- contents_end: begin + pos,
+ contents_end: begin
+ + if text.as_bytes()[pos - 1] == b'\n' {
+ pos - 1
+ } else {
+ pos
+ },
});
node.append(new_node, &mut self.arena).unwrap();
pos += i + end;
@@ -263,13 +266,18 @@ impl<'a> Org<'a> {
begin: begin + last_end,
end: begin + pos,
contents_begin: begin + last_end,
- contents_end: begin + pos,
+ contents_end: begin
+ + if text.as_bytes()[pos - 1] == b'\n' {
+ pos - 1
+ } else {
+ pos
+ },
});
node.append(new_node, &mut self.arena).unwrap();
}
let new_node = self.arena.new_node(ty);
node.append(new_node, &mut self.arena).unwrap();
- pos += off;
+ pos += off + skip_empty_lines(&text[pos + off..]);
last_end = pos;
} else {
pos += i + 1;
@@ -755,9 +763,8 @@ impl<'a> Org<'a> {
}
}
-fn skip_empty_lines(text: &str) -> (usize, usize) {
+fn skip_empty_lines(text: &str) -> usize {
let mut i = 0;
- let mut j = text.len();
for pos in memchr_iter(b'\n', text.as_bytes()) {
if text.as_bytes()[i..pos].iter().all(u8::is_ascii_whitespace) {
i = pos + 1;
@@ -765,14 +772,15 @@ fn skip_empty_lines(text: &str) -> (usize, usize) {
break;
}
}
-
- for pos in memrchr_iter(b'\n', text.as_bytes()) {
- if text.as_bytes()[pos..j].iter().all(u8::is_ascii_whitespace) {
- j = pos;
- } else {
- break;
- }
- }
-
- (i, j)
+ i
+}
+
+#[test] // Pins `skip_empty_lines`: it returns the byte length of the leading run of whitespace-only lines.
+fn test_skip_empty_lines() {
+ assert_eq!(skip_empty_lines("foo"), 0); // no newline at all, so nothing is skipped
+ assert_eq!(skip_empty_lines(" foo"), 0); // leading whitespace alone is not an empty line
+ assert_eq!(skip_empty_lines(" \nfoo\n"), " \n".len()); // one whitespace-only line is consumed
+ assert_eq!(skip_empty_lines(" \n\n\nfoo\n"), " \n\n\n".len()); // consecutive blank lines are all consumed
+ assert_eq!(skip_empty_lines(" \n \n\nfoo\n"), " \n \n\n".len()); // whitespace-only and fully empty lines both count
+ assert_eq!(skip_empty_lines(" \n \n\n foo\n"), " \n \n\n".len()); // stops at the first non-empty line; its indent is kept
 }