diff --git a/README.md b/README.md index 3cfcb84..4846c72 100644 --- a/README.md +++ b/README.md @@ -1,34 +1,15 @@ # Orgize -Orgize is a Emacs Org-mode parser written by pure Rust. It behaves like a pull -parser (returning an iterator of events) but not exactly. +A Rust library for parsing orgmode files. -Besides, orgize also provides some mechanism for exporting org-mode files to -various formats, e.g. HTML. +## Parse -## Usage - -```toml -[dependencies] -orgize = "0.1.0" -``` +To parse a orgmode string, simply invoking the `Org::parse` function: ```rust -// Rust 2015 only -extern crate orgize; -``` +use orgize::Org; -## Example - -### Using Parser - -Orgize parser acts like a event-based parser, which means it returns an -`Iterator` of `Event` s. - -```rust -use orgize::Parser; - -let parser = Parser::new(r#"* Title 1 +let org = Org::parse(r#"* Title 1 *Section 1* ** Title 2 _Section 2_ @@ -36,21 +17,97 @@ _Section 2_ /Section 3/ * Title 4 =Section 4="#); +``` -for event in parser { +## Iter + +`Org::iter` function will return a iteractor of `Event`s, which is +a simple wrapper of `Element`. + +```rust +for event in org.iter() { // handling the event } ``` -### Using Render +**Note**: whether an element is container or not, it will appears two times in a loop. +One as `Event::Start(element)`, one as `Event::End(element)`. -You can use the built-in `HtmlRender` to generate html string directly: +## Render html + +You can call the `Org::html_default` function to generate html directly, which +uses the `DefaultHtmlHandler` internally: ```rust -use orgize::export::HtmlRender; -use std::io::{Cursor, Result}; +let mut writer = Vec::new(); +org.html_default(&mut writer).unwrap(); -fn main() -> Result<()> { +assert_eq!( + String::from_utf8(writer).unwrap(), + "

Title 1

Section 1

\ +

Title 2

Section 2

\ +

Title 3

Section 3

\ +

Title 4

Section 4

" +); +``` + +## Render html with custom HtmlHandler + +To customize html rending, simply implementing `HtmlHandler` trait and passing +it to the `Org::html` function. + +The following code demonstrates how to add a id for every headline and return +own error type while rendering. + +```rust +#[derive(Debug)] +enum MyError { + IO(IOError), + Heading, + Utf8(FromUtf8Error), +} + +// From trait is required for custom error type +impl From for MyError { + fn from(err: IOError) -> Self { + MyError::IO(err) + } +} + +impl From for MyError { + fn from(err: FromUtf8Error) -> Self { + MyError::Utf8(err) + } +} + +struct MyHtmlHandler; + +impl HtmlHandler for MyHtmlHandler { + fn start(&mut self, mut w: W, element: &Element<'_>) -> Result<(), MyError> { + let mut default_handler = DefaultHtmlHandler; + match element { + Element::Headline { headline, .. } => { + if headline.level > 6 { + return Err(MyError::Heading); + } else { + let slugify = slugify!(headline.title); + write!( + w, + "{2}", + headline.level, + slugify, + Escape(headline.title), + )?; + } + } + // fallthrough to default handler + _ => default_handler.start(w, element)?, + } + Ok(()) + } +} + +fn main() -> Result<(), MyError> { let contents = r"* Title 1 *Section 1* ** Title 2 @@ -60,96 +117,33 @@ _Section 2_ * Title 4 =Section 4="; - let mut cursor = Cursor::new(Vec::new()); - let mut render = HtmlRender::default(&mut cursor, &contents); - - render.render()?; - + let mut writer = Vec::new(); + Org::parse(&contents).html(&mut writer, MyHtmlHandler)?; assert_eq!( - String::from_utf8(cursor.into_inner()).unwrap(), - "

Title 1

Section 1

\ -

Title 2

Section 2

\ -

Title 3

Section 3

\ -

Title 4

Section 4

" + String::from_utf8(writer)?, + "

Title 1

Section 1

\ +

Title 2

Section 2

\ +

Title 3

Section 3

\ +

Title 4

Section 4

" ); Ok(()) } ``` -### Custom HtmlHandler +**Note**: as I mentioned above, each element will appears two times while iterating. +And handler will silently ignores all end events from non-container elements. -You can create your own handler by implementing `HtmlHandler` trait and passing -it to the `HtmlRender`. +So if you want to change how a non-container element renders, just redefine the start +function and leave the end function untouched. -The following example demonstrates how to add an anchor for every headline and -use your own error type. +## Serde + +`Org` struct have already implemented serde's `Serialize` trait. It means you can +freely serialize it into any format that serde supports such as json: ```rust -use orgize::{export::*, headline::Headline}; -use slugify::slugify; -use std::io::{Cursor, Error as IOError, Write}; -use std::string::FromUtf8Error; - -// custom error type -#[derive(Debug)] -enum Error { - IO(IOError), - Headline, - Utf8(FromUtf8Error), -} - -// From trait is required for custom error type -impl From for Error { - fn from(err: IOError) -> Error { - Error::IO(err) - } -} - -struct CustomHtmlHandler; - -impl HtmlHandler for CustomHtmlHandler { - fn headline_beg(&mut self, w: &mut W, hdl: Headline) -> Result<(), Error> { - if hdl.level > 6 { - Err(Error::Headline) - } else { - write!( - w, - r##""##, - hdl.level, - slugify!(hdl.title), - )?; - self.escape(w, hdl.title)?; - Ok(write!(w, "", hdl.level)?) - } - } -} - -fn main() -> Result<(), Error> { - let contents = r"* Title 1 -*Section 1* -** Title 2 -_Section 2_ -* Title 3 -/Section 3/ -* Title 4 -=Section 4="; - - let mut cursor = Cursor::new(Vec::new()); - let mut render = HtmlRender::new(CustomHtmlHandler, &mut cursor, &contents); - - render.render()?; - - assert_eq!( - String::from_utf8(cursor.into_inner()).map_err(Error::Utf8)?, - "

Title 1

Section 1

\ -

Title 2

Section 2

\ -

Title 3

Section 3

\ -

Title 4

Section 4

" - ); - - Ok(()) -} +println!("{}", to_string(&org).unwrap()); ``` ## License diff --git a/src/elements/keyword.rs b/src/elements/keyword.rs index 89033cf..a232548 100644 --- a/src/elements/keyword.rs +++ b/src/elements/keyword.rs @@ -81,12 +81,12 @@ fn parse() { ); assert_eq!( - Keyword::parse("#+ATTR_LATEX: :width 5cm"), + Keyword::parse("#+ATTR_LATEX: :width 5cm\n"), Some(( "ATTR_LATEX", None, ":width 5cm", - "#+ATTR_LATEX: :width 5cm".len() + "#+ATTR_LATEX: :width 5cm\n".len() )) ); diff --git a/src/elements/mod.rs b/src/elements/mod.rs index 03eb75a..c5b89dd 100644 --- a/src/elements/mod.rs +++ b/src/elements/mod.rs @@ -1,3 +1,5 @@ +//! Org-mode elements module + mod block; mod clock; mod cookie; @@ -47,6 +49,12 @@ pub use self::{ use indextree::NodeId; +/// Org-mode element enum +/// +/// Generally, each variant contains a element struct and +/// a set of properties which indicate the position of the +/// element in the original string. +/// #[derive(Debug)] #[cfg_attr(feature = "serde", derive(serde::Serialize))] #[cfg_attr(feature = "serde", serde(tag = "type"))] diff --git a/src/lib.rs b/src/lib.rs index 866a7ba..82bd890 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,16 +1,15 @@ //! A Rust library for parsing orgmode files. //! -//! # Using Parser +//! # Parse //! -//! Orgize parser acts like a event-based parser, which means it -//! returns an `Iterator` of [`Event`] s. +//! To parse a orgmode string, simply invoking the [`Org::parse`] function: //! -//! [`Event`]: enum.Event.html +//! [`Org::parse`]: org/struct.Org.html#method.parse //! //! ```rust -//! use orgize::Parser; +//! use orgize::Org; //! -//! let parser = Parser::new(r#"* Title 1 +//! let org = Org::parse(r#"* Title 1 //! *Section 1* //! ** Title 2 //! _Section 2_ @@ -18,23 +17,140 @@ //! /Section 3/ //! * Title 4 //! =Section 4="#); +//! ``` //! -//! for event in parser { +//! # Iter +//! +//! [`Org::iter`] function will return a iteractor of [`Event`]s, which is +//! 
a simple wrapper of [`Element`]. +//! +//! [`Org::iter`]: org/struct.Org.html#method.iter +//! [`Event`]: iter/enum.Event.html +//! [`Element`]: elements/enum.Element.html +//! +//! ```rust +//! # use orgize::Org; +//! # +//! # let org = Org::parse(r#"* Title 1 +//! # *Section 1* +//! # ** Title 2 +//! # _Section 2_ +//! # * Title 3 +//! # /Section 3/ +//! # * Title 4 +//! # =Section 4="#); +//! # +//! for event in org.iter() { //! // handling the event //! } //! ``` //! -//! # Using Render +//! **Note**: whether an element is a container or not, it will appear twice in a loop. +//! Once as [`Event::Start(element)`], once as [`Event::End(element)`]. //! -//! You can use the built-in [`HtmlRender`] to generate html string directly: +//! [`Event::Start(element)`]: iter/enum.Event.html#variant.Start +//! [`Event::End(element)`]: iter/enum.Event.html#variant.End //! -//! [`HtmlRender`]: export/struct.HtmlRender.html +//! # Render html +//! +//! You can call the [`Org::html_default`] function to generate html directly, which +//! uses the [`DefaultHtmlHandler`] internally: +//! +//! [`Org::html_default`]: org/struct.Org.html#method.html_default +//! [`DefaultHtmlHandler`]: export/html/struct.DefaultHtmlHandler.html //! //! ```rust -//! use orgize::export::HtmlRender; -//! use std::io::{Cursor, Result}; +//! # use orgize::Org; +//! # +//! # let org = Org::parse(r#"* Title 1 +//! # *Section 1* +//! # ** Title 2 +//! # _Section 2_ +//! # * Title 3 +//! # /Section 3/ +//! # * Title 4 +//! # =Section 4="#); +//! # +//! let mut writer = Vec::new(); +//! org.html_default(&mut writer).unwrap(); //! -//! fn main() -> Result<()> { +//! assert_eq!( +//! String::from_utf8(writer).unwrap(), +//! "

Title 1

Section 1

\ +//!

Title 2

Section 2

\ +//!

Title 3

Section 3

\ +//!

Title 4

Section 4

" +//! ); +//! ``` +//! +//! # Render html with custom HtmlHandler +//! +//! To customize html rending, simply implementing [`HtmlHandler`] trait and passing +//! it to the [`Org::html`] function. +//! +//! [`HtmlHandler`]: export/html/trait.HtmlHandler.html +//! [`Org::html`]: org/struct.Org.html#method.html +//! +//! The following code demonstrates how to add a id for every headline and return +//! own error type while rendering. +//! +//! ```rust +//! # use std::convert::From; +//! # use std::io::{Error as IOError, Write}; +//! # use std::string::FromUtf8Error; +//! # +//! # use orgize::export::{html::Escape, DefaultHtmlHandler, HtmlHandler}; +//! # use orgize::{Element, Org}; +//! # use slugify::slugify; +//! # +//! #[derive(Debug)] +//! enum MyError { +//! IO(IOError), +//! Heading, +//! Utf8(FromUtf8Error), +//! } +//! +//! // From trait is required for custom error type +//! impl From for MyError { +//! fn from(err: IOError) -> Self { +//! MyError::IO(err) +//! } +//! } +//! +//! impl From for MyError { +//! fn from(err: FromUtf8Error) -> Self { +//! MyError::Utf8(err) +//! } +//! } +//! +//! struct MyHtmlHandler; +//! +//! impl HtmlHandler for MyHtmlHandler { +//! fn start(&mut self, mut w: W, element: &Element<'_>) -> Result<(), MyError> { +//! let mut default_handler = DefaultHtmlHandler; +//! match element { +//! Element::Headline { headline, .. } => { +//! if headline.level > 6 { +//! return Err(MyError::Heading); +//! } else { +//! let slugify = slugify!(headline.title); +//! write!( +//! w, +//! "{2}", +//! headline.level, +//! slugify, +//! Escape(headline.title), +//! )?; +//! } +//! } +//! // fallthrough to default handler +//! _ => default_handler.start(w, element)?, +//! } +//! Ok(()) +//! } +//! } +//! +//! fn main() -> Result<(), MyError> { //! let contents = r"* Title 1 //! *Section 1* //! ** Title 2 @@ -44,99 +160,45 @@ //! * Title 4 //! =Section 4="; //! -//! let mut cursor = Cursor::new(Vec::new()); -//! 
let mut render = HtmlRender::default(&mut cursor, &contents); -//! -//! render.render()?; -//! +//! let mut writer = Vec::new(); +//! Org::parse(&contents).html(&mut writer, MyHtmlHandler)?; //! assert_eq!( -//! String::from_utf8(cursor.into_inner()).unwrap(), -//! "

Title 1

Section 1

\ -//!

Title 2

Section 2

\ -//!

Title 3

Section 3

\ -//!

Title 4

Section 4

" +//! String::from_utf8(writer)?, +//! "

Title 1

Section 1

\ +//!

Title 2

Section 2

\ +//!

Title 3

Section 3

\ +//!

Title 4

Section 4

" //! ); //! //! Ok(()) //! } //! ``` //! -//! # Custom HtmlHandler +//! **Note**: as I mentioned above, each element will appears two times while iterating. +//! And handler will silently ignores all end events from non-container elements. //! -//! You can create your own handler by implementing [`HtmlHandler`] trait and passing -//! it to the [`HtmlRender`]. +//! So if you want to change how a non-container element renders, just redefine the start +//! function and leave the end function untouched. //! -//! The following example demonstrates how to add an anchor for every headline and use -//! your own error type. +//! # Serde //! -//! [`HtmlHandler`]: export/trait.HtmlHandler.html -//! [`HtmlRender`]: export/struct.HtmlRender.html +//! `Org` struct have already implemented serde's `Serialize` trait. It means you can +//! freely serialize it into any format that serde supports such as json: //! //! ```rust -//! use orgize::{export::*, headline::Headline}; -//! use slugify::slugify; -//! use std::io::{Cursor, Error as IOError, Write}; -//! use std::string::FromUtf8Error; +//! use serde_json::to_string; +//! # use orgize::Org; +//! # +//! # let org = Org::parse(r#"* Title 1 +//! # *Section 1* +//! # ** Title 2 +//! # _Section 2_ +//! # * Title 3 +//! # /Section 3/ +//! # * Title 4 +//! # =Section 4="#); //! -//! // custom error type -//! #[derive(Debug)] -//! enum Error { -//! IO(IOError), -//! Headline, -//! Utf8(FromUtf8Error), -//! } -//! -//! // From trait is required for custom error type -//! impl From for Error { -//! fn from(err: IOError) -> Error { -//! Error::IO(err) -//! } -//! } -//! -//! struct CustomHtmlHandler; -//! -//! impl HtmlHandler for CustomHtmlHandler { -//! fn headline_beg(&mut self, w: &mut W, hdl: Headline) -> Result<(), Error> { -//! if hdl.level > 6 { -//! Err(Error::Headline) -//! } else { -//! write!( -//! w, -//! r##""##, -//! hdl.level, -//! slugify!(hdl.title), -//! )?; -//! self.escape(w, hdl.title)?; -//! 
Ok(write!(w, "", hdl.level)?) -//! } -//! } -//! } -//! -//! fn main() -> Result<(), Error> { -//! let contents = r"* Title 1 -//! *Section 1* -//! ** Title 2 -//! _Section 2_ -//! * Title 3 -//! /Section 3/ -//! * Title 4 -//! =Section 4="; -//! -//! let mut cursor = Cursor::new(Vec::new()); -//! let mut render = HtmlRender::new(CustomHtmlHandler, &mut cursor, &contents); -//! -//! render.render()?; -//! -//! assert_eq!( -//! String::from_utf8(cursor.into_inner()).map_err(Error::Utf8)?, -//! "

Title 1

Section 1

\ -//!

Title 2

Section 2

\ -//!

Title 3

Section 3

\ -//!

Title 4

Section 4

" -//! ); -//! -//! Ok(()) -//! } +//! println!("{}", to_string(&org).unwrap()); //! ``` pub mod elements; diff --git a/src/org.rs b/src/org.rs index ecae501..51870ad 100644 --- a/src/org.rs +++ b/src/org.rs @@ -1,6 +1,6 @@ use indextree::{Arena, NodeId}; use jetscii::bytes; -use memchr::{memchr, memchr_iter, memrchr_iter}; +use memchr::{memchr, memchr_iter}; use std::io::{Error, Write}; use crate::elements::*; @@ -80,13 +80,11 @@ impl<'a> Org<'a> { if begin < end { let off = Headline::find_level(&self.text[begin..end], std::usize::MAX); if off != 0 { - let (contents_begin, contents_end) = - skip_empty_lines(&self.text[begin..begin + off]); let section = Element::Section { begin, end: begin + off, - contents_begin: begin + contents_begin, - contents_end: begin + contents_end, + contents_begin: begin, + contents_end: begin + off, }; let new_node = self.arena.new_node(section); node.append(new_node, &mut self.arena).unwrap(); @@ -236,7 +234,7 @@ impl<'a> Org<'a> { if let Some((ty, off)) = self.parse_element(begin, end) { let new_node = self.arena.new_node(ty); node.append(new_node, &mut self.arena).unwrap(); - pos += off; + pos += off + skip_empty_lines(&text[off..]); } let mut last_end = pos; @@ -247,12 +245,17 @@ impl<'a> Org<'a> { .iter() .all(u8::is_ascii_whitespace) { - let (end, _) = skip_empty_lines(&text[pos + i..]); + let end = skip_empty_lines(&text[pos + i..]); let new_node = self.arena.new_node(Element::Paragraph { begin: begin + last_end, end: begin + pos + i + end, contents_begin: begin + last_end, - contents_end: begin + pos, + contents_end: begin + + if text.as_bytes()[pos - 1] == b'\n' { + pos - 1 + } else { + pos + }, }); node.append(new_node, &mut self.arena).unwrap(); pos += i + end; @@ -263,13 +266,18 @@ impl<'a> Org<'a> { begin: begin + last_end, end: begin + pos, contents_begin: begin + last_end, - contents_end: begin + pos, + contents_end: begin + + if text.as_bytes()[pos - 1] == b'\n' { + pos - 1 + } else { + pos + }, }); 
node.append(new_node, &mut self.arena).unwrap(); } let new_node = self.arena.new_node(ty); node.append(new_node, &mut self.arena).unwrap(); - pos += off; + pos += off + skip_empty_lines(&text[pos + off..]); last_end = pos; } else { pos += i + 1; @@ -755,9 +763,8 @@ impl<'a> Org<'a> { } } -fn skip_empty_lines(text: &str) -> (usize, usize) { +fn skip_empty_lines(text: &str) -> usize { let mut i = 0; - let mut j = text.len(); for pos in memchr_iter(b'\n', text.as_bytes()) { if text.as_bytes()[i..pos].iter().all(u8::is_ascii_whitespace) { i = pos + 1; @@ -765,14 +772,15 @@ fn skip_empty_lines(text: &str) -> (usize, usize) { break; } } - - for pos in memrchr_iter(b'\n', text.as_bytes()) { - if text.as_bytes()[pos..j].iter().all(u8::is_ascii_whitespace) { - j = pos; - } else { - break; - } - } - - (i, j) + i +} + +#[test] +fn test_skip_empty_lines() { + assert_eq!(skip_empty_lines("foo"), 0); + assert_eq!(skip_empty_lines(" foo"), 0); + assert_eq!(skip_empty_lines(" \nfoo\n"), " \n".len()); + assert_eq!(skip_empty_lines(" \n\n\nfoo\n"), " \n\n\n".len()); + assert_eq!(skip_empty_lines(" \n \n\nfoo\n"), " \n \n\n".len()); + assert_eq!(skip_empty_lines(" \n \n\n foo\n"), " \n \n\n".len()); }