diff --git a/Cargo.toml b/Cargo.toml index 56f935f..a421ff6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,19 +17,18 @@ travis-ci = { repository = "PoiScript/orgize" } [features] default = ["serde"] -extra-serde-info = ["serde"] [dependencies] bytecount = "0.5.1" chrono = { version = "0.4.7", optional = true } -indextree = "3.2.0" +indextree = "3.3.0" jetscii = "0.4.4" -memchr = "2.2.0" -serde = { version = "1.0.94", optional = true, features = ["derive"] } +memchr = "2.2.1" +serde = { version = "1.0.97", optional = true, features = ["derive"] } nom = "5.0.0" [dev-dependencies] lazy_static = "1.3.0" pretty_assertions = "0.6.1" -serde_json = "1.0.39" +serde_json = "1.0.40" slugify = "0.1.0" diff --git a/README.md b/README.md index 5bd5c6d..d98571b 100644 --- a/README.md +++ b/README.md @@ -190,8 +190,6 @@ By now, orgize provides three features: + `serde`: adds the ability to serialize `Org` and other elements using `serde`, enabled by default. -+ `extra-serde-info`: includes the position information while serializing, disabled by default. - + `chrono`: adds the ability to convert `Datetime` into `chrono` struct, disabled by default. 
## License diff --git a/src/elements/block.rs b/src/elements/block.rs index bf5cd9f..9cf5df0 100644 --- a/src/elements/block.rs +++ b/src/elements/block.rs @@ -9,13 +9,11 @@ pub struct Block<'a> { pub name: &'a str, #[cfg_attr(feature = "serde", serde(skip_serializing_if = "Option::is_none"))] pub args: Option<&'a str>, - #[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))] - pub contents: &'a str, } impl Block<'_> { #[inline] - pub(crate) fn parse(text: &str) -> Option<(&str, Element<'_>)> { + pub(crate) fn parse(text: &str) -> Option<(&str, Element<'_>, &str)> { debug_assert!(text.starts_with("#+")); if text.len() <= 8 || text[2..8].to_uppercase() != "BEGIN_" { @@ -40,11 +38,8 @@ impl Block<'_> { if text[pos..i].trim().eq_ignore_ascii_case(&end) { return Some(( &text[i + 1..], - Element::Block(Block { - name, - args, - contents: &text[off..pos], - }), + Element::Block(Block { name, args }), + &text[off..pos], )); } @@ -52,14 +47,7 @@ impl Block<'_> { } if text[pos..].trim().eq_ignore_ascii_case(&end) { - Some(( - "", - Element::Block(Block { - name, - args, - contents: &text[off..pos], - }), - )) + Some(("", Element::Block(Block { name, args }), &text[off..pos])) } else { None } @@ -75,8 +63,8 @@ fn parse() { Element::Block(Block { name: "SRC", args: None, - contents: "" }), + "" )) ); assert_eq!( @@ -86,8 +74,8 @@ fn parse() { Element::Block(Block { name: "SRC", args: Some("javascript"), - contents: "console.log('Hello World!');\n" }), + "console.log('Hello World!');\n" )) ); // TODO: more testing diff --git a/src/elements/drawer.rs b/src/elements/drawer.rs index 7c274ce..69c528f 100644 --- a/src/elements/drawer.rs +++ b/src/elements/drawer.rs @@ -7,13 +7,11 @@ use crate::elements::Element; #[derive(Debug)] pub struct Drawer<'a> { pub name: &'a str, - #[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))] - pub contents: &'a str, } impl Drawer<'_> { #[inline] - pub(crate) fn parse(text: &str) -> 
Option<(&str, Element<'_>)> { + pub(crate) fn parse(text: &str) -> Option<(&str, Element<'_>, &str)> { debug_assert!(text.starts_with(':')); let mut lines = memchr_iter(b'\n', text.as_bytes()); @@ -36,8 +34,8 @@ impl Drawer<'_> { &text[i + 1..], Element::Drawer(Drawer { name: &name[0..name.len() - 1], - contents: &text[off..pos], }), + &text[off..pos], )); } pos = i + 1; @@ -48,8 +46,8 @@ impl Drawer<'_> { "", Element::Drawer(Drawer { name: &name[0..name.len() - 1], - contents: &text[off..pos], }), + &text[off..pos], )) } else { None @@ -63,10 +61,8 @@ fn parse() { Drawer::parse(":PROPERTIES:\n :CUSTOM_ID: id\n :END:"), Some(( "", - Element::Drawer(Drawer { - name: "PROPERTIES", - contents: " :CUSTOM_ID: id\n" - }) + Element::Drawer(Drawer { name: "PROPERTIES" }), + " :CUSTOM_ID: id\n" )) ) } diff --git a/src/elements/dyn_block.rs b/src/elements/dyn_block.rs index 5fada24..ced6b5c 100644 --- a/src/elements/dyn_block.rs +++ b/src/elements/dyn_block.rs @@ -9,14 +9,11 @@ pub struct DynBlock<'a> { pub block_name: &'a str, #[cfg_attr(feature = "serde", serde(skip_serializing_if = "Option::is_none"))] pub arguments: Option<&'a str>, - #[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))] - pub contents: &'a str, } impl DynBlock<'_> { #[inline] - // return (dyn_block, contents-begin, contents-end, end) - pub(crate) fn parse(text: &str) -> Option<(&str, Element<'_>)> { + pub(crate) fn parse(text: &str) -> Option<(&str, Element<'_>, &str)> { debug_assert!(text.starts_with("#+")); if text.len() <= "#+BEGIN: ".len() || !text[2..9].eq_ignore_ascii_case("BEGIN: ") { @@ -50,8 +47,8 @@ impl DynBlock<'_> { Element::DynBlock(DynBlock { block_name: name, arguments: para, - contents: &text[off..pos], }), + &text[off..pos], )); } @@ -64,8 +61,8 @@ impl DynBlock<'_> { Element::DynBlock(DynBlock { block_name: name, arguments: para, - contents: &text[off..pos], }), + &text[off..pos], )) } else { None @@ -83,8 +80,8 @@ fn parse() { Element::DynBlock(DynBlock 
{ block_name: "clocktable", arguments: Some(":scope file"), - contents: "CONTENTS\n" - },) + }), + "CONTENTS\n" )) ); } diff --git a/src/elements/fn_def.rs b/src/elements/fn_def.rs index 7859cd0..c3680fa 100644 --- a/src/elements/fn_def.rs +++ b/src/elements/fn_def.rs @@ -1,39 +1,32 @@ use memchr::memchr; +use nom::{ + bytes::complete::{tag, take_while1}, + IResult, +}; #[cfg_attr(test, derive(PartialEq))] #[cfg_attr(feature = "serde", derive(serde::Serialize))] #[derive(Debug)] pub struct FnDef<'a> { pub label: &'a str, - #[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))] - pub contents: &'a str, +} + +fn parse_label(input: &str) -> IResult<&str, &str> { + let (input, _) = tag("[fn:")(input)?; + let (input, label) = + take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_')(input)?; + let (input, _) = tag("]")(input)?; + Ok((input, label)) } impl FnDef<'_> { #[inline] - pub(crate) fn parse(text: &str) -> Option<(&str, FnDef<'_>)> { - if text.starts_with("[fn:") { - let (label, off) = memchr(b']', text.as_bytes()) - .filter(|&i| { - i != 4 - && text.as_bytes()["[fn:".len()..i] - .iter() - .all(|&c| c.is_ascii_alphanumeric() || c == b'-' || c == b'_') - }) - .map(|i| (&text["[fn:".len()..i], i + 1))?; + pub(crate) fn parse(text: &str) -> Option<(&str, FnDef<'_>, &str)> { + let (tail, label) = parse_label(text).ok()?; - let end = memchr(b'\n', text.as_bytes()).unwrap_or_else(|| text.len()); + let end = memchr(b'\n', tail.as_bytes()).unwrap_or_else(|| tail.len()); - Some(( - &text[end..], - FnDef { - label, - contents: &text[off..end], - }, - )) - } else { - None - } + Some((&tail[end..], FnDef { label }, &tail[0..end])) } } @@ -41,43 +34,19 @@ impl FnDef<'_> { fn parse() { assert_eq!( FnDef::parse("[fn:1] https://orgmode.org"), - Some(( - "", - FnDef { - label: "1", - contents: " https://orgmode.org" - }, - )) + Some(("", FnDef { label: "1" }, " https://orgmode.org")) ); assert_eq!( FnDef::parse("[fn:word_1] 
https://orgmode.org"), - Some(( - "", - FnDef { - label: "word_1", - contents: " https://orgmode.org" - }, - )) + Some(("", FnDef { label: "word_1" }, " https://orgmode.org")) ); assert_eq!( FnDef::parse("[fn:WORD-1] https://orgmode.org"), - Some(( - "", - FnDef { - label: "WORD-1", - contents: " https://orgmode.org" - }, - )) + Some(("", FnDef { label: "WORD-1" }, " https://orgmode.org")) ); assert_eq!( FnDef::parse("[fn:WORD]"), - Some(( - "", - FnDef { - label: "WORD", - contents: "" - }, - )) + Some(("", FnDef { label: "WORD" }, "")) ); assert_eq!(FnDef::parse("[fn:] https://orgmode.org"), None); assert_eq!(FnDef::parse("[fn:wor d] https://orgmode.org"), None); diff --git a/src/elements/headline.rs b/src/elements/headline.rs index 7805bba..92d38a5 100644 --- a/src/elements/headline.rs +++ b/src/elements/headline.rs @@ -22,12 +22,13 @@ pub struct Headline<'a> { /// headline keyword #[cfg_attr(feature = "serde", serde(skip_serializing_if = "Option::is_none"))] pub keyword: Option<&'a str>, - #[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))] - pub contents: &'a str, } impl Headline<'_> { - pub(crate) fn parse<'a>(text: &'a str, config: &ParseConfig<'_>) -> (&'a str, Headline<'a>) { + pub(crate) fn parse<'a>( + text: &'a str, + config: &ParseConfig<'_>, + ) -> (&'a str, Headline<'a>, &'a str) { let level = memchr2(b'\n', b' ', text.as_bytes()).unwrap_or_else(|| text.len()); debug_assert!(level > 0); @@ -55,8 +56,8 @@ impl Headline<'_> { priority: None, title: "", tags: Vec::new(), - contents: &text[off..end], }, + &text[off..end], ); } @@ -108,8 +109,8 @@ impl Headline<'_> { priority, title, tags: tags.split(':').filter(|s| !s.is_empty()).collect(), - contents: &text[off..end], }, + &text[off..end], ) } @@ -165,8 +166,8 @@ fn parse() { keyword: Some("DONE"), title: "COMMENT Title", tags: vec!["tag", "a2%"], - contents: "" }, + "" ) ); assert_eq!( @@ -179,8 +180,8 @@ fn parse() { tags: vec!["tag", "a2%"], title: "ToDO [#A] COMMENT 
Title", keyword: None, - contents: "" }, + "" ) ); assert_eq!( @@ -193,8 +194,8 @@ fn parse() { tags: vec!["tag", "a2%"], title: "T0DO [#A] COMMENT Title", keyword: None, - contents: "" }, + "" ) ); assert_eq!( @@ -207,8 +208,8 @@ fn parse() { tags: vec!["tag", "a2%"], title: "[#1] COMMENT Title", keyword: Some("DONE"), - contents: "", }, + "" ) ); assert_eq!( @@ -221,8 +222,8 @@ fn parse() { tags: vec!["tag", "a2%"], title: "[#a] COMMENT Title", keyword: Some("DONE"), - contents: "", }, + "" ) ); assert_eq!( @@ -235,8 +236,8 @@ fn parse() { tags: Vec::new(), title: "COMMENT Title :tag:a2%", keyword: Some("DONE"), - contents: "" }, + "" ) ); assert_eq!( @@ -249,8 +250,8 @@ fn parse() { tags: Vec::new(), title: "COMMENT Title tag:a2%:", keyword: Some("DONE"), - contents: "" }, + "" ) ); assert_eq!( @@ -263,8 +264,8 @@ fn parse() { tags: Vec::new(), title: "COMMENT Title tag:a2%:", keyword: None, - contents: "" }, + "" ) ); } @@ -287,8 +288,8 @@ fn parse_todo_keywords() { keyword: None, title: "DONE [#A] COMMENT Title", tags: vec!["tag", "a2%"], - contents: "" }, + "" ) ); assert_eq!( @@ -307,8 +308,8 @@ fn parse_todo_keywords() { keyword: Some("TASK"), title: "COMMENT Title", tags: vec!["tag", "a2%"], - contents: "" }, + "" ) ); } diff --git a/src/elements/list.rs b/src/elements/list.rs index 2deac9e..326f501 100644 --- a/src/elements/list.rs +++ b/src/elements/list.rs @@ -4,16 +4,14 @@ use std::iter::once; #[cfg_attr(test, derive(PartialEq))] #[cfg_attr(feature = "serde", derive(serde::Serialize))] #[derive(Debug)] -pub struct List<'a> { +pub struct List { pub indent: usize, pub ordered: bool, - #[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))] - pub contents: &'a str, } -impl List<'_> { +impl List { #[inline] - pub(crate) fn parse(text: &str) -> Option<(&str, List<'_>)> { + pub(crate) fn parse(text: &str) -> Option<(&str, List, &str)> { let (indent, tail) = text .find(|c| c != ' ') .map(|off| (off, &text[off..])) @@ -33,14 +31,7 
@@ impl List<'_> { if line_indent < indent || (line_indent == indent && is_item(&line[line_indent..]).is_none()) { - Some(( - &text[pos..], - List { - indent, - ordered, - contents: &text[0..pos], - }, - )) + Some((&text[pos..], List { indent, ordered }, &text[0..pos])) } else { pos = i; continue; @@ -52,48 +43,20 @@ impl List<'_> { if line_indent < indent || (line_indent == indent && is_item(&line[line_indent..]).is_none()) { - Some(( - &text[pos..], - List { - indent, - ordered, - contents: &text[0..pos], - }, - )) + Some((&text[pos..], List { indent, ordered }, &text[0..pos])) } else { pos = next_i; continue; } } else { - Some(( - &text[next_i..], - List { - indent, - ordered, - contents: &text[0..pos], - }, - )) + Some((&text[next_i..], List { indent, ordered }, &text[0..pos])) } } else { - Some(( - &text[i..], - List { - indent, - ordered, - contents: &text[0..pos], - }, - )) + Some((&text[i..], List { indent, ordered }, &text[0..pos])) }; } - Some(( - &text[pos..], - List { - indent, - ordered, - contents: &text[0..pos], - }, - )) + Some((&text[pos..], List { indent, ordered }, &text[0..pos])) } } @@ -102,12 +65,11 @@ impl List<'_> { #[derive(Debug)] pub struct ListItem<'a> { pub bullet: &'a str, - #[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))] - pub contents: &'a str, } impl ListItem<'_> { - pub(crate) fn parse(text: &str, indent: usize) -> (&str, ListItem<'_>) { + #[inline] + pub(crate) fn parse(text: &str, indent: usize) -> (&str, ListItem<'_>, &str) { debug_assert!(&text[0..indent].trim().is_empty()); let off = &text[indent..].find(' ').unwrap() + 1 + indent; @@ -125,8 +87,8 @@ impl ListItem<'_> { &text[pos..], ListItem { bullet: &text[indent..off], - contents: &text[off..pos], }, + &text[off..pos], ); } } @@ -137,8 +99,8 @@ impl ListItem<'_> { "", ListItem { bullet: &text[indent..off], - contents: &text[off..], }, + &text[off..], ) } } @@ -197,8 +159,8 @@ fn list_parse() { List { indent: 0, ordered: false, - contents: 
"+ item1\n+ item2" }, + "+ item1\n+ item2" )) ); assert_eq!( @@ -208,8 +170,8 @@ fn list_parse() { List { indent: 0, ordered: false, - contents: "* item1\n \n* item2" }, + "* item1\n \n* item2" )) ); assert_eq!( @@ -219,8 +181,8 @@ fn list_parse() { List { indent: 0, ordered: false, - contents: "* item1\n" }, + "* item1\n" )) ); assert_eq!( @@ -230,8 +192,8 @@ fn list_parse() { List { indent: 0, ordered: false, - contents: "* item1\n" }, + "* item1\n" )) ); assert_eq!( @@ -241,8 +203,8 @@ fn list_parse() { List { indent: 0, ordered: false, - contents: "+ item1\n + item2\n" }, + "+ item1\n + item2\n" )) ); assert_eq!( @@ -252,8 +214,8 @@ fn list_parse() { List { indent: 0, ordered: false, - contents: "+ item1\n \n + item2\n \n+ item 3" }, + "+ item1\n \n + item2\n \n+ item 3" )) ); assert_eq!( @@ -263,8 +225,8 @@ fn list_parse() { List { indent: 2, ordered: false, - contents: " + item1\n \n + item2" }, + " + item1\n \n + item2" )) ); assert_eq!( @@ -274,8 +236,8 @@ fn list_parse() { List { indent: 0, ordered: false, - contents: "+ 1\n\n - 2\n\n - 3\n\n+ 4" }, + "+ 1\n\n - 2\n\n - 3\n\n+ 4" )) ); } diff --git a/src/elements/macros.rs b/src/elements/macros.rs index ae348a9..949e453 100644 --- a/src/elements/macros.rs +++ b/src/elements/macros.rs @@ -44,7 +44,7 @@ fn parse() { Element::Macros(Macros { name: "poem", arguments: Some("red,blue") - },) + }) )) ); assert_eq!( @@ -54,7 +54,7 @@ fn parse() { Element::Macros(Macros { name: "poem", arguments: Some(")") - },) + }) )) ); assert_eq!( @@ -64,7 +64,7 @@ fn parse() { Element::Macros(Macros { name: "author", arguments: None - },) + }) )) ); assert!(Macros::parse("{{{0uthor}}}").is_err()); diff --git a/src/elements/mod.rs b/src/elements/mod.rs index 42d64ff..44a6579 100644 --- a/src/elements/mod.rs +++ b/src/elements/mod.rs @@ -56,23 +56,16 @@ pub use self::{ #[derive(Debug)] #[cfg_attr(test, derive(PartialEq))] #[cfg_attr(feature = "serde", derive(serde::Serialize))] -#[cfg_attr(feature = "serde", serde(tag = 
"type"))] -#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))] +#[cfg_attr(feature = "serde", serde(tag = "type", rename_all = "snake_case"))] pub enum Element<'a> { Block(Block<'a>), BabelCall(BabelCall<'a>), - Section { - #[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))] - contents: &'a str, - }, + Section, Clock(Clock<'a>), Cookie(Cookie<'a>), - RadioTarget(RadioTarget<'a>), + RadioTarget(RadioTarget), Drawer(Drawer<'a>), - Document { - #[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))] - contents: &'a str, - }, + Document, DynBlock(DynBlock<'a>), FnDef(FnDef<'a>), FnRef(FnRef<'a>), @@ -81,49 +74,24 @@ pub enum Element<'a> { InlineSrc(InlineSrc<'a>), Keyword(Keyword<'a>), Link(Link<'a>), - List(List<'a>), + List(List), ListItem(ListItem<'a>), Macros(Macros<'a>), Planning(Planning<'a>), Snippet(Snippet<'a>), - Text { - value: &'a str, - }, - Paragraph { - #[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))] - contents: &'a str, - }, + Text { value: &'a str }, + Paragraph, Rule, Timestamp(Timestamp<'a>), Target(Target<'a>), - Bold { - #[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))] - contents: &'a str, - }, - Strike { - #[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))] - contents: &'a str, - }, - Italic { - #[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))] - contents: &'a str, - }, - Underline { - #[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))] - contents: &'a str, - }, - Verbatim { - value: &'a str, - }, - Code { - value: &'a str, - }, - Comment { - value: &'a str, - }, - FixedWidth { - value: &'a str, - }, + Bold, + Strike, + Italic, + Underline, + Verbatim { value: &'a str }, + Code { value: &'a str }, + Comment { value: &'a str }, + FixedWidth { value: &'a str }, } macro_rules! 
impl_from { @@ -140,7 +108,6 @@ impl_from!(Block); impl_from!(BabelCall); impl_from!(Clock); impl_from!(Cookie); -impl_from!(RadioTarget); impl_from!(Drawer); impl_from!(DynBlock); impl_from!(FnDef); @@ -150,7 +117,6 @@ impl_from!(InlineCall); impl_from!(InlineSrc); impl_from!(Keyword); impl_from!(Link); -impl_from!(List); impl_from!(ListItem); impl_from!(Macros); impl_from!(Planning); diff --git a/src/elements/radio_target.rs b/src/elements/radio_target.rs index 0f5c48b..71a3f68 100644 --- a/src/elements/radio_target.rs +++ b/src/elements/radio_target.rs @@ -10,14 +10,11 @@ use crate::elements::Element; #[cfg_attr(test, derive(PartialEq))] #[cfg_attr(feature = "serde", derive(serde::Serialize))] #[derive(Debug)] -pub struct RadioTarget<'a> { - #[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))] - contents: &'a str, -} +pub struct RadioTarget; -impl RadioTarget<'_> { +impl RadioTarget { #[inline] - pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> { + pub(crate) fn parse(input: &str) -> IResult<&str, (Element, &str)> { let (input, _) = tag("<<<")(input)?; let (input, contents) = verify( take_while(|c: char| c != '<' && c != '\n' && c != '>'), @@ -25,7 +22,7 @@ impl RadioTarget<'_> { )(input)?; let (input, _) = tag(">>>")(input)?; - Ok((input, Element::RadioTarget(RadioTarget { contents }))) + Ok((input, (Element::RadioTarget(RadioTarget), contents))) } } @@ -33,16 +30,11 @@ impl RadioTarget<'_> { fn parse() { assert_eq!( RadioTarget::parse("<<>>"), - Ok(("", Element::RadioTarget(RadioTarget { contents: "target" }))) + Ok(("", (Element::RadioTarget(RadioTarget), "target"))) ); assert_eq!( RadioTarget::parse("<<>>"), - Ok(( - "", - Element::RadioTarget(RadioTarget { - contents: "tar get" - },) - )) + Ok(("", (Element::RadioTarget(RadioTarget), "tar get"))) ); assert!(RadioTarget::parse("<<>>").is_err()); assert!(RadioTarget::parse("<<< target>>>").is_err()); diff --git a/src/export/mod.rs b/src/export/mod.rs index 
fd87f01..99260ce 100644 --- a/src/export/mod.rs +++ b/src/export/mod.rs @@ -1,3 +1,5 @@ pub mod html; +pub mod org; pub use html::{DefaultHtmlHandler, HtmlHandler}; +pub use org::{DefaultOrgHandler, OrgHandler}; diff --git a/src/lib.rs b/src/lib.rs index fe3cc6e..5618724 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -201,8 +201,6 @@ //! //! + `serde`: adds the ability to serialize `Org` and other elements using `serde`, enabled by default. //! -//! + `extra-serde-info`: includes the position information while serializing, disabled by default. -//! //! + `chrono`: adds the ability to convert `Datetime` into `chrono` struct, disabled by default. //! //! # License diff --git a/src/org.rs b/src/org.rs index b2aa315..56a964a 100644 --- a/src/org.rs +++ b/src/org.rs @@ -5,7 +5,7 @@ use std::io::{Error, Write}; use crate::config::ParseConfig; use crate::elements::*; -use crate::export::{DefaultHtmlHandler, HtmlHandler}; +use crate::export::*; use crate::iter::Iter; pub struct Org<'a> { @@ -13,19 +13,104 @@ pub struct Org<'a> { pub(crate) document: NodeId, } +enum Container<'a> { + // List + List { + content: &'a str, + node: NodeId, + indent: usize, + }, + // Block, List Item + Block { + content: &'a str, + node: NodeId, + }, + // Paragraph, Inline Markup + Inline { + content: &'a str, + node: NodeId, + }, + // Headline, Document + Headline { + content: &'a str, + node: NodeId, + }, + // Section + Section { + content: &'a str, + node: NodeId, + }, +} + impl<'a> Org<'a> { pub fn parse(text: &'a str) -> Self { Org::parse_with_config(text, ParseConfig::default()) } - pub fn parse_with_config(text: &'a str, config: ParseConfig<'_>) -> Self { + pub fn parse_with_config(content: &'a str, config: ParseConfig<'_>) -> Self { let mut arena = Arena::new(); - let document = arena.new_node(Element::Document { contents: text }); + let document = arena.new_node(Element::Document); - let mut org = Org { arena, document }; - org.parse_internal(config); + let mut containers =
vec![Container::Headline { + content, + node: document, + }]; - org + while let Some(container) = containers.pop() { + match container { + Container::Headline { + mut content, + node: parent, + } => { + if !content.is_empty() { + let off = Headline::find_level(content, std::usize::MAX); + if off != 0 { + let node = arena.new_node(Element::Section); + parent.append(node, &mut arena).unwrap(); + containers.push(Container::Section { + content: &content[0..off], + node, + }); + content = &content[off..]; + } + } + while !content.is_empty() { + let (tail, headline, headline_content) = Headline::parse(content, &config); + let headline = Element::Headline(headline); + let node = arena.new_node(headline); + parent.append(node, &mut arena).unwrap(); + containers.push(Container::Headline { + content: headline_content, + node, + }); + content = tail; + } + } + Container::Section { content, node } => { + // TODO + if let Some((tail, _planning)) = Planning::parse(content) { + parse_elements_children(&mut arena, tail, node, &mut containers); + } else { + parse_elements_children(&mut arena, content, node, &mut containers); + } + } + Container::Block { content, node } => { + parse_elements_children(&mut arena, content, node, &mut containers); + } + Container::Inline { content, node } => { + parse_objects_children(&mut arena, content, node, &mut containers); + } + Container::List { + content, + node, + indent, + } => { + parse_list_items(&mut arena, content, indent, node, &mut containers); + } + } + } + + Org { arena, document } } pub fn iter(&'a self) -> Iter<'a> { @@ -49,376 +134,370 @@ impl<'a> Org<'a> { for event in self.iter() { match event { - Start(e) => handler.start(&mut writer, e)?, - End(e) => handler.end(&mut writer, e)?, + Start(element) => handler.start(&mut writer, element)?, + End(element) => handler.end(&mut writer, element)?, } } Ok(()) } +} - fn parse_internal(&mut self, config: ParseConfig<'_>) { - let mut node = self.document; - loop { - match 
self.arena[node].data { - Element::Document { mut contents } - | Element::Headline(Headline { mut contents, .. }) => { - if !contents.is_empty() { - let off = Headline::find_level(contents, std::usize::MAX); - if off != 0 { - let section = Element::Section { - contents: &contents[0..off], - }; - let new_node = self.arena.new_node(section); - node.append(new_node, &mut self.arena).unwrap(); - contents = &contents[off..]; - } - } - while !contents.is_empty() { - let (tail, headline) = Headline::parse(contents, &config); - let headline = Element::Headline(headline); - let new_node = self.arena.new_node(headline); - node.append(new_node, &mut self.arena).unwrap(); - contents = tail; - } - } - Element::Section { contents } => { - // TODO - if let Some((tail, _planning)) = Planning::parse(contents) { - self.parse_elements_children(tail, node); - } else { - self.parse_elements_children(contents, node); - } - } - Element::Block(Block { contents, .. }) - | Element::ListItem(ListItem { contents, .. }) => { - self.parse_elements_children(contents, node); - } - Element::Paragraph { contents } - | Element::Bold { contents } - | Element::Underline { contents } - | Element::Italic { contents } - | Element::Strike { contents } => { - self.parse_objects_children(contents, node); - } - Element::List(List { - contents, indent, .. 
- }) => { - self.parse_list_items(contents, indent, node); - } - _ => (), - } +fn parse_elements_children<'a>( + arena: &mut Arena>, + content: &'a str, + parent: NodeId, + containers: &mut Vec>, +) { + let mut tail = skip_empty_lines(content); - if let Some(next_node) = self.next_node(node) { - node = next_node; - } else { - break; - } - } + if let Some((new_tail, element)) = parse_element(content, arena, containers) { + parent.append(element, arena).unwrap(); + tail = skip_empty_lines(new_tail); } - fn next_node(&self, mut node: NodeId) -> Option { - if let Some(child) = self.arena[node].first_child() { - return Some(child); - } + let mut text = tail; + let mut pos = 0; - loop { - if let Some(sibling) = self.arena[node].next_sibling() { - return Some(sibling); - } else if let Some(parent) = self.arena[node].parent() { - node = parent; - } else { - return None; - } - } - } - - fn parse_elements_children(&mut self, input: &'a str, node: NodeId) { - let mut tail = skip_empty_lines(input); - - if let Some((new_tail, element)) = self.parse_element(input) { - let new_node = self.arena.new_node(element); - node.append(new_node, &mut self.arena).unwrap(); - tail = skip_empty_lines(new_tail); - } - - let mut text = tail; - let mut pos = 0; - - while !tail.is_empty() { - let i = memchr(b'\n', tail.as_bytes()) - .map(|i| i + 1) - .unwrap_or_else(|| tail.len()); - if tail.as_bytes()[0..i].iter().all(u8::is_ascii_whitespace) { - tail = skip_empty_lines(&tail[i..]); - let new_node = self.arena.new_node(Element::Paragraph { - contents: if text.as_bytes()[pos - 1] == b'\n' { - &text[0..pos - 1] - } else { - &text[0..pos] - }, - }); - node.append(new_node, &mut self.arena).unwrap(); - text = tail; - pos = 0; - } else if let Some((new_tail, element)) = self.parse_element(tail) { - if pos != 0 { - let new_node = self.arena.new_node(Element::Paragraph { - contents: if text.as_bytes()[pos - 1] == b'\n' { - &text[0..pos - 1] - } else { - &text[0..pos] - }, - }); - 
node.append(new_node, &mut self.arena).unwrap(); - pos = 0; - } - let new_node = self.arena.new_node(element); - node.append(new_node, &mut self.arena).unwrap(); - tail = skip_empty_lines(new_tail); - text = tail; - } else { - tail = &tail[i..]; - pos += i; - } - } - - if !text.is_empty() { - let new_node = self.arena.new_node(Element::Paragraph { - contents: if text.as_bytes()[pos - 1] == b'\n' { - &text[0..pos - 1] - } else { - &text[0..pos] - }, + while !tail.is_empty() { + let i = memchr(b'\n', tail.as_bytes()) + .map(|i| i + 1) + .unwrap_or_else(|| tail.len()); + if tail.as_bytes()[0..i].iter().all(u8::is_ascii_whitespace) { + tail = skip_empty_lines(&tail[i..]); + let node = arena.new_node(Element::Paragraph); + parent.append(node, arena).unwrap(); + containers.push(Container::Inline { + content: &text[0..pos].trim_end_matches('\n'), + node, }); - node.append(new_node, &mut self.arena).unwrap(); - } - } - - fn parse_element(&self, contents: &'a str) -> Option<(&'a str, Element<'a>)> { - if let Some((tail, fn_def)) = FnDef::parse(contents) { - let fn_def = Element::FnDef(fn_def); - return Some((tail, fn_def)); - } else if let Some((tail, list)) = List::parse(contents) { - let list = Element::List(list); - return Some((tail, list)); - } - - let tail = contents.trim_start(); - - if let Some((tail, clock)) = Clock::parse(tail) { - return Some((tail, clock)); - } - - // TODO: LaTeX environment - if tail.starts_with("\\begin{") {} - - if tail.starts_with('-') { - if let Ok((tail, rule)) = Rule::parse(tail) { - return Some((tail, rule)); + text = tail; + pos = 0; + } else if let Some((new_tail, element)) = parse_element(tail, arena, containers) { + if pos != 0 { + let node = arena.new_node(Element::Paragraph); + parent.append(node, arena).unwrap(); + containers.push(Container::Inline { + content: &text[0..pos].trim_end_matches('\n'), + node, + }); + pos = 0; } - } - - if tail.starts_with(':') { - if let Some((tail, drawer)) = Drawer::parse(tail) { - return 
Some((tail, drawer)); - } - } - - if tail == ":" || tail.starts_with(": ") || tail.starts_with(":\n") { - let mut last_end = 1; // ":" - for i in memchr_iter(b'\n', contents.as_bytes()) { - last_end = i + 1; - let line = &contents[last_end..]; - if !(line == ":" || line.starts_with(": ") || line.starts_with(":\n")) { - let fixed_width = Element::FixedWidth { - value: &contents[0..i + 1], - }; - return Some((&contents[i + 1..], fixed_width)); - } - } - let fixed_width = Element::FixedWidth { - value: &contents[0..last_end], - }; - return Some((&contents[last_end..], fixed_width)); - } - - if tail == "#" || tail.starts_with("# ") || tail.starts_with("#\n") { - let mut last_end = 1; // "#" - for i in memchr_iter(b'\n', contents.as_bytes()) { - last_end = i + 1; - let line = &contents[last_end..]; - if !(line == "#" || line.starts_with("# ") || line.starts_with("#\n")) { - let fixed_width = Element::Comment { - value: &contents[0..i + 1], - }; - return Some((&contents[i + 1..], fixed_width)); - } - } - let fixed_width = Element::Comment { - value: &contents[0..last_end], - }; - return Some((&contents[last_end..], fixed_width)); - } - - if tail.starts_with("#+") { - Block::parse(tail) - .or_else(|| DynBlock::parse(tail)) - .or_else(|| Keyword::parse(tail).ok()) + parent.append(element, arena).unwrap(); + tail = skip_empty_lines(new_tail); + text = tail; } else { - None + tail = &tail[i..]; + pos += i; } } - fn parse_objects_children(&mut self, contents: &'a str, node: NodeId) { - let mut tail = contents; + if !text.is_empty() { + let node = arena.new_node(Element::Paragraph); + parent.append(node, arena).unwrap(); + containers.push(Container::Inline { + content: &text[0..pos].trim_end_matches('\n'), + node, + }); + } +} - if let Some((new_tail, obj)) = self.parse_object(tail) { - let new_node = self.arena.new_node(obj); - node.append(new_node, &mut self.arena).unwrap(); - tail = new_tail; +fn parse_element<'a>( + contents: &'a str, + arena: &mut Arena>, + containers: 
&mut Vec>, +) -> Option<(&'a str, NodeId)> { + if let Some((tail, fn_def, content)) = FnDef::parse(contents) { + let node = arena.new_node(Element::FnDef(fn_def)); + containers.push(Container::Block { content, node }); + return Some((tail, node)); + } else if let Some((tail, list, content)) = List::parse(contents) { + let indent = list.indent; + let node = arena.new_node(Element::List(list)); + containers.push(Container::List { + content, + node, + indent, + }); + return Some((tail, node)); + } + + let tail = contents.trim_start(); + + if let Some((tail, clock)) = Clock::parse(tail) { + return Some((tail, arena.new_node(clock))); + } + + // TODO: LaTeX environment + if tail.starts_with("\\begin{") {} + + if tail.starts_with('-') { + if let Ok((tail, rule)) = Rule::parse(tail) { + return Some((tail, arena.new_node(rule))); } + } - let mut text = tail; - let mut pos = 0; + if tail.starts_with(':') { + if let Some((tail, drawer, content)) = Drawer::parse(tail) { + return Some((tail, arena.new_node(drawer))); + } + } - let bs = bytes!(b'@', b'<', b'[', b' ', b'(', b'{', b'\'', b'"', b'\n'); + // FixedWidth + if tail == ":" || tail.starts_with(": ") || tail.starts_with(":\n") { + let mut last_end = 1; // ":" + for i in memchr_iter(b'\n', contents.as_bytes()) { + last_end = i + 1; + let line = &contents[last_end..]; + if !(line == ":" || line.starts_with(": ") || line.starts_with(":\n")) { + let fixed_width = arena.new_node(Element::FixedWidth { + value: &contents[0..i + 1], + }); + return Some((&contents[i + 1..], fixed_width)); + } + } + let fixed_width = arena.new_node(Element::FixedWidth { + value: &contents[0..last_end], + }); + return Some((&contents[last_end..], fixed_width)); + } - while let Some(off) = bs.find(tail.as_bytes()) { - match tail.as_bytes()[off] { - b'{' => { - if let Some((new_tail, obj)) = self.parse_object(&tail[off..]) { - if pos != 0 { - let new_node = self.arena.new_node(Element::Text { - value: &text[0..pos + off], - }); - 
node.append(new_node, &mut self.arena).unwrap(); - pos = 0; - } - let new_node = self.arena.new_node(obj); - node.append(new_node, &mut self.arena).unwrap(); - tail = new_tail; - text = new_tail; - } else if let Some((new_tail, obj)) = self.parse_object(&tail[off + 1..]) { - let new_node = self.arena.new_node(Element::Text { - value: &text[0..pos + off + 1], + // Comment + if tail == "#" || tail.starts_with("# ") || tail.starts_with("#\n") { + let mut last_end = 1; // "#" + for i in memchr_iter(b'\n', contents.as_bytes()) { + last_end = i + 1; + let line = &contents[last_end..]; + if !(line == "#" || line.starts_with("# ") || line.starts_with("#\n")) { + let comment = arena.new_node(Element::Comment { + value: &contents[0..i + 1], + }); + return Some((&contents[i + 1..], comment)); + } + } + let comment = arena.new_node(Element::Comment { + value: &contents[0..last_end], + }); + return Some((&contents[last_end..], comment)); + } + + if tail.starts_with("#+") { + if let Some((tail, block, content)) = Block::parse(tail) { + let node = arena.new_node(block); + containers.push(Container::Block { content, node }); + Some((tail, node)) + } else if let Some((tail, dyn_block, content)) = DynBlock::parse(tail) { + let node = arena.new_node(dyn_block); + containers.push(Container::Block { content, node }); + Some((tail, node)) + } else { + Keyword::parse(tail) + .ok() + .map(|(tail, kw)| (tail, arena.new_node(kw))) + } + } else { + None + } +} + +fn parse_objects_children<'a>( + arena: &mut Arena>, + content: &'a str, + parent: NodeId, + containers: &mut Vec>, +) { + let mut tail = content; + + if let Some((new_tail, obj)) = parse_object(tail, arena, containers) { + parent.append(obj, arena).unwrap(); + tail = new_tail; + } + + let mut text = tail; + let mut pos = 0; + + let bs = bytes!(b'@', b'<', b'[', b' ', b'(', b'{', b'\'', b'"', b'\n'); + + while let Some(off) = bs.find(tail.as_bytes()) { + match tail.as_bytes()[off] { + b'{' => { + if let Some((new_tail, obj)) = 
parse_object(&tail[off..], arena, containers) { + if pos != 0 { + let node = arena.new_node(Element::Text { + value: &text[0..pos + off], }); - node.append(new_node, &mut self.arena).unwrap(); + parent.append(node, arena).unwrap(); pos = 0; - let new_node = self.arena.new_node(obj); - node.append(new_node, &mut self.arena).unwrap(); - tail = new_tail; - text = new_tail; - } else { - tail = &tail[off + 1..]; - pos += off + 1; } + parent.append(obj, arena).unwrap(); + tail = new_tail; + text = new_tail; + continue; + } else if let Some((new_tail, obj)) = + parse_object(&tail[off + 1..], arena, containers) + { + let node = arena.new_node(Element::Text { + value: &text[0..pos + off + 1], + }); + parent.append(node, arena).unwrap(); + pos = 0; + parent.append(obj, arena).unwrap(); + tail = new_tail; + text = new_tail; + continue; } - b' ' | b'(' | b'\'' | b'"' | b'\n' => { - if let Some((new_tail, obj)) = self.parse_object(&tail[off + 1..]) { - let new_node = self.arena.new_node(Element::Text { - value: &text[0..pos + off + 1], + } + b' ' | b'(' | b'\'' | b'"' | b'\n' => { + if let Some((new_tail, obj)) = parse_object(&tail[off + 1..], arena, containers) { + let node = arena.new_node(Element::Text { + value: &text[0..pos + off + 1], + }); + parent.append(node, arena).unwrap(); + pos = 0; + parent.append(obj, arena).unwrap(); + tail = new_tail; + text = new_tail; + continue; + } + } + _ => { + if let Some((new_tail, obj)) = parse_object(&tail[off..], arena, containers) { + if pos != 0 { + let node = arena.new_node(Element::Text { + value: &text[0..pos + off], }); - node.append(new_node, &mut self.arena).unwrap(); + parent.append(node, arena).unwrap(); pos = 0; - let new_node = self.arena.new_node(obj); - node.append(new_node, &mut self.arena).unwrap(); - tail = new_tail; - text = new_tail; - } else { - tail = &tail[off + 1..]; - pos += off + 1; - } - } - _ => { - if let Some((new_tail, obj)) = self.parse_object(&tail[off..]) { - if pos != 0 { - let new_node = 
self.arena.new_node(Element::Text { - value: &text[0..pos + off], - }); - node.append(new_node, &mut self.arena).unwrap(); - pos = 0; - } - let new_node = self.arena.new_node(obj); - node.append(new_node, &mut self.arena).unwrap(); - tail = new_tail; - text = new_tail; - } else { - tail = &tail[off + 1..]; - pos += off + 1; } + parent.append(obj, arena).unwrap(); + tail = new_tail; + text = new_tail; + continue; } } } - - if !text.is_empty() { - let new_node = self.arena.new_node(Element::Text { value: text }); - node.append(new_node, &mut self.arena).unwrap(); - } + tail = &tail[off + 1..]; + pos += off + 1; } - fn parse_object(&self, contents: &'a str) -> Option<(&'a str, Element<'a>)> { - if contents.len() < 3 { - return None; - } + if !text.is_empty() { + let node = arena.new_node(Element::Text { value: text }); + parent.append(node, arena).unwrap(); + } +} - let bytes = contents.as_bytes(); - match bytes[0] { - b'@' => Snippet::parse(contents).ok(), - b'{' => Macros::parse(contents).ok(), - b'<' => RadioTarget::parse(contents) - .or_else(|_| Target::parse(contents)) - .or_else(|_| { - Timestamp::parse_active(contents) - .map(|(tail, timestamp)| (tail, timestamp.into())) - }) - .or_else(|_| { - Timestamp::parse_diary(contents) - .map(|(tail, timestamp)| (tail, timestamp.into())) - }) - .ok(), - b'[' => { - if contents[1..].starts_with("fn:") { - FnRef::parse(contents).map(|(tail, fn_ref)| (tail, fn_ref.into())) - } else if bytes[1] == b'[' { - Link::parse(contents).ok() - } else { - Cookie::parse(contents) - .map(|(tail, cookie)| (tail, cookie.into())) - .or_else(|| { - Timestamp::parse_inactive(contents) - .map(|(tail, timestamp)| (tail, timestamp.into())) - .ok() - }) - } - } - b'*' => parse_emphasis(contents, b'*') - .map(|(tail, contents)| (tail, Element::Bold { contents })), - b'+' => parse_emphasis(contents, b'+') - .map(|(tail, contents)| (tail, Element::Strike { contents })), - b'/' => parse_emphasis(contents, b'/') - .map(|(tail, contents)| (tail, 
Element::Italic { contents })), - b'_' => parse_emphasis(contents, b'_') - .map(|(tail, contents)| (tail, Element::Underline { contents })), - b'=' => parse_emphasis(contents, b'=') - .map(|(tail, value)| (tail, Element::Verbatim { value })), - b'~' => { - parse_emphasis(contents, b'~').map(|(tail, value)| (tail, Element::Code { value })) - } - b's' if contents.starts_with("src_") => InlineSrc::parse(contents).ok(), - b'c' if contents.starts_with("call_") => InlineCall::parse(contents).ok(), - _ => None, - } +fn parse_object<'a>( + contents: &'a str, + arena: &mut Arena>, + containers: &mut Vec>, +) -> Option<(&'a str, NodeId)> { + if contents.len() < 3 { + return None; } - fn parse_list_items(&mut self, mut contents: &'a str, indent: usize, node: NodeId) { - while !contents.is_empty() { - let (tail, list_item) = ListItem::parse(contents, indent); - let list_item = Element::ListItem(list_item); - let new_node = self.arena.new_node(list_item); - node.append(new_node, &mut self.arena).unwrap(); - contents = tail; + let bytes = contents.as_bytes(); + match bytes[0] { + b'@' => Snippet::parse(contents) + .ok() + .map(|(tail, element)| (tail, arena.new_node(element))), + b'{' => Macros::parse(contents) + .ok() + .map(|(tail, element)| (tail, arena.new_node(element))), + b'<' => RadioTarget::parse(contents) + .map(|(tail, (radio, content))| (tail, radio)) + .or_else(|_| Target::parse(contents)) + .or_else(|_| { + Timestamp::parse_active(contents).map(|(tail, timestamp)| (tail, timestamp.into())) + }) + .or_else(|_| { + Timestamp::parse_diary(contents).map(|(tail, timestamp)| (tail, timestamp.into())) + }) + .ok() + .map(|(tail, element)| (tail, arena.new_node(element))), + b'[' => { + if contents[1..].starts_with("fn:") { + FnRef::parse(contents) + .map(|(tail, fn_ref)| (tail, fn_ref.into())) + .map(|(tail, element)| (tail, arena.new_node(element))) + } else if bytes[1] == b'[' { + Link::parse(contents) + .ok() + .map(|(tail, element)| (tail, arena.new_node(element))) + 
} else { + Cookie::parse(contents) + .map(|(tail, cookie)| (tail, cookie.into())) + .or_else(|| { + Timestamp::parse_inactive(contents) + .map(|(tail, timestamp)| (tail, timestamp.into())) + .ok() + }) + .map(|(tail, element)| (tail, arena.new_node(element))) + } } + b'*' => { + if let Some((tail, content)) = parse_emphasis(contents, b'*') { + let node = arena.new_node(Element::Bold); + containers.push(Container::Inline { content, node }); + Some((tail, node)) + } else { + None + } + } + b'+' => { + if let Some((tail, content)) = parse_emphasis(contents, b'+') { + let node = arena.new_node(Element::Strike); + containers.push(Container::Inline { content, node }); + Some((tail, node)) + } else { + None + } + } + b'/' => { + if let Some((tail, content)) = parse_emphasis(contents, b'/') { + let node = arena.new_node(Element::Italic); + containers.push(Container::Inline { content, node }); + Some((tail, node)) + } else { + None + } + } + b'_' => { + if let Some((tail, content)) = parse_emphasis(contents, b'_') { + let node = arena.new_node(Element::Underline); + containers.push(Container::Inline { content, node }); + Some((tail, node)) + } else { + None + } + } + b'=' => parse_emphasis(contents, b'=') + .map(|(tail, value)| (tail, arena.new_node(Element::Verbatim { value }))), + b'~' => parse_emphasis(contents, b'~') + .map(|(tail, value)| (tail, arena.new_node(Element::Code { value }))), + b's' => InlineSrc::parse(contents) + .ok() + .map(|(tail, element)| (tail, arena.new_node(element))), + b'c' => InlineCall::parse(contents) + .ok() + .map(|(tail, element)| (tail, arena.new_node(element))), + _ => None, + } +} + +fn parse_list_items<'a>( + arena: &mut Arena>, + mut contents: &'a str, + indent: usize, + parent: NodeId, + containers: &mut Vec>, +) { + while !contents.is_empty() { + let (tail, list_item, content) = ListItem::parse(contents, indent); + let list_item = Element::ListItem(list_item); + let node = arena.new_node(list_item); + parent.append(node, 
arena).unwrap(); + containers.push(Container::Block { content, node }); + contents = tail; } }