diff --git a/src/elements/block.rs b/src/elements/block.rs index 83bb027..1a4356e 100644 --- a/src/elements/block.rs +++ b/src/elements/block.rs @@ -2,71 +2,7 @@ use std::borrow::Cow; use nom::{bytes::complete::tag_no_case, character::complete::alpha1, sequence::preceded, IResult}; -use crate::parsers::{take_lines_till, take_until_eol}; - -#[cfg_attr(test, derive(PartialEq))] -#[derive(Debug)] -pub struct Block<'a> { - pub name: Cow<'a, str>, - pub args: Option>, -} - -impl Block<'_> { - #[inline] - pub(crate) fn parse(input: &str) -> IResult<&str, (Block<'_>, &str)> { - let (input, name) = preceded(tag_no_case("#+BEGIN_"), alpha1)(input)?; - let (input, args) = take_until_eol(input)?; - let end_line = format!(r"#+END_{}", name); - let (input, contents) = - take_lines_till(|line| line.eq_ignore_ascii_case(&end_line))(input)?; - - Ok(( - input, - ( - Block { - name: name.into(), - args: if args.is_empty() { - None - } else { - Some(args.into()) - }, - }, - contents, - ), - )) - } -} - -#[test] -fn parse() { - assert_eq!( - Block::parse("#+BEGIN_SRC\n#+END_SRC"), - Ok(( - "", - ( - Block { - name: "SRC".into(), - args: None, - }, - "" - ) - )) - ); - assert_eq!( - Block::parse("#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n#+END_SRC\n"), - Ok(( - "", - ( - Block { - name: "SRC".into(), - args: Some("javascript".into()), - }, - "console.log('Hello World!');\n" - ) - )) - ); - // TODO: more testing -} +use crate::parsers::{line, take_lines_while}; #[derive(Debug)] #[cfg_attr(test, derive(PartialEq))] @@ -129,3 +65,60 @@ pub struct SourceBlock<'a> { pub language: Cow<'a, str>, pub arguments: Cow<'a, str>, } + +pub(crate) fn parse_block_element(input: &str) -> IResult<&str, (&str, Option<&str>, &str)> { + let (input, name) = preceded(tag_no_case("#+BEGIN_"), alpha1)(input)?; + let (input, args) = line(input)?; + let end_line = format!(r"#+END_{}", name); + let (input, contents) = + take_lines_while(|line| !line.trim().eq_ignore_ascii_case(&end_line))(input)?; + let (input, _) = line(input)?; + + Ok(( + input, + ( + name, + if args.trim().is_empty() { + None + } else { + Some(args.trim()) + }, + contents, + ), + )) +} + +#[test] +fn parse() { + assert_eq!( + parse_block_element( + r#"#+BEGIN_SRC +#+END_SRC"# + ), + Ok(("", ("SRC".into(), None, ""))) + ); + assert_eq!( + parse_block_element( + r#"#+begin_src + #+end_src"# + ), + Ok(("", ("src".into(), None, ""))) + ); + assert_eq!( + parse_block_element( + r#"#+BEGIN_SRC javascript +console.log('Hello World!'); +#+END_SRC +"# + ), + Ok(( + "", + ( + "SRC".into(), + Some("javascript".into()), + "console.log('Hello World!');\n" + ) + )) + ); + // TODO: more testing +} diff --git a/src/elements/clock.rs b/src/elements/clock.rs index c764823..5455699 100644 --- a/src/elements/clock.rs +++ b/src/elements/clock.rs @@ -8,7 +8,7 @@ use nom::{ IResult, }; -use crate::elements::{Datetime, Element, Timestamp}; +use crate::elements::{Datetime, Timestamp}; use crate::parsers::eol; /// clock elements @@ -40,7 +40,7 @@ pub enum Clock<'a> { } impl Clock<'_> { - pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> { + pub(crate) fn parse(input: &str) -> IResult<&str, Clock<'_>> { let (input, _) = tag("CLOCK:")(input)?; let (input, _) = space0(input)?; let (input, timestamp) = Timestamp::parse_inactive(input)?; @@ -60,13 +60,13 @@ impl Clock<'_> { let (input, _) = eol(input)?; Ok(( input, - Element::Clock(Clock::Closed { + Clock::Closed { start, end, repeater, delay, duration: duration.into(), - }), + }, )) } Timestamp::Inactive { @@ -77,11 +77,11 @@ impl Clock<'_> { let (input, _) = eol(input)?; Ok(( input, - Element::Clock(Clock::Running { + Clock::Running { start, repeater, delay, - }), + }, )) } _ => unreachable!( @@ -148,7 +148,7 @@ fn parse() { Clock::parse("CLOCK: [2003-09-16 Tue 09:39]"), Ok(( "", - Element::Clock(Clock::Running { + Clock::Running { start: Datetime { year: 2003, month: 9, @@ -159,14 +159,14 @@ fn parse() { }, repeater: None, delay: None, - }) + } )) ); assert_eq!( Clock::parse("CLOCK: [2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39] => 1:00"), Ok(( "", - Element::Clock(Clock::Closed { + Clock::Closed { start: Datetime { year: 2003, month: 9, @@ -186,7 +186,7 @@ fn parse() { repeater: None, delay: None, duration: "1:00".into(), - }) + } )) ); } diff --git a/src/elements/drawer.rs b/src/elements/drawer.rs index 516107d..a2ce9b8 100644 --- a/src/elements/drawer.rs +++ b/src/elements/drawer.rs @@ -1,6 +1,6 @@ use std::borrow::Cow; -use crate::parsers::{eol, take_lines_till}; +use crate::parsers::{eol, line, take_lines_while}; use nom::{ bytes::complete::{tag, take_while1}, @@ -24,7 +24,9 @@ impl Drawer<'_> { tag(":"), )(input)?; let (input, _) = eol(input)?; - let (input, contents) = take_lines_till(|line| line.eq_ignore_ascii_case(":END:"))(input)?; + let (input, contents) = + take_lines_while(|line| !line.trim().eq_ignore_ascii_case(":END:"))(input)?; + let (input, _) = line(input)?; Ok((input, (Drawer { name: name.into() }, contents))) } diff --git a/src/elements/dyn_block.rs b/src/elements/dyn_block.rs index 9da0519..ca46813 100644 --- a/src/elements/dyn_block.rs +++ b/src/elements/dyn_block.rs @@ -1,7 +1,6 @@ use std::borrow::Cow; -use crate::elements::Element; -use crate::parsers::{take_lines_till, take_until_eol}; +use crate::parsers::{line, take_lines_while}; use nom::{ bytes::complete::tag_no_case, @@ -20,25 +19,26 @@ pub struct DynBlock<'a> { impl DynBlock<'_> { #[inline] - pub(crate) fn parse(input: &str) -> IResult<&str, (Element<'_>, &str)> { + pub(crate) fn parse(input: &str) -> IResult<&str, (DynBlock<'_>, &str)> { let (input, _) = tag_no_case("#+BEGIN:")(input)?; let (input, _) = space1(input)?; let (input, name) = alpha1(input)?; - let (input, args) = take_until_eol(input)?; - - let (input, contents) = take_lines_till(|line| line.eq_ignore_ascii_case("#+END:"))(input)?; + let (input, args) = line(input)?; + let (input, contents) = + take_lines_while(|line| !line.trim().eq_ignore_ascii_case("#+END:"))(input)?; + let (input, _) = line(input)?; Ok(( input, ( - Element::DynBlock(DynBlock { + DynBlock { block_name: name.into(), - arguments: if args.is_empty() { + arguments: if args.trim().is_empty() { None } else { - Some(args.into()) + Some(args.trim().into()) }, - }), + }, contents, ), )) @@ -49,14 +49,19 @@ impl DynBlock<'_> { fn parse() { // TODO: testing assert_eq!( - DynBlock::parse("#+BEGIN: clocktable :scope file\nCONTENTS\n#+END:\n"), + DynBlock::parse( + r#"#+BEGIN: clocktable :scope file +CONTENTS +#+END: +"# + ), Ok(( "", ( - Element::DynBlock(DynBlock { + DynBlock { block_name: "clocktable".into(), arguments: Some(":scope file".into()), - }), + }, "CONTENTS\n" ) )) diff --git a/src/elements/emphasis.rs b/src/elements/emphasis.rs index 5d580c5..c894e7d 100644 --- a/src/elements/emphasis.rs +++ b/src/elements/emphasis.rs @@ -2,7 +2,7 @@ use bytecount::count; use memchr::memchr_iter; #[inline] -pub(crate) fn parse(text: &str, marker: u8) -> Option<(&str, &str)> { +pub(crate) fn parse_emphasis(text: &str, marker: u8) -> Option<(&str, &str)> { debug_assert!(text.len() >= 3); let bytes = text.as_bytes(); @@ -35,19 +35,14 @@ fn validate_marker(pos: usize, text: &str) -> bool { } } -#[cfg(test)] -mod tests { - #[test] - fn parse() { - use super::parse; - - assert_eq!(parse("*bold*", b'*'), Some(("", "bold"))); - assert_eq!(parse("*bo*ld*", b'*'), Some(("", "bo*ld"))); - assert_eq!(parse("*bo\nld*", b'*'), Some(("", "bo\nld"))); - assert_eq!(parse("*bold*a", b'*'), None); - assert_eq!(parse("*bold*", b'/'), None); - assert_eq!(parse("*bold *", b'*'), None); - assert_eq!(parse("* bold*", b'*'), None); - assert_eq!(parse("*b\nol\nd*", b'*'), None); - } +#[test] +fn parse() { + assert_eq!(parse_emphasis("*bold*", b'*'), Some(("", "bold"))); + assert_eq!(parse_emphasis("*bo*ld*", b'*'), Some(("", "bo*ld"))); + assert_eq!(parse_emphasis("*bo\nld*", b'*'), Some(("", "bo\nld"))); + assert_eq!(parse_emphasis("*bold*a", b'*'), None); + assert_eq!(parse_emphasis("*bold*", b'/'), None); + assert_eq!(parse_emphasis("*bold *", b'*'), None); + assert_eq!(parse_emphasis("* bold*", b'*'), None); + assert_eq!(parse_emphasis("*b\nol\nd*", b'*'), None); } diff --git a/src/elements/fn_def.rs b/src/elements/fn_def.rs index ca4efbe..541c99b 100644 --- a/src/elements/fn_def.rs +++ b/src/elements/fn_def.rs @@ -1,12 +1,13 @@ use std::borrow::Cow; -use memchr::memchr; use nom::{ bytes::complete::{tag, take_while1}, sequence::delimited, IResult, }; +use crate::parsers::line; + #[cfg_attr(test, derive(PartialEq))] #[cfg_attr(feature = "ser", derive(serde::Serialize))] #[derive(Debug)] @@ -14,29 +15,24 @@ pub struct FnDef<'a> { pub label: Cow<'a, str>, } -fn parse_label(input: &str) -> IResult<&str, &str> { - let (input, label) = delimited( - tag("[fn:"), - take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'), - tag("]"), - )(input)?; - - Ok((input, label)) -} - impl FnDef<'_> { #[inline] - pub(crate) fn parse(text: &str) -> Option<(&str, FnDef<'_>, &str)> { - let (tail, label) = parse_label(text).ok()?; + pub(crate) fn parse(input: &str) -> IResult<&str, (FnDef<'_>, &str)> { + let (input, label) = delimited( + tag("[fn:"), + take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'), + tag("]"), + )(input)?; + let (input, content) = line(input)?; - let end = memchr(b'\n', tail.as_bytes()).unwrap_or_else(|| tail.len()); - - Some(( - &tail[end..], - FnDef { - label: label.into(), - }, - &tail[0..end], + Ok(( + input, + ( + FnDef { + label: label.into(), + }, + content, + ), )) } } @@ -45,39 +41,46 @@ impl FnDef<'_> { fn parse() { assert_eq!( FnDef::parse("[fn:1] https://orgmode.org"), - Some(("", FnDef { label: "1".into() }, " https://orgmode.org")) + Ok(("", (FnDef { label: "1".into() }, " https://orgmode.org"))) ); assert_eq!( FnDef::parse("[fn:word_1] https://orgmode.org"), - Some(( + Ok(( "", - FnDef { - label: "word_1".into() - }, - " https://orgmode.org" + ( + FnDef { + label: "word_1".into() + }, + " https://orgmode.org" + ) )) ); assert_eq!( FnDef::parse("[fn:WORD-1] https://orgmode.org"), - Some(( + Ok(( "", - FnDef { - label: "WORD-1".into() - }, - " https://orgmode.org" + ( + FnDef { + label: "WORD-1".into() + }, + " https://orgmode.org" + ) )) ); assert_eq!( FnDef::parse("[fn:WORD]"), - Some(( + Ok(( "", - FnDef { - label: "WORD".into() - }, - "" + ( + FnDef { + label: "WORD".into() + }, + "" + ) )) ); - assert_eq!(FnDef::parse("[fn:] https://orgmode.org"), None); - assert_eq!(FnDef::parse("[fn:wor d] https://orgmode.org"), None); - assert_eq!(FnDef::parse("[fn:WORD https://orgmode.org"), None); + + assert!(FnDef::parse("[fn:] https://orgmode.org").is_err()); + assert!(FnDef::parse("[fn:wor d] https://orgmode.org").is_err()); + assert!(FnDef::parse("[fn:WORD https://orgmode.org").is_err()); } diff --git a/src/elements/inline_call.rs b/src/elements/inline_call.rs index 9b2edf9..44c3ca6 100644 --- a/src/elements/inline_call.rs +++ b/src/elements/inline_call.rs @@ -7,8 +7,6 @@ use nom::{ IResult, }; -use crate::elements::Element; - #[cfg_attr(test, derive(PartialEq))] #[cfg_attr(feature = "ser", derive(serde::Serialize))] #[derive(Debug)] @@ -21,9 +19,9 @@ pub struct InlineCall<'a> { pub end_header: Option>, } -impl<'a> InlineCall<'a> { +impl InlineCall<'_> { #[inline] - pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> { + pub(crate) fn parse(input: &str) -> IResult<&str, InlineCall<'_>> { let (input, name) = preceded( tag("call_"), take_till(|c| c == '[' || c == '\n' || c == '(' || c == ')'), @@ -43,12 +41,12 @@ impl<'a> InlineCall<'a> { Ok(( input, - Element::InlineCall(InlineCall { + InlineCall { name: name.into(), arguments: arguments.into(), inside_header: inside_header.map(Into::into), end_header: end_header.map(Into::into), - }), + }, )) } } @@ -59,48 +57,48 @@ fn parse() { InlineCall::parse("call_square(4)"), Ok(( "", - Element::InlineCall(InlineCall { + InlineCall { name: "square".into(), arguments: "4".into(), inside_header: None, end_header: None, - }), + } )) ); assert_eq!( InlineCall::parse("call_square[:results output](4)"), Ok(( "", - Element::InlineCall(InlineCall { + InlineCall { name: "square".into(), arguments: "4".into(), inside_header: Some(":results output".into()), end_header: None, - }), + }, )) ); assert_eq!( InlineCall::parse("call_square(4)[:results html]"), Ok(( "", - Element::InlineCall(InlineCall { + InlineCall { name: "square".into(), arguments: "4".into(), inside_header: None, end_header: Some(":results html".into()), - }), + }, )) ); assert_eq!( InlineCall::parse("call_square[:results output](4)[:results html]"), Ok(( "", - Element::InlineCall(InlineCall { + InlineCall { name: "square".into(), arguments: "4".into(), inside_header: Some(":results output".into()), end_header: Some(":results html".into()), - }), + }, )) ); } diff --git a/src/elements/inline_src.rs b/src/elements/inline_src.rs index ebac5ab..62e84dc 100644 --- a/src/elements/inline_src.rs +++ b/src/elements/inline_src.rs @@ -7,8 +7,6 @@ use nom::{ IResult, }; -use crate::elements::Element; - #[cfg_attr(test, derive(PartialEq))] #[cfg_attr(feature = "ser", derive(serde::Serialize))] #[derive(Debug)] @@ -21,7 +19,7 @@ pub struct InlineSrc<'a> { impl InlineSrc<'_> { #[inline] - pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> { + pub(crate) fn parse(input: &str) -> IResult<&str, InlineSrc<'_>> { let (input, _) = tag("src_")(input)?; let (input, lang) = take_while1(|c: char| !c.is_ascii_whitespace() && c != '[' && c != '{')(input)?; @@ -35,11 +33,11 @@ impl InlineSrc<'_> { Ok(( input, - Element::InlineSrc(InlineSrc { + InlineSrc { lang: lang.into(), options: options.map(Into::into), body: body.into(), - }), + }, )) } } @@ -50,22 +48,22 @@ fn parse() { InlineSrc::parse("src_C{int a = 0;}"), Ok(( "", - Element::InlineSrc(InlineSrc { + InlineSrc { lang: "C".into(), options: None, body: "int a = 0;".into() - }), + }, )) ); assert_eq!( InlineSrc::parse("src_xml[:exports code]{text}"), Ok(( "", - Element::InlineSrc(InlineSrc { + InlineSrc { lang: "xml".into(), options: Some(":exports code".into()), body: "text".into(), - }), + }, )) ); diff --git a/src/elements/keyword.rs b/src/elements/keyword.rs index 747f0fd..3604f46 100644 --- a/src/elements/keyword.rs +++ b/src/elements/keyword.rs @@ -7,8 +7,7 @@ use nom::{ IResult, }; -use crate::elements::Element; -use crate::parsers::take_until_eol; +use crate::parsers::line; #[cfg_attr(test, derive(PartialEq))] #[cfg_attr(feature = "ser", derive(serde::Serialize))] @@ -27,132 +26,52 @@ pub struct BabelCall<'a> { pub value: Cow<'a, str>, } -impl Keyword<'_> { - #[inline] - pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> { - let (input, _) = tag("#+")(input)?; - let (input, key) = - take_till(|c: char| c.is_ascii_whitespace() || c == ':' || c == '[')(input)?; - let (input, optional) = opt(delimited( - tag("["), - take_till(|c| c == ']' || c == '\n'), - tag("]"), - ))(input)?; - let (input, _) = tag(":")(input)?; - let (input, value) = take_until_eol(input)?; +pub(crate) fn parse_keyword(input: &str) -> IResult<&str, (&str, Option<&str>, &str)> { + let (input, _) = tag("#+")(input)?; + let (input, key) = take_till(|c: char| c.is_ascii_whitespace() || c == ':' || c == '[')(input)?; + let (input, optional) = opt(delimited( + tag("["), + take_till(|c| c == ']' || c == '\n'), + tag("]"), + ))(input)?; + let (input, _) = tag(":")(input)?; + let (input, value) = line(input)?; - if key.eq_ignore_ascii_case("CALL") { - Ok(( - input, - Element::BabelCall(BabelCall { - value: value.into(), - }), - )) - } else { - Ok(( - input, - Element::Keyword(Keyword { - key: key.into(), - optional: optional.map(Into::into), - value: value.into(), - }), - )) - } - } + Ok((input, (key, optional, value.trim()))) } #[test] fn parse() { + assert_eq!(parse_keyword("#+KEY:"), Ok(("", ("KEY", None, "")))); assert_eq!( - Keyword::parse("#+KEY:"), - Ok(( - "", - Element::Keyword(Keyword { - key: "KEY".into(), - optional: None, - value: "".into(), - }) - )) + parse_keyword("#+KEY: VALUE"), + Ok(("", ("KEY", None, "VALUE"))) ); assert_eq!( - Keyword::parse("#+KEY: VALUE"), - Ok(( - "", - Element::Keyword(Keyword { - key: "KEY".into(), - optional: None, - value: "VALUE".into(), - }) - )) + parse_keyword("#+K_E_Y: VALUE"), + Ok(("", ("K_E_Y", None, "VALUE"))) ); assert_eq!( - Keyword::parse("#+K_E_Y: VALUE"), - Ok(( - "", - Element::Keyword(Keyword { - key: "K_E_Y".into(), - optional: None, - value: "VALUE".into(), - }) - )) + parse_keyword("#+KEY:VALUE\n"), + Ok(("", ("KEY", None, "VALUE"))) ); - assert_eq!( - Keyword::parse("#+KEY:VALUE\n"), - Ok(( - "", - Element::Keyword(Keyword { - key: "KEY".into(), - optional: None, - value: "VALUE".into(), - }) - )) - ); - assert!(Keyword::parse("#+KE Y: VALUE").is_err()); - assert!(Keyword::parse("#+ KEY: VALUE").is_err()); + assert!(parse_keyword("#+KE Y: VALUE").is_err()); + assert!(parse_keyword("#+ KEY: VALUE").is_err()); + + assert_eq!(parse_keyword("#+RESULTS:"), Ok(("", ("RESULTS", None, "")))); assert_eq!( - Keyword::parse("#+RESULTS:"), - Ok(( - "", - Element::Keyword(Keyword { - key: "RESULTS".into(), - optional: None, - value: "".into(), - }) - )) + parse_keyword("#+ATTR_LATEX: :width 5cm\n"), + Ok(("", ("ATTR_LATEX", None, ":width 5cm"))) ); assert_eq!( - Keyword::parse("#+ATTR_LATEX: :width 5cm\n"), - Ok(( - "", - Element::Keyword(Keyword { - key: "ATTR_LATEX".into(), - optional: None, - value: ":width 5cm".into(), - }) - )) + parse_keyword("#+CALL: double(n=4)"), + Ok(("", ("CALL", None, "double(n=4)"))) ); assert_eq!( - Keyword::parse("#+CALL: double(n=4)"), - Ok(( - "", - Element::BabelCall(BabelCall { - value: "double(n=4)".into(), - }) - )) - ); - - assert_eq!( - Keyword::parse("#+CAPTION[Short caption]: Longer caption."), - Ok(( - "", - Element::Keyword(Keyword { - key: "CAPTION".into(), - optional: Some("Short caption".into()), - value: "Longer caption.".into(), - }) - )) + parse_keyword("#+CAPTION[Short caption]: Longer caption."), + Ok(("", ("CAPTION", Some("Short caption"), "Longer caption.",))) ); } diff --git a/src/elements/link.rs b/src/elements/link.rs index 15e7af1..b69da70 100644 --- a/src/elements/link.rs +++ b/src/elements/link.rs @@ -7,8 +7,6 @@ use nom::{ IResult, }; -use crate::elements::Element; - #[cfg_attr(test, derive(PartialEq))] #[cfg_attr(feature = "ser", derive(serde::Serialize))] #[derive(Debug)] @@ -20,7 +18,7 @@ pub struct Link<'a> { impl Link<'_> { #[inline] - pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> { + pub(crate) fn parse(input: &str) -> IResult<&str, Link<'_>> { let (input, path) = delimited( tag("[["), take_while(|c: char| c != '<' && c != '>' && c != '\n' && c != ']'), @@ -34,10 +32,10 @@ impl Link<'_> { let (input, _) = tag("]")(input)?; Ok(( input, - Element::Link(Link { + Link { path: path.into(), desc: desc.map(Into::into), - }), + }, )) } } @@ -48,20 +46,20 @@ fn parse() { Link::parse("[[#id]]"), Ok(( "", - Element::Link(Link { + Link { path: "#id".into(), desc: None - },) + } )) ); assert_eq!( Link::parse("[[#id][desc]]"), Ok(( "", - Element::Link(Link { + Link { path: "#id".into(), desc: Some("desc".into()) - }) + } )) ); assert!(Link::parse("[[#id][desc]").is_err()); diff --git a/src/elements/macros.rs b/src/elements/macros.rs index 5caab9f..2ed0f3b 100644 --- a/src/elements/macros.rs +++ b/src/elements/macros.rs @@ -7,8 +7,6 @@ use nom::{ IResult, }; -use crate::elements::Element; - #[cfg_attr(test, derive(PartialEq))] #[cfg_attr(feature = "ser", derive(serde::Serialize))] #[derive(Debug)] @@ -20,7 +18,7 @@ pub struct Macros<'a> { impl Macros<'_> { #[inline] - pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> { + pub(crate) fn parse(input: &str) -> IResult<&str, Macros<'_>> { let (input, _) = tag("{{{")(input)?; let (input, name) = verify( take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'), @@ -31,10 +29,10 @@ impl Macros<'_> { Ok(( input, - Element::Macros(Macros { + Macros { name: name.into(), arguments: arguments.map(Into::into), - }), + }, )) } } @@ -45,30 +43,30 @@ fn parse() { Macros::parse("{{{poem(red,blue)}}}"), Ok(( "", - Element::Macros(Macros { + Macros { name: "poem".into(), arguments: Some("red,blue".into()) - }) + } )) ); assert_eq!( Macros::parse("{{{poem())}}}"), Ok(( "", - Element::Macros(Macros { + Macros { name: "poem".into(), arguments: Some(")".into()) - }) + } )) ); assert_eq!( Macros::parse("{{{author}}}"), Ok(( "", - Element::Macros(Macros { + Macros { name: "author".into(), arguments: None - }) + } )) ); assert!(Macros::parse("{{{0uthor}}}").is_err()); diff --git a/src/elements/mod.rs b/src/elements/mod.rs index 2bd0637..44789e4 100644 --- a/src/elements/mod.rs +++ b/src/elements/mod.rs @@ -23,8 +23,10 @@ mod target; mod timestamp; mod title; -pub(crate) use block::Block; -pub(crate) use emphasis::parse as parse_emphasis; +pub(crate) use self::{ + block::parse_block_element, emphasis::parse_emphasis, keyword::parse_keyword, rule::parse_rule, + table::parse_table_el, +}; pub use self::{ block::{ @@ -45,7 +47,6 @@ pub use self::{ macros::Macros, planning::Planning, radio_target::RadioTarget, - rule::Rule, snippet::Snippet, table::{Table, TableRow}, target::Target, @@ -53,6 +54,8 @@ pub use self::{ title::Title, }; +use std::borrow::Cow; + /// Org-mode element enum #[derive(Debug)] #[cfg_attr(test, derive(PartialEq))] @@ -95,10 +98,10 @@ pub enum Element<'a> { Strike, Italic, Underline, - Verbatim { value: &'a str }, - Code { value: &'a str }, - Comment { value: &'a str }, - FixedWidth { value: &'a str }, + Verbatim { value: Cow<'a, str> }, + Code { value: Cow<'a, str> }, + Comment { value: Cow<'a, str> }, + FixedWidth { value: Cow<'a, str> }, Title(Title<'a>), Table(Table<'a>), TableRow(TableRow), @@ -176,6 +179,7 @@ impl_from!( Target, Timestamp, Table, + Title, VerseBlock; RadioTarget, List, diff --git a/src/elements/rule.rs b/src/elements/rule.rs index 7c51266..98a3725 100644 --- a/src/elements/rule.rs +++ b/src/elements/rule.rs @@ -1,37 +1,28 @@ -use nom::{bytes::complete::take_while_m_n, character::complete::space0, IResult}; +use nom::{bytes::complete::take_while_m_n, IResult}; use std::usize; -use crate::elements::Element; use crate::parsers::eol; -pub struct Rule; - -impl Rule { - #[inline] - pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> { - let (input, _) = space0(input)?; - let (input, _) = take_while_m_n(5, usize::MAX, |c| c == '-')(input)?; - let (input, _) = eol(input)?; - Ok((input, Element::Rule)) - } +pub(crate) fn parse_rule(input: &str) -> IResult<&str, ()> { + let (input, _) = take_while_m_n(5, usize::MAX, |c| c == '-')(input)?; + let (input, _) = eol(input)?; + Ok((input, ())) } #[test] fn parse() { - assert_eq!(Rule::parse("-----"), Ok(("", Element::Rule))); - assert_eq!(Rule::parse("--------"), Ok(("", Element::Rule))); - assert_eq!(Rule::parse(" -----"), Ok(("", Element::Rule))); - assert_eq!(Rule::parse("\t\t-----"), Ok(("", Element::Rule))); - assert_eq!(Rule::parse("\t\t-----\n"), Ok(("", Element::Rule))); - assert_eq!(Rule::parse("\t\t----- \n"), Ok(("", Element::Rule))); - assert!(Rule::parse("").is_err()); - assert!(Rule::parse("----").is_err()); - assert!(Rule::parse(" ----").is_err()); - assert!(Rule::parse(" None----").is_err()); - assert!(Rule::parse("None ----").is_err()); - assert!(Rule::parse("None------").is_err()); - assert!(Rule::parse("----None----").is_err()); - assert!(Rule::parse("\t\t----").is_err()); - assert!(Rule::parse("------None").is_err()); - assert!(Rule::parse("----- None").is_err()); + assert_eq!(parse_rule("-----"), Ok(("", ()))); + assert_eq!(parse_rule("--------"), Ok(("", ()))); + assert_eq!(parse_rule("-----\n"), Ok(("", ()))); + assert_eq!(parse_rule("----- \n"), Ok(("", ()))); + assert!(parse_rule("").is_err()); + assert!(parse_rule("----").is_err()); + assert!(parse_rule("----").is_err()); + assert!(parse_rule("None----").is_err()); + assert!(parse_rule("None ----").is_err()); + assert!(parse_rule("None------").is_err()); + assert!(parse_rule("----None----").is_err()); + assert!(parse_rule("\t\t----").is_err()); + assert!(parse_rule("------None").is_err()); + assert!(parse_rule("----- None").is_err()); } diff --git a/src/elements/snippet.rs b/src/elements/snippet.rs index a2baaf9..02c2f0f 100644 --- a/src/elements/snippet.rs +++ b/src/elements/snippet.rs @@ -6,8 +6,6 @@ use nom::{ IResult, }; -use crate::elements::Element; - #[cfg_attr(test, derive(PartialEq))] #[cfg_attr(feature = "ser", derive(serde::Serialize))] #[derive(Debug)] @@ -18,7 +16,7 @@ pub struct Snippet<'a> { impl Snippet<'_> { #[inline] - pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> { + pub(crate) fn parse(input: &str) -> IResult<&str, Snippet<'_>> { let (input, (name, value)) = delimited( tag("@@"), separated_pair( @@ -31,10 +29,10 @@ impl Snippet<'_> { Ok(( input, - Element::Snippet(Snippet { + Snippet { name: name.into(), value: value.into(), - }), + }, )) } } @@ -45,40 +43,40 @@ fn parse() { Snippet::parse("@@html:@@"), Ok(( "", - Element::Snippet(Snippet { + Snippet { name: "html".into(), value: "".into() - }) + } )) ); assert_eq!( Snippet::parse("@@latex:any arbitrary LaTeX code@@"), Ok(( "", - Element::Snippet(Snippet { + Snippet { name: "latex".into(), value: "any arbitrary LaTeX code".into(), - }) + } )) ); assert_eq!( Snippet::parse("@@html:@@"), Ok(( "", - Element::Snippet(Snippet { + Snippet { name: "html".into(), value: "".into(), - }) + } )) ); assert_eq!( Snippet::parse("@@html:

@

@@"), Ok(( "", - Element::Snippet(Snippet { + Snippet { name: "html".into(), value: "

@

".into(), - }) + } )) ); assert!(Snippet::parse("@@html:@").is_err()); diff --git a/src/elements/table.rs b/src/elements/table.rs index 4d58c72..6820b89 100644 --- a/src/elements/table.rs +++ b/src/elements/table.rs @@ -1,5 +1,12 @@ use std::borrow::Cow; +use nom::{ + combinator::{peek, verify}, + IResult, +}; + +use crate::parsers::{line, take_lines_while}; + #[derive(Debug)] #[cfg_attr(test, derive(PartialEq))] #[cfg_attr(feature = "ser", derive(serde::Serialize))] @@ -34,3 +41,35 @@ impl TableRow { } } } + +pub(crate) fn parse_table_el(input: &str) -> IResult<&str, &str> { + let (input, _) = peek(verify(line, |s: &str| { + let s = s.trim(); + s.starts_with("+-") && s.as_bytes().iter().all(|&c| c == b'+' || c == b'-') + }))(input)?; + + take_lines_while(|line| line.starts_with('|') || line.starts_with('+'))(input) +} + +#[test] +fn parse_table_el_() { + assert_eq!( + parse_table_el( + r#"+---+ +| | ++---+ + +"# + ), + Ok(( + r#" +"#, + r#"+---+ +| | ++---+ +"# + )) + ); + assert!(parse_table_el("").is_err()); + assert!(parse_table_el("+----|---").is_err()); +} diff --git a/src/elements/target.rs b/src/elements/target.rs index 49d4634..fc06a11 100644 --- a/src/elements/target.rs +++ b/src/elements/target.rs @@ -7,8 +7,6 @@ use nom::{ IResult, }; -use crate::elements::Element; - #[cfg_attr(test, derive(PartialEq))] #[cfg_attr(feature = "ser", derive(serde::Serialize))] #[derive(Debug)] @@ -18,7 +16,7 @@ pub struct Target<'a> { impl Target<'_> { #[inline] - pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> { + pub(crate) fn parse(input: &str) -> IResult<&str, Target<'_>> { let (input, target) = delimited( tag("<<"), verify( @@ -30,9 +28,9 @@ impl Target<'_> { Ok(( input, - Element::Target(Target { + Target { target: target.into(), - }), + }, )) } } @@ -43,18 +41,18 @@ fn parse() { Target::parse("<>"), Ok(( "", - Element::Target(Target { + Target { target: "target".into() - }) + } )) ); assert_eq!( Target::parse("<>"), Ok(( "", - Element::Target(Target { + Target { target: "tar get".into() - }) + } )) ); assert!(Target::parse("<>").is_err()); diff --git a/src/elements/title.rs b/src/elements/title.rs index 5e0a72e..a799cb4 100644 --- a/src/elements/title.rs +++ b/src/elements/title.rs @@ -17,7 +17,7 @@ use std::collections::HashMap; use crate::config::ParseConfig; use crate::elements::{Drawer, Planning}; -use crate::parsers::{skip_empty_lines, take_one_word, take_until_eol}; +use crate::parsers::{line, skip_empty_lines, take_one_word}; #[cfg_attr(test, derive(PartialEq))] #[cfg_attr(feature = "ser", derive(serde::Serialize))] @@ -108,7 +108,8 @@ fn parse_headline<'a>( ), ), ))(input)?; - let (input, tail) = take_until_eol(input)?; + let (input, tail) = line(input)?; + let tail = tail.trim(); let (raw, tags) = memrchr(b' ', tail.as_bytes()) .map(|i| (tail[0..i].trim(), &tail[i + 1..])) .filter(|(_, x)| x.len() > 2 && x.starts_with(':') && x.ends_with(':')) @@ -130,7 +131,7 @@ fn parse_headline<'a>( } fn parse_properties_drawer(input: &str) -> IResult<&str, HashMap, Cow<'_, str>>> { - let (input, (drawer, content)) = Drawer::parse(input)?; + let (input, (drawer, content)) = Drawer::parse(input.trim_start())?; if drawer.name != "PROPERTIES" { return Err(Err::Error(error_position!(input, ErrorKind::Tag))); } @@ -146,12 +147,12 @@ fn parse_properties_drawer(input: &str) -> IResult<&str, HashMap, C } fn parse_node_property(input: &str) -> IResult<&str, (&str, &str)> { - let input = skip_empty_lines(input); + let input = skip_empty_lines(input).trim_start(); let (input, name) = map(delimited(tag(":"), take_until(":"), tag(":")), |s: &str| { s.trim_end_matches('+') })(input)?; - let (input, value) = take_until_eol(input)?; - Ok((input, (name, value))) + let (input, value) = line(input)?; + Ok((input, (name, value.trim()))) } impl Title<'_> { @@ -228,6 +229,19 @@ fn parse_headline_() { ); } +#[test] +fn parse_properties_drawer_() { + assert_eq!( + parse_properties_drawer(" :PROPERTIES:\n :CUSTOM_ID: id\n :END:"), + Ok(( + "", + vec![("CUSTOM_ID".into(), "id".into())] + .into_iter() + .collect::>() + )) + ) +} + // #[test] // fn is_commented() { // assert!(Title::parse("* COMMENT Title", &CONFIG) diff --git a/src/org.rs b/src/org.rs index 0b41698..a3e43fe 100644 --- a/src/org.rs +++ b/src/org.rs @@ -9,7 +9,7 @@ use crate::parsers::*; pub struct Org<'a> { pub(crate) arena: Arena>, - document: NodeId, + root: NodeId, } #[derive(Debug)] @@ -25,12 +25,9 @@ impl Org<'_> { pub fn parse_with_config<'a>(content: &'a str, config: &ParseConfig) -> Org<'a> { let mut arena = Arena::new(); - let document = arena.new_node(Element::Document); + let node = arena.new_node(Element::Document); - let containers = &mut vec![Container::Document { - content, - node: document, - }]; + let containers = &mut vec![Container::Document { content, node }]; while let Some(container) = containers.pop() { match container { @@ -38,8 +35,7 @@ impl Org<'_> { parse_section_and_headlines(&mut arena, content, node, containers); } Container::Headline { content, node } => { - let content = parse_title(&mut arena, content, node, containers, config); - parse_section_and_headlines(&mut arena, content, node, containers); + parse_headline_content(&mut arena, content, node, containers, config); } Container::Block { content, node } => { parse_blocks(&mut arena, content, node, containers); @@ -57,28 +53,24 @@ impl Org<'_> { } } - Org { arena, document } + Org { arena, root: node } } - pub fn iter<'a>(&'a self) -> impl Iterator> + 'a { - self.document - .traverse(&self.arena) - .map(move |edge| match edge { - NodeEdge::Start(e) => Event::Start(self.arena[e].get()), - NodeEdge::End(e) => Event::End(self.arena[e].get()), - }) + pub fn iter(&self) -> impl Iterator> + '_ { + self.root.traverse(&self.arena).map(move |edge| match edge { + NodeEdge::Start(e) => Event::Start(self.arena[e].get()), + NodeEdge::End(e) => Event::End(self.arena[e].get()), + }) } - pub fn headlines(&self) -> Vec { - self.document + pub fn headlines(&self) -> impl Iterator + '_ { + self.root .descendants(&self.arena) .skip(1) - .filter(|&node| match self.arena[node].get() { - Element::Headline => true, - _ => false, + .filter_map(move |node| match self.arena[node].get() { + Element::Headline => Some(HeadlineNode(node)), + _ => None, }) - .map(|node| HeadlineNode(node)) - .collect() } pub fn html(&self, wrtier: W) -> Result<(), Error> { @@ -130,6 +122,6 @@ impl Serialize for Org<'_> { fn serialize(&self, serializer: S) -> Result { use serde_indextree::Node; - serializer.serialize_newtype_struct("Node", &Node::new(self.document, &self.arena)) + serializer.serialize_newtype_struct("Org", &Node::new(self.root, &self.arena)) } } diff --git a/src/parsers.rs b/src/parsers.rs index a91ac33..182b0fa 100644 --- a/src/parsers.rs +++ b/src/parsers.rs @@ -1,20 +1,56 @@ // parser related functions +use std::borrow::Cow; + use indextree::{Arena, NodeId}; use jetscii::bytes; -use memchr::{memchr, memchr2, memchr_iter}; +use memchr::{memchr, memchr_iter}; use nom::{ - branch::alt, - bytes::complete::{tag, take_till}, - character::complete::space0, + bytes::complete::take_while1, + character::complete::{line_ending, not_line_ending}, + combinator::{map, opt, recognize, verify}, error::ErrorKind, - error_position, Err, IResult, + error_position, + multi::{many0_count, many1_count}, + sequence::terminated, + Err, IResult, }; -use std::borrow::Cow; use crate::config::ParseConfig; use crate::elements::*; +pub trait ElementArena<'a> { + fn push_element>>(&mut self, element: T, parent: NodeId) -> NodeId; + fn insert_before_last_child>>( + &mut self, + element: T, + parent: NodeId, + ) -> NodeId; +} + +impl<'a> ElementArena<'a> for Arena> { + fn push_element>>(&mut self, element: T, parent: NodeId) -> NodeId { + let node = self.new_node(element.into()); + parent.append(node, self); + node + } + + fn insert_before_last_child>>( + &mut self, + element: T, + parent: NodeId, + ) -> NodeId { + if let Some(child) = self[parent].last_child() { + let node = self.new_node(element.into()); + child.insert_before(node, self); + node + } else { + self.push_element(element, parent) + } + } +} + +#[derive(Debug)] pub enum Container<'a> { // List List { @@ -44,22 +80,21 @@ pub enum Container<'a> { }, } -pub fn parse_title<'a>( - arena: &mut Arena>, +pub fn parse_headline_content<'a, T: ElementArena<'a>>( + arena: &mut T, content: &'a str, parent: NodeId, containers: &mut Vec>, config: &ParseConfig, -) -> &'a str { +) { let (tail, (title, content)) = Title::parse(content, config).unwrap(); - let node = arena.new_node(Element::Title(title)); - parent.append(node, arena); + let node = arena.push_element(title, parent); containers.push(Container::Inline { content, node }); - tail + parse_section_and_headlines(arena, tail, parent, containers); } -pub fn parse_section_and_headlines<'a>( - arena: &mut Arena>, +pub fn parse_section_and_headlines<'a, T: ElementArena<'a>>( + arena: &mut T, content: &'a str, parent: NodeId, containers: &mut Vec>, @@ -71,24 +106,22 @@ pub fn parse_section_and_headlines<'a>( let mut last_end = 0; for i in memchr_iter(b'\n', content.as_bytes()) { - if let Some((mut tail, headline_content)) = parse_headline(&content[last_end..]) { + if let Ok((mut tail, headline_content)) = parse_headline(&content[last_end..]) { if last_end != 0 { - let node = arena.new_node(Element::Section); - parent.append(node, arena); - containers.push(Container::Block { - content: &content[0..last_end], - node, - }); + let node = arena.push_element(Element::Section, parent); + let content = &content[0..last_end]; + containers.push(Container::Block { content, node }); } - let node = arena.new_node(Element::Headline); - parent.append(node, arena); + + let node = arena.push_element(Element::Headline, parent); containers.push(Container::Headline { content: headline_content, node, }); - while let Some((new_tail, content)) = parse_headline(tail) { - let node = arena.new_node(Element::Headline); - parent.append(node, arena); + + while let Ok((new_tail, content)) = parse_headline(tail) { + debug_assert_ne!(tail, new_tail); + let node = arena.push_element(Element::Headline, parent); containers.push(Container::Headline { content, node }); tail = new_tail; } @@ -97,282 +130,263 @@ pub fn parse_section_and_headlines<'a>( last_end = i + 1; } - let node = arena.new_node(Element::Section); - parent.append(node, arena); + let node = arena.push_element(Element::Section, parent); containers.push(Container::Block { content, node }); } -pub fn parse_headline(text: &str) -> Option<(&str, &str)> { - let level = get_headline_level(text)?; - - for i in memchr_iter(b'\n', text.as_bytes()) { - if let Some(l) = get_headline_level(&text[i + 1..]) { - if l <= level { - return Some((&text[i + 1..], &text[0..i + 1])); - } - } - } - - Some(("", text)) -} - -pub fn get_headline_level(text: &str) -> Option { - if let Some(off) = memchr2(b'\n', b' ', text.as_bytes()) { - if off > 0 && text[0..off].as_bytes().iter().all(|&c| c == b'*') { - Some(off) - } else { - None - } - } else if !text.is_empty() && text.as_bytes().iter().all(|&c| c == b'*') { - Some(text.len()) - } else { - None - } -} - -pub fn parse_blocks<'a>( - arena: &mut Arena>, +pub fn parse_blocks<'a, T: ElementArena<'a>>( + arena: &mut T, content: &'a str, parent: NodeId, containers: &mut Vec>, ) { let mut tail = skip_empty_lines(content); - if let Some((new_tail, element)) = parse_block(content, arena, containers) { - parent.append(element, arena); + if let Some(new_tail) = parse_block(content, arena, parent, containers) { tail = skip_empty_lines(new_tail); } let mut text = tail; let mut pos = 0; + macro_rules! insert_paragraph { + ($content:expr) => { + let node = arena.insert_before_last_child(Element::Paragraph, parent); + containers.push(Container::Inline { + content: $content, + node, + }); + }; + } + while !tail.is_empty() { let i = memchr(b'\n', tail.as_bytes()) .map(|i| i + 1) .unwrap_or_else(|| tail.len()); if tail.as_bytes()[0..i].iter().all(u8::is_ascii_whitespace) { - tail = skip_empty_lines(&tail[i..]); - let node = arena.new_node(Element::Paragraph); - parent.append(node, arena); - containers.push(Container::Inline { - content: &text[0..pos].trim_end_matches('\n'), - node, - }); - text = tail; + debug_assert_ne!(tail, skip_empty_lines(&tail[i..])); + insert_paragraph!(&text[0..pos].trim_end_matches('\n')); pos = 0; - } else if let Some((new_tail, element)) = parse_block(tail, arena, containers) { + tail = skip_empty_lines(&tail[i..]); + text = tail; + } else if let Some(new_tail) = parse_block(tail, arena, parent, containers) { + debug_assert_ne!(tail, new_tail); if pos != 0 { - let node = arena.new_node(Element::Paragraph); - parent.append(node, arena); - containers.push(Container::Inline { - content: &text[0..pos].trim_end_matches('\n'), - node, - }); + insert_paragraph!(&text[0..pos].trim_end_matches('\n')); pos = 0; } - parent.append(element, arena); tail = skip_empty_lines(new_tail); text = tail; } else { + debug_assert_ne!(tail, &tail[i..]); tail = &tail[i..]; pos += i; } } if !text.is_empty() { - let node = arena.new_node(Element::Paragraph); - parent.append(node, arena); - containers.push(Container::Inline { - content: &text[0..pos].trim_end_matches('\n'), - node, - }); + insert_paragraph!(&text[0..pos].trim_end_matches('\n')); } } -pub fn parse_block<'a>( +pub fn parse_block<'a, T: ElementArena<'a>>( contents: &'a str, - arena: &mut Arena>, + arena: &mut T, + parent: NodeId, containers: &mut Vec>, -) -> Option<(&'a str, NodeId)> { - if let Some((tail, node)) = prase_table(arena, contents, containers) { - return Some((tail, node)); - } - - if let Some((tail, fn_def, content)) = FnDef::parse(contents) { - let node = arena.new_node(Element::FnDef(fn_def)); +) -> Option<&'a str> { + if let Ok((tail, (fn_def, content))) = FnDef::parse(contents) { + let node = arena.push_element(fn_def, parent); containers.push(Container::Block { content, node }); - return Some((tail, node)); + return Some(tail); } else if let Some((tail, list, content)) = List::parse(contents) { let indent = list.indent; - let node = arena.new_node(Element::List(list)); + let node = arena.push_element(list, parent); containers.push(Container::List { content, node, indent, }); - return Some((tail, node)); + return Some(tail); } - let tail = contents.trim_start(); + let contents = contents.trim_start(); - if let Ok((tail, clock)) = Clock::parse(tail) { - return Some((tail, arena.new_node(clock))); - } - - // TODO: LaTeX environment - if tail.starts_with("\\begin{") {} - - if tail.starts_with('-') { - if let Ok((tail, rule)) = Rule::parse(tail) { - return Some((tail, arena.new_node(rule))); + match contents.as_bytes().get(0)? { + b'C' => { + if let Ok((tail, clock)) = Clock::parse(contents) { + arena.push_element(clock, parent); + return Some(tail); + } } + b'\'' => { + // TODO: LaTeX environment + } + b'-' => { + if let Ok((tail, _)) = parse_rule(contents) { + arena.push_element(Element::Rule, parent); + return Some(tail); + } + } + b':' => { + if let Ok((tail, (drawer, content))) = Drawer::parse(contents) { + let node = arena.push_element(drawer, parent); + containers.push(Container::Block { content, node }); + return Some(tail); + } else if let Ok((tail, value)) = parse_fixed_width(contents) { + arena.push_element( + Element::FixedWidth { + value: value.into(), + }, + parent, + ); + return Some(tail); + } + } + b'|' => { + if let Some(tail) = parse_table(arena, contents, containers, parent) { + return Some(tail); + } + } + b'#' => { + if let Ok((tail, (name, args, content))) = parse_block_element(contents) { + match_block( + arena, + parent, + containers, + name.into(), + args.map(Into::into), + content, + ); + return Some(tail); + } else if let Ok((tail, (dyn_block, content))) = DynBlock::parse(contents) { + let node = arena.push_element(dyn_block, parent); + containers.push(Container::Block { content, node }); + return Some(tail); + } else if let Ok((tail, (key, optional, value))) = parse_keyword(contents) { + if (&*key).eq_ignore_ascii_case("CALL") { + arena.push_element( + BabelCall { + value: value.into(), + }, + parent, + ); + } else { + arena.push_element( + Keyword { + key: key.into(), + optional: optional.map(Into::into), + value: value.into(), + }, + parent, + ); + } + return Some(tail); + } else if let Ok((tail, value)) = parse_comment(contents) { + arena.push_element( + Element::Comment { + value: value.into(), + }, + parent, + ); + return Some(tail); + } + } + _ => (), } - if tail.starts_with(':') { - if let Ok((tail, (drawer, content))) = Drawer::parse(tail) { - let node = arena.new_node(drawer.into()); + None +} + +pub fn match_block<'a, T: ElementArena<'a>>( + arena: &mut T, + parent: NodeId, + containers: &mut Vec>, + name: Cow<'a, str>, + args: Option>, + content: &'a str, +) { + match &*name.to_uppercase() { + "CENTER" => { + let node = arena.push_element(CenterBlock { parameters: args }, parent); containers.push(Container::Block { content, node }); - return Some((tail, node)); } - } - - // FixedWidth - if tail == ":" || tail.starts_with(": ") || tail.starts_with(":\n") { - let mut last_end = 1; // ":" - for i in memchr_iter(b'\n', contents.as_bytes()) { - last_end = i + 1; - let tail = contents[last_end..].trim_start(); - if !(tail == ":" || tail.starts_with(": ") || tail.starts_with(":\n")) { - let fixed_width = arena.new_node(Element::FixedWidth { - value: &contents[0..last_end], - }); - return Some((&contents[last_end..], fixed_width)); - } - } - let fixed_width = arena.new_node(Element::FixedWidth { - value: &contents[0..last_end], - }); - return Some((&contents[last_end..], fixed_width)); - } - - // Comment - if tail == "#" || tail.starts_with("# ") || tail.starts_with("#\n") { - let mut last_end = 1; // "#" - for i in memchr_iter(b'\n', contents.as_bytes()) { - last_end = i + 1; - let line = contents[last_end..].trim_start(); - if !(line == "#" || line.starts_with("# ") || line.starts_with("#\n")) { - let comment = arena.new_node(Element::Comment { - value: &contents[0..last_end], - }); - return Some((&contents[last_end..], comment)); - } - } - let comment = arena.new_node(Element::Comment { - value: &contents[0..last_end], - }); - return Some((&contents[last_end..], comment)); - } - - if tail.starts_with("#+") { - if let Ok((tail, (block, content))) = Block::parse(tail) { - match &*block.name.to_uppercase() { - "CENTER" => { - let node = arena.new_node(Element::CenterBlock(CenterBlock { - parameters: block.args, - })); - containers.push(Container::Block { content, node }); - Some((tail, node)) - } - "QUOTE" => { - let node = arena.new_node(Element::QuoteBlock(QuoteBlock { - parameters: block.args, - })); - containers.push(Container::Block { content, node }); - Some((tail, node)) - } - "COMMENT" => { - let node = arena.new_node(Element::CommentBlock(CommentBlock { - data: block.args, - contents: content.into(), - })); - Some((tail, node)) - } - "EXAMPLE" => { - let node = arena.new_node(Element::ExampleBlock(ExampleBlock { - data: block.args, - contents: content.into(), - })); - Some((tail, node)) - } - "EXPORT" => { - let node = arena.new_node(Element::ExportBlock(ExportBlock { - data: block.args.unwrap_or_default(), - contents: content.into(), - })); - Some((tail, node)) - } - "SRC" => { - let (language, arguments) = match &block.args { - Some(Cow::Borrowed(args)) => { - let (language, arguments) = - args.split_at(args.find(' ').unwrap_or_else(|| args.len())); - (Cow::Borrowed(language), Cow::Borrowed(arguments)) - } - Some(Cow::Owned(args)) => { - let (language, arguments) = - args.split_at(args.find(' ').unwrap_or_else(|| args.len())); - (Cow::Owned(language.into()), Cow::Owned(arguments.into())) - } - None => (Cow::Borrowed(""), Cow::Borrowed("")), - }; - let node = arena.new_node(Element::SourceBlock(SourceBlock { - arguments, - language, - contents: content.into(), - })); - Some((tail, node)) - } - "VERSE" => { - let node = arena.new_node(Element::VerseBlock(VerseBlock { - parameters: block.args, - })); - containers.push(Container::Block { content, node }); - Some((tail, node)) - } - _ => { - let node = arena.new_node(Element::SpecialBlock(SpecialBlock { - parameters: block.args, - name: block.name, - })); - containers.push(Container::Block { content, node }); - Some((tail, node)) - } - } - } else if let Ok((tail, (dyn_block, content))) = DynBlock::parse(tail) { - let node = arena.new_node(dyn_block); + "QUOTE" => { + let node = arena.push_element(QuoteBlock { parameters: args }, parent); + containers.push(Container::Block { content, node }); + } + "COMMENT" => { + arena.push_element( + CommentBlock { + data: args, + contents: content.into(), + }, + parent, + ); + } + "EXAMPLE" => { + arena.push_element( + ExampleBlock { + data: args, + contents: content.into(), + }, + parent, + ); + } + "EXPORT" => { + arena.push_element( + ExportBlock { + data: args.unwrap_or_default(), + contents: content.into(), + }, + parent, + ); + } + "SRC" => { + let (language, arguments) = match &args { + Some(Cow::Borrowed(args)) => { + let (language, arguments) = + args.split_at(args.find(' ').unwrap_or_else(|| args.len())); + (language.into(), arguments.into()) + } + None => (Cow::Borrowed(""), Cow::Borrowed("")), + _ => unreachable!("`parse_block_element` returns `Some(Cow::Borrowed)` or `None`"), + }; + arena.push_element( + SourceBlock { + arguments, + language, + contents: content.into(), + }, + parent, + ); + } + "VERSE" => { + let node = arena.push_element(VerseBlock { parameters: args }, parent); + containers.push(Container::Block { content, node }); + } + _ => { + let node = arena.push_element( + SpecialBlock { + parameters: args, + name, + }, + parent, + ); containers.push(Container::Block { content, node }); - Some((tail, node)) - } else { - Keyword::parse(tail) - .ok() - .map(|(tail, kw)| (tail, arena.new_node(kw))) } - } else { - None } } -pub fn parse_inlines<'a>( - arena: &mut Arena>, +pub fn parse_inlines<'a, T: ElementArena<'a>>( + arena: &mut T, content: &'a str, parent: NodeId, containers: &mut Vec>, ) { let mut tail = content; - if let Some((new_tail, element)) = parse_inline(tail, arena, containers) { - parent.append(element, arena); + if let Some(new_tail) = parse_inline(tail, arena, containers, parent) { tail = new_tail; } @@ -381,61 +395,51 @@ pub fn parse_inlines<'a>( let bs = bytes!(b'@', b'<', b'[', b' ', b'(', b'{', b'\'', b'"', b'\n'); + macro_rules! insert_text { + ($value:expr) => { + arena.insert_before_last_child(Element::Text { value: $value }, parent); + pos = 0; + }; + } + + macro_rules! update_tail { + ($new_tail:ident) => { + debug_assert_ne!(tail, $new_tail); + tail = $new_tail; + text = $new_tail; + }; + } + while let Some(off) = bs.find(tail.as_bytes()) { match tail.as_bytes()[off] { b'{' => { - if let Some((new_tail, element)) = parse_inline(&tail[off..], arena, containers) { + if let Some(new_tail) = parse_inline(&tail[off..], arena, containers, parent) { if pos != 0 { - let node = arena.new_node(Element::Text { - value: &text[0..pos + off], - }); - parent.append(node, arena); - pos = 0; + insert_text!(&text[0..pos + off]); } - parent.append(element, arena); - tail = new_tail; - text = new_tail; + update_tail!(new_tail); continue; - } else if let Some((new_tail, element)) = - parse_inline(&tail[off + 1..], arena, containers) + } else if let Some(new_tail) = + parse_inline(&tail[off + 1..], arena, containers, parent) { - let node = arena.new_node(Element::Text { - value: &text[0..pos + off + 1], - }); - parent.append(node, arena); - pos = 0; - parent.append(element, arena); - tail = new_tail; - text = new_tail; + insert_text!(&text[0..pos + off + 1]); + update_tail!(new_tail); continue; } } b' ' | b'(' | b'\'' | b'"' | b'\n' => { - if let Some((new_tail, element)) = parse_inline(&tail[off + 1..], arena, containers) - { - let node = arena.new_node(Element::Text { - value: &text[0..pos + off + 1], - }); - parent.append(node, arena); - pos = 0; - parent.append(element, arena); - tail = new_tail; - text = new_tail; + if let Some(new_tail) = parse_inline(&tail[off + 1..], arena, containers, parent) { + insert_text!(&text[0..pos + off + 1]); + update_tail!(new_tail); continue; } } _ => { - if let Some((new_tail, element)) = parse_inline(&tail[off..], arena, containers) { + if let Some(new_tail) = parse_inline(&tail[off..], arena, containers, parent) { if pos != 0 { - let node = arena.new_node(Element::Text { - value: &text[0..pos + off], - }); - parent.append(node, arena); - pos = 0; + insert_text!(&text[0..pos + off]); } - parent.append(element, arena); - tail = new_tail; - text = new_tail; + update_tail!(new_tail); continue; } } @@ -445,111 +449,134 @@ pub fn parse_inlines<'a>( } if !text.is_empty() { - let node = arena.new_node(Element::Text { value: text }); - parent.append(node, arena); + arena.push_element(Element::Text { value: text }, parent); } } -pub fn parse_inline<'a>( +pub fn parse_inline<'a, T: ElementArena<'a>>( contents: &'a str, - arena: &mut Arena>, + arena: &mut T, containers: &mut Vec>, -) -> Option<(&'a str, NodeId)> { + parent: NodeId, +) -> Option<&'a str> { if contents.len() < 3 { return None; } let bytes = contents.as_bytes(); match bytes[0] { - b'@' => Snippet::parse(contents) - .ok() - .map(|(tail, element)| (tail, arena.new_node(element))), - b'{' => Macros::parse(contents) - .ok() - .map(|(tail, element)| (tail, arena.new_node(element))), - b'<' => RadioTarget::parse(contents) - .map(|(tail, (radio, _content))| (tail, radio)) - .or_else(|_| Target::parse(contents)) - .or_else(|_| { - Timestamp::parse_active(contents).map(|(tail, timestamp)| (tail, timestamp.into())) - }) - .or_else(|_| { - Timestamp::parse_diary(contents).map(|(tail, timestamp)| (tail, timestamp.into())) - }) - .ok() - .map(|(tail, element)| (tail, arena.new_node(element))), + b'@' => { + if let Ok((tail, snippet)) = Snippet::parse(contents) { + arena.push_element(snippet, parent); + return Some(tail); + } + } + b'{' => { + if let Ok((tail, macros)) = Macros::parse(contents) { + arena.push_element(macros, parent); + return Some(tail); + } + } + b'<' => { + if let Ok((tail, (radio, _content))) = RadioTarget::parse(contents) { + arena.push_element(radio, parent); + return Some(tail); + } else if let Ok((tail, target)) = Target::parse(contents) { + arena.push_element(target, parent); + return Some(tail); + } else if let Ok((tail, timestamp)) = Timestamp::parse_active(contents) { + arena.push_element(timestamp, parent); + return Some(tail); + } else if let Ok((tail, timestamp)) = Timestamp::parse_diary(contents) { + arena.push_element(timestamp, parent); + return Some(tail); + } + } b'[' => { - if contents[1..].starts_with("fn:") { - FnRef::parse(contents) - .ok() - .map(|(tail, fn_ref)| (tail, arena.new_node(fn_ref.into()))) - } else if bytes[1] == b'[' { - Link::parse(contents) - .ok() - .map(|(tail, element)| (tail, arena.new_node(element))) - } else { - Cookie::parse(contents) - .map(|(tail, cookie)| (tail, cookie.into())) - .or_else(|_| { - Timestamp::parse_inactive(contents) - .map(|(tail, timestamp)| (tail, timestamp.into())) - }) - .ok() - .map(|(tail, element)| (tail, arena.new_node(element))) + if let Ok((tail, fn_ref)) = FnRef::parse(contents) { + arena.push_element(fn_ref, parent); + return Some(tail); + } else if let Ok((tail, link)) = Link::parse(contents) { + arena.push_element(link, parent); + return Some(tail); + } else if let Ok((tail, cookie)) = Cookie::parse(contents) { + arena.push_element(cookie, parent); + return Some(tail); + } else if let Ok((tail, timestamp)) = Timestamp::parse_inactive(contents) { + arena.push_element(timestamp, parent); + return Some(tail); } } b'*' => { if let Some((tail, content)) = parse_emphasis(contents, b'*') { - let node = arena.new_node(Element::Bold); + let node = arena.push_element(Element::Bold, parent); containers.push(Container::Inline { content, node }); - Some((tail, node)) - } else { - None + return Some(tail); } } b'+' => { if let Some((tail, content)) = parse_emphasis(contents, b'+') { - let node = arena.new_node(Element::Strike); + let node = arena.push_element(Element::Strike, parent); containers.push(Container::Inline { content, node }); - Some((tail, node)) - } else { - None + return Some(tail); } } b'/' => { if let Some((tail, content)) = parse_emphasis(contents, b'/') { - let node = arena.new_node(Element::Italic); + let node = arena.push_element(Element::Italic, parent); containers.push(Container::Inline { content, node }); - Some((tail, node)) - } else { - None + return Some(tail); } } b'_' => { if let Some((tail, content)) = parse_emphasis(contents, b'_') { - let node = arena.new_node(Element::Underline); + let node = arena.push_element(Element::Underline, parent); containers.push(Container::Inline { content, node }); - Some((tail, node)) - } else { - None + return Some(tail); } } - b'=' => parse_emphasis(contents, b'=') - .map(|(tail, value)| (tail, arena.new_node(Element::Verbatim { value }))), - b'~' => parse_emphasis(contents, b'~') - .map(|(tail, value)| (tail, arena.new_node(Element::Code { value }))), - b's' => InlineSrc::parse(contents) - .ok() - .map(|(tail, element)| (tail, arena.new_node(element))), - b'c' => InlineCall::parse(contents) - .ok() - .map(|(tail, element)| (tail, arena.new_node(element))), - _ => None, + b'=' => { + if let Some((tail, value)) = parse_emphasis(contents, b'=') { + arena.push_element( + Element::Verbatim { + value: value.into(), + }, + parent, + ); + return Some(tail); + } + } + b'~' => { + if let Some((tail, value)) = parse_emphasis(contents, b'~') { + arena.push_element( + Element::Code { + value: value.into(), + }, + parent, + ); + return Some(tail); + } + } + b's' => { + if let Ok((tail, inline_src)) = InlineSrc::parse(contents) { + arena.push_element(inline_src, parent); + return Some(tail); + } + } + b'c' => { + if let Ok((tail, inline_call)) = InlineCall::parse(contents) { + arena.push_element(inline_call, parent); + return Some(tail); + } + } + _ => (), } + + None } -pub fn parse_list_items<'a>( - arena: &mut Arena>, +pub fn parse_list_items<'a, T: ElementArena<'a>>( + arena: &mut T, mut contents: &'a str, indent: usize, parent: NodeId, @@ -557,32 +584,29 @@ pub fn parse_list_items<'a>( ) { while !contents.is_empty() { let (tail, list_item, content) = ListItem::parse(contents, indent); - let list_item = Element::ListItem(list_item); - let node = arena.new_node(list_item); - parent.append(node, arena); + let node = arena.push_element(list_item, parent); containers.push(Container::Block { content, node }); contents = tail; } } -pub fn prase_table<'a>( - arena: &mut Arena>, +pub fn parse_table<'a, T: ElementArena<'a>>( + arena: &mut T, contents: &'a str, containers: &mut Vec>, -) -> Option<(&'a str, NodeId)> { + parent: NodeId, +) -> Option<&'a str> { if contents.trim_start().starts_with('|') { - let table_node = arena.new_node(Element::Table(Table::Org { tblfm: None })); + let table_node = arena.push_element(Table::Org { tblfm: None }, parent); let mut last_end = 0; for start in memchr_iter(b'\n', contents.as_bytes()) { let line = contents[last_end..start].trim(); match TableRow::parse(line) { Some(TableRow::Standard) => { - let row_node = arena.new_node(Element::TableRow(TableRow::Standard)); - table_node.append(row_node, arena); + let row_node = arena.push_element(TableRow::Standard, table_node); for cell in line[1..].split_terminator('|') { - let cell_node = arena.new_node(Element::TableCell); - row_node.append(cell_node, arena); + let cell_node = arena.push_element(Element::TableCell, row_node); containers.push(Container::Inline { content: cell.trim(), node: cell_node, @@ -590,104 +614,106 @@ pub fn prase_table<'a>( } } Some(TableRow::Rule) => { - let row_node = arena.new_node(Element::TableRow(TableRow::Rule)); - table_node.append(row_node, arena); + arena.push_element(TableRow::Rule, table_node); } - None => return Some((&contents[last_end..], table_node)), + None => return Some(&contents[last_end..]), } last_end = start + 1; } - Some(("", table_node)) - } else if contents.trim_start().starts_with("+-") - && contents[0..memchr(b'\n', contents.as_bytes()).unwrap_or_else(|| contents.len())] - .trim() - .as_bytes() - .iter() - .any(|&c| c != b'+' || c != b'-') - { - let mut last_end = 0; - for start in memchr_iter(b'\n', contents.as_bytes()) { - let line = contents[last_end..start].trim(); - if !line.starts_with('|') && !line.starts_with('+') { - return { - Some(( - &contents[last_end..], - arena.new_node(Element::Table(Table::TableEl { - value: contents[0..last_end].into(), - })), - )) - }; - } - last_end = start + 1; - } - - Some(( - "", - arena.new_node(Element::Table(Table::TableEl { - value: contents.into(), - })), - )) + Some("") + } else if let Ok((tail, value)) = parse_table_el(contents) { + arena.push_element( + Table::TableEl { + value: value.into(), + }, + parent, + ); + Some(tail) } else { None } } -pub fn eol(input: &str) -> IResult<&str, ()> { - let (input, _) = space0(input)?; - if input.is_empty() { - Ok(("", ())) - } else { - let (input, _) = tag("\n")(input)?; - Ok((input, ())) - } +pub fn line(input: &str) -> IResult<&str, &str> { + terminated(not_line_ending, opt(line_ending))(input) } -pub fn take_until_eol(input: &str) -> IResult<&str, &str> { - if let Some(i) = memchr(b'\n', input.as_bytes()) { - Ok((&input[i + 1..], input[0..i].trim())) - } else { - Ok(("", input.trim())) - } +pub fn eol(input: &str) -> IResult<&str, &str> { + verify(line, |s: &str| s.trim().is_empty())(input) } -pub fn take_lines_till(predicate: impl Fn(&str) -> bool) -> impl Fn(&str) -> IResult<&str, &str> { +pub fn take_lines_while(predicate: impl Fn(&str) -> bool) -> impl Fn(&str) -> IResult<&str, &str> { move |input| { - let mut start = 0; - for i in memchr_iter(b'\n', input.as_bytes()) { - if predicate(input[start..i].trim()) { - return Ok((&input[i + 1..], &input[0..start])); - } - start = i + 1; - } - - if predicate(input[start..].trim()) { - Ok(("", &input[0..start])) - } else { - Err(Err::Error(error_position!(input, ErrorKind::TakeTill1))) - } + recognize(many0_count(verify( + |s: &str| { + // repeat until eof + if s.is_empty() { + Err(Err::Error(error_position!(s, ErrorKind::Eof))) + } else { + line(s) + } + }, + |s: &str| predicate(s), + )))(input) } } +pub fn take_lines_while1(predicate: impl Fn(&str) -> bool) -> impl Fn(&str) -> IResult<&str, &str> { + move |input| { + recognize(many1_count(verify( + |s: &str| { + // repeat until eof + if s.is_empty() { + Err(Err::Error(error_position!(s, ErrorKind::Eof))) + } else { + line(s) + } + }, + |s: &str| predicate(s), + )))(input) + } +} + +pub fn skip_empty_lines(input: &str) -> &str { + take_lines_while(|line| line.trim().is_empty())(input) + .map(|(tail, _)| tail) + .unwrap_or(input) +} + +pub fn parse_headline(input: &str) -> IResult<&str, &str> { + let (input_, level) = get_headline_level(input)?; + map( + take_lines_while(move |line| { + if let Ok((_, l)) = get_headline_level(line) { + l.len() > level.len() + } else { + true + } + }), + move |s: &str| &input[0..level.len() + s.len()], + )(input_) +} + +pub fn get_headline_level(input: &str) -> IResult<&str, &str> { + let (input, stars) = take_while1(|c: char| c == '*')(input)?; + if input.is_empty() || input.starts_with(' ') || input.starts_with('\n') { + Ok((input, stars)) + } else { + Err(Err::Error(error_position!(input, ErrorKind::Tag))) + } +} + +pub fn parse_fixed_width(input: &str) -> IResult<&str, &str> { + take_lines_while1(|line| line == ":" || line.starts_with(": "))(input) +} + +pub fn parse_comment(input: &str) -> IResult<&str, &str> { + take_lines_while1(|line| line == "#" || line.starts_with("# "))(input) +} + pub fn take_one_word(input: &str) -> IResult<&str, &str> { - alt((take_till(|c: char| c == ' ' || c == '\t'), |input| { - Ok(("", input)) - }))(input) -} - -pub fn skip_empty_lines(contents: &str) -> &str { - let mut i = 0; - for pos in memchr_iter(b'\n', contents.as_bytes()) { - if contents.as_bytes()[i..pos] - .iter() - .all(u8::is_ascii_whitespace) - { - i = pos + 1; - } else { - break; - } - } - &contents[i..] + take_while1(|c: char| !c.is_ascii_whitespace())(input) } #[test] diff --git a/tests/html.rs b/tests/html.rs index 5e4cd71..b98465c 100644 --- a/tests/html.rs +++ b/tests/html.rs @@ -23,22 +23,18 @@ test_suite!( test_suite!( section_and_headline, - r#"* Title 1 -*Section 1* -** Title 2 -_Section 2_ -* Title 3 -/Section 3/ -* Title 4 -=Section 4="#, - "

Title 1

\ -

Section 1

\ -

Title 2

\ -

Section 2

\ -

Title 3

\ -

Section 3

\ -

Title 4

\ -

Section 4

" + r#"* title 1 +section 1 +** title 2 +section 2 +* title 3 +section 3 +* title 4 +section 4"#, + "

title 1

section 1

\ +

title 2

section 2

\ +

title 3

section 3

\ +

title 4

section 4

" ); test_suite!(