diff --git a/examples/custom_handler.rs b/examples/custom_handler.rs index cd2f20b..46091e0 100644 --- a/examples/custom_handler.rs +++ b/examples/custom_handler.rs @@ -32,7 +32,7 @@ impl From for Error { type Result = std::result::Result<(), Error>; impl HtmlHandler for CustomHtmlHandler { - fn handle_headline_beg(&mut self, w: &mut W, hdl: Headline) -> Result { + fn headline_beg(&mut self, w: &mut W, hdl: Headline) -> Result { if hdl.level > 6 { Err(Error::Heading) } else { diff --git a/src/elements/mod.rs b/src/elements/mod.rs index 08c3633..63ef60e 100644 --- a/src/elements/mod.rs +++ b/src/elements/mod.rs @@ -1,429 +1,8 @@ -pub mod block; -pub mod dyn_block; -pub mod fn_def; -pub mod keyword; -pub mod list; -pub mod rule; +pub(crate) mod block; +pub(crate) mod dyn_block; +pub(crate) mod fn_def; +pub(crate) mod keyword; +pub(crate) mod list; +pub(crate) mod rule; pub use self::keyword::Key; - -use memchr::memchr_iter; - -#[cfg_attr(test, derive(PartialEq))] -#[derive(Debug)] -pub enum Element<'a> { - Paragraph { - cont_end: usize, - end: usize, - }, - Keyword { - key: Key<'a>, - value: &'a str, - }, - Call { - value: &'a str, - }, - FnDef { - label: &'a str, - cont: &'a str, - }, - CtrBlock { - args: Option<&'a str>, - cont_end: usize, - end: usize, - }, - QteBlock { - args: Option<&'a str>, - cont_end: usize, - end: usize, - }, - SplBlock { - args: Option<&'a str>, - name: &'a str, - cont_end: usize, - end: usize, - }, - CommentBlock { - args: Option<&'a str>, - cont: &'a str, - }, - ExampleBlock { - args: Option<&'a str>, - cont: &'a str, - }, - ExportBlock { - args: Option<&'a str>, - cont: &'a str, - }, - SrcBlock { - args: Option<&'a str>, - cont: &'a str, - }, - VerseBlock { - args: Option<&'a str>, - cont: &'a str, - }, - DynBlock { - args: Option<&'a str>, - name: &'a str, - cont_end: usize, - end: usize, - }, - Rule, - Comment(&'a str), - FixedWidth(&'a str), - List { - ident: usize, - ordered: bool, - }, - - // Element::Empty actually means Option::None - Empty, -} - -// return (element, off, next element, next offset) -// the end of first element is relative to the offset -// next offset is relative to the end of the first element -pub fn parse(src: &str) -> (Element<'_>, usize, Option<(Element<'_>, usize)>) { - // skip empty lines - let mut pos = match src.chars().position(|c| c != '\n') { - Some(pos) => pos, - None => return (Element::Empty, src.len(), None), - }; - let start = pos; - let bytes = src.as_bytes(); - let mut line_ends = memchr_iter(b'\n', &bytes[start..]).map(|i| i + start); - - loop { - let line_beg = pos; - - macro_rules! brk { - ($ele:expr, $off:expr) => { - break if line_beg == start || pos == start { - ($ele, pos + $off, None) - } else { - ( - Element::Paragraph { - cont_end: line_beg - start - 1, - end: line_beg - start, - }, - start, - Some(($ele, $off)), - ) - }; - }; - } - - let tail = &src[pos..]; - - // Unlike other element, footnote def must starts at column 0 - if tail.starts_with("[fn:") { - if let Some((label, cont, off)) = fn_def::parse(tail) { - brk!(Element::FnDef { label, cont }, off + 1); - } - } - - if bytes[pos] == b'\n' { - break ( - Element::Paragraph { - cont_end: pos - start - 1, - end: pos - start + 1, - }, - start, - None, - ); - } - - pos = skip_space!(src, pos); - - let tail = &src[pos..]; - - let (is_item, ordered) = list::is_item(tail); - if is_item { - let list = Element::List { - ident: pos - line_beg, - ordered, - }; - break if line_beg == start { - (list, start, None) - } else { - ( - Element::Paragraph { - cont_end: line_beg - start - 1, - end: line_beg - start, - }, - start, - Some((list, 0)), - ) - }; - } - - // TODO: LaTeX environment - if tail.starts_with("\\begin{") {} - - // rule - if tail.starts_with("-----") { - let off = rule::parse(tail); - if off != 0 { - brk!(Element::Rule, off); - } - } - - // fixed width - if tail.starts_with(": ") || tail.starts_with(":\n") { - let end = line_ends - .skip_while(|&i| src[i + 1..].starts_with(": ") || src[i + 1..].starts_with(":\n")) - .next() - .map(|i| i + 1) - .unwrap_or_else(|| src.len()); - let off = end - pos; - brk!(Element::FixedWidth(&tail[0..off]), off); - } - - // comment - if tail.starts_with("# ") || tail.starts_with("#\n") { - let end = line_ends - .skip_while(|&i| src[i + 1..].starts_with("# ") || src[i + 1..].starts_with("#\n")) - .next() - .map(|i| i + 1) - .unwrap_or_else(|| src.len()); - let off = end - pos; - brk!(Element::Comment(&tail[0..off]), off); - } - - if tail.starts_with("#+") { - if let Some((name, args, cont_beg, cont_end, end)) = block::parse(tail) { - let cont = &tail[cont_beg..cont_end]; - match &*name.to_uppercase() { - "COMMENT" => brk!(Element::CommentBlock { args, cont }, end), - "EXAMPLE" => brk!(Element::ExampleBlock { args, cont }, end), - "EXPORT" => brk!(Element::ExportBlock { args, cont }, end), - "SRC" => brk!(Element::SrcBlock { args, cont }, end), - "VERSE" => brk!(Element::VerseBlock { args, cont }, end), - "CENTER" => brk!( - Element::CtrBlock { - args, - cont_end: cont_end - cont_beg, - end: end - cont_beg, - }, - cont_beg - ), - "QUOTE" => brk!( - Element::QteBlock { - args, - cont_end: cont_end - cont_beg, - end: end - cont_beg, - }, - cont_beg - ), - _ => brk!( - Element::SplBlock { - name, - args, - cont_end: cont_end - cont_beg, - end: end - cont_beg, - }, - cont_beg - ), - }; - } - - if let Some((name, args, cont_beg, cont_end, end)) = dyn_block::parse(tail) { - brk!( - Element::DynBlock { - name, - args, - cont_end: cont_end - cont_beg, - end: end - cont_beg, - }, - cont_beg - ) - } - - if let Some((key, value, off)) = keyword::parse(tail) { - brk!( - if let Key::Call = key { - Element::Call { value } - } else { - Element::Keyword { key, value } - }, - off - ) - } - } - - // move to the beginning of the next line - if let Some(off) = line_ends.next() { - pos = off + 1; - - // the last character - if pos >= src.len() { - break ( - Element::Paragraph { - cont_end: src.len() - start - 1, - end: src.len() - start, - }, - start, - None, - ); - } - } else { - break ( - Element::Paragraph { - cont_end: src.len() - start, - end: src.len() - start, - }, - start, - None, - ); - } - } -} - -#[cfg(test)] -mod tests { - #[test] - fn parse() { - use super::keyword::Key; - use super::parse; - use super::Element::*; - - assert_eq!(parse("\n\n\n"), (Empty, 3, None)); - - let len = "Lorem ipsum dolor sit amet.".len(); - assert_eq!( - parse("\nLorem ipsum dolor sit amet.\n\n\n"), - ( - Paragraph { - cont_end: len, - end: len + 2, - }, - 1, - None - ) - ); - assert_eq!( - parse("\n\nLorem ipsum dolor sit amet.\n\n"), - ( - Paragraph { - cont_end: len, - end: len + 2, - }, - 2, - None - ) - ); - assert_eq!( - parse("\nLorem ipsum dolor sit amet.\n"), - ( - Paragraph { - cont_end: len, - end: len + 1, - }, - 1, - None - ) - ); - assert_eq!( - parse("\n\n\nLorem ipsum dolor sit amet."), - ( - Paragraph { - cont_end: len, - end: len, - }, - 3, - None - ) - ); - - assert_eq!( - parse("\n\n\n: Lorem ipsum dolor sit amet.\n"), - ( - FixedWidth(": Lorem ipsum dolor sit amet.\n"), - "\n\n\n: Lorem ipsum dolor sit amet.\n".len(), - None - ) - ); - assert_eq!( - parse("\n\n\n: Lorem ipsum dolor sit amet."), - ( - FixedWidth(": Lorem ipsum dolor sit amet."), - "\n\n\n: Lorem ipsum dolor sit amet.".len(), - None - ) - ); - - assert_eq!( - parse("\n\nLorem ipsum dolor sit amet.\n: Lorem ipsum dolor sit amet.\n"), - ( - Paragraph { - cont_end: len, - end: len + 1, - }, - 2, - Some((FixedWidth(": Lorem ipsum dolor sit amet.\n"), 30)) - ) - ); - - assert_eq!( - parse("\n\nLorem ipsum dolor sit amet.\n: Lorem ipsum dolor sit amet.\n:\n: Lorem ipsum dolor sit amet."), - ( - Paragraph { - cont_end: len, - end: len + 1, - }, - 2, - Some((FixedWidth(": Lorem ipsum dolor sit amet.\n:\n: Lorem ipsum dolor sit amet."), 61)) - ) - ); - - assert_eq!( - parse("\n\nLorem ipsum dolor sit amet.\n+ Lorem ipsum dolor sit amet.\n"), - ( - Paragraph { - cont_end: len, - end: len + 1, - }, - 2, - Some(( - List { - ident: 0, - ordered: false, - }, - 0 - )) - ) - ); - - assert_eq!( - parse("\n\nLorem ipsum dolor sit amet.\n#+BEGIN_QUOTE\nLorem ipsum dolor sit amet.\n#+END_QUOTE\n"), - ( - Paragraph { - cont_end: len, - end: len + 1, - }, - 2, - Some(( - QteBlock { - args: None, - cont_end: len + 1, - end: len + 1 + "#+END_QUOTE\n".len() - }, - "#+BEGIN_QUOTE\n".len() - )) - ) - ); - assert_eq!( - parse("\n #+ATTR_HTML: :width 200px"), - ( - Keyword { - key: Key::Attr { backend: "HTML" }, - value: ":width 200px" - }, - "\n #+ATTR_HTML: :width 200px".len(), - None - ) - ); - // TODO: more tests - } -} diff --git a/src/export/html.rs b/src/export/html.rs index 06b6dbe..64c217f 100644 --- a/src/export/html.rs +++ b/src/export/html.rs @@ -2,7 +2,7 @@ use crate::elements::Key; use crate::headline::Headline; -use crate::objects::Cookie; +use crate::objects::{Cookie, Timestamp}; use crate::parser::Parser; use jetscii::ascii_chars; use std::convert::From; @@ -11,94 +11,94 @@ use std::io::{Error, Write}; use std::marker::PhantomData; pub trait HtmlHandler> { - fn handle_headline_beg(&mut self, w: &mut W, hdl: Headline) -> Result<(), E> { + fn headline_beg(&mut self, w: &mut W, hdl: Headline) -> Result<(), E> { let level = if hdl.level <= 6 { hdl.level } else { 6 }; Ok(write!(w, "{1}", level, Escape(hdl.title))?) } - fn handle_headline_end(&mut self, w: &mut W) -> Result<(), E> { + fn headline_end(&mut self, w: &mut W) -> Result<(), E> { Ok(()) } - fn handle_section_beg(&mut self, w: &mut W) -> Result<(), E> { + fn section_beg(&mut self, w: &mut W) -> Result<(), E> { Ok(write!(w, "
")?) } - fn handle_section_end(&mut self, w: &mut W) -> Result<(), E> { + fn section_end(&mut self, w: &mut W) -> Result<(), E> { Ok(write!(w, "
")?) } - fn handle_paragraph_beg(&mut self, w: &mut W) -> Result<(), E> { + fn paragraph_beg(&mut self, w: &mut W) -> Result<(), E> { Ok(write!(w, "

")?) } - fn handle_paragraph_end(&mut self, w: &mut W) -> Result<(), E> { + fn paragraph_end(&mut self, w: &mut W) -> Result<(), E> { Ok(write!(w, "

")?) } - fn handle_ctr_block_beg(&mut self, w: &mut W) -> Result<(), E> { + fn ctr_block_beg(&mut self, w: &mut W) -> Result<(), E> { Ok(write!(w, r#"
"#)?) } - fn handle_ctr_block_end(&mut self, w: &mut W) -> Result<(), E> { + fn ctr_block_end(&mut self, w: &mut W) -> Result<(), E> { Ok(write!(w, "
")?) } - fn handle_qte_block_beg(&mut self, w: &mut W) -> Result<(), E> { + fn qte_block_beg(&mut self, w: &mut W) -> Result<(), E> { Ok(write!(w, "
")?) } - fn handle_qte_block_end(&mut self, w: &mut W) -> Result<(), E> { + fn qte_block_end(&mut self, w: &mut W) -> Result<(), E> { Ok(write!(w, "
")?) } - fn handle_spl_block_beg(&mut self, w: &mut W, name: &str, args: Option<&str>) -> Result<(), E> { + fn spl_block_beg(&mut self, w: &mut W, name: &str, args: Option<&str>) -> Result<(), E> { Ok(write!(w, "
")?) } - fn handle_spl_block_end(&mut self, w: &mut W) -> Result<(), E> { + fn spl_block_end(&mut self, w: &mut W) -> Result<(), E> { Ok(write!(w, "
")?) } - fn handle_comment_block(&mut self, w: &mut W, cont: &str, args: Option<&str>) -> Result<(), E> { + fn comment_block(&mut self, w: &mut W, cont: &str, args: Option<&str>) -> Result<(), E> { Ok(()) } - fn handle_example_block(&mut self, w: &mut W, cont: &str, args: Option<&str>) -> Result<(), E> { + fn example_block(&mut self, w: &mut W, cont: &str, args: Option<&str>) -> Result<(), E> { Ok(write!(w, "
{}
", Escape(cont))?) } - fn handle_export_block(&mut self, w: &mut W, cont: &str, args: Option<&str>) -> Result<(), E> { + fn export_block(&mut self, w: &mut W, cont: &str, args: Option<&str>) -> Result<(), E> { Ok(()) } - fn handle_src_block(&mut self, w: &mut W, cont: &str, args: Option<&str>) -> Result<(), E> { + fn src_block(&mut self, w: &mut W, cont: &str, args: Option<&str>) -> Result<(), E> { Ok(write!(w, "
{}
", Escape(cont))?) } - fn handle_verse_block(&mut self, w: &mut W, cont: &str, args: Option<&str>) -> Result<(), E> { + fn verse_block(&mut self, w: &mut W, cont: &str, args: Option<&str>) -> Result<(), E> { Ok(()) } - fn handle_dyn_block_beg(&mut self, w: &mut W, name: &str, args: Option<&str>) -> Result<(), E> { + fn dyn_block_beg(&mut self, w: &mut W, name: &str, args: Option<&str>) -> Result<(), E> { Ok(()) } - fn handle_dyn_block_end(&mut self, w: &mut W) -> Result<(), E> { + fn dyn_block_end(&mut self, w: &mut W) -> Result<(), E> { Ok(()) } - fn handle_list_beg(&mut self, w: &mut W, ordered: bool) -> Result<(), E> { + fn list_beg(&mut self, w: &mut W, ordered: bool) -> Result<(), E> { if ordered { Ok(write!(w, "
    ")?) } else { Ok(write!(w, "
      ")?) } } - fn handle_list_end(&mut self, w: &mut W, ordered: bool) -> Result<(), E> { + fn list_end(&mut self, w: &mut W, ordered: bool) -> Result<(), E> { if ordered { Ok(write!(w, "
")?) } else { Ok(write!(w, "")?) } } - fn handle_list_beg_item(&mut self, w: &mut W, bullet: &str) -> Result<(), E> { + fn list_beg_item(&mut self, w: &mut W, bullet: &str) -> Result<(), E> { Ok(write!(w, "
  • ")?) } - fn handle_list_end_item(&mut self, w: &mut W) -> Result<(), E> { + fn list_end_item(&mut self, w: &mut W) -> Result<(), E> { Ok(write!(w, "
  • ")?) } - fn handle_call(&mut self, w: &mut W, value: &str) -> Result<(), E> { + fn call(&mut self, w: &mut W, value: &str) -> Result<(), E> { Ok(()) } - fn handle_clock(&mut self, w: &mut W) -> Result<(), E> { + fn clock(&mut self, w: &mut W) -> Result<(), E> { Ok(()) } - fn handle_comment(&mut self, w: &mut W, cont: &str) -> Result<(), E> { + fn comment(&mut self, w: &mut W, cont: &str) -> Result<(), E> { Ok(()) } - fn handle_fixed_width(&mut self, w: &mut W, cont: &str) -> Result<(), E> { + fn fixed_width(&mut self, w: &mut W, cont: &str) -> Result<(), E> { for line in cont.lines() { // remove leading colon write!(w, "
    {}
    ", Escape(&line[1..]))?; @@ -106,39 +106,34 @@ pub trait HtmlHandler> { Ok(()) } - fn handle_table_start(&mut self, w: &mut W) -> Result<(), E> { + fn table_start(&mut self, w: &mut W) -> Result<(), E> { Ok(()) } - fn handle_table_end(&mut self, w: &mut W) -> Result<(), E> { + fn table_end(&mut self, w: &mut W) -> Result<(), E> { Ok(()) } - fn handle_table_cell(&mut self, w: &mut W) -> Result<(), E> { + fn table_cell(&mut self, w: &mut W) -> Result<(), E> { Ok(()) } - fn handle_latex_env(&mut self, w: &mut W) -> Result<(), E> { + fn latex_env(&mut self, w: &mut W) -> Result<(), E> { Ok(()) } - fn handle_fn_def(&mut self, w: &mut W, label: &str, cont: &str) -> Result<(), E> { + fn fn_def(&mut self, w: &mut W, label: &str, cont: &str) -> Result<(), E> { Ok(()) } - fn handle_keyword(&mut self, w: &mut W, key: Key<'_>, value: &str) -> Result<(), E> { + fn keyword(&mut self, w: &mut W, key: Key<'_>, value: &str) -> Result<(), E> { Ok(()) } - fn handle_rule(&mut self, w: &mut W) -> Result<(), E> { + fn rule(&mut self, w: &mut W) -> Result<(), E> { Ok(write!(w, "
    ")?) } - fn handle_cookie(&mut self, w: &mut W, cookie: Cookie) -> Result<(), E> { + fn cookie(&mut self, w: &mut W, cookie: Cookie) -> Result<(), E> { Ok(()) } - fn handle_fn_ref( - &mut self, - w: &mut W, - label: Option<&str>, - def: Option<&str>, - ) -> Result<(), E> { + fn fn_ref(&mut self, w: &mut W, label: Option<&str>, def: Option<&str>) -> Result<(), E> { Ok(()) } - fn handle_inline_call( + fn inline_call( &mut self, w: &mut W, name: &str, @@ -148,7 +143,7 @@ pub trait HtmlHandler> { ) -> Result<(), E> { Ok(()) } - fn handle_inline_src( + fn inline_src( &mut self, w: &mut W, lang: &str, @@ -157,7 +152,7 @@ pub trait HtmlHandler> { ) -> Result<(), E> { Ok(write!(w, "{}", Escape(body))?) } - fn handle_link(&mut self, w: &mut W, path: &str, desc: Option<&str>) -> Result<(), E> { + fn link(&mut self, w: &mut W, path: &str, desc: Option<&str>) -> Result<(), E> { if let Some(desc) = desc { Ok(write!( w, @@ -169,53 +164,56 @@ pub trait HtmlHandler> { Ok(write!(w, r#"{0}"#, Escape(path))?) } } - fn handle_macros(&mut self, w: &mut W, name: &str, args: Option<&str>) -> Result<(), E> { + fn macros(&mut self, w: &mut W, name: &str, args: Option<&str>) -> Result<(), E> { Ok(()) } - fn handle_radio_target(&mut self, w: &mut W, target: &str) -> Result<(), E> { + fn radio_target(&mut self, w: &mut W, target: &str) -> Result<(), E> { Ok(()) } - fn handle_snippet(&mut self, w: &mut W, name: &str, value: &str) -> Result<(), E> { + fn snippet(&mut self, w: &mut W, name: &str, value: &str) -> Result<(), E> { if name.eq_ignore_ascii_case("HTML") { Ok(write!(w, "{}", value)?) } else { Ok(()) } } - fn handle_target(&mut self, w: &mut W, target: &str) -> Result<(), E> { + fn target(&mut self, w: &mut W, target: &str) -> Result<(), E> { Ok(()) } - fn handle_bold_beg(&mut self, w: &mut W) -> Result<(), E> { + fn timestamp(&mut self, w: &mut W, timestamp: Timestamp) -> Result<(), E> { + Ok(()) + } + fn bold_beg(&mut self, w: &mut W) -> Result<(), E> { Ok(write!(w, "")?) } - fn handle_bold_end(&mut self, w: &mut W) -> Result<(), E> { + fn bold_end(&mut self, w: &mut W) -> Result<(), E> { Ok(write!(w, "")?) } - fn handle_italic_beg(&mut self, w: &mut W) -> Result<(), E> { + fn italic_beg(&mut self, w: &mut W) -> Result<(), E> { Ok(write!(w, "")?) } - fn handle_italic_end(&mut self, w: &mut W) -> Result<(), E> { + fn italic_end(&mut self, w: &mut W) -> Result<(), E> { Ok(write!(w, "")?) } - fn handle_strike_beg(&mut self, w: &mut W) -> Result<(), E> { + fn strike_beg(&mut self, w: &mut W) -> Result<(), E> { Ok(write!(w, "")?) } - fn handle_strike_end(&mut self, w: &mut W) -> Result<(), E> { + fn strike_end(&mut self, w: &mut W) -> Result<(), E> { Ok(write!(w, "")?) } - fn handle_underline_beg(&mut self, w: &mut W) -> Result<(), E> { + fn underline_beg(&mut self, w: &mut W) -> Result<(), E> { Ok(write!(w, "")?) } - fn handle_underline_end(&mut self, w: &mut W) -> Result<(), E> { + fn underline_end(&mut self, w: &mut W) -> Result<(), E> { Ok(write!(w, "")?) } - fn handle_verbatim(&mut self, w: &mut W, cont: &str) -> Result<(), E> { + fn verbatim(&mut self, w: &mut W, cont: &str) -> Result<(), E> { Ok(write!(w, "{}", Escape(cont))?) } - fn handle_code(&mut self, w: &mut W, cont: &str) -> Result<(), E> { + fn code(&mut self, w: &mut W, cont: &str) -> Result<(), E> { Ok(write!(w, "{}", Escape(cont))?) } - fn handle_text(&mut self, w: &mut W, cont: &str) -> Result<(), E> { + fn text(&mut self, w: &mut W, cont: &str) -> Result<(), E> { Ok(write!(w, "{}", Escape(cont))?) } } diff --git a/src/export/mod.rs b/src/export/mod.rs index f5a16fb..14c0943 100644 --- a/src/export/mod.rs +++ b/src/export/mod.rs @@ -4,67 +4,66 @@ macro_rules! handle_event { use crate::parser::Event::*; match $event { - HeadlineBeg(hdl) => $handler.handle_headline_beg($writer, hdl)?, - HeadlineEnd => $handler.handle_headline_end($writer)?, - SectionBeg => $handler.handle_section_beg($writer)?, - SectionEnd => $handler.handle_section_end($writer)?, - ParagraphBeg => $handler.handle_paragraph_beg($writer)?, - ParagraphEnd => $handler.handle_paragraph_end($writer)?, - CtrBlockBeg => $handler.handle_ctr_block_beg($writer)?, - CtrBlockEnd => $handler.handle_ctr_block_end($writer)?, - QteBlockBeg => $handler.handle_qte_block_beg($writer)?, - QteBlockEnd => $handler.handle_qte_block_end($writer)?, - SplBlockBeg { name, args } => $handler.handle_spl_block_beg($writer, name, args)?, - SplBlockEnd => $handler.handle_spl_block_end($writer)?, - CommentBlock { cont, args } => $handler.handle_comment_block($writer, cont, args)?, - ExampleBlock { cont, args } => $handler.handle_example_block($writer, cont, args)?, - ExportBlock { cont, args } => $handler.handle_export_block($writer, cont, args)?, - SrcBlock { cont, args } => $handler.handle_src_block($writer, cont, args)?, - VerseBlock { cont, args } => $handler.handle_verse_block($writer, cont, args)?, - DynBlockBeg { name, args } => $handler.handle_dyn_block_beg($writer, name, args)?, - DynBlockEnd => $handler.handle_dyn_block_end($writer)?, - ListBeg { ordered } => $handler.handle_list_beg($writer, ordered)?, - ListEnd { ordered } => $handler.handle_list_end($writer, ordered)?, - ListItemBeg { bullet } => $handler.handle_list_beg_item($writer, bullet)?, - ListItemEnd => $handler.handle_list_end_item($writer)?, - Call { value } => $handler.handle_call($writer, value)?, - Clock => $handler.handle_clock($writer)?, - Comment(c) => $handler.handle_comment($writer, c)?, - FixedWidth(f) => $handler.handle_fixed_width($writer, f)?, - TableStart => $handler.handle_table_start($writer)?, - TableEnd => $handler.handle_table_end($writer)?, - TableCell => $handler.handle_table_cell($writer)?, - LatexEnv => $handler.handle_latex_env($writer)?, - FnDef { label, cont } => $handler.handle_fn_def($writer, label, cont)?, - Keyword { key, value } => $handler.handle_keyword($writer, key, value)?, - Rule => $handler.handle_rule($writer)?, - Cookie(cookie) => $handler.handle_cookie($writer, cookie)?, - FnRef { label, def } => $handler.handle_fn_ref($writer, label, def)?, - InlineSrc { lang, option, body } => { - $handler.handle_inline_src($writer, lang, option, body)? - } + HeadlineBeg(hdl) => $handler.headline_beg($writer, hdl)?, + HeadlineEnd => $handler.headline_end($writer)?, + SectionBeg => $handler.section_beg($writer)?, + SectionEnd => $handler.section_end($writer)?, + ParagraphBeg => $handler.paragraph_beg($writer)?, + ParagraphEnd => $handler.paragraph_end($writer)?, + CtrBlockBeg => $handler.ctr_block_beg($writer)?, + CtrBlockEnd => $handler.ctr_block_end($writer)?, + QteBlockBeg => $handler.qte_block_beg($writer)?, + QteBlockEnd => $handler.qte_block_end($writer)?, + SplBlockBeg { name, args } => $handler.spl_block_beg($writer, name, args)?, + SplBlockEnd => $handler.spl_block_end($writer)?, + CommentBlock { cont, args } => $handler.comment_block($writer, cont, args)?, + ExampleBlock { cont, args } => $handler.example_block($writer, cont, args)?, + ExportBlock { cont, args } => $handler.export_block($writer, cont, args)?, + SrcBlock { cont, args } => $handler.src_block($writer, cont, args)?, + VerseBlock { cont, args } => $handler.verse_block($writer, cont, args)?, + DynBlockBeg { name, args } => $handler.dyn_block_beg($writer, name, args)?, + DynBlockEnd => $handler.dyn_block_end($writer)?, + ListBeg { ordered } => $handler.list_beg($writer, ordered)?, + ListEnd { ordered } => $handler.list_end($writer, ordered)?, + ListItemBeg { bullet } => $handler.list_beg_item($writer, bullet)?, + ListItemEnd => $handler.list_end_item($writer)?, + Call { value } => $handler.call($writer, value)?, + Clock => $handler.clock($writer)?, + Timestamp(t) => $handler.timestamp($writer, t)?, + Comment(c) => $handler.comment($writer, c)?, + FixedWidth(f) => $handler.fixed_width($writer, f)?, + TableStart => $handler.table_start($writer)?, + TableEnd => $handler.table_end($writer)?, + TableCell => $handler.table_cell($writer)?, + LatexEnv => $handler.latex_env($writer)?, + FnDef { label, cont } => $handler.fn_def($writer, label, cont)?, + Keyword { key, value } => $handler.keyword($writer, key, value)?, + Rule => $handler.rule($writer)?, + Cookie(cookie) => $handler.cookie($writer, cookie)?, + FnRef { label, def } => $handler.fn_ref($writer, label, def)?, + InlineSrc { lang, option, body } => $handler.inline_src($writer, lang, option, body)?, InlineCall { name, args, inside_header, end_header, - } => $handler.handle_inline_call($writer, name, args, inside_header, end_header)?, - Link { path, desc } => $handler.handle_link($writer, path, desc)?, - Macros { name, args } => $handler.handle_macros($writer, name, args)?, - RadioTarget { target } => $handler.handle_radio_target($writer, target)?, - Snippet { name, value } => $handler.handle_snippet($writer, name, value)?, - Target { target } => $handler.handle_target($writer, target)?, - BoldBeg => $handler.handle_bold_beg($writer)?, - BoldEnd => $handler.handle_bold_end($writer)?, - ItalicBeg => $handler.handle_italic_beg($writer)?, - ItalicEnd => $handler.handle_italic_end($writer)?, - StrikeBeg => $handler.handle_strike_beg($writer)?, - StrikeEnd => $handler.handle_strike_end($writer)?, - UnderlineBeg => $handler.handle_underline_beg($writer)?, - UnderlineEnd => $handler.handle_underline_end($writer)?, - Verbatim(cont) => $handler.handle_verbatim($writer, cont)?, - Code(cont) => $handler.handle_code($writer, cont)?, - Text(cont) => $handler.handle_text($writer, cont)?, + } => $handler.inline_call($writer, name, args, inside_header, end_header)?, + Link { path, desc } => $handler.link($writer, path, desc)?, + Macros { name, args } => $handler.macros($writer, name, args)?, + RadioTarget { target } => $handler.radio_target($writer, target)?, + Snippet { name, value } => $handler.snippet($writer, name, value)?, + Target { target } => $handler.target($writer, target)?, + BoldBeg => $handler.bold_beg($writer)?, + BoldEnd => $handler.bold_end($writer)?, + ItalicBeg => $handler.italic_beg($writer)?, + ItalicEnd => $handler.italic_end($writer)?, + StrikeBeg => $handler.strike_beg($writer)?, + StrikeEnd => $handler.strike_end($writer)?, + UnderlineBeg => $handler.underline_beg($writer)?, + UnderlineEnd => $handler.underline_end($writer)?, + Verbatim(cont) => $handler.verbatim($writer, cont)?, + Code(cont) => $handler.code($writer, cont)?, + Text(cont) => $handler.text($writer, cont)?, } }; } diff --git a/src/headline.rs b/src/headline.rs index 8865405..5d95f67 100644 --- a/src/headline.rs +++ b/src/headline.rs @@ -2,7 +2,7 @@ use memchr::{memchr, memchr2, memrchr}; -const HEADLINE_DEFAULT_KEYWORDS: &[&str] = +pub(crate) const DEFAULT_KEYWORDS: &[&str] = &["TODO", "DONE", "NEXT", "WAITING", "LATER", "CANCELLED"]; #[cfg_attr(test, derive(PartialEq))] @@ -21,28 +21,7 @@ pub struct Headline<'a> { } impl<'a> Headline<'a> { - /// parsing the input string and returning the parsed headline - /// and the content-begin and the end of headline container. - /// - /// ```rust - /// use orgize::headline::Headline; - /// - /// let (hdl, _, _) = Headline::parse("* DONE [#A] COMMENT Title :tag:a2%:"); - /// - /// assert_eq!(hdl.level, 1); - /// assert_eq!(hdl.priority, Some('A')); - /// assert_eq!(hdl.tags, Some(":tag:a2%:")); - /// assert_eq!(hdl.title, "COMMENT Title"); - /// assert_eq!(hdl.keyword, Some("DONE")); - /// ``` - pub fn parse(text: &'a str) -> (Headline<'a>, usize, usize) { - Self::parse_with_keywords(text, HEADLINE_DEFAULT_KEYWORDS) - } - - pub fn parse_with_keywords( - text: &'a str, - keywords: &'a [&'a str], - ) -> (Headline<'a>, usize, usize) { + pub(crate) fn parse(text: &'a str, keywords: &'a [&'a str]) -> (Headline<'a>, usize, usize) { let level = memchr2(b'\n', b' ', text.as_bytes()).unwrap_or_else(|| text.len()); debug_assert!(level > 0); @@ -118,7 +97,7 @@ impl<'a> Headline<'a> { ) } - pub fn find_level(text: &str, level: usize) -> usize { + pub(crate) fn find_level(text: &str, level: usize) -> usize { use jetscii::ByteSubstring; let bytes = text.as_bytes(); @@ -159,12 +138,12 @@ impl<'a> Headline<'a> { #[cfg(test)] mod tests { - use super::Headline; + use super::*; #[test] fn parse() { assert_eq!( - Headline::parse("**** TODO [#A] COMMENT Title :tag:a2%:").0, + Headline::parse("**** TODO [#A] COMMENT Title :tag:a2%:", DEFAULT_KEYWORDS).0, Headline { level: 4, priority: Some('A'), @@ -174,7 +153,7 @@ mod tests { }, ); assert_eq!( - Headline::parse("**** ToDO [#A] COMMENT Title :tag:a2%:").0, + Headline::parse("**** ToDO [#A] COMMENT Title :tag:a2%:", DEFAULT_KEYWORDS).0, Headline { level: 4, priority: None, @@ -184,7 +163,7 @@ mod tests { }, ); assert_eq!( - Headline::parse("**** T0DO [#A] COMMENT Title :tag:a2%:").0, + Headline::parse("**** T0DO [#A] COMMENT Title :tag:a2%:", DEFAULT_KEYWORDS).0, Headline { level: 4, priority: None, @@ -194,7 +173,7 @@ mod tests { }, ); assert_eq!( - Headline::parse("**** TODO [#1] COMMENT Title :tag:a2%:").0, + Headline::parse("**** TODO [#1] COMMENT Title :tag:a2%:", DEFAULT_KEYWORDS).0, Headline { level: 4, priority: None, @@ -204,7 +183,7 @@ mod tests { }, ); assert_eq!( - Headline::parse("**** TODO [#a] COMMENT Title :tag:a2%:").0, + Headline::parse("**** TODO [#a] COMMENT Title :tag:a2%:", DEFAULT_KEYWORDS).0, Headline { level: 4, priority: None, @@ -214,7 +193,7 @@ mod tests { }, ); assert_eq!( - Headline::parse("**** TODO [#A] COMMENT Title :tag:a2%").0, + Headline::parse("**** TODO [#A] COMMENT Title :tag:a2%", DEFAULT_KEYWORDS).0, Headline { level: 4, priority: Some('A'), @@ -224,7 +203,7 @@ mod tests { }, ); assert_eq!( - Headline::parse("**** TODO [#A] COMMENT Title tag:a2%:").0, + Headline::parse("**** TODO [#A] COMMENT Title tag:a2%:", DEFAULT_KEYWORDS).0, Headline { level: 4, priority: Some('A'), @@ -234,7 +213,7 @@ mod tests { }, ); assert_eq!( - Headline::parse("**** COMMENT Title tag:a2%:").0, + Headline::parse("**** COMMENT Title tag:a2%:", DEFAULT_KEYWORDS).0, Headline { level: 4, priority: None, @@ -245,7 +224,7 @@ mod tests { ); assert_eq!( - Headline::parse_with_keywords("**** TODO [#A] COMMENT Title :tag:a2%:", &[]).0, + Headline::parse("**** TODO [#A] COMMENT Title :tag:a2%:", &[]).0, Headline { level: 4, priority: None, @@ -255,7 +234,7 @@ mod tests { }, ); assert_eq!( - Headline::parse_with_keywords("**** TASK [#A] COMMENT Title :tag:a2%:", &["TASK"]).0, + Headline::parse("**** TASK [#A] COMMENT Title :tag:a2%:", &["TASK"]).0, Headline { level: 4, priority: Some('A'), @@ -268,21 +247,43 @@ mod tests { #[test] fn is_commented() { - assert!(Headline::parse("* COMMENT Title").0.is_commented()); - assert!(!Headline::parse("* Title").0.is_commented()); - assert!(!Headline::parse("* C0MMENT Title").0.is_commented()); - assert!(!Headline::parse("* comment Title").0.is_commented()); + assert!(Headline::parse("* COMMENT Title", DEFAULT_KEYWORDS) + .0 + .is_commented()); + assert!(!Headline::parse("* Title", DEFAULT_KEYWORDS) + .0 + .is_commented()); + assert!(!Headline::parse("* C0MMENT Title", DEFAULT_KEYWORDS) + .0 + .is_commented()); + assert!(!Headline::parse("* comment Title", DEFAULT_KEYWORDS) + .0 + .is_commented()); } #[test] fn is_archived() { - assert!(Headline::parse("* Title :ARCHIVE:").0.is_archived()); - assert!(Headline::parse("* Title :tag:ARCHIVE:").0.is_archived()); - assert!(Headline::parse("* Title :ARCHIVE:tag:").0.is_archived()); - assert!(!Headline::parse("* Title").0.is_commented()); - assert!(!Headline::parse("* Title :ARCHIVED:").0.is_archived()); - assert!(!Headline::parse("* Title :ARCHIVES:").0.is_archived()); - assert!(!Headline::parse("* Title :archive:").0.is_archived()); + assert!(Headline::parse("* Title :ARCHIVE:", DEFAULT_KEYWORDS) + .0 + .is_archived()); + assert!(Headline::parse("* Title :tag:ARCHIVE:", DEFAULT_KEYWORDS) + .0 + .is_archived()); + assert!(Headline::parse("* Title :ARCHIVE:tag:", DEFAULT_KEYWORDS) + .0 + .is_archived()); + assert!(!Headline::parse("* Title", DEFAULT_KEYWORDS) + .0 + .is_commented()); + assert!(!Headline::parse("* Title :ARCHIVED:", DEFAULT_KEYWORDS) + .0 + .is_archived()); + assert!(!Headline::parse("* Title :ARCHIVES:", DEFAULT_KEYWORDS) + .0 + .is_archived()); + assert!(!Headline::parse("* Title :archive:", DEFAULT_KEYWORDS) + .0 + .is_archived()); } #[test] diff --git a/src/lib.rs b/src/lib.rs index 4c79565..4ecf830 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -59,7 +59,7 @@ //! struct CustomHtmlHandler; //! //! impl HtmlHandler for CustomHtmlHandler { -//! fn handle_headline_beg(&mut self, w: &mut W, hdl: Headline) -> Result<()> { +//! fn headline_beg(&mut self, w: &mut W, hdl: Headline) -> Result<()> { //! write!( //! w, //! r##"{2}"##, @@ -90,9 +90,6 @@ //! let result = String::from_utf8(cursor.into_inner()).expect("invalid utf-8"); //! ``` -#[macro_use] -mod utils; - pub mod elements; pub mod export; pub mod headline; diff --git a/src/objects/macros.rs b/src/objects/macros.rs index 10fe950..5ae4e19 100644 --- a/src/objects/macros.rs +++ b/src/objects/macros.rs @@ -6,9 +6,11 @@ use memchr::memchr2; pub fn parse(text: &str) -> Option<(&str, Option<&str>, usize)> { debug_assert!(text.starts_with("{{{")); - expect!(text, 3, |c: u8| c.is_ascii_alphabetic())?; - let bytes = text.as_bytes(); + if text.len() <= 3 || !bytes[3].is_ascii_alphabetic() { + return None; + } + let (name, off) = memchr2(b'}', b'(', bytes) .filter(|&i| { bytes[3..i] @@ -18,8 +20,9 @@ pub fn parse(text: &str) -> Option<(&str, Option<&str>, usize)> { .map(|i| (&text[3..i], i))?; let (args, off) = if bytes[off] == b'}' { - expect!(text, off + 1, b'}')?; - expect!(text, off + 2, b'}')?; + if text.len() <= off + 2 || bytes[off + 1] != b'}' || bytes[off + 2] != b'}' { + return None; + } (None, off + 3 /* }}} */) } else { Substring::new(")}}}") diff --git a/src/objects/mod.rs b/src/objects/mod.rs index 8c4606e..81b0465 100644 --- a/src/objects/mod.rs +++ b/src/objects/mod.rs @@ -1,200 +1,14 @@ -mod cookie; -mod emphasis; -mod fn_ref; -mod inline_call; -mod inline_src; -mod link; -mod macros; -mod radio_target; -mod snippet; -mod target; +pub(crate) mod cookie; +pub(crate) mod emphasis; +pub(crate) mod fn_ref; +pub(crate) mod inline_call; +pub(crate) mod inline_src; +pub(crate) mod link; +pub(crate) mod macros; +pub(crate) mod radio_target; +pub(crate) mod snippet; +pub(crate) mod target; +pub(crate) mod timestamp; pub use self::cookie::Cookie; -use jetscii::bytes; - -#[cfg_attr(test, derive(PartialEq, Debug))] -pub enum Object<'a> { - Cookie(Cookie<'a>), - FnRef { - label: Option<&'a str>, - def: Option<&'a str>, - }, - InlineCall { - name: &'a str, - args: &'a str, - inside_header: Option<&'a str>, - end_header: Option<&'a str>, - }, - InlineSrc { - lang: &'a str, - option: Option<&'a str>, - body: &'a str, - }, - Link { - path: &'a str, - desc: Option<&'a str>, - }, - Macros { - name: &'a str, - args: Option<&'a str>, - }, - RadioTarget { - target: &'a str, - }, - Snippet { - name: &'a str, - value: &'a str, - }, - Target { - target: &'a str, - }, - - // `end` indicates the position of the second marker - Bold { - end: usize, - }, - Italic { - end: usize, - }, - Strike { - end: usize, - }, - Underline { - end: usize, - }, - - Verbatim(&'a str), - Code(&'a str), - Text(&'a str), -} - -pub fn parse(src: &str) -> (Object<'_>, usize, Option<(Object<'_>, usize)>) { - let bytes = src.as_bytes(); - let bs = bytes!(b'@', b' ', b'"', b'(', b'\n', b'{', b'<', b'['); - - let mut pos = 0; - while let Some(off) = if pos == 0 { - Some(0) - } else { - bs.find(&bytes[pos..]) - } { - pos += off; - - if src.len() - pos < 3 { - return (Object::Text(src), src.len(), None); - } - - macro_rules! brk { - ($obj:expr, $off:expr, $pos:expr) => { - return if $pos == 0 { - ($obj, $off, None) - } else { - (Object::Text(&src[0..$pos]), $pos, Some(($obj, $off))) - }; - }; - } - - let tail = &src[pos..]; - match bytes[pos] { - b'@' if bytes[pos + 1] == b'@' => { - if let Some((name, value, off)) = snippet::parse(tail) { - brk!(Object::Snippet { name, value }, off, pos); - } - } - b'{' if bytes[pos + 1] == b'{' && bytes[pos + 2] == b'{' => { - if let Some((name, args, off)) = macros::parse(tail) { - brk!(Object::Macros { name, args }, off, pos); - } - } - b'<' if bytes[pos + 1] == b'<' => { - if bytes[pos + 2] == b'<' { - if let Some((target, off)) = radio_target::parse(tail) { - brk!(Object::RadioTarget { target }, off, pos); - } - } else if bytes[pos + 2] != b'\n' { - if let Some((target, off)) = target::parse(tail) { - brk!(Object::Target { target }, off, pos); - } - } - } - b'[' => { - if tail[1..].starts_with("fn:") { - if let Some((label, def, off)) = fn_ref::parse(tail) { - brk!(Object::FnRef { label, def }, off, pos); - } - } - - if bytes[pos + 1] == b'[' { - if let Some((path, desc, off)) = link::parse(tail) { - brk!(Object::Link { path, desc }, off, pos); - } - } - - if let Some((cookie, off)) = cookie::parse(tail) { - brk!(Object::Cookie(cookie), off, pos); - } - // TODO: Timestamp - } - b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => { - if let Some((obj, off)) = parse_text_markup(&tail[1..]) { - brk!(obj, off, pos + 1); - } - } - _ => { - if let Some((obj, off)) = parse_text_markup(tail) { - brk!(obj, off, pos); - } - } - } - - pos += 1; - } - - (Object::Text(src), src.len(), None) -} - -fn parse_text_markup(src: &str) -> Option<(Object<'_>, usize)> { - match src.as_bytes()[0] { - b'*' => emphasis::parse(src, b'*').map(|end| (Object::Bold { end }, 1)), - b'+' => emphasis::parse(src, b'+').map(|end| (Object::Strike { end }, 1)), - b'/' => emphasis::parse(src, b'/').map(|end| (Object::Italic { end }, 1)), - b'_' => emphasis::parse(src, b'_').map(|end| (Object::Underline { end }, 1)), - b'=' => emphasis::parse(src, b'=').map(|end| (Object::Verbatim(&src[1..end]), end + 1)), - b'~' => emphasis::parse(src, b'~').map(|end| (Object::Code(&src[1..end]), end + 1)), - b's' if src.starts_with("src_") => inline_src::parse(src) - .map(|(lang, option, body, off)| (Object::InlineSrc { lang, option, body }, off)), - b'c' if src.starts_with("call_") => { - inline_call::parse(src).map(|(name, args, inside_header, end_header, off)| { - ( - Object::InlineCall { - name, - args, - inside_header, - end_header, - }, - off, - ) - }) - } - _ => None, - } -} - -#[cfg(test)] -mod tests { - #[test] - fn parse() { - use super::*; - - assert_eq!(parse("*bold*"), (Object::Bold { end: 5 }, 1, None)); - assert_eq!( - parse("Normal =verbatim="), - ( - Object::Text("Normal "), - "Normal ".len(), - Some((Object::Verbatim("verbatim"), "=verbatim=".len())) - ) - ); - // TODO: more tests - } -} +pub use self::timestamp::*; diff --git a/src/objects/timestamp.rs b/src/objects/timestamp.rs index 8856f98..567d260 100644 --- a/src/objects/timestamp.rs +++ b/src/objects/timestamp.rs @@ -1,9 +1,401 @@ +use memchr::memchr; + #[cfg_attr(test, derive(PartialEq))] #[derive(Debug)] -pub struct Time<'a> { - pub date: &'a str, +pub struct Datetime { + pub date: (u16, u8, u8), + pub time: Option<(u8, u8)>, } -pub enum Timestamp<'a> { - ActiveRange, +#[cfg_attr(test, derive(PartialEq))] +#[derive(Debug)] +pub enum RepeaterType { + Cumulate, + CatchUp, + Restart, +} + +#[cfg_attr(test, derive(PartialEq))] +#[derive(Debug)] +pub enum DelayType { + All, + First, +} + +#[cfg_attr(test, derive(PartialEq))] +#[derive(Debug)] +pub enum TimeUnit { + Hour, + Day, + Week, + Month, + Year, +} + +#[cfg_attr(test, derive(PartialEq))] +#[derive(Debug)] +pub struct Repeater { + pub ty: RepeaterType, + pub value: usize, + pub unit: TimeUnit, +} + +#[cfg_attr(test, derive(PartialEq))] +#[derive(Debug)] +pub struct Delay { + pub ty: DelayType, + pub value: usize, + pub unit: TimeUnit, +} + +#[cfg_attr(test, derive(PartialEq))] +#[derive(Debug)] +pub enum Timestamp<'a> { + Active { + start: Datetime, + repeater: Option, + delay: Option, + }, + Inactive { + start: Datetime, + repeater: Option, + delay: Option, + }, + ActiveRange { + start: Datetime, + end: Datetime, + repeater: Option, + delay: Option, + }, + InactiveRange { + start: Datetime, + end: Datetime, + repeater: Option, + delay: Option, + }, + Diary(&'a str), +} + +pub fn parse_active(text: &str) -> Option<(Timestamp<'_>, usize)> { + debug_assert!(text.starts_with('<')); + + let bytes = text.as_bytes(); + let mut off = memchr(b'>', bytes)?; + let (start, mut end) = parse_datetime(&bytes[1..off])?; + if end.is_none() + && off <= text.len() - 14 /* -- */ + && text[off + 1..].starts_with("--<") + { + if let Some(new_off) = memchr(b'>', &bytes[off + 1..]) { + if let Some((start, _)) = parse_datetime(&bytes[off + 4..off + 1 + new_off]) { + end = Some(start); + off += new_off + 1; + } + } + } + + Some(( + if let Some(end) = end { + Timestamp::ActiveRange { + start, + end, + repeater: None, + delay: None, + } + } else { + Timestamp::Active { + start, + repeater: None, + delay: None, + } + }, + off + 1, + )) +} + +pub fn parse_inactive(text: &str) -> Option<(Timestamp<'_>, usize)> { + debug_assert!(text.starts_with('[')); + + let bytes = text.as_bytes(); + let mut off = memchr(b']', bytes)?; + let (start, mut end) = parse_datetime(&bytes[1..off])?; + if end.is_none() + && off <= text.len() - 14 /* --[YYYY-MM-DD] */ + && text[off + 1..].starts_with("--[") + { + if let Some(new_off) = memchr(b']', &bytes[off + 1..]) { + if let Some((start, _)) = parse_datetime(&bytes[off + 4..off + 1 + new_off]) { + end = Some(start); + off += new_off + 1; + } + } + } + + Some(( + if let Some(end) = end { + Timestamp::InactiveRange { + start, + end, + repeater: None, + delay: None, + } + } else { + Timestamp::Inactive { + start, + repeater: None, + delay: None, + } + }, + off + 1, + )) +} + +fn parse_datetime(bytes: &[u8]) -> Option<(Datetime, Option)> { + if !bytes[0].is_ascii_digit() || !bytes[bytes.len() - 1].is_ascii_alphanumeric() { + return None; + } + + // similar to str::split_ascii_whitespace, but for &[u8] + let mut words = bytes + .split(u8::is_ascii_whitespace) + .filter(|s| !s.is_empty()); + + let date = words + .next() + .filter(|word| { + word.len() == 10 /* YYYY-MM-DD */ + && word[0..4].iter().all(u8::is_ascii_digit) + && word[4] == b'-' + && word[5..7].iter().all(u8::is_ascii_digit) + && word[7] == b'-' + && word[8..10].iter().all(u8::is_ascii_digit) + }) + .map(|word| { + ( + (u16::from(word[0]) - u16::from(b'0')) * 1000 + + (u16::from(word[1]) - u16::from(b'0')) * 100 + + (u16::from(word[2]) - u16::from(b'0')) * 10 + + (u16::from(word[3]) - u16::from(b'0')), + (word[5] - b'0') * 10 + (word[6] - b'0'), + (word[8] - b'0') * 10 + (word[9] - b'0'), + ) + })?; + + let _dayname = words.next().filter(|word| { + word.iter().all(|&c| { + !(c == b'+' || c == b'-' || c == b']' || c == b'>' || c.is_ascii_digit() || c == b'\n') + }) + })?; + + let (start, end) = if let Some(word) = words.next() { + macro_rules! datetime { + ($a:expr, $b:expr, $c:expr) => { + Datetime { + date, + time: Some((word[$a] - b'0', (word[$b] - b'0') * 10 + (word[$c] - b'0'))), + } + }; + ($a:expr, $b:expr, $c:expr, $d:expr) => { + Datetime { + date, + time: Some(( + (word[$a] - b'0') * 10 + (word[$b] - b'0'), + (word[$c] - b'0') * 10 + (word[$d] - b'0'), + )), + } + }; + } + + if word.len() == 4 // H:MM + && word[0].is_ascii_digit() + && word[1] == b':' + && word[2..4].iter().all(u8::is_ascii_digit) + { + (datetime!(0, 2, 3), None) + } else if word.len() == 5 // HH:MM + && word[0..2].iter().all(u8::is_ascii_digit) + && word[2] == b':' + && word[3..5].iter().all(u8::is_ascii_digit) + { + (datetime!(0, 1, 3, 4), None) + } else if word.len() == 9 // H:MM-H:MM + && word[0].is_ascii_digit() + && word[1] == b':' + && word[2..4].iter().all(u8::is_ascii_digit) + && word[4] == b'-' + && word[5].is_ascii_digit() + && word[6] == b':' + && word[7..9].iter().all(u8::is_ascii_digit) + { + (datetime!(0, 2, 3), Some(datetime!(5, 7, 8))) + } else if word.len() == 10 // H:MM-HH:MM + && word[0].is_ascii_digit() + && word[1] == b':' + && word[2..4].iter().all(u8::is_ascii_digit) + && word[4] == b'-' + && word[5..7].iter().all(u8::is_ascii_digit) + && word[7] == b':' + && word[8..10].iter().all(u8::is_ascii_digit) + { + (datetime!(0, 2, 3), Some(datetime!(5, 6, 8, 9))) + } else if word.len() == 10 // HH:MM-H:MM + && word[0..2].iter().all(u8::is_ascii_digit) + && word[2] == b':' + && word[3..5].iter().all(u8::is_ascii_digit) + && word[5] == b'-' + && word[6].is_ascii_digit() + && word[7] == b':' + && word[8..10].iter().all(u8::is_ascii_digit) + { + (datetime!(0, 1, 3, 4), Some(datetime!(6, 8, 9))) + } else if word.len() == 11 // HH:MM-HH:MM + && word[0..2].iter().all(u8::is_ascii_digit) + && word[2] == b':' + && word[3..5].iter().all(u8::is_ascii_digit) + && word[5] == b'-' + && word[6..8].iter().all(u8::is_ascii_digit) + && word[8] == b':' + && word[9..11].iter().all(u8::is_ascii_digit) + { + (datetime!(0, 1, 3, 4), Some(datetime!(6, 7, 9, 10))) + } else { + return None; + } + } else { + (Datetime { date, time: None }, None) + }; + + // TODO: repeater and delay + if words.next().is_some() { + None + } else { + Some((start, end)) + } +} + +pub fn parse_diary(text: &str) -> Option<(Timestamp<'_>, usize)> { + debug_assert!(text.starts_with('<')); + + if text.len() <= 6 /* <%%()> */ || &text[1..4] != "%%(" { + return None; + } + + let bytes = text.as_bytes(); + + memchr(b'>', bytes) + .filter(|i| bytes[i - 1] == b')' && bytes[4..i - 1].iter().all(|&c| c != b'\n')) + .map(|i| (Timestamp::Diary(&text[4..i - 1]), i)) +} + +#[cfg(test)] +mod tests { + #[test] + fn parse_range() { + use super::*; + + assert_eq!( + parse_inactive("[2003-09-16 Tue]"), + Some(( + Timestamp::Inactive { + start: Datetime { + date: (2003, 9, 16), + time: None + }, + repeater: None, + delay: None, + }, + "[2003-09-16 Tue]".len() + )) + ); + assert_eq!( + parse_inactive("[2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39]"), + Some(( + Timestamp::InactiveRange { + start: Datetime { + date: (2003, 9, 16), + time: Some((9, 39)) + }, + end: Datetime { + date: (2003, 9, 16), + time: Some((10, 39)) + }, + repeater: None, + delay: None + }, + "[2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39]".len() + )) + ); + assert_eq!( + parse_active("<2003-09-16 Tue 09:39-10:39>"), + Some(( + Timestamp::ActiveRange { + start: Datetime { + date: (2003, 9, 16), + time: Some((9, 39)) + }, + end: Datetime { + date: (2003, 9, 16), + time: Some((10, 39)) + }, + repeater: None, + delay: None + }, + "<2003-09-16 Tue 09:39-10:39>".len() + )) + ); + } + + #[test] + fn parse_datetime() { + use super::*; + + assert_eq!( + parse_datetime(b"2003-09-16 Tue"), + Some(( + Datetime { + date: (2003, 9, 16), + time: None + }, + None + )) + ); + assert_eq!( + parse_datetime(b"2003-09-16 Tue 9:39"), + Some(( + Datetime { + date: (2003, 9, 16), + time: Some((9, 39)) + }, + None + )) + ); + assert_eq!( + parse_datetime(b"2003-09-16 Tue 09:39"), + Some(( + Datetime { + date: (2003, 9, 16), + time: Some((9, 39)) + }, + None + )) + ); + assert_eq!( + parse_datetime(b"2003-09-16 Tue 9:39-10:39"), + Some(( + Datetime { + date: (2003, 9, 16), + time: Some((9, 39)) + }, + Some(Datetime { + date: (2003, 9, 16), + time: Some((10, 39)) + }), + )) + ); + + assert_eq!(parse_datetime(b"2003-9-16 Tue"), None); + assert_eq!(parse_datetime(b"2003-09-16"), None); + assert_eq!(parse_datetime(b"2003-09-16 09:39"), None); + assert_eq!(parse_datetime(b"2003-09-16 Tue 0939"), None); + } } diff --git a/src/parser.rs b/src/parser.rs index 2a97195..4bfab96 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,8 +1,8 @@ //! Parser -use crate::elements::{self, *}; -use crate::headline::*; -use crate::objects::{self, *}; +use crate::{elements::*, headline::*, objects::*}; +use jetscii::bytes; +use memchr::memchr_iter; #[cfg_attr(test, derive(PartialEq))] #[derive(Copy, Clone, Debug)] @@ -105,6 +105,7 @@ pub enum Event<'a> { }, Rule, + Timestamp(Timestamp<'a>), Cookie(Cookie<'a>), FnRef { label: Option<&'a str>, @@ -158,10 +159,9 @@ pub struct Parser<'a> { text: &'a str, stack: Vec<(Container, usize, usize)>, off: usize, - ele_buf: Option<(Element<'a>, usize)>, - obj_buf: Option<(Object<'a>, usize)>, - keywords: Option<&'a [&'a str]>, - + ele_buf: Option<(Event<'a>, usize, usize, usize)>, + obj_buf: Option<(Event<'a>, usize, usize, usize)>, + keywords: &'a [&'a str], list_more_item: bool, } @@ -175,7 +175,7 @@ impl<'a> Parser<'a> { ele_buf: None, obj_buf: None, list_more_item: false, - keywords: None, + keywords: DEFAULT_KEYWORDS, } } @@ -190,15 +190,14 @@ impl<'a> Parser<'a> { } pub fn set_keywords(&mut self, keywords: &'a [&'a str]) { - self.keywords = Some(keywords) + self.keywords = keywords; } fn next_section_or_headline(&mut self) -> Event<'a> { let end = Headline::find_level(&self.text[self.off..], std::usize::MAX); debug_assert!(end <= self.text[self.off..].len()); if end != 0 { - self.stack - .push((Container::Section, self.off + end, self.off + end)); + self.push_stack(Container::Section, end, end); Event::SectionBeg } else { self.next_headline() @@ -206,165 +205,286 @@ impl<'a> Parser<'a> { } fn next_headline(&mut self) -> Event<'a> { - let (hdl, off, end) = if let Some(keywords) = self.keywords { - Headline::parse_with_keywords(&self.text[self.off..], keywords) - } else { - Headline::parse(&self.text[self.off..]) - }; + let (hdl, off, end) = Headline::parse(&self.text[self.off..], self.keywords); debug_assert!(end <= self.text[self.off..].len()); - self.stack.push(( - Container::Headline(self.off + off), - self.off + end, - self.off + end, - )); + self.push_stack(Container::Headline(self.off + off), end, end); self.off += off; Event::HeadlineBeg(hdl) } - fn next_ele(&mut self, end: usize) -> Event<'a> { - let text = &self.text[self.off..end]; - let (ele, off) = self.ele_buf.take().unwrap_or_else(|| { - let (ele, off, next_ele) = elements::parse(text); - self.ele_buf = next_ele; - (ele, off) - }); + fn next_ele(&mut self, text: &'a str) -> Event<'a> { + let (ele, off, limit, end) = self + .ele_buf + .take() + .or_else(|| self.real_next_ele(text)) + .unwrap_or_else(|| { + let len = text.len(); + let start = text.find(|c| c != '\n').unwrap_or(0); + if start == len - 1 { + (self.end(), len, 0, 0) + } else { + let mut pos = start; + for off in memchr_iter(b'\n', &text.as_bytes()[start..]) { + if text[pos..off + start].trim().is_empty() { + return (Event::ParagraphBeg, start, pos, off + start); + } else { + pos = off + start; + if let Some(buf) = self.real_next_ele(&text[pos + 1..]) { + self.ele_buf = Some(buf); + return (Event::ParagraphBeg, start, pos, pos); + } + } + } + ( + Event::ParagraphBeg, + start, + if text.ends_with('\n') { len - 1 } else { len }, + len, + ) + } + }); - debug_assert!(off <= text.len()); + debug_assert!(off <= text.len() && limit <= text.len() && end <= text.len()); + + match ele { + Event::ParagraphBeg => self.push_stack(Container::Paragraph, limit, end), + Event::QteBlockBeg => self.push_stack(Container::QteBlock, limit, end), + Event::CtrBlockBeg => self.push_stack(Container::CtrBlock, limit, end), + Event::SplBlockBeg { .. } => self.push_stack(Container::SplBlock, limit, end), + Event::DynBlockBeg { .. } => self.push_stack(Container::DynBlock, limit, end), + Event::ListBeg { ordered, .. } => { + self.push_stack(Container::List(limit, ordered), end, end); + self.list_more_item = true; + } + _ => (), + } self.off += off; - match ele { - Element::Paragraph { cont_end, end } => { - debug_assert!(cont_end <= text.len() && end <= text.len()); - self.stack - .push((Container::Paragraph, cont_end + self.off, end + self.off)); - Event::ParagraphBeg + ele + } + + // returns (event, offset, container limit, container end) + fn real_next_ele(&mut self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> { + if text.starts_with("[fn:") { + if let Some((label, cont, off)) = fn_def::parse(text) { + return Some((Event::FnDef { label, cont }, off + 1, 0, 0)); } - Element::QteBlock { end, cont_end, .. } => { - debug_assert!(cont_end <= text.len() && end <= text.len()); - self.stack - .push((Container::QteBlock, cont_end + self.off, end + self.off)); - Event::QteBlockBeg + } + + let (tail, line_begin) = text + .find(|c| c != ' ') + .map(|off| (&text[off..], off)) + .unwrap_or((text, 0)); + + let (is_item, ordered) = list::is_item(tail); + if is_item { + return Some((Event::ListBeg { ordered }, 0, line_begin, text.len())); + } + + // TODO: LaTeX environment + if tail.starts_with("\\begin{") {} + + // rule + if tail.starts_with("-----") { + let off = rule::parse(tail); + if off != 0 { + return Some((Event::Rule, off, 0, 0)); } - Element::CtrBlock { end, cont_end, .. } => { - debug_assert!(cont_end <= text.len() && end <= text.len()); - self.stack - .push((Container::CtrBlock, cont_end + self.off, end + self.off)); - Event::CtrBlockBeg - } - Element::SplBlock { - name, - args, - end, - cont_end, - } => { - debug_assert!(cont_end <= text.len() && end <= text.len()); - self.stack - .push((Container::SplBlock, cont_end + self.off, end + self.off)); - Event::SplBlockBeg { name, args } - } - Element::DynBlock { - name, - args, - cont_end, - end, - } => { - debug_assert!(cont_end <= text.len() && end <= text.len()); - self.stack - .push((Container::DynBlock, cont_end + self.off, end + self.off)); - Event::DynBlockBeg { name, args } - } - Element::List { ident, ordered } => { - self.stack.push((Container::List(ident, ordered), end, end)); - self.list_more_item = true; - Event::ListBeg { ordered } - } - Element::Call { value } => Event::Call { value }, - Element::Comment(c) => Event::Comment(c), - Element::CommentBlock { args, cont } => Event::CommentBlock { args, cont }, - Element::ExampleBlock { args, cont } => Event::ExampleBlock { args, cont }, - Element::ExportBlock { args, cont } => Event::ExportBlock { args, cont }, - Element::FixedWidth(f) => Event::FixedWidth(f), - Element::FnDef { label, cont } => Event::FnDef { label, cont }, - Element::Keyword { key, value } => Event::Keyword { key, value }, - Element::Rule => Event::Rule, - Element::SrcBlock { args, cont } => Event::SrcBlock { args, cont }, - Element::VerseBlock { args, cont } => Event::VerseBlock { args, cont }, - Element::Empty => self.end(), + } + + // fixed width + if tail.starts_with(": ") || tail.starts_with(":\n") { + // let end = line_ends + // .skip_while(|&i| { + // text[i + 1..].starts_with(": ") || text[i + 1..].starts_with(":\n") + // }) + // .next() + // .map(|i| i + 1) + // .unwrap_or_else(|| text.len()); + // let off = end - pos; + // brk!(Element::FixedWidth(&tail[0..off]), off); + } + + // comment + if tail.starts_with("# ") || tail.starts_with("#\n") { + // let end = line_ends + // .skip_while(|&i| { + // text[i + 1..].starts_with("# ") || text[i + 1..].starts_with("#\n") + // }) + // .next() + // .map(|i| i + 1) + // .unwrap_or_else(|| text.len()); + // let off = end - pos; + // brk!(Element::Comment(&tail[0..off]), off); + } + + if tail.starts_with("#+") { + block::parse(tail) + .map(|(name, args, begin, limit, end)| { + let cont = &tail[begin..limit]; + match &*name.to_uppercase() { + "COMMENT" => (Event::CommentBlock { args, cont }, end, 0, 0), + "EXAMPLE" => (Event::ExampleBlock { args, cont }, end, 0, 0), + "EXPORT" => (Event::ExportBlock { args, cont }, end, 0, 0), + "SRC" => (Event::SrcBlock { args, cont }, end, 0, 0), + "VERSE" => (Event::VerseBlock { args, cont }, end, 0, 0), + "CENTER" => (Event::CtrBlockBeg, begin, limit, end), + "QUOTE" => (Event::QteBlockBeg, begin, limit, end), + _ => (Event::SplBlockBeg { name, args }, begin, limit, end), + } + }) + .or_else(|| { + dyn_block::parse(tail).map(|(name, args, begin, limit, end)| { + (Event::DynBlockBeg { name, args }, begin, limit, end) + }) + }) + .or_else(|| { + keyword::parse(tail).map(|(key, value, off)| { + if let Key::Call = key { + (Event::Call { value }, off, 0, 0) + } else { + (Event::Keyword { key, value }, off, 0, 0) + } + }) + }) + } else { + None } } - fn next_obj(&mut self, end: usize) -> Event<'a> { - let text = &self.text[self.off..end]; - let (obj, off) = self.obj_buf.take().unwrap_or_else(|| { - let (obj, off, next_obj) = objects::parse(text); - self.obj_buf = next_obj; - (obj, off) - }); + fn next_obj(&mut self, text: &'a str) -> Event<'a> { + let (obj, off, limit, end) = self + .obj_buf + .take() + .or_else(|| self.real_next_obj(text)) + .unwrap_or_else(|| { + let bs = bytes!(b'@', b' ', b'"', b'(', b'\n', b'{', b'<', b'['); + let bytes = text.as_bytes(); + let mut pos = 0; - debug_assert!(off <= text.len()); + while let Some(off) = bs.find(&bytes[pos..]) { + pos += off + 1; + + if let Some(buf) = self.real_next_obj(&text[pos..]) { + self.obj_buf = Some(buf); + return (Event::Text(&text[0..pos]), pos, 0, 0); + } + } + + (Event::Text(text), text.len(), 0, 0) + }); + + debug_assert!(off <= text.len() && limit <= text.len() && end <= text.len()); self.off += off; match obj { - Object::Underline { end } => { - debug_assert!(end <= text.len()); - self.stack - .push((Container::Underline, end + self.off - 1, end + self.off)); - Event::UnderlineBeg + Event::UnderlineBeg => self.push_stack(Container::Underline, limit, end), + Event::StrikeBeg => self.push_stack(Container::Strike, limit, end), + Event::ItalicBeg => self.push_stack(Container::Italic, limit, end), + Event::BoldBeg => self.push_stack(Container::Bold, limit, end), + _ => (), + } + + obj + } + + fn real_next_obj(&mut self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> { + if text.len() < 3 { + return None; + } + + let bytes = text.as_bytes(); + match bytes[0] { + b'@' if bytes[1] == b'@' => snippet::parse(text) + .map(|(name, value, off)| (Event::Snippet { name, value }, off, 0, 0)), + b'{' if bytes[1] == b'{' && bytes[2] == b'{' => macros::parse(text) + .map(|(name, args, off)| (Event::Macros { name, args }, off, 0, 0)), + b'<' if bytes[1] == b'<' => { + if bytes[2] == b'<' { + radio_target::parse(text) + .map(|(target, off)| (Event::RadioTarget { target }, off, 0, 0)) + } else { + target::parse(text).map(|(target, off)| (Event::Target { target }, off, 0, 0)) + } } - Object::Strike { end } => { - debug_assert!(end <= text.len()); - self.stack - .push((Container::Strike, end + self.off - 1, end + self.off)); - Event::StrikeBeg + b'<' => timestamp::parse_active(text) + .map(|(timestamp, off)| (Event::Timestamp(timestamp), off, 0, 0)) + .or_else(|| { + timestamp::parse_diary(text) + .map(|(timestamp, off)| (Event::Timestamp(timestamp), off, 0, 0)) + }), + b'[' => { + if text[1..].starts_with("fn:") { + fn_ref::parse(text) + .map(|(label, def, off)| (Event::FnRef { label, def }, off, 0, 0)) + } else if bytes[1] == b'[' { + link::parse(text) + .map(|(path, desc, off)| (Event::Link { path, desc }, off, 0, 0)) + } else { + cookie::parse(text) + .map(|(cookie, off)| (Event::Cookie(cookie), off, 0, 0)) + .or_else(|| { + timestamp::parse_inactive(text) + .map(|(timestamp, off)| (Event::Timestamp(timestamp), off, 0, 0)) + }) + } } - Object::Italic { end } => { - debug_assert!(end <= text.len()); - self.stack - .push((Container::Italic, end + self.off - 1, end + self.off)); - Event::ItalicBeg - } - Object::Bold { end } => { - debug_assert!(end <= text.len()); - self.stack - .push((Container::Bold, end + self.off - 1, end + self.off)); - Event::BoldBeg - } - Object::Code(c) => Event::Code(c), - Object::Cookie(c) => Event::Cookie(c), - Object::FnRef { label, def } => Event::FnRef { label, def }, - Object::InlineCall { - name, - args, - inside_header, - end_header, - } => Event::InlineCall { - name, - args, - inside_header, - end_header, - }, - Object::InlineSrc { lang, option, body } => Event::InlineSrc { lang, option, body }, - Object::Link { path, desc } => Event::Link { path, desc }, - Object::Macros { name, args } => Event::Macros { name, args }, - Object::RadioTarget { target } => Event::RadioTarget { target }, - Object::Snippet { name, value } => Event::Snippet { name, value }, - Object::Target { target } => Event::Target { target }, - Object::Text(t) => Event::Text(t), - Object::Verbatim(v) => Event::Verbatim(v), + b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => self.next_inline(&text[1..]), + _ => self.next_inline(text), } } - fn next_list_item(&mut self, ident: usize, end: usize) -> Event<'a> { - let (bullet, off, cont_end, end, has_more) = list::parse(&self.text[self.off..end], ident); - self.stack - .push((Container::ListItem, cont_end + self.off, end + self.off)); + fn next_inline(&mut self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> { + match text.as_bytes()[0] { + b'*' => emphasis::parse(text, b'*').map(|end| (Event::BoldBeg, 1, end - 1, end)), + b'+' => emphasis::parse(text, b'+').map(|end| (Event::StrikeBeg, 1, end - 1, end)), + b'/' => emphasis::parse(text, b'/').map(|end| (Event::ItalicBeg, 1, end - 1, end)), + b'_' => emphasis::parse(text, b'_').map(|end| (Event::UnderlineBeg, 1, end - 1, end)), + b'=' => emphasis::parse(text, b'=') + .map(|end| (Event::Verbatim(&text[1..end]), end + 1, 0, 0)), + b'~' => { + emphasis::parse(text, b'~').map(|end| (Event::Code(&text[1..end]), end + 1, 0, 0)) + } + b's' if text.starts_with("src_") => { + inline_src::parse(text).map(|(lang, option, body, off)| { + (Event::InlineSrc { lang, option, body }, off, 0, 0) + }) + } + b'c' if text.starts_with("call_") => { + inline_call::parse(text).map(|(name, args, inside_header, end_header, off)| { + ( + Event::InlineCall { + name, + args, + inside_header, + end_header, + }, + off, + 0, + 0, + ) + }) + } + _ => None, + } + } + + fn next_list_item(&mut self, ident: usize, text: &'a str) -> Event<'a> { + let (bullet, off, limit, end, has_more) = list::parse(text, ident); + self.push_stack(Container::ListItem, limit, end); self.off += off; self.list_more_item = has_more; Event::ListItemBeg { bullet } } + #[inline] + fn push_stack(&mut self, container: Container, limit: usize, end: usize) { + self.stack + .push((container, self.off + limit, self.off + end)); + } + #[inline] fn end(&mut self) -> Event<'a> { let (container, _, _) = self.stack.pop().unwrap(); @@ -390,53 +510,46 @@ impl<'a> Iterator for Parser<'a> { type Item = Event<'a>; fn next(&mut self) -> Option> { - self.stack - .last() - .cloned() - .map(|(container, cont_end, end)| { - if self.off >= cont_end { - debug_assert!(self.off <= cont_end); - debug_assert!(self.off <= end); - self.off = end; - self.end() - } else { - match container { - Container::Headline(beg) => { - debug_assert!(self.off >= beg); - if self.off == beg { - self.next_section_or_headline() - } else { - self.next_headline() - } + if let Some(&(container, limit, end)) = self.stack.last() { + Some(if self.off >= limit { + debug_assert!(self.off <= limit && self.off <= end); + self.off = end; + self.end() + } else { + match container { + Container::Headline(beg) => { + debug_assert!(self.off >= beg); + if self.off == beg { + self.next_section_or_headline() + } else { + self.next_headline() } - Container::DynBlock - | Container::CtrBlock - | Container::QteBlock - | Container::SplBlock - | Container::ListItem - | Container::Section => self.next_ele(end), - Container::List(ident, _) => { - if self.list_more_item { - self.next_list_item(ident, end) - } else { - self.end() - } - } - Container::Paragraph - | Container::Bold - | Container::Underline - | Container::Italic - | Container::Strike => self.next_obj(cont_end), } + Container::DynBlock + | Container::CtrBlock + | Container::QteBlock + | Container::SplBlock + | Container::ListItem + | Container::Section => self.next_ele(&self.text[self.off..limit]), + Container::List(ident, _) => { + if self.list_more_item { + self.next_list_item(ident, &self.text[self.off..limit]) + } else { + self.end() + } + } + Container::Paragraph + | Container::Bold + | Container::Underline + | Container::Italic + | Container::Strike => self.next_obj(&self.text[self.off..limit]), } }) - .or_else(|| { - if self.off >= self.text.len() { - None - } else { - Some(self.next_section_or_headline()) - } - }) + } else if self.off < self.text.len() { + Some(self.next_section_or_headline()) + } else { + None + } } } @@ -454,6 +567,7 @@ fn parse() { }), SectionBeg, ParagraphBeg, + Text("test "), BoldBeg, Text("Section 1"), BoldEnd, @@ -507,14 +621,10 @@ fn parse() { assert_eq!( Parser::new( - r#"* Title 1 -*Section 1* -** Title 2 -_Section 2_ -* Title 3 -/Section 3/ -* Title 4 -=Section 4="# + r#"#+OPTIONS: H:3 num:nil toc:t \n:nil ::t |:t ^:t -:t f:t *:t tex:t d:(HIDE) tags:not-in-toc + +* Definitions +"# ) .collect::>(), expected diff --git a/src/tools.rs b/src/tools.rs index 71eed53..d9f1282 100644 --- a/src/tools.rs +++ b/src/tools.rs @@ -1,5 +1,5 @@ use crate::elements::{fn_def, keyword, Key}; -use crate::headline::Headline; +use crate::headline::{Headline, DEFAULT_KEYWORDS}; use memchr::memchr; type Headlines<'a> = Vec>; @@ -15,7 +15,7 @@ pub fn metadata(src: &str) -> (Headlines<'_>, Keywords<'_>, Footnotes<'_>) { if line.starts_with('*') { let level = memchr(b' ', line.as_bytes()).unwrap_or_else(|| line.len()); if line.as_bytes()[0..level].iter().all(|&c| c == b'*') { - headlines.push(Headline::parse(line).0) + headlines.push(Headline::parse(line, DEFAULT_KEYWORDS).0) } } else if line.starts_with("#+") { if let Some((key, value, _)) = keyword::parse(line) { diff --git a/src/utils.rs b/src/utils.rs deleted file mode 100644 index 4b21a45..0000000 --- a/src/utils.rs +++ /dev/null @@ -1,29 +0,0 @@ -//! Utils macros - -#[macro_export] -macro_rules! expect { - ($src:ident, $index:expr, $expect:tt) => { - $src.as_bytes().get($index).filter(|&&b| b == $expect) - }; - ($src:ident, $index:expr, $expect:expr) => { - $src.as_bytes().get($index).filter(|&&b| $expect(b)) - }; -} - -#[macro_export] -macro_rules! skip_space { - ($src:ident) => { - $src.as_bytes() - .iter() - .position(|c| c != b' ' && c != b'\t') - .unwrap_or(0) - }; - ($src:ident, $from:expr) => { - $src[$from..] - .as_bytes() - .iter() - .position(|&c| c != b' ' && c != b'\t') - .map(|i| i + $from) - .unwrap_or(0) - }; -}