diff --git a/Cargo.toml b/Cargo.toml index ca43f63..7908ad0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,11 +12,18 @@ keywords = ["orgmode","emacs","parser"] [badges] travis-ci = { repository = "PoiScript/orgize" } +[features] +default = ["serde", "chrono"] +extra-serde-info = [] + [dependencies] bytecount = "0.5" chrono = { version = "0.4", optional = true } +indextree = "3.2.0" jetscii = "0.4" memchr = "2" +serde = { version = "1.0.93", features = ["derive"], optional = true } +serde_json = "1.0.39" [dev-dependencies] slugify = "0.1.0" diff --git a/benches/parse.rs b/benches/parse.rs index 469f7a3..db832b1 100644 --- a/benches/parse.rs +++ b/benches/parse.rs @@ -3,14 +3,14 @@ extern crate orgize; extern crate test; -use orgize::Parser; +use orgize::Org; use test::Bencher; #[bench] fn org_syntax(b: &mut Bencher) { // wget https://orgmode.org/worg/sources/dev/org-syntax.org b.iter(|| { - let _ = Parser::new(include_str!("org-syntax.org")).collect::>(); + Org::new(include_str!("org-syntax.org")).parse(); }) } @@ -18,7 +18,7 @@ fn org_syntax(b: &mut Bencher) { fn doc(b: &mut Bencher) { // wget https://orgmode.org/worg/sources/doc.org b.iter(|| { - let _ = Parser::new(include_str!("doc.org")).collect::>(); + Org::new(include_str!("doc.org")).parse(); }) } @@ -26,6 +26,6 @@ fn doc(b: &mut Bencher) { fn org_faq(b: &mut Bencher) { // wget https://orgmode.org/worg/sources/org-faq.org b.iter(|| { - let _ = Parser::new(include_str!("org-faq.org")).collect::>(); + Org::new(include_str!("org-faq.org")).parse(); }) } diff --git a/src/elements/block.rs b/src/elements/block.rs index f5f8ed6..e739cc6 100644 --- a/src/elements/block.rs +++ b/src/elements/block.rs @@ -1,69 +1,77 @@ use memchr::{memchr, memchr_iter}; -// return (name, args, contents-begin, contents-end, end) -#[inline] -pub fn parse(text: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> { - debug_assert!(text.starts_with("#+")); +#[cfg_attr(test, derive(PartialEq))] +#[derive(Debug)] +pub struct 
Block<'a> { + pub name: &'a str, + pub args: Option<&'a str>, +} - if text.len() <= 8 || text[2..8].to_uppercase() != "BEGIN_" { - return None; - } +impl Block<'_> { + #[inline] + // return (block, contents-begin, contents-end, end) + pub fn parse(text: &str) -> Option<(Block<'_>, usize, usize, usize)> { + debug_assert!(text.starts_with("#+")); - let mut lines = memchr_iter(b'\n', text.as_bytes()); - - let (name, para, off) = lines - .next() - .map(|i| { - memchr(b' ', &text.as_bytes()[8..i]) - .map(|x| (&text[8..8 + x], Some(text[8 + x..i].trim()), i + 1)) - .unwrap_or((&text[8..i], None, i + 1)) - }) - .filter(|(name, _, _)| name.as_bytes().iter().all(|&c| c.is_ascii_alphabetic()))?; - - let mut pos = off; - let end = format!(r"#+END_{}", name.to_uppercase()); - - for i in lines { - if text[pos..i].trim().eq_ignore_ascii_case(&end) { - return Some((name, para, off, pos, i + 1)); + if text.len() <= 8 || text[2..8].to_uppercase() != "BEGIN_" { + return None; } - pos = i + 1; - } + let mut lines = memchr_iter(b'\n', text.as_bytes()); - if text[pos..].trim().eq_ignore_ascii_case(&end) { - Some((name, para, off, pos, text.len())) - } else { - None + let (name, args, off) = lines + .next() + .map(|i| { + memchr(b' ', &text.as_bytes()[8..i]) + .map(|x| (&text[8..8 + x], Some(text[8 + x..i].trim()), i + 1)) + .unwrap_or((&text[8..i], None, i + 1)) + }) + .filter(|(name, _, _)| name.as_bytes().iter().all(|&c| c.is_ascii_alphabetic()))?; + + let mut pos = off; + let end = format!(r"#+END_{}", name.to_uppercase()); + + for i in lines { + if text[pos..i].trim().eq_ignore_ascii_case(&end) { + return Some((Block { name, args }, off, pos, i + 1)); + } + + pos = i + 1; + } + + if text[pos..].trim().eq_ignore_ascii_case(&end) { + Some((Block { name, args }, off, pos, text.len())) + } else { + None + } } } -#[cfg(test)] -mod tests { - #[test] - fn parse() { - use super::parse; - - assert_eq!( - parse("#+BEGIN_SRC\n#+END_SRC"), - Some(( - "SRC", - None, - "#+BEGIN_SRC\n".len(), - 
"#+BEGIN_SRC\n".len(), - "#+BEGIN_SRC\n#+END_SRC".len() - )) - ); - assert_eq!( - parse("#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n#+END_SRC\n"), - Some(( - "SRC", - Some("javascript"), - "#+BEGIN_SRC javascript \n".len(), - "#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n".len(), - "#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n#+END_SRC\n".len() - )) - ); - // TODO: more testing - } +#[test] +fn parse() { + assert_eq!( + Block::parse("#+BEGIN_SRC\n#+END_SRC"), + Some(( + Block { + name: "SRC", + args: None, + }, + "#+BEGIN_SRC\n".len(), + "#+BEGIN_SRC\n".len(), + "#+BEGIN_SRC\n#+END_SRC".len() + )) + ); + assert_eq!( + Block::parse("#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n#+END_SRC\n"), + Some(( + Block { + name: "SRC", + args: Some("javascript"), + }, + "#+BEGIN_SRC javascript \n".len(), + "#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n".len(), + "#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n#+END_SRC\n".len() + )) + ); + // TODO: more testing } diff --git a/src/elements/clock.rs b/src/elements/clock.rs index 69a1155..91c7091 100644 --- a/src/elements/clock.rs +++ b/src/elements/clock.rs @@ -1,4 +1,4 @@ -use crate::objects::timestamp::{Datetime, Timestamp}; +use crate::elements::{Datetime, Timestamp}; use memchr::memchr; /// clock elements @@ -23,8 +23,8 @@ pub enum Clock<'a> { }, } -impl<'a> Clock<'a> { - pub(crate) fn parse(text: &'a str) -> Option<(Clock<'a>, usize)> { +impl Clock<'_> { + pub(crate) fn parse(text: &str) -> Option<(Clock<'_>, usize)> { let (text, eol) = memchr(b'\n', text.as_bytes()) .map(|i| (text[..i].trim(), i + 1)) .unwrap_or_else(|| (text.trim(), text.len())); @@ -104,7 +104,7 @@ impl<'a> Clock<'a> { } /// returns `Some` if the clock is closed, `None` if running - pub fn duration(&self) -> Option<&'a str> { + pub fn duration(&self) -> Option<&str> { match self { Clock::Closed { duration, .. } => Some(duration), Clock::Running { .. 
} => None, diff --git a/src/objects/cookie.rs b/src/elements/cookie.rs similarity index 97% rename from src/objects/cookie.rs rename to src/elements/cookie.rs index 4c2fafb..94ffb15 100644 --- a/src/objects/cookie.rs +++ b/src/elements/cookie.rs @@ -7,8 +7,9 @@ pub enum Cookie<'a> { Slash(&'a str, &'a str), } -impl<'a> Cookie<'a> { +impl Cookie<'_> { #[inline] + // return (clock, offset) pub(crate) fn parse(src: &str) -> Option<(Cookie<'_>, usize)> { debug_assert!(src.starts_with('[')); diff --git a/src/elements/drawer.rs b/src/elements/drawer.rs index 367d7a5..afc0d60 100644 --- a/src/elements/drawer.rs +++ b/src/elements/drawer.rs @@ -1,51 +1,69 @@ use memchr::memchr_iter; -// return (name, offset, limit, end) -pub(crate) fn parse(text: &str) -> Option<(&str, usize, usize, usize)> { - debug_assert!(text.starts_with(':')); +#[cfg_attr(test, derive(PartialEq))] +#[derive(Debug)] +pub struct Drawer<'a> { + pub name: &'a str, +} - let mut lines = memchr_iter(b'\n', text.as_bytes()); +impl<'a> Drawer<'a> { + #[inline] + // return (drawer, contents-begin, contents-end , end) + pub(crate) fn parse(text: &'a str) -> Option<(Drawer<'a>, usize, usize, usize)> { + debug_assert!(text.starts_with(':')); - let (name, off) = lines - .next() - .map(|i| (text[1..i].trim_end(), i + 1)) - .filter(|(name, _)| { - name.ends_with(':') - && name[0..name.len() - 1] - .as_bytes() - .iter() - .all(|&c| c.is_ascii_alphabetic() || c == b'-' || c == b'_') - })?; + let mut lines = memchr_iter(b'\n', text.as_bytes()); - let mut pos = off; - for i in lines { - if text[pos..i].trim().eq_ignore_ascii_case(":END:") { - return Some((&name[0..name.len() - 1], off, pos, i + 1)); + let (name, off) = lines + .next() + .map(|i| (text[1..i].trim_end(), i + 1)) + .filter(|(name, _)| { + name.ends_with(':') + && name[0..name.len() - 1] + .as_bytes() + .iter() + .all(|&c| c.is_ascii_alphabetic() || c == b'-' || c == b'_') + })?; + + let mut pos = off; + for i in lines { + if 
text[pos..i].trim().eq_ignore_ascii_case(":END:") { + return Some(( + Drawer { + name: &name[0..name.len() - 1], + }, + off, + pos, + i + 1, + )); + } + pos = i + 1; } - pos = i + 1; - } - if text[pos..].trim().eq_ignore_ascii_case(":END:") { - Some((&name[0..name.len() - 1], off, pos, text.len())) - } else { - None - } -} - -#[cfg(test)] -mod tests { - #[test] - fn parse() { - use super::parse; - - assert_eq!( - parse(":PROPERTIES:\n :CUSTOM_ID: id\n :END:"), + if text[pos..].trim().eq_ignore_ascii_case(":END:") { Some(( - "PROPERTIES", - ":PROPERTIES:\n".len(), - ":PROPERTIES:\n :CUSTOM_ID: id\n".len(), - ":PROPERTIES:\n :CUSTOM_ID: id\n :END:".len() + Drawer { + name: &name[0..name.len() - 1], + }, + off, + pos, + text.len(), )) - ) + } else { + None + } } } + +#[test] +fn parse() { + assert_eq!( + Drawer::parse(":PROPERTIES:\n :CUSTOM_ID: id\n :END:"), + Some(( + Drawer { name: "PROPERTIES" }, + ":PROPERTIES:\n".len(), + ":PROPERTIES:\n :CUSTOM_ID: id\n".len(), + ":PROPERTIES:\n :CUSTOM_ID: id\n :END:".len() + )) + ) +} diff --git a/src/elements/dyn_block.rs b/src/elements/dyn_block.rs index 839b35f..bf4a915 100644 --- a/src/elements/dyn_block.rs +++ b/src/elements/dyn_block.rs @@ -1,65 +1,87 @@ use memchr::{memchr, memchr_iter}; -// return (name, parameters, offset, limit, end) -#[inline] -pub fn parse(text: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> { - debug_assert!(text.starts_with("#+")); +#[cfg_attr(test, derive(PartialEq))] +#[derive(Debug)] +pub struct DynBlock<'a> { + pub block_name: &'a str, + pub arguments: Option<&'a str>, +} - if text.len() <= "#+BEGIN: ".len() || !text[2..9].eq_ignore_ascii_case("BEGIN: ") { - return None; - } +impl DynBlock<'_> { + #[inline] + // return (dyn_block, contents-begin, contents-end, end) + pub(crate) fn parse(text: &str) -> Option<(DynBlock<'_>, usize, usize, usize)> { + debug_assert!(text.starts_with("#+")); - let bytes = text.as_bytes(); - let mut lines = memchr_iter(b'\n', bytes); - - let (name, 
para, off) = lines - .next() - .map(|i| { - memchr(b' ', &bytes["#+BEGIN: ".len()..i]) - .map(|x| { - ( - &text["#+BEGIN: ".len().."#+BEGIN: ".len() + x], - Some(text["#+BEGIN: ".len() + x..i].trim()), - i + 1, - ) - }) - .unwrap_or((&text["#+BEGIN: ".len()..i], None, i + 1)) - }) - .filter(|(name, _, _)| name.as_bytes().iter().all(|&c| c.is_ascii_alphabetic()))?; - - let mut pos = off; - - for i in lines { - if text[pos..i].trim().eq_ignore_ascii_case("#+END:") { - return Some((name, para, off, pos, i + 1)); + if text.len() <= "#+BEGIN: ".len() || !text[2..9].eq_ignore_ascii_case("BEGIN: ") { + return None; } - pos = i + 1; - } + let bytes = text.as_bytes(); + let mut lines = memchr_iter(b'\n', bytes); - if text[pos..].trim().eq_ignore_ascii_case("#+END:") { - Some((name, para, off, pos, text.len())) - } else { - None - } -} + let (name, para, off) = lines + .next() + .map(|i| { + memchr(b' ', &bytes["#+BEGIN: ".len()..i]) + .map(|x| { + ( + &text["#+BEGIN: ".len().."#+BEGIN: ".len() + x], + Some(text["#+BEGIN: ".len() + x..i].trim()), + i + 1, + ) + }) + .unwrap_or((&text["#+BEGIN: ".len()..i], None, i + 1)) + }) + .filter(|(name, _, _)| name.as_bytes().iter().all(|&c| c.is_ascii_alphabetic()))?; -#[cfg(test)] -mod tests { - #[test] - fn parse() { - use super::parse; + let mut pos = off; - // TODO: testing - assert_eq!( - parse("#+BEGIN: clocktable :scope file\nCONTENTS\n#+END:\n"), + for i in lines { + if text[pos..i].trim().eq_ignore_ascii_case("#+END:") { + return Some(( + DynBlock { + block_name: name, + arguments: para, + }, + off, + pos, + i + 1, + )); + } + + pos = i + 1; + } + + if text[pos..].trim().eq_ignore_ascii_case("#+END:") { Some(( - "clocktable", - Some(":scope file"), - "#+BEGIN: clocktable :scope file\n".len(), - "#+BEGIN: clocktable :scope file\nCONTENTS\n".len(), - "#+BEGIN: clocktable :scope file\nCONTENTS\n#+END:\n".len(), + DynBlock { + block_name: name, + arguments: para, + }, + off, + pos, + text.len(), )) - ); + } else { + None + } } } 
+ +#[test] +fn parse() { + // TODO: testing + assert_eq!( + DynBlock::parse("#+BEGIN: clocktable :scope file\nCONTENTS\n#+END:\n"), + Some(( + DynBlock { + block_name: "clocktable", + arguments: Some(":scope file"), + }, + "#+BEGIN: clocktable :scope file\n".len(), + "#+BEGIN: clocktable :scope file\nCONTENTS\n".len(), + "#+BEGIN: clocktable :scope file\nCONTENTS\n#+END:\n".len(), + )) + ); +} diff --git a/src/objects/emphasis.rs b/src/elements/emphasis.rs similarity index 100% rename from src/objects/emphasis.rs rename to src/elements/emphasis.rs diff --git a/src/elements/fn_def.rs b/src/elements/fn_def.rs index 0cfc603..11b558c 100644 --- a/src/elements/fn_def.rs +++ b/src/elements/fn_def.rs @@ -1,60 +1,68 @@ use memchr::memchr; -#[inline] -pub fn parse(text: &str) -> Option<(&str, &str, usize)> { - if text.starts_with("[fn:") { - let (label, off) = memchr(b']', text.as_bytes()) - .filter(|&i| { - i != 4 - && text.as_bytes()["[fn:".len()..i] - .iter() - .all(|&c| c.is_ascii_alphanumeric() || c == b'-' || c == b'_') - }) - .map(|i| (&text["[fn:".len()..i], i + 1))?; +#[cfg_attr(test, derive(PartialEq))] +#[derive(Debug)] +pub struct FnDef<'a> { + pub label: &'a str, +} - let (content, off) = memchr(b'\n', text.as_bytes()) - .map(|i| (&text[off..i], i)) - .unwrap_or_else(|| (&text[off..], text.len())); +impl FnDef<'_> { + #[inline] + pub fn parse(text: &str) -> Option<(FnDef<'_>, usize, usize)> { + if text.starts_with("[fn:") { + let (label, off) = memchr(b']', text.as_bytes()) + .filter(|&i| { + i != 4 + && text.as_bytes()["[fn:".len()..i] + .iter() + .all(|&c| c.is_ascii_alphanumeric() || c == b'-' || c == b'_') + }) + .map(|i| (&text["[fn:".len()..i], i + 1))?; - Some((label, content, off)) - } else { - None + let end = memchr(b'\n', text.as_bytes()).unwrap_or_else(|| text.len()); + + Some((FnDef { label }, off, end)) + } else { + None + } } } -#[cfg(test)] -mod tests { - #[test] - fn parse() { - use super::parse; - - assert_eq!( - parse("[fn:1] 
https://orgmode.org"), - Some(( - "1", - " https://orgmode.org", - "[fn:1] https://orgmode.org".len() - )) - ); - assert_eq!( - parse("[fn:word_1] https://orgmode.org"), - Some(( - "word_1", - " https://orgmode.org", - "[fn:word_1] https://orgmode.org".len() - )) - ); - assert_eq!( - parse("[fn:WORD-1] https://orgmode.org"), - Some(( - "WORD-1", - " https://orgmode.org", - "[fn:WORD-1] https://orgmode.org".len() - )) - ); - assert_eq!(parse("[fn:WORD]"), Some(("WORD", "", "[fn:WORD]".len()))); - assert_eq!(parse("[fn:] https://orgmode.org"), None); - assert_eq!(parse("[fn:wor d] https://orgmode.org"), None); - assert_eq!(parse("[fn:WORD https://orgmode.org"), None); - } +#[test] +fn parse() { + assert_eq!( + FnDef::parse("[fn:1] https://orgmode.org"), + Some(( + FnDef { label: "1" }, + "[fn:1]".len(), + "[fn:1] https://orgmode.org".len() + )) + ); + assert_eq!( + FnDef::parse("[fn:word_1] https://orgmode.org"), + Some(( + FnDef { label: "word_1" }, + "[fn:word_1]".len(), + "[fn:word_1] https://orgmode.org".len() + )) + ); + assert_eq!( + FnDef::parse("[fn:WORD-1] https://orgmode.org"), + Some(( + FnDef { label: "WORD-1" }, + "[fn:WORD-1]".len(), + "[fn:WORD-1] https://orgmode.org".len() + )) + ); + assert_eq!( + FnDef::parse("[fn:WORD]"), + Some(( + FnDef { label: "WORD" }, + "[fn:WORD]".len(), + "[fn:WORD]".len() + )) + ); + assert_eq!(FnDef::parse("[fn:] https://orgmode.org"), None); + assert_eq!(FnDef::parse("[fn:wor d] https://orgmode.org"), None); + assert_eq!(FnDef::parse("[fn:WORD https://orgmode.org"), None); } diff --git a/src/objects/fn_ref.rs b/src/elements/fn_ref.rs similarity index 98% rename from src/objects/fn_ref.rs rename to src/elements/fn_ref.rs index 0ef302f..0df41e1 100644 --- a/src/objects/fn_ref.rs +++ b/src/elements/fn_ref.rs @@ -7,8 +7,9 @@ pub struct FnRef<'a> { pub definition: Option<&'a str>, } -impl<'a> FnRef<'a> { +impl FnRef<'_> { #[inline] + // return (fn_ref, offset) pub fn parse(text: &str) -> Option<(FnRef<'_>, usize)> { 
debug_assert!(text.starts_with("[fn:")); diff --git a/src/objects/fragment.rs b/src/elements/fragment.rs similarity index 100% rename from src/objects/fragment.rs rename to src/elements/fragment.rs diff --git a/src/headline.rs b/src/elements/headline.rs similarity index 98% rename from src/headline.rs rename to src/elements/headline.rs index ef5ea49..c48f54f 100644 --- a/src/headline.rs +++ b/src/elements/headline.rs @@ -21,8 +21,8 @@ pub struct Headline<'a> { pub keyword: Option<&'a str>, } -impl<'a> Headline<'a> { - pub(crate) fn parse(text: &'a str, keywords: &'a [&'a str]) -> (Headline<'a>, usize, usize) { +impl Headline<'_> { + pub(crate) fn parse<'a>(text: &'a str, keywords: &[&str]) -> (Headline<'a>, usize, usize) { let level = memchr2(b'\n', b' ', text.as_bytes()).unwrap_or_else(|| text.len()); debug_assert!(level > 0); diff --git a/src/objects/inline_call.rs b/src/elements/inline_call.rs similarity index 100% rename from src/objects/inline_call.rs rename to src/elements/inline_call.rs diff --git a/src/objects/inline_src.rs b/src/elements/inline_src.rs similarity index 100% rename from src/objects/inline_src.rs rename to src/elements/inline_src.rs diff --git a/src/elements/keyword.rs b/src/elements/keyword.rs index 953032b..153043a 100644 --- a/src/elements/keyword.rs +++ b/src/elements/keyword.rs @@ -1,59 +1,16 @@ use memchr::{memchr, memchr2}; -#[cfg_attr(test, derive(PartialEq))] -#[derive(Debug)] -pub enum Key<'a> { - // Affiliated Keywords - // Only "CAPTION" and "RESULTS" keywords can have an optional value. 
- Caption { option: Option<&'a str> }, - Header, - Name, - Plot, - Results { option: Option<&'a str> }, - Attr { backend: &'a str }, - - // Keywords - Author, - Date, - Title, - Custom(&'a str), -} - #[cfg_attr(test, derive(PartialEq))] #[derive(Debug)] pub struct Keyword<'a> { - pub key: Key<'a>, + pub key: &'a str, + pub option: Option<&'a str>, pub value: &'a str, } -impl<'a> Keyword<'a> { - #[inline] - pub(crate) fn new(key: &'a str, option: Option<&'a str>, value: &'a str) -> Keyword<'a> { - Keyword { - key: match &*key.to_uppercase() { - "AUTHOR" => Key::Author, - "DATE" => Key::Date, - "HEADER" => Key::Header, - "NAME" => Key::Name, - "PLOT" => Key::Plot, - "TITLE" => Key::Title, - "RESULTS" => Key::Results { option }, - "CAPTION" => Key::Caption { option }, - k => { - if k.starts_with("ATTR_") { - Key::Attr { - backend: &key["ATTR_".len()..], - } - } else { - Key::Custom(key) - } - } - }, - value, - } - } - +impl Keyword<'_> { #[inline] + // return (key, option, value, offset) pub(crate) fn parse(text: &str) -> Option<(&str, Option<&str>, &str, usize)> { debug_assert!(text.starts_with("#+")); @@ -79,11 +36,11 @@ impl<'a> Keyword<'a> { (None, off) }; - let (value, off) = memchr(b'\n', bytes) - .map(|i| (&text[off..i], i + 1)) - .unwrap_or_else(|| (&text[off..], text.len())); + let end = memchr(b'\n', bytes) + .map(|i| i + 1) + .unwrap_or_else(|| text.len()); - Some((key, option, value.trim(), off)) + Some((key, option, &text[off..end].trim(), end)) } } diff --git a/src/objects/link.rs b/src/elements/link.rs similarity index 97% rename from src/objects/link.rs rename to src/elements/link.rs index 06045e6..8f89d81 100644 --- a/src/objects/link.rs +++ b/src/elements/link.rs @@ -8,8 +8,9 @@ pub struct Link<'a> { pub desc: Option<&'a str>, } -impl<'a> Link<'a> { +impl Link<'_> { #[inline] + // return (link, offset) pub(crate) fn parse(text: &str) -> Option<(Link<'_>, usize)> { debug_assert!(text.starts_with("[[")); diff --git a/src/elements/list.rs 
b/src/elements/list.rs index 10676d2..f3c8c20 100644 --- a/src/elements/list.rs +++ b/src/elements/list.rs @@ -1,54 +1,104 @@ use memchr::memchr_iter; use std::iter::once; -// (indentation, ordered, limit, end) -#[inline] -pub fn parse(text: &str) -> Option<(usize, bool, usize, usize)> { - let (indent, tail) = text - .find(|c| c != ' ') - .map(|off| (off, &text[off..])) - .unwrap_or((0, text)); +#[cfg_attr(test, derive(PartialEq))] +#[derive(Debug)] +pub struct List { + pub indent: usize, + pub ordered: bool, +} - let ordered = is_item(tail)?; - let bytes = text.as_bytes(); - let mut lines = memchr_iter(b'\n', bytes) - .map(|i| i + 1) - .chain(once(text.len())); - let mut pos = lines.next()?; +impl List { + #[inline] + // return (list, begin, end) + pub(crate) fn parse(text: &str) -> Option<(List, usize, usize)> { + let (indent, tail) = text + .find(|c| c != ' ') + .map(|off| (off, &text[off..])) + .unwrap_or((0, text)); - while let Some(i) = lines.next() { - let line = &text[pos..i]; - return if let Some(line_indent) = line.find(|c: char| !c.is_whitespace()) { - // this line is no empty - if line_indent < indent - || (line_indent == indent && is_item(&line[line_indent..]).is_none()) - { - Some((indent, ordered, pos, pos)) - } else { - pos = i; - continue; - } - } else if let Some(next_i) = lines.next() { - // this line is empty - let line = &text[i..next_i]; - if let Some(line_indent) = line.find(|c: char| !c.is_whitespace()) { + let ordered = is_item(tail)?; + let bytes = text.as_bytes(); + let mut lines = memchr_iter(b'\n', bytes) + .map(|i| i + 1) + .chain(once(text.len())); + let mut pos = lines.next()?; + + while let Some(i) = lines.next() { + let line = &text[pos..i]; + return if let Some(line_indent) = line.find(|c: char| !c.is_whitespace()) { + // this line is no empty if line_indent < indent || (line_indent == indent && is_item(&line[line_indent..]).is_none()) { - Some((indent, ordered, pos, pos)) + Some((List { indent, ordered }, pos, pos)) } else { - 
pos = next_i; + pos = i; continue; } + } else if let Some(next_i) = lines.next() { + // this line is empty + let line = &text[i..next_i]; + if let Some(line_indent) = line.find(|c: char| !c.is_whitespace()) { + if line_indent < indent + || (line_indent == indent && is_item(&line[line_indent..]).is_none()) + { + Some((List { indent, ordered }, pos, pos)) + } else { + pos = next_i; + continue; + } + } else { + Some((List { indent, ordered }, pos, next_i)) + } } else { - Some((indent, ordered, pos, next_i)) - } - } else { - Some((indent, ordered, pos, i)) - }; - } + Some((List { indent, ordered }, pos, i)) + }; + } - Some((indent, ordered, pos, pos)) + Some((List { indent, ordered }, pos, pos)) + } +} + +pub struct ListItem<'a> { + pub bullet: &'a str, +} + +impl ListItem<'_> { + pub fn parse(text: &str, indent: usize) -> (ListItem<'_>, usize, usize) { + debug_assert!(&text[0..indent].trim().is_empty()); + let off = &text[indent..].find(' ').unwrap() + 1 + indent; + + let bytes = text.as_bytes(); + let mut lines = memchr_iter(b'\n', bytes) + .map(|i| i + 1) + .chain(once(text.len())); + let mut pos = lines.next().unwrap(); + + for i in lines { + let line = &text[pos..i]; + if let Some(line_indent) = line.find(|c: char| !c.is_whitespace()) { + if line_indent == indent { + return ( + ListItem { + bullet: &text[indent..off], + }, + off, + pos, + ); + } + } + pos = i; + } + + ( + ListItem { + bullet: &text[indent..off], + }, + off, + text.len(), + ) + } } #[inline] @@ -97,60 +147,91 @@ fn test_is_item() { } #[test] -fn test_parse() { +fn list_parse() { assert_eq!( - parse("+ item1\n+ item2"), - Some((0, false, "+ item1\n+ item2".len(), "+ item1\n+ item2".len())) + List::parse("+ item1\n+ item2"), + Some(( + List { + indent: 0, + ordered: false, + }, + "+ item1\n+ item2".len(), + "+ item1\n+ item2".len() + )) ); assert_eq!( - parse("* item1\n \n* item2"), + List::parse("* item1\n \n* item2"), Some(( - 0, - false, + List { + indent: 0, + ordered: false + }, "* item1\n \n* 
item2".len(), "* item1\n \n* item2".len() )) ); assert_eq!( - parse("* item1\n \n \n* item2"), - Some((0, false, "* item1\n".len(), "* item1\n \n \n".len())) - ); - assert_eq!( - parse("* item1\n \n "), - Some((0, false, "+ item1\n".len(), "* item1\n \n ".len())) - ); - assert_eq!( - parse("+ item1\n + item2\n "), + List::parse("* item1\n \n \n* item2"), Some(( - 0, - false, + List { + indent: 0, + ordered: false, + }, + "* item1\n".len(), + "* item1\n \n \n".len() + )) + ); + assert_eq!( + List::parse("* item1\n \n "), + Some(( + List { + indent: 0, + ordered: false, + }, + "+ item1\n".len(), + "* item1\n \n ".len() + )) + ); + assert_eq!( + List::parse("+ item1\n + item2\n "), + Some(( + List { + indent: 0, + ordered: false, + }, "+ item1\n + item2\n".len(), "+ item1\n + item2\n ".len() )) ); assert_eq!( - parse("+ item1\n \n + item2\n \n+ item 3"), + List::parse("+ item1\n \n + item2\n \n+ item 3"), Some(( - 0, - false, + List { + indent: 0, + ordered: false, + }, "+ item1\n \n + item2\n \n+ item 3".len(), "+ item1\n \n + item2\n \n+ item 3".len() )) ); assert_eq!( - parse(" + item1\n \n + item2"), + List::parse(" + item1\n \n + item2"), Some(( - 2, - false, + List { + indent: 2, + ordered: false, + }, " + item1\n \n + item2".len(), " + item1\n \n + item2".len() )) ); assert_eq!( - parse("+ 1\n\n - 2\n\n - 3\n\n+ 4"), + List::parse("+ 1\n\n - 2\n\n - 3\n\n+ 4"), Some(( - 0, - false, + List { + indent: 0, + ordered: false, + }, "+ 1\n\n - 2\n\n - 3\n\n+ 4".len(), "+ 1\n\n - 2\n\n - 3\n\n+ 4".len() )) diff --git a/src/objects/macros.rs b/src/elements/macros.rs similarity index 95% rename from src/objects/macros.rs rename to src/elements/macros.rs index 4f779d6..94f5b3b 100644 --- a/src/objects/macros.rs +++ b/src/elements/macros.rs @@ -8,9 +8,9 @@ pub struct Macros<'a> { pub arguments: Option<&'a str>, } -impl<'a> Macros<'a> { +impl Macros<'_> { #[inline] - pub fn parse(text: &str) -> Option<(Macros<'_>, usize)> { + pub(crate) fn parse(text: &str) -> 
Option<(Macros<'_>, usize)> { debug_assert!(text.starts_with("{{{")); let bytes = text.as_bytes(); diff --git a/src/elements/mod.rs b/src/elements/mod.rs index d91a158..a366e97 100644 --- a/src/elements/mod.rs +++ b/src/elements/mod.rs @@ -1,16 +1,224 @@ -/// elements -/// -/// elements means some syntactical parts that have the same level with paragraph. -pub(crate) mod block; -pub(crate) mod clock; -pub(crate) mod drawer; -pub(crate) mod dyn_block; -pub(crate) mod fn_def; -pub(crate) mod keyword; -pub(crate) mod list; -pub(crate) mod planning; -pub(crate) mod rule; +mod block; +mod clock; +mod cookie; +mod drawer; +mod dyn_block; +mod fn_def; +mod fn_ref; +mod fragment; +mod headline; +mod inline_call; +mod inline_src; +mod keyword; +mod link; +mod list; +mod macros; +mod planning; +mod radio_target; +mod rule; +mod snippet; +mod target; +mod timestamp; -pub use self::clock::Clock; -pub use self::keyword::{Key, Keyword}; -pub use self::planning::Planning; +pub mod emphasis; + +pub use self::{ + block::Block, + clock::Clock, + cookie::Cookie, + drawer::Drawer, + dyn_block::DynBlock, + fn_def::FnDef, + fn_ref::FnRef, + headline::Headline, + inline_call::InlineCall, + inline_src::InlineSrc, + keyword::Keyword, + link::Link, + list::{List, ListItem}, + macros::Macros, + planning::Planning, + radio_target::RadioTarget, + rule::Rule, + snippet::Snippet, + target::Target, + timestamp::*, +}; + +#[derive(Debug)] +pub enum Element<'a> { + Block { + block: Block<'a>, + begin: usize, + end: usize, + contents_begin: usize, + contents_end: usize, + }, + BabelCall { + value: &'a str, + begin: usize, + end: usize, + }, + Section { + begin: usize, + end: usize, + contents_begin: usize, + contents_end: usize, + }, + Clock { + clock: Clock<'a>, + begin: usize, + end: usize, + }, + Cookie { + cookie: Cookie<'a>, + begin: usize, + end: usize, + }, + RadioTarget { + radio_target: RadioTarget<'a>, + begin: usize, + end: usize, + }, + Drawer { + drawer: Drawer<'a>, + begin: usize, + 
end: usize, + contents_begin: usize, + contents_end: usize, + }, + Document { + begin: usize, + end: usize, + }, + DynBlock { + dyn_block: DynBlock<'a>, + begin: usize, + end: usize, + contents_begin: usize, + contents_end: usize, + }, + FnDef { + begin: usize, + end: usize, + contents_begin: usize, + contents_end: usize, + fn_def: FnDef<'a>, + }, + FnRef { + fn_ref: FnRef<'a>, + begin: usize, + end: usize, + }, + Headline { + begin: usize, + end: usize, + contents_begin: usize, + contents_end: usize, + headline: Headline<'a>, + }, + InlineCall { + inline_call: InlineCall<'a>, + begin: usize, + end: usize, + }, + InlineSrc { + inline_src: InlineSrc<'a>, + begin: usize, + end: usize, + }, + Keyword { + keyword: Keyword<'a>, + begin: usize, + end: usize, + }, + Link { + link: Link<'a>, + begin: usize, + end: usize, + }, + List { + list: List, + begin: usize, + end: usize, + contents_begin: usize, + contents_end: usize, + }, + ListItem { + list_item: ListItem<'a>, + begin: usize, + end: usize, + contents_begin: usize, + contents_end: usize, + }, + Macros { + macros: Macros<'a>, + begin: usize, + end: usize, + }, + Planning(Planning<'a>), + Snippet { + begin: usize, + end: usize, + snippet: Snippet<'a>, + }, + Text { + value: &'a str, + begin: usize, + end: usize, + }, + Paragraph { + begin: usize, + end: usize, + contents_begin: usize, + contents_end: usize, + }, + Rule { + begin: usize, + end: usize, + }, + Timestamp { + begin: usize, + end: usize, + timestamp: Timestamp<'a>, + }, + Target { + target: Target<'a>, + begin: usize, + end: usize, + }, + Bold { + begin: usize, + end: usize, + contents_begin: usize, + contents_end: usize, + }, + Strike { + begin: usize, + end: usize, + contents_begin: usize, + contents_end: usize, + }, + Italic { + begin: usize, + end: usize, + contents_begin: usize, + contents_end: usize, + }, + Underline { + begin: usize, + end: usize, + contents_begin: usize, + contents_end: usize, + }, + Verbatim { + begin: usize, + end: usize, + 
value: &'a str, + }, + Code { + begin: usize, + end: usize, + value: &'a str, + }, +} diff --git a/src/elements/planning.rs b/src/elements/planning.rs index 81e7d96..33fa879 100644 --- a/src/elements/planning.rs +++ b/src/elements/planning.rs @@ -1,4 +1,4 @@ -use crate::objects::Timestamp; +use crate::elements::Timestamp; use memchr::memchr; /// palnning elements @@ -13,8 +13,9 @@ pub struct Planning<'a> { pub closed: Option>, } -impl<'a> Planning<'a> { - pub(crate) fn parse(text: &'a str) -> Option<(Planning<'a>, usize)> { +impl Planning<'_> { + #[inline] + pub(crate) fn parse(text: &str) -> Option<(Planning<'_>, usize)> { let (mut deadline, mut scheduled, mut closed) = (None, None, None); let (mut tail, off) = memchr(b'\n', text.as_bytes()) .map(|i| (text[..i].trim(), i + 1)) @@ -60,7 +61,7 @@ impl<'a> Planning<'a> { #[test] fn prase() { - use crate::objects::Datetime; + use crate::elements::Datetime; assert_eq!( Planning::parse("SCHEDULED: <2019-04-08 Mon>\n"), diff --git a/src/elements/radio_target.rs b/src/elements/radio_target.rs new file mode 100644 index 0000000..a40df2a --- /dev/null +++ b/src/elements/radio_target.rs @@ -0,0 +1,53 @@ +use jetscii::Substring; + +// TODO: text-markup, entities, latex-fragments, subscript and superscript +#[cfg_attr(test, derive(PartialEq))] +#[derive(Debug)] +pub struct RadioTarget<'a> { + contents: &'a str, +} + +impl RadioTarget<'_> { + #[inline] + // return (radio_target, offset) + pub(crate) fn parse(src: &str) -> Option<(RadioTarget<'_>, usize)> { + debug_assert!(src.starts_with("<<<")); + + let bytes = src.as_bytes(); + let (contents, off) = Substring::new(">>>") + .find(src) + .filter(|&i| { + bytes[3] != b' ' + && bytes[i - 1] != b' ' + && bytes[3..i] + .iter() + .all(|&c| c != b'<' && c != b'\n' && c != b'>') + }) + .map(|i| (&src[3..i], i + ">>>".len()))?; + + Some((RadioTarget { contents }, off)) + } +} + +#[test] +fn parse() { + assert_eq!( + RadioTarget::parse("<<>>"), + Some((RadioTarget { contents: "target" 
}, "<<>>".len())) + ); + assert_eq!( + RadioTarget::parse("<<>>"), + Some(( + RadioTarget { + contents: "tar get" + }, + "<<>>".len() + )) + ); + assert_eq!(RadioTarget::parse("<<>>"), None); + assert_eq!(RadioTarget::parse("<<< target>>>"), None); + assert_eq!(RadioTarget::parse("<<>>"), None); + assert_eq!(RadioTarget::parse("<<get>>>"), None); + assert_eq!(RadioTarget::parse("<<>>"), None); + assert_eq!(RadioTarget::parse("<<>"), None); +} diff --git a/src/elements/rule.rs b/src/elements/rule.rs index 89e877d..d763b96 100644 --- a/src/elements/rule.rs +++ b/src/elements/rule.rs @@ -1,37 +1,37 @@ -#[inline] -pub fn parse(text: &str) -> usize { - let (text, off) = memchr::memchr(b'\n', text.as_bytes()) - .map(|i| (text[..i].trim(), i + 1)) - .unwrap_or_else(|| (text.trim(), text.len())); +pub struct Rule; - if text.len() >= 5 && text.as_bytes().iter().all(|&c| c == b'-') { - off - } else { - 0 +impl Rule { + #[inline] + // return offset + pub(crate) fn parse(text: &str) -> Option { + let (text, off) = memchr::memchr(b'\n', text.as_bytes()) + .map(|i| (text[..i].trim(), i + 1)) + .unwrap_or_else(|| (text.trim(), text.len())); + + if text.len() >= 5 && text.as_bytes().iter().all(|&c| c == b'-') { + Some(off) + } else { + None + } } } -#[cfg(test)] -mod tests { - #[test] - fn parse() { - use super::parse; - - assert_eq!(parse("-----"), "-----".len()); - assert_eq!(parse("--------"), "--------".len()); - assert_eq!(parse(" -----"), " -----".len()); - assert_eq!(parse("\t\t-----"), "\t\t-----".len()); - assert_eq!(parse("\t\t-----\n"), "\t\t-----\n".len()); - assert_eq!(parse("\t\t----- \n"), "\t\t----- \n".len()); - assert_eq!(parse(""), 0); - assert_eq!(parse("----"), 0); - assert_eq!(parse(" ----"), 0); - assert_eq!(parse(" 0----"), 0); - assert_eq!(parse("0 ----"), 0); - assert_eq!(parse("0------"), 0); - assert_eq!(parse("----0----"), 0); - assert_eq!(parse("\t\t----"), 0); - assert_eq!(parse("------0"), 0); - assert_eq!(parse("----- 0"), 0); - } +#[test] +fn 
/// A target object, written `<<TARGET>>`.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Target<'a> {
    /// Text found between the `<<` and `>>` delimiters.
    pub target: &'a str,
}

impl Target<'_> {
    /// Parses a target located at the very start of `text`.
    ///
    /// `text` is expected to begin with `<<` (checked by a `debug_assert!`).
    ///
    /// Returns the target together with the number of bytes consumed (closing
    /// `>>` included), or `None` when:
    ///
    /// * there is no closing `>>`,
    /// * the target is empty,
    /// * the target starts or ends with a space, or
    /// * the target contains `<`, `>` or a newline.
    #[inline]
    pub(crate) fn parse(text: &str) -> Option<(Target<'_>, usize)> {
        const OPEN: &str = "<<";
        const CLOSE: &str = ">>";

        debug_assert!(text.starts_with(OPEN));

        let close = text.find(CLOSE)?;
        // `get` keeps a violated precondition from turning into a slicing panic.
        let target = text.get(OPEN.len()..close)?;

        // The space checks must look at the target itself: on an empty target
        // the neighbouring bytes are the delimiters, never a space.
        if target.is_empty() || target.starts_with(' ') || target.ends_with(' ') {
            return None;
        }

        if target
            .bytes()
            .any(|c| c == b'<' || c == b'>' || c == b'\n')
        {
            return None;
        }

        Some((Target { target }, close + CLOSE.len()))
    }
}
Timestamp<'a> { )) } - fn parse_datetime(text: &'a str) -> Option<(Datetime<'a>, Option>)> { + fn parse_datetime(text: &str) -> Option<(Datetime<'_>, Option>)> { if text.is_empty() || !text.starts_with(|c: char| c.is_ascii_digit()) || !text.ends_with(|c: char| c.is_ascii_alphanumeric()) diff --git a/src/lib.rs b/src/lib.rs index 50d1df7..1812d2c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -140,10 +140,8 @@ //! ``` pub mod elements; -pub mod export; -pub mod headline; -pub mod objects; -mod parser; -pub mod tools; +pub mod org; +#[cfg(feature = "serde")] +mod serde; -pub use parser::{Event, Parser}; +pub use org::Org; diff --git a/src/objects/entity.rs b/src/objects/entity.rs deleted file mode 100644 index a4d20c5..0000000 --- a/src/objects/entity.rs +++ /dev/null @@ -1,38 +0,0 @@ -pub struct Entity<'a> { - pub name: &'a str, - pub contents: Option<&'a str>, -} - -impl<'a> Entity<'a> { - pub fn parse(src: &'a str) -> Option<(Entity<'a>, usize)> { - expect!(src, 0, b'\\')?; - - let name = 0; - - if src.as_bytes()[name] == b'[' { - Some(( - Entity { - name: &src[1..name], - contents: None, - }, - name, - )) - } else if src.as_bytes()[name] == b'{' { - Some(( - Entity { - name: &src[1..name], - contents: None, - }, - name, - )) - } else { - Some(( - Entity { - name: &src[1..name], - contents: None, - }, - name, - )) - } - } -} diff --git a/src/objects/mod.rs b/src/objects/mod.rs deleted file mode 100644 index 8cae4ab..0000000 --- a/src/objects/mod.rs +++ /dev/null @@ -1,23 +0,0 @@ -/// objects -/// -/// objects is something that included in an element. 
-pub(crate) mod cookie; -pub(crate) mod emphasis; -pub(crate) mod fn_ref; -pub(crate) mod inline_call; -pub(crate) mod inline_src; -pub(crate) mod link; -pub(crate) mod macros; -pub(crate) mod radio_target; -pub(crate) mod snippet; -pub(crate) mod target; -pub(crate) mod timestamp; - -pub use self::cookie::Cookie; -pub use self::fn_ref::FnRef; -pub use self::inline_call::InlineCall; -pub use self::inline_src::InlineSrc; -pub use self::link::Link; -pub use self::macros::Macros; -pub use self::snippet::Snippet; -pub use self::timestamp::*; diff --git a/src/objects/radio_target.rs b/src/objects/radio_target.rs deleted file mode 100644 index 43740ec..0000000 --- a/src/objects/radio_target.rs +++ /dev/null @@ -1,44 +0,0 @@ -use jetscii::Substring; - -// TODO: text-markup, entities, latex-fragments, subscript and superscript -#[inline] -pub fn parse(src: &str) -> Option<(&str, usize)> { - debug_assert!(src.starts_with("<<<")); - - let bytes = src.as_bytes(); - let (target, off) = Substring::new(">>>") - .find(src) - .filter(|&i| { - bytes[3] != b' ' - && bytes[i - 1] != b' ' - && bytes[3..i] - .iter() - .all(|&c| c != b'<' && c != b'\n' && c != b'>') - }) - .map(|i| (&src[3..i], i + ">>>".len()))?; - - Some((target, off)) -} - -#[cfg(test)] -mod tests { - #[test] - fn parse() { - use super::parse; - - assert_eq!( - parse("<<>>"), - Some(("target", "<<>>".len())) - ); - assert_eq!( - parse("<<>>"), - Some(("tar get", "<<>>".len())) - ); - assert_eq!(parse("<<>>"), None); - assert_eq!(parse("<<< target>>>"), None); - assert_eq!(parse("<<>>"), None); - assert_eq!(parse("<<get>>>"), None); - assert_eq!(parse("<<>>"), None); - assert_eq!(parse("<<>"), None); - } -} diff --git a/src/objects/target.rs b/src/objects/target.rs deleted file mode 100644 index 2f37470..0000000 --- a/src/objects/target.rs +++ /dev/null @@ -1,36 +0,0 @@ -use jetscii::Substring; - -#[inline] -pub fn parse(text: &str) -> Option<(&str, usize)> { - debug_assert!(text.starts_with("<<")); - - let bytes = 
text.as_bytes(); - - Substring::new(">>") - .find(text) - .filter(|&i| { - bytes[2] != b' ' - && bytes[i - 1] != b' ' - && bytes[2..i] - .iter() - .all(|&c| c != b'<' && c != b'\n' && c != b'>') - }) - .map(|i| (&text[2..i], i + ">>".len())) -} - -#[cfg(test)] -mod tests { - #[test] - fn parse() { - use super::parse; - - assert_eq!(parse("<>"), Some(("target", "<>".len()))); - assert_eq!(parse("<>"), Some(("tar get", "<>".len()))); - assert_eq!(parse("<>"), None); - assert_eq!(parse("<< target>>"), None); - assert_eq!(parse("<>"), None); - assert_eq!(parse("<get>>"), None); - assert_eq!(parse("<>"), None); - assert_eq!(parse("<"), None); - } -} diff --git a/src/org.rs b/src/org.rs new file mode 100644 index 0000000..58a3ece --- /dev/null +++ b/src/org.rs @@ -0,0 +1,647 @@ +use crate::elements::*; + +use indextree::{Arena, NodeId}; +use jetscii::bytes; +use memchr::{memchr_iter, memrchr_iter}; + +pub struct Org<'a> { + pub(crate) arena: Arena>, + pub(crate) root: NodeId, + text: &'a str, +} + +impl<'a> Org<'a> { + pub fn new(text: &'a str) -> Self { + let mut arena = Arena::new(); + let root = arena.new_node(Element::Document { + begin: 0, + end: text.len(), + }); + + Org { arena, root, text } + } + + pub fn finish(&self) -> bool { + self.arena[self.root].first_child().is_some() + } + + pub fn parse(&mut self) { + if self.finish() { + return; + } + + let mut node = self.root; + loop { + match self.arena[node].data { + Element::Document { begin, end, .. } + | Element::Headline { + contents_begin: begin, + contents_end: end, + .. 
+ } => { + let mut begin = begin; + if begin < end { + let off = Headline::find_level(&self.text[begin..end], std::usize::MAX); + if off != 0 { + let (contents_begin, contents_end) = + skip_empty_lines(&self.text[begin..begin + off]); + let section = Element::Section { + begin, + end: begin + off, + contents_begin: begin + contents_begin, + contents_end: begin + contents_end, + }; + let new_node = self.arena.new_node(section); + node.append(new_node, &mut self.arena).unwrap(); + begin += off; + } + } + while begin < end { + let (headline, off, end) = Headline::parse(&self.text[begin..end], &[]); + let headline = Element::Headline { + headline, + begin, + end: begin + end, + contents_begin: begin + off, + contents_end: begin + end, + }; + let new_node = self.arena.new_node(headline); + node.append(new_node, &mut self.arena).unwrap(); + begin += end; + } + } + Element::Section { + contents_begin, + contents_end, + .. + } + | Element::Block { + contents_begin, + contents_end, + .. + } + | Element::ListItem { + contents_begin, + contents_end, + .. + } => { + self.parse_elements_children(contents_begin, contents_end, node); + } + Element::Paragraph { + contents_begin, + contents_end, + .. + } + | Element::Bold { + contents_begin, + contents_end, + .. + } + | Element::Underline { + contents_begin, + contents_end, + .. + } + | Element::Italic { + contents_begin, + contents_end, + .. + } + | Element::Strike { + contents_begin, + contents_end, + .. + } => { + self.parse_objects_children(contents_begin, contents_end, node); + } + Element::List { + list: List { indent, .. }, + contents_begin, + contents_end, + .. 
+ } => { + self.parse_list_items(contents_begin, contents_end, indent, node); + } + _ => (), + } + + if let Some(next_node) = self.next_node(node) { + node = next_node; + } else { + break; + } + } + } + + fn next_node(&self, mut node: NodeId) -> Option { + if let Some(child) = self.arena[node].first_child() { + return Some(child); + } + + loop { + if let Some(sibling) = self.arena[node].next_sibling() { + return Some(sibling); + } else if let Some(parent) = self.arena[node].parent() { + node = parent; + } else { + return None; + } + } + } + + fn parse_elements_children(&mut self, mut begin: usize, end: usize, node: NodeId) { + 'out: while begin < end { + let text = &self.text[begin..end]; + let mut pos = 0; + for i in memchr_iter(b'\n', text.as_bytes()) { + if text.as_bytes()[pos..i].iter().all(u8::is_ascii_whitespace) { + let (end, _) = skip_empty_lines(&text[i..]); + let new_node = self.arena.new_node(Element::Paragraph { + begin, + end: begin + i + end, + contents_begin: begin, + contents_end: begin + pos, + }); + node.append(new_node, &mut self.arena).unwrap(); + begin += i + end; + continue 'out; + } else if let Some((ty, off)) = self.parse_element(begin + pos, end) { + let new_node = self.arena.new_node(Element::Paragraph { + begin, + end: begin + pos, + contents_begin: begin, + contents_end: begin + pos, + }); + node.append(new_node, &mut self.arena).unwrap(); + let new_node = self.arena.new_node(ty); + node.append(new_node, &mut self.arena).unwrap(); + begin += pos + off; + continue 'out; + } + pos = i + 1; + } + + let new_node = self.arena.new_node(Element::Paragraph { + begin, + end, + contents_begin: begin, + contents_end: if text.ends_with('\n') { end - 1 } else { end }, + }); + begin = end; + node.append(new_node, &mut self.arena).unwrap(); + } + } + + fn parse_element(&self, begin: usize, end: usize) -> Option<(Element<'a>, usize)> { + let text = &self.text[begin..end]; + + if let Some((fn_def, off, end)) = FnDef::parse(text) { + let fn_def = 
Element::FnDef { + begin, + end: begin + end, + contents_begin: begin + off, + contents_end: begin + end, + fn_def, + }; + return Some((fn_def, end)); + } else if let Some((list, limit, end)) = List::parse(text) { + let list = Element::List { + list, + begin, + end: begin + end, + contents_begin: begin, + contents_end: begin + limit, + }; + return Some((list, end)); + } + + let line_begin = text.find(|c: char| !c.is_ascii_whitespace()).unwrap_or(0); + let tail = &text[line_begin..]; + + if let Some((clock, end)) = Clock::parse(tail) { + let clock = Element::Clock { + clock, + begin, + end: begin + line_begin + end, + }; + return Some((clock, line_begin + end)); + } + + // TODO: LaTeX environment + if tail.starts_with("\\begin{") {} + + // rule + if tail.starts_with("-----") { + if let Some(end) = Rule::parse(tail) { + let rule = Element::Rule { + begin, + end: begin + line_begin + end, + }; + return Some((rule, line_begin + end)); + } + } + + if tail.starts_with(':') { + if let Some((drawer, off, limit, end)) = Drawer::parse(tail) { + let drawer = Element::Drawer { + drawer, + begin, + end: begin + line_begin + end, + contents_begin: begin + line_begin + off, + contents_end: begin + line_begin + limit, + }; + return Some((drawer, line_begin + end)); + } + } + + // fixed width + if tail.starts_with(": ") || tail.starts_with(":\n") { + // let end = line_ends + // .skip_while(|&i| { + // text[i + 1..].starts_with(": ") || text[i + 1..].starts_with(":\n") + // }) + // .next() + // .map(|i| i + 1) + // .unwrap_or_else(|| text.len()); + // let off = end - pos; + // brk!(Element::FixedWidth(&tail[0..off]), off); + } + + // comment + if tail.starts_with("# ") || tail.starts_with("#\n") { + // let end = line_ends + // .skip_while(|&i| { + // text[i + 1..].starts_with("# ") || text[i + 1..].starts_with("#\n") + // }) + // .next() + // .map(|i| i + 1) + // .unwrap_or_else(|| text.len()); + // let off = end - pos; + // brk!(Element::Comment(&tail[0..off]), off); + } + + if 
tail.starts_with("#+") { + if let Some((block, off, limit, end)) = Block::parse(tail) { + let block = Element::Block { + block, + begin, + end: begin + line_begin + end, + contents_begin: begin + line_begin + off, + contents_end: begin + line_begin + limit, + }; + return Some((block, line_begin + end)); + } else if let Some((dyn_block, off, limit, end)) = DynBlock::parse(tail) { + let dyn_block = Element::DynBlock { + dyn_block, + begin, + end: begin + line_begin + end, + contents_begin: begin + line_begin + off, + contents_end: begin + line_begin + limit, + }; + return Some((dyn_block, line_begin + end)); + } else if let Some((key, option, value, end)) = Keyword::parse(tail) { + if key.eq_ignore_ascii_case("CALL") { + let call = Element::BabelCall { + value, + begin, + end: begin + line_begin + end, + }; + return Some((call, line_begin + end)); + } else { + let kw = Element::Keyword { + keyword: Keyword { key, option, value }, + begin, + end: begin + line_begin + end, + }; + return Some((kw, line_begin + end)); + } + } + } + + None + } + + fn parse_objects_children(&mut self, mut begin: usize, end: usize, node: NodeId) { + 'out: while begin < end { + let bytes = self.text[begin..end].as_bytes(); + + match bytes[0] { + b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => { + if let Some((ty, off)) = self.parse_object(begin + 1, end) { + let new_node = self.arena.new_node(Element::Text { + value: &self.text[begin..=begin], + begin, + end, + }); + node.append(new_node, &mut self.arena).unwrap(); + let new_node = self.arena.new_node(ty); + node.append(new_node, &mut self.arena).unwrap(); + begin += 1 + off; + continue; + } + } + _ => { + if let Some((ty, off)) = self.parse_object(begin, end) { + let new_node = self.arena.new_node(ty); + node.append(new_node, &mut self.arena).unwrap(); + begin += off; + continue; + } + } + } + + let bs = bytes!(b'@', b' ', b'"', b'(', b'\n', b'{', b'<', b'['); + let mut pos = 0; + while let Some(off) = bs.find(&bytes[pos..]) { + pos += off; + 
assert!(begin + pos <= end); + match bytes[pos] { + b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => { + if let Some((ty, off)) = self.parse_object(begin + pos + 1, end) { + let new_node = self.arena.new_node(Element::Text { + value: &self.text[begin..=begin + pos], + begin, + end, + }); + node.append(new_node, &mut self.arena).unwrap(); + let new_node = self.arena.new_node(ty); + node.append(new_node, &mut self.arena).unwrap(); + begin += pos + 1 + off; + continue 'out; + } + } + _ => { + if let Some((ty, off)) = self.parse_object(begin + pos, end) { + let new_node = self.arena.new_node(Element::Text { + value: &self.text[begin..begin + pos], + begin, + end, + }); + node.append(new_node, &mut self.arena).unwrap(); + let new_node = self.arena.new_node(ty); + node.append(new_node, &mut self.arena).unwrap(); + begin += pos + off; + continue 'out; + } + } + } + pos += 1; + } + + let new_node = self.arena.new_node(Element::Text { + value: &self.text[begin..end], + begin, + end, + }); + node.append(new_node, &mut self.arena).unwrap(); + begin = end; + } + } + + fn parse_object(&self, begin: usize, end: usize) -> Option<(Element<'a>, usize)> { + let text = &self.text[begin..end]; + if text.len() < 3 { + None + } else { + let bytes = text.as_bytes(); + match bytes[0] { + b'@' if bytes[1] == b'@' => Snippet::parse(text).map(|(snippet, off)| { + ( + Element::Snippet { + snippet, + begin, + end: begin + off, + }, + off, + ) + }), + b'{' if bytes[1] == b'{' && bytes[2] == b'{' => { + Macros::parse(text).map(|(macros, off)| { + ( + Element::Macros { + macros, + begin, + end: begin + off, + }, + off, + ) + }) + } + b'<' if bytes[1] == b'<' => { + if bytes[2] == b'<' { + RadioTarget::parse(text).map(|(radio_target, off)| { + ( + Element::RadioTarget { + radio_target, + begin, + end: begin + off, + }, + off, + ) + }) + } else { + Target::parse(text).map(|(target, off)| { + ( + Element::Target { + target, + begin, + end: begin + off, + }, + off, + ) + }) + } + } + b'<' => 
Timestamp::parse_active(text) + .or_else(|| (Timestamp::parse_diary(text))) + .map(|(timestamp, off)| { + ( + Element::Timestamp { + timestamp, + begin, + end: begin + off, + }, + off, + ) + }), + b'[' => { + if text[1..].starts_with("fn:") { + FnRef::parse(text).map(|(fn_ref, off)| { + ( + Element::FnRef { + fn_ref, + begin, + end: begin + off, + }, + off, + ) + }) + } else if bytes[1] == b'[' { + Link::parse(text).map(|(link, off)| { + ( + Element::Link { + link, + begin, + end: begin + off, + }, + off, + ) + }) + } else { + Cookie::parse(text) + .map(|(cookie, off)| { + ( + Element::Cookie { + cookie, + begin, + end: begin + off, + }, + off, + ) + }) + .or_else(|| { + Timestamp::parse_inactive(text).map(|(timestamp, off)| { + ( + Element::Timestamp { + timestamp, + begin, + end: begin + off, + }, + off, + ) + }) + }) + } + } + b'*' => emphasis::parse(text, b'*').map(|off| { + ( + Element::Bold { + begin, + contents_begin: begin + 1, + contents_end: begin + off - 1, + end: begin + off, + }, + off, + ) + }), + b'+' => emphasis::parse(text, b'+').map(|off| { + ( + Element::Strike { + begin, + contents_begin: begin + 1, + contents_end: begin + off - 1, + end: begin + off, + }, + off, + ) + }), + b'/' => emphasis::parse(text, b'/').map(|off| { + ( + Element::Italic { + begin, + contents_begin: begin + 1, + contents_end: begin + off - 1, + end: begin + off, + }, + off, + ) + }), + b'_' => emphasis::parse(text, b'_').map(|off| { + ( + Element::Underline { + begin, + contents_begin: begin + 1, + contents_end: begin + off - 1, + end: begin + off, + }, + off, + ) + }), + b'=' => emphasis::parse(text, b'=').map(|off| { + ( + Element::Verbatim { + begin, + end: begin + off, + value: &text[1..off - 1], + }, + off, + ) + }), + b'~' => emphasis::parse(text, b'~').map(|off| { + ( + Element::Code { + begin, + end: begin + off, + value: &text[1..off - 1], + }, + off, + ) + }), + b's' if text.starts_with("src_") => { + InlineSrc::parse(text).map(|(inline_src, off)| { + ( + 
Element::InlineSrc { + inline_src, + begin, + end: begin + off, + }, + off, + ) + }) + } + b'c' if text.starts_with("call_") => { + InlineCall::parse(text).map(|(inline_call, off)| { + ( + Element::InlineCall { + inline_call, + begin, + end: begin + off, + }, + off, + ) + }) + } + _ => None, + } + } + } + + fn parse_list_items(&mut self, mut begin: usize, end: usize, indent: usize, node: NodeId) { + while begin < end { + let text = &self.text[begin..end]; + let (list_item, off, end) = ListItem::parse(text, indent); + let list_item = Element::ListItem { + list_item, + begin, + end: begin + end, + contents_begin: begin + off, + contents_end: begin + end, + }; + let new_node = self.arena.new_node(list_item); + node.append(new_node, &mut self.arena).unwrap(); + begin += end; + } + } +} + +fn skip_empty_lines(text: &str) -> (usize, usize) { + let mut i = 0; + let mut j = text.len(); + for pos in memchr_iter(b'\n', text.as_bytes()) { + if text.as_bytes()[i..pos].iter().all(u8::is_ascii_whitespace) { + i = pos + 1; + } else { + break; + } + } + + for pos in memrchr_iter(b'\n', text.as_bytes()) { + if text.as_bytes()[pos..j].iter().all(u8::is_ascii_whitespace) { + j = pos; + } else { + break; + } + } + + (i, j) +} diff --git a/src/parser.rs b/src/parser.rs deleted file mode 100644 index 313d7f8..0000000 --- a/src/parser.rs +++ /dev/null @@ -1,703 +0,0 @@ -//! 
Parser - -use crate::{elements::*, headline::*, objects::*}; -use jetscii::bytes; -use memchr::memchr_iter; - -#[derive(Copy, Clone, Debug)] -enum Container { - Headline(usize), - Section(usize), - Drawer, - Paragraph, - CtrBlock, - QteBlock, - SplBlock, - DynBlock, - List(usize, bool), - ListItem, - Italic, - Strike, - Bold, - Underline, -} - -#[derive(Debug)] -pub enum Event<'a> { - HeadlineBeg(Headline<'a>), - HeadlineEnd, - - SectionBeg, - SectionEnd, - - ParagraphBeg, - ParagraphEnd, - - CtrBlockBeg, - CtrBlockEnd, - QteBlockBeg, - QteBlockEnd, - SplBlockBeg { - name: &'a str, - args: Option<&'a str>, - }, - SplBlockEnd, - DynBlockBeg { - name: &'a str, - args: Option<&'a str>, - }, - DynBlockEnd, - - CommentBlock { - args: Option<&'a str>, - cont: &'a str, - }, - ExampleBlock { - args: Option<&'a str>, - cont: &'a str, - }, - ExportBlock { - args: Option<&'a str>, - cont: &'a str, - }, - SrcBlock { - args: Option<&'a str>, - cont: &'a str, - }, - VerseBlock { - args: Option<&'a str>, - cont: &'a str, - }, - - ListBeg { - indent: usize, - ordered: bool, - }, - ListEnd { - indent: usize, - ordered: bool, - }, - ListItemBeg { - bullet: &'a str, - }, - ListItemEnd, - - Call { - value: &'a str, - }, - - Clock(Clock<'a>), - - Comment(&'a str), - FixedWidth(&'a str), - - Planning(Planning<'a>), - - DrawerBeg(&'a str), - DrawerEnd, - - TableStart, - TableEnd, - TableCell, - - LatexEnv, - FnDef { - label: &'a str, - cont: &'a str, - }, - Keyword(Keyword<'a>), - Rule, - - Timestamp(Timestamp<'a>), - Cookie(Cookie<'a>), - FnRef(FnRef<'a>), - InlineCall(InlineCall<'a>), - InlineSrc(InlineSrc<'a>), - Link(Link<'a>), - Macros(Macros<'a>), - RadioTarget { - target: &'a str, - }, - Snippet(Snippet<'a>), - Target { - target: &'a str, - }, - - BoldBeg, - BoldEnd, - ItalicBeg, - ItalicEnd, - StrikeBeg, - StrikeEnd, - UnderlineBeg, - UnderlineEnd, - - Verbatim(&'a str), - Code(&'a str), - Text(&'a str), -} - -pub struct Parser<'a> { - text: &'a str, - stack: Vec<(Container, 
usize, usize)>, - off: usize, - ele_buf: Option<(Event<'a>, usize, usize, usize)>, - obj_buf: Option<(Event<'a>, usize, usize, usize)>, - todo_keywords: &'a [&'a str], -} - -impl<'a> Parser<'a> { - /// creates a new parser from string - pub fn new(text: &'a str) -> Parser<'a> { - Parser { - text, - stack: Vec::new(), - off: 0, - ele_buf: None, - obj_buf: None, - todo_keywords: DEFAULT_TODO_KEYWORDS, - } - } - - /// creates a new parser from string, with the specified keywords - pub fn with_todo_keywrods(text: &'a str, todo_keywords: &'a [&'a str]) -> Parser<'a> { - Parser { - text, - stack: Vec::new(), - off: 0, - ele_buf: None, - obj_buf: None, - todo_keywords, - } - } - - /// returns current offset - pub fn offset(&self) -> usize { - self.off - } - - /// returns current stack depth - pub fn stack_depth(&self) -> usize { - self.stack.len() - } - - /// set todo keywords - pub fn set_todo_keywords(&mut self, todo_keywords: &'a [&'a str]) { - self.todo_keywords = todo_keywords; - } - - /// set text - pub fn set_text(&mut self, text: &'a str) { - self.off = 0; - self.stack.clear(); - self.ele_buf = None; - self.obj_buf = None; - self.text = text; - } - - /// skip the current container if exists and return its Event - pub fn skip_container(&mut self) -> Option> { - let (container, _, end) = self.stack.pop()?; - self.off = end; - Some(match container { - Container::Bold => Event::BoldEnd, - Container::Drawer => Event::DrawerEnd, - Container::CtrBlock => Event::CtrBlockEnd, - Container::DynBlock => Event::DynBlockEnd, - Container::Headline(_) => Event::HeadlineEnd, - Container::Italic => Event::ItalicEnd, - Container::List(indent, ordered) => Event::ListEnd { indent, ordered }, - Container::ListItem => Event::ListItemEnd, - Container::Paragraph => Event::ParagraphEnd, - Container::QteBlock => Event::QteBlockEnd, - Container::Section(_) => Event::SectionEnd, - Container::SplBlock => Event::SplBlockEnd, - Container::Strike => Event::StrikeEnd, - Container::Underline => 
Event::UnderlineEnd, - }) - } - - fn next_section_or_headline(&mut self, text: &'a str) -> Event<'a> { - let end = Headline::find_level(text, std::usize::MAX); - if end != 0 { - self.push_stack(Container::Section(self.off), end, end); - Event::SectionBeg - } else { - self.next_headline(text) - } - } - - fn next_headline(&mut self, text: &'a str) -> Event<'a> { - let (hdl, off, end) = Headline::parse(text, self.todo_keywords); - self.push_stack(Container::Headline(self.off + off), end, end); - self.off += off; - Event::HeadlineBeg(hdl) - } - - fn next_ele(&mut self, text: &'a str) -> Event<'a> { - fn skip_empty_lines(text: &str) -> usize { - let mut i = 0; - for pos in memchr_iter(b'\n', text.as_bytes()) { - if text.as_bytes()[i..pos].iter().all(u8::is_ascii_whitespace) { - i = pos + 1; - } else { - return i; - } - } - if text.as_bytes()[i..].iter().all(u8::is_ascii_whitespace) { - text.len() - } else { - i - } - } - - let start = skip_empty_lines(text); - if start == text.len() { - self.off += text.len(); - return self.end(); - }; - let tail = &text[start..]; - - let (ele, off, limit, end) = self - .ele_buf - .take() - .or_else(|| self.real_next_ele(tail)) - .unwrap_or_else(|| { - let mut pos = 0; - for i in memchr_iter(b'\n', tail.as_bytes()) { - if tail.as_bytes()[pos..i].iter().all(u8::is_ascii_whitespace) { - return (Event::ParagraphBeg, 0, pos - 1 + start, i + 1 + start); - } else if let Some(buf) = self.real_next_ele(&tail[pos..]) { - self.ele_buf = Some(buf); - return (Event::ParagraphBeg, 0, pos - 1 + start, pos + start); - } - pos = i + 1; - } - let len = text.len(); - ( - Event::ParagraphBeg, - 0, - if text.ends_with('\n') { len - 1 } else { len }, - len, - ) - }); - - debug_assert!( - (limit == 0 && end == 0) || (off <= limit && limit <= end && end <= text.len()), - "{} <= {} <= {} <= {}", - off, - limit, - end, - text.len() - ); - - match ele { - Event::DrawerBeg(_) => self.push_stack(Container::Drawer, limit, end), - Event::ParagraphBeg => 
self.push_stack(Container::Paragraph, limit, end), - Event::QteBlockBeg => self.push_stack(Container::QteBlock, limit, end), - Event::CtrBlockBeg => self.push_stack(Container::CtrBlock, limit, end), - Event::SplBlockBeg { .. } => self.push_stack(Container::SplBlock, limit, end), - Event::DynBlockBeg { .. } => self.push_stack(Container::DynBlock, limit, end), - Event::ListBeg { ordered, indent } => { - self.push_stack(Container::List(indent, ordered), limit, end) - } - _ => (), - } - - self.off += off + start; - - ele - } - - // returns (event, offset, container limit, container end) - fn real_next_ele(&mut self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> { - debug_assert!(!text.starts_with('\n')); - - if let Some((label, cont, off)) = fn_def::parse(text) { - return Some((Event::FnDef { label, cont }, off + 1, 0, 0)); - } else if let Some((indent, ordered, limit, end)) = list::parse(text) { - return Some((Event::ListBeg { indent, ordered }, 0, limit, end)); - } - - let (tail, line_begin) = text - .find(|c| c != ' ') - .map(|off| (&text[off..], off)) - .unwrap_or((text, 0)); - - if let Some((clock, off)) = Clock::parse(tail) { - return Some((Event::Clock(clock), off + line_begin, 0, 0)); - } - - // TODO: LaTeX environment - if tail.starts_with("\\begin{") {} - - // rule - if tail.starts_with("-----") { - let off = rule::parse(tail); - if off != 0 { - return Some((Event::Rule, off + line_begin, 0, 0)); - } - } - - if tail.starts_with(':') { - if let Some((name, off, limit, end)) = drawer::parse(tail) { - return Some(( - Event::DrawerBeg(name), - off + line_begin, - limit + line_begin, - end + line_begin, - )); - } - } - - // fixed width - if tail.starts_with(": ") || tail.starts_with(":\n") { - // let end = line_ends - // .skip_while(|&i| { - // text[i + 1..].starts_with(": ") || text[i + 1..].starts_with(":\n") - // }) - // .next() - // .map(|i| i + 1) - // .unwrap_or_else(|| text.len()); - // let off = end - pos; - // 
brk!(Element::FixedWidth(&tail[0..off]), off); - } - - // comment - if tail.starts_with("# ") || tail.starts_with("#\n") { - // let end = line_ends - // .skip_while(|&i| { - // text[i + 1..].starts_with("# ") || text[i + 1..].starts_with("#\n") - // }) - // .next() - // .map(|i| i + 1) - // .unwrap_or_else(|| text.len()); - // let off = end - pos; - // brk!(Element::Comment(&tail[0..off]), off); - } - - if tail.starts_with("#+") { - block::parse(tail) - .map(|(name, args, begin, limit, end)| { - let cont = &tail[begin..limit]; - match &*name.to_uppercase() { - "COMMENT" => (Event::CommentBlock { args, cont }, end + line_begin, 0, 0), - "EXAMPLE" => (Event::ExampleBlock { args, cont }, end + line_begin, 0, 0), - "EXPORT" => (Event::ExportBlock { args, cont }, end + line_begin, 0, 0), - "SRC" => (Event::SrcBlock { args, cont }, end + line_begin, 0, 0), - "VERSE" => (Event::VerseBlock { args, cont }, end + line_begin, 0, 0), - "CENTER" => ( - Event::CtrBlockBeg, - begin + line_begin, - limit + line_begin, - end + line_begin, - ), - "QUOTE" => ( - Event::QteBlockBeg, - begin + line_begin, - limit + line_begin, - end + line_begin, - ), - _ => ( - Event::SplBlockBeg { name, args }, - begin + line_begin, - limit + line_begin, - end + line_begin, - ), - } - }) - .or_else(|| { - dyn_block::parse(tail).map(|(name, args, begin, limit, end)| { - ( - Event::DynBlockBeg { name, args }, - begin + line_begin, - limit + line_begin, - end + line_begin, - ) - }) - }) - .or_else(|| { - Keyword::parse(tail).map(|(key, option, value, off)| { - ( - if key.eq_ignore_ascii_case("CALL") { - Event::Call { value } - } else { - Event::Keyword(Keyword::new(key, option, value)) - }, - off + line_begin, - 0, - 0, - ) - }) - }) - } else { - None - } - } - - fn next_obj(&mut self, text: &'a str) -> Event<'a> { - let bytes = text.as_bytes(); - let (obj, off, limit, end) = self - .obj_buf - .take() - .or_else(|| match bytes[0] { - b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => { - if let Some(buf) = 
self.real_next_obj(&text[1..]) { - self.obj_buf = Some(buf); - Some((Event::Text(&text[0..1]), 1, 0, 0)) - } else { - None - } - } - _ => self.real_next_obj(text), - }) - .unwrap_or_else(|| { - let bs = bytes!(b'@', b' ', b'"', b'(', b'\n', b'{', b'<', b'['); - let mut pos = 0; - while let Some(off) = bs.find(&bytes[pos..]) { - pos += off; - match bytes[pos] { - b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => { - if let Some(buf) = self.real_next_obj(&text[pos + 1..]) { - self.obj_buf = Some(buf); - return (Event::Text(&text[0..=pos]), pos + 1, 0, 0); - } - } - _ => { - if let Some(buf) = self.real_next_obj(&text[pos..]) { - self.obj_buf = Some(buf); - return (Event::Text(&text[0..pos]), pos, 0, 0); - } - } - } - pos += 1; - } - (Event::Text(text), text.len(), 0, 0) - }); - - debug_assert!( - (limit == 0 && end == 0) || (off <= limit && limit <= end && end <= text.len()), - "{} <= {} <= {} <= {}", - off, - limit, - end, - text.len() - ); - - match obj { - Event::UnderlineBeg => self.push_stack(Container::Underline, limit, end), - Event::StrikeBeg => self.push_stack(Container::Strike, limit, end), - Event::ItalicBeg => self.push_stack(Container::Italic, limit, end), - Event::BoldBeg => self.push_stack(Container::Bold, limit, end), - _ => (), - } - - self.off += off; - - obj - } - - fn real_next_obj(&self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> { - if text.len() < 3 { - None - } else { - let bytes = text.as_bytes(); - match bytes[0] { - b'@' if bytes[1] == b'@' => { - Snippet::parse(text).map(|(snippet, off)| (Event::Snippet(snippet), off, 0, 0)) - } - b'{' if bytes[1] == b'{' && bytes[2] == b'{' => { - Macros::parse(text).map(|(macros, off)| (Event::Macros(macros), off, 0, 0)) - } - b'<' if bytes[1] == b'<' => { - if bytes[2] == b'<' { - radio_target::parse(text) - .map(|(target, off)| (Event::RadioTarget { target }, off, 0, 0)) - } else { - target::parse(text) - .map(|(target, off)| (Event::Target { target }, off, 0, 0)) - } - } - b'<' => 
Timestamp::parse_active(text) - .or_else(|| Timestamp::parse_diary(text)) - .map(|(ts, off)| (Event::Timestamp(ts), off, 0, 0)), - b'[' => { - if text[1..].starts_with("fn:") { - FnRef::parse(text).map(|(fn_ref, off)| (Event::FnRef(fn_ref), off, 0, 0)) - } else if bytes[1] == b'[' { - Link::parse(text).map(|(link, off)| (Event::Link(link), off, 0, 0)) - } else if let Some((cookie, off)) = Cookie::parse(text) { - Some((Event::Cookie(cookie), off, 0, 0)) - } else { - Timestamp::parse_inactive(text) - .map(|(ts, off)| (Event::Timestamp(ts), off, 0, 0)) - } - } - b'*' => emphasis::parse(text, b'*').map(|end| (Event::BoldBeg, 1, end - 1, end)), - b'+' => emphasis::parse(text, b'+').map(|end| (Event::StrikeBeg, 1, end - 1, end)), - b'/' => emphasis::parse(text, b'/').map(|end| (Event::ItalicBeg, 1, end - 1, end)), - b'_' => { - emphasis::parse(text, b'_').map(|end| (Event::UnderlineBeg, 1, end - 1, end)) - } - b'=' => emphasis::parse(text, b'=') - .map(|end| (Event::Verbatim(&text[1..end - 1]), end, 0, 0)), - b'~' => emphasis::parse(text, b'~') - .map(|end| (Event::Code(&text[1..end - 1]), end, 0, 0)), - b's' if text.starts_with("src_") => { - InlineSrc::parse(text).map(|(src, off)| (Event::InlineSrc(src), off, 0, 0)) - } - b'c' if text.starts_with("call_") => { - InlineCall::parse(text).map(|(call, off)| (Event::InlineCall(call), off, 0, 0)) - } - _ => None, - } - } - } - - fn next_list_item(&self, text: &'a str, indent: usize) -> (&'a str, usize, usize, usize) { - use std::iter::once; - - debug_assert!(&text[0..indent].trim().is_empty()); - let off = &text[indent..].find(' ').unwrap() + 1 + indent; - - let bytes = text.as_bytes(); - let mut lines = memchr_iter(b'\n', bytes) - .map(|i| i + 1) - .chain(once(text.len())); - let mut pos = lines.next().unwrap(); - - for i in lines { - let line = &text[pos..i]; - if let Some(line_indent) = line.find(|c: char| !c.is_whitespace()) { - if line_indent == indent { - return (&text[indent..off], off, pos, pos); - } - } - pos = i; - 
} - - (&text[indent..off], off, text.len(), text.len()) - } - - #[inline] - fn push_stack(&mut self, container: Container, limit: usize, end: usize) { - self.stack - .push((container, self.off + limit, self.off + end)); - } - - #[inline] - fn end(&mut self) -> Event<'a> { - let (container, _, _) = self.stack.pop().unwrap(); - match container { - Container::Bold => Event::BoldEnd, - Container::Drawer => Event::DrawerEnd, - Container::CtrBlock => Event::CtrBlockEnd, - Container::DynBlock => Event::DynBlockEnd, - Container::Headline(_) => Event::HeadlineEnd, - Container::Italic => Event::ItalicEnd, - Container::List(indent, ordered) => Event::ListEnd { indent, ordered }, - Container::ListItem => Event::ListItemEnd, - Container::Paragraph => Event::ParagraphEnd, - Container::QteBlock => Event::QteBlockEnd, - Container::Section(_) => Event::SectionEnd, - Container::SplBlock => Event::SplBlockEnd, - Container::Strike => Event::StrikeEnd, - Container::Underline => Event::UnderlineEnd, - } - } -} - -impl<'a> Iterator for Parser<'a> { - type Item = Event<'a>; - - fn next(&mut self) -> Option> { - if let Some(&(container, limit, end)) = self.stack.last() { - // eprint!("{:1$}", ' ', self.stack_depth()); - - debug_assert!( - self.off <= limit && limit <= end && end <= self.text.len(), - "{} <= {} <= {} <= {}", - self.off, - limit, - end, - self.text.len() - ); - - let tail = &self.text[self.off..limit]; - - // eprintln!("{:?} {:?}", container, tail); - - Some(match container { - Container::Headline(beg) => { - if self.off >= limit { - self.off = end; - self.stack.pop(); - Event::HeadlineEnd - } else if self.off == beg { - self.next_section_or_headline(tail) - } else { - self.next_headline(tail) - } - } - Container::Drawer - | Container::DynBlock - | Container::CtrBlock - | Container::QteBlock - | Container::SplBlock - | Container::ListItem => { - if self.off >= limit { - self.off = end; - self.end() - } else { - self.next_ele(tail) - } - } - Container::Section(beg) => { - // 
planning should be the first line of section - if self.off >= limit { - self.off = end; - self.stack.pop(); - Event::SectionEnd - } else if self.off == beg { - if let Some((planning, off)) = Planning::parse(tail) { - self.off += off; - Event::Planning(planning) - } else { - self.next_ele(tail) - } - } else { - self.next_ele(tail) - } - } - Container::List(indent, ordered) => { - if self.off < limit { - let (bullet, off, limit, end) = self.next_list_item(tail, indent); - self.push_stack(Container::ListItem, limit, end); - self.off += off; - Event::ListItemBeg { bullet } - } else { - self.off = end; - self.stack.pop(); - Event::ListEnd { indent, ordered } - } - } - Container::Paragraph - | Container::Bold - | Container::Underline - | Container::Italic - | Container::Strike => { - if self.off >= limit { - self.off = end; - self.end() - } else { - self.next_obj(tail) - } - } - }) - } else if self.off < self.text.len() { - Some(self.next_section_or_headline(&self.text[self.off..])) - } else { - None - } - } -} diff --git a/src/serde.rs b/src/serde.rs new file mode 100644 index 0000000..7b7bf6f --- /dev/null +++ b/src/serde.rs @@ -0,0 +1,458 @@ +use indextree::{Arena, NodeId}; +use serde::ser::{SerializeSeq, SerializeStruct, Serializer}; +use serde::Serialize; + +use crate::elements::Element; +use crate::org::Org; + +impl Serialize for Org<'_> { + fn serialize(&self, serializer: S) -> Result { + serializer.serialize_newtype_struct( + "Element", + &ElementNode { + node: self.root, + arena: &self.arena, + }, + ) + } +} + +struct ElementNode<'a> { + node: NodeId, + arena: &'a Arena>, +} + +impl Serialize for ElementNode<'_> { + #[allow(unused_variables)] + fn serialize(&self, serializer: S) -> Result { + let mut state; + match &self.arena[self.node].data { + Element::Document { begin, end } => { + state = serializer.serialize_struct("Element::Document", 2)?; + state.serialize_field("type", "document")?; + if cfg!(feature = "extra-serde-info") { + 
state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + } + } + Element::Block { + block, + begin, + end, + contents_begin, + contents_end, + } => { + state = serializer.serialize_struct("Element::Block", 2)?; + state.serialize_field("type", "block")?; + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + state.serialize_field("contents_begin", contents_begin)?; + state.serialize_field("contents_end", contents_end)?; + } + } + Element::Section { + begin, + end, + contents_begin, + contents_end, + } => { + state = serializer.serialize_struct("Element::Section", 2)?; + state.serialize_field("type", "section")?; + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + state.serialize_field("contents_begin", contents_begin)?; + state.serialize_field("contents_end", contents_end)?; + } + } + Element::Drawer { + drawer, + begin, + end, + contents_begin, + contents_end, + } => { + state = serializer.serialize_struct("Element::Drawer", 2)?; + state.serialize_field("type", "drawer")?; + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + state.serialize_field("contents_begin", contents_begin)?; + state.serialize_field("contents_end", contents_end)?; + } + } + Element::DynBlock { + dyn_block, + begin, + end, + contents_begin, + contents_end, + } => { + state = serializer.serialize_struct("Element::DynBlock", 2)?; + state.serialize_field("type", "dynamic_block")?; + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + state.serialize_field("contents_begin", contents_begin)?; + state.serialize_field("contents_end", contents_end)?; + } + } + Element::FnDef { + begin, + end, + contents_begin, + contents_end, + fn_def, + } => { + state = serializer.serialize_struct("Element::FnDef", 
2)?; + state.serialize_field("type", "footnote_definition")?; + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + state.serialize_field("contents_begin", contents_begin)?; + state.serialize_field("contents_end", contents_end)?; + } + } + Element::Headline { + begin, + end, + contents_begin, + contents_end, + headline, + } => { + state = serializer.serialize_struct("Element::Headline", 2)?; + state.serialize_field("type", "headline")?; + state.serialize_field("level", &headline.level)?; + state.serialize_field("title", &headline.title)?; + if let Some(prior) = &headline.priority { + state.serialize_field("priority", prior)?; + } + if let Some(kw) = &headline.keyword { + state.serialize_field("keyword", kw)?; + } + if !headline.tags.is_empty() { + state.serialize_field("tags", &headline.tags)?; + } + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + state.serialize_field("contents_begin", contents_begin)?; + state.serialize_field("contents_end", contents_end)?; + } + } + Element::List { + list, + begin, + end, + contents_begin, + contents_end, + } => { + state = serializer.serialize_struct("Element::List", 2)?; + state.serialize_field("type", "list")?; + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + state.serialize_field("contents_begin", contents_begin)?; + state.serialize_field("contents_end", contents_end)?; + } + } + Element::ListItem { + list_item, + begin, + end, + contents_begin, + contents_end, + } => { + state = serializer.serialize_struct("Element::ListItem", 2)?; + state.serialize_field("type", "list_item")?; + state.serialize_field("bullet", list_item.bullet)?; + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + state.serialize_field("contents_begin", contents_begin)?; 
+ state.serialize_field("contents_end", contents_end)?; + } + } + Element::Paragraph { + begin, + end, + contents_begin, + contents_end, + } => { + state = serializer.serialize_struct("Element::Paragraph", 2)?; + state.serialize_field("type", "paragraph")?; + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + state.serialize_field("contents_begin", contents_begin)?; + state.serialize_field("contents_end", contents_end)?; + } + } + Element::Clock { clock, begin, end } => { + state = serializer.serialize_struct("Element::Clock", 2)?; + state.serialize_field("type", "clock")?; + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + } + } + Element::BabelCall { value, begin, end } => { + state = serializer.serialize_struct("Element::BabelCall", 2)?; + state.serialize_field("type", "babel_call")?; + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + } + } + Element::Cookie { cookie, begin, end } => { + state = serializer.serialize_struct("Element::Cookie", 2)?; + state.serialize_field("type", "cookie")?; + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + } + } + Element::FnRef { fn_ref, begin, end } => { + state = serializer.serialize_struct("Element::FnRef", 2)?; + state.serialize_field("type", "footnote_reference")?; + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + } + } + Element::InlineCall { + inline_call, + begin, + end, + } => { + state = serializer.serialize_struct("Element::InlineCall", 2)?; + state.serialize_field("type", "inline_call")?; + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + } + } + Element::InlineSrc { + inline_src, + 
begin, + end, + } => { + state = serializer.serialize_struct("Element::InlineSrc", 2)?; + state.serialize_field("type", "inlne_source_block")?; + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + } + } + Element::Keyword { + keyword, + begin, + end, + } => { + state = serializer.serialize_struct("Element::Keyword", 2)?; + state.serialize_field("type", "keyword")?; + state.serialize_field("key", keyword.key)?; + if let Some(option) = keyword.option { + state.serialize_field("option", option)?; + } + state.serialize_field("value", keyword.value)?; + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + } + } + Element::Link { link, begin, end } => { + state = serializer.serialize_struct("Element::Link", 2)?; + state.serialize_field("type", "link")?; + state.serialize_field("path", link.path)?; + if let Some(desc) = link.desc { + state.serialize_field("desc", desc)?; + } + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + } + } + Element::Macros { macros, begin, end } => { + state = serializer.serialize_struct("Element::Macros", 2)?; + state.serialize_field("type", "macros")?; + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + } + } + Element::Planning(_) => { + state = serializer.serialize_struct("Element::Planning", 2)?; + state.serialize_field("type", "planning")?; + } + Element::Snippet { + begin, + end, + snippet, + } => { + state = serializer.serialize_struct("Element::Snippet", 2)?; + state.serialize_field("type", "snippet")?; + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + } + } + Element::Text { value, begin, end } => { + state = serializer.serialize_struct("Element::Text", 2)?; + 
state.serialize_field("type", "text")?; + state.serialize_field("value", value)?; + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + } + } + Element::Rule { begin, end } => { + state = serializer.serialize_struct("Element::Rule", 2)?; + state.serialize_field("type", "rule")?; + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + } + } + Element::Timestamp { + begin, + end, + timestamp, + } => { + state = serializer.serialize_struct("Element::Timestamp", 2)?; + state.serialize_field("type", "timestamp")?; + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + } + } + Element::Bold { + begin, + end, + contents_begin, + contents_end, + } => { + state = serializer.serialize_struct("Element::Bold", 2)?; + state.serialize_field("type", "bold")?; + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + state.serialize_field("contents_begin", contents_begin)?; + state.serialize_field("contents_end", contents_end)?; + } + } + Element::Strike { + begin, + end, + contents_begin, + contents_end, + } => { + state = serializer.serialize_struct("Element::Strike", 2)?; + state.serialize_field("type", "strike")?; + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + state.serialize_field("contents_begin", contents_begin)?; + state.serialize_field("contents_end", contents_end)?; + } + } + Element::Italic { + begin, + end, + contents_begin, + contents_end, + } => { + state = serializer.serialize_struct("Element::Italic", 2)?; + state.serialize_field("type", "italic")?; + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + state.serialize_field("contents_begin", 
contents_begin)?; + state.serialize_field("contents_end", contents_end)?; + } + } + Element::Underline { + begin, + end, + contents_begin, + contents_end, + } => { + state = serializer.serialize_struct("Element::Underline", 2)?; + state.serialize_field("type", "underline")?; + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + state.serialize_field("contents_begin", contents_begin)?; + state.serialize_field("contents_end", contents_end)?; + } + } + Element::Code { begin, end, value } => { + state = serializer.serialize_struct("Element::Code", 2)?; + state.serialize_field("type", "code")?; + state.serialize_field("value", value)?; + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + } + } + Element::Verbatim { begin, end, value } => { + state = serializer.serialize_struct("Element::Verbatim", 2)?; + state.serialize_field("type", "verbatim")?; + state.serialize_field("value", value)?; + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + } + } + Element::RadioTarget { + radio_target, + begin, + end, + } => { + state = serializer.serialize_struct("Element::RadioTarget", 2)?; + state.serialize_field("type", "radio_target")?; + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + } + } + Element::Target { target, begin, end } => { + state = serializer.serialize_struct("Element::Target", 2)?; + state.serialize_field("type", "target")?; + if cfg!(feature = "extra-serde-info") { + state.serialize_field("begin", begin)?; + state.serialize_field("end", end)?; + } + } + } + if let Some(first) = self.arena[self.node].first_child() { + state.serialize_field( + "children", + &ElementChildrenNode { + first, + arena: self.arena, + }, + )?; + } + state.end() + } +} + +struct ElementChildrenNode<'a> { + 
first: NodeId, + arena: &'a Arena>, +} + +impl Serialize for ElementChildrenNode<'_> { + fn serialize(&self, serializer: S) -> Result { + let mut seq = serializer.serialize_seq(None)?; + for node in self.first.following_siblings(&self.arena) { + seq.serialize_element(&ElementNode { + node, + arena: &self.arena, + })?; + } + seq.end() + } +} diff --git a/src/tools.rs b/src/tools.rs deleted file mode 100644 index 85792d8..0000000 --- a/src/tools.rs +++ /dev/null @@ -1,42 +0,0 @@ -use crate::elements::{fn_def, Keyword}; -use crate::headline::{Headline, DEFAULT_TODO_KEYWORDS}; -use memchr::memchr; - -type Headlines<'a> = Vec>; -type Keywords<'a> = Vec<(&'a str, &'a str)>; -type Footnotes<'a> = Vec<&'a str>; - -pub fn metadata(src: &str) -> (Headlines<'_>, Keywords<'_>, Footnotes<'_>) { - let (mut headlines, mut keywords, mut footnotes) = (Vec::new(), Vec::new(), Vec::new()); - - for line in src.lines().filter(|l| !l.is_empty()) { - if line.starts_with('*') { - let level = memchr(b' ', line.as_bytes()).unwrap_or_else(|| line.len()); - if line.as_bytes()[0..level].iter().all(|&c| c == b'*') { - headlines.push(Headline::parse(line, DEFAULT_TODO_KEYWORDS).0) - } - } else if line.starts_with("#+") { - if let Some((key, _, value, _)) = Keyword::parse(line) { - keywords.push((key, value)) - } - } else if line.starts_with("[fn:") { - if let Some((label, _, _)) = fn_def::parse(line) { - footnotes.push(label) - } - } - } - - (headlines, keywords, footnotes) -} - -pub fn toc(src: &str) -> Headlines<'_> { - metadata(src).0 -} - -pub fn keywords(src: &str) -> Keywords<'_> { - metadata(src).1 -} - -pub fn fn_def(src: &str) -> Footnotes<'_> { - metadata(src).2 -}