From 6fa43f7571f4afab01cdc4aaf20d89871fafa6c6 Mon Sep 17 00:00:00 2001 From: PoiScript Date: Sun, 7 Apr 2019 20:10:43 +0800 Subject: [PATCH] feat(parser): drawer parsing --- src/elements/block.rs | 77 +++++++++++++++++++++----------------- src/elements/drawer.rs | 51 +++++++++++++++++++++++++ src/elements/dyn_block.rs | 66 ++++++++++++++++---------------- src/elements/list.rs | 5 ++- src/elements/mod.rs | 1 + src/export/html.rs | 6 +++ src/export/mod.rs | 2 + src/parser.rs | 79 ++++++++++++++++++++++++++++++--------- 8 files changed, 202 insertions(+), 85 deletions(-) create mode 100644 src/elements/drawer.rs diff --git a/src/elements/block.rs b/src/elements/block.rs index 30b1306..0a94c27 100644 --- a/src/elements/block.rs +++ b/src/elements/block.rs @@ -1,37 +1,42 @@ -use crate::lines::Lines; -use memchr::memchr2; +use memchr::{memchr, memchr_iter}; // return (name, args, contents-begin, contents-end, end) #[inline] -pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> { - debug_assert!(src.starts_with("#+")); +pub fn parse(text: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> { + debug_assert!(text.starts_with("#+")); - if src.len() <= 8 || src[2..8].to_uppercase() != "BEGIN_" { + if text.len() <= 8 || text[2..8].to_uppercase() != "BEGIN_" { return None; } - let name = memchr2(b' ', b'\n', src.as_bytes()) - .filter(|&i| src.as_bytes()[8..i].iter().all(u8::is_ascii_alphabetic))?; - let mut lines = Lines::new(src); - let (pre_limit, begin, _) = lines.next()?; - let args = if pre_limit == name { - None - } else { - Some(&src[name..pre_limit]) - }; - let name = &src[8..name]; - let end_line = format!(r"#+END_{}", name.to_uppercase()); - let mut pre_end = begin; + let bytes = text.as_bytes(); + let mut lines = memchr_iter(b'\n', text.as_bytes()); - for (_, end, line) in lines { - if line.trim() == end_line { - return Some((name, args, begin, pre_end, end)); - } else { - pre_end = end; + let (name, para, off) = lines + .next() + .map(|i| { + memchr(b' ', &bytes[8..i]) + .map(|x| (&text[8..8 + x], Some(text[8 + x..i].trim()), i + 1)) + .unwrap_or((&text[8..i], None, i + 1)) + }) + .filter(|(name, _, _)| name.as_bytes().iter().all(|&c| c.is_ascii_alphabetic()))?; + + let mut pos = off; + let end = format!(r"#+END_{}", name.to_uppercase()); + + for i in lines { + if text[pos..i].trim().eq_ignore_ascii_case(&end) { + return Some((name, para, off, pos, i + 1)); } + + pos = i + 1; } - None + if text[pos..].trim().eq_ignore_ascii_case(&end) { + Some((name, para, off, pos, text.len())) + } else { + None + } } #[cfg(test)] @@ -42,19 +47,23 @@ mod tests { assert_eq!( parse("#+BEGIN_SRC\n#+END_SRC"), - Some(("SRC", None, 12, 12, 21)) + Some(( + "SRC", + None, + "#+BEGIN_SRC\n".len(), + "#+BEGIN_SRC\n".len(), + "#+BEGIN_SRC\n#+END_SRC".len() + )) ); assert_eq!( - parse( - r#"#+BEGIN_SRC rust -fn main() { - // print "Hello World!" to the console - println!("Hello World!"); -} -#+END_SRC -"# - ), - Some(("SRC", Some(" rust"), 17, 104, 114)) + parse("#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n#+END_SRC\n"), + Some(( + "SRC", + Some("javascript"), + "#+BEGIN_SRC javascript \n".len(), + "#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n".len(), + "#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n#+END_SRC\n".len() + )) ); // TODO: more testing } diff --git a/src/elements/drawer.rs b/src/elements/drawer.rs new file mode 100644 index 0000000..3b075a3 --- /dev/null +++ b/src/elements/drawer.rs @@ -0,0 +1,51 @@ +use memchr::memchr_iter; + +// return (name, offset, limit, end) +pub(crate) fn parse<'a>(text: &'a str) -> Option<(&'a str, usize, usize, usize)> { + debug_assert!(text.starts_with(':')); + + let mut lines = memchr_iter(b'\n', text.as_bytes()); + + let (name, off) = lines + .next() + .map(|i| (text[1..i].trim_end(), i + 1)) + .filter(|(name, _)| { + name.ends_with(':') + && name[0..name.len() - 1] + .as_bytes() + .iter() + .all(|&c| c.is_ascii_alphabetic() || c == b'-' || c == b'_') + })?; + + let mut pos = off; + for i in lines { + if text[pos..i].trim().eq_ignore_ascii_case(":END:") { + return Some((&name[0..name.len() - 1], off, pos, i + 1)); + } + pos = i + 1; + } + + if text[pos..].trim().eq_ignore_ascii_case(":END:") { + Some((&name[0..name.len() - 1], off, pos, text.len())) + } else { + None + } +} + +#[cfg(test)] +mod tests { + #[test] + fn parse() { + use super::parse; + + assert_eq!( + parse(":PROPERTIES:\n :CUSTOM_ID: id\n :END:"), + Some(( + "PROPERTIES", + ":PROPERTIES:\n".len(), + ":PROPERTIES:\n :CUSTOM_ID: id\n".len(), + ":PROPERTIES:\n :CUSTOM_ID: id\n :END:".len() + )) + ) + } +} diff --git a/src/elements/dyn_block.rs b/src/elements/dyn_block.rs index 6c0edfd..cb165f0 100644 --- a/src/elements/dyn_block.rs +++ b/src/elements/dyn_block.rs @@ -1,40 +1,41 @@ -use crate::lines::Lines; -use memchr::{memchr, memchr2}; +use memchr::{memchr, memchr_iter}; -/// return (name, parameters, contents-begin, contents-end, end) +// return (name, parameters, offset, limit, end) #[inline] -pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> { - debug_assert!(src.starts_with("#+")); +pub fn parse(text: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> { + debug_assert!(text.starts_with("#+")); - if src.len() <= 9 || !src[2..9].eq_ignore_ascii_case("BEGIN: ") { + if text.len() <= 9 || !text[2..9].eq_ignore_ascii_case("BEGIN: ") { return None; } - let mut lines = Lines::new(src); - let (mut pre_limit, _, _) = lines.next()?; + let bytes = text.as_bytes(); + let mut lines = memchr_iter(b'\n', bytes); - for (limit, end, line) in lines { - if line.trim().eq_ignore_ascii_case("#+END:") { - let bytes = src.as_bytes(); + let (name, para, off) = lines + .next() + .map(|i| { + memchr(b' ', &bytes[9..i]) + .map(|x| (&text[9..9 + x], Some(text[9 + x..i].trim()), i + 1)) + .unwrap_or((&text[9..i], None, i + 1)) + }) + .filter(|(name, _, _)| name.as_bytes().iter().all(|&c| c.is_ascii_alphabetic()))?; - let i = memchr2(b' ', b'\n', &bytes[9..]) - .map(|i| i + 9) - .filter(|&i| bytes[9..i].iter().all(|&c| c.is_ascii_alphabetic()))?; - let name = &src[8..i].trim(); + let mut pos = off; - return Some(if bytes[i] == b'\n' { - (name, None, i, pre_limit, end) - } else { - let begin = memchr(b'\n', bytes) - .map(|i| i + 1) - .unwrap_or_else(|| src.len()); - (name, Some(&src[i..begin].trim()), begin, pre_limit, end) - }); + for i in lines { + if text[pos..i].trim().eq_ignore_ascii_case("#+END:") { + return Some((name, para, off, pos, i + 1)); } - pre_limit = limit; + + pos = i + 1; } - None + if text[pos..].trim().eq_ignore_ascii_case("#+END:") { + Some((name, para, off, pos, text.len())) + } else { + None + } } #[cfg(test)] @@ -45,13 +46,14 @@ mod tests { // TODO: testing assert_eq!( - parse( - r"#+BEGIN: clocktable :scope file -CONTENTS -#+END: -" - ), - Some(("clocktable", Some(":scope file"), 32, 40, 48)) + parse("#+BEGIN: clocktable :scope file\nCONTENTS\n#+END:\n"), + Some(( + "clocktable", + Some(":scope file"), + "#+BEGIN: clocktable :scope file\n".len(), + "#+BEGIN: clocktable :scope file\nCONTENTS\n".len(), + "#+BEGIN: clocktable :scope file\nCONTENTS\n#+END:\n".len(), + )) ); } } diff --git a/src/elements/list.rs b/src/elements/list.rs index ad3b5fa..435b5b1 100644 --- a/src/elements/list.rs +++ b/src/elements/list.rs @@ -1,4 +1,5 @@ use crate::lines::Lines; +use memchr::memchr; #[inline] pub fn is_item(text: &str) -> Option { @@ -33,7 +34,7 @@ pub fn is_item(text: &str) -> Option { } } -// returns (bullets, contents begin, contents end, end, has more) +// return (bullets, offset, limit, end, has more) #[inline] pub fn parse(src: &str, ident: usize) -> (&str, usize, usize, usize, bool) { debug_assert!( @@ -50,7 +51,7 @@ pub fn parse(src: &str, ident: usize) -> (&str, usize, usize, usize, bool) { let mut lines = Lines::new(src); let (mut pre_limit, mut pre_end, first_line) = lines.next().unwrap(); - let begin = match memchr::memchr(b' ', &first_line.as_bytes()[ident..]) { + let begin = match memchr(b' ', &first_line.as_bytes()[ident..]) { Some(i) => i + ident + 1, None => { let len = first_line.len(); diff --git a/src/elements/mod.rs b/src/elements/mod.rs index 6824e22..953d0d7 100644 --- a/src/elements/mod.rs +++ b/src/elements/mod.rs @@ -1,5 +1,6 @@ pub(crate) mod block; pub(crate) mod clock; +pub(crate) mod drawer; pub(crate) mod dyn_block; pub(crate) mod fn_def; pub(crate) mod keyword; diff --git a/src/export/html.rs b/src/export/html.rs index c81a393..b85312d 100644 --- a/src/export/html.rs +++ b/src/export/html.rs @@ -28,6 +28,12 @@ pub trait HtmlHandler> { fn section_end(&mut self, w: &mut W) -> Result<(), E> { Ok(write!(w, "")?) } + fn drawer_beg(&mut self, w: &mut W, name: &str) -> Result<(), E> { + Ok(()) + } + fn drawer_end(&mut self, w: &mut W) -> Result<(), E> { + Ok(()) + } fn paragraph_beg(&mut self, w: &mut W) -> Result<(), E> { Ok(write!(w, "

")?) } diff --git a/src/export/mod.rs b/src/export/mod.rs index a235d44..e5992eb 100644 --- a/src/export/mod.rs +++ b/src/export/mod.rs @@ -10,6 +10,8 @@ macro_rules! handle_event { SectionEnd => $handler.section_end($writer)?, ParagraphBeg => $handler.paragraph_beg($writer)?, ParagraphEnd => $handler.paragraph_end($writer)?, + DrawerBeg(n) => $handler.drawer_beg($writer, n)?, + DrawerEnd => $handler.drawer_end($writer)?, CtrBlockBeg => $handler.ctr_block_beg($writer)?, CtrBlockEnd => $handler.ctr_block_end($writer)?, QteBlockBeg => $handler.qte_block_beg($writer)?, diff --git a/src/parser.rs b/src/parser.rs index 8e97095..670e73b 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -9,6 +9,7 @@ use memchr::memchr_iter; enum Container { Headline(usize), Section(usize), + Drawer, Paragraph, CtrBlock, QteBlock, @@ -92,6 +93,9 @@ pub enum Event<'a> { Planning(Planning<'a>), + DrawerBeg(&'a str), + DrawerEnd, + TableStart, TableEnd, TableCell, @@ -243,16 +247,16 @@ impl<'a> Parser<'a> { .unwrap_or_else(|| { let mut pos = 0; for off in memchr_iter(b'\n', tail.as_bytes()) { - if tail.as_bytes()[pos + 1..off] + if tail.as_bytes()[pos..off] .iter() .all(u8::is_ascii_whitespace) { return (Event::ParagraphBeg, 0, pos + start, off + start); - } else if let Some(buf) = self.real_next_ele(&tail[pos + 1..]) { + } else if let Some(buf) = self.real_next_ele(&tail[pos..]) { self.ele_buf = Some(buf); return (Event::ParagraphBeg, 0, pos + start, pos + start); } - pos = off; + pos = off + 1; } let len = text.len(); ( @@ -263,9 +267,17 @@ impl<'a> Parser<'a> { ) }); - debug_assert!(off <= text.len() && limit <= text.len() && end <= text.len()); + debug_assert!( + (limit == 0 && end == 0) || (off <= limit && limit <= end && end <= text.len()), + "{} <= {} <= {} <= {}", + off, + limit, + end, + text.len() + ); match ele { + Event::DrawerBeg(_) => self.push_stack(Container::Drawer, limit, end), Event::ParagraphBeg => self.push_stack(Container::Paragraph, limit, end), Event::QteBlockBeg => self.push_stack(Container::QteBlock, limit, end), Event::CtrBlockBeg => self.push_stack(Container::CtrBlock, limit, end), @@ -309,7 +321,18 @@ impl<'a> Parser<'a> { if tail.starts_with("-----") { let off = rule::parse(tail); if off != 0 { - return Some((Event::Rule, off, 0, 0)); + return Some((Event::Rule, off + line_begin, 0, 0)); + } + } + + if tail.starts_with(':') { + if let Some((name, off, limit, end)) = drawer::parse(tail) { + return Some(( + Event::DrawerBeg(name), + off + line_begin, + limit + line_begin, + end + line_begin, + )); } } @@ -344,27 +367,47 @@ impl<'a> Parser<'a> { .map(|(name, args, begin, limit, end)| { let cont = &tail[begin..limit]; match &*name.to_uppercase() { - "COMMENT" => (Event::CommentBlock { args, cont }, end, 0, 0), - "EXAMPLE" => (Event::ExampleBlock { args, cont }, end, 0, 0), - "EXPORT" => (Event::ExportBlock { args, cont }, end, 0, 0), - "SRC" => (Event::SrcBlock { args, cont }, end, 0, 0), - "VERSE" => (Event::VerseBlock { args, cont }, end, 0, 0), - "CENTER" => (Event::CtrBlockBeg, begin, limit, end), - "QUOTE" => (Event::QteBlockBeg, begin, limit, end), - _ => (Event::SplBlockBeg { name, args }, begin, limit, end), + "COMMENT" => (Event::CommentBlock { args, cont }, end + line_begin, 0, 0), + "EXAMPLE" => (Event::ExampleBlock { args, cont }, end + line_begin, 0, 0), + "EXPORT" => (Event::ExportBlock { args, cont }, end + line_begin, 0, 0), + "SRC" => (Event::SrcBlock { args, cont }, end + line_begin, 0, 0), + "VERSE" => (Event::VerseBlock { args, cont }, end + line_begin, 0, 0), + "CENTER" => ( + Event::CtrBlockBeg, + begin + line_begin, + limit + line_begin, + end + line_begin, + ), + "QUOTE" => ( + Event::QteBlockBeg, + begin + line_begin, + limit + line_begin, + end + line_begin, + ), + _ => ( + Event::SplBlockBeg { name, args }, + begin + line_begin, + limit + line_begin, + end + line_begin, + ), } }) .or_else(|| { dyn_block::parse(tail).map(|(name, args, begin, limit, end)| { - (Event::DynBlockBeg { name, args }, begin, limit, end) + ( + Event::DynBlockBeg { name, args }, + begin + line_begin, + limit + line_begin, + end + line_begin, + ) }) }) .or_else(|| { keyword::parse(tail).map(|(key, value, off)| { if let Key::Call = key { - (Event::Call { value }, off, 0, 0) + (Event::Call { value }, off + line_begin, 0, 0) } else { - (Event::Keyword { key, value }, off, 0, 0) + (Event::Keyword { key, value }, off + line_begin, 0, 0) } }) }) @@ -510,6 +553,7 @@ impl<'a> Parser<'a> { let (container, _, _) = self.stack.pop().unwrap(); match container { Container::Bold => Event::BoldEnd, + Container::Drawer => Event::DrawerEnd, Container::CtrBlock => Event::CtrBlockEnd, Container::DynBlock => Event::DynBlockEnd, Container::Headline(_) => Event::HeadlineEnd, @@ -553,7 +597,8 @@ impl<'a> Iterator for Parser<'a> { self.next_headline(tail) } } - Container::DynBlock + Container::Drawer + | Container::DynBlock | Container::CtrBlock | Container::QteBlock | Container::SplBlock