diff --git a/src/elements/drawer.rs b/src/elements/drawer.rs index 3b075a3..367d7a5 100644 --- a/src/elements/drawer.rs +++ b/src/elements/drawer.rs @@ -1,7 +1,7 @@ use memchr::memchr_iter; // return (name, offset, limit, end) -pub(crate) fn parse<'a>(text: &'a str) -> Option<(&'a str, usize, usize, usize)> { +pub(crate) fn parse(text: &str) -> Option<(&str, usize, usize, usize)> { debug_assert!(text.starts_with(':')); let mut lines = memchr_iter(b'\n', text.as_bytes()); diff --git a/src/elements/list.rs b/src/elements/list.rs index 435b5b1..2de5a0c 100644 --- a/src/elements/list.rs +++ b/src/elements/list.rs @@ -1,8 +1,7 @@ -use crate::lines::Lines; -use memchr::memchr; +use memchr::memchr_iter; #[inline] -pub fn is_item(text: &str) -> Option { +pub fn is_item(text: &str) -> Option<(bool, &str)> { if text.is_empty() { return None; } @@ -11,7 +10,7 @@ pub fn is_item(text: &str) -> Option { match bytes[0] { b'*' | b'-' | b'+' => { if text.len() > 1 && (bytes[1] == b' ' || bytes[1] == b'\n') { - Some(false) + Some((false, &text[0..2])) } else { None } @@ -25,7 +24,7 @@ pub fn is_item(text: &str) -> Option { && i + 1 < text.len() && (bytes[i + 1] == b' ' || bytes[i + 1] == b'\n') { - Some(true) + Some((true, &text[0..i + 2])) } else { None } @@ -34,84 +33,72 @@ pub fn is_item(text: &str) -> Option { } } -// return (bullets, offset, limit, end, has more) +// check if list item ends at this line #[inline] -pub fn parse(src: &str, ident: usize) -> (&str, usize, usize, usize, bool) { - debug_assert!( - is_item(&src[ident..]).is_some(), - "{:?} is not a list item", - src - ); - debug_assert!( - src[..ident].chars().all(|c| c == ' ' || c == '\t'), - "{:?} doesn't starts with indentation {}", - src, - ident - ); +fn is_item_ends(line: &str, ident: usize) -> Option<&str> { + debug_assert!(!line.is_empty()); - let mut lines = Lines::new(src); - let (mut pre_limit, mut pre_end, first_line) = lines.next().unwrap(); - let begin = match memchr(b' ', &first_line.as_bytes()[ident..]) { - Some(i) => i + ident + 1, - None => { - let len = first_line.len(); - return ( - first_line, - len, - len, - len, - is_item(lines.next().unwrap().2).is_some(), - ); - } - }; - let bullet = &src[0..begin]; - - while let Some((mut limit, mut end, mut line)) = lines.next() { - // this line is emtpy - if line.is_empty() { - if let Some((next_limit, next_end, next_line)) = lines.next() { - // next line is emtpy, too - if next_line.is_empty() { - return (bullet, begin, pre_limit, next_end, false); - } else { - // move to next line - pre_end = end; - limit = next_limit; - end = next_end; - line = next_line; - } - } else { - return (bullet, begin, pre_limit, end, false); - } - } - - let line_ident = count_ident(line); - - if line_ident < ident { - return (bullet, begin, pre_limit, pre_end, false); - } else if line_ident == ident { - return ( - bullet, - begin, - pre_limit, - pre_end, - is_item(&line[ident..]).is_some(), - ); - } - - pre_end = end; - pre_limit = limit; - } - - (bullet, begin, src.len(), src.len(), false) -} - -#[inline] -fn count_ident(src: &str) -> usize { - src.as_bytes() + let line_ident = line + .as_bytes() .iter() .position(|&c| c != b' ' && c != b'\t') - .unwrap_or(0) + .unwrap_or(0); + + debug_assert!(line_ident >= ident, "{} >= {}", line_ident, ident); + + if line_ident == ident { + is_item(&line[ident..]).map(|(_, bullet)| bullet) + } else { + None + } +} + +// return (limit, end, next item bullet) +#[inline] +pub fn parse(text: &str, ident: usize) -> (usize, usize, Option<&str>) { + let bytes = text.as_bytes(); + let mut lines = memchr_iter(b'\n', bytes); + let mut pos = if let Some(i) = lines.next() { + i + 1 + } else { + return (text.len(), text.len(), None); + }; + + while let Some(i) = lines.next() { + return if bytes[pos..i].iter().all(u8::is_ascii_whitespace) { + if let Some(nexti) = lines.next() { + if bytes[i + 1..nexti].iter().all(u8::is_ascii_whitespace) { + // two consecutive empty lines + (pos - 1, nexti + 1, None) + } else if let Some(next) = is_item_ends(&text[i + 1..nexti], ident) { + (pos - 1, i + 1, Some(next)) + } else { + pos = nexti + 1; + continue; + } + } else if bytes[i + 1..].iter().all(u8::is_ascii_whitespace) { + // two consecutive empty lines + (pos - 1, text.len(), None) + } else if let Some(next) = is_item_ends(&text[i + 1..], ident) { + (pos - 1, i + 1, Some(next)) + } else { + (text.len(), text.len(), None) + } + } else if let Some(next) = is_item_ends(&text[pos..i], ident) { + (pos - 1, pos, Some(next)) + } else { + pos = i + 1; + continue; + }; + } + + if bytes[pos..].iter().all(u8::is_ascii_whitespace) { + (pos - 1, text.len(), None) + } else if let Some(next) = is_item_ends(&text[pos..], ident) { + (pos - 1, pos, Some(next)) + } else { + (text.len(), text.len(), None) + } } #[cfg(test)] @@ -120,14 +107,14 @@ mod tests { fn is_item() { use super::is_item; - assert_eq!(is_item("+ item"), Some(false)); - assert_eq!(is_item("- item"), Some(false)); - assert_eq!(is_item("10. item"), Some(true)); - assert_eq!(is_item("10) item"), Some(true)); - assert_eq!(is_item("1. item"), Some(true)); - assert_eq!(is_item("1) item"), Some(true)); - assert_eq!(is_item("10. "), Some(true)); - assert_eq!(is_item("10.\n"), Some(true)); + assert_eq!(is_item("+ item"), Some((false, "+ "))); + assert_eq!(is_item("- item"), Some((false, "- "))); + assert_eq!(is_item("10. item"), Some((true, "10. "))); + assert_eq!(is_item("10) item"), Some((true, "10) "))); + assert_eq!(is_item("1. item"), Some((true, "1. "))); + assert_eq!(is_item("1) item"), Some((true, "1) "))); + assert_eq!(is_item("10. "), Some((true, "10. "))); + assert_eq!(is_item("10.\n"), Some((true, "10.\n"))); assert_eq!(is_item("10."), None); assert_eq!(is_item("+"), None); assert_eq!(is_item("-item"), None); @@ -138,30 +125,49 @@ mod tests { fn parse() { use super::parse; - assert_eq!(parse("+ item1\n+ item2\n+ item3", 0), ("+ ", 2, 7, 8, true)); assert_eq!( - parse("* item1\n\n* item2\n* item3", 0), - ("* ", 2, 7, 9, true) + parse("item1\n+ item2", 0), + ("item1".len(), "item1\n".len(), Some("+ ")) ); assert_eq!( - parse("- item1\n\n\n- item2\n- item3", 0), - ("- ", 2, 7, 10, false) + parse("item1\n \n* item2", 0), + ("item1".len(), "item1\n \n".len(), Some("* ")) ); assert_eq!( - parse("1. item1\n\n\n\n2. item2\n3. item3", 0), - ("1. ", 3, 8, 11, false) + parse("item1\n \n \n* item2", 0), + ("item1".len(), "item1\n \n \n".len(), None) ); assert_eq!( - parse(" + item1\n + item2\n+ item3", 2), - (" + ", 4, 21, 22, false) + parse("item1\n \n ", 0), + ("item1".len(), "item1\n \n ".len(), None) ); assert_eq!( - parse(" + item1\n + item2\n + item3", 2), - (" + ", 4, 9, 10, true) + parse("item1\n + item2\n ", 0), + ( + "item1\n + item2".len(), + "item1\n + item2\n ".len(), + None + ) + ); + assert_eq!( + parse("item1\n \n + item2\n \n+ item 3", 0), + ( + "item1\n \n + item2".len(), + "item1\n \n + item2\n \n".len(), + Some("+ ") + ) + ); + assert_eq!( + parse("item1\n \n + item2", 2), + ("item1".len(), "item1\n \n".len(), Some("+ ")) + ); + assert_eq!( + parse("1\n\n - 2\n\n - 3\n\n+ 4", 0), + ( + "1\n\n - 2\n\n - 3".len(), + "1\n\n - 2\n\n - 3\n\n".len(), + Some("+ ") + ) ); - assert_eq!(parse("+\n", 0), ("+", 1, 1, 1, false)); - assert_eq!(parse("+\n+ item2\n+ item3", 0), ("+", 1, 1, 1, true)); - assert_eq!(parse("1) item1", 0), ("1) ", 3, 8, 8, false)); - assert_eq!(parse("1) item1\n", 0), ("1) ", 3, 8, 9, false)); } } diff --git a/src/lib.rs b/src/lib.rs index 4ecf830..ae6c32d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -93,7 +93,6 @@ pub mod elements; pub mod export; pub mod headline; -mod lines; pub mod objects; mod parser; pub mod tools; diff --git a/src/lines.rs b/src/lines.rs deleted file mode 100644 index 836f6e5..0000000 --- a/src/lines.rs +++ /dev/null @@ -1,52 +0,0 @@ -use memchr::{memchr_iter, Memchr}; -use std::iter::{once, Chain, Once}; - -pub struct Lines<'a> { - src: &'a str, - iter: Chain, Once>, - start: usize, -} - -impl<'a> Lines<'a> { - pub fn new(src: &'a str) -> Lines<'a> { - Lines { - src, - iter: memchr_iter(b'\n', &src.as_bytes()).chain(once(src.len())), - start: 0, - } - } -} - -impl<'a> Iterator for Lines<'a> { - type Item = (usize, usize, &'a str); - - #[inline] - fn next(&mut self) -> Option<(usize, usize, &'a str)> { - self.iter.next().map(|i| { - let (line, limit) = if i != self.src.len() && self.src.as_bytes()[i - 1] == b'\r' { - (&self.src[self.start..i - 1], i - 1) - } else { - (&self.src[self.start..i], i) - }; - self.start = if i != self.src.len() { i + 1 } else { i }; - (limit, self.start, line) - }) - } - - #[inline] - fn size_hint(&self) -> (usize, Option) { - self.iter.size_hint() - } -} - -#[test] -fn lines() { - let mut lines = Lines::new("foo\r\nbar\n\nbaz\n"); - - assert_eq!(Some((3, 5, "foo")), lines.next()); - assert_eq!(Some((8, 9, "bar")), lines.next()); - assert_eq!(Some((9, 10, "")), lines.next()); - assert_eq!(Some((13, 14, "baz")), lines.next()); - assert_eq!(Some((14, 14, "")), lines.next()); - assert_eq!(None, lines.next()); -} diff --git a/src/parser.rs b/src/parser.rs index 33331ec..1775197 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -164,11 +164,11 @@ pub enum Event<'a> { pub struct Parser<'a> { text: &'a str, stack: Vec<(Container, usize, usize)>, + next_item: Vec>, off: usize, ele_buf: Option<(Event<'a>, usize, usize, usize)>, obj_buf: Option<(Event<'a>, usize, usize, usize)>, keywords: &'a [&'a str], - list_more_item: bool, } impl<'a> Parser<'a> { @@ -177,10 +177,10 @@ impl<'a> Parser<'a> { Parser { text, stack: Vec::new(), + next_item: Vec::new(), off: 0, ele_buf: None, obj_buf: None, - list_more_item: false, keywords: DEFAULT_KEYWORDS, } } @@ -199,6 +199,15 @@ impl<'a> Parser<'a> { self.keywords = keywords; } + pub fn set_text(&mut self, text: &'a str) { + self.off = 0; + self.stack.clear(); + self.next_item.clear(); + self.ele_buf = None; + self.obj_buf = None; + self.text = text; + } + fn next_section_or_headline(&mut self, text: &'a str) -> Event<'a> { let end = Headline::find_level(text, std::usize::MAX); if end != 0 { @@ -246,17 +255,14 @@ impl<'a> Parser<'a> { .or_else(|| self.real_next_ele(tail)) .unwrap_or_else(|| { let mut pos = 0; - for off in memchr_iter(b'\n', tail.as_bytes()) { - if tail.as_bytes()[pos..off] - .iter() - .all(u8::is_ascii_whitespace) - { - return (Event::ParagraphBeg, 0, pos + start, off + start); + for i in memchr_iter(b'\n', tail.as_bytes()) { + if tail.as_bytes()[pos..i].iter().all(u8::is_ascii_whitespace) { + return (Event::ParagraphBeg, 0, pos - 1 + start, i + 1 + start); } else if let Some(buf) = self.real_next_ele(&tail[pos..]) { self.ele_buf = Some(buf); - return (Event::ParagraphBeg, 0, pos + start, pos + start); + return (Event::ParagraphBeg, 0, pos - 1 + start, pos + start); } - pos = off + 1; + pos = i + 1; } let len = text.len(); ( @@ -284,8 +290,7 @@ impl<'a> Parser<'a> { Event::SplBlockBeg { .. } => self.push_stack(Container::SplBlock, limit, end), Event::DynBlockBeg { .. } => self.push_stack(Container::DynBlock, limit, end), Event::ListBeg { ordered, .. } => { - self.push_stack(Container::List(limit, ordered), end, end); - self.list_more_item = true; + self.push_stack(Container::List(limit, ordered), end, end) } _ => (), } @@ -296,7 +301,7 @@ impl<'a> Parser<'a> { } // returns (event, offset, container limit, container end) - fn real_next_ele(&self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> { + fn real_next_ele(&mut self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> { debug_assert!(!text.starts_with('\n')); if text.starts_with("[fn:") { @@ -310,7 +315,8 @@ impl<'a> Parser<'a> { .map(|off| (&text[off..], off)) .unwrap_or((text, 0)); - if let Some(ordered) = list::is_item(tail) { + if let Some((ordered, bullet)) = list::is_item(tail) { + self.next_item.push(Some(bullet)); return Some((Event::ListBeg { ordered }, 0, line_begin, text.len())); } @@ -438,7 +444,14 @@ impl<'a> Parser<'a> { (Event::Text(text), text.len(), 0, 0) }); - debug_assert!(off <= text.len() && limit <= text.len() && end <= text.len()); + debug_assert!( + off <= text.len() && limit <= text.len() && end <= text.len(), + "{} <= {} <= {} <= {}", + off, + limit, + end, + text.len() + ); self.off += off; @@ -534,14 +547,6 @@ impl<'a> Parser<'a> { } } - fn next_list_item(&mut self, ident: usize, text: &'a str) -> Event<'a> { - let (bullet, off, limit, end, has_more) = list::parse(text, ident); - self.push_stack(Container::ListItem, limit, end); - self.off += off; - self.list_more_item = has_more; - Event::ListItemBeg { bullet } - } - #[inline] fn push_stack(&mut self, container: Container, limit: usize, end: usize) { self.stack @@ -575,6 +580,14 @@ impl<'a> Iterator for Parser<'a> { fn next(&mut self) -> Option> { if let Some(&(container, limit, end)) = self.stack.last() { + // eprintln!( + // "{:width$} {:?} {:?}", + // ' ', + // container, + // &self.text[self.off..limit], + // width = self.stack_depth(), + // ); + debug_assert!( self.off <= limit && limit <= end && end <= self.text.len(), "{} <= {} <= {} <= {}", @@ -583,51 +596,76 @@ impl<'a> Iterator for Parser<'a> { end, self.text.len() ); - Some(if self.off >= limit { - self.off = end; - self.end() - } else { - let tail = &self.text[self.off..limit]; - match container { - Container::Headline(beg) => { - debug_assert!(self.off >= beg); - if self.off == beg { - self.next_section_or_headline(tail) - } else { - self.next_headline(tail) - } + + let tail = &self.text[self.off..limit]; + + Some(match container { + Container::Headline(beg) => { + debug_assert!(self.off >= beg); + if self.off >= limit { + self.off = end; + self.stack.pop(); + Event::HeadlineEnd + } else if self.off == beg { + self.next_section_or_headline(tail) + } else { + self.next_headline(tail) } - Container::Drawer - | Container::DynBlock - | Container::CtrBlock - | Container::QteBlock - | Container::SplBlock - | Container::ListItem => self.next_ele(tail), - Container::Section(beg) => { - // planning should be the first line of section - if self.off == beg { - if let Some((planning, off)) = Planning::parse(tail) { - self.off += off; - Event::Planning(planning) - } else { - self.next_ele(tail) - } + } + Container::Drawer + | Container::DynBlock + | Container::CtrBlock + | Container::QteBlock + | Container::SplBlock + | Container::ListItem => { + if self.off >= limit { + self.off = end; + self.end() + } else { + self.next_ele(tail) + } + } + Container::Section(beg) => { + // planning should be the first line of section + if self.off >= limit { + self.off = end; + self.stack.pop(); + Event::SectionEnd + } else if self.off == beg { + if let Some((planning, off)) = Planning::parse(tail) { + self.off += off; + Event::Planning(planning) } else { self.next_ele(tail) } + } else { + self.next_ele(tail) } - Container::List(ident, _) => { - if self.list_more_item { - self.next_list_item(ident, tail) - } else { - self.end() - } + } + Container::List(ident, ordered) => { + if let Some(bullet) = self.next_item.pop().unwrap() { + self.off += bullet.len() + ident; + let (limit, end, next) = list::parse(&self.text[self.off..limit], ident); + self.push_stack(Container::ListItem, limit, end); + self.next_item.push(next); + Event::ListItemBeg { bullet } + } else { + self.off = end; + self.stack.pop(); + Event::ListEnd { ordered } + } + } + Container::Paragraph + | Container::Bold + | Container::Underline + | Container::Italic + | Container::Strike => { + if self.off >= limit { + self.off = end; + self.end() + } else { + self.next_obj(tail) } - Container::Paragraph - | Container::Bold - | Container::Underline - | Container::Italic - | Container::Strike => self.next_obj(tail), } }) } else if self.off < self.text.len() {