diff --git a/src/elements/block.rs b/src/elements/block.rs index 4130c87..30b1306 100644 --- a/src/elements/block.rs +++ b/src/elements/block.rs @@ -13,19 +13,19 @@ pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> { let name = memchr2(b' ', b'\n', src.as_bytes()) .filter(|&i| src.as_bytes()[8..i].iter().all(u8::is_ascii_alphabetic))?; let mut lines = Lines::new(src); - let (pre_cont_end, cont_beg, _) = lines.next()?; - let args = if pre_cont_end == name { + let (pre_limit, begin, _) = lines.next()?; + let args = if pre_limit == name { None } else { - Some(&src[name..pre_cont_end]) + Some(&src[name..pre_limit]) }; let name = &src[8..name]; let end_line = format!(r"#+END_{}", name.to_uppercase()); - let mut pre_end = cont_beg; + let mut pre_end = begin; for (_, end, line) in lines { if line.trim() == end_line { - return Some((name, args, cont_beg, pre_end, end)); + return Some((name, args, begin, pre_end, end)); } else { pre_end = end; } diff --git a/src/elements/dyn_block.rs b/src/elements/dyn_block.rs index 8dcad41..6c0edfd 100644 --- a/src/elements/dyn_block.rs +++ b/src/elements/dyn_block.rs @@ -11,9 +11,9 @@ pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> { } let mut lines = Lines::new(src); - let (mut pre_cont_end, _, _) = lines.next()?; + let (mut pre_limit, _, _) = lines.next()?; - for (cont_end, end, line) in lines { + for (limit, end, line) in lines { if line.trim().eq_ignore_ascii_case("#+END:") { let bytes = src.as_bytes(); @@ -23,21 +23,15 @@ pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> { let name = &src[8..i].trim(); return Some(if bytes[i] == b'\n' { - (name, None, i, pre_cont_end, end) + (name, None, i, pre_limit, end) } else { - let cont_beg = memchr(b'\n', bytes) + let begin = memchr(b'\n', bytes) .map(|i| i + 1) .unwrap_or_else(|| src.len()); - ( - name, - Some(&src[i..cont_beg].trim()), - cont_beg, - pre_cont_end, - end, - ) + (name, Some(&src[i..begin].trim()), begin, pre_limit, end) }); } - pre_cont_end = cont_end; + pre_limit = limit; } None diff --git a/src/elements/list.rs b/src/elements/list.rs index 2fd5b80..ad3b5fa 100644 --- a/src/elements/list.rs +++ b/src/elements/list.rs @@ -1,39 +1,46 @@ use crate::lines::Lines; #[inline] -pub fn is_item(src: &str) -> (bool, bool) { - if src.is_empty() { - return (false, false); +pub fn is_item(text: &str) -> Option { + if text.is_empty() { + return None; } - let bytes = src.as_bytes(); - let (i, ordered) = match bytes[0] { - b'*' | b'-' | b'+' => (1, false), + + let bytes = text.as_bytes(); + match bytes[0] { + b'*' | b'-' | b'+' => { + if text.len() > 1 && (bytes[1] == b' ' || bytes[1] == b'\n') { + Some(false) + } else { + None + } + } b'0'...b'9' => { let i = bytes .iter() .position(|&c| !c.is_ascii_digit()) - .unwrap_or_else(|| src.len() - 1); - let c = bytes[i]; - if !(c == b'.' || c == b')') { - return (false, false); + .unwrap_or_else(|| text.len() - 1); + if (bytes[i] == b'.' || bytes[i] == b')') + && i + 1 < text.len() + && (bytes[i + 1] == b' ' || bytes[i + 1] == b'\n') + { + Some(true) + } else { + None } - (i + 1, true) } - _ => return (false, false), - }; - - if i < src.len() { - // bullet is follwed by a space or line ending - (bytes[i] == b' ' || bytes[i] == b'\n', ordered) - } else { - (false, false) + _ => None, } } // returns (bullets, contents begin, contents end, end, has more) #[inline] pub fn parse(src: &str, ident: usize) -> (&str, usize, usize, usize, bool) { - debug_assert!(is_item(&src[ident..]).0); + debug_assert!( + is_item(&src[ident..]).is_some(), + "{:?} is not a list item", + src + ); debug_assert!( src[..ident].chars().all(|c| c == ' ' || c == '\t'), "{:?} doesn't starts with indentation {}", @@ -42,60 +49,60 @@ pub fn parse(src: &str, ident: usize) -> (&str, usize, usize, usize, bool) { ); let mut lines = Lines::new(src); - let (mut pre_cont_end, mut pre_end, first_line) = lines.next().unwrap(); - let beg = match memchr::memchr(b' ', &first_line.as_bytes()[ident..]) { + let (mut pre_limit, mut pre_end, first_line) = lines.next().unwrap(); + let begin = match memchr::memchr(b' ', &first_line.as_bytes()[ident..]) { Some(i) => i + ident + 1, None => { let len = first_line.len(); return ( - &first_line, + first_line, len, len, len, - is_item(lines.next().unwrap().2).0, + is_item(lines.next().unwrap().2).is_some(), ); } }; - let bullet = &src[0..beg]; + let bullet = &src[0..begin]; - while let Some((mut cont_end, mut end, mut line)) = lines.next() { + while let Some((mut limit, mut end, mut line)) = lines.next() { // this line is emtpy if line.is_empty() { - if let Some((next_cont_end, next_end, next_line)) = lines.next() { + if let Some((next_limit, next_end, next_line)) = lines.next() { // next line is emtpy, too if next_line.is_empty() { - return (bullet, beg, pre_cont_end, next_end, false); + return (bullet, begin, pre_limit, next_end, false); } else { // move to next line pre_end = end; - cont_end = next_cont_end; + limit = next_limit; end = next_end; line = next_line; } } else { - return (bullet, beg, pre_cont_end, end, false); + return (bullet, begin, pre_limit, end, false); } } let line_ident = count_ident(line); if line_ident < ident { - return (bullet, beg, pre_cont_end, pre_end, false); + return (bullet, begin, pre_limit, pre_end, false); } else if line_ident == ident { return ( bullet, - beg, - pre_cont_end, + begin, + pre_limit, pre_end, - is_item(&line[ident..]).0, + is_item(&line[ident..]).is_some(), ); } pre_end = end; - pre_cont_end = cont_end; + pre_limit = limit; } - (bullet, beg, src.len(), src.len(), false) + (bullet, begin, src.len(), src.len(), false) } #[inline] @@ -112,18 +119,18 @@ mod tests { fn is_item() { use super::is_item; - assert_eq!(is_item("+ item"), (true, false)); - assert_eq!(is_item("- item"), (true, false)); - assert_eq!(is_item("10. item"), (true, true)); - assert_eq!(is_item("10) item"), (true, true)); - assert_eq!(is_item("1. item"), (true, true)); - assert_eq!(is_item("1) item"), (true, true)); - assert_eq!(is_item("10. "), (true, true)); - assert_eq!(is_item("10.\n"), (true, true)); - assert_eq!(is_item("10."), (false, false)); - assert_eq!(is_item("+"), (false, false)); - assert_eq!(is_item("-item"), (false, false)); - assert_eq!(is_item("+item"), (false, false)); + assert_eq!(is_item("+ item"), Some(false)); + assert_eq!(is_item("- item"), Some(false)); + assert_eq!(is_item("10. item"), Some(true)); + assert_eq!(is_item("10) item"), Some(true)); + assert_eq!(is_item("1. item"), Some(true)); + assert_eq!(is_item("1) item"), Some(true)); + assert_eq!(is_item("10. "), Some(true)); + assert_eq!(is_item("10.\n"), Some(true)); + assert_eq!(is_item("10."), None); + assert_eq!(is_item("+"), None); + assert_eq!(is_item("-item"), None); + assert_eq!(is_item("+item"), None); } #[test] diff --git a/src/lines.rs b/src/lines.rs index 3c0707e..836f6e5 100644 --- a/src/lines.rs +++ b/src/lines.rs @@ -23,13 +23,13 @@ impl<'a> Iterator for Lines<'a> { #[inline] fn next(&mut self) -> Option<(usize, usize, &'a str)> { self.iter.next().map(|i| { - let (line, cont_end) = if i != self.src.len() && self.src.as_bytes()[i - 1] == b'\r' { + let (line, limit) = if i != self.src.len() && self.src.as_bytes()[i - 1] == b'\r' { (&self.src[self.start..i - 1], i - 1) } else { (&self.src[self.start..i], i) }; self.start = if i != self.src.len() { i + 1 } else { i }; - (cont_end, self.start, line) + (limit, self.start, line) }) } diff --git a/src/objects/timestamp.rs b/src/objects/timestamp.rs index 567d260..1ff8eb0 100644 --- a/src/objects/timestamp.rs +++ b/src/objects/timestamp.rs @@ -151,7 +151,10 @@ pub fn parse_inactive(text: &str) -> Option<(Timestamp<'_>, usize)> { } fn parse_datetime(bytes: &[u8]) -> Option<(Datetime, Option)> { - if !bytes[0].is_ascii_digit() || !bytes[bytes.len() - 1].is_ascii_alphanumeric() { + if bytes.is_empty() + || !bytes[0].is_ascii_digit() + || !bytes[bytes.len() - 1].is_ascii_alphanumeric() + { return None; } diff --git a/src/parser.rs b/src/parser.rs index 4bfab96..aeed002 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -213,35 +213,56 @@ impl<'a> Parser<'a> { } fn next_ele(&mut self, text: &'a str) -> Event<'a> { + fn skip_empty_lines(text: &str) -> usize { + let mut i = 0; + for pos in memchr_iter(b'\n', text.as_bytes()) { + if text.as_bytes()[i..pos].iter().all(u8::is_ascii_whitespace) { + i = pos + 1; + } else { + return i; + } + } + if text.as_bytes()[i..].iter().all(u8::is_ascii_whitespace) { + text.len() + } else { + i + } + } + + let start = skip_empty_lines(text); + if start == text.len() { + self.off += text.len(); + return self.end(); + }; + let tail = &text[start..]; + let (ele, off, limit, end) = self .ele_buf .take() - .or_else(|| self.real_next_ele(text)) + .or_else(|| self.real_next_ele(tail)) .unwrap_or_else(|| { - let len = text.len(); - let start = text.find(|c| c != '\n').unwrap_or(0); - if start == len - 1 { - (self.end(), len, 0, 0) - } else { - let mut pos = start; - for off in memchr_iter(b'\n', &text.as_bytes()[start..]) { - if text[pos..off + start].trim().is_empty() { - return (Event::ParagraphBeg, start, pos, off + start); - } else { - pos = off + start; - if let Some(buf) = self.real_next_ele(&text[pos + 1..]) { - self.ele_buf = Some(buf); - return (Event::ParagraphBeg, start, pos, pos); - } + let mut pos = 0; + for off in memchr_iter(b'\n', tail.as_bytes()) { + if tail.as_bytes()[pos + 1..off] + .iter() + .all(u8::is_ascii_whitespace) + { + return (Event::ParagraphBeg, 0, pos + start, off + start); + } else { + if let Some(buf) = self.real_next_ele(&tail[pos + 1..]) { + self.ele_buf = Some(buf); + return (Event::ParagraphBeg, 0, pos + start, pos + start); } } - ( - Event::ParagraphBeg, - start, - if text.ends_with('\n') { len - 1 } else { len }, - len, - ) + pos = off; } + let len = text.len(); + ( + Event::ParagraphBeg, + 0, + if text.ends_with('\n') { len - 1 } else { len }, + len, + ) }); debug_assert!(off <= text.len() && limit <= text.len() && end <= text.len()); @@ -259,13 +280,15 @@ impl<'a> Parser<'a> { _ => (), } - self.off += off; + self.off += off + start; ele } // returns (event, offset, container limit, container end) - fn real_next_ele(&mut self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> { + fn real_next_ele(&self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> { + debug_assert!(!text.starts_with('\n')); + if text.starts_with("[fn:") { if let Some((label, cont, off)) = fn_def::parse(text) { return Some((Event::FnDef { label, cont }, off + 1, 0, 0)); @@ -277,8 +300,7 @@ impl<'a> Parser<'a> { .map(|off| (&text[off..], off)) .unwrap_or((text, 0)); - let (is_item, ordered) = list::is_item(tail); - if is_item { + if let Some(ordered) = list::is_item(tail) { return Some((Event::ListBeg { ordered }, 0, line_begin, text.len())); } @@ -567,7 +589,6 @@ fn parse() { }), SectionBeg, ParagraphBeg, - Text("test "), BoldBeg, Text("Section 1"), BoldEnd, @@ -621,10 +642,14 @@ fn parse() { assert_eq!( Parser::new( - r#"#+OPTIONS: H:3 num:nil toc:t \n:nil ::t |:t ^:t -:t f:t *:t tex:t d:(HIDE) tags:not-in-toc - -* Definitions -"# + r#"* Title 1 +*Section 1* +** Title 2 +_Section 2_ +* Title 3 +/Section 3/ +* Title 4 +=Section 4="# ) .collect::>(), expected