diff --git a/src/elements/fn_def.rs b/src/elements/fn_def.rs index 07c1815..f7af320 100644 --- a/src/elements/fn_def.rs +++ b/src/elements/fn_def.rs @@ -9,7 +9,9 @@ fn valid_label(ch: u8) -> bool { impl FnDef { pub fn parse(src: &str) -> Option<(&str, &str, usize)> { - starts_with!(src, "[fn:"); + if cfg!(test) { + starts_with!(src, "[fn:"); + } let label = until_while!(src, 4, b']', valid_label)?; diff --git a/src/elements/list.rs b/src/elements/list.rs index b5a704b..6b97533 100644 --- a/src/elements/list.rs +++ b/src/elements/list.rs @@ -1,289 +1,157 @@ -pub struct List; +use lines::Lines; -macro_rules! ident { - ($src:expr) => { - $src.as_bytes() - .iter() - .position(|&c| c != b' ' && c != b'\t') - .unwrap_or(0) - }; -} +pub struct List; impl List { #[inline] - fn is_item(src: &str) -> bool { - if src.len() < 2 { - return false; + pub fn is_item(src: &str) -> (bool, bool) { + if src.is_empty() { + return (false, false); } - let bytes = src.as_bytes(); - let i = match bytes[0] { - b'*' | b'-' | b'+' => 1, + let (i, ordered) = match bytes[0] { + b'*' | b'-' | b'+' => (1, false), b'0'...b'9' => { let i = bytes .iter() .position(|&c| !c.is_ascii_digit()) .unwrap_or_else(|| src.len()); - if i >= src.len() - 1 { - return false; - } let c = bytes[i]; if !(c == b'.' || c == b')') { - return false; + return (false, false); } - i + 1 + (i + 1, true) } - _ => return false, + _ => return (false, false), }; - // bullet is follwed by a space or line ending - bytes[i] == b' ' || bytes[i] == b'\n' - } - - #[inline] - pub fn is_ordered(byte: u8) -> bool { - match byte { - b'*' | b'-' | b'+' => false, - b'0'...b'9' => true, - _ => unreachable!(), + if i < src.len() { + // bullet is follwed by a space or line ending + (bytes[i] == b' ' || bytes[i] == b'\n', ordered) + } else { + (false, false) } } - // returns (contents_begin, contents_end) - pub fn parse_item(src: &str, ident: usize) -> (usize, usize) { - let beg = memchr::memchr(b' ', &src.as_bytes()[ident..]) - .map(|i| i + ident + 1) - .unwrap(); - let mut lines = lines!(src); - // skip first line - let mut pos = lines.next().unwrap(); - for line_end in lines { - let line = &src[pos..line_end]; - if !line.trim().is_empty() && ident!(line) == ident { - break; + // returns (bullets, contents begin, contents end, end, has more) + pub fn parse(src: &str, ident: usize) -> (&str, usize, usize, usize, bool) { + debug_assert!(Self::is_item(&src[ident..]).0); + debug_assert!( + src[..ident].chars().all(|c| c == ' ' || c == '\t'), + "{:?} doesn't starts with indentation {}", + src, + ident + ); + + let mut lines = Lines::new(src); + let (mut pre_cont_end, mut pre_end, first_line) = lines.next().unwrap(); + let beg = match memchr::memchr(b' ', &first_line.as_bytes()[ident..]) { + Some(i) => i + ident + 1, + None => { + let len = first_line.len(); + return ( + &first_line, + len, + len, + len, + Self::is_item(lines.next().unwrap().2).0, + ); } - pos = line_end; - } - (beg, pos) - } + }; + let bullet = &src[0..beg]; - // return (ident, is_ordered, contents_end, end) - pub fn parse(src: &str) -> Option<(usize, bool, usize, usize)> { - let bytes = src.as_bytes(); - let starting_ident = ident!(src); - - if !Self::is_item(&src[starting_ident..]) { - return None; - } - - let mut lines = lines!(src); - // skip the starting line - let mut pos = lines.next().unwrap(); - let is_ordered = Self::is_ordered(bytes[starting_ident]); - - Some(loop { - let mut curr_line = match lines.next() { - Some(i) => i, - None => break (starting_ident, is_ordered, pos, pos), - }; - // current line is empty - if src[pos..curr_line].trim().is_empty() { - let next_line = match lines.next() { - Some(i) => i, - None => break (starting_ident, is_ordered, pos, pos), - }; - - // next line is emtpy, too - if src[curr_line..next_line].trim().is_empty() { - break (starting_ident, is_ordered, pos, next_line); + while let Some((mut cont_end, mut end, mut line)) = lines.next() { + // this line is emtpy + if line.is_empty() { + if let Some((next_cont_end, next_end, next_line)) = lines.next() { + // next line is emtpy, too + if next_line.is_empty() { + return (bullet, beg, pre_cont_end, next_end, false); + } else { + // move to next line + pre_end = end; + cont_end = next_cont_end; + end = next_end; + line = next_line; + } } else { - // move to next line - pos = curr_line; - curr_line = next_line; + return (bullet, beg, pre_cont_end, end, false); } } - let ident = ident!(src[pos..curr_line]); + let line_ident = Self::ident(line); - // less indented than the starting line - if ident < starting_ident { - break (starting_ident, is_ordered, pos, pos); + if line_ident < ident { + return (bullet, beg, pre_cont_end, pre_end, false); + } else if line_ident == ident { + return ( + bullet, + beg, + pre_cont_end, + pre_end, + Self::is_item(&line[ident..]).0, + ); } - if ident > starting_ident || Self::is_item(&src[pos + ident..]) { - pos = curr_line; - } else { - break (starting_ident, is_ordered, pos, pos); - } - }) + pre_end = end; + pre_cont_end = cont_end; + } + + (bullet, beg, src.len(), src.len(), false) } + + fn ident(src: &str) -> usize { + src.as_bytes() + .iter() + .position(|&c| c != b' ' && c != b'\t') + .unwrap_or(0) + } +} + +#[test] +fn is_item() { + assert_eq!(List::is_item("+ item"), (true, false)); + assert_eq!(List::is_item("- item"), (true, false)); + assert_eq!(List::is_item("10. item"), (true, true)); + assert_eq!(List::is_item("10) item"), (true, true)); + assert_eq!(List::is_item("1. item"), (true, true)); + assert_eq!(List::is_item("1) item"), (true, true)); + assert_eq!(List::is_item("10. "), (true, true)); + assert_eq!(List::is_item("10.\n"), (true, true)); + assert_eq!(List::is_item("10."), (false, false)); + assert_eq!(List::is_item("+"), (false, false)); + assert_eq!(List::is_item("-item"), (false, false)); + assert_eq!(List::is_item("+item"), (false, false)); } #[test] fn parse() { assert_eq!( - List::parse( - r"+ item1 -+ item2 -+ item3" - ), - Some((0, false, 23, 23)) + List::parse("+ item1\n+ item2\n+ item3", 0), + ("+ ", 2, 7, 8, true) ); assert_eq!( - List::parse( - r"* item1 -* item2 - -* item3" - ), - Some((0, false, 24, 24)) + List::parse("* item1\n\n* item2\n* item3", 0), + ("* ", 2, 7, 9, true) ); assert_eq!( - List::parse( - r"- item1 -- item2 - - -- item1" - ), - Some((0, false, 16, 18)) + List::parse("- item1\n\n\n- item2\n- item3", 0), + ("- ", 2, 7, 10, false) ); assert_eq!( - List::parse( - r"1. item1 - 2. item1 -3. item2" - ), - Some((0, true, 28, 28)) + List::parse("1. item1\n\n\n\n2. item2\n3. item3", 0), + ("1. ", 3, 8, 11, false) ); assert_eq!( - List::parse( - r" 1) item1 - 2) item1 - 3) item2" - ), - Some((2, true, 11, 11)) + List::parse(" + item1\n + item2\n+ item3", 2), + (" + ", 4, 21, 22, false) ); assert_eq!( - List::parse( - r" + item1 - 1) item1 - + item2" - ), - Some((2, false, 32, 32)) - ); - assert_eq!( - List::parse( - r" item1 - + item1 - + item2" - ), - None - ); - assert_eq!( - List::parse( - r#"- Lorem ipsum dolor sit amet, consectetur adipiscing elit. - - - Nulla et dolor vitae elit placerat sagittis. Aliquam a lobortis massa, - aliquam efficitur arcu. - - - Lorem ipsum dolor sit amet, consectetur adipiscing elit. - - - Phasellus auctor lacus a orci imperdiet, ut facilisis neque lobortis. - - - Proin condimentum id orci vitae lobortis. Nunc sollicitudin risus neque, - dapibus malesuada sem faucibus vitae. - -- Sed vitae dolor augue. Phasellus at rhoncus arcu. Suspendisse potenti. - - - Nulla faucibus, metus ut porta hendrerit, urna lorem porta metus, in tempus - nibh orci sed sapien. - - - Morbi tortor mi, dapibus vel faucibus a, iaculis sed turpis."# - ), - Some((0, false, 666, 666)) - ); -} - -#[test] -fn is_item() { - assert!(List::is_item("+ item")); - assert!(List::is_item("- item")); - assert!(List::is_item("10. item")); - assert!(List::is_item("10) item")); - assert!(List::is_item("1. item")); - assert!(List::is_item("1) item")); - assert!(List::is_item("10. ")); - assert!(List::is_item("10.\n")); - assert!(!List::is_item("10.")); - assert!(!List::is_item("-item")); - assert!(!List::is_item("+item")); -} - -#[test] -fn parse_item() { - assert_eq!(List::parse_item("+ Item1\n+ Item2", 0), (2, 8)); - assert_eq!(List::parse_item("+ Item1\n\n+ Item2", 0), (2, 9)); - assert_eq!( - List::parse_item( - r"+ item1 - + item1 - + item2", - 0 - ), - (2, 25) - ); - assert_eq!( - List::parse_item( - r" 1. item1 - + item2", - 2 - ), - (5, 11) - ); - assert_eq!( - List::parse_item( - r"+ It - em1 -+ Item2", - 0 - ), - (2, 11) - ); - assert_eq!( - List::parse_item( - r#"1) Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec sit amet - ullamcorper ante, nec pellentesque nisi. -2) Sed pulvinar ut arcu id aliquam.Curabitur quis justo eu magna maximus sodales. - Curabitur nisl nisi, ornare in enim id, sagittis facilisis magna. -3) Curabitur venenatis molestie eros sit amet congue. Nunc at molestie leo, vitae - malesuada nisi."#, - 0 - ), - (3, 119) - ); - assert_eq!( - List::parse_item( - r#"- Lorem ipsum dolor sit amet, consectetur adipiscing elit. - - - Nulla et dolor vitae elit placerat sagittis. Aliquam a lobortis massa, - aliquam efficitur arcu. - - - Lorem ipsum dolor sit amet, consectetur adipiscing elit. - - - Phasellus auctor lacus a orci imperdiet, ut facilisis neque lobortis. - - - Proin condimentum id orci vitae lobortis. Nunc sollicitudin risus neque, - dapibus malesuada sem faucibus vitae. - -- Sed vitae dolor augue. Phasellus at rhoncus arcu. Suspendisse potenti. - - - Nulla faucibus, metus ut porta hendrerit, urna lorem porta metus, in tempus - nibh orci sed sapien. - - - Morbi tortor mi, dapibus vel faucibus a, iaculis sed turpis."#, - 0 - ), - (2, 421) + List::parse(" + item1\n + item2\n + item3", 2), + (" + ", 4, 9, 10, true) ); + assert_eq!(List::parse("+\n", 0), ("+", 1, 1, 1, false)); + assert_eq!(List::parse("+\n+ item2\n+ item3", 0), ("+", 1, 1, 1, true)); + assert_eq!(List::parse("1) item1", 0), ("1) ", 3, 8, 8, false)); + assert_eq!(List::parse("1) item1\n", 0), ("1) ", 3, 8, 9, false)); } diff --git a/src/elements/mod.rs b/src/elements/mod.rs index 64404a5..9973c15 100644 --- a/src/elements/mod.rs +++ b/src/elements/mod.rs @@ -12,7 +12,11 @@ pub use self::keyword::{Key, Keyword}; pub use self::list::List; pub use self::rule::Rule; -#[cfg_attr(test, derive(PartialEq, Debug))] +use memchr::memchr; +use memchr::memchr_iter; + +#[cfg_attr(test, derive(PartialEq))] +#[derive(Debug)] pub enum Element<'a> { Paragraph { cont_end: usize, @@ -77,217 +81,195 @@ pub enum Element<'a> { List { ident: usize, ordered: bool, - cont_end: usize, - end: usize, }, } impl<'a> Element<'a> { - pub fn next_2(src: &'a str) -> (usize, Option>, Option<(Element<'a>, usize)>) { - let bytes = src.as_bytes(); - - let mut pos = skip_empty_line!(src, 0); + // return (element, off, next element, next offset) + // the end of first element is relative to the offset + // next offset is relative to the end of the first element + pub fn next_2(src: &'a str) -> (Option>, usize, Option<(Element<'a>, usize)>) { + // skip empty lines + let mut pos = match src.chars().position(|c| c != '\n') { + Some(pos) => pos, + None => return (None, src.len(), None), + }; let start = pos; - - if start == src.len() { - return (start, None, None); - } + let bytes = src.as_bytes(); + let mut line_ends = memchr_iter(b'\n', &bytes[start..]).map(|i| i + start); loop { - // Unlike other element, footnote definition must starts at column 0 - if bytes[pos] == b'[' { - if let Some((label, cont, off)) = FnDef::parse(&src[pos..]) { - break if pos == start { - (off + 1, Some(Element::FnDef { label, cont }), None) + let line_beg = pos; + + macro_rules! brk { + ($ele:expr, $off:expr) => { + break if line_beg == 0 || pos == start { + (Some($ele), start + $off, None) } else { ( - start, Some(Element::Paragraph { - cont_end: pos - 1, - end: pos, + cont_end: line_beg - start - 1, + end: line_beg - start, }), - Some((Element::FnDef { label, cont }, off + 1)), + start, + Some(($ele, $off)), ) }; + }; + } + + // Unlike other element, footnote definition must starts at column 0 + if bytes[pos..].starts_with(b"[fn:") { + if let Some((label, cont, off)) = FnDef::parse(&src[pos..]) { + brk!(Element::FnDef { label, cont }, off + 1); } } - let end = pos; + // FIXME: + if bytes[pos] == b'\n' { + break ( + Some(Element::Paragraph { + cont_end: pos - start - 1, + end: pos - start + 1, + }), + start, + None, + ); + } + pos = skip_space!(src, pos); - if pos <= src.len() { - macro_rules! brk { - ($ele:expr, $off:expr) => { - break if pos == start { - ($off, Some($ele), None) - } else { - ( - start, - Some(Element::Paragraph { - cont_end: end, - end: pos - 1, - }), - Some(($ele, $off)), - ) - }; - }; - } - - if bytes[pos] == b'+' - || bytes[pos] == b'-' - || bytes[pos] == b'*' - || (bytes[pos] >= b'0' && bytes[pos] <= b'9') - { - if let Some((ident, ordered, cont_end, list_end)) = List::parse(&src[end..]) { - let list = Element::List { - ident, - ordered, - cont_end, - end: list_end, - }; - break if pos == start { - (1, Some(list), None) - } else { - ( - start, - Some(Element::Paragraph { - cont_end: end, - end: end, - }), - Some((list, 1)), - ) - }; - } - } - - if bytes[pos] == b'\n' { - break ( - start, + let (is_item, ordered) = List::is_item(&src[pos..]); + if is_item { + let list = Element::List { + ident: pos - line_beg, + ordered, + }; + break if line_beg == start { + (Some(list), start, None) + } else { + ( Some(Element::Paragraph { - cont_end: end, - end: pos, + cont_end: line_beg - start - 1, + end: line_beg - start, }), - None, - ); + start, + Some((list, 1)), + ) + }; + } + + // TODO: LaTeX environment + if bytes[pos..].starts_with(b"\\begin{") {} + + // Rule + if bytes[pos] == b'-' { + let off = Rule::parse(&src[pos..]); + if off != 0 { + brk!(Element::Rule, off); } + } - // TODO: LaTeX environment - if bytes[pos] == b'\\' {} + // TODO: multiple lines fixed width area + if bytes[pos..].starts_with(b": ") || bytes[pos..].starts_with(b":\n") { + let eol = memchr(b'\n', &bytes[pos..]) + .map(|i| i + 1) + .unwrap_or_else(|| src.len() - pos); + brk!(Element::FixedWidth(&src[pos + 1..pos + eol].trim()), eol); + } - // Rule - if bytes[pos] == b'-' { - let off = Rule::parse(&src[pos..]); - if off != 0 { - brk!(Element::Rule, off); - } - } - - // TODO: multiple lines fixed width area - if bytes[pos] == b':' - && bytes - .get(pos + 1) - .map(|&b| b == b' ' || b == b'\n') - .unwrap_or(false) - { - let eol = memchr::memchr(b'\n', &src.as_bytes()[pos..]) - .map(|i| i + 1) - .unwrap_or_else(|| src.len() - pos); - brk!(Element::FixedWidth(&src[pos + 1..pos + eol]), eol); - } - - if bytes[pos] == b'#' && bytes.get(pos + 1).map(|&b| b == b'+').unwrap_or(false) { - if let Some((name, args, cont_beg, cont_end, end)) = Block::parse(&src[pos..]) { - let cont = &src[pos + cont_beg + 1..pos + cont_end - 1]; - match name.to_uppercase().as_str() { - "COMMENT" => brk!(Element::CommentBlock { args, cont }, end), - "EXAMPLE" => brk!(Element::ExampleBlock { args, cont }, end), - "EXPORT" => brk!(Element::ExportBlock { args, cont }, end), - "SRC" => brk!(Element::SrcBlock { args, cont }, end), - "VERSE" => brk!(Element::VerseBlock { args, cont }, end), - "CENTER" => brk!( - Element::CtrBlock { - args, - cont_end, - end, - }, - cont_beg - ), - "QUOTE" => brk!( - Element::QteBlock { - args, - cont_end, - end, - }, - cont_beg - ), - _ => brk!( - Element::SplBlock { - name, - args, - cont_end, - end - }, - cont_beg - ), - }; - } - - if let Some((name, args, cont_beg, cont_end, end)) = - DynBlock::parse(&src[pos..]) - { - brk!( - Element::DynBlock { - name, + if bytes[pos..].starts_with(b"#+") { + if let Some((name, args, cont_beg, cont_end, end)) = Block::parse(&src[pos..]) { + let cont = &src[pos + cont_beg + 1..pos + cont_end - 1]; + match name.to_uppercase().as_str() { + "COMMENT" => brk!(Element::CommentBlock { args, cont }, end), + "EXAMPLE" => brk!(Element::ExampleBlock { args, cont }, end), + "EXPORT" => brk!(Element::ExportBlock { args, cont }, end), + "SRC" => brk!(Element::SrcBlock { args, cont }, end), + "VERSE" => brk!(Element::VerseBlock { args, cont }, end), + "CENTER" => brk!( + Element::CtrBlock { args, cont_end, end, }, cont_beg - ) - } - - if let Some((key, value, off)) = Keyword::parse(&src[pos..]) { - brk!( - if let Key::Call = key { - Element::Call { value } - } else { - Element::Keyword { key, value } + ), + "QUOTE" => brk!( + Element::QteBlock { + args, + cont_end, + end, }, - off - ) - } + cont_beg + ), + _ => brk!( + Element::SplBlock { + name, + args, + cont_end, + end + }, + cont_beg + ), + }; } - // Comment - // TODO: multiple lines comment - if bytes[pos] == b'#' && bytes.get(pos + 1).map(|&b| b == b' ').unwrap_or(false) { - let eol = memchr::memchr(b'\n', &src.as_bytes()[pos..]) - .map(|i| i + 1) - .unwrap_or_else(|| src.len() - pos); - brk!(Element::Comment(&src[pos + 1..pos + eol]), eol); + if let Some((name, args, cont_beg, cont_end, end)) = DynBlock::parse(&src[pos..]) { + brk!( + Element::DynBlock { + name, + args, + cont_end, + end, + }, + cont_beg + ) + } + + if let Some((key, value, off)) = Keyword::parse(&src[pos..]) { + brk!( + if let Key::Call = key { + Element::Call { value } + } else { + Element::Keyword { key, value } + }, + off + ) } } - if let Some(off) = memchr::memchr(b'\n', &src.as_bytes()[pos..]) { - pos += off + 1; - // last char - if pos == src.len() { + // Comment + // TODO: multiple lines comment + if bytes[pos..].starts_with(b"# ") || bytes[pos..].starts_with(b"#\n") { + let eol = memchr(b'\n', &bytes[pos..]) + .map(|i| i + 1) + .unwrap_or_else(|| src.len() - pos); + brk!(Element::Comment(&src[pos + 1..pos + eol].trim()), eol); + } + + // move to the beginning of the next line + if let Some(off) = line_ends.next() { + pos = off + 1; + + // the last character + if pos >= src.len() { break ( - start, Some(Element::Paragraph { - cont_end: pos - 1, - end: pos, + cont_end: src.len() - start - 1, + end: src.len() - start, }), + start, None, ); } } else { break ( - start, Some(Element::Paragraph { - cont_end: src.len(), - end: src.len(), + cont_end: src.len() - start, + end: src.len() - start, }), + start, None, ); } @@ -297,6 +279,102 @@ impl<'a> Element<'a> { #[test] fn next_2() { + use self::Element::*; + + assert_eq!(Element::next_2("\n\n\n"), (None, 3, None)); + + let len = "Lorem ipsum dolor sit amet.".len(); + assert_eq!( + Element::next_2("\nLorem ipsum dolor sit amet.\n\n\n"), + ( + Some(Paragraph { + cont_end: len, + end: len + 2, + }), + 1, + None + ) + ); + assert_eq!( + Element::next_2("\n\nLorem ipsum dolor sit amet.\n\n"), + ( + Some(Paragraph { + cont_end: len, + end: len + 2, + }), + 2, + None + ) + ); + assert_eq!( + Element::next_2("\nLorem ipsum dolor sit amet.\n"), + ( + Some(Paragraph { + cont_end: len, + end: len + 1, + }), + 1, + None + ) + ); + assert_eq!( + Element::next_2("\n\n\nLorem ipsum dolor sit amet."), + ( + Some(Paragraph { + cont_end: len, + end: len, + }), + 3, + None + ) + ); + + assert_eq!( + Element::next_2("\n\n\n: Lorem ipsum dolor sit amet.\n"), + ( + Some(FixedWidth("Lorem ipsum dolor sit amet.")), + "\n\n\n: Lorem ipsum dolor sit amet.\n".len(), + None + ) + ); + assert_eq!( + Element::next_2("\n\n\n: Lorem ipsum dolor sit amet."), + ( + Some(FixedWidth("Lorem ipsum dolor sit amet.")), + "\n\n\n: Lorem ipsum dolor sit amet.".len(), + None + ) + ); + + assert_eq!( + Element::next_2("\n\nLorem ipsum dolor sit amet.\n: Lorem ipsum dolor sit amet.\n"), + ( + Some(Paragraph { + cont_end: len, + end: len + 1, + }), + 2, + Some((FixedWidth("Lorem ipsum dolor sit amet."), 30)) + ) + ); + + assert_eq!( + Element::next_2("\n\nLorem ipsum dolor sit amet.\n+ Lorem ipsum dolor sit amet.\n"), + ( + Some(Paragraph { + cont_end: len, + end: len + 1, + }), + 2, + Some(( + List { + ident: 0, + ordered: false, + }, + 1 + )) + ) + ); + // TODO: more tests - assert_eq!(Element::next_2("\n\n\n\n"), (4, None, None)); } diff --git a/src/export/html.rs b/src/export/html.rs index 88a06d9..a552721 100644 --- a/src/export/html.rs +++ b/src/export/html.rs @@ -73,7 +73,7 @@ impl Handler for HtmlHandler { fn handle_list_end(&mut self, w: &mut W, ordered: bool) -> Result<()> { write!(w, "{}", if ordered { "" } else { "" }) } - fn handle_list_beg_item(&mut self, w: &mut W) -> Result<()> { + fn handle_list_beg_item(&mut self, w: &mut W, bullet: &str) -> Result<()> { write!(w, "
  • ") } fn handle_list_end_item(&mut self, w: &mut W) -> Result<()> { diff --git a/src/export/mod.rs b/src/export/mod.rs index fa53dfa..3cad105 100644 --- a/src/export/mod.rs +++ b/src/export/mod.rs @@ -30,7 +30,7 @@ pub trait Handler { fn handle_dyn_block_end(&mut self, w: &mut W) -> Result<()>; fn handle_list_beg(&mut self, w: &mut W, ordered: bool) -> Result<()>; fn handle_list_end(&mut self, w: &mut W, ordered: bool) -> Result<()>; - fn handle_list_beg_item(&mut self, w: &mut W) -> Result<()>; + fn handle_list_beg_item(&mut self, w: &mut W, bullet: &str) -> Result<()>; fn handle_list_end_item(&mut self, w: &mut W) -> Result<()>; fn handle_call(&mut self, w: &mut W, value: &str) -> Result<()>; fn handle_clock(&mut self, w: &mut W) -> Result<()>; @@ -113,7 +113,7 @@ impl<'a, W: Write, H: Handler> Render<'a, W, H> { DynBlockEnd => h.handle_dyn_block_end(w)?, ListBeg { ordered } => h.handle_list_beg(w, ordered)?, ListEnd { ordered } => h.handle_list_end(w, ordered)?, - ListItemBeg => h.handle_list_beg_item(w)?, + ListItemBeg { bullet } => h.handle_list_beg_item(w, bullet)?, ListItemEnd => h.handle_list_end_item(w)?, Call { value } => h.handle_call(w, value)?, Clock => h.handle_clock(w)?, diff --git a/src/lib.rs b/src/lib.rs index d168696..00f0af4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,6 +8,7 @@ mod utils; mod elements; mod export; mod headline; +mod lines; mod objects; mod parser; diff --git a/src/lines.rs b/src/lines.rs new file mode 100644 index 0000000..4c404b8 --- /dev/null +++ b/src/lines.rs @@ -0,0 +1,54 @@ +use memchr::{memchr_iter, Memchr}; +use std::iter::{once, Chain, Once}; + +pub struct Lines<'a> { + src: &'a str, + iter: Chain, Once>, + start: usize, + pre_cont_end: usize, +} + +impl<'a> Lines<'a> { + pub fn new(src: &'a str) -> Lines<'a> { + Lines { + src, + iter: memchr_iter(b'\n', &src.as_bytes()).chain(once(src.len())), + start: 0, + pre_cont_end: 0, + } + } +} + +impl<'a> Iterator for Lines<'a> { + type Item = (usize, usize, &'a str); + + #[inline] + fn next(&mut self) -> Option<(usize, usize, &'a str)> { + self.iter.next().map(|i| { + let (line, cont_end) = if i != self.src.len() && self.src.as_bytes()[i - 1] == b'\r' { + (&self.src[self.start..i - 1], i - 1) + } else { + (&self.src[self.start..i], i) + }; + self.start = if i != self.src.len() { i + 1 } else { i }; + (cont_end, self.start, line) + }) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +#[test] +fn lines() { + let mut lines = Lines::new("foo\r\nbar\n\nbaz\n"); + + assert_eq!(Some((3, 5, "foo")), lines.next()); + assert_eq!(Some((8, 9, "bar")), lines.next()); + assert_eq!(Some((9, 10, "")), lines.next()); + assert_eq!(Some((13, 14, "baz")), lines.next()); + assert_eq!(Some((14, 14, "")), lines.next()); + assert_eq!(None, lines.next()); +} diff --git a/src/objects/mod.rs b/src/objects/mod.rs index adb7fa6..0fdfac6 100644 --- a/src/objects/mod.rs +++ b/src/objects/mod.rs @@ -51,13 +51,13 @@ impl<'a> Object<'a> { return (Object::Text(src), src.len(), None); } - let chars = ascii_chars!('@', ' ', '"', '(', '\n', '{', '<', '['); + let bs = bytes!(b'@', b' ', b'"', b'(', b'\n', b'{', b'<', b'['); let mut pos = 0; loop { macro_rules! brk { ($obj:expr, $off:expr, $pos:expr) => { - break if pos == 0 { + break if $pos == 0 { ($obj, $off, None) } else { (Object::Text(&src[0..$pos]), $pos, Some(($obj, $off))) @@ -159,8 +159,8 @@ impl<'a> Object<'a> { _ => (), } - if let Some(off) = chars - .find(&src[pos + 1..]) + if let Some(off) = bs + .find(&bytes[pos + 1..]) .map(|i| i + pos + 1) .filter(|&i| i < src.len() - 2) { diff --git a/src/parser.rs b/src/parser.rs index 81f0905..d4a213f 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -5,54 +5,19 @@ use objects::*; #[cfg_attr(test, derive(PartialEq))] #[derive(Copy, Clone, Debug)] pub enum Container { - Headline { - beg: usize, - end: usize, - }, - Section { - end: usize, - }, - Paragraph { - cont_end: usize, - end: usize, - }, - CtrBlock { - cont_end: usize, - end: usize, - }, - QteBlock { - cont_end: usize, - end: usize, - }, - SplBlock { - cont_end: usize, - end: usize, - }, - DynBlock { - cont_end: usize, - end: usize, - }, - List { - ident: usize, - ordered: bool, - cont_end: usize, - end: usize, - }, - ListItem { - end: usize, - }, - Italic { - end: usize, - }, - Strike { - end: usize, - }, - Bold { - end: usize, - }, - Underline { - end: usize, - }, + Headline { beg: usize, end: usize }, + Section { end: usize }, + Paragraph { cont_end: usize, end: usize }, + CtrBlock { cont_end: usize, end: usize }, + QteBlock { cont_end: usize, end: usize }, + SplBlock { cont_end: usize, end: usize }, + DynBlock { cont_end: usize, end: usize }, + List { ident: usize, ordered: bool }, + ListItem { cont_end: usize, end: usize }, + Italic { end: usize }, + Strike { end: usize }, + Bold { end: usize }, + Underline { end: usize }, } #[cfg_attr(test, derive(PartialEq))] @@ -109,7 +74,9 @@ pub enum Event<'a> { ListEnd { ordered: bool, }, - ListItemBeg, + ListItemBeg { + bullet: &'a str, + }, ListItemEnd, Call { @@ -166,6 +133,7 @@ pub struct Parser<'a> { off: usize, ele_buf: Option<(Element<'a>, usize)>, obj_buf: Option<(Object<'a>, usize)>, + has_more_item: bool, } impl<'a> Parser<'a> { @@ -176,11 +144,13 @@ impl<'a> Parser<'a> { off: 0, ele_buf: None, obj_buf: None, + has_more_item: false, } } fn next_sec_or_hdl(&mut self) -> Event<'a> { let end = Headline::find_level(&self.text[self.off..], std::usize::MAX); + debug_assert!(end <= self.text.len()); if end != 0 { self.stack.push(Container::Section { end: self.off + end, @@ -192,7 +162,9 @@ impl<'a> Parser<'a> { } fn next_hdl(&mut self) -> Event<'a> { - let (hdl, off, end) = Headline::parse(&self.text[self.off..]); + let tail = &self.text[self.off..]; + let (hdl, off, end) = Headline::parse(tail); + debug_assert!(end <= self.text.len()); self.stack.push(Container::Headline { beg: self.off + off, end: self.off + end, @@ -201,104 +173,126 @@ impl<'a> Parser<'a> { Event::HeadlineBeg(hdl) } - fn next_ele(&mut self, end: usize) -> Event<'a> { + fn next_ele(&mut self, text: &'a str) -> Event<'a> { let (ele, off) = self .ele_buf .take() .map(|(ele, off)| (Some(ele), off)) .unwrap_or_else(|| { - let (off, ele, next_2) = Element::next_2(&self.text[self.off..end]); - self.ele_buf = next_2; + let (ele, off, next_ele) = Element::next_2(text); + self.ele_buf = next_ele; (ele, off) }); - debug_assert!(self.off + off <= end); + debug_assert!(off <= text.len()); - if let Some(ele) = ele { - match ele { - Element::Paragraph { cont_end, end } => self.stack.push(Container::Paragraph { + self.off += off; + + match ele { + Some(Element::Paragraph { cont_end, end }) => { + debug_assert!(cont_end <= text.len() && end <= text.len()); + self.stack.push(Container::Paragraph { cont_end: cont_end + self.off, end: end + self.off, - }), - Element::QteBlock { end, cont_end, .. } => self.stack.push(Container::QteBlock { - cont_end: cont_end + self.off, - end: end + self.off, - }), - Element::CtrBlock { end, cont_end, .. } => self.stack.push(Container::CtrBlock { - cont_end: cont_end + self.off, - end: end + self.off, - }), - Element::SplBlock { end, cont_end, .. } => self.stack.push(Container::SplBlock { - cont_end: cont_end + self.off, - end: end + self.off, - }), - Element::DynBlock { end, cont_end, .. } => self.stack.push(Container::DynBlock { - cont_end: cont_end + self.off, - end: end + self.off, - }), - Element::List { - ident, - ordered, - cont_end, - end, - } => self.stack.push(Container::List { - ident, - ordered, - cont_end: cont_end + self.off, - end: end + self.off, - }), - _ => (), + }); + Event::ParagraphBeg } - - self.off += off; - - match ele { - Element::Call { value } => Event::Call { value }, - Element::Comment(c) => Event::Comment(c), - Element::CommentBlock { args, cont } => Event::CommentBlock { args, cont }, - Element::CtrBlock { .. } => Event::CtrBlockBeg, - Element::DynBlock { name, args, .. } => Event::DynBlockBeg { name, args }, - Element::ExampleBlock { args, cont } => Event::ExampleBlock { args, cont }, - Element::ExportBlock { args, cont } => Event::ExportBlock { args, cont }, - Element::FixedWidth(f) => Event::FixedWidth(f), - Element::FnDef { label, cont } => Event::FnDef { label, cont }, - Element::Keyword { key, value } => Event::Keyword { key, value }, - Element::List { ordered, .. } => Event::ListBeg { ordered }, - Element::Paragraph { .. } => Event::ParagraphBeg, - Element::QteBlock { .. } => Event::QteBlockBeg, - Element::Rule => Event::Rule, - Element::SplBlock { name, args, .. } => Event::SplBlockBeg { name, args }, - Element::SrcBlock { args, cont } => Event::SrcBlock { args, cont }, - Element::VerseBlock { args, cont } => Event::VerseBlock { args, cont }, + Some(Element::QteBlock { end, cont_end, .. }) => { + debug_assert!(cont_end <= text.len() && end <= text.len()); + self.stack.push(Container::QteBlock { + cont_end: cont_end + self.off, + end: end + self.off, + }); + Event::QteBlockBeg } - } else { - self.off += off; - self.end() + Some(Element::CtrBlock { end, cont_end, .. }) => { + debug_assert!(cont_end <= text.len() && end <= text.len()); + self.stack.push(Container::CtrBlock { + cont_end: cont_end + self.off, + end: end + self.off, + }); + Event::CtrBlockBeg + } + Some(Element::SplBlock { + name, + args, + end, + cont_end, + }) => { + debug_assert!(cont_end <= text.len() && end <= text.len()); + self.stack.push(Container::SplBlock { + cont_end: cont_end + self.off, + end: end + self.off, + }); + Event::SplBlockBeg { name, args } + } + Some(Element::DynBlock { + name, + args, + cont_end, + end, + }) => { + debug_assert!(cont_end <= text.len() && end <= text.len()); + self.stack.push(Container::DynBlock { + cont_end: cont_end + self.off, + end: end + self.off, + }); + Event::DynBlockBeg { name, args } + } + Some(Element::List { ident, ordered }) => { + self.stack.push(Container::List { ident, ordered }); + self.has_more_item = true; + Event::ListBeg { ordered } + } + Some(Element::Call { value }) => Event::Call { value }, + Some(Element::Comment(c)) => Event::Comment(c), + Some(Element::CommentBlock { args, cont }) => Event::CommentBlock { args, cont }, + Some(Element::ExampleBlock { args, cont }) => Event::ExampleBlock { args, cont }, + Some(Element::ExportBlock { args, cont }) => Event::ExportBlock { args, cont }, + Some(Element::FixedWidth(f)) => Event::FixedWidth(f), + Some(Element::FnDef { label, cont }) => Event::FnDef { label, cont }, + Some(Element::Keyword { key, value }) => Event::Keyword { key, value }, + Some(Element::Rule) => Event::Rule, + Some(Element::SrcBlock { args, cont }) => Event::SrcBlock { args, cont }, + Some(Element::VerseBlock { args, cont }) => Event::VerseBlock { args, cont }, + None => self.end(), } } - fn next_obj(&mut self, end: usize) -> Event<'a> { + fn next_obj(&mut self, text: &'a str) -> Event<'a> { let (obj, off) = self.obj_buf.take().unwrap_or_else(|| { - let (obj, off, next_2) = Object::next_2(&self.text[self.off..end]); - self.obj_buf = next_2; + let (obj, off, next_obj) = Object::next_2(text); + self.obj_buf = next_obj; (obj, off) }); - debug_assert!(self.off + off <= end); + debug_assert!(off <= text.len()); match obj { - Object::Underline { end } => self.stack.push(Container::Underline { - end: self.off + end, - }), - Object::Strike { end } => self.stack.push(Container::Strike { - end: self.off + end, - }), - Object::Italic { end } => self.stack.push(Container::Italic { - end: self.off + end, - }), - Object::Bold { end } => self.stack.push(Container::Bold { - end: self.off + end, - }), + Object::Underline { end } => { + debug_assert!(end <= text.len()); + self.stack.push(Container::Underline { + end: self.off + end, + }); + } + Object::Strike { end } => { + debug_assert!(end <= text.len()); + self.stack.push(Container::Strike { + end: self.off + end, + }); + } + Object::Italic { end } => { + debug_assert!(end <= text.len()); + self.stack.push(Container::Italic { + end: self.off + end, + }); + } + Object::Bold { end } => { + debug_assert!(end <= text.len()); + self.stack.push(Container::Bold { + end: self.off + end, + }); + } _ => (), } @@ -324,13 +318,16 @@ impl<'a> Parser<'a> { } } - fn next_list_item(&mut self, end: usize, ident: usize) -> Event<'a> { - let (beg, end) = List::parse_item(&self.text[self.off..end], ident); + fn next_list_item(&mut self, ident: usize) -> Event<'a> { + let (bullet, cont_beg, cont_end, end, has_more) = + List::parse(&self.text[self.off..], ident); self.stack.push(Container::ListItem { + cont_end: self.off + cont_end, end: self.off + end, }); - self.off += beg; - Event::ListItemBeg + self.off += cont_beg; + self.has_more_item = has_more; + Event::ListItemBeg { bullet } } fn end(&mut self) -> Event<'a> { @@ -378,58 +375,50 @@ impl<'a> Iterator for Parser<'a> { Container::DynBlock { cont_end, end, .. } | Container::CtrBlock { cont_end, end, .. } | Container::QteBlock { cont_end, end, .. } - | Container::SplBlock { cont_end, end, .. } => { + | Container::SplBlock { cont_end, end, .. } + | Container::ListItem { cont_end, end } => { + let text = &self.text[self.off..cont_end]; if self.off >= cont_end { self.off = end; self.end() } else { - self.next_ele(cont_end) + self.next_ele(text) } } - Container::List { - cont_end, - end, - ident, - .. - } => { - if self.off >= cont_end { - self.off = end; - self.end() + Container::List { ident, .. } => { + if self.has_more_item { + self.next_list_item(ident) } else { - self.next_list_item(cont_end, ident) - } - } - Container::ListItem { end } => { - if self.off >= end { self.end() - } else { - self.next_ele(end) } } Container::Section { end } => { + let text = &self.text[self.off..end]; if self.off >= end { self.end() } else { - self.next_ele(end) + self.next_ele(text) } } Container::Paragraph { cont_end, end } => { + let text = &self.text[self.off..cont_end]; if self.off >= cont_end { self.off = end; self.end() } else { - self.next_obj(cont_end) + self.next_obj(text) } } Container::Bold { end } | Container::Underline { end } | Container::Italic { end } | Container::Strike { end } => { + let text = &self.text[self.off..end]; if self.off >= end { self.off += 1; self.end() } else { - self.next_obj(end) + self.next_obj(text) } } })