From 73c6e9de8f8d87685bf402f9f75b9009074f22a4 Mon Sep 17 00:00:00 2001 From: PoiScript Date: Wed, 30 Oct 2019 11:31:37 +0800 Subject: [PATCH] feat(parser): update list parsing --- src/elements/list.rs | 442 +++++++++++++++++++++++++------------------ src/export/org.rs | 11 +- src/parsers.rs | 244 ++++++++++++------------ tests/blank.rs | 18 ++ 4 files changed, 408 insertions(+), 307 deletions(-) diff --git a/src/elements/list.rs b/src/elements/list.rs index 3ede500..771ce2d 100644 --- a/src/elements/list.rs +++ b/src/elements/list.rs @@ -1,67 +1,29 @@ use std::borrow::Cow; use std::iter::once; -use memchr::memchr_iter; +use memchr::{memchr, memchr_iter}; +use nom::{ + branch::alt, + bytes::complete::tag, + character::complete::{digit1, space0}, + combinator::{map, recognize}, + error::ParseError, + sequence::terminated, + IResult, +}; /// Plain List Element #[cfg_attr(test, derive(PartialEq))] #[cfg_attr(feature = "ser", derive(serde::Serialize))] #[derive(Debug)] pub struct List { + /// List indent, number of whitespaces pub indent: usize, + /// List's type, determined by the first item of this list pub ordered: bool, -} - -impl List { - #[inline] - pub(crate) fn parse(text: &str) -> Option<(&str, List, &str)> { - let (indent, tail) = text - .find(|c| c != ' ') - .map(|off| (off, &text[off..])) - .unwrap_or((0, text)); - - let ordered = is_item(tail)?; - - let mut last_end = 0; - let mut start = 0; - - for i in memchr_iter(b'\n', text.as_bytes()) - .map(|i| i + 1) - .chain(once(text.len())) - { - let line = &text[start..i]; - if let Some(line_indent) = line.find(|c: char| !c.is_whitespace()) { - if line_indent < indent - || (line_indent == indent && is_item(&line[line_indent..]).is_none()) - { - return Some(( - &text[start..], - List { indent, ordered }, - &text[0..start - 1], - )); - } else { - last_end = 0; - start = i; - continue; - } - } else { - // this line is empty - if last_end != 0 { - return Some((&text[i..], List { indent, ordered }, &text[0..last_end])); - } else { - last_end = start; - start = i; - continue; - } - } - } - - if last_end != 0 { - Some(("", List { indent, ordered }, &text[0..last_end])) - } else { - Some(("", List { indent, ordered }, text)) - } - } + /// Numbers of blank lines between last list's line and next non-blank line + /// or buffer's end + pub post_blank: usize, } /// List Item Elemenet @@ -71,185 +33,287 @@ impl List { pub struct ListItem<'a> { /// List item bullet pub bullet: Cow<'a, str>, + /// List item indent, number of whitespaces + pub indent: usize, + /// List item type + pub ordered: bool, + // TODO checkbox + // TODO counter + // TODO tag } impl ListItem<'_> { #[inline] - pub(crate) fn parse(text: &str, indent: usize) -> (&str, ListItem, &str) { - debug_assert!(&text[0..indent].trim().is_empty()); - let off = &text[indent..].find(' ').unwrap() + 1 + indent; - - let bytes = text.as_bytes(); - let mut lines = memchr_iter(b'\n', bytes) - .map(|i| i + 1) - .chain(once(text.len())); - let mut pos = lines.next().unwrap(); - - for i in lines { - let line = &text[pos..i]; - if let Some(line_indent) = line.find(|c: char| !c.is_whitespace()) { - if line_indent == indent { - return ( - &text[pos..], - ListItem { - bullet: text[indent..off].into(), - }, - &text[off..pos], - ); - } - } - pos = i; - } - - ( - "", - ListItem { - bullet: text[indent..off].into(), - }, - &text[off..], - ) + pub(crate) fn parse(input: &str) -> Option<(&str, (ListItem, &str))> { + list_item::<()>(input).ok() } pub fn into_owned(self) -> ListItem<'static> { ListItem { bullet: self.bullet.into_owned().into(), + indent: self.indent, + ordered: self.ordered, } } } -#[inline] -pub fn is_item(text: &str) -> Option { - let bytes = text.as_bytes(); - match bytes.get(0)? { - b'*' | b'-' | b'+' => { - if text.len() > 1 && (bytes[1] == b' ' || bytes[1] == b'\n') { - Some(false) - } else { - None +fn list_item<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&str, (ListItem, &str), E> { + let (input, indent) = map(space0, |s: &str| s.len())(input)?; + let (input, bullet) = recognize(alt(( + tag("+ "), + tag("* "), + tag("- "), + terminated(digit1, tag(". ")), + )))(input)?; + let (input, contents) = list_item_contents(input, indent); + Ok(( + input, + ( + ListItem { + bullet: bullet.into(), + indent, + ordered: bullet.starts_with(|c: char| c.is_ascii_digit()), + }, + contents, + ), + )) +} + +fn list_item_contents(input: &str, indent: usize) -> (&str, &str) { + let mut last_end = memchr(b'\n', input.as_bytes()) + .map(|i| i + 1) + .unwrap_or_else(|| input.len()); + + for i in memchr_iter(b'\n', input.as_bytes()) + .map(|i| i + 1) + .chain(once(input.len())) + .skip(1) + { + if input[last_end..i] + .as_bytes() + .iter() + .all(u8::is_ascii_whitespace) + { + let x = memchr(b'\n', &input[i..].as_bytes()) + .map(|ii| i + ii + 1) + .unwrap_or_else(|| input.len()); + + // two consecutive empty lines + if input[i..x].as_bytes().iter().all(u8::is_ascii_whitespace) { + return (&input[x..], &input[0..x]); } } - b'0'..=b'9' => { - let i = bytes - .iter() - .position(|&c| !c.is_ascii_digit()) - .unwrap_or_else(|| text.len() - 1); - if (bytes[i] == b'.' || bytes[i] == b')') - && text.len() > i + 1 - && (bytes[i + 1] == b' ' || bytes[i + 1] == b'\n') - { - Some(true) - } else { - None - } + + // line less or equally indented than the starting line + if input[last_end..i] + .as_bytes() + .iter() + .take(indent + 1) + .any(|c| !c.is_ascii_whitespace()) + { + return (&input[last_end..], &input[0..last_end]); } - _ => None, + + last_end = i; } + + ("", input) } #[test] -fn test_is_item() { - assert_eq!(is_item("+ item"), Some(false)); - assert_eq!(is_item("- item"), Some(false)); - assert_eq!(is_item("10. item"), Some(true)); - assert_eq!(is_item("10) item"), Some(true)); - assert_eq!(is_item("1. item"), Some(true)); - assert_eq!(is_item("1) item"), Some(true)); - assert_eq!(is_item("10. "), Some(true)); - assert_eq!(is_item("10.\n"), Some(true)); - assert_eq!(is_item("10."), None); - assert_eq!(is_item("+"), None); - assert_eq!(is_item("-item"), None); - assert_eq!(is_item("+item"), None); -} +fn parse() { + use nom::error::VerboseError; -#[test] -fn list_parse() { assert_eq!( - List::parse("+ item1\n+ item2"), - Some(( - "", - List { - indent: 0, - ordered: false, - }, - "+ item1\n+ item2" + list_item::>( + r#"+ item1 ++ item2"# + ), + Ok(( + "+ item2", + ( + ListItem { + bullet: "+ ".into(), + indent: 0, + ordered: false, + }, + r#"item1 +"# + ) )) ); assert_eq!( - List::parse("* item1\n \n* item2"), - Some(( - "", - List { - indent: 0, - ordered: false, - }, - "* item1\n \n* item2" - )) - ); - assert_eq!( - List::parse("* item1\n \n \n* item2"), - Some(( + list_item::>( + r#"* item1 + +* item2"# + ), + Ok(( "* item2", - List { - indent: 0, - ordered: false, - }, - "* item1\n" + ( + ListItem { + bullet: "* ".into(), + indent: 0, + ordered: false, + }, + r#"item1 + +"# + ) )) ); assert_eq!( - List::parse("* item1\n \n "), - Some(( - "", - List { - indent: 0, - ordered: false, - }, - "* item1\n" + list_item::>( + r#"* item1 + + +* item2"# + ), + Ok(( + "* item2", + ( + ListItem { + bullet: "* ".into(), + indent: 0, + ordered: false, + }, + r#"item1 + + +"# + ) )) ); assert_eq!( - List::parse("+ item1\n + item2\n "), - Some(( + list_item::>( + r#"* item1 + +"# + ), + Ok(( "", - List { - indent: 0, - ordered: false, - }, - "+ item1\n + item2\n" + ( + ListItem { + bullet: "* ".into(), + indent: 0, + ordered: false, + }, + r#"item1 + +"# + ) )) ); assert_eq!( - List::parse("+ item1\n \n + item2\n \n+ item 3"), - Some(( + list_item::>( + r#"+ item1 + + item2 +"# + ), + Ok(( "", - List { - indent: 0, - ordered: false, - }, - "+ item1\n \n + item2\n \n+ item 3" + ( + ListItem { + bullet: "+ ".into(), + indent: 0, + ordered: false, + }, + r#"item1 + + item2 +"# + ) )) ); assert_eq!( - List::parse(" + item1\n \n + item2"), - Some(( - "", - List { - indent: 2, - ordered: false, - }, - " + item1\n \n + item2" + list_item::>( + r#"+ item1 + + + item2 + ++ item 3"# + ), + Ok(( + "+ item 3", + ( + ListItem { + bullet: "+ ".into(), + indent: 0, + ordered: false, + }, + r#"item1 + + + item2 + +"# + ) )) ); assert_eq!( - List::parse("+ 1\n\n - 2\n\n - 3\n\n+ 4"), - Some(( - "", - List { - indent: 0, - ordered: false, - }, - "+ 1\n\n - 2\n\n - 3\n\n+ 4" + list_item::>( + r#" + item1 + + + item2"# + ), + Ok(( + " + item2", + ( + ListItem { + bullet: "+ ".into(), + indent: 2, + ordered: false, + }, + r#"item1 + +"# + ) + )) + ); + assert_eq!( + list_item::>( + r#" 1. item1 +2. item2 + 3. item3"# + ), + Ok(( + r#"2. item2 + 3. item3"#, + ( + ListItem { + bullet: "1. ".into(), + indent: 2, + ordered: true, + }, + r#"item1 +"# + ) + )) + ); + assert_eq!( + list_item::>( + r#"+ 1 + + - 2 + + - 3 + ++ 4"# + ), + Ok(( + "+ 4", + ( + ListItem { + bullet: "+ ".into(), + indent: 0, + ordered: false, + }, + r#"1 + + - 2 + + - 3 + +"# + ) )) ); } diff --git a/src/export/org.rs b/src/export/org.rs index 0146a5e..6074fa3 100644 --- a/src/export/org.rs +++ b/src/export/org.rs @@ -39,7 +39,12 @@ pub trait OrgHandler>: Default { Headline { .. } => (), List(_list) => (), Italic => write!(w, "/")?, - ListItem(list_item) => write!(w, "{}", list_item.bullet)?, + ListItem(list_item) => { + for _ in 0..list_item.indent { + write!(&mut w, " ")?; + } + write!(&mut w, "{}", list_item.bullet)?; + } Paragraph { .. } => (), Section => (), Strike => write!(w, "+")?, @@ -216,7 +221,9 @@ pub trait OrgHandler>: Default { write_blank_lines(w, dyn_block.post_blank)?; } Headline { .. } => (), - List(_list) => (), + List(list) => { + write_blank_lines(w, list.post_blank)?; + } Italic => write!(w, "/")?, ListItem(_) => (), Paragraph { post_blank } => { diff --git a/src/parsers.rs b/src/parsers.rs index 3825338..38c4c87 100644 --- a/src/parsers.rs +++ b/src/parsers.rs @@ -20,34 +20,46 @@ use crate::elements::{ }; pub trait ElementArena<'a> { - fn append_element>>(&mut self, element: T, parent: NodeId) -> NodeId; - fn insert_before_last_child>>( - &mut self, - element: T, - parent: NodeId, - ) -> NodeId; + fn append(&mut self, element: T, parent: NodeId) -> NodeId + where + T: Into>; + fn insert_before_last_child(&mut self, element: T, parent: NodeId) -> NodeId + where + T: Into>; + fn set(&mut self, node: NodeId, element: T) + where + T: Into>; } impl<'a> ElementArena<'a> for Arena> { - fn append_element>>(&mut self, element: T, parent: NodeId) -> NodeId { + fn append(&mut self, element: T, parent: NodeId) -> NodeId + where + T: Into>, + { let node = self.new_node(element.into()); parent.append(node, self); node } - fn insert_before_last_child>>( - &mut self, - element: T, - parent: NodeId, - ) -> NodeId { + fn insert_before_last_child(&mut self, element: T, parent: NodeId) -> NodeId + where + T: Into>, + { if let Some(child) = self[parent].last_child() { let node = self.new_node(element.into()); child.insert_before(node, self); node } else { - self.append_element(element, parent) + self.append(element, parent) } } + + fn set(&mut self, node: NodeId, element: T) + where + T: Into>, + { + *self[node].get_mut() = element.into(); + } } pub struct OwnedArena<'a, 'b, 'c> { @@ -65,50 +77,39 @@ impl<'a, 'b, 'c> OwnedArena<'a, 'b, 'c> { } impl<'a> ElementArena<'a> for OwnedArena<'a, '_, '_> { - fn append_element>>(&mut self, element: T, parent: NodeId) -> NodeId { - let node = self.arena.new_node(element.into().into_owned()); - parent.append(node, self.arena); - node + fn append(&mut self, element: T, parent: NodeId) -> NodeId + where + T: Into>, + { + self.arena.append(element.into().into_owned(), parent) } - fn insert_before_last_child>>( - &mut self, - element: T, - parent: NodeId, - ) -> NodeId { + fn insert_before_last_child(&mut self, element: T, parent: NodeId) -> NodeId + where + T: Into>, + { self.arena .insert_before_last_child(element.into().into_owned(), parent) } + + fn set(&mut self, node: NodeId, element: T) + where + T: Into>, + { + self.arena.set(node, element.into().into_owned()); + } } #[derive(Debug)] pub enum Container<'a> { - // List - List { - content: &'a str, - node: NodeId, - indent: usize, - }, // Block, List Item - Block { - content: &'a str, - node: NodeId, - }, + Block { content: &'a str, node: NodeId }, // Pargraph, Inline Markup - Inline { - content: &'a str, - node: NodeId, - }, + Inline { content: &'a str, node: NodeId }, // Headline - Headline { - content: &'a str, - node: NodeId, - }, + Headline { content: &'a str, node: NodeId }, // Document - Document { - content: &'a str, - node: NodeId, - }, + Document { content: &'a str, node: NodeId }, } pub fn parse_container<'a, T: ElementArena<'a>>( @@ -132,13 +133,6 @@ pub fn parse_container<'a, T: ElementArena<'a>>( Container::Inline { content, node } => { parse_inlines(arena, content, node, containers); } - Container::List { - content, - node, - indent, - } => { - parse_list_items(arena, content, indent, node, containers); - } } } } @@ -151,7 +145,7 @@ pub fn parse_headline_content<'a, T: ElementArena<'a>>( config: &ParseConfig, ) { let (tail, (title, content)) = Title::parse(content, config).unwrap(); - let node = arena.append_element(title, parent); + let node = arena.append(title, parent); containers.push(Container::Inline { content, node }); parse_section_and_headlines(arena, tail, parent, containers); } @@ -171,12 +165,12 @@ pub fn parse_section_and_headlines<'a, T: ElementArena<'a>>( for i in memchr_iter(b'\n', content.as_bytes()).chain(once(content.len())) { if let Some((mut tail, (headline_content, level))) = parse_headline(&content[last_end..]) { if last_end != 0 { - let node = arena.append_element(Element::Section, parent); + let node = arena.append(Element::Section, parent); let content = &content[0..last_end]; containers.push(Container::Block { content, node }); } - let node = arena.append_element(Element::Headline { level }, parent); + let node = arena.append(Element::Headline { level }, parent); containers.push(Container::Headline { content: headline_content, node, @@ -184,7 +178,7 @@ pub fn parse_section_and_headlines<'a, T: ElementArena<'a>>( while let Some((new_tail, (content, level))) = parse_headline(tail) { debug_assert_ne!(tail, new_tail); - let node = arena.append_element(Element::Headline { level }, parent); + let node = arena.append(Element::Headline { level }, parent); containers.push(Container::Headline { content, node }); tail = new_tail; } @@ -193,7 +187,7 @@ pub fn parse_section_and_headlines<'a, T: ElementArena<'a>>( last_end = i + 1; } - let node = arena.append_element(Element::Section, parent); + let node = arena.append(Element::Section, parent); containers.push(Container::Block { content, node }); } @@ -221,7 +215,7 @@ pub fn parse_blocks<'a, T: ElementArena<'a>>( debug_assert_ne!(tail, tail_); tail = tail_; - let node = arena.append_element( + let node = arena.append( Element::Paragraph { // including current line (&tail[0..i]) post_blank: blank + 1, @@ -259,7 +253,7 @@ pub fn parse_blocks<'a, T: ElementArena<'a>>( } if !text.is_empty() { - let node = arena.append_element(Element::Paragraph { post_blank: 0 }, parent); + let node = arena.append(Element::Paragraph { post_blank: 0 }, parent); containers.push(Container::Inline { content: &text[0..pos].trim_end(), @@ -276,19 +270,12 @@ pub fn parse_block<'a, T: ElementArena<'a>>( ) -> Option<&'a str> { // footnote definitions must be start at column 0 if let Some((tail, (fn_def, content))) = FnDef::parse(contents) { - let node = arena.append_element(fn_def, parent); + let node = arena.append(fn_def, parent); containers.push(Container::Block { content, node }); return Some(tail); } - if let Some((tail, list, content)) = List::parse(contents) { - let indent = list.indent; - let node = arena.append_element(list, parent); - containers.push(Container::List { - content, - node, - indent, - }); + if let Some(tail) = parse_list(arena, contents, parent, containers) { return Some(tail); } @@ -297,7 +284,7 @@ pub fn parse_block<'a, T: ElementArena<'a>>( match contents.as_bytes().get(0)? { b'C' => { let (tail, clock) = Clock::parse(contents)?; - arena.append_element(clock, parent); + arena.append(clock, parent); Some(tail) } b'\'' => { @@ -306,17 +293,17 @@ pub fn parse_block<'a, T: ElementArena<'a>>( } b'-' => { let (tail, rule) = Rule::parse(contents)?; - arena.append_element(rule, parent); + arena.append(rule, parent); Some(tail) } b':' => { if let Some((tail, (drawer, content))) = Drawer::parse(contents) { - let node = arena.append_element(drawer, parent); + let node = arena.append(drawer, parent); containers.push(Container::Block { content, node }); Some(tail) } else { let (tail, fixed_width) = FixedWidth::parse(contents)?; - arena.append_element(fixed_width, parent); + arena.append(fixed_width, parent); Some(tail) } } @@ -337,12 +324,12 @@ pub fn parse_block<'a, T: ElementArena<'a>>( ); Some(tail) } else if let Some((tail, (dyn_block, content))) = DynBlock::parse(contents) { - let node = arena.append_element(dyn_block, parent); + let node = arena.append(dyn_block, parent); containers.push(Container::Block { content, node }); Some(tail) } else if let Some((tail, (key, optional, value, blank))) = parse_keyword(contents) { if (&*key).eq_ignore_ascii_case("CALL") { - arena.append_element( + arena.append( BabelCall { value: value.into(), post_blank: blank, @@ -350,7 +337,7 @@ pub fn parse_block<'a, T: ElementArena<'a>>( parent, ); } else { - arena.append_element( + arena.append( Keyword { key: key.into(), optional: optional.map(Into::into), @@ -363,7 +350,7 @@ pub fn parse_block<'a, T: ElementArena<'a>>( Some(tail) } else { let (tail, comment) = Comment::parse(contents)?; - arena.append_element(comment, parent); + arena.append(comment, parent); Some(tail) } } @@ -383,7 +370,7 @@ pub fn match_block<'a, T: ElementArena<'a>>( match &*name.to_uppercase() { "CENTER" => { let (content, pre_blank) = blank_lines(content); - let node = arena.append_element( + let node = arena.append( CenterBlock { parameters, pre_blank, @@ -395,7 +382,7 @@ pub fn match_block<'a, T: ElementArena<'a>>( } "QUOTE" => { let (content, pre_blank) = blank_lines(content); - let node = arena.append_element( + let node = arena.append( QuoteBlock { parameters, pre_blank, @@ -407,7 +394,7 @@ pub fn match_block<'a, T: ElementArena<'a>>( } "VERSE" => { let (content, pre_blank) = blank_lines(content); - let node = arena.append_element( + let node = arena.append( VerseBlock { parameters, pre_blank, @@ -418,7 +405,7 @@ pub fn match_block<'a, T: ElementArena<'a>>( containers.push(Container::Block { content, node }); } "COMMENT" => { - arena.append_element( + arena.append( CommentBlock { data: parameters, contents: content.into(), @@ -428,7 +415,7 @@ pub fn match_block<'a, T: ElementArena<'a>>( ); } "EXAMPLE" => { - arena.append_element( + arena.append( ExampleBlock { data: parameters, contents: content.into(), @@ -438,7 +425,7 @@ pub fn match_block<'a, T: ElementArena<'a>>( ); } "EXPORT" => { - arena.append_element( + arena.append( ExportBlock { data: parameters.unwrap_or_default(), contents: content.into(), @@ -457,7 +444,7 @@ pub fn match_block<'a, T: ElementArena<'a>>( None => (Cow::Borrowed(""), Cow::Borrowed("")), _ => unreachable!("`parse_block_element` returns `Some(Cow::Borrowed)` or `None`"), }; - arena.append_element( + arena.append( SourceBlock { arguments, language, @@ -469,7 +456,7 @@ pub fn match_block<'a, T: ElementArena<'a>>( } _ => { let (content, pre_blank) = blank_lines(content); - let node = arena.append_element( + let node = arena.append( SpecialBlock { parameters, name, @@ -553,7 +540,7 @@ pub fn parse_inlines<'a, T: ElementArena<'a>>( } if !tail.is_empty() { - arena.append_element(Element::Text { value: tail.into() }, parent); + arena.append(Element::Text { value: tail.into() }, parent); } } @@ -570,109 +557,134 @@ pub fn parse_inline<'a, T: ElementArena<'a>>( match contents.as_bytes()[0] { b'@' => { let (tail, snippet) = Snippet::parse(contents)?; - arena.append_element(snippet, parent); + arena.append(snippet, parent); Some(tail) } b'{' => { let (tail, macros) = Macros::parse(contents)?; - arena.append_element(macros, parent); + arena.append(macros, parent); Some(tail) } b'<' => { if let Some((tail, _content)) = parse_radio_target(contents) { - arena.append_element(Element::RadioTarget, parent); + arena.append(Element::RadioTarget, parent); Some(tail) } else if let Some((tail, target)) = Target::parse(contents) { - arena.append_element(target, parent); + arena.append(target, parent); Some(tail) } else if let Some((tail, timestamp)) = Timestamp::parse_active(contents) { - arena.append_element(timestamp, parent); + arena.append(timestamp, parent); Some(tail) } else { let (tail, timestamp) = Timestamp::parse_diary(contents)?; - arena.append_element(timestamp, parent); + arena.append(timestamp, parent); Some(tail) } } b'[' => { if let Some((tail, fn_ref)) = FnRef::parse(contents) { - arena.append_element(fn_ref, parent); + arena.append(fn_ref, parent); Some(tail) } else if let Some((tail, link)) = Link::parse(contents) { - arena.append_element(link, parent); + arena.append(link, parent); Some(tail) } else if let Some((tail, cookie)) = Cookie::parse(contents) { - arena.append_element(cookie, parent); + arena.append(cookie, parent); Some(tail) } else { let (tail, timestamp) = Timestamp::parse_inactive(contents)?; - arena.append_element(timestamp, parent); + arena.append(timestamp, parent); Some(tail) } } b'*' => { let (tail, content) = parse_emphasis(contents, b'*')?; - let node = arena.append_element(Element::Bold, parent); + let node = arena.append(Element::Bold, parent); containers.push(Container::Inline { content, node }); Some(tail) } b'+' => { let (tail, content) = parse_emphasis(contents, b'+')?; - let node = arena.append_element(Element::Strike, parent); + let node = arena.append(Element::Strike, parent); containers.push(Container::Inline { content, node }); Some(tail) } b'/' => { let (tail, content) = parse_emphasis(contents, b'/')?; - let node = arena.append_element(Element::Italic, parent); + let node = arena.append(Element::Italic, parent); containers.push(Container::Inline { content, node }); Some(tail) } b'_' => { let (tail, content) = parse_emphasis(contents, b'_')?; - let node = arena.append_element(Element::Underline, parent); + let node = arena.append(Element::Underline, parent); containers.push(Container::Inline { content, node }); Some(tail) } b'=' => { let (tail, value) = parse_emphasis(contents, b'=')?; let value = value.into(); - arena.append_element(Element::Verbatim { value }, parent); + arena.append(Element::Verbatim { value }, parent); Some(tail) } b'~' => { let (tail, value) = parse_emphasis(contents, b'~')?; let value = value.into(); - arena.append_element(Element::Code { value }, parent); + arena.append(Element::Code { value }, parent); Some(tail) } b's' => { let (tail, inline_src) = InlineSrc::parse(contents)?; - arena.append_element(inline_src, parent); + arena.append(inline_src, parent); Some(tail) } b'c' => { let (tail, inline_call) = InlineCall::parse(contents)?; - arena.append_element(inline_call, parent); + arena.append(inline_call, parent); Some(tail) } _ => None, } } -pub fn parse_list_items<'a, T: ElementArena<'a>>( +pub fn parse_list<'a, T: ElementArena<'a>>( arena: &mut T, - mut contents: &'a str, - indent: usize, + contents: &'a str, parent: NodeId, containers: &mut Vec>, -) { - while !contents.is_empty() { - let (tail, list_item, content) = ListItem::parse(contents, indent); - let node = arena.append_element(list_item, parent); - containers.push(Container::Block { content, node }); - contents = tail; +) -> Option<&'a str> { + let (mut tail, (first_item, content)) = ListItem::parse(contents)?; + let first_item_indent = first_item.indent; + let first_item_ordered = first_item.ordered; + + let parent = arena.append(Element::Document { pre_blank: 0 }, parent); // placeholder + + let node = arena.append(first_item, parent); + containers.push(Container::Block { content, node }); + + while let Some((tail_, (item, content))) = ListItem::parse(tail) { + if item.indent == first_item_indent { + let node = arena.append(item, parent); + containers.push(Container::Block { content, node }); + debug_assert_ne!(tail, tail_); + tail = tail_; + } else { + break; + } } + + let (tail, blank) = blank_lines(tail); + + arena.set( + parent, + List { + indent: first_item_indent, + ordered: first_item_ordered, + post_blank: blank, + }, + ); + + Some(tail) } pub fn parse_table<'a, T: ElementArena<'a>>( @@ -682,16 +694,16 @@ pub fn parse_table<'a, T: ElementArena<'a>>( parent: NodeId, ) -> Option<&'a str> { if contents.trim_start().starts_with('|') { - let table_node = arena.append_element(Table::Org { tblfm: None }, parent); + let table_node = arena.append(Table::Org { tblfm: None }, parent); let mut last_end = 0; for start in memchr_iter(b'\n', contents.as_bytes()).chain(once(contents.len())) { let line = contents[last_end..start].trim(); match TableRow::parse(line) { Some(TableRow::Standard) => { - let row_node = arena.append_element(TableRow::Standard, table_node); + let row_node = arena.append(TableRow::Standard, table_node); for cell in line[1..].split_terminator('|') { - let cell_node = arena.append_element(Element::TableCell, row_node); + let cell_node = arena.append(Element::TableCell, row_node); containers.push(Container::Inline { content: cell.trim(), node: cell_node, @@ -699,7 +711,7 @@ pub fn parse_table<'a, T: ElementArena<'a>>( } } Some(TableRow::Rule) => { - arena.append_element(TableRow::Rule, table_node); + arena.append(TableRow::Rule, table_node); } None => return Some(&contents[last_end..]), } @@ -710,7 +722,7 @@ pub fn parse_table<'a, T: ElementArena<'a>>( } else { let (tail, value) = parse_table_el(contents)?; let value = value.into(); - arena.append_element(Table::TableEl { value }, parent); + arena.append(Table::TableEl { value }, parent); Some(tail) } diff --git a/tests/blank.rs b/tests/blank.rs index b36f492..288e26a 100644 --- a/tests/blank.rs +++ b/tests/blank.rs @@ -51,6 +51,24 @@ COMMENT #+BEGIN_EXAMPLE #+END_EXAMPLE + 1. 1 + +2. 2 + + 3. 3 + + + 1 + + + 2 + + - 3 + + - 4 + + + 5 + + + "#; #[test]