From 33f78ee207e4b59be25986085cc3921ed0bdc626 Mon Sep 17 00:00:00 2001
From: PoiScript
Date: Thu, 27 Jun 2019 00:03:59 +0800
Subject: [PATCH] feat: Iter struct

---
Review notes (this section is ignored by `git am`):
 * leaf events now continue with the next sibling instead of closing the
   parent right after the first leaf
 * `Org::iter` remembers the synthetic root node it creates
 * the per-variant state handling in src/iter.rs is folded into `classify`
 * indentation, blank context lines and generic parameters were restored
   from a flattened copy of this patch; the `index` lines predate the
   changes listed above

 src/elements/list.rs |   2 +
 src/elements/mod.rs  |   1 +
 src/iter.rs          | 202 +++++++++++++++++++++++++++++++++++++++++++
 src/lib.rs           |   1 +
 src/org.rs           |  43 ++++++++-
 src/serde.rs         |   6 +-
 6 files changed, 249 insertions(+), 6 deletions(-)
 create mode 100644 src/iter.rs

diff --git a/src/elements/list.rs b/src/elements/list.rs
index f3c8c20..f4ef18a 100644
--- a/src/elements/list.rs
+++ b/src/elements/list.rs
@@ -60,6 +60,8 @@ impl List {
     }
 }
 
+#[cfg_attr(test, derive(PartialEq))]
+#[derive(Debug)]
 pub struct ListItem<'a> {
     pub bullet: &'a str,
 }
diff --git a/src/elements/mod.rs b/src/elements/mod.rs
index a366e97..df02608 100644
--- a/src/elements/mod.rs
+++ b/src/elements/mod.rs
@@ -47,6 +47,7 @@ pub use self::{
 
 #[derive(Debug)]
 pub enum Element<'a> {
+    Root,
     Block {
         block: Block<'a>,
         begin: usize,
diff --git a/src/iter.rs b/src/iter.rs
new file mode 100644
index 0000000..7e9eb62
--- /dev/null
+++ b/src/iter.rs
@@ -0,0 +1,202 @@
+//! Event-based, depth-first traversal of a parsed document.
+
+use indextree::{Arena, NodeId};
+
+use crate::elements::*;
+
+/// An element that may hold children, reported as a `Start`/`End` pair.
+#[derive(Debug)]
+pub enum Container<'a> {
+    Block(&'a Block<'a>),
+    Bold,
+    Document,
+    DynBlock(&'a DynBlock<'a>),
+    Headline(&'a Headline<'a>),
+    Italic,
+    List(&'a List),
+    ListItem(&'a ListItem<'a>),
+    Paragraph,
+    Section,
+    Strike,
+    Underline,
+}
+
+/// One step of the traversal produced by `Iter`.
+#[derive(Debug)]
+pub enum Event<'a> {
+    /// A container was entered; its children, if any, follow.
+    Start(Container<'a>),
+    /// A container was left; all of its children have been reported.
+    End(Container<'a>),
+    Clock(&'a Clock<'a>),
+    Cookie(&'a Cookie<'a>),
+    Drawer(&'a Drawer<'a>),
+    FnDef(&'a FnDef<'a>),
+    FnRef(&'a FnRef<'a>),
+    InlineCall(&'a InlineCall<'a>),
+    InlineSrc(&'a InlineSrc<'a>),
+    Keyword(&'a Keyword<'a>),
+    Link(&'a Link<'a>),
+    Macros(&'a Macros<'a>),
+    Planning(&'a Planning<'a>),
+    RadioTarget(&'a RadioTarget<'a>),
+    Rule,
+    Snippet(&'a Snippet<'a>),
+    Target(&'a Target<'a>),
+    Timestamp(&'a Timestamp<'a>),
+    Text(&'a str),
+    Code(&'a str),
+    Verbatim(&'a str),
+    BabelCall(&'a str),
+}
+
+/// Position of the cursor relative to `Iter::node`.
+enum State {
+    /// `node` was just entered: descend into its first child next.
+    Start,
+    /// `node` is complete: move on to its sibling, or close its parent.
+    End,
+    /// `node` is a childless container whose `End` event is still due.
+    Empty,
+    /// The synthetic root was reached: no further events.
+    Finished,
+}
+
+/// How a tree node is reported by the iterator.
+enum Kind<'a> {
+    /// The synthetic root: ends the iteration and is never reported.
+    Root,
+    /// Reported twice, as `Event::Start` and later as `Event::End`.
+    Container(Container<'a>),
+    /// Reported once, as the wrapped event.
+    Leaf(Event<'a>),
+}
+
+/// Maps an element to the way the iterator reports it.
+fn classify<'a>(element: &'a Element<'a>) -> Kind<'a> {
+    match element {
+        Element::Root => Kind::Root,
+
+        Element::Block { block, .. } => Kind::Container(Container::Block(block)),
+        Element::Bold { .. } => Kind::Container(Container::Bold),
+        Element::Document { .. } => Kind::Container(Container::Document),
+        Element::DynBlock { dyn_block, .. } => Kind::Container(Container::DynBlock(dyn_block)),
+        Element::Headline { headline, .. } => Kind::Container(Container::Headline(headline)),
+        Element::Italic { .. } => Kind::Container(Container::Italic),
+        Element::List { list, .. } => Kind::Container(Container::List(list)),
+        Element::ListItem { list_item, .. } => Kind::Container(Container::ListItem(list_item)),
+        Element::Paragraph { .. } => Kind::Container(Container::Paragraph),
+        Element::Section { .. } => Kind::Container(Container::Section),
+        Element::Strike { .. } => Kind::Container(Container::Strike),
+        Element::Underline { .. } => Kind::Container(Container::Underline),
+
+        Element::BabelCall { value, .. } => Kind::Leaf(Event::BabelCall(value)),
+        Element::Verbatim { value, .. } => Kind::Leaf(Event::Verbatim(value)),
+        Element::Code { value, .. } => Kind::Leaf(Event::Code(value)),
+        Element::Text { value, .. } => Kind::Leaf(Event::Text(value)),
+        Element::Clock { clock, .. } => Kind::Leaf(Event::Clock(clock)),
+        Element::Cookie { cookie, .. } => Kind::Leaf(Event::Cookie(cookie)),
+        Element::Drawer { drawer, .. } => Kind::Leaf(Event::Drawer(drawer)),
+        Element::FnDef { fn_def, .. } => Kind::Leaf(Event::FnDef(fn_def)),
+        Element::FnRef { fn_ref, .. } => Kind::Leaf(Event::FnRef(fn_ref)),
+        Element::InlineCall { inline_call, .. } => Kind::Leaf(Event::InlineCall(inline_call)),
+        Element::InlineSrc { inline_src, .. } => Kind::Leaf(Event::InlineSrc(inline_src)),
+        Element::Keyword { keyword, .. } => Kind::Leaf(Event::Keyword(keyword)),
+        Element::Link { link, .. } => Kind::Leaf(Event::Link(link)),
+        Element::Macros { macros, .. } => Kind::Leaf(Event::Macros(macros)),
+        Element::Planning(planning) => Kind::Leaf(Event::Planning(planning)),
+        Element::RadioTarget { radio_target, .. } => Kind::Leaf(Event::RadioTarget(radio_target)),
+        Element::Rule { .. } => Kind::Leaf(Event::Rule),
+        Element::Snippet { snippet, .. } => Kind::Leaf(Event::Snippet(snippet)),
+        Element::Target { target, .. } => Kind::Leaf(Event::Target(target)),
+        Element::Timestamp { timestamp, .. } => Kind::Leaf(Event::Timestamp(timestamp)),
+    }
+}
+
+/// Depth-first iterator over the events of a parsed document.
+pub struct Iter<'a> {
+    arena: &'a Arena<Element<'a>>,
+    node: NodeId,
+    state: State,
+}
+
+impl<'a> Iter<'a> {
+    /// Creates an iterator whose cursor starts on `node`.
+    pub(crate) fn new(arena: &'a Arena<Element<'a>>, node: NodeId) -> Self {
+        Iter {
+            arena,
+            node,
+            state: State::Start,
+        }
+    }
+
+    /// Reports entering the current node and records how to continue.
+    fn start_event(&mut self) -> Option<Event<'a>> {
+        let arena = self.arena;
+        let node = &arena[self.node];
+        match classify(&node.data) {
+            Kind::Root => {
+                self.state = State::Finished;
+                None
+            }
+            Kind::Container(container) => {
+                self.state = if node.first_child().is_none() {
+                    State::Empty
+                } else {
+                    State::Start
+                };
+                Some(Event::Start(container))
+            }
+            Kind::Leaf(event) => {
+                // A leaf is complete once reported: continue with its
+                // sibling instead of closing the parent right away.
+                self.state = State::End;
+                Some(event)
+            }
+        }
+    }
+
+    /// Reports leaving the current node.
+    fn end_event(&mut self) -> Option<Event<'a>> {
+        let arena = self.arena;
+        match classify(&arena[self.node].data) {
+            Kind::Root => {
+                self.state = State::Finished;
+                None
+            }
+            Kind::Container(container) => {
+                self.state = State::End;
+                Some(Event::End(container))
+            }
+            Kind::Leaf(event) => {
+                self.state = State::End;
+                Some(event)
+            }
+        }
+    }
+}
+
+impl<'a> Iterator for Iter<'a> {
+    type Item = Event<'a>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let arena = self.arena;
+        let node = &arena[self.node];
+        // Node to enter next, if one lies below (`Start`) or beside (`End`).
+        let forward = match self.state {
+            State::Finished => return None,
+            State::Empty => return self.end_event(),
+            State::Start => node.first_child(),
+            State::End => node.next_sibling(),
+        };
+        if let Some(next_node) = forward {
+            self.node = next_node;
+            self.start_event()
+        } else if let Some(parent_node) = node.parent() {
+            self.node = parent_node;
+            self.end_event()
+        } else {
+            None
+        }
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 1812d2c..213dedb 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -140,6 +140,7 @@
 //! ```
 
 pub mod elements;
+pub mod iter;
 pub mod org;
 #[cfg(feature = "serde")]
 mod serde;
diff --git a/src/org.rs b/src/org.rs
index 58a3ece..74e8335 100644
--- a/src/org.rs
+++ b/src/org.rs
@@ -1,4 +1,5 @@
 use crate::elements::*;
+use crate::iter::Iter;
 
 use indextree::{Arena, NodeId};
 use jetscii::bytes;
@@ -6,23 +7,47 @@ use memchr::{memchr_iter, memrchr_iter};
 
 pub struct Org<'a> {
     pub(crate) arena: Arena<Element<'a>>,
-    pub(crate) root: NodeId,
+    pub(crate) document: NodeId,
+    root: Option<NodeId>,
     text: &'a str,
 }
 
 impl<'a> Org<'a> {
     pub fn new(text: &'a str) -> Self {
         let mut arena = Arena::new();
-        let root = arena.new_node(Element::Document {
+        let document = arena.new_node(Element::Document {
             begin: 0,
             end: text.len(),
         });
 
-        Org { arena, root, text }
+        Org {
+            arena,
+            root: None,
+            document,
+            text,
+        }
     }
 
     pub fn finish(&self) -> bool {
-        self.arena[self.root].first_child().is_some()
+        self.arena[self.document].first_child().is_some()
+    }
+
+    /// Returns an iterator over the events of the document.
+    ///
+    /// The document is wrapped in a synthetic `Element::Root` node, which is
+    /// created once and reused, so that iteration ends after the document.
+    pub fn iter(&'a mut self) -> Iter<'a> {
+        let root = match self.root {
+            Some(root) => root,
+            None => {
+                let root = self.arena.new_node(Element::Root);
+                root.append(self.document, &mut self.arena).unwrap();
+                // Remember the wrapper so it is not created a second time.
+                self.root = Some(root);
+                root
+            }
+        };
+        Iter::new(&self.arena, root)
     }
 
     pub fn parse(&mut self) {
@@ -30,7 +55,7 @@ impl<'a> Org<'a> {
             return;
         }
 
-        let mut node = self.root;
+        let mut node = self.document;
         loop {
             match self.arena[node].data {
                 Element::Document { begin, end, .. }
@@ -152,6 +177,14 @@ impl<'a> Org<'a> {
     fn parse_elements_children(&mut self, mut begin: usize, end: usize, node: NodeId) {
         'out: while begin < end {
             let text = &self.text[begin..end];
+
+            if let Some((ty, off)) = self.parse_element(begin, end) {
+                let new_node = self.arena.new_node(ty);
+                node.append(new_node, &mut self.arena).unwrap();
+                begin += off;
+                continue 'out;
+            }
+
             let mut pos = 0;
             for i in memchr_iter(b'\n', text.as_bytes()) {
                 if text.as_bytes()[pos..i].iter().all(u8::is_ascii_whitespace) {
diff --git a/src/serde.rs b/src/serde.rs
index 7b7bf6f..19131fe 100644
--- a/src/serde.rs
+++ b/src/serde.rs
@@ -10,7 +10,7 @@ impl Serialize for Org<'_> {
         serializer.serialize_newtype_struct(
             "Element",
             &ElementNode {
-                node: self.root,
+                node: self.document,
                 arena: &self.arena,
             },
         )
@@ -27,6 +27,10 @@ impl Serialize for ElementNode<'_> {
     fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
         let mut state;
         match &self.arena[self.node].data {
+            Element::Root => {
+                state = serializer.serialize_struct("Element::Root", 2)?;
+                state.serialize_field("type", "root")?;
+            }
             Element::Document { begin, end } => {
                 state = serializer.serialize_struct("Element::Document", 2)?;
                 state.serialize_field("type", "document")?;