From b8265814aa5bdbecea678f67c4fecf880db32cfc Mon Sep 17 00:00:00 2001 From: PoiScript Date: Tue, 5 Nov 2019 19:37:58 +0800 Subject: [PATCH] feat(parsers): update table parsing --- src/elements/mod.rs | 47 ++++++++-------- src/elements/table.rs | 53 ++++++++++++++++-- src/export/html.rs | 40 +++++++++++--- src/export/org.rs | 4 +- src/parsers.rs | 63 +++++++++++++++++----- src/validate.rs | 123 +++++++++++++++++++++++++----------------- 6 files changed, 232 insertions(+), 98 deletions(-) diff --git a/src/elements/mod.rs b/src/elements/mod.rs index 6d6fdd9..ff99f76 100644 --- a/src/elements/mod.rs +++ b/src/elements/mod.rs @@ -47,7 +47,7 @@ pub use self::{ planning::Planning, rule::Rule, snippet::Snippet, - table::{Table, TableRow}, + table::{Table, TableCell, TableRow}, target::Target, timestamp::{Datetime, Timestamp}, title::Title, @@ -103,33 +103,32 @@ pub enum Element<'a> { Title(Title<'a>), Table(Table<'a>), TableRow(TableRow), - TableCell, + TableCell(TableCell), } impl Element<'_> { pub fn is_container(&self) -> bool { - use Element::*; - match self { - SpecialBlock(_) - | QuoteBlock(_) - | CenterBlock(_) - | VerseBlock(_) - | Bold - | Document { .. } - | DynBlock(_) - | Headline { .. } - | Italic - | List(_) - | ListItem(_) - | Paragraph { .. } - | Section - | Strike - | Underline - | Title(_) - | Table(_) - | TableRow(_) - | TableCell => true, + Element::SpecialBlock(_) + | Element::QuoteBlock(_) + | Element::CenterBlock(_) + | Element::VerseBlock(_) + | Element::Bold + | Element::Document { .. } + | Element::DynBlock(_) + | Element::Headline { .. } + | Element::Italic + | Element::List(_) + | Element::ListItem(_) + | Element::Paragraph { .. 
} + | Element::Section + | Element::Strike + | Element::Underline + | Element::Title(_) + | Element::Table(_) + | Element::TableRow(TableRow::Header) + | Element::TableRow(TableRow::Body) + | Element::TableCell(_) => true, _ => false, } } @@ -187,7 +186,7 @@ impl Element<'_> { Title(e) => Title(e.into_owned()), Table(e) => Table(e.into_owned()), TableRow(e) => TableRow(e), - TableCell => TableCell, + TableCell(e) => TableCell(e), } } } diff --git a/src/elements/table.rs b/src/elements/table.rs index 5fe8878..cc6b04a 100644 --- a/src/elements/table.rs +++ b/src/elements/table.rs @@ -18,6 +18,7 @@ pub enum Table<'a> { /// Numbers of blank lines between last table's line and next non-blank /// line or buffer's end post_blank: usize, + has_header: bool, }, /// "table.el" type table #[cfg_attr(feature = "ser", serde(rename = "table.el"))] @@ -63,9 +64,14 @@ impl Table<'_> { pub fn into_owned(self) -> Table<'static> { match self { - Table::Org { tblfm, post_blank } => Table::Org { + Table::Org { + tblfm, + post_blank, + has_header, + } => Table::Org { tblfm: tblfm.map(Into::into).map(Cow::Owned), post_blank: post_blank, + has_header: has_header, }, Table::TableEl { value, post_blank } => Table::TableEl { value: value.into_owned().into(), @@ -76,14 +82,55 @@ impl Table<'_> { } /// Table Row Elemenet +/// +/// # Syntax +/// +/// ```text +/// | 0 | 1 | 2 | <- TableRow::Body +/// | 0 | 1 | 2 | <- TableRow::Body +/// ``` +/// +/// ```text +/// |-----+-----+-----| <- ignores +/// | 0 | 1 | 2 | <- TableRow::Header +/// | 0 | 1 | 2 | <- TableRow::Header +/// |-----+-----+-----| <- TableRow::HeaderRule +/// | 0 | 1 | 2 | <- TableRow::Body +/// |-----+-----+-----| <- TableRow::BodyRule +/// | 0 | 1 | 2 | <- TableRow::Body +/// |-----+-----+-----| <- TableRow::BodyRule +/// |-----+-----+-----| <- TableRow::BodyRule +/// | 0 | 1 | 2 | <- TableRow::Body +/// |-----+-----+-----| <- ignores +/// ``` +/// #[derive(Debug)] #[cfg_attr(test, derive(PartialEq))] #[cfg_attr(feature = "ser", 
derive(serde::Serialize))] #[cfg_attr(feature = "ser", serde(tag = "table_row_type"))] #[cfg_attr(feature = "ser", serde(rename_all = "kebab-case"))] pub enum TableRow { - Standard, - Rule, + /// This row is part of table header + Header, + /// This row is part of table body + Body, + /// This row is between table header and body + HeaderRule, + /// This row is between table body and next body + BodyRule, +} + +/// Table Cell Element +#[derive(Debug)] +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "ser", derive(serde::Serialize))] +#[cfg_attr(feature = "ser", serde(tag = "table_cell_type"))] +#[cfg_attr(feature = "ser", serde(rename_all = "kebab-case"))] +pub enum TableCell { + /// Header cell + Header, + /// Body cell, or standard cell + Body, } #[test] diff --git a/src/export/html.rs b/src/export/html.rs index 8d47026..966196a 100644 --- a/src/export/html.rs +++ b/src/export/html.rs @@ -3,7 +3,7 @@ use std::io::{Error, Write}; use jetscii::{bytes, BytesConst}; -use crate::elements::{Element, Timestamp}; +use crate::elements::{Element, Table, TableCell, TableRow, Timestamp}; use crate::export::write_datetime; /// A wrapper for escaping sensitive characters in html. @@ -169,11 +169,27 @@ pub trait HtmlHandler>: Default { Element::Rule(_) => write!(w, "
")?, Element::Cookie(cookie) => write!(w, "{}", cookie.value)?, Element::Title(title) => { - write!(w, "", if title.level <= 6 { title.level } else { 6 })? + write!(w, "", if title.level <= 6 { title.level } else { 6 })?; } - Element::Table(_) => (), - Element::TableRow(_) => (), - Element::TableCell => (), + Element::Table(Table::TableEl { .. }) => (), + Element::Table(Table::Org { has_header, .. }) => { + write!(w, "")?; + if *has_header { + write!(w, "")?; + } else { + write!(w, "")?; + } + } + Element::TableRow(row) => match row { + TableRow::Body => write!(w, "")?, + TableRow::BodyRule => write!(w, "")?, + TableRow::Header => write!(w, "")?, + TableRow::HeaderRule => write!(w, "")?, + }, + Element::TableCell(cell) => match cell { + TableCell::Body => write!(w, "
")?, + TableCell::Header => write!(w, "")?, + }, } Ok(()) @@ -206,9 +222,17 @@ pub trait HtmlHandler>: Default { Element::Title(title) => { write!(w, "", if title.level <= 6 { title.level } else { 6 })? } - Element::Table(_) => (), - Element::TableRow(_) => (), - Element::TableCell => (), + Element::Table(Table::TableEl { .. }) => (), + Element::Table(Table::Org { .. }) => { + write!(w, "
")?; + } + Element::TableRow(TableRow::Body) | Element::TableRow(TableRow::Header) => { + write!(w, "")?; + } + Element::TableCell(cell) => match cell { + TableCell::Body => write!(w, "")?, + TableCell::Header => write!(w, "")?, + }, // non-container elements _ => debug_assert!(!element.is_container()), } diff --git a/src/export/org.rs b/src/export/org.rs index cb9fcfb..ba8b360 100644 --- a/src/export/org.rs +++ b/src/export/org.rs @@ -183,7 +183,7 @@ pub trait OrgHandler>: Default { } Element::Table(_) => (), Element::TableRow(_) => (), - Element::TableCell => (), + Element::TableCell(_) => (), } Ok(()) @@ -275,7 +275,7 @@ pub trait OrgHandler>: Default { write_blank_lines(w, *post_blank)?; } Element::TableRow(_) => (), - Element::TableCell => (), + Element::TableCell(_) => (), // non-container elements _ => debug_assert!(!element.is_container()), } diff --git a/src/parsers.rs b/src/parsers.rs index 5f9d1fe..a3e737e 100644 --- a/src/parsers.rs +++ b/src/parsers.rs @@ -13,7 +13,7 @@ use crate::elements::{ radio_target::parse_radio_target, BabelCall, CenterBlock, Clock, Comment, CommentBlock, Cookie, Drawer, DynBlock, Element, ExampleBlock, ExportBlock, FixedWidth, FnDef, FnRef, InlineCall, InlineSrc, Keyword, Link, List, ListItem, Macros, QuoteBlock, Rule, Snippet, SourceBlock, - SpecialBlock, Table, TableRow, Target, Timestamp, Title, VerseBlock, + SpecialBlock, Table, TableCell, TableRow, Target, Timestamp, Title, VerseBlock, }; pub trait ElementArena<'a> { @@ -707,28 +707,67 @@ pub fn parse_org_table<'a, T: ElementArena<'a>>( let (tail, contents) = take_lines_while(|line| line.trim_start().starts_with('|'))(contents); let (tail, blank) = blank_lines(tail); + let mut iter = contents.trim_end().lines().peekable(); + + let mut lines = vec![]; + + let mut has_header = false; + + if let Some(line) = iter.next() { + let line = line.trim_start(); + if !line.starts_with("|-") { + lines.push(line); + } + } + + while let Some(line) = iter.next() { + let line = 
line.trim_start(); + if iter.peek().is_none() && line.starts_with("|-") { + break; + } else if line.starts_with("|-") { + has_header = true; + } + lines.push(line); + } + let parent = arena.append( Table::Org { tblfm: None, post_blank: blank, + has_header, }, parent, ); - let mut last_end = 0; - for start in memchr_iter(b'\n', contents.as_bytes()).chain(once(contents.len())) { - let line = contents[last_end..start].trim_start(); + for line in lines { if line.starts_with("|-") { - arena.append(TableRow::Rule, parent); - } else if !line.is_empty() { - // ignores trailing newline - let parent = arena.append(TableRow::Standard, parent); - for content in line.split_terminator('|').skip(1) { - let node = arena.append(Element::TableCell, parent); - containers.push(Container::Inline { content, node }); + if has_header { + arena.append(Element::TableRow(TableRow::HeaderRule), parent); + has_header = false; + } else { + arena.append(Element::TableRow(TableRow::BodyRule), parent); + } + } else { + if has_header { + let parent = arena.append(Element::TableRow(TableRow::Header), parent); + for content in line.split_terminator('|').skip(1) { + let node = arena.append(Element::TableCell(TableCell::Header), parent); + containers.push(Container::Inline { + content: content.trim(), + node, + }); + } + } else { + let parent = arena.append(Element::TableRow(TableRow::Body), parent); + for content in line.split_terminator('|').skip(1) { + let node = arena.append(Element::TableCell(TableCell::Body), parent); + containers.push(Container::Inline { + content: content.trim(), + node, + }); + } } } - last_end = start + 1; } tail diff --git a/src/validate.rs b/src/validate.rs index 044cd00..f3b46b2 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -1,7 +1,7 @@ use indextree::NodeId; use std::ops::RangeInclusive; -use crate::elements::{Element, Table, TableRow}; +use crate::elements::{Element, Table, TableCell, TableRow}; use crate::Org; /// Validation Error @@ -49,7 +49,7 @@ impl Org<'_> 
{ pub fn validate(&self) -> Vec { let mut errors = Vec::new(); - macro_rules! expect { + macro_rules! expect_element { ($node:ident, $expect:expr, $($pattern:pat)|+) => { match self[$node] { $($pattern)|+ => (), @@ -61,48 +61,57 @@ impl Org<'_> { }; } + macro_rules! expect_children { + ($node:ident) => { + if self.arena[$node].first_child().is_none() { + errors.push(ValidationError::ExpectedChildren { at: $node }); + } + }; + } + for node_id in self.root.descendants(&self.arena) { let node = &self.arena[node_id]; match node.get() { Element::Document { .. } => { let mut children = node_id.children(&self.arena); - if let Some(node) = children.next() { - expect!( - node, - "Headline,Section", + if let Some(child) = children.next() { + expect_element!( + child, + "Headline|Section", Element::Headline { .. } | Element::Section ); } - for node in children { - expect!( - node, + + for child in children { + expect_element!( + child, "Headline", Element::Headline { .. } ); } } Element::Headline { .. } => { - if node.first_child().is_some() { - let mut children = node_id.children(&self.arena); - if let Some(node) = children.next() { - expect!(node, "Title", Element::Title(_)); - } - if let Some(node) = children.next() { - expect!( - node, - "Headline,Section", - Element::Headline { .. } | Element::Section - ); - } - for node in children { - expect!( - node, - "Headline", - Element::Headline { .. } - ); - } - } else { - errors.push(ValidationError::ExpectedChildren { at: node_id }); + expect_children!(node_id); + + let mut children = node_id.children(&self.arena); + if let Some(child) = children.next() { + expect_element!(child, "Title", Element::Title(_)); + } + + if let Some(child) = children.next() { + expect_element!( + child, + "Headline|Section", + Element::Headline { .. } | Element::Section + ); + } + + for child in children { + expect_element!( + child, + "Headline", + Element::Headline { .. 
} + ); } } Element::Title(title) => { @@ -110,6 +119,37 @@ impl Org<'_> { errors.push(ValidationError::ExpectedChildren { at: node_id }); } } + Element::List(_) => { + expect_children!(node_id); + for child in node_id.children(&self.arena) { + expect_element!(child, "ListItem", Element::ListItem(_)); + } + } + Element::Table(Table::Org { .. }) => { + for child in node_id.children(&self.arena) { + expect_element!(child, "TableRow", Element::TableRow(_)); + } + } + Element::TableRow(TableRow::Header) => { + expect_children!(node_id); + for child in node_id.children(&self.arena) { + expect_element!( + child, + "TableCell::Header", + Element::TableCell(TableCell::Header) + ); + } + } + Element::TableRow(TableRow::Body) => { + expect_children!(node_id); + for child in node_id.children(&self.arena) { + expect_element!( + child, + "TableCell::Body", + Element::TableCell(TableCell::Body) + ); + } + } Element::CommentBlock(_) | Element::ExampleBlock(_) | Element::ExportBlock(_) @@ -134,42 +174,27 @@ impl Org<'_> { | Element::Keyword(_) | Element::Rule(_) | Element::Cookie(_) - | Element::Table(Table::TableEl { .. }) - | Element::TableRow(TableRow::Rule) => { + | Element::TableRow(TableRow::BodyRule) + | Element::TableRow(TableRow::HeaderRule) => { if node.first_child().is_some() { errors.push(ValidationError::UnexpectedChildren { at: node_id }); } } - Element::List(_) => { - if node.first_child().is_some() { - for node in node_id.children(&self.arena) { - expect!(node, "ListItem", Element::ListItem(_)); - } - } else { - errors.push(ValidationError::ExpectedChildren { at: node_id }); - } - } Element::SpecialBlock(_) | Element::QuoteBlock(_) | Element::CenterBlock(_) | Element::VerseBlock(_) | Element::Paragraph { .. } | Element::Section - | Element::Table(Table::Org { .. 
}) - | Element::TableRow(TableRow::Standard) | Element::Bold | Element::Italic | Element::Underline | Element::Strike | Element::DynBlock(_) | Element::ListItem(_) => { - if node.first_child().is_none() { - errors.push(ValidationError::ExpectedChildren { at: node_id }); - } + expect_children!(node_id); } - // TableCell is a container but it might - // not contains anything, e.g. `||||||` - Element::Drawer(_) | Element::TableCell => (), + Element::Drawer(_) | Element::TableCell(_) | Element::Table(_) => (), } } errors