feat(parsers): update table parsing

This commit is contained in:
PoiScript 2019-11-05 19:37:58 +08:00
parent 5d5fc58027
commit b8265814aa
6 changed files with 232 additions and 98 deletions

View file

@ -47,7 +47,7 @@ pub use self::{
planning::Planning, planning::Planning,
rule::Rule, rule::Rule,
snippet::Snippet, snippet::Snippet,
table::{Table, TableRow}, table::{Table, TableCell, TableRow},
target::Target, target::Target,
timestamp::{Datetime, Timestamp}, timestamp::{Datetime, Timestamp},
title::Title, title::Title,
@ -103,33 +103,32 @@ pub enum Element<'a> {
Title(Title<'a>), Title(Title<'a>),
Table(Table<'a>), Table(Table<'a>),
TableRow(TableRow), TableRow(TableRow),
TableCell, TableCell(TableCell),
} }
impl Element<'_> { impl Element<'_> {
pub fn is_container(&self) -> bool { pub fn is_container(&self) -> bool {
use Element::*;
match self { match self {
SpecialBlock(_) Element::SpecialBlock(_)
| QuoteBlock(_) | Element::QuoteBlock(_)
| CenterBlock(_) | Element::CenterBlock(_)
| VerseBlock(_) | Element::VerseBlock(_)
| Bold | Element::Bold
| Document { .. } | Element::Document { .. }
| DynBlock(_) | Element::DynBlock(_)
| Headline { .. } | Element::Headline { .. }
| Italic | Element::Italic
| List(_) | Element::List(_)
| ListItem(_) | Element::ListItem(_)
| Paragraph { .. } | Element::Paragraph { .. }
| Section | Element::Section
| Strike | Element::Strike
| Underline | Element::Underline
| Title(_) | Element::Title(_)
| Table(_) | Element::Table(_)
| TableRow(_) | Element::TableRow(TableRow::Header)
| TableCell => true, | Element::TableRow(TableRow::Body)
| Element::TableCell(_) => true,
_ => false, _ => false,
} }
} }
@ -187,7 +186,7 @@ impl Element<'_> {
Title(e) => Title(e.into_owned()), Title(e) => Title(e.into_owned()),
Table(e) => Table(e.into_owned()), Table(e) => Table(e.into_owned()),
TableRow(e) => TableRow(e), TableRow(e) => TableRow(e),
TableCell => TableCell, TableCell(e) => TableCell(e),
} }
} }
} }

View file

@ -18,6 +18,7 @@ pub enum Table<'a> {
/// Number of blank lines between last table's line and next non-blank /// Number of blank lines between last table's line and next non-blank
/// line or buffer's end /// line or buffer's end
post_blank: usize, post_blank: usize,
has_header: bool,
}, },
/// "table.el" type table /// "table.el" type table
#[cfg_attr(feature = "ser", serde(rename = "table.el"))] #[cfg_attr(feature = "ser", serde(rename = "table.el"))]
@ -63,9 +64,14 @@ impl Table<'_> {
pub fn into_owned(self) -> Table<'static> { pub fn into_owned(self) -> Table<'static> {
match self { match self {
Table::Org { tblfm, post_blank } => Table::Org { Table::Org {
tblfm,
post_blank,
has_header,
} => Table::Org {
tblfm: tblfm.map(Into::into).map(Cow::Owned), tblfm: tblfm.map(Into::into).map(Cow::Owned),
post_blank: post_blank, post_blank: post_blank,
has_header: has_header,
}, },
Table::TableEl { value, post_blank } => Table::TableEl { Table::TableEl { value, post_blank } => Table::TableEl {
value: value.into_owned().into(), value: value.into_owned().into(),
@ -76,14 +82,55 @@ impl Table<'_> {
} }
/// Table Row Element /// Table Row Element
///
/// # Syntax
///
/// ```text
/// | 0 | 1 | 2 | <- TableRow::Body
/// | 0 | 1 | 2 | <- TableRow::Body
/// ```
///
/// ```text
/// |-----+-----+-----| <- ignored
/// | 0 | 1 | 2 | <- TableRow::Header
/// | 0 | 1 | 2 | <- TableRow::Header
/// |-----+-----+-----| <- TableRow::HeaderRule
/// | 0 | 1 | 2 | <- TableRow::Body
/// |-----+-----+-----| <- TableRow::BodyRule
/// | 0 | 1 | 2 | <- TableRow::Body
/// |-----+-----+-----| <- TableRow::BodyRule
/// |-----+-----+-----| <- TableRow::BodyRule
/// | 0 | 1 | 2 | <- TableRow::Body
/// |-----+-----+-----| <- ignored
/// ```
///
#[derive(Debug)] #[derive(Debug)]
#[cfg_attr(test, derive(PartialEq))] #[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))] #[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[cfg_attr(feature = "ser", serde(tag = "table_row_type"))] #[cfg_attr(feature = "ser", serde(tag = "table_row_type"))]
#[cfg_attr(feature = "ser", serde(rename_all = "kebab-case"))] #[cfg_attr(feature = "ser", serde(rename_all = "kebab-case"))]
pub enum TableRow { pub enum TableRow {
Standard, /// This row is part of table header
Rule, Header,
/// This row is part of table body
Body,
/// This row is between table header and body
HeaderRule,
/// This row is between table body and next body
BodyRule,
}
/// Table Cell Element
#[derive(Debug)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[cfg_attr(feature = "ser", serde(tag = "table_cell_type"))]
#[cfg_attr(feature = "ser", serde(rename_all = "kebab-case"))]
pub enum TableCell {
/// Header cell
Header,
/// Body cell, or standard cell
Body,
} }
#[test] #[test]

View file

@ -3,7 +3,7 @@ use std::io::{Error, Write};
use jetscii::{bytes, BytesConst}; use jetscii::{bytes, BytesConst};
use crate::elements::{Element, Timestamp}; use crate::elements::{Element, Table, TableCell, TableRow, Timestamp};
use crate::export::write_datetime; use crate::export::write_datetime;
/// A wrapper for escaping sensitive characters in html. /// A wrapper for escaping sensitive characters in html.
@ -169,11 +169,27 @@ pub trait HtmlHandler<E: From<Error>>: Default {
Element::Rule(_) => write!(w, "<hr>")?, Element::Rule(_) => write!(w, "<hr>")?,
Element::Cookie(cookie) => write!(w, "<code>{}</code>", cookie.value)?, Element::Cookie(cookie) => write!(w, "<code>{}</code>", cookie.value)?,
Element::Title(title) => { Element::Title(title) => {
write!(w, "<h{}>", if title.level <= 6 { title.level } else { 6 })? write!(w, "<h{}>", if title.level <= 6 { title.level } else { 6 })?;
} }
Element::Table(_) => (), Element::Table(Table::TableEl { .. }) => (),
Element::TableRow(_) => (), Element::Table(Table::Org { has_header, .. }) => {
Element::TableCell => (), write!(w, "<table>")?;
if *has_header {
write!(w, "<thead>")?;
} else {
write!(w, "<tbody>")?;
}
}
Element::TableRow(row) => match row {
TableRow::Body => write!(w, "<tr>")?,
TableRow::BodyRule => write!(w, "</tbody><tbody>")?,
TableRow::Header => write!(w, "<tr>")?,
TableRow::HeaderRule => write!(w, "</thead><tbody>")?,
},
Element::TableCell(cell) => match cell {
TableCell::Body => write!(w, "<td>")?,
TableCell::Header => write!(w, "<th>")?,
},
} }
Ok(()) Ok(())
@ -206,9 +222,17 @@ pub trait HtmlHandler<E: From<Error>>: Default {
Element::Title(title) => { Element::Title(title) => {
write!(w, "</h{}>", if title.level <= 6 { title.level } else { 6 })? write!(w, "</h{}>", if title.level <= 6 { title.level } else { 6 })?
} }
Element::Table(_) => (), Element::Table(Table::TableEl { .. }) => (),
Element::TableRow(_) => (), Element::Table(Table::Org { .. }) => {
Element::TableCell => (), write!(w, "</tbody></table>")?;
}
Element::TableRow(TableRow::Body) | Element::TableRow(TableRow::Header) => {
write!(w, "</tr>")?;
}
Element::TableCell(cell) => match cell {
TableCell::Body => write!(w, "</td>")?,
TableCell::Header => write!(w, "</th>")?,
},
// non-container elements // non-container elements
_ => debug_assert!(!element.is_container()), _ => debug_assert!(!element.is_container()),
} }

View file

@ -183,7 +183,7 @@ pub trait OrgHandler<E: From<Error>>: Default {
} }
Element::Table(_) => (), Element::Table(_) => (),
Element::TableRow(_) => (), Element::TableRow(_) => (),
Element::TableCell => (), Element::TableCell(_) => (),
} }
Ok(()) Ok(())
@ -275,7 +275,7 @@ pub trait OrgHandler<E: From<Error>>: Default {
write_blank_lines(w, *post_blank)?; write_blank_lines(w, *post_blank)?;
} }
Element::TableRow(_) => (), Element::TableRow(_) => (),
Element::TableCell => (), Element::TableCell(_) => (),
// non-container elements // non-container elements
_ => debug_assert!(!element.is_container()), _ => debug_assert!(!element.is_container()),
} }

View file

@ -13,7 +13,7 @@ use crate::elements::{
radio_target::parse_radio_target, BabelCall, CenterBlock, Clock, Comment, CommentBlock, Cookie, radio_target::parse_radio_target, BabelCall, CenterBlock, Clock, Comment, CommentBlock, Cookie,
Drawer, DynBlock, Element, ExampleBlock, ExportBlock, FixedWidth, FnDef, FnRef, InlineCall, Drawer, DynBlock, Element, ExampleBlock, ExportBlock, FixedWidth, FnDef, FnRef, InlineCall,
InlineSrc, Keyword, Link, List, ListItem, Macros, QuoteBlock, Rule, Snippet, SourceBlock, InlineSrc, Keyword, Link, List, ListItem, Macros, QuoteBlock, Rule, Snippet, SourceBlock,
SpecialBlock, Table, TableRow, Target, Timestamp, Title, VerseBlock, SpecialBlock, Table, TableCell, TableRow, Target, Timestamp, Title, VerseBlock,
}; };
pub trait ElementArena<'a> { pub trait ElementArena<'a> {
@ -707,28 +707,67 @@ pub fn parse_org_table<'a, T: ElementArena<'a>>(
let (tail, contents) = take_lines_while(|line| line.trim_start().starts_with('|'))(contents); let (tail, contents) = take_lines_while(|line| line.trim_start().starts_with('|'))(contents);
let (tail, blank) = blank_lines(tail); let (tail, blank) = blank_lines(tail);
let mut iter = contents.trim_end().lines().peekable();
let mut lines = vec![];
let mut has_header = false;
if let Some(line) = iter.next() {
let line = line.trim_start();
if !line.starts_with("|-") {
lines.push(line);
}
}
while let Some(line) = iter.next() {
let line = line.trim_start();
if iter.peek().is_none() && line.starts_with("|-") {
break;
} else if line.starts_with("|-") {
has_header = true;
}
lines.push(line);
}
let parent = arena.append( let parent = arena.append(
Table::Org { Table::Org {
tblfm: None, tblfm: None,
post_blank: blank, post_blank: blank,
has_header,
}, },
parent, parent,
); );
let mut last_end = 0; for line in lines {
for start in memchr_iter(b'\n', contents.as_bytes()).chain(once(contents.len())) {
let line = contents[last_end..start].trim_start();
if line.starts_with("|-") { if line.starts_with("|-") {
arena.append(TableRow::Rule, parent); if has_header {
} else if !line.is_empty() { arena.append(Element::TableRow(TableRow::HeaderRule), parent);
// ignores trailing newline has_header = false;
let parent = arena.append(TableRow::Standard, parent); } else {
for content in line.split_terminator('|').skip(1) { arena.append(Element::TableRow(TableRow::BodyRule), parent);
let node = arena.append(Element::TableCell, parent); }
containers.push(Container::Inline { content, node }); } else {
if has_header {
let parent = arena.append(Element::TableRow(TableRow::Header), parent);
for content in line.split_terminator('|').skip(1) {
let node = arena.append(Element::TableCell(TableCell::Header), parent);
containers.push(Container::Inline {
content: content.trim(),
node,
});
}
} else {
let parent = arena.append(Element::TableRow(TableRow::Body), parent);
for content in line.split_terminator('|').skip(1) {
let node = arena.append(Element::TableCell(TableCell::Body), parent);
containers.push(Container::Inline {
content: content.trim(),
node,
});
}
} }
} }
last_end = start + 1;
} }
tail tail

View file

@ -1,7 +1,7 @@
use indextree::NodeId; use indextree::NodeId;
use std::ops::RangeInclusive; use std::ops::RangeInclusive;
use crate::elements::{Element, Table, TableRow}; use crate::elements::{Element, Table, TableCell, TableRow};
use crate::Org; use crate::Org;
/// Validation Error /// Validation Error
@ -49,7 +49,7 @@ impl Org<'_> {
pub fn validate(&self) -> Vec<ValidationError> { pub fn validate(&self) -> Vec<ValidationError> {
let mut errors = Vec::new(); let mut errors = Vec::new();
macro_rules! expect { macro_rules! expect_element {
($node:ident, $expect:expr, $($pattern:pat)|+) => { ($node:ident, $expect:expr, $($pattern:pat)|+) => {
match self[$node] { match self[$node] {
$($pattern)|+ => (), $($pattern)|+ => (),
@ -61,48 +61,57 @@ impl Org<'_> {
}; };
} }
// Validation helper: if `$node` has no first child in `self.arena`, push a
// `ValidationError::ExpectedChildren` pointing at that node onto `errors`.
// Takes a bare identifier so it can be used both to index the arena and as
// the `at` field of the reported error.
macro_rules! expect_children {
($node:ident) => {
if self.arena[$node].first_child().is_none() {
errors.push(ValidationError::ExpectedChildren { at: $node });
}
};
}
for node_id in self.root.descendants(&self.arena) { for node_id in self.root.descendants(&self.arena) {
let node = &self.arena[node_id]; let node = &self.arena[node_id];
match node.get() { match node.get() {
Element::Document { .. } => { Element::Document { .. } => {
let mut children = node_id.children(&self.arena); let mut children = node_id.children(&self.arena);
if let Some(node) = children.next() { if let Some(child) = children.next() {
expect!( expect_element!(
node, child,
"Headline,Section", "Headline|Section",
Element::Headline { .. } | Element::Section Element::Headline { .. } | Element::Section
); );
} }
for node in children {
expect!( for child in children {
node, expect_element!(
child,
"Headline", "Headline",
Element::Headline { .. } Element::Headline { .. }
); );
} }
} }
Element::Headline { .. } => { Element::Headline { .. } => {
if node.first_child().is_some() { expect_children!(node_id);
let mut children = node_id.children(&self.arena);
if let Some(node) = children.next() { let mut children = node_id.children(&self.arena);
expect!(node, "Title", Element::Title(_)); if let Some(child) = children.next() {
} expect_element!(child, "Title", Element::Title(_));
if let Some(node) = children.next() { }
expect!(
node, if let Some(child) = children.next() {
"Headline,Section", expect_element!(
Element::Headline { .. } | Element::Section child,
); "Headline|Section",
} Element::Headline { .. } | Element::Section
for node in children { );
expect!( }
node,
"Headline", for child in children {
Element::Headline { .. } expect_element!(
); child,
} "Headline",
} else { Element::Headline { .. }
errors.push(ValidationError::ExpectedChildren { at: node_id }); );
} }
} }
Element::Title(title) => { Element::Title(title) => {
@ -110,6 +119,37 @@ impl Org<'_> {
errors.push(ValidationError::ExpectedChildren { at: node_id }); errors.push(ValidationError::ExpectedChildren { at: node_id });
} }
} }
Element::List(_) => {
expect_children!(node_id);
for child in node_id.children(&self.arena) {
expect_element!(child, "ListItem", Element::ListItem(_));
}
}
Element::Table(Table::Org { .. }) => {
for child in node_id.children(&self.arena) {
expect_element!(child, "TableRow", Element::TableRow(_));
}
}
Element::TableRow(TableRow::Header) => {
expect_children!(node_id);
for child in node_id.children(&self.arena) {
expect_element!(
child,
"TableCell::Header",
Element::TableCell(TableCell::Header)
);
}
}
Element::TableRow(TableRow::Body) => {
expect_children!(node_id);
for child in node_id.children(&self.arena) {
expect_element!(
child,
"TableCell::Body",
Element::TableCell(TableCell::Body)
);
}
}
Element::CommentBlock(_) Element::CommentBlock(_)
| Element::ExampleBlock(_) | Element::ExampleBlock(_)
| Element::ExportBlock(_) | Element::ExportBlock(_)
@ -134,42 +174,27 @@ impl Org<'_> {
| Element::Keyword(_) | Element::Keyword(_)
| Element::Rule(_) | Element::Rule(_)
| Element::Cookie(_) | Element::Cookie(_)
| Element::Table(Table::TableEl { .. }) | Element::TableRow(TableRow::BodyRule)
| Element::TableRow(TableRow::Rule) => { | Element::TableRow(TableRow::HeaderRule) => {
if node.first_child().is_some() { if node.first_child().is_some() {
errors.push(ValidationError::UnexpectedChildren { at: node_id }); errors.push(ValidationError::UnexpectedChildren { at: node_id });
} }
} }
Element::List(_) => {
if node.first_child().is_some() {
for node in node_id.children(&self.arena) {
expect!(node, "ListItem", Element::ListItem(_));
}
} else {
errors.push(ValidationError::ExpectedChildren { at: node_id });
}
}
Element::SpecialBlock(_) Element::SpecialBlock(_)
| Element::QuoteBlock(_) | Element::QuoteBlock(_)
| Element::CenterBlock(_) | Element::CenterBlock(_)
| Element::VerseBlock(_) | Element::VerseBlock(_)
| Element::Paragraph { .. } | Element::Paragraph { .. }
| Element::Section | Element::Section
| Element::Table(Table::Org { .. })
| Element::TableRow(TableRow::Standard)
| Element::Bold | Element::Bold
| Element::Italic | Element::Italic
| Element::Underline | Element::Underline
| Element::Strike | Element::Strike
| Element::DynBlock(_) | Element::DynBlock(_)
| Element::ListItem(_) => { | Element::ListItem(_) => {
if node.first_child().is_none() { expect_children!(node_id);
errors.push(ValidationError::ExpectedChildren { at: node_id });
}
} }
// TableCell is a container but it might Element::Drawer(_) | Element::TableCell(_) | Element::Table(_) => (),
// not contains anything, e.g. `||||||`
Element::Drawer(_) | Element::TableCell => (),
} }
} }
errors errors