feat(parsers): update table parsing

This commit is contained in:
PoiScript 2019-11-05 19:37:58 +08:00
parent 5d5fc58027
commit b8265814aa
6 changed files with 232 additions and 98 deletions

View file

@ -47,7 +47,7 @@ pub use self::{
planning::Planning,
rule::Rule,
snippet::Snippet,
table::{Table, TableRow},
table::{Table, TableCell, TableRow},
target::Target,
timestamp::{Datetime, Timestamp},
title::Title,
@ -103,33 +103,32 @@ pub enum Element<'a> {
Title(Title<'a>),
Table(Table<'a>),
TableRow(TableRow),
TableCell,
TableCell(TableCell),
}
impl Element<'_> {
/// Reports whether this element is a container.
///
/// Returns `true` when `self` matches one of the variants listed in the
/// `match` below and `false` for every other variant (the final `_` arm).
pub fn is_container(&self) -> bool {
use Element::*;
match self {
SpecialBlock(_)
| QuoteBlock(_)
| CenterBlock(_)
| VerseBlock(_)
| Bold
| Document { .. }
| DynBlock(_)
| Headline { .. }
| Italic
| List(_)
| ListItem(_)
| Paragraph { .. }
| Section
| Strike
| Underline
| Title(_)
| Table(_)
| TableRow(_)
| TableCell => true,
Element::SpecialBlock(_)
| Element::QuoteBlock(_)
| Element::CenterBlock(_)
| Element::VerseBlock(_)
| Element::Bold
| Element::Document { .. }
| Element::DynBlock(_)
| Element::Headline { .. }
| Element::Italic
| Element::List(_)
| Element::ListItem(_)
| Element::Paragraph { .. }
| Element::Section
| Element::Strike
| Element::Underline
| Element::Title(_)
| Element::Table(_)
| Element::TableRow(TableRow::Header)
| Element::TableRow(TableRow::Body)
| Element::TableCell(_) => true,
// Everything not listed above is reported as a non-container.
_ => false,
}
}
@ -187,7 +186,7 @@ impl Element<'_> {
Title(e) => Title(e.into_owned()),
Table(e) => Table(e.into_owned()),
TableRow(e) => TableRow(e),
TableCell => TableCell,
TableCell(e) => TableCell(e),
}
}
}

View file

@ -18,6 +18,7 @@ pub enum Table<'a> {
/// Number of blank lines between the table's last line and the next
/// non-blank line or the end of the buffer
post_blank: usize,
has_header: bool,
},
/// "table.el" type table
#[cfg_attr(feature = "ser", serde(rename = "table.el"))]
@ -63,9 +64,14 @@ impl Table<'_> {
pub fn into_owned(self) -> Table<'static> {
match self {
Table::Org { tblfm, post_blank } => Table::Org {
Table::Org {
tblfm,
post_blank,
has_header,
} => Table::Org {
tblfm: tblfm.map(Into::into).map(Cow::Owned),
post_blank: post_blank,
has_header: has_header,
},
Table::TableEl { value, post_blank } => Table::TableEl {
value: value.into_owned().into(),
@ -76,14 +82,55 @@ impl Table<'_> {
}
/// Table Row Element
///
/// # Syntax
///
/// ```text
/// | 0 | 1 | 2 | <- TableRow::Body
/// | 0 | 1 | 2 | <- TableRow::Body
/// ```
///
/// ```text
/// |-----+-----+-----| <- ignored
/// | 0 | 1 | 2 | <- TableRow::Header
/// | 0 | 1 | 2 | <- TableRow::Header
/// |-----+-----+-----| <- TableRow::HeaderRule
/// | 0 | 1 | 2 | <- TableRow::Body
/// |-----+-----+-----| <- TableRow::BodyRule
/// | 0 | 1 | 2 | <- TableRow::Body
/// |-----+-----+-----| <- TableRow::BodyRule
/// |-----+-----+-----| <- TableRow::BodyRule
/// | 0 | 1 | 2 | <- TableRow::Body
/// |-----+-----+-----| <- ignored
/// ```
///
// With the `ser` feature enabled, the variant name is serialized in
// kebab-case under the `table_row_type` tag.
#[derive(Debug)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[cfg_attr(feature = "ser", serde(tag = "table_row_type"))]
#[cfg_attr(feature = "ser", serde(rename_all = "kebab-case"))]
pub enum TableRow {
Standard,
Rule,
/// A row that is part of the table header
Header,
/// A row that is part of a table body
Body,
/// A rule row that separates the table header from the first body
HeaderRule,
/// A rule row that separates one table body from the next
BodyRule,
}
/// Table Cell Element
///
/// The variant records whether the cell is a header cell or a body cell.
// With the `ser` feature enabled, the variant name is serialized in
// kebab-case under the `table_cell_type` tag.
#[derive(Debug)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[cfg_attr(feature = "ser", serde(tag = "table_cell_type"))]
#[cfg_attr(feature = "ser", serde(rename_all = "kebab-case"))]
pub enum TableCell {
/// Header cell
Header,
/// Body cell, or standard cell
Body,
}
#[test]

View file

@ -3,7 +3,7 @@ use std::io::{Error, Write};
use jetscii::{bytes, BytesConst};
use crate::elements::{Element, Timestamp};
use crate::elements::{Element, Table, TableCell, TableRow, Timestamp};
use crate::export::write_datetime;
/// A wrapper for escaping sensitive characters in html.
@ -169,11 +169,27 @@ pub trait HtmlHandler<E: From<Error>>: Default {
Element::Rule(_) => write!(w, "<hr>")?,
Element::Cookie(cookie) => write!(w, "<code>{}</code>", cookie.value)?,
Element::Title(title) => {
write!(w, "<h{}>", if title.level <= 6 { title.level } else { 6 })?
write!(w, "<h{}>", if title.level <= 6 { title.level } else { 6 })?;
}
Element::Table(_) => (),
Element::TableRow(_) => (),
Element::TableCell => (),
Element::Table(Table::TableEl { .. }) => (),
Element::Table(Table::Org { has_header, .. }) => {
write!(w, "<table>")?;
if *has_header {
write!(w, "<thead>")?;
} else {
write!(w, "<tbody>")?;
}
}
Element::TableRow(row) => match row {
TableRow::Body => write!(w, "<tr>")?,
TableRow::BodyRule => write!(w, "</tbody><tbody>")?,
TableRow::Header => write!(w, "<tr>")?,
TableRow::HeaderRule => write!(w, "</thead><tbody>")?,
},
Element::TableCell(cell) => match cell {
TableCell::Body => write!(w, "<td>")?,
TableCell::Header => write!(w, "<th>")?,
},
}
Ok(())
@ -206,9 +222,17 @@ pub trait HtmlHandler<E: From<Error>>: Default {
Element::Title(title) => {
write!(w, "</h{}>", if title.level <= 6 { title.level } else { 6 })?
}
Element::Table(_) => (),
Element::TableRow(_) => (),
Element::TableCell => (),
Element::Table(Table::TableEl { .. }) => (),
Element::Table(Table::Org { .. }) => {
write!(w, "</tbody></table>")?;
}
Element::TableRow(TableRow::Body) | Element::TableRow(TableRow::Header) => {
write!(w, "</tr>")?;
}
Element::TableCell(cell) => match cell {
TableCell::Body => write!(w, "</td>")?,
TableCell::Header => write!(w, "</th>")?,
},
// non-container elements
_ => debug_assert!(!element.is_container()),
}

View file

@ -183,7 +183,7 @@ pub trait OrgHandler<E: From<Error>>: Default {
}
Element::Table(_) => (),
Element::TableRow(_) => (),
Element::TableCell => (),
Element::TableCell(_) => (),
}
Ok(())
@ -275,7 +275,7 @@ pub trait OrgHandler<E: From<Error>>: Default {
write_blank_lines(w, *post_blank)?;
}
Element::TableRow(_) => (),
Element::TableCell => (),
Element::TableCell(_) => (),
// non-container elements
_ => debug_assert!(!element.is_container()),
}

View file

@ -13,7 +13,7 @@ use crate::elements::{
radio_target::parse_radio_target, BabelCall, CenterBlock, Clock, Comment, CommentBlock, Cookie,
Drawer, DynBlock, Element, ExampleBlock, ExportBlock, FixedWidth, FnDef, FnRef, InlineCall,
InlineSrc, Keyword, Link, List, ListItem, Macros, QuoteBlock, Rule, Snippet, SourceBlock,
SpecialBlock, Table, TableRow, Target, Timestamp, Title, VerseBlock,
SpecialBlock, Table, TableCell, TableRow, Target, Timestamp, Title, VerseBlock,
};
pub trait ElementArena<'a> {
@ -707,28 +707,67 @@ pub fn parse_org_table<'a, T: ElementArena<'a>>(
let (tail, contents) = take_lines_while(|line| line.trim_start().starts_with('|'))(contents);
let (tail, blank) = blank_lines(tail);
let mut iter = contents.trim_end().lines().peekable();
let mut lines = vec![];
let mut has_header = false;
if let Some(line) = iter.next() {
let line = line.trim_start();
if !line.starts_with("|-") {
lines.push(line);
}
}
while let Some(line) = iter.next() {
let line = line.trim_start();
if iter.peek().is_none() && line.starts_with("|-") {
break;
} else if line.starts_with("|-") {
has_header = true;
}
lines.push(line);
}
let parent = arena.append(
Table::Org {
tblfm: None,
post_blank: blank,
has_header,
},
parent,
);
let mut last_end = 0;
for start in memchr_iter(b'\n', contents.as_bytes()).chain(once(contents.len())) {
let line = contents[last_end..start].trim_start();
for line in lines {
if line.starts_with("|-") {
arena.append(TableRow::Rule, parent);
} else if !line.is_empty() {
// ignores trailing newline
let parent = arena.append(TableRow::Standard, parent);
if has_header {
arena.append(Element::TableRow(TableRow::HeaderRule), parent);
has_header = false;
} else {
arena.append(Element::TableRow(TableRow::BodyRule), parent);
}
} else {
if has_header {
let parent = arena.append(Element::TableRow(TableRow::Header), parent);
for content in line.split_terminator('|').skip(1) {
let node = arena.append(Element::TableCell, parent);
containers.push(Container::Inline { content, node });
let node = arena.append(Element::TableCell(TableCell::Header), parent);
containers.push(Container::Inline {
content: content.trim(),
node,
});
}
} else {
let parent = arena.append(Element::TableRow(TableRow::Body), parent);
for content in line.split_terminator('|').skip(1) {
let node = arena.append(Element::TableCell(TableCell::Body), parent);
containers.push(Container::Inline {
content: content.trim(),
node,
});
}
}
}
last_end = start + 1;
}
tail

View file

@ -1,7 +1,7 @@
use indextree::NodeId;
use std::ops::RangeInclusive;
use crate::elements::{Element, Table, TableRow};
use crate::elements::{Element, Table, TableCell, TableRow};
use crate::Org;
/// Validation Error
@ -49,7 +49,7 @@ impl Org<'_> {
pub fn validate(&self) -> Vec<ValidationError> {
let mut errors = Vec::new();
macro_rules! expect {
macro_rules! expect_element {
($node:ident, $expect:expr, $($pattern:pat)|+) => {
match self[$node] {
$($pattern)|+ => (),
@ -61,55 +61,95 @@ impl Org<'_> {
};
}
macro_rules! expect_children {
($node:ident) => {
if self.arena[$node].first_child().is_none() {
errors.push(ValidationError::ExpectedChildren { at: $node });
}
};
}
for node_id in self.root.descendants(&self.arena) {
let node = &self.arena[node_id];
match node.get() {
Element::Document { .. } => {
let mut children = node_id.children(&self.arena);
if let Some(node) = children.next() {
expect!(
node,
"Headline,Section",
if let Some(child) = children.next() {
expect_element!(
child,
"Headline|Section",
Element::Headline { .. } | Element::Section
);
}
for node in children {
expect!(
node,
for child in children {
expect_element!(
child,
"Headline",
Element::Headline { .. }
);
}
}
Element::Headline { .. } => {
if node.first_child().is_some() {
expect_children!(node_id);
let mut children = node_id.children(&self.arena);
if let Some(node) = children.next() {
expect!(node, "Title", Element::Title(_));
if let Some(child) = children.next() {
expect_element!(child, "Title", Element::Title(_));
}
if let Some(node) = children.next() {
expect!(
node,
"Headline,Section",
if let Some(child) = children.next() {
expect_element!(
child,
"Headline|Section",
Element::Headline { .. } | Element::Section
);
}
for node in children {
expect!(
node,
for child in children {
expect_element!(
child,
"Headline",
Element::Headline { .. }
);
}
} else {
errors.push(ValidationError::ExpectedChildren { at: node_id });
}
}
Element::Title(title) => {
if !title.raw.is_empty() && node.first_child().is_none() {
errors.push(ValidationError::ExpectedChildren { at: node_id });
}
}
Element::List(_) => {
expect_children!(node_id);
for child in node_id.children(&self.arena) {
expect_element!(child, "ListItem", Element::ListItem(_));
}
}
Element::Table(Table::Org { .. }) => {
for child in node_id.children(&self.arena) {
expect_element!(child, "TableRow", Element::TableRow(_));
}
}
Element::TableRow(TableRow::Header) => {
expect_children!(node_id);
for child in node_id.children(&self.arena) {
expect_element!(
child,
"TableCell::Header",
Element::TableCell(TableCell::Header)
);
}
}
Element::TableRow(TableRow::Body) => {
expect_children!(node_id);
for child in node_id.children(&self.arena) {
expect_element!(
child,
"TableCell::Body",
Element::TableCell(TableCell::Body)
);
}
}
Element::CommentBlock(_)
| Element::ExampleBlock(_)
| Element::ExportBlock(_)
@ -134,42 +174,27 @@ impl Org<'_> {
| Element::Keyword(_)
| Element::Rule(_)
| Element::Cookie(_)
| Element::Table(Table::TableEl { .. })
| Element::TableRow(TableRow::Rule) => {
| Element::TableRow(TableRow::BodyRule)
| Element::TableRow(TableRow::HeaderRule) => {
if node.first_child().is_some() {
errors.push(ValidationError::UnexpectedChildren { at: node_id });
}
}
Element::List(_) => {
if node.first_child().is_some() {
for node in node_id.children(&self.arena) {
expect!(node, "ListItem", Element::ListItem(_));
}
} else {
errors.push(ValidationError::ExpectedChildren { at: node_id });
}
}
Element::SpecialBlock(_)
| Element::QuoteBlock(_)
| Element::CenterBlock(_)
| Element::VerseBlock(_)
| Element::Paragraph { .. }
| Element::Section
| Element::Table(Table::Org { .. })
| Element::TableRow(TableRow::Standard)
| Element::Bold
| Element::Italic
| Element::Underline
| Element::Strike
| Element::DynBlock(_)
| Element::ListItem(_) => {
if node.first_child().is_none() {
errors.push(ValidationError::ExpectedChildren { at: node_id });
expect_children!(node_id);
}
}
// TableCell is a container, but it might
// not contain anything, e.g. `||||||`
Element::Drawer(_) | Element::TableCell => (),
Element::Drawer(_) | Element::TableCell(_) | Element::Table(_) => (),
}
}
errors