feat(parsers): update table parsing
This commit is contained in:
parent
5d5fc58027
commit
b8265814aa
|
@ -47,7 +47,7 @@ pub use self::{
|
|||
planning::Planning,
|
||||
rule::Rule,
|
||||
snippet::Snippet,
|
||||
table::{Table, TableRow},
|
||||
table::{Table, TableCell, TableRow},
|
||||
target::Target,
|
||||
timestamp::{Datetime, Timestamp},
|
||||
title::Title,
|
||||
|
@ -103,33 +103,32 @@ pub enum Element<'a> {
|
|||
Title(Title<'a>),
|
||||
Table(Table<'a>),
|
||||
TableRow(TableRow),
|
||||
TableCell,
|
||||
TableCell(TableCell),
|
||||
}
|
||||
|
||||
impl Element<'_> {
|
||||
pub fn is_container(&self) -> bool {
|
||||
use Element::*;
|
||||
|
||||
match self {
|
||||
SpecialBlock(_)
|
||||
| QuoteBlock(_)
|
||||
| CenterBlock(_)
|
||||
| VerseBlock(_)
|
||||
| Bold
|
||||
| Document { .. }
|
||||
| DynBlock(_)
|
||||
| Headline { .. }
|
||||
| Italic
|
||||
| List(_)
|
||||
| ListItem(_)
|
||||
| Paragraph { .. }
|
||||
| Section
|
||||
| Strike
|
||||
| Underline
|
||||
| Title(_)
|
||||
| Table(_)
|
||||
| TableRow(_)
|
||||
| TableCell => true,
|
||||
Element::SpecialBlock(_)
|
||||
| Element::QuoteBlock(_)
|
||||
| Element::CenterBlock(_)
|
||||
| Element::VerseBlock(_)
|
||||
| Element::Bold
|
||||
| Element::Document { .. }
|
||||
| Element::DynBlock(_)
|
||||
| Element::Headline { .. }
|
||||
| Element::Italic
|
||||
| Element::List(_)
|
||||
| Element::ListItem(_)
|
||||
| Element::Paragraph { .. }
|
||||
| Element::Section
|
||||
| Element::Strike
|
||||
| Element::Underline
|
||||
| Element::Title(_)
|
||||
| Element::Table(_)
|
||||
| Element::TableRow(TableRow::Header)
|
||||
| Element::TableRow(TableRow::Body)
|
||||
| Element::TableCell(_) => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
@ -187,7 +186,7 @@ impl Element<'_> {
|
|||
Title(e) => Title(e.into_owned()),
|
||||
Table(e) => Table(e.into_owned()),
|
||||
TableRow(e) => TableRow(e),
|
||||
TableCell => TableCell,
|
||||
TableCell(e) => TableCell(e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@ pub enum Table<'a> {
|
|||
/// Number of blank lines between the table's last line and the next non-blank
|
||||
/// line or buffer's end
|
||||
post_blank: usize,
|
||||
has_header: bool,
|
||||
},
|
||||
/// "table.el" type table
|
||||
#[cfg_attr(feature = "ser", serde(rename = "table.el"))]
|
||||
|
@ -63,9 +64,14 @@ impl Table<'_> {
|
|||
|
||||
pub fn into_owned(self) -> Table<'static> {
|
||||
match self {
|
||||
Table::Org { tblfm, post_blank } => Table::Org {
|
||||
Table::Org {
|
||||
tblfm,
|
||||
post_blank,
|
||||
has_header,
|
||||
} => Table::Org {
|
||||
tblfm: tblfm.map(Into::into).map(Cow::Owned),
|
||||
post_blank: post_blank,
|
||||
has_header: has_header,
|
||||
},
|
||||
Table::TableEl { value, post_blank } => Table::TableEl {
|
||||
value: value.into_owned().into(),
|
||||
|
@ -76,14 +82,55 @@ impl Table<'_> {
|
|||
}
|
||||
|
||||
/// Table Row Element
|
||||
///
|
||||
/// # Syntax
|
||||
///
|
||||
/// ```text
|
||||
/// | 0 | 1 | 2 | <- TableRow::Body
|
||||
/// | 0 | 1 | 2 | <- TableRow::Body
|
||||
/// ```
|
||||
///
|
||||
/// ```text
|
||||
/// |-----+-----+-----| <- ignored
|
||||
/// | 0 | 1 | 2 | <- TableRow::Header
|
||||
/// | 0 | 1 | 2 | <- TableRow::Header
|
||||
/// |-----+-----+-----| <- TableRow::HeaderRule
|
||||
/// | 0 | 1 | 2 | <- TableRow::Body
|
||||
/// |-----+-----+-----| <- TableRow::BodyRule
|
||||
/// | 0 | 1 | 2 | <- TableRow::Body
|
||||
/// |-----+-----+-----| <- TableRow::BodyRule
|
||||
/// |-----+-----+-----| <- TableRow::BodyRule
|
||||
/// | 0 | 1 | 2 | <- TableRow::Body
|
||||
/// |-----+-----+-----| <- ignored
|
||||
/// ```
|
||||
///
|
||||
#[derive(Debug)]
|
||||
#[cfg_attr(test, derive(PartialEq))]
|
||||
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
|
||||
#[cfg_attr(feature = "ser", serde(tag = "table_row_type"))]
|
||||
#[cfg_attr(feature = "ser", serde(rename_all = "kebab-case"))]
|
||||
pub enum TableRow {
|
||||
Standard,
|
||||
Rule,
|
||||
/// This row is part of table header
|
||||
Header,
|
||||
/// This row is part of table body
|
||||
Body,
|
||||
/// This row is between table header and body
|
||||
HeaderRule,
|
||||
/// This row is between table body and next body
|
||||
BodyRule,
|
||||
}
|
||||
|
||||
/// Table Cell Element
|
||||
#[derive(Debug)]
|
||||
#[cfg_attr(test, derive(PartialEq))]
|
||||
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
|
||||
#[cfg_attr(feature = "ser", serde(tag = "table_cell_type"))]
|
||||
#[cfg_attr(feature = "ser", serde(rename_all = "kebab-case"))]
|
||||
pub enum TableCell {
|
||||
/// Header cell
|
||||
Header,
|
||||
/// Body cell, or standard cell
|
||||
Body,
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
@ -3,7 +3,7 @@ use std::io::{Error, Write};
|
|||
|
||||
use jetscii::{bytes, BytesConst};
|
||||
|
||||
use crate::elements::{Element, Timestamp};
|
||||
use crate::elements::{Element, Table, TableCell, TableRow, Timestamp};
|
||||
use crate::export::write_datetime;
|
||||
|
||||
/// A wrapper for escaping sensitive characters in html.
|
||||
|
@ -169,11 +169,27 @@ pub trait HtmlHandler<E: From<Error>>: Default {
|
|||
Element::Rule(_) => write!(w, "<hr>")?,
|
||||
Element::Cookie(cookie) => write!(w, "<code>{}</code>", cookie.value)?,
|
||||
Element::Title(title) => {
|
||||
write!(w, "<h{}>", if title.level <= 6 { title.level } else { 6 })?
|
||||
write!(w, "<h{}>", if title.level <= 6 { title.level } else { 6 })?;
|
||||
}
|
||||
Element::Table(_) => (),
|
||||
Element::TableRow(_) => (),
|
||||
Element::TableCell => (),
|
||||
Element::Table(Table::TableEl { .. }) => (),
|
||||
Element::Table(Table::Org { has_header, .. }) => {
|
||||
write!(w, "<table>")?;
|
||||
if *has_header {
|
||||
write!(w, "<thead>")?;
|
||||
} else {
|
||||
write!(w, "<tbody>")?;
|
||||
}
|
||||
}
|
||||
Element::TableRow(row) => match row {
|
||||
TableRow::Body => write!(w, "<tr>")?,
|
||||
TableRow::BodyRule => write!(w, "</tbody><tbody>")?,
|
||||
TableRow::Header => write!(w, "<tr>")?,
|
||||
TableRow::HeaderRule => write!(w, "</thead><tbody>")?,
|
||||
},
|
||||
Element::TableCell(cell) => match cell {
|
||||
TableCell::Body => write!(w, "<td>")?,
|
||||
TableCell::Header => write!(w, "<th>")?,
|
||||
},
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
@ -206,9 +222,17 @@ pub trait HtmlHandler<E: From<Error>>: Default {
|
|||
Element::Title(title) => {
|
||||
write!(w, "</h{}>", if title.level <= 6 { title.level } else { 6 })?
|
||||
}
|
||||
Element::Table(_) => (),
|
||||
Element::TableRow(_) => (),
|
||||
Element::TableCell => (),
|
||||
Element::Table(Table::TableEl { .. }) => (),
|
||||
Element::Table(Table::Org { .. }) => {
|
||||
write!(w, "</tbody></table>")?;
|
||||
}
|
||||
Element::TableRow(TableRow::Body) | Element::TableRow(TableRow::Header) => {
|
||||
write!(w, "</tr>")?;
|
||||
}
|
||||
Element::TableCell(cell) => match cell {
|
||||
TableCell::Body => write!(w, "</td>")?,
|
||||
TableCell::Header => write!(w, "</th>")?,
|
||||
},
|
||||
// non-container elements
|
||||
_ => debug_assert!(!element.is_container()),
|
||||
}
|
||||
|
|
|
@ -183,7 +183,7 @@ pub trait OrgHandler<E: From<Error>>: Default {
|
|||
}
|
||||
Element::Table(_) => (),
|
||||
Element::TableRow(_) => (),
|
||||
Element::TableCell => (),
|
||||
Element::TableCell(_) => (),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
@ -275,7 +275,7 @@ pub trait OrgHandler<E: From<Error>>: Default {
|
|||
write_blank_lines(w, *post_blank)?;
|
||||
}
|
||||
Element::TableRow(_) => (),
|
||||
Element::TableCell => (),
|
||||
Element::TableCell(_) => (),
|
||||
// non-container elements
|
||||
_ => debug_assert!(!element.is_container()),
|
||||
}
|
||||
|
|
|
@ -13,7 +13,7 @@ use crate::elements::{
|
|||
radio_target::parse_radio_target, BabelCall, CenterBlock, Clock, Comment, CommentBlock, Cookie,
|
||||
Drawer, DynBlock, Element, ExampleBlock, ExportBlock, FixedWidth, FnDef, FnRef, InlineCall,
|
||||
InlineSrc, Keyword, Link, List, ListItem, Macros, QuoteBlock, Rule, Snippet, SourceBlock,
|
||||
SpecialBlock, Table, TableRow, Target, Timestamp, Title, VerseBlock,
|
||||
SpecialBlock, Table, TableCell, TableRow, Target, Timestamp, Title, VerseBlock,
|
||||
};
|
||||
|
||||
pub trait ElementArena<'a> {
|
||||
|
@ -707,28 +707,67 @@ pub fn parse_org_table<'a, T: ElementArena<'a>>(
|
|||
let (tail, contents) = take_lines_while(|line| line.trim_start().starts_with('|'))(contents);
|
||||
let (tail, blank) = blank_lines(tail);
|
||||
|
||||
let mut iter = contents.trim_end().lines().peekable();
|
||||
|
||||
let mut lines = vec![];
|
||||
|
||||
let mut has_header = false;
|
||||
|
||||
if let Some(line) = iter.next() {
|
||||
let line = line.trim_start();
|
||||
if !line.starts_with("|-") {
|
||||
lines.push(line);
|
||||
}
|
||||
}
|
||||
|
||||
while let Some(line) = iter.next() {
|
||||
let line = line.trim_start();
|
||||
if iter.peek().is_none() && line.starts_with("|-") {
|
||||
break;
|
||||
} else if line.starts_with("|-") {
|
||||
has_header = true;
|
||||
}
|
||||
lines.push(line);
|
||||
}
|
||||
|
||||
let parent = arena.append(
|
||||
Table::Org {
|
||||
tblfm: None,
|
||||
post_blank: blank,
|
||||
has_header,
|
||||
},
|
||||
parent,
|
||||
);
|
||||
|
||||
let mut last_end = 0;
|
||||
for start in memchr_iter(b'\n', contents.as_bytes()).chain(once(contents.len())) {
|
||||
let line = contents[last_end..start].trim_start();
|
||||
for line in lines {
|
||||
if line.starts_with("|-") {
|
||||
arena.append(TableRow::Rule, parent);
|
||||
} else if !line.is_empty() {
|
||||
// ignores trailing newline
|
||||
let parent = arena.append(TableRow::Standard, parent);
|
||||
for content in line.split_terminator('|').skip(1) {
|
||||
let node = arena.append(Element::TableCell, parent);
|
||||
containers.push(Container::Inline { content, node });
|
||||
if has_header {
|
||||
arena.append(Element::TableRow(TableRow::HeaderRule), parent);
|
||||
has_header = false;
|
||||
} else {
|
||||
arena.append(Element::TableRow(TableRow::BodyRule), parent);
|
||||
}
|
||||
} else {
|
||||
if has_header {
|
||||
let parent = arena.append(Element::TableRow(TableRow::Header), parent);
|
||||
for content in line.split_terminator('|').skip(1) {
|
||||
let node = arena.append(Element::TableCell(TableCell::Header), parent);
|
||||
containers.push(Container::Inline {
|
||||
content: content.trim(),
|
||||
node,
|
||||
});
|
||||
}
|
||||
} else {
|
||||
let parent = arena.append(Element::TableRow(TableRow::Body), parent);
|
||||
for content in line.split_terminator('|').skip(1) {
|
||||
let node = arena.append(Element::TableCell(TableCell::Body), parent);
|
||||
containers.push(Container::Inline {
|
||||
content: content.trim(),
|
||||
node,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
last_end = start + 1;
|
||||
}
|
||||
|
||||
tail
|
||||
|
|
123
src/validate.rs
123
src/validate.rs
|
@ -1,7 +1,7 @@
|
|||
use indextree::NodeId;
|
||||
use std::ops::RangeInclusive;
|
||||
|
||||
use crate::elements::{Element, Table, TableRow};
|
||||
use crate::elements::{Element, Table, TableCell, TableRow};
|
||||
use crate::Org;
|
||||
|
||||
/// Validation Error
|
||||
|
@ -49,7 +49,7 @@ impl Org<'_> {
|
|||
pub fn validate(&self) -> Vec<ValidationError> {
|
||||
let mut errors = Vec::new();
|
||||
|
||||
macro_rules! expect {
|
||||
macro_rules! expect_element {
|
||||
($node:ident, $expect:expr, $($pattern:pat)|+) => {
|
||||
match self[$node] {
|
||||
$($pattern)|+ => (),
|
||||
|
@ -61,48 +61,57 @@ impl Org<'_> {
|
|||
};
|
||||
}
|
||||
|
||||
macro_rules! expect_children {
|
||||
($node:ident) => {
|
||||
if self.arena[$node].first_child().is_none() {
|
||||
errors.push(ValidationError::ExpectedChildren { at: $node });
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
for node_id in self.root.descendants(&self.arena) {
|
||||
let node = &self.arena[node_id];
|
||||
match node.get() {
|
||||
Element::Document { .. } => {
|
||||
let mut children = node_id.children(&self.arena);
|
||||
if let Some(node) = children.next() {
|
||||
expect!(
|
||||
node,
|
||||
"Headline,Section",
|
||||
if let Some(child) = children.next() {
|
||||
expect_element!(
|
||||
child,
|
||||
"Headline|Section",
|
||||
Element::Headline { .. } | Element::Section
|
||||
);
|
||||
}
|
||||
for node in children {
|
||||
expect!(
|
||||
node,
|
||||
|
||||
for child in children {
|
||||
expect_element!(
|
||||
child,
|
||||
"Headline",
|
||||
Element::Headline { .. }
|
||||
);
|
||||
}
|
||||
}
|
||||
Element::Headline { .. } => {
|
||||
if node.first_child().is_some() {
|
||||
let mut children = node_id.children(&self.arena);
|
||||
if let Some(node) = children.next() {
|
||||
expect!(node, "Title", Element::Title(_));
|
||||
}
|
||||
if let Some(node) = children.next() {
|
||||
expect!(
|
||||
node,
|
||||
"Headline,Section",
|
||||
Element::Headline { .. } | Element::Section
|
||||
);
|
||||
}
|
||||
for node in children {
|
||||
expect!(
|
||||
node,
|
||||
"Headline",
|
||||
Element::Headline { .. }
|
||||
);
|
||||
}
|
||||
} else {
|
||||
errors.push(ValidationError::ExpectedChildren { at: node_id });
|
||||
expect_children!(node_id);
|
||||
|
||||
let mut children = node_id.children(&self.arena);
|
||||
if let Some(child) = children.next() {
|
||||
expect_element!(child, "Title", Element::Title(_));
|
||||
}
|
||||
|
||||
if let Some(child) = children.next() {
|
||||
expect_element!(
|
||||
child,
|
||||
"Headline|Section",
|
||||
Element::Headline { .. } | Element::Section
|
||||
);
|
||||
}
|
||||
|
||||
for child in children {
|
||||
expect_element!(
|
||||
child,
|
||||
"Headline",
|
||||
Element::Headline { .. }
|
||||
);
|
||||
}
|
||||
}
|
||||
Element::Title(title) => {
|
||||
|
@ -110,6 +119,37 @@ impl Org<'_> {
|
|||
errors.push(ValidationError::ExpectedChildren { at: node_id });
|
||||
}
|
||||
}
|
||||
Element::List(_) => {
|
||||
expect_children!(node_id);
|
||||
for child in node_id.children(&self.arena) {
|
||||
expect_element!(child, "ListItem", Element::ListItem(_));
|
||||
}
|
||||
}
|
||||
Element::Table(Table::Org { .. }) => {
|
||||
for child in node_id.children(&self.arena) {
|
||||
expect_element!(child, "TableRow", Element::TableRow(_));
|
||||
}
|
||||
}
|
||||
Element::TableRow(TableRow::Header) => {
|
||||
expect_children!(node_id);
|
||||
for child in node_id.children(&self.arena) {
|
||||
expect_element!(
|
||||
child,
|
||||
"TableCell::Header",
|
||||
Element::TableCell(TableCell::Header)
|
||||
);
|
||||
}
|
||||
}
|
||||
Element::TableRow(TableRow::Body) => {
|
||||
expect_children!(node_id);
|
||||
for child in node_id.children(&self.arena) {
|
||||
expect_element!(
|
||||
child,
|
||||
"TableCell::Body",
|
||||
Element::TableCell(TableCell::Body)
|
||||
);
|
||||
}
|
||||
}
|
||||
Element::CommentBlock(_)
|
||||
| Element::ExampleBlock(_)
|
||||
| Element::ExportBlock(_)
|
||||
|
@ -134,42 +174,27 @@ impl Org<'_> {
|
|||
| Element::Keyword(_)
|
||||
| Element::Rule(_)
|
||||
| Element::Cookie(_)
|
||||
| Element::Table(Table::TableEl { .. })
|
||||
| Element::TableRow(TableRow::Rule) => {
|
||||
| Element::TableRow(TableRow::BodyRule)
|
||||
| Element::TableRow(TableRow::HeaderRule) => {
|
||||
if node.first_child().is_some() {
|
||||
errors.push(ValidationError::UnexpectedChildren { at: node_id });
|
||||
}
|
||||
}
|
||||
Element::List(_) => {
|
||||
if node.first_child().is_some() {
|
||||
for node in node_id.children(&self.arena) {
|
||||
expect!(node, "ListItem", Element::ListItem(_));
|
||||
}
|
||||
} else {
|
||||
errors.push(ValidationError::ExpectedChildren { at: node_id });
|
||||
}
|
||||
}
|
||||
Element::SpecialBlock(_)
|
||||
| Element::QuoteBlock(_)
|
||||
| Element::CenterBlock(_)
|
||||
| Element::VerseBlock(_)
|
||||
| Element::Paragraph { .. }
|
||||
| Element::Section
|
||||
| Element::Table(Table::Org { .. })
|
||||
| Element::TableRow(TableRow::Standard)
|
||||
| Element::Bold
|
||||
| Element::Italic
|
||||
| Element::Underline
|
||||
| Element::Strike
|
||||
| Element::DynBlock(_)
|
||||
| Element::ListItem(_) => {
|
||||
if node.first_child().is_none() {
|
||||
errors.push(ValidationError::ExpectedChildren { at: node_id });
|
||||
}
|
||||
expect_children!(node_id);
|
||||
}
|
||||
// TableCell is a container but it might
|
||||
// not contain anything, e.g. `||||||`
|
||||
Element::Drawer(_) | Element::TableCell => (),
|
||||
Element::Drawer(_) | Element::TableCell(_) | Element::Table(_) => (),
|
||||
}
|
||||
}
|
||||
errors
|
||||
|
|
Loading…
Reference in a new issue