feat(parser): table blank lines

This commit is contained in:
PoiScript 2019-10-30 14:43:55 +08:00
parent ead6ea6289
commit b446471535
10 changed files with 151 additions and 114 deletions

View file

@ -1,8 +1,11 @@
use std::borrow::Cow;
use nom::{
bytes::complete::tag_no_case, character::complete::alpha1, error::ParseError,
sequence::preceded, IResult,
bytes::complete::tag_no_case,
character::complete::{alpha1, space0},
error::ParseError,
sequence::preceded,
IResult,
};
use crate::parsers::{blank_lines, line, take_lines_while};
@ -226,6 +229,7 @@ pub fn parse_block_element(input: &str) -> Option<(&str, (&str, Option<&str>, &s
fn parse_block_element_internal<'a, E: ParseError<&'a str>>(
input: &'a str,
) -> IResult<&str, (&str, Option<&str>, &str, usize), E> {
let (input, _) = space0(input)?;
let (input, name) = preceded(tag_no_case("#+BEGIN_"), alpha1)(input)?;
let (input, args) = line(input)?;
let end_line = format!("#+END_{}", name);

View file

@ -139,6 +139,7 @@ impl Clock<'_> {
}
fn parse_clock<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&str, Clock, E> {
let (input, _) = space0(input)?;
let (input, _) = tag("CLOCK:")(input)?;
let (input, _) = space0(input)?;
let (input, timestamp) = parse_inactive(input)?;

View file

@ -14,7 +14,10 @@ pub struct Comment<'a> {
impl Comment<'_> {
pub(crate) fn parse(input: &str) -> Option<(&str, Comment<'_>)> {
let (input, value) = take_lines_while(|line| line == "#" || line.starts_with("# "))(input);
let (input, value) = take_lines_while(|line| {
let line = line.trim_start();
line == "#" || line.starts_with("# ")
})(input);
let (input, blank) = blank_lines(input);
if value.is_empty() {

View file

@ -2,6 +2,7 @@ use std::borrow::Cow;
use nom::{
bytes::complete::{tag, take_while1},
character::complete::space0,
error::ParseError,
sequence::delimited,
IResult,
@ -56,6 +57,7 @@ pub fn parse_drawer<'a, E: ParseError<&'a str>>(
pub fn parse_drawer_without_blank<'a, E: ParseError<&'a str>>(
input: &'a str,
) -> IResult<&str, (Drawer, &str), E> {
let (input, _) = space0(input)?;
let (input, name) = delimited(
tag(":"),
take_while1(|c: char| c.is_ascii_alphabetic() || c == '-' || c == '_'),

View file

@ -2,7 +2,7 @@ use std::borrow::Cow;
use nom::{
bytes::complete::tag_no_case,
character::complete::{alpha1, space1},
character::complete::{alpha1, space0, space1},
error::ParseError,
IResult,
};
@ -46,6 +46,7 @@ impl DynBlock<'_> {
fn parse_dyn_block<'a, E: ParseError<&'a str>>(
input: &'a str,
) -> IResult<&str, (DynBlock, &str), E> {
let (input, _) = space0(input)?;
let (input, _) = tag_no_case("#+BEGIN:")(input)?;
let (input, _) = space1(input)?;
let (input, name) = alpha1(input)?;

View file

@ -15,7 +15,10 @@ pub struct FixedWidth<'a> {
impl FixedWidth<'_> {
pub(crate) fn parse(input: &str) -> Option<(&str, FixedWidth<'_>)> {
let (input, value) = take_lines_while(|line| line == ":" || line.starts_with(": "))(input);
let (input, value) = take_lines_while(|line| {
let line = line.trim_start();
line == ":" || line.starts_with(": ")
})(input);
let (input, blank) = blank_lines(input);
if value.is_empty() {

View file

@ -2,6 +2,7 @@ use std::borrow::Cow;
use nom::{
bytes::complete::{tag, take_till},
character::complete::space0,
combinator::opt,
error::ParseError,
sequence::delimited,
@ -67,6 +68,7 @@ pub fn parse_keyword(input: &str) -> Option<(&str, (&str, Option<&str>, &str, us
fn parse_keyword_internal<'a, E: ParseError<&'a str>>(
input: &'a str,
) -> IResult<&str, (&str, Option<&str>, &str, usize), E> {
let (input, _) = space0(input)?;
let (input, _) = tag("#+")(input)?;
let (input, key) = take_till(|c: char| c.is_ascii_whitespace() || c == ':' || c == '[')(input)?;
let (input, optional) = opt(delimited(

View file

@ -1,4 +1,6 @@
use nom::{bytes::complete::take_while_m_n, error::ParseError, IResult};
use nom::{
bytes::complete::take_while_m_n, character::complete::space0, error::ParseError, IResult,
};
use crate::parsers::{blank_lines, eol};
@ -18,6 +20,7 @@ impl Rule {
}
fn parse_rule<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&str, Rule, E> {
let (input, _) = space0(input)?;
let (input, _) = take_while_m_n(5, usize::max_value(), |c| c == '-')(input)?;
let (input, _) = eol(input)?;
let (input, blank) = blank_lines(input);

View file

@ -1,12 +1,8 @@
use std::borrow::Cow;
use nom::{
combinator::{peek, verify},
error::ParseError,
IResult,
};
use memchr::memchr;
use crate::parsers::{line, take_lines_while};
use crate::parsers::{blank_lines, take_lines_while};
/// Table Element
#[derive(Debug)]
@ -16,20 +12,64 @@ use crate::parsers::{line, take_lines_while};
pub enum Table<'a> {
/// "org" type table
#[cfg_attr(feature = "ser", serde(rename = "org"))]
Org { tblfm: Option<Cow<'a, str>> },
Org {
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
tblfm: Option<Cow<'a, str>>,
/// Number of blank lines between the table's last line and the next
/// non-blank line or the buffer's end
post_blank: usize,
},
/// "table.el" type table
#[cfg_attr(feature = "ser", serde(rename = "table.el"))]
TableEl { value: Cow<'a, str> },
TableEl {
value: Cow<'a, str>,
/// Number of blank lines between the table's last line and the next
/// non-blank line or the buffer's end
post_blank: usize,
},
}
impl Table<'_> {
/// Parses a "table.el" type table located at the start of `input`.
///
/// The first line (surrounding whitespace ignored) must begin with `+-` and
/// contain nothing but `+` and `-` characters; otherwise `None` is returned.
/// When it matches, every following line whose first non-whitespace
/// character is `|` or `+` is collected as the table's value, and the result
/// of `blank_lines` on the remainder is stored as `post_blank`.
///
/// Returns the unconsumed tail of `input` together with the
/// `Table::TableEl` element.
pub fn parse_table_el(input: &str) -> Option<(&str, Table<'_>)> {
    // Isolate the first line: everything up to the first newline, or the
    // whole input when there is no newline at all.
    let header = match memchr(b'\n', input.as_bytes()) {
        Some(newline) => input[0..newline].trim(),
        None => input.trim(),
    };

    // The header must start with "+-" and be built solely from plus and
    // minus signs.
    let is_border = header.starts_with("+-") && header.bytes().all(|b| b == b'+' || b == b'-');
    if !is_border {
        return None;
    }

    let (rest, value) = take_lines_while(|row| {
        let row = row.trim_start();
        row.starts_with('|') || row.starts_with('+')
    })(input);
    let (rest, post_blank) = blank_lines(rest);

    let table = Table::TableEl {
        value: value.into(),
        post_blank,
    };
    Some((rest, table))
}
pub fn into_owned(self) -> Table<'static> {
match self {
Table::Org { tblfm } => Table::Org {
Table::Org { tblfm, post_blank } => Table::Org {
tblfm: tblfm.map(Into::into).map(Cow::Owned),
post_blank: post_blank,
},
Table::TableEl { value } => Table::TableEl {
Table::TableEl { value, post_blank } => Table::TableEl {
value: value.into_owned().into(),
post_blank: post_blank,
},
}
}
@ -46,57 +86,28 @@ pub enum TableRow {
Rule,
}
impl TableRow {
pub(crate) fn parse(input: &str) -> Option<TableRow> {
if input.starts_with("|-") {
Some(TableRow::Rule)
} else if input.starts_with('|') {
Some(TableRow::Standard)
} else {
None
}
}
}
pub fn parse_table_el(input: &str) -> Option<(&str, &str)> {
parse_table_el_internal::<()>(input).ok()
}
fn parse_table_el_internal<'a, E: ParseError<&'a str>>(
input: &'a str,
) -> IResult<&'a str, &'a str, E> {
let (input, _) = peek(verify(line, |s: &str| {
let s = s.trim();
s.starts_with("+-") && s.as_bytes().iter().all(|&c| c == b'+' || c == b'-')
}))(input)?;
let (input, content) =
take_lines_while(|line| line.starts_with('|') || line.starts_with('+'))(input);
Ok((input, content))
}
#[test]
fn parse_table_el_() {
use nom::error::VerboseError;
assert_eq!(
parse_table_el_internal::<VerboseError<&str>>(
r#"+---+
| |
+---+
Table::parse_table_el(
r#" +---+
| |
+---+
"#
),
Ok((
r#"
"#,
r#"+---+
| |
+---+
Some((
"",
Table::TableEl {
value: r#" +---+
| |
+---+
"#
.into(),
post_blank: 1
}
))
);
assert!(parse_table_el_internal::<VerboseError<&str>>("").is_err());
assert!(parse_table_el_internal::<VerboseError<&str>>("+----|---").is_err());
assert!(Table::parse_table_el("").is_none());
assert!(Table::parse_table_el("+----|---").is_none());
}

View file

@ -12,11 +12,10 @@ use nom::{bytes::complete::take_while1, combinator::verify, error::ParseError, I
use crate::config::ParseConfig;
use crate::elements::{
block::parse_block_element, emphasis::parse_emphasis, keyword::parse_keyword,
radio_target::parse_radio_target, table::parse_table_el, BabelCall, CenterBlock, Clock,
Comment, CommentBlock, Cookie, Drawer, DynBlock, Element, ExampleBlock, ExportBlock,
FixedWidth, FnDef, FnRef, InlineCall, InlineSrc, Keyword, Link, List, ListItem, Macros,
QuoteBlock, Rule, Snippet, SourceBlock, SpecialBlock, Table, TableRow, Target, Timestamp,
Title, VerseBlock,
radio_target::parse_radio_target, BabelCall, CenterBlock, Clock, Comment, CommentBlock, Cookie,
Drawer, DynBlock, Element, ExampleBlock, ExportBlock, FixedWidth, FnDef, FnRef, InlineCall,
InlineSrc, Keyword, Link, List, ListItem, Macros, QuoteBlock, Rule, Snippet, SourceBlock,
SpecialBlock, Table, TableRow, Target, Timestamp, Title, VerseBlock,
};
pub trait ElementArena<'a> {
@ -268,20 +267,21 @@ pub fn parse_block<'a, T: ElementArena<'a>>(
parent: NodeId,
containers: &mut Vec<Container<'a>>,
) -> Option<&'a str> {
// footnote definitions must start at column 0
if let Some((tail, (fn_def, content))) = FnDef::parse(contents) {
let node = arena.append(fn_def, parent);
containers.push(Container::Block { content, node });
return Some(tail);
}
if let Some(tail) = parse_list(arena, contents, parent, containers) {
return Some(tail);
}
let contents = contents.trim_start();
match contents.as_bytes().get(0)? {
match contents
.as_bytes()
.iter()
.find(|c| !c.is_ascii_whitespace())?
{
b'[' => {
let (tail, (fn_def, content)) = FnDef::parse(contents)?;
let node = arena.append(fn_def, parent);
containers.push(Container::Block { content, node });
Some(tail)
}
b'0'..=b'9' | b'*' => {
let tail = parse_list(arena, contents, parent, containers)?;
Some(tail)
}
b'C' => {
let (tail, clock) = Clock::parse(contents)?;
arena.append(clock, parent);
@ -292,9 +292,13 @@ pub fn parse_block<'a, T: ElementArena<'a>>(
None
}
b'-' => {
let (tail, rule) = Rule::parse(contents)?;
arena.append(rule, parent);
Some(tail)
if let Some((tail, rule)) = Rule::parse(contents) {
arena.append(rule, parent);
Some(tail)
} else {
let tail = parse_list(arena, contents, parent, containers)?;
Some(tail)
}
}
b':' => {
if let Some((tail, (drawer, content))) = Drawer::parse(contents) {
@ -308,9 +312,18 @@ pub fn parse_block<'a, T: ElementArena<'a>>(
}
}
b'|' => {
let tail = parse_table(arena, contents, containers, parent)?;
let tail = parse_org_table(arena, contents, containers, parent);
Some(tail)
}
b'+' => {
if let Some((tail, table)) = Table::parse_table_el(contents) {
arena.append(table, parent);
Some(tail)
} else {
let tail = parse_list(arena, contents, parent, containers)?;
Some(tail)
}
}
b'#' => {
if let Some((tail, (name, args, content, blank))) = parse_block_element(contents) {
match_block(
@ -687,45 +700,39 @@ pub fn parse_list<'a, T: ElementArena<'a>>(
Some(tail)
}
pub fn parse_table<'a, T: ElementArena<'a>>(
pub fn parse_org_table<'a, T: ElementArena<'a>>(
arena: &mut T,
contents: &'a str,
containers: &mut Vec<Container<'a>>,
parent: NodeId,
) -> Option<&'a str> {
if contents.trim_start().starts_with('|') {
let table_node = arena.append(Table::Org { tblfm: None }, parent);
) -> &'a str {
let (tail, contents) = take_lines_while(|line| line.trim_start().starts_with('|'))(contents);
let (tail, blank) = blank_lines(tail);
let mut last_end = 0;
for start in memchr_iter(b'\n', contents.as_bytes()).chain(once(contents.len())) {
let line = contents[last_end..start].trim();
match TableRow::parse(line) {
Some(TableRow::Standard) => {
let row_node = arena.append(TableRow::Standard, table_node);
for cell in line[1..].split_terminator('|') {
let cell_node = arena.append(Element::TableCell, row_node);
containers.push(Container::Inline {
content: cell.trim(),
node: cell_node,
});
}
}
Some(TableRow::Rule) => {
arena.append(TableRow::Rule, table_node);
}
None => return Some(&contents[last_end..]),
let parent = arena.append(
Table::Org {
tblfm: None,
post_blank: blank,
},
parent,
);
let mut last_end = 0;
for start in memchr_iter(b'\n', contents.as_bytes()).chain(once(contents.len())) {
let line = contents[last_end..start].trim_start();
if line.starts_with("|-") {
arena.append(TableRow::Rule, parent);
} else {
let parent = arena.append(TableRow::Standard, parent);
for content in line.split_terminator('|').skip(1) {
let node = arena.append(Element::TableCell, parent);
containers.push(Container::Inline { content, node });
}
last_end = start + 1;
}
Some("")
} else {
let (tail, value) = parse_table_el(contents)?;
let value = value.into();
arena.append(Table::TableEl { value }, parent);
Some(tail)
last_end = start + 1;
}
tail
}
pub fn line<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&str, &str, E> {