feat(parser): table blank lines

This commit is contained in:
PoiScript 2019-10-30 14:43:55 +08:00
parent ead6ea6289
commit b446471535
10 changed files with 151 additions and 114 deletions

View file

@ -1,8 +1,11 @@
use std::borrow::Cow; use std::borrow::Cow;
use nom::{ use nom::{
bytes::complete::tag_no_case, character::complete::alpha1, error::ParseError, bytes::complete::tag_no_case,
sequence::preceded, IResult, character::complete::{alpha1, space0},
error::ParseError,
sequence::preceded,
IResult,
}; };
use crate::parsers::{blank_lines, line, take_lines_while}; use crate::parsers::{blank_lines, line, take_lines_while};
@ -226,6 +229,7 @@ pub fn parse_block_element(input: &str) -> Option<(&str, (&str, Option<&str>, &s
fn parse_block_element_internal<'a, E: ParseError<&'a str>>( fn parse_block_element_internal<'a, E: ParseError<&'a str>>(
input: &'a str, input: &'a str,
) -> IResult<&str, (&str, Option<&str>, &str, usize), E> { ) -> IResult<&str, (&str, Option<&str>, &str, usize), E> {
let (input, _) = space0(input)?;
let (input, name) = preceded(tag_no_case("#+BEGIN_"), alpha1)(input)?; let (input, name) = preceded(tag_no_case("#+BEGIN_"), alpha1)(input)?;
let (input, args) = line(input)?; let (input, args) = line(input)?;
let end_line = format!("#+END_{}", name); let end_line = format!("#+END_{}", name);

View file

@ -139,6 +139,7 @@ impl Clock<'_> {
} }
fn parse_clock<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&str, Clock, E> { fn parse_clock<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&str, Clock, E> {
let (input, _) = space0(input)?;
let (input, _) = tag("CLOCK:")(input)?; let (input, _) = tag("CLOCK:")(input)?;
let (input, _) = space0(input)?; let (input, _) = space0(input)?;
let (input, timestamp) = parse_inactive(input)?; let (input, timestamp) = parse_inactive(input)?;

View file

@ -14,7 +14,10 @@ pub struct Comment<'a> {
impl Comment<'_> { impl Comment<'_> {
pub(crate) fn parse(input: &str) -> Option<(&str, Comment<'_>)> { pub(crate) fn parse(input: &str) -> Option<(&str, Comment<'_>)> {
let (input, value) = take_lines_while(|line| line == "#" || line.starts_with("# "))(input); let (input, value) = take_lines_while(|line| {
let line = line.trim_start();
line == "#" || line.starts_with("# ")
})(input);
let (input, blank) = blank_lines(input); let (input, blank) = blank_lines(input);
if value.is_empty() { if value.is_empty() {

View file

@ -2,6 +2,7 @@ use std::borrow::Cow;
use nom::{ use nom::{
bytes::complete::{tag, take_while1}, bytes::complete::{tag, take_while1},
character::complete::space0,
error::ParseError, error::ParseError,
sequence::delimited, sequence::delimited,
IResult, IResult,
@ -56,6 +57,7 @@ pub fn parse_drawer<'a, E: ParseError<&'a str>>(
pub fn parse_drawer_without_blank<'a, E: ParseError<&'a str>>( pub fn parse_drawer_without_blank<'a, E: ParseError<&'a str>>(
input: &'a str, input: &'a str,
) -> IResult<&str, (Drawer, &str), E> { ) -> IResult<&str, (Drawer, &str), E> {
let (input, _) = space0(input)?;
let (input, name) = delimited( let (input, name) = delimited(
tag(":"), tag(":"),
take_while1(|c: char| c.is_ascii_alphabetic() || c == '-' || c == '_'), take_while1(|c: char| c.is_ascii_alphabetic() || c == '-' || c == '_'),

View file

@ -2,7 +2,7 @@ use std::borrow::Cow;
use nom::{ use nom::{
bytes::complete::tag_no_case, bytes::complete::tag_no_case,
character::complete::{alpha1, space1}, character::complete::{alpha1, space0, space1},
error::ParseError, error::ParseError,
IResult, IResult,
}; };
@ -46,6 +46,7 @@ impl DynBlock<'_> {
fn parse_dyn_block<'a, E: ParseError<&'a str>>( fn parse_dyn_block<'a, E: ParseError<&'a str>>(
input: &'a str, input: &'a str,
) -> IResult<&str, (DynBlock, &str), E> { ) -> IResult<&str, (DynBlock, &str), E> {
let (input, _) = space0(input)?;
let (input, _) = tag_no_case("#+BEGIN:")(input)?; let (input, _) = tag_no_case("#+BEGIN:")(input)?;
let (input, _) = space1(input)?; let (input, _) = space1(input)?;
let (input, name) = alpha1(input)?; let (input, name) = alpha1(input)?;

View file

@ -15,7 +15,10 @@ pub struct FixedWidth<'a> {
impl FixedWidth<'_> { impl FixedWidth<'_> {
pub(crate) fn parse(input: &str) -> Option<(&str, FixedWidth<'_>)> { pub(crate) fn parse(input: &str) -> Option<(&str, FixedWidth<'_>)> {
let (input, value) = take_lines_while(|line| line == ":" || line.starts_with(": "))(input); let (input, value) = take_lines_while(|line| {
let line = line.trim_start();
line == ":" || line.starts_with(": ")
})(input);
let (input, blank) = blank_lines(input); let (input, blank) = blank_lines(input);
if value.is_empty() { if value.is_empty() {

View file

@ -2,6 +2,7 @@ use std::borrow::Cow;
use nom::{ use nom::{
bytes::complete::{tag, take_till}, bytes::complete::{tag, take_till},
character::complete::space0,
combinator::opt, combinator::opt,
error::ParseError, error::ParseError,
sequence::delimited, sequence::delimited,
@ -67,6 +68,7 @@ pub fn parse_keyword(input: &str) -> Option<(&str, (&str, Option<&str>, &str, us
fn parse_keyword_internal<'a, E: ParseError<&'a str>>( fn parse_keyword_internal<'a, E: ParseError<&'a str>>(
input: &'a str, input: &'a str,
) -> IResult<&str, (&str, Option<&str>, &str, usize), E> { ) -> IResult<&str, (&str, Option<&str>, &str, usize), E> {
let (input, _) = space0(input)?;
let (input, _) = tag("#+")(input)?; let (input, _) = tag("#+")(input)?;
let (input, key) = take_till(|c: char| c.is_ascii_whitespace() || c == ':' || c == '[')(input)?; let (input, key) = take_till(|c: char| c.is_ascii_whitespace() || c == ':' || c == '[')(input)?;
let (input, optional) = opt(delimited( let (input, optional) = opt(delimited(

View file

@ -1,4 +1,6 @@
use nom::{bytes::complete::take_while_m_n, error::ParseError, IResult}; use nom::{
bytes::complete::take_while_m_n, character::complete::space0, error::ParseError, IResult,
};
use crate::parsers::{blank_lines, eol}; use crate::parsers::{blank_lines, eol};
@ -18,6 +20,7 @@ impl Rule {
} }
fn parse_rule<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&str, Rule, E> { fn parse_rule<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&str, Rule, E> {
let (input, _) = space0(input)?;
let (input, _) = take_while_m_n(5, usize::max_value(), |c| c == '-')(input)?; let (input, _) = take_while_m_n(5, usize::max_value(), |c| c == '-')(input)?;
let (input, _) = eol(input)?; let (input, _) = eol(input)?;
let (input, blank) = blank_lines(input); let (input, blank) = blank_lines(input);

View file

@ -1,12 +1,8 @@
use std::borrow::Cow; use std::borrow::Cow;
use nom::{ use memchr::memchr;
combinator::{peek, verify},
error::ParseError,
IResult,
};
use crate::parsers::{line, take_lines_while}; use crate::parsers::{blank_lines, take_lines_while};
/// Table Element /// Table Element
#[derive(Debug)] #[derive(Debug)]
@ -16,20 +12,64 @@ use crate::parsers::{line, take_lines_while};
pub enum Table<'a> { pub enum Table<'a> {
/// "org" type table /// "org" type table
#[cfg_attr(feature = "ser", serde(rename = "org"))] #[cfg_attr(feature = "ser", serde(rename = "org"))]
Org { tblfm: Option<Cow<'a, str>> }, Org {
#[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))]
tblfm: Option<Cow<'a, str>>,
/// Number of blank lines between the table's last line and the next
/// non-blank line or the end of the buffer
post_blank: usize,
},
/// "table.el" type table /// "table.el" type table
#[cfg_attr(feature = "ser", serde(rename = "table.el"))] #[cfg_attr(feature = "ser", serde(rename = "table.el"))]
TableEl { value: Cow<'a, str> }, TableEl {
value: Cow<'a, str>,
/// Number of blank lines between the table's last line and the next
/// non-blank line or the end of the buffer
post_blank: usize,
},
} }
impl Table<'_> { impl Table<'_> {
/// Tries to parse a "table.el" style table located at the start of `input`.
///
/// The first line, with surrounding whitespace trimmed, has to begin with
/// `+-` and be made up exclusively of `+` and `-` characters; otherwise
/// `None` is returned (this includes empty input).
///
/// On success the returned tuple holds the unconsumed remainder of the
/// input and a `Table::TableEl` whose `value` is the text collected by
/// `take_lines_while` (lines that, ignoring leading whitespace, begin with
/// `|` or `+`) and whose `post_blank` is the value reported by
/// `blank_lines` for what follows.
pub fn parse_table_el(input: &str) -> Option<(&str, Table<'_>)> {
    // Isolate the first line without scanning more of the buffer than needed.
    let head = match memchr(b'\n', input.as_bytes()) {
        Some(end) => input[..end].trim(),
        None => input.trim(),
    };

    // A table.el table opens with a border row such as `+---+---+`.
    let is_border = head.starts_with("+-") && head.bytes().all(|b| b == b'+' || b == b'-');
    if !is_border {
        return None;
    }

    let (rest, value) = take_lines_while(|row| {
        let row = row.trim_start();
        row.starts_with('|') || row.starts_with('+')
    })(input);
    let (rest, post_blank) = blank_lines(rest);

    let table = Table::TableEl {
        value: value.into(),
        post_blank,
    };
    Some((rest, table))
}
pub fn into_owned(self) -> Table<'static> { pub fn into_owned(self) -> Table<'static> {
match self { match self {
Table::Org { tblfm } => Table::Org { Table::Org { tblfm, post_blank } => Table::Org {
tblfm: tblfm.map(Into::into).map(Cow::Owned), tblfm: tblfm.map(Into::into).map(Cow::Owned),
post_blank: post_blank,
}, },
Table::TableEl { value } => Table::TableEl { Table::TableEl { value, post_blank } => Table::TableEl {
value: value.into_owned().into(), value: value.into_owned().into(),
post_blank: post_blank,
}, },
} }
} }
@ -46,57 +86,28 @@ pub enum TableRow {
Rule, Rule,
} }
impl TableRow {
    /// Classifies a single table line by its leading bytes.
    ///
    /// A line beginning with `|-` yields `TableRow::Rule`, any other line
    /// beginning with `|` yields `TableRow::Standard`, and everything else
    /// (including the empty string) yields `None`. The input is inspected
    /// as given: no whitespace is trimmed here.
    pub(crate) fn parse(input: &str) -> Option<TableRow> {
        let mut bytes = input.bytes();
        match (bytes.next(), bytes.next()) {
            (Some(b'|'), Some(b'-')) => Some(TableRow::Rule),
            (Some(b'|'), _) => Some(TableRow::Standard),
            _ => None,
        }
    }
}
/// Convenience wrapper around `parse_table_el_internal` that uses the unit
/// error type and drops any parse error.
///
/// Returns `Some((remaining_input, table_text))` when the internal parser
/// succeeds and `None` when it fails.
pub fn parse_table_el(input: &str) -> Option<(&str, &str)> {
    match parse_table_el_internal::<()>(input) {
        Ok(parsed) => Some(parsed),
        // The caller only needs to know that no table.el table starts here.
        Err(_) => None,
    }
}
/// nom-style parser for a "table.el" table.
///
/// First peeks (without consuming) at the next line and requires that,
/// once trimmed, it starts with `+-` and contains nothing but `+` and `-`;
/// if that check fails the error from the combinator is propagated.
/// Afterwards `take_lines_while` gathers the lines that begin with `|` or
/// `+`, and its `(remaining_input, table_text)` pair is returned.
fn parse_table_el_internal<'a, E: ParseError<&'a str>>(
    input: &'a str,
) -> IResult<&'a str, &'a str, E> {
    // Shape of the opening border row, e.g. `+---+---+`.
    let is_border = |s: &str| {
        let s = s.trim();
        s.starts_with("+-") && s.bytes().all(|b| b == b'+' || b == b'-')
    };
    let (input, _) = peek(verify(line, is_border))(input)?;

    let belongs_to_table = |row: &str| row.starts_with('|') || row.starts_with('+');
    Ok(take_lines_while(belongs_to_table)(input))
}
#[test] #[test]
fn parse_table_el_() { fn parse_table_el_() {
use nom::error::VerboseError;
assert_eq!( assert_eq!(
parse_table_el_internal::<VerboseError<&str>>( Table::parse_table_el(
r#"+---+ r#" +---+
| | | |
+---+ +---+
"# "#
), ),
Ok(( Some((
r#" "",
"#, Table::TableEl {
r#"+---+ value: r#" +---+
| | | |
+---+ +---+
"# "#
.into(),
post_blank: 1
}
)) ))
); );
assert!(parse_table_el_internal::<VerboseError<&str>>("").is_err()); assert!(Table::parse_table_el("").is_none());
assert!(parse_table_el_internal::<VerboseError<&str>>("+----|---").is_err()); assert!(Table::parse_table_el("+----|---").is_none());
} }

View file

@ -12,11 +12,10 @@ use nom::{bytes::complete::take_while1, combinator::verify, error::ParseError, I
use crate::config::ParseConfig; use crate::config::ParseConfig;
use crate::elements::{ use crate::elements::{
block::parse_block_element, emphasis::parse_emphasis, keyword::parse_keyword, block::parse_block_element, emphasis::parse_emphasis, keyword::parse_keyword,
radio_target::parse_radio_target, table::parse_table_el, BabelCall, CenterBlock, Clock, radio_target::parse_radio_target, BabelCall, CenterBlock, Clock, Comment, CommentBlock, Cookie,
Comment, CommentBlock, Cookie, Drawer, DynBlock, Element, ExampleBlock, ExportBlock, Drawer, DynBlock, Element, ExampleBlock, ExportBlock, FixedWidth, FnDef, FnRef, InlineCall,
FixedWidth, FnDef, FnRef, InlineCall, InlineSrc, Keyword, Link, List, ListItem, Macros, InlineSrc, Keyword, Link, List, ListItem, Macros, QuoteBlock, Rule, Snippet, SourceBlock,
QuoteBlock, Rule, Snippet, SourceBlock, SpecialBlock, Table, TableRow, Target, Timestamp, SpecialBlock, Table, TableRow, Target, Timestamp, Title, VerseBlock,
Title, VerseBlock,
}; };
pub trait ElementArena<'a> { pub trait ElementArena<'a> {
@ -268,20 +267,21 @@ pub fn parse_block<'a, T: ElementArena<'a>>(
parent: NodeId, parent: NodeId,
containers: &mut Vec<Container<'a>>, containers: &mut Vec<Container<'a>>,
) -> Option<&'a str> { ) -> Option<&'a str> {
// footnote definitions must be start at column 0 match contents
if let Some((tail, (fn_def, content))) = FnDef::parse(contents) { .as_bytes()
let node = arena.append(fn_def, parent); .iter()
containers.push(Container::Block { content, node }); .find(|c| !c.is_ascii_whitespace())?
return Some(tail); {
} b'[' => {
let (tail, (fn_def, content)) = FnDef::parse(contents)?;
if let Some(tail) = parse_list(arena, contents, parent, containers) { let node = arena.append(fn_def, parent);
return Some(tail); containers.push(Container::Block { content, node });
} Some(tail)
}
let contents = contents.trim_start(); b'0'..=b'9' | b'*' => {
let tail = parse_list(arena, contents, parent, containers)?;
match contents.as_bytes().get(0)? { Some(tail)
}
b'C' => { b'C' => {
let (tail, clock) = Clock::parse(contents)?; let (tail, clock) = Clock::parse(contents)?;
arena.append(clock, parent); arena.append(clock, parent);
@ -292,9 +292,13 @@ pub fn parse_block<'a, T: ElementArena<'a>>(
None None
} }
b'-' => { b'-' => {
let (tail, rule) = Rule::parse(contents)?; if let Some((tail, rule)) = Rule::parse(contents) {
arena.append(rule, parent); arena.append(rule, parent);
Some(tail) Some(tail)
} else {
let tail = parse_list(arena, contents, parent, containers)?;
Some(tail)
}
} }
b':' => { b':' => {
if let Some((tail, (drawer, content))) = Drawer::parse(contents) { if let Some((tail, (drawer, content))) = Drawer::parse(contents) {
@ -308,9 +312,18 @@ pub fn parse_block<'a, T: ElementArena<'a>>(
} }
} }
b'|' => { b'|' => {
let tail = parse_table(arena, contents, containers, parent)?; let tail = parse_org_table(arena, contents, containers, parent);
Some(tail) Some(tail)
} }
b'+' => {
if let Some((tail, table)) = Table::parse_table_el(contents) {
arena.append(table, parent);
Some(tail)
} else {
let tail = parse_list(arena, contents, parent, containers)?;
Some(tail)
}
}
b'#' => { b'#' => {
if let Some((tail, (name, args, content, blank))) = parse_block_element(contents) { if let Some((tail, (name, args, content, blank))) = parse_block_element(contents) {
match_block( match_block(
@ -687,45 +700,39 @@ pub fn parse_list<'a, T: ElementArena<'a>>(
Some(tail) Some(tail)
} }
pub fn parse_table<'a, T: ElementArena<'a>>( pub fn parse_org_table<'a, T: ElementArena<'a>>(
arena: &mut T, arena: &mut T,
contents: &'a str, contents: &'a str,
containers: &mut Vec<Container<'a>>, containers: &mut Vec<Container<'a>>,
parent: NodeId, parent: NodeId,
) -> Option<&'a str> { ) -> &'a str {
if contents.trim_start().starts_with('|') { let (tail, contents) = take_lines_while(|line| line.trim_start().starts_with('|'))(contents);
let table_node = arena.append(Table::Org { tblfm: None }, parent); let (tail, blank) = blank_lines(tail);
let mut last_end = 0; let parent = arena.append(
for start in memchr_iter(b'\n', contents.as_bytes()).chain(once(contents.len())) { Table::Org {
let line = contents[last_end..start].trim(); tblfm: None,
match TableRow::parse(line) { post_blank: blank,
Some(TableRow::Standard) => { },
let row_node = arena.append(TableRow::Standard, table_node); parent,
for cell in line[1..].split_terminator('|') { );
let cell_node = arena.append(Element::TableCell, row_node);
containers.push(Container::Inline { let mut last_end = 0;
content: cell.trim(), for start in memchr_iter(b'\n', contents.as_bytes()).chain(once(contents.len())) {
node: cell_node, let line = contents[last_end..start].trim_start();
}); if line.starts_with("|-") {
} arena.append(TableRow::Rule, parent);
} } else {
Some(TableRow::Rule) => { let parent = arena.append(TableRow::Standard, parent);
arena.append(TableRow::Rule, table_node); for content in line.split_terminator('|').skip(1) {
} let node = arena.append(Element::TableCell, parent);
None => return Some(&contents[last_end..]), containers.push(Container::Inline { content, node });
} }
last_end = start + 1;
} }
last_end = start + 1;
Some("")
} else {
let (tail, value) = parse_table_el(contents)?;
let value = value.into();
arena.append(Table::TableEl { value }, parent);
Some(tail)
} }
tail
} }
pub fn line<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&str, &str, E> { pub fn line<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&str, &str, E> {