refactor(elements): minor refactoring

This commit is contained in:
PoiScript 2020-04-14 17:59:45 +08:00
parent 020548fad9
commit c2849d05fb
15 changed files with 619 additions and 492 deletions

View file

@ -8,7 +8,141 @@ use nom::{
IResult,
};
use crate::parsers::{blank_lines, line, take_lines_while};
use crate::elements::Element;
use crate::parse::combinators::{blank_lines_count, line, lines_till};
/// Borrowed, not-yet-classified result of parsing a `#+BEGIN_<NAME>` … `#+END_<NAME>` block.
///
/// Every string field is a slice of the parsed input; nothing is copied.
/// Use `RawBlock::into_element` to turn it into a concrete block element.
#[derive(Debug)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct RawBlock<'a> {
/// Alphabetic run that follows `#+BEGIN_`, with its original casing.
pub name: &'a str,
/// Rest of the begin line after the name, trimmed; empty when nothing follows the name.
pub arguments: &'a str,
/// Count returned by `blank_lines_count` when applied to the start of `contents`.
pub pre_blank: usize,
/// Everything `lines_till` collected before the `#+END_<NAME>` line.
pub contents: &'a str,
/// Remainder of `contents` after `blank_lines_count` has been applied to it.
pub contents_without_blank_lines: &'a str,
/// Count returned by `blank_lines_count` for the input that follows the block.
pub post_blank: usize,
}
impl<'a> RawBlock<'a> {
    /// Parses a `#+BEGIN_<NAME>` … `#+END_<NAME>` block at the start of `input`.
    ///
    /// Returns the unparsed remainder and the raw block, or `None` when any
    /// of the underlying parsers fails.
    pub fn parse(input: &'a str) -> Option<(&str, RawBlock)> {
        Self::parse_internal::<()>(input).ok()
    }

    /// nom-style implementation of [`RawBlock::parse`], generic over the error
    /// type so tests can request verbose errors.
    fn parse_internal<E>(input: &'a str) -> IResult<&str, RawBlock, E>
    where
        E: ParseError<&'a str>,
    {
        let (input, _) = space0(input)?;
        let (input, name) = preceded(tag_no_case("#+BEGIN_"), alpha1)(input)?;
        let (input, arguments) = line(input)?;

        // The closing line is matched case-insensitively and ignoring
        // surrounding whitespace.
        let end_line = format!("#+END_{}", name);
        let (input, contents) =
            lines_till(|line| line.trim().eq_ignore_ascii_case(&end_line))(input)?;

        // Blank lines are counted twice: once at the start of the block body
        // and once after the block itself.
        let (contents_without_blank_lines, pre_blank) = blank_lines_count(contents)?;
        let (input, post_blank) = blank_lines_count(input)?;

        Ok((
            input,
            RawBlock {
                name,
                contents,
                arguments: arguments.trim(),
                pre_blank,
                contents_without_blank_lines,
                post_blank,
            },
        ))
    }

    /// Converts the raw block into the element selected by its (case-insensitive) name.
    ///
    /// The second tuple member is always `contents_without_blank_lines`.
    /// `CENTER`, `QUOTE`, `VERSE` and unrecognised names produce elements that
    /// carry `pre_blank` but no contents of their own; `COMMENT`, `EXAMPLE`,
    /// `EXPORT` and `SRC` store the full `contents` inside the element.
    pub fn into_element(self) -> (Element<'a>, &'a str) {
        let RawBlock {
            name,
            contents,
            arguments,
            pre_blank,
            contents_without_blank_lines,
            post_blank,
        } = self;

        // Empty arguments are represented as `None` on the element side.
        let parameters: Option<Cow<'a, str>> = if arguments.is_empty() {
            None
        } else {
            Some(arguments.into())
        };

        let element = match &*name.to_uppercase() {
            "CENTER" => CenterBlock {
                parameters,
                pre_blank,
                post_blank,
            }
            .into(),
            "QUOTE" => QuoteBlock {
                parameters,
                pre_blank,
                post_blank,
            }
            .into(),
            "VERSE" => VerseBlock {
                parameters,
                pre_blank,
                post_blank,
            }
            .into(),
            "COMMENT" => CommentBlock {
                data: parameters,
                contents: contents.into(),
                post_blank,
            }
            .into(),
            "EXAMPLE" => ExampleBlock {
                data: parameters,
                contents: contents.into(),
                post_blank,
            }
            .into(),
            "EXPORT" => ExportBlock {
                data: parameters.unwrap_or_default(),
                contents: contents.into(),
                post_blank,
            }
            .into(),
            "SRC" => {
                // Everything before the first space is the language; the rest
                // (leading space included) is kept as the arguments. Splitting
                // the raw `&str` yields two empty strings for empty input, so
                // no separate "no arguments" case is needed.
                let split = arguments.find(' ').unwrap_or_else(|| arguments.len());
                let (language, arguments) = arguments.split_at(split);

                SourceBlock {
                    arguments: arguments.into(),
                    language: language.into(),
                    contents: contents.into(),
                    post_blank,
                }
                .into()
            }
            _ => SpecialBlock {
                parameters,
                name: name.into(),
                pre_blank,
                post_blank,
            }
            .into(),
        };

        (element, contents_without_blank_lines)
    }
}
/// Special Block Element
#[derive(Debug)]
@ -220,59 +354,48 @@ impl SourceBlock<'_> {
// TODO: fn retain_labels() -> bool { }
}
/// Parses a `#+BEGIN_<NAME>` block, discarding the error detail.
///
/// On success yields the remaining input and the tuple
/// `(name, arguments, contents, blank count)` produced by
/// `parse_block_element_internal`.
#[inline]
pub fn parse_block_element(input: &str) -> Option<(&str, (&str, Option<&str>, &str, usize))> {
    match parse_block_element_internal::<()>(input) {
        Ok(parsed) => Some(parsed),
        Err(_) => None,
    }
}
/// nom-style parser behind `parse_block_element`.
///
/// Yields `(name, arguments, contents, blank)` where `arguments` is `None`
/// when the rest of the begin line is empty after trimming.
#[inline]
fn parse_block_element_internal<'a, E: ParseError<&'a str>>(
    input: &'a str,
) -> IResult<&str, (&str, Option<&str>, &str, usize), E> {
    let (rest, _) = space0(input)?;
    let (rest, name) = preceded(tag_no_case("#+BEGIN_"), alpha1)(rest)?;
    let (rest, raw_args) = line(rest)?;

    // Collect body lines until one equals `#+END_<name>` (case-insensitive, trimmed).
    let terminator = format!("#+END_{}", name);
    let (rest, contents) =
        take_lines_while(|line| !line.trim().eq_ignore_ascii_case(&terminator))(rest);
    // Consume the line the scan stopped at (presumably the end line).
    let (rest, _) = line(rest)?;
    let (rest, blank) = blank_lines(rest);

    let args = match raw_args.trim() {
        "" => None,
        trimmed => Some(trimmed),
    };

    Ok((rest, (name, args, contents, blank)))
}
#[test]
fn parse() {
use nom::error::VerboseError;
assert_eq!(
parse_block_element_internal::<VerboseError<&str>>(
RawBlock::parse_internal::<VerboseError<&str>>(
r#"#+BEGIN_SRC
#+END_SRC"#
),
Ok(("", ("SRC".into(), None, "", 0)))
Ok((
"",
RawBlock {
contents: "",
contents_without_blank_lines: "",
pre_blank: 0,
post_blank: 0,
name: "SRC".into(),
arguments: ""
}
))
);
assert_eq!(
parse_block_element_internal::<VerboseError<&str>>(
RawBlock::parse_internal::<VerboseError<&str>>(
r#"#+begin_src
#+end_src"#
),
Ok(("", ("src".into(), None, "", 0)))
Ok((
"",
RawBlock {
contents: "",
contents_without_blank_lines: "",
pre_blank: 0,
post_blank: 0,
name: "src".into(),
arguments: ""
}
))
);
assert_eq!(
parse_block_element_internal::<VerboseError<&str>>(
RawBlock::parse_internal::<VerboseError<&str>>(
r#"#+BEGIN_SRC javascript
console.log('Hello World!');
#+END_SRC
@ -281,12 +404,14 @@ console.log('Hello World!');
),
Ok((
"",
(
"SRC".into(),
Some("javascript".into()),
"console.log('Hello World!');\n",
1
)
RawBlock {
contents: "console.log('Hello World!');\n",
contents_without_blank_lines: "console.log('Hello World!');\n",
pre_blank: 0,
post_blank: 1,
name: "SRC".into(),
arguments: "javascript"
}
))
);
// TODO: more testing

View file

@ -10,8 +10,7 @@ use nom::{
};
use crate::elements::timestamp::{parse_inactive, Datetime, Timestamp};
use crate::parsers::{blank_lines, eol};
use crate::parse::combinators::{blank_lines_count, eol};
/// Clock Element
#[cfg_attr(test, derive(PartialEq))]
@ -156,7 +155,7 @@ fn parse_clock<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&str, Cloc
let (input, _) = space0(input)?;
let (input, duration) = recognize(separated_pair(digit1, char(':'), digit1))(input)?;
let (input, _) = eol(input)?;
let (input, blank) = blank_lines(input);
let (input, blank) = blank_lines_count(input)?;
Ok((
input,
Clock::Closed {
@ -175,7 +174,7 @@ fn parse_clock<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&str, Cloc
delay,
} => {
let (input, _) = eol(input)?;
let (input, blank) = blank_lines(input);
let (input, blank) = blank_lines_count(input)?;
Ok((
input,
Clock::Running {

View file

@ -1,6 +1,11 @@
use std::borrow::Cow;
use crate::parsers::{blank_lines, take_lines_while};
use nom::{
error::{ErrorKind, ParseError},
Err, IResult,
};
use crate::parse::combinators::{blank_lines_count, lines_while};
#[derive(Debug, Default)]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
@ -13,22 +18,31 @@ pub struct Comment<'a> {
}
impl Comment<'_> {
pub(crate) fn parse(input: &str) -> Option<(&str, Comment<'_>)> {
let (input, value) = take_lines_while(|line| {
pub(crate) fn parse(input: &str) -> Option<(&str, Comment)> {
Self::parse_internal::<()>(input).ok()
}
fn parse_internal<'a, E>(input: &'a str) -> IResult<&str, Comment, E>
where
E: ParseError<&'a str>,
{
let (input, value) = lines_while(|line| {
let line = line.trim_start();
line == "#" || line.starts_with("# ")
})(input);
let (input, blank) = blank_lines(input);
})(input)?;
if value.is_empty() {
return None;
// TODO: better error kind
return Err(Err::Error(E::from_error_kind(input, ErrorKind::Many0)));
}
Some((
let (input, post_blank) = blank_lines_count(input)?;
Ok((
input,
Comment {
value: value.into(),
post_blank: blank,
post_blank,
},
))
}

View file

@ -8,7 +8,7 @@ use nom::{
IResult,
};
use crate::parsers::{blank_lines, eol, line, take_lines_while};
use crate::parse::combinators::{blank_lines_count, eol, lines_till};
/// Drawer Element
#[derive(Debug, Default)]
@ -40,23 +40,25 @@ impl Drawer<'_> {
}
#[inline]
pub fn parse_drawer<'a, E: ParseError<&'a str>>(
input: &'a str,
) -> IResult<&str, (Drawer, &str), E> {
pub fn parse_drawer<'a, E>(input: &'a str) -> IResult<&str, (Drawer, &str), E>
where
E: ParseError<&'a str>,
{
let (input, (mut drawer, content)) = parse_drawer_without_blank(input)?;
let (content, blank) = blank_lines(content);
let (content, blank) = blank_lines_count(content)?;
drawer.pre_blank = blank;
let (input, blank) = blank_lines(input);
let (input, blank) = blank_lines_count(input)?;
drawer.post_blank = blank;
Ok((input, (drawer, content)))
}
pub fn parse_drawer_without_blank<'a, E: ParseError<&'a str>>(
input: &'a str,
) -> IResult<&str, (Drawer, &str), E> {
pub fn parse_drawer_without_blank<'a, E>(input: &'a str) -> IResult<&str, (Drawer, &str), E>
where
E: ParseError<&'a str>,
{
let (input, _) = space0(input)?;
let (input, name) = delimited(
tag(":"),
@ -64,9 +66,7 @@ pub fn parse_drawer_without_blank<'a, E: ParseError<&'a str>>(
tag(":"),
)(input)?;
let (input, _) = eol(input)?;
let (input, contents) =
take_lines_while(|line| !line.trim().eq_ignore_ascii_case(":END:"))(input);
let (input, _) = line(input)?;
let (input, contents) = lines_till(|line| line.trim().eq_ignore_ascii_case(":END:"))(input)?;
Ok((
input,
@ -124,4 +124,7 @@ fn parse() {
)
))
);
// https://github.com/PoiScript/orgize/issues/9
assert!(parse_drawer::<()>(":SPAGHETTI:\n").is_err());
}

View file

@ -7,7 +7,7 @@ use nom::{
IResult,
};
use crate::parsers::{blank_lines, line, take_lines_while};
use crate::parse::combinators::{blank_lines_count, line, lines_till};
/// Dynamic Block Element
#[derive(Debug, Default)]
@ -43,19 +43,18 @@ impl DynBlock<'_> {
}
#[inline]
fn parse_dyn_block<'a, E: ParseError<&'a str>>(
input: &'a str,
) -> IResult<&str, (DynBlock, &str), E> {
fn parse_dyn_block<'a, E>(input: &'a str) -> IResult<&str, (DynBlock, &str), E>
where
E: ParseError<&'a str>,
{
let (input, _) = space0(input)?;
let (input, _) = tag_no_case("#+BEGIN:")(input)?;
let (input, _) = space1(input)?;
let (input, name) = alpha1(input)?;
let (input, args) = line(input)?;
let (input, contents) =
take_lines_while(|line| !line.trim().eq_ignore_ascii_case("#+END:"))(input);
let (contents, pre_blank) = blank_lines(contents);
let (input, _) = line(input)?;
let (input, post_blank) = blank_lines(input);
let (input, contents) = lines_till(|line| line.trim().eq_ignore_ascii_case("#+END:"))(input)?;
let (contents, pre_blank) = blank_lines_count(contents)?;
let (input, post_blank) = blank_lines_count(input)?;
Ok((
input,

View file

@ -1,25 +1,55 @@
use bytecount::count;
use memchr::memchr_iter;
#[inline]
pub(crate) fn parse_emphasis(text: &str, marker: u8) -> Option<(&str, &str)> {
debug_assert!(text.len() >= 3);
use crate::elements::Element;
let bytes = text.as_bytes();
/// A successfully matched emphasis span, borrowed from the text it was parsed from.
#[derive(Debug)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct Emphasis<'a> {
/// Marker byte that was passed to `Emphasis::parse` and delimits the span.
marker: u8,
/// Text between the opening and the closing marker, markers excluded.
contents: &'a str,
}
if bytes[1].is_ascii_whitespace() {
return None;
}
for i in memchr_iter(marker, bytes).skip(1) {
if count(&bytes[1..i], b'\n') >= 2 {
break;
} else if validate_marker(i, text) {
return Some((&text[i + 1..], &text[1..i]));
impl<'a> Emphasis<'a> {
/// Tries to match an emphasis span delimited by `marker` at the start of `text`.
///
/// The byte right after the opening marker must not be ASCII whitespace, and
/// the search for a closing marker gives up once the candidate contents hold
/// two or more newlines. Returns the text after the closing marker together
/// with the parsed span.
pub fn parse(text: &str, marker: u8) -> Option<(&str, Emphasis)> {
    debug_assert!(text.len() >= 3);

    let bytes = text.as_bytes();
    if bytes[1].is_ascii_whitespace() {
        return None;
    }

    memchr_iter(marker, bytes)
        // the first hit is the opening marker itself
        .skip(1)
        // stop searching once the span would cover two or more newlines
        .take_while(|&end| count(&bytes[1..end], b'\n') < 2)
        .find(|&end| validate_marker(end, text))
        .map(|end| {
            (
                &text[end + 1..],
                Emphasis {
                    marker,
                    contents: &text[1..end],
                },
            )
        })
}
None
/// Maps the marker byte to its element and hands back the span contents.
///
/// `=` and `~` store the contents inside the element; the other markers
/// produce unit-like elements. Any marker outside `* + / _ = ~` panics.
pub fn into_element(self) -> (Element<'a>, &'a str) {
    let inner = self.contents;

    let element = match self.marker {
        b'*' => Element::Bold,
        b'+' => Element::Strike,
        b'/' => Element::Italic,
        b'_' => Element::Underline,
        b'=' => Element::Verbatim {
            value: inner.into(),
        },
        b'~' => Element::Code {
            value: inner.into(),
        },
        _ => unreachable!(),
    };

    (element, inner)
}
}
fn validate_marker(pos: usize, text: &str) -> bool {
@ -37,12 +67,39 @@ fn validate_marker(pos: usize, text: &str) -> bool {
#[test]
fn parse() {
assert_eq!(parse_emphasis("*bold*", b'*'), Some(("", "bold")));
assert_eq!(parse_emphasis("*bo*ld*", b'*'), Some(("", "bo*ld")));
assert_eq!(parse_emphasis("*bo\nld*", b'*'), Some(("", "bo\nld")));
assert_eq!(parse_emphasis("*bold*a", b'*'), None);
assert_eq!(parse_emphasis("*bold*", b'/'), None);
assert_eq!(parse_emphasis("*bold *", b'*'), None);
assert_eq!(parse_emphasis("* bold*", b'*'), None);
assert_eq!(parse_emphasis("*b\nol\nd*", b'*'), None);
assert_eq!(
Emphasis::parse("*bold*", b'*'),
Some((
"",
Emphasis {
contents: "bold",
marker: b'*'
}
))
);
assert_eq!(
Emphasis::parse("*bo*ld*", b'*'),
Some((
"",
Emphasis {
contents: "bo*ld",
marker: b'*'
}
))
);
assert_eq!(
Emphasis::parse("*bo\nld*", b'*'),
Some((
"",
Emphasis {
contents: "bo\nld",
marker: b'*'
}
))
);
assert_eq!(Emphasis::parse("*bold*a", b'*'), None);
assert_eq!(Emphasis::parse("*bold*", b'/'), None);
assert_eq!(Emphasis::parse("*bold *", b'*'), None);
assert_eq!(Emphasis::parse("* bold*", b'*'), None);
assert_eq!(Emphasis::parse("*b\nol\nd*", b'*'), None);
}

View file

@ -1,6 +1,11 @@
use std::borrow::Cow;
use crate::parsers::{blank_lines, take_lines_while};
use nom::{
error::{ErrorKind, ParseError},
Err, IResult,
};
use crate::parse::combinators::{blank_lines_count, lines_while};
#[derive(Debug, Default)]
#[cfg_attr(test, derive(PartialEq))]
@ -14,22 +19,31 @@ pub struct FixedWidth<'a> {
}
impl FixedWidth<'_> {
pub(crate) fn parse(input: &str) -> Option<(&str, FixedWidth<'_>)> {
let (input, value) = take_lines_while(|line| {
pub(crate) fn parse(input: &str) -> Option<(&str, FixedWidth)> {
Self::parse_internal::<()>(input).ok()
}
fn parse_internal<'a, E>(input: &'a str) -> IResult<&str, FixedWidth, E>
where
E: ParseError<&'a str>,
{
let (input, value) = lines_while(|line| {
let line = line.trim_start();
line == ":" || line.starts_with(": ")
})(input);
let (input, blank) = blank_lines(input);
})(input)?;
if value.is_empty() {
return None;
// TODO: better error kind
return Err(Err::Error(E::from_error_kind(input, ErrorKind::Many0)));
}
Some((
let (input, post_blank) = blank_lines_count(input)?;
Ok((
input,
FixedWidth {
value: value.into(),
post_blank: blank,
post_blank,
},
))
}

View file

@ -7,7 +7,7 @@ use nom::{
IResult,
};
use crate::parsers::{blank_lines, line};
use crate::parse::combinators::{blank_lines_count, line};
/// Footnote Definition Element
#[cfg_attr(test, derive(PartialEq))]
@ -23,7 +23,33 @@ pub struct FnDef<'a> {
impl FnDef<'_> {
pub(crate) fn parse(input: &str) -> Option<(&str, (FnDef, &str))> {
parse_fn_def::<()>(input).ok()
Self::parse_internal::<()>(input).ok()
}
/// nom-style parser for a footnote definition such as `[fn:label] text`.
///
/// The label must be a non-empty run of ASCII alphanumerics, `-` or `_`.
/// Yields the definition plus the rest of the first line as its content.
fn parse_internal<'a, E>(input: &'a str) -> IResult<&str, (FnDef, &str), E>
where
    E: ParseError<&'a str>,
{
    let is_label_char = |c: char| c == '-' || c == '_' || c.is_ascii_alphanumeric();

    let (rest, label) = delimited(tag("[fn:"), take_while1(is_label_char), tag("]"))(input)?;
    let (rest, content) = line(rest)?;
    let (rest, post_blank) = blank_lines_count(rest)?;

    let fn_def = FnDef {
        label: label.into(),
        post_blank,
    };

    Ok((rest, (fn_def, content)))
}
pub fn into_owned(self) -> FnDef<'static> {
@ -34,34 +60,12 @@ impl FnDef<'_> {
}
}
/// Parses a footnote definition (`[fn:label] text`) and counts the blank
/// lines that follow it.
///
/// Yields the definition and the remainder of its first line.
#[inline]
fn parse_fn_def<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&str, (FnDef, &str), E> {
    // label: one or more ASCII alphanumerics, `-` or `_`, wrapped in `[fn:` … `]`
    let label_parser = delimited(
        tag("[fn:"),
        take_while1(|c: char| c == '-' || c == '_' || c.is_ascii_alphanumeric()),
        tag("]"),
    );

    let (rest, label) = label_parser(input)?;
    let (rest, content) = line(rest)?;
    let (rest, post_blank) = blank_lines(rest);

    let definition = FnDef {
        label: label.into(),
        post_blank,
    };

    Ok((rest, (definition, content)))
}
#[test]
fn parse() {
use nom::error::VerboseError;
assert_eq!(
parse_fn_def::<VerboseError<&str>>("[fn:1] https://orgmode.org"),
FnDef::parse_internal::<VerboseError<&str>>("[fn:1] https://orgmode.org"),
Ok((
"",
(
@ -74,7 +78,7 @@ fn parse() {
))
);
assert_eq!(
parse_fn_def::<VerboseError<&str>>("[fn:word_1] https://orgmode.org"),
FnDef::parse_internal::<VerboseError<&str>>("[fn:word_1] https://orgmode.org"),
Ok((
"",
(
@ -87,7 +91,7 @@ fn parse() {
))
);
assert_eq!(
parse_fn_def::<VerboseError<&str>>("[fn:WORD-1] https://orgmode.org"),
FnDef::parse_internal::<VerboseError<&str>>("[fn:WORD-1] https://orgmode.org"),
Ok((
"",
(
@ -100,7 +104,7 @@ fn parse() {
))
);
assert_eq!(
parse_fn_def::<VerboseError<&str>>("[fn:WORD]"),
FnDef::parse_internal::<VerboseError<&str>>("[fn:WORD]"),
Ok((
"",
(
@ -113,7 +117,7 @@ fn parse() {
))
);
assert!(parse_fn_def::<VerboseError<&str>>("[fn:] https://orgmode.org").is_err());
assert!(parse_fn_def::<VerboseError<&str>>("[fn:wor d] https://orgmode.org").is_err());
assert!(parse_fn_def::<VerboseError<&str>>("[fn:WORD https://orgmode.org").is_err());
assert!(FnDef::parse_internal::<VerboseError<&str>>("[fn:] https://orgmode.org").is_err());
assert!(FnDef::parse_internal::<VerboseError<&str>>("[fn:wor d] https://orgmode.org").is_err());
assert!(FnDef::parse_internal::<VerboseError<&str>>("[fn:WORD https://orgmode.org").is_err());
}

View file

@ -9,7 +9,76 @@ use nom::{
IResult,
};
use crate::parsers::{blank_lines, line};
use crate::elements::Element;
use crate::parse::combinators::{blank_lines_count, line};
/// Borrowed result of parsing a `#+KEY[OPTIONAL]: VALUE` line, before it is
/// classified as a keyword or a babel call by `RawKeyword::into_element`.
#[derive(Debug)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct RawKeyword<'a> {
/// Text after `#+` up to the first ASCII whitespace, `:` or `[`.
pub key: &'a str,
/// Rest of the line after the `:`, trimmed.
pub value: &'a str,
/// Text enclosed in `[` … `]` between the key and the `:`, when present.
pub optional: Option<&'a str>,
/// Count returned by `blank_lines_count` for the input after the keyword line.
pub post_blank: usize,
}
impl<'a> RawKeyword<'a> {
    /// Parses a `#+KEY[OPTIONAL]: VALUE` line at the start of `input`.
    ///
    /// Returns the remaining input and the raw keyword, or `None` when the
    /// line does not have that shape.
    pub fn parse(input: &'a str) -> Option<(&str, RawKeyword)> {
        let parsed = Self::parse_internal::<()>(input);
        parsed.ok()
    }

    /// nom-style implementation of [`RawKeyword::parse`].
    fn parse_internal<E>(input: &'a str) -> IResult<&str, RawKeyword, E>
    where
        E: ParseError<&'a str>,
    {
        let (rest, _) = space0(input)?;
        let (rest, _) = tag("#+")(rest)?;

        // The key ends at the first whitespace, `:` or `[`.
        let (rest, key) =
            take_till(|c: char| c == ':' || c == '[' || c.is_ascii_whitespace())(rest)?;

        // Optional `[...]` part; it may not span lines.
        let (rest, optional) = opt(delimited(
            tag("["),
            take_till(|c| c == '\n' || c == ']'),
            tag("]"),
        ))(rest)?;

        let (rest, _) = tag(":")(rest)?;
        let (rest, raw_value) = line(rest)?;
        let (rest, post_blank) = blank_lines_count(rest)?;

        let keyword = RawKeyword {
            key,
            optional,
            value: raw_value.trim(),
            post_blank,
        };

        Ok((rest, keyword))
    }

    /// Converts the raw keyword into an element: a `BabelCall` when the key is
    /// `CALL` (compared ASCII-case-insensitively), a `Keyword` otherwise.
    pub fn into_element(self) -> Element<'a> {
        if self.key.eq_ignore_ascii_case("CALL") {
            // A babel call keeps only the value; key and optional part are dropped.
            return BabelCall {
                value: self.value.into(),
                post_blank: self.post_blank,
            }
            .into();
        }

        Keyword {
            key: self.key.into(),
            optional: self.optional.map(Into::into),
            value: self.value.into(),
            post_blank: self.post_blank,
        }
        .into()
    }
}
/// Keyword Element
#[cfg_attr(test, derive(PartialEq))]
@ -39,9 +108,9 @@ impl Keyword<'_> {
}
/// Babel Call Element
#[derive(Debug)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug)]
pub struct BabelCall<'a> {
/// Babel call value
pub value: Cow<'a, str>,
@ -59,70 +128,112 @@ impl BabelCall<'_> {
}
}
/// Parses a `#+KEY[OPTIONAL]: VALUE` line, discarding the error detail.
///
/// On success yields the remaining input and the tuple
/// `(key, optional, value, blank count)` produced by `parse_keyword_internal`.
#[inline]
pub fn parse_keyword(input: &str) -> Option<(&str, (&str, Option<&str>, &str, usize))> {
    match parse_keyword_internal::<()>(input) {
        Ok(parsed) => Some(parsed),
        Err(_) => None,
    }
}
/// nom-style parser behind `parse_keyword`.
///
/// Yields `(key, optional, value, blank)`; `value` is the trimmed rest of the
/// line after the `:`.
#[inline]
fn parse_keyword_internal<'a, E: ParseError<&'a str>>(
    input: &'a str,
) -> IResult<&str, (&str, Option<&str>, &str, usize), E> {
    let (rest, _) = space0(input)?;
    let (rest, _) = tag("#+")(rest)?;

    // The key ends at the first whitespace, `:` or `[`.
    let (rest, key) = take_till(|c: char| c == ':' || c == '[' || c.is_ascii_whitespace())(rest)?;

    // Optional `[...]` part; it may not span lines.
    let (rest, optional) = opt(delimited(
        tag("["),
        take_till(|c| c == '\n' || c == ']'),
        tag("]"),
    ))(rest)?;

    let (rest, _) = tag(":")(rest)?;
    let (rest, raw_value) = line(rest)?;
    let (rest, blank) = blank_lines(rest);

    let parsed = (key, optional, raw_value.trim(), blank);
    Ok((rest, parsed))
}
#[test]
fn parse() {
use nom::error::VerboseError;
assert_eq!(
parse_keyword_internal::<VerboseError<&str>>("#+KEY:"),
Ok(("", ("KEY", None, "", 0)))
RawKeyword::parse_internal::<VerboseError<&str>>("#+KEY:"),
Ok((
"",
RawKeyword {
key: "KEY",
optional: None,
value: "",
post_blank: 0
}
))
);
assert_eq!(
parse_keyword_internal::<VerboseError<&str>>("#+KEY: VALUE"),
Ok(("", ("KEY", None, "VALUE", 0)))
RawKeyword::parse_internal::<VerboseError<&str>>("#+KEY: VALUE"),
Ok((
"",
RawKeyword {
key: "KEY",
optional: None,
value: "VALUE",
post_blank: 0
}
))
);
assert_eq!(
parse_keyword_internal::<VerboseError<&str>>("#+K_E_Y: VALUE"),
Ok(("", ("K_E_Y", None, "VALUE", 0)))
RawKeyword::parse_internal::<VerboseError<&str>>("#+K_E_Y: VALUE"),
Ok((
"",
RawKeyword {
key: "K_E_Y",
optional: None,
value: "VALUE",
post_blank: 0
}
))
);
assert_eq!(
parse_keyword_internal::<VerboseError<&str>>("#+KEY:VALUE\n"),
Ok(("", ("KEY", None, "VALUE", 0)))
RawKeyword::parse_internal::<VerboseError<&str>>("#+KEY:VALUE\n"),
Ok((
"",
RawKeyword {
key: "KEY",
optional: None,
value: "VALUE",
post_blank: 0
}
))
);
assert!(parse_keyword_internal::<VerboseError<&str>>("#+KE Y: VALUE").is_err());
assert!(parse_keyword_internal::<VerboseError<&str>>("#+ KEY: VALUE").is_err());
assert!(RawKeyword::parse_internal::<VerboseError<&str>>("#+KE Y: VALUE").is_err());
assert!(RawKeyword::parse_internal::<VerboseError<&str>>("#+ KEY: VALUE").is_err());
assert_eq!(
parse_keyword_internal::<VerboseError<&str>>("#+RESULTS:"),
Ok(("", ("RESULTS", None, "", 0)))
RawKeyword::parse_internal::<VerboseError<&str>>("#+RESULTS:"),
Ok((
"",
RawKeyword {
key: "RESULTS",
optional: None,
value: "",
post_blank: 0
}
))
);
assert_eq!(
parse_keyword_internal::<VerboseError<&str>>("#+ATTR_LATEX: :width 5cm\n"),
Ok(("", ("ATTR_LATEX", None, ":width 5cm", 0)))
RawKeyword::parse_internal::<VerboseError<&str>>("#+ATTR_LATEX: :width 5cm\n"),
Ok((
"",
RawKeyword {
key: "ATTR_LATEX",
optional: None,
value: ":width 5cm",
post_blank: 0
}
))
);
assert_eq!(
parse_keyword_internal::<VerboseError<&str>>("#+CALL: double(n=4)"),
Ok(("", ("CALL", None, "double(n=4)", 0)))
RawKeyword::parse_internal::<VerboseError<&str>>("#+CALL: double(n=4)"),
Ok((
"",
RawKeyword {
key: "CALL",
optional: None,
value: "double(n=4)",
post_blank: 0
}
))
);
assert_eq!(
parse_keyword_internal::<VerboseError<&str>>("#+CAPTION[Short caption]: Longer caption."),
Ok(("", ("CAPTION", Some("Short caption"), "Longer caption.", 0)))
RawKeyword::parse_internal::<VerboseError<&str>>(
"#+CAPTION[Short caption]: Longer caption."
),
Ok((
"",
RawKeyword {
key: "CAPTION",
optional: Some("Short caption"),
value: "Longer caption.",
post_blank: 0
}
))
);
}

View file

@ -2,7 +2,7 @@ use nom::{
bytes::complete::take_while_m_n, character::complete::space0, error::ParseError, IResult,
};
use crate::parsers::{blank_lines, eol};
use crate::parse::combinators::{blank_lines_count, eol};
#[derive(Debug, Default)]
#[cfg_attr(test, derive(PartialEq))]
@ -19,12 +19,15 @@ impl Rule {
}
}
fn parse_rule<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&str, Rule, E> {
fn parse_rule<'a, E>(input: &'a str) -> IResult<&str, Rule, E>
where
E: ParseError<&'a str>,
{
let (input, _) = space0(input)?;
let (input, _) = take_while_m_n(5, usize::max_value(), |c| c == '-')(input)?;
let (input, _) = eol(input)?;
let (input, blank) = blank_lines(input);
Ok((input, Rule { post_blank: blank }))
let (input, post_blank) = blank_lines_count(input)?;
Ok((input, Rule { post_blank }))
}
#[test]

View file

@ -1,8 +1,11 @@
use std::borrow::Cow;
use memchr::memchr;
use nom::{
error::{ErrorKind, ParseError},
Err, IResult,
};
use crate::parsers::{blank_lines, take_lines_while};
use crate::parse::combinators::{blank_lines_count, line, lines_while};
/// Table Element
#[derive(Debug)]
@ -31,33 +34,42 @@ pub enum Table<'a> {
}
impl Table<'_> {
pub fn parse_table_el(input: &str) -> Option<(&str, Table<'_>)> {
let first_line = memchr(b'\n', input.as_bytes())
.map(|i| input[0..i].trim())
.unwrap_or_else(|| input.trim());
pub fn parse_table_el(input: &str) -> Option<(&str, Table)> {
Self::parse_table_el_internal::<()>(input).ok()
}
// first line must be the "+-" string and followed by plus or minus signs
fn parse_table_el_internal<'a, E>(input: &'a str) -> IResult<&str, Table, E>
where
E: ParseError<&'a str>,
{
let (_, first_line) = line(input)?;
let first_line = first_line.trim();
// Table.el tables start at lines beginning with "+-" string and followed by plus or minus signs
if !first_line.starts_with("+-")
|| first_line
.as_bytes()
.iter()
.any(|&c| c != b'+' && c != b'-')
{
return None;
// TODO: better error kind
return Err(Err::Error(E::from_error_kind(input, ErrorKind::Many0)));
}
let (input, content) = take_lines_while(|line| {
// Table.el tables end at the first line not starting with either a vertical line or a plus sign.
let (input, content) = lines_while(|line| {
let line = line.trim_start();
line.starts_with('|') || line.starts_with('+')
})(input);
})(input)?;
let (input, blank) = blank_lines(input);
let (input, post_blank) = blank_lines_count(input)?;
Some((
Ok((
input,
Table::TableEl {
value: content.into(),
post_blank: blank,
post_blank,
},
))
}
@ -70,12 +82,12 @@ impl Table<'_> {
has_header,
} => Table::Org {
tblfm: tblfm.map(Into::into).map(Cow::Owned),
post_blank: post_blank,
has_header: has_header,
post_blank,
has_header,
},
Table::TableEl { value, post_blank } => Table::TableEl {
value: value.into_owned().into(),
post_blank: post_blank,
post_blank,
},
}
}

View file

@ -17,7 +17,7 @@ use nom::{
use crate::{
config::ParseConfig,
elements::{drawer::parse_drawer_without_blank, Planning, Timestamp},
parsers::{blank_lines, line, skip_empty_lines, take_one_word},
parse::combinators::{blank_lines_count, line, one_word},
};
/// Title Element
@ -123,17 +123,20 @@ impl Default for Title<'_> {
}
#[inline]
fn parse_title<'a, E: ParseError<&'a str>>(
fn parse_title<'a, E>(
input: &'a str,
config: &ParseConfig,
) -> IResult<&'a str, (Title<'a>, &'a str), E> {
) -> IResult<&'a str, (Title<'a>, &'a str), E>
where
E: ParseError<&'a str>,
{
let (input, level) = map(take_while(|c: char| c == '*'), |s: &str| s.len())(input)?;
debug_assert!(level > 0);
let (input, keyword) = opt(preceded(
space1,
verify(take_one_word, |s: &str| {
verify(one_word, |s: &str| {
config.todo_keywords.0.iter().any(|x| x == s)
|| config.todo_keywords.1.iter().any(|x| x == s)
}),
@ -142,7 +145,7 @@ fn parse_title<'a, E: ParseError<&'a str>>(
let (input, priority) = opt(preceded(
space1,
map_parser(
take_one_word,
one_word,
delimited(
tag("[#"),
verify(anychar, |c: &char| c.is_ascii_uppercase()),
@ -168,7 +171,7 @@ fn parse_title<'a, E: ParseError<&'a str>>(
.unwrap_or((input, None));
let (input, properties) = opt(parse_properties_drawer)(input)?;
let (input, blank) = blank_lines(input);
let (input, post_blank) = blank_lines_count(input)?;
Ok((
input,
@ -181,7 +184,7 @@ fn parse_title<'a, E: ParseError<&'a str>>(
tags,
raw: raw.into(),
planning,
post_blank: blank,
post_blank,
},
raw,
),
@ -211,7 +214,8 @@ fn parse_properties_drawer<'a, E: ParseError<&'a str>>(
fn parse_node_property<'a, E: ParseError<&'a str>>(
input: &'a str,
) -> IResult<&str, (&str, &str), E> {
let input = skip_empty_lines(input).trim_start();
let (input, _) = blank_lines_count(input)?;
let input = input.trim_start();
let (input, name) = map(delimited(tag(":"), take_until(":"), tag(":")), |s: &str| {
s.trim_end_matches('+')
})(input)?;

View file

@ -225,6 +225,7 @@ pub mod elements;
pub mod export;
mod headline;
mod org;
mod parse;
mod parsers;
mod validate;

View file

@ -6,7 +6,7 @@ use crate::{
config::{ParseConfig, DEFAULT_CONFIG},
elements::{Element, Keyword},
export::{DefaultHtmlHandler, DefaultOrgHandler, HtmlHandler, OrgHandler},
parsers::{blank_lines, parse_container, Container, OwnedArena},
parsers::{blank_lines_count, parse_container, Container, OwnedArena},
};
pub struct Org<'a> {
@ -41,7 +41,7 @@ impl<'a> Org<'a> {
/// Parses string `text` into `Org` struct with custom `ParseConfig`.
pub fn parse_custom(text: &'a str, config: &ParseConfig) -> Org<'a> {
let mut arena = Arena::new();
let (text, pre_blank) = blank_lines(text);
let (text, pre_blank) = blank_lines_count(text);
let root = arena.new_node(Element::Document { pre_blank });
let mut org = Org { arena, root };
@ -62,7 +62,7 @@ impl<'a> Org<'a> {
/// Likes `parse_custom`, but accepts `String`.
pub fn parse_string_custom(text: String, config: &ParseConfig) -> Org<'static> {
let mut arena = Arena::new();
let (text, pre_blank) = blank_lines(&text);
let (text, pre_blank) = blank_lines_count(&text);
let root = arena.new_node(Element::Document { pre_blank });
let mut org = Org { arena, root };

View file

@ -1,20 +1,19 @@
use std::borrow::Cow;
use std::iter::once;
use std::marker::PhantomData;
use indextree::{Arena, NodeId};
use jetscii::{bytes, BytesConst};
use memchr::{memchr, memchr_iter};
use nom::{bytes::complete::take_while1, combinator::verify, error::ParseError, IResult};
use nom::bytes::complete::take_while1;
use crate::config::ParseConfig;
use crate::elements::{
block::parse_block_element, emphasis::parse_emphasis, keyword::parse_keyword,
radio_target::parse_radio_target, BabelCall, CenterBlock, Clock, Comment, CommentBlock, Cookie,
Drawer, DynBlock, Element, ExampleBlock, ExportBlock, FixedWidth, FnDef, FnRef, InlineCall,
InlineSrc, Keyword, Link, List, ListItem, Macros, QuoteBlock, Rule, Snippet, SourceBlock,
SpecialBlock, Table, TableCell, TableRow, Target, Timestamp, Title, VerseBlock,
block::RawBlock, emphasis::Emphasis, keyword::RawKeyword, radio_target::parse_radio_target,
Clock, Comment, Cookie, Drawer, DynBlock, Element, FixedWidth, FnDef, FnRef, InlineCall,
InlineSrc, Link, List, ListItem, Macros, Rule, Snippet, Table, TableCell, TableRow, Target,
Timestamp, Title,
};
use crate::parse::combinators::lines_while;
pub trait ElementArena<'a> {
fn append<T>(&mut self, element: T, parent: NodeId) -> NodeId
@ -28,7 +27,9 @@ pub trait ElementArena<'a> {
T: Into<Element<'a>>;
}
impl<'a> ElementArena<'a> for Arena<Element<'a>> {
pub type BorrowedArena<'a> = Arena<Element<'a>>;
impl<'a> ElementArena<'a> for BorrowedArena<'a> {
fn append<T>(&mut self, element: T, parent: NodeId) -> NodeId
where
T: Into<Element<'a>>,
@ -153,7 +154,8 @@ pub fn parse_section_and_headlines<'a, T: ElementArena<'a>>(
parent: NodeId,
containers: &mut Vec<Container<'a>>,
) {
let content = skip_empty_lines(content);
let content = blank_lines_count(content).0;
if content.is_empty() {
return;
}
@ -194,10 +196,10 @@ pub fn parse_blocks<'a, T: ElementArena<'a>>(
parent: NodeId,
containers: &mut Vec<Container<'a>>,
) {
let mut tail = skip_empty_lines(content);
let mut tail = blank_lines_count(content).0;
if let Some(new_tail) = parse_block(content, arena, parent, containers) {
tail = skip_empty_lines(new_tail);
tail = blank_lines_count(new_tail).0;
}
let mut text = tail;
@ -208,13 +210,13 @@ pub fn parse_blocks<'a, T: ElementArena<'a>>(
.map(|i| i + 1)
.unwrap_or_else(|| tail.len());
if tail.as_bytes()[0..i].iter().all(u8::is_ascii_whitespace) {
let (tail_, blank) = blank_lines(&tail[i..]);
let (tail_, blank) = blank_lines_count(&tail[i..]);
debug_assert_ne!(tail, tail_);
tail = tail_;
let node = arena.append(
Element::Paragraph {
// including current line (&tail[0..i])
// including the current line (&tail[0..i])
post_blank: blank + 1,
},
parent,
@ -239,8 +241,8 @@ pub fn parse_blocks<'a, T: ElementArena<'a>>(
pos = 0;
}
debug_assert_ne!(tail, skip_empty_lines(new_tail));
tail = skip_empty_lines(new_tail);
debug_assert_ne!(tail, blank_lines_count(new_tail).0);
tail = blank_lines_count(new_tail).0;
text = tail;
} else {
debug_assert_ne!(tail, &tail[i..]);
@ -323,41 +325,27 @@ pub fn parse_block<'a, T: ElementArena<'a>>(
}
}
b'#' => {
if let Some((tail, (name, args, content, blank))) = parse_block_element(contents) {
match_block(
arena,
parent,
containers,
name.into(),
args.map(Into::into),
content,
blank,
);
if let Some((tail, block)) = RawBlock::parse(contents) {
let (element, content) = block.into_element();
// avoid use after free
let is_block_container = match element {
Element::CenterBlock(_)
| Element::QuoteBlock(_)
| Element::VerseBlock(_)
| Element::SpecialBlock(_) => true,
_ => false,
};
let node = arena.append(element, parent);
if is_block_container {
containers.push(Container::Block { content, node });
}
Some(tail)
} else if let Some((tail, (dyn_block, content))) = DynBlock::parse(contents) {
let node = arena.append(dyn_block, parent);
containers.push(Container::Block { content, node });
Some(tail)
} else if let Some((tail, (key, optional, value, blank))) = parse_keyword(contents) {
if (&*key).eq_ignore_ascii_case("CALL") {
arena.append(
BabelCall {
value: value.into(),
post_blank: blank,
},
parent,
);
} else {
arena.append(
Keyword {
key: key.into(),
optional: optional.map(Into::into),
value: value.into(),
post_blank: blank,
},
parent,
);
}
} else if let Some((tail, keyword)) = RawKeyword::parse(contents) {
arena.append(keyword.into_element(), parent);
Some(tail)
} else {
let (tail, comment) = Comment::parse(contents)?;
@ -369,118 +357,6 @@ pub fn parse_block<'a, T: ElementArena<'a>>(
}
}
/// Appends the block called `name` to `arena` as a child of `parent`.
///
/// The kind of block is selected by the uppercased `name`:
///
/// - `CENTER`, `QUOTE`, `VERSE` and every unrecognised name (stored as a `SpecialBlock`) are
///   containers: the leading blank lines of `content` are counted into `pre_blank`, and the
///   remaining content is pushed onto `containers` together with the newly created node.
/// - `COMMENT`, `EXAMPLE`, `EXPORT` and `SRC` store `content` verbatim and push nothing onto
///   `containers`.
///
/// For `SRC`, `parameters` is split at its first space: the text before it becomes the language
/// and the rest (still starting with that space) becomes the arguments. Both borrowed and owned
/// parameters are accepted; missing parameters yield an empty language and empty arguments.
pub fn match_block<'a, T: ElementArena<'a>>(
    arena: &mut T,
    parent: NodeId,
    containers: &mut Vec<Container<'a>>,
    name: Cow<'a, str>,
    parameters: Option<Cow<'a, str>>,
    content: &'a str,
    post_blank: usize,
) {
    match &*name.to_uppercase() {
        "CENTER" => {
            let (content, pre_blank) = blank_lines(content);
            let node = arena.append(
                CenterBlock {
                    parameters,
                    pre_blank,
                    post_blank,
                },
                parent,
            );
            containers.push(Container::Block { content, node });
        }
        "QUOTE" => {
            let (content, pre_blank) = blank_lines(content);
            let node = arena.append(
                QuoteBlock {
                    parameters,
                    pre_blank,
                    post_blank,
                },
                parent,
            );
            containers.push(Container::Block { content, node });
        }
        "VERSE" => {
            let (content, pre_blank) = blank_lines(content);
            let node = arena.append(
                VerseBlock {
                    parameters,
                    pre_blank,
                    post_blank,
                },
                parent,
            );
            containers.push(Container::Block { content, node });
        }
        "COMMENT" => {
            arena.append(
                CommentBlock {
                    data: parameters,
                    contents: content.into(),
                    post_blank,
                },
                parent,
            );
        }
        "EXAMPLE" => {
            arena.append(
                ExampleBlock {
                    data: parameters,
                    contents: content.into(),
                    post_blank,
                },
                parent,
            );
        }
        "EXPORT" => {
            arena.append(
                ExportBlock {
                    data: parameters.unwrap_or_default(),
                    contents: content.into(),
                    post_blank,
                },
                parent,
            );
        }
        "SRC" => {
            // Split "<language> <arguments>" at the first space. Matching by value lets the
            // borrowed case keep borrowing from the input, while the owned case splits the
            // `String` in place instead of panicking.
            let (language, arguments) = match parameters {
                Some(Cow::Borrowed(args)) => {
                    let split = args.find(' ').unwrap_or_else(|| args.len());
                    let (language, arguments) = args.split_at(split);
                    (Cow::Borrowed(language), Cow::Borrowed(arguments))
                }
                Some(Cow::Owned(mut language)) => {
                    let split = language.find(' ').unwrap_or_else(|| language.len());
                    // `split_off` leaves `[0, split)` in `language` and returns `[split, len)`.
                    let arguments = language.split_off(split);
                    (Cow::Owned(language), Cow::Owned(arguments))
                }
                None => (Cow::Borrowed(""), Cow::Borrowed("")),
            };
            arena.append(
                SourceBlock {
                    arguments,
                    language,
                    contents: content.into(),
                    post_blank,
                },
                parent,
            );
        }
        _ => {
            let (content, pre_blank) = blank_lines(content);
            let node = arena.append(
                SpecialBlock {
                    parameters,
                    name,
                    pre_blank,
                    post_blank,
                },
                parent,
            );
            containers.push(Container::Block { content, node });
        }
    }
}
struct InlinePositions<'a> {
bytes: &'a [u8],
pos: usize,
@ -565,7 +441,9 @@ pub fn parse_inline<'a, T: ElementArena<'a>>(
return None;
}
match contents.as_bytes()[0] {
let byte = contents.as_bytes()[0];
match byte {
b'@' => {
let (tail, snippet) = Snippet::parse(contents)?;
arena.append(snippet, parent);
@ -608,40 +486,17 @@ pub fn parse_inline<'a, T: ElementArena<'a>>(
Some(tail)
}
}
b'*' => {
let (tail, content) = parse_emphasis(contents, b'*')?;
let node = arena.append(Element::Bold, parent);
containers.push(Container::Inline { content, node });
Some(tail)
}
b'+' => {
let (tail, content) = parse_emphasis(contents, b'+')?;
let node = arena.append(Element::Strike, parent);
containers.push(Container::Inline { content, node });
Some(tail)
}
b'/' => {
let (tail, content) = parse_emphasis(contents, b'/')?;
let node = arena.append(Element::Italic, parent);
containers.push(Container::Inline { content, node });
Some(tail)
}
b'_' => {
let (tail, content) = parse_emphasis(contents, b'_')?;
let node = arena.append(Element::Underline, parent);
containers.push(Container::Inline { content, node });
Some(tail)
}
b'=' => {
let (tail, value) = parse_emphasis(contents, b'=')?;
let value = value.into();
arena.append(Element::Verbatim { value }, parent);
Some(tail)
}
b'~' => {
let (tail, value) = parse_emphasis(contents, b'~')?;
let value = value.into();
arena.append(Element::Code { value }, parent);
b'*' | b'+' | b'/' | b'_' | b'=' | b'~' => {
let (tail, emphasis) = Emphasis::parse(contents, byte)?;
let (element, content) = emphasis.into_element();
let is_inline_container = match element {
Element::Bold | Element::Strike | Element::Italic | Element::Underline => true,
_ => false,
};
let node = arena.append(element, parent);
if is_inline_container {
containers.push(Container::Inline { content, node });
}
Some(tail)
}
b's' => {
@ -684,14 +539,14 @@ pub fn parse_list<'a, T: ElementArena<'a>>(
}
}
let (tail, blank) = blank_lines(tail);
let (tail, post_blank) = blank_lines_count(tail);
arena.set(
parent,
List {
indent: first_item_indent,
ordered: first_item_ordered,
post_blank: blank,
post_blank,
},
);
@ -704,8 +559,10 @@ pub fn parse_org_table<'a, T: ElementArena<'a>>(
containers: &mut Vec<Container<'a>>,
parent: NodeId,
) -> &'a str {
let (tail, contents) = take_lines_while(|line| line.trim_start().starts_with('|'))(contents);
let (tail, blank) = blank_lines(tail);
let (tail, contents) =
lines_while::<_, ()>(|line| line.trim_start().starts_with('|'))(contents)
.unwrap_or((contents, ""));
let (tail, post_blank) = blank_lines_count(tail);
let mut iter = contents.trim_end().lines().peekable();
@ -735,7 +592,7 @@ pub fn parse_org_table<'a, T: ElementArena<'a>>(
let parent = arena.append(
Table::Org {
tblfm: None,
post_blank: blank,
post_blank,
has_header,
},
parent,
@ -775,56 +632,18 @@ pub fn parse_org_table<'a, T: ElementArena<'a>>(
tail
}
/// Splits the first line off `input`.
///
/// Returns `(rest, line)`, where `line` excludes its terminating `\n` (or `\r\n`) and `rest`
/// starts right after that terminator. When `input` contains no newline the whole input is
/// returned as the line and `rest` is empty. This parser never fails.
pub fn line<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&str, &str, E> {
    let bytes = input.as_bytes();
    let newline = match memchr(b'\n', bytes) {
        Some(index) => index,
        // no terminator: everything that is left forms the line
        None => return Ok(("", input)),
    };
    // drop the carriage return of a `\r\n` terminator from the line
    let line_end = if newline > 0 && bytes[newline - 1] == b'\r' {
        newline - 1
    } else {
        newline
    };
    Ok((&input[newline + 1..], &input[..line_end]))
}
/// Consumes one line (as split by `line`) and succeeds only when every byte of that line is
/// ASCII whitespace; otherwise the `verify` combinator reports a parse error.
pub fn eol<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&str, &str, E> {
    let only_whitespace = |text: &str| text.bytes().all(|byte| byte.is_ascii_whitespace());
    verify(line, only_whitespace)(input)
}
/// Builds a function that takes leading lines of its input for as long as `predicate` accepts
/// them.
///
/// The returned function yields `(rest, taken)`: `taken` is the run of accepted lines including
/// their terminators, and `rest` begins at the first rejected line. `predicate` sees each line
/// without its `\n` / `\r\n` terminator; the text after the last newline (possibly empty) is
/// tested as a final line.
pub fn take_lines_while(predicate: impl Fn(&str) -> bool) -> impl Fn(&str) -> (&str, &str) {
    move |input| {
        let bytes = input.as_bytes();
        let mut line_start = 0;

        for newline in memchr_iter(b'\n', bytes) {
            // strip the carriage return of a `\r\n` terminator before testing the line
            let line_end = if newline > 0 && bytes[newline - 1] == b'\r' {
                newline - 1
            } else {
                newline
            };
            if !predicate(&input[line_start..line_end]) {
                let (taken, rest) = input.split_at(line_start);
                return (rest, taken);
            }
            line_start = newline + 1;
        }

        // whatever follows the last newline counts as one more line
        if predicate(&input[line_start..]) {
            ("", input)
        } else {
            let (taken, rest) = input.split_at(line_start);
            (rest, taken)
        }
    }
}
pub fn skip_empty_lines(input: &str) -> &str {
take_lines_while(|line| line.as_bytes().iter().all(|c| c.is_ascii_whitespace()))(input).0
pub fn blank_lines_count(input: &str) -> (&str, usize) {
crate::parse::combinators::blank_lines_count::<()>(input).unwrap_or((input, 0))
}
pub fn parse_headline(input: &str) -> Option<(&str, (&str, usize))> {
let (input_, level) = parse_headline_level(input)?;
let (input_, content) = take_lines_while(move |line| {
let (input_, content) = lines_while::<_, ()>(move |line| {
parse_headline_level(line)
.map(|(_, l)| l > level)
.unwrap_or(true)
})(input_);
})(input_)
.unwrap_or((input_, ""));
Some((input_, (&input[0..level + content.len()], level)))
}
@ -837,41 +656,3 @@ pub fn parse_headline_level(input: &str) -> Option<(&str, usize)> {
None
}
}
/// Takes the leading run of characters that are not ASCII whitespace; `take_while1` requires
/// that run to contain at least one character.
pub fn take_one_word<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&str, &str, E> {
    let is_word_char = |ch: char| !ch.is_ascii_whitespace();
    take_while1(is_word_char)(input)
}
#[test]
pub fn test_skip_empty_lines() {
    // (input, expected remainder returned by `skip_empty_lines`)
    let cases: [(&str, &str); 6] = [
        ("foo", "foo"),
        (" foo", " foo"),
        (" \nfoo\n", "foo\n"),
        (" \n\n\nfoo\n", "foo\n"),
        (" \n \n\nfoo\n", "foo\n"),
        (" \n \n\n foo\n", " foo\n"),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(skip_empty_lines(input), expected);
    }
}
/// Counts the leading newline-terminated lines of `input` that consist solely of ASCII
/// whitespace.
///
/// Returns `(rest, count)`, where `rest` starts right after the last counted line. A trailing
/// whitespace-only segment with no terminating `\n` is not counted and stays in `rest`.
pub fn blank_lines(input: &str) -> (&str, usize) {
    let bytes = input.as_bytes();
    let (rest_start, count) = memchr_iter(b'\n', bytes)
        // pair every line (without its `\n`) with the offset at which the next line starts
        .scan(0usize, |line_start, newline| {
            let line = &bytes[*line_start..newline];
            *line_start = newline + 1;
            Some((line, newline + 1))
        })
        // stop at the first line holding anything other than whitespace
        .take_while(|(line, _)| line.iter().all(u8::is_ascii_whitespace))
        .fold((0usize, 0usize), |(_, blank), (_, next_start)| {
            (next_start, blank + 1)
        });
    (&input[rest_start..], count)
}
#[test]
pub fn test_blank_lines() {
    // (input, (expected remainder, expected number of leading blank lines))
    let cases: [(&str, (&str, usize)); 5] = [
        ("foo", ("foo", 0)),
        (" foo", (" foo", 0)),
        (" \t\nfoo\n", ("foo\n", 1)),
        ("\n \r\n\nfoo\n", ("foo\n", 3)),
        ("\r\n \n \r\n foo\n", (" foo\n", 3)),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(blank_lines(input), expected);
    }
}