refactor(elements): minor refactoring

This commit is contained in:
PoiScript 2020-04-14 17:59:45 +08:00
parent 020548fad9
commit c2849d05fb
15 changed files with 619 additions and 492 deletions

View file

@ -8,7 +8,141 @@ use nom::{
IResult, IResult,
}; };
use crate::parsers::{blank_lines, line, take_lines_while}; use crate::elements::Element;
use crate::parse::combinators::{blank_lines_count, line, lines_till};
/// Borrowed pieces of a `#+BEGIN_<name>` ... `#+END_<name>` block, as collected
/// by `RawBlock::parse` before the block is turned into a concrete `Element`.
#[derive(Debug)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct RawBlock<'a> {
/// Alphabetic name that follows `#+BEGIN_`, kept in its original case.
pub name: &'a str,
/// Remainder of the `#+BEGIN_` line with surrounding whitespace trimmed;
/// empty when nothing follows the name.
pub arguments: &'a str,
/// Count reported by `blank_lines_count` for the start of `contents`
/// (presumably the number of leading blank lines).
pub pre_blank: usize,
/// Text collected between the begin line and the matching `#+END_<name>` line.
pub contents: &'a str,
/// What is left of `contents` after `blank_lines_count` has run over it.
pub contents_without_blank_lines: &'a str,
/// Count reported by `blank_lines_count` for the input that follows the block.
pub post_blank: usize,
}
impl<'a> RawBlock<'a> {
    /// Tries to parse a `#+BEGIN_<name>` ... `#+END_<name>` block at the start
    /// of `input`.
    ///
    /// Returns the unconsumed input together with the parsed block, or `None`
    /// when `input` does not start with such a block.
    pub fn parse(input: &'a str) -> Option<(&str, RawBlock)> {
        Self::parse_internal::<()>(input).ok()
    }

    /// nom-style implementation behind [`RawBlock::parse`], generic over the
    /// error type so tests can request verbose errors.
    fn parse_internal<E>(input: &'a str) -> IResult<&str, RawBlock, E>
    where
        E: ParseError<&'a str>,
    {
        let (input, _) = space0(input)?;
        let (input, name) = preceded(tag_no_case("#+BEGIN_"), alpha1)(input)?;
        let (input, arguments) = line(input)?;

        // The closing line must carry the same name; the comparison ignores
        // ASCII case and surrounding whitespace.
        let end_line = format!("#+END_{}", name);
        // `lines_till` is expected to also consume the terminating line, since
        // nothing else in this function does.
        let (input, contents) =
            lines_till(|line| line.trim().eq_ignore_ascii_case(&end_line))(input)?;

        // Run the blank-line counter over the contents themselves so that
        // both the count and the remaining text are available to callers.
        let (contents_without_blank_lines, pre_blank) = blank_lines_count(contents)?;
        let (input, post_blank) = blank_lines_count(input)?;

        Ok((
            input,
            RawBlock {
                name,
                contents,
                arguments: arguments.trim(),
                pre_blank,
                contents_without_blank_lines,
                post_blank,
            },
        ))
    }

    /// Converts the raw block into the matching block `Element`.
    ///
    /// The block name is matched after upper-casing it: `CENTER`, `QUOTE`,
    /// `VERSE`, `COMMENT`, `EXAMPLE`, `EXPORT` and `SRC` map to their dedicated
    /// element types, and every other name becomes a `SpecialBlock`.
    ///
    /// The second tuple item is `contents_without_blank_lines`, returned
    /// unchanged for every kind of block.
    pub fn into_element(self) -> (Element<'a>, &'a str) {
        let RawBlock {
            name,
            contents,
            arguments,
            pre_blank,
            contents_without_blank_lines,
            post_blank,
        } = self;

        // Most elements store the begin-line arguments as an optional value
        // that is `None` when nothing followed the block name.
        let parameters: Option<Cow<'a, str>> = if arguments.is_empty() {
            None
        } else {
            Some(arguments.into())
        };

        let element = match &*name.to_uppercase() {
            "CENTER" => CenterBlock {
                parameters,
                pre_blank,
                post_blank,
            }
            .into(),
            "QUOTE" => QuoteBlock {
                parameters,
                pre_blank,
                post_blank,
            }
            .into(),
            "VERSE" => VerseBlock {
                parameters,
                pre_blank,
                post_blank,
            }
            .into(),
            "COMMENT" => CommentBlock {
                data: parameters,
                contents: contents.into(),
                post_blank,
            }
            .into(),
            "EXAMPLE" => ExampleBlock {
                data: parameters,
                contents: contents.into(),
                post_blank,
            }
            .into(),
            "EXPORT" => ExportBlock {
                data: parameters.unwrap_or_default(),
                contents: contents.into(),
                post_blank,
            }
            .into(),
            "SRC" => {
                // Split the raw arguments at the first space: the leading word
                // is the language, the rest (including that space) stays as
                // the arguments. Empty input yields two empty strings.
                let (language, arguments) =
                    arguments.split_at(arguments.find(' ').unwrap_or(arguments.len()));

                SourceBlock {
                    arguments: arguments.into(),
                    language: language.into(),
                    contents: contents.into(),
                    post_blank,
                }
                .into()
            }
            _ => SpecialBlock {
                parameters,
                name: name.into(),
                pre_blank,
                post_blank,
            }
            .into(),
        };

        (element, contents_without_blank_lines)
    }
}
/// Special Block Element /// Special Block Element
#[derive(Debug)] #[derive(Debug)]
@ -220,59 +354,48 @@ impl SourceBlock<'_> {
// TODO: fn retain_labels() -> bool { } // TODO: fn retain_labels() -> bool { }
} }
/// Parses a `#+BEGIN_<name>` block at the start of `input`, discarding the
/// parser error.
///
/// On success the result holds the remaining input and the tuple
/// `(name, arguments, contents, blank)` produced by
/// `parse_block_element_internal`.
#[inline]
pub fn parse_block_element(input: &str) -> Option<(&str, (&str, Option<&str>, &str, usize))> {
    match parse_block_element_internal::<()>(input) {
        Ok(parsed) => Some(parsed),
        Err(_) => None,
    }
}
/// nom-style parser for a `#+BEGIN_<name>` ... `#+END_<name>` block.
///
/// Yields `(name, arguments, contents, blank)`: `arguments` is the trimmed
/// remainder of the begin line (`None` when empty) and `blank` is the value
/// reported by `blank_lines` for the input after the block.
#[inline]
fn parse_block_element_internal<'a, E: ParseError<&'a str>>(
    input: &'a str,
) -> IResult<&str, (&str, Option<&str>, &str, usize), E> {
    let (rest, _) = space0(input)?;
    let (rest, name) = preceded(tag_no_case("#+BEGIN_"), alpha1)(rest)?;
    let (rest, raw_args) = line(rest)?;

    // Gather lines until one equals the closing marker (ASCII case-insensitive,
    // ignoring surrounding whitespace).
    let closing = format!("#+END_{}", name);
    let (rest, contents) =
        take_lines_while(|current| !current.trim().eq_ignore_ascii_case(&closing))(rest);

    // Parse one more line after the contents - presumably the `#+END_<name>`
    // line that stopped the scan.
    let (rest, _) = line(rest)?;
    let (rest, blank) = blank_lines(rest);

    let args = match raw_args.trim() {
        "" => None,
        trimmed => Some(trimmed),
    };

    Ok((rest, (name, args, contents, blank)))
}
#[test] #[test]
fn parse() { fn parse() {
use nom::error::VerboseError; use nom::error::VerboseError;
assert_eq!( assert_eq!(
parse_block_element_internal::<VerboseError<&str>>( RawBlock::parse_internal::<VerboseError<&str>>(
r#"#+BEGIN_SRC r#"#+BEGIN_SRC
#+END_SRC"# #+END_SRC"#
), ),
Ok(("", ("SRC".into(), None, "", 0))) Ok((
"",
RawBlock {
contents: "",
contents_without_blank_lines: "",
pre_blank: 0,
post_blank: 0,
name: "SRC".into(),
arguments: ""
}
))
); );
assert_eq!( assert_eq!(
parse_block_element_internal::<VerboseError<&str>>( RawBlock::parse_internal::<VerboseError<&str>>(
r#"#+begin_src r#"#+begin_src
#+end_src"# #+end_src"#
), ),
Ok(("", ("src".into(), None, "", 0))) Ok((
"",
RawBlock {
contents: "",
contents_without_blank_lines: "",
pre_blank: 0,
post_blank: 0,
name: "src".into(),
arguments: ""
}
))
); );
assert_eq!( assert_eq!(
parse_block_element_internal::<VerboseError<&str>>( RawBlock::parse_internal::<VerboseError<&str>>(
r#"#+BEGIN_SRC javascript r#"#+BEGIN_SRC javascript
console.log('Hello World!'); console.log('Hello World!');
#+END_SRC #+END_SRC
@ -281,12 +404,14 @@ console.log('Hello World!');
), ),
Ok(( Ok((
"", "",
( RawBlock {
"SRC".into(), contents: "console.log('Hello World!');\n",
Some("javascript".into()), contents_without_blank_lines: "console.log('Hello World!');\n",
"console.log('Hello World!');\n", pre_blank: 0,
1 post_blank: 1,
) name: "SRC".into(),
arguments: "javascript"
}
)) ))
); );
// TODO: more testing // TODO: more testing

View file

@ -10,8 +10,7 @@ use nom::{
}; };
use crate::elements::timestamp::{parse_inactive, Datetime, Timestamp}; use crate::elements::timestamp::{parse_inactive, Datetime, Timestamp};
use crate::parse::combinators::{blank_lines_count, eol};
use crate::parsers::{blank_lines, eol};
/// Clock Element /// Clock Element
#[cfg_attr(test, derive(PartialEq))] #[cfg_attr(test, derive(PartialEq))]
@ -156,7 +155,7 @@ fn parse_clock<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&str, Cloc
let (input, _) = space0(input)?; let (input, _) = space0(input)?;
let (input, duration) = recognize(separated_pair(digit1, char(':'), digit1))(input)?; let (input, duration) = recognize(separated_pair(digit1, char(':'), digit1))(input)?;
let (input, _) = eol(input)?; let (input, _) = eol(input)?;
let (input, blank) = blank_lines(input); let (input, blank) = blank_lines_count(input)?;
Ok(( Ok((
input, input,
Clock::Closed { Clock::Closed {
@ -175,7 +174,7 @@ fn parse_clock<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&str, Cloc
delay, delay,
} => { } => {
let (input, _) = eol(input)?; let (input, _) = eol(input)?;
let (input, blank) = blank_lines(input); let (input, blank) = blank_lines_count(input)?;
Ok(( Ok((
input, input,
Clock::Running { Clock::Running {

View file

@ -1,6 +1,11 @@
use std::borrow::Cow; use std::borrow::Cow;
use crate::parsers::{blank_lines, take_lines_while}; use nom::{
error::{ErrorKind, ParseError},
Err, IResult,
};
use crate::parse::combinators::{blank_lines_count, lines_while};
#[derive(Debug, Default)] #[derive(Debug, Default)]
#[cfg_attr(feature = "ser", derive(serde::Serialize))] #[cfg_attr(feature = "ser", derive(serde::Serialize))]
@ -13,22 +18,31 @@ pub struct Comment<'a> {
} }
impl Comment<'_> { impl Comment<'_> {
pub(crate) fn parse(input: &str) -> Option<(&str, Comment<'_>)> { pub(crate) fn parse(input: &str) -> Option<(&str, Comment)> {
let (input, value) = take_lines_while(|line| { Self::parse_internal::<()>(input).ok()
let line = line.trim_start();
line == "#" || line.starts_with("# ")
})(input);
let (input, blank) = blank_lines(input);
if value.is_empty() {
return None;
} }
Some(( fn parse_internal<'a, E>(input: &'a str) -> IResult<&str, Comment, E>
where
E: ParseError<&'a str>,
{
let (input, value) = lines_while(|line| {
let line = line.trim_start();
line == "#" || line.starts_with("# ")
})(input)?;
if value.is_empty() {
// TODO: better error kind
return Err(Err::Error(E::from_error_kind(input, ErrorKind::Many0)));
}
let (input, post_blank) = blank_lines_count(input)?;
Ok((
input, input,
Comment { Comment {
value: value.into(), value: value.into(),
post_blank: blank, post_blank,
}, },
)) ))
} }

View file

@ -8,7 +8,7 @@ use nom::{
IResult, IResult,
}; };
use crate::parsers::{blank_lines, eol, line, take_lines_while}; use crate::parse::combinators::{blank_lines_count, eol, lines_till};
/// Drawer Element /// Drawer Element
#[derive(Debug, Default)] #[derive(Debug, Default)]
@ -40,23 +40,25 @@ impl Drawer<'_> {
} }
#[inline] #[inline]
pub fn parse_drawer<'a, E: ParseError<&'a str>>( pub fn parse_drawer<'a, E>(input: &'a str) -> IResult<&str, (Drawer, &str), E>
input: &'a str, where
) -> IResult<&str, (Drawer, &str), E> { E: ParseError<&'a str>,
{
let (input, (mut drawer, content)) = parse_drawer_without_blank(input)?; let (input, (mut drawer, content)) = parse_drawer_without_blank(input)?;
let (content, blank) = blank_lines(content); let (content, blank) = blank_lines_count(content)?;
drawer.pre_blank = blank; drawer.pre_blank = blank;
let (input, blank) = blank_lines(input); let (input, blank) = blank_lines_count(input)?;
drawer.post_blank = blank; drawer.post_blank = blank;
Ok((input, (drawer, content))) Ok((input, (drawer, content)))
} }
pub fn parse_drawer_without_blank<'a, E: ParseError<&'a str>>( pub fn parse_drawer_without_blank<'a, E>(input: &'a str) -> IResult<&str, (Drawer, &str), E>
input: &'a str, where
) -> IResult<&str, (Drawer, &str), E> { E: ParseError<&'a str>,
{
let (input, _) = space0(input)?; let (input, _) = space0(input)?;
let (input, name) = delimited( let (input, name) = delimited(
tag(":"), tag(":"),
@ -64,9 +66,7 @@ pub fn parse_drawer_without_blank<'a, E: ParseError<&'a str>>(
tag(":"), tag(":"),
)(input)?; )(input)?;
let (input, _) = eol(input)?; let (input, _) = eol(input)?;
let (input, contents) = let (input, contents) = lines_till(|line| line.trim().eq_ignore_ascii_case(":END:"))(input)?;
take_lines_while(|line| !line.trim().eq_ignore_ascii_case(":END:"))(input);
let (input, _) = line(input)?;
Ok(( Ok((
input, input,
@ -124,4 +124,7 @@ fn parse() {
) )
)) ))
); );
// https://github.com/PoiScript/orgize/issues/9
assert!(parse_drawer::<()>(":SPAGHETTI:\n").is_err());
} }

View file

@ -7,7 +7,7 @@ use nom::{
IResult, IResult,
}; };
use crate::parsers::{blank_lines, line, take_lines_while}; use crate::parse::combinators::{blank_lines_count, line, lines_till};
/// Dynamic Block Element /// Dynamic Block Element
#[derive(Debug, Default)] #[derive(Debug, Default)]
@ -43,19 +43,18 @@ impl DynBlock<'_> {
} }
#[inline] #[inline]
fn parse_dyn_block<'a, E: ParseError<&'a str>>( fn parse_dyn_block<'a, E>(input: &'a str) -> IResult<&str, (DynBlock, &str), E>
input: &'a str, where
) -> IResult<&str, (DynBlock, &str), E> { E: ParseError<&'a str>,
{
let (input, _) = space0(input)?; let (input, _) = space0(input)?;
let (input, _) = tag_no_case("#+BEGIN:")(input)?; let (input, _) = tag_no_case("#+BEGIN:")(input)?;
let (input, _) = space1(input)?; let (input, _) = space1(input)?;
let (input, name) = alpha1(input)?; let (input, name) = alpha1(input)?;
let (input, args) = line(input)?; let (input, args) = line(input)?;
let (input, contents) = let (input, contents) = lines_till(|line| line.trim().eq_ignore_ascii_case("#+END:"))(input)?;
take_lines_while(|line| !line.trim().eq_ignore_ascii_case("#+END:"))(input); let (contents, pre_blank) = blank_lines_count(contents)?;
let (contents, pre_blank) = blank_lines(contents); let (input, post_blank) = blank_lines_count(input)?;
let (input, _) = line(input)?;
let (input, post_blank) = blank_lines(input);
Ok(( Ok((
input, input,

View file

@ -1,25 +1,55 @@
use bytecount::count; use bytecount::count;
use memchr::memchr_iter; use memchr::memchr_iter;
#[inline] use crate::elements::Element;
pub(crate) fn parse_emphasis(text: &str, marker: u8) -> Option<(&str, &str)> {
/// An emphasis span recognised by `Emphasis::parse`: the marker byte together
/// with the text enclosed by the opening and closing markers.
#[derive(Debug)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct Emphasis<'a> {
// Marker byte the span was parsed with (e.g. `b'*'`).
marker: u8,
// Text between the two markers, excluding the markers themselves.
contents: &'a str,
}
impl<'a> Emphasis<'a> {
pub fn parse(text: &str, marker: u8) -> Option<(&str, Emphasis)> {
debug_assert!(text.len() >= 3); debug_assert!(text.len() >= 3);
let bytes = text.as_bytes(); let bytes = text.as_bytes();
if bytes[1].is_ascii_whitespace() { if bytes[1].is_ascii_whitespace() {
return None; return None;
} }
for i in memchr_iter(marker, bytes).skip(1) { for i in memchr_iter(marker, bytes).skip(1) {
if count(&bytes[1..i], b'\n') >= 2 { if count(&bytes[1..i], b'\n') >= 2 {
break; break;
} else if validate_marker(i, text) { } else if validate_marker(i, text) {
return Some((&text[i + 1..], &text[1..i])); return Some((
&text[i + 1..],
Emphasis {
marker,
contents: &text[1..i],
},
));
} }
} }
None
}
None pub fn into_element(self) -> (Element<'a>, &'a str) {
let Emphasis { marker, contents } = self;
let element = match marker {
b'*' => Element::Bold,
b'+' => Element::Strike,
b'/' => Element::Italic,
b'_' => Element::Underline,
b'=' => Element::Verbatim {
value: contents.into(),
},
b'~' => Element::Code {
value: contents.into(),
},
_ => unreachable!(),
};
(element, contents)
}
} }
fn validate_marker(pos: usize, text: &str) -> bool { fn validate_marker(pos: usize, text: &str) -> bool {
@ -37,12 +67,39 @@ fn validate_marker(pos: usize, text: &str) -> bool {
#[test] #[test]
fn parse() { fn parse() {
assert_eq!(parse_emphasis("*bold*", b'*'), Some(("", "bold"))); assert_eq!(
assert_eq!(parse_emphasis("*bo*ld*", b'*'), Some(("", "bo*ld"))); Emphasis::parse("*bold*", b'*'),
assert_eq!(parse_emphasis("*bo\nld*", b'*'), Some(("", "bo\nld"))); Some((
assert_eq!(parse_emphasis("*bold*a", b'*'), None); "",
assert_eq!(parse_emphasis("*bold*", b'/'), None); Emphasis {
assert_eq!(parse_emphasis("*bold *", b'*'), None); contents: "bold",
assert_eq!(parse_emphasis("* bold*", b'*'), None); marker: b'*'
assert_eq!(parse_emphasis("*b\nol\nd*", b'*'), None); }
))
);
assert_eq!(
Emphasis::parse("*bo*ld*", b'*'),
Some((
"",
Emphasis {
contents: "bo*ld",
marker: b'*'
}
))
);
assert_eq!(
Emphasis::parse("*bo\nld*", b'*'),
Some((
"",
Emphasis {
contents: "bo\nld",
marker: b'*'
}
))
);
assert_eq!(Emphasis::parse("*bold*a", b'*'), None);
assert_eq!(Emphasis::parse("*bold*", b'/'), None);
assert_eq!(Emphasis::parse("*bold *", b'*'), None);
assert_eq!(Emphasis::parse("* bold*", b'*'), None);
assert_eq!(Emphasis::parse("*b\nol\nd*", b'*'), None);
} }

View file

@ -1,6 +1,11 @@
use std::borrow::Cow; use std::borrow::Cow;
use crate::parsers::{blank_lines, take_lines_while}; use nom::{
error::{ErrorKind, ParseError},
Err, IResult,
};
use crate::parse::combinators::{blank_lines_count, lines_while};
#[derive(Debug, Default)] #[derive(Debug, Default)]
#[cfg_attr(test, derive(PartialEq))] #[cfg_attr(test, derive(PartialEq))]
@ -14,22 +19,31 @@ pub struct FixedWidth<'a> {
} }
impl FixedWidth<'_> { impl FixedWidth<'_> {
pub(crate) fn parse(input: &str) -> Option<(&str, FixedWidth<'_>)> { pub(crate) fn parse(input: &str) -> Option<(&str, FixedWidth)> {
let (input, value) = take_lines_while(|line| { Self::parse_internal::<()>(input).ok()
let line = line.trim_start();
line == ":" || line.starts_with(": ")
})(input);
let (input, blank) = blank_lines(input);
if value.is_empty() {
return None;
} }
Some(( fn parse_internal<'a, E>(input: &'a str) -> IResult<&str, FixedWidth, E>
where
E: ParseError<&'a str>,
{
let (input, value) = lines_while(|line| {
let line = line.trim_start();
line == ":" || line.starts_with(": ")
})(input)?;
if value.is_empty() {
// TODO: better error kind
return Err(Err::Error(E::from_error_kind(input, ErrorKind::Many0)));
}
let (input, post_blank) = blank_lines_count(input)?;
Ok((
input, input,
FixedWidth { FixedWidth {
value: value.into(), value: value.into(),
post_blank: blank, post_blank,
}, },
)) ))
} }

View file

@ -7,7 +7,7 @@ use nom::{
IResult, IResult,
}; };
use crate::parsers::{blank_lines, line}; use crate::parse::combinators::{blank_lines_count, line};
/// Footnote Definition Element /// Footnote Definition Element
#[cfg_attr(test, derive(PartialEq))] #[cfg_attr(test, derive(PartialEq))]
@ -23,7 +23,33 @@ pub struct FnDef<'a> {
impl FnDef<'_> { impl FnDef<'_> {
pub(crate) fn parse(input: &str) -> Option<(&str, (FnDef, &str))> { pub(crate) fn parse(input: &str) -> Option<(&str, (FnDef, &str))> {
parse_fn_def::<()>(input).ok() Self::parse_internal::<()>(input).ok()
}
fn parse_internal<'a, E>(input: &'a str) -> IResult<&str, (FnDef, &str), E>
where
E: ParseError<&'a str>,
{
let (input, label) = delimited(
tag("[fn:"),
take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'),
tag("]"),
)(input)?;
let (input, content) = line(input)?;
let (input, post_blank) = blank_lines_count(input)?;
Ok((
input,
(
FnDef {
label: label.into(),
post_blank,
},
content,
),
))
} }
pub fn into_owned(self) -> FnDef<'static> { pub fn into_owned(self) -> FnDef<'static> {
@ -34,34 +60,12 @@ impl FnDef<'_> {
} }
} }
/// nom-style parser for a footnote definition of the form `[fn:LABEL] content`.
///
/// Accepts a non-empty label made of ASCII letters, digits, `-` and `_`, and
/// yields the `FnDef` plus the remainder of the first line.
#[inline]
fn parse_fn_def<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&str, (FnDef, &str), E> {
    let label_char = |c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_';

    let (remaining, label) = delimited(tag("[fn:"), take_while1(label_char), tag("]"))(input)?;
    let (remaining, content) = line(remaining)?;
    let (remaining, post_blank) = blank_lines(remaining);

    let definition = FnDef {
        label: label.into(),
        post_blank,
    };

    Ok((remaining, (definition, content)))
}
#[test] #[test]
fn parse() { fn parse() {
use nom::error::VerboseError; use nom::error::VerboseError;
assert_eq!( assert_eq!(
parse_fn_def::<VerboseError<&str>>("[fn:1] https://orgmode.org"), FnDef::parse_internal::<VerboseError<&str>>("[fn:1] https://orgmode.org"),
Ok(( Ok((
"", "",
( (
@ -74,7 +78,7 @@ fn parse() {
)) ))
); );
assert_eq!( assert_eq!(
parse_fn_def::<VerboseError<&str>>("[fn:word_1] https://orgmode.org"), FnDef::parse_internal::<VerboseError<&str>>("[fn:word_1] https://orgmode.org"),
Ok(( Ok((
"", "",
( (
@ -87,7 +91,7 @@ fn parse() {
)) ))
); );
assert_eq!( assert_eq!(
parse_fn_def::<VerboseError<&str>>("[fn:WORD-1] https://orgmode.org"), FnDef::parse_internal::<VerboseError<&str>>("[fn:WORD-1] https://orgmode.org"),
Ok(( Ok((
"", "",
( (
@ -100,7 +104,7 @@ fn parse() {
)) ))
); );
assert_eq!( assert_eq!(
parse_fn_def::<VerboseError<&str>>("[fn:WORD]"), FnDef::parse_internal::<VerboseError<&str>>("[fn:WORD]"),
Ok(( Ok((
"", "",
( (
@ -113,7 +117,7 @@ fn parse() {
)) ))
); );
assert!(parse_fn_def::<VerboseError<&str>>("[fn:] https://orgmode.org").is_err()); assert!(FnDef::parse_internal::<VerboseError<&str>>("[fn:] https://orgmode.org").is_err());
assert!(parse_fn_def::<VerboseError<&str>>("[fn:wor d] https://orgmode.org").is_err()); assert!(FnDef::parse_internal::<VerboseError<&str>>("[fn:wor d] https://orgmode.org").is_err());
assert!(parse_fn_def::<VerboseError<&str>>("[fn:WORD https://orgmode.org").is_err()); assert!(FnDef::parse_internal::<VerboseError<&str>>("[fn:WORD https://orgmode.org").is_err());
} }

View file

@ -9,7 +9,76 @@ use nom::{
IResult, IResult,
}; };
use crate::parsers::{blank_lines, line}; use crate::elements::Element;
use crate::parse::combinators::{blank_lines_count, line};
/// Borrowed pieces of a `#+KEY[OPTIONAL]: VALUE` line, as collected by
/// `RawKeyword::parse` before conversion into a `Keyword` or `BabelCall`.
#[derive(Debug)]
#[cfg_attr(test, derive(PartialEq))]
pub(crate) struct RawKeyword<'a> {
/// Text after `#+`, up to the first ASCII whitespace, `:` or `[`.
pub key: &'a str,
/// Rest of the line after the `:`, with surrounding whitespace trimmed.
pub value: &'a str,
/// Text inside the `[...]` that directly follows the key, when present.
pub optional: Option<&'a str>,
/// Count reported by `blank_lines_count` for the input after the keyword line.
pub post_blank: usize,
}
impl<'a> RawKeyword<'a> {
    /// Tries to parse a `#+KEY[OPTIONAL]: VALUE` line at the start of `input`.
    ///
    /// Returns the unconsumed input and the raw keyword, or `None` when the
    /// input does not start with a keyword line.
    pub fn parse(input: &'a str) -> Option<(&str, RawKeyword)> {
        match Self::parse_internal::<()>(input) {
            Ok(parsed) => Some(parsed),
            Err(_) => None,
        }
    }

    /// nom-style implementation behind [`RawKeyword::parse`], generic over the
    /// error type.
    fn parse_internal<E>(input: &'a str) -> IResult<&str, RawKeyword, E>
    where
        E: ParseError<&'a str>,
    {
        // Optional leading spaces, then the `#+` prefix.
        let (rest, _) = space0(input)?;
        let (rest, _) = tag("#+")(rest)?;

        // The key stops at whitespace, at the `:` separator, or at the `[`
        // opening the optional part.
        let (rest, key) =
            take_till(|c: char| c.is_ascii_whitespace() || c == ':' || c == '[')(rest)?;

        // `[...]` right after the key; it may not span lines.
        let (rest, optional) = opt(delimited(
            tag("["),
            take_till(|c| c == ']' || c == '\n'),
            tag("]"),
        ))(rest)?;

        let (rest, _) = tag(":")(rest)?;
        let (rest, raw_value) = line(rest)?;
        let (rest, post_blank) = blank_lines_count(rest)?;

        let keyword = RawKeyword {
            key,
            optional,
            value: raw_value.trim(),
            post_blank,
        };

        Ok((rest, keyword))
    }

    /// Converts the raw keyword into an `Element`.
    ///
    /// A key equal to `CALL` (ASCII case-insensitive) becomes a `BabelCall`;
    /// every other key becomes a `Keyword`.
    pub fn into_element(self) -> Element<'a> {
        if self.key.eq_ignore_ascii_case("CALL") {
            return BabelCall {
                value: self.value.into(),
                post_blank: self.post_blank,
            }
            .into();
        }

        Keyword {
            key: self.key.into(),
            optional: self.optional.map(Into::into),
            value: self.value.into(),
            post_blank: self.post_blank,
        }
        .into()
    }
}
/// Keyword Element /// Keyword Element
#[cfg_attr(test, derive(PartialEq))] #[cfg_attr(test, derive(PartialEq))]
@ -39,9 +108,9 @@ impl Keyword<'_> {
} }
/// Babel Call Element /// Babel Call Element
#[derive(Debug)]
#[cfg_attr(test, derive(PartialEq))] #[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "ser", derive(serde::Serialize))] #[cfg_attr(feature = "ser", derive(serde::Serialize))]
#[derive(Debug)]
pub struct BabelCall<'a> { pub struct BabelCall<'a> {
/// Babel call value /// Babel call value
pub value: Cow<'a, str>, pub value: Cow<'a, str>,
@ -59,70 +128,112 @@ impl BabelCall<'_> {
} }
} }
/// Parses a `#+KEY[OPTIONAL]: VALUE` line at the start of `input`, discarding
/// the parser error.
///
/// On success the result holds the remaining input and the tuple
/// `(key, optional, value, blank)` produced by `parse_keyword_internal`.
#[inline]
pub fn parse_keyword(input: &str) -> Option<(&str, (&str, Option<&str>, &str, usize))> {
    match parse_keyword_internal::<()>(input) {
        Ok(parsed) => Some(parsed),
        Err(_) => None,
    }
}
/// nom-style parser for a `#+KEY[OPTIONAL]: VALUE` line.
///
/// Yields `(key, optional, value, blank)`, where `value` is the trimmed rest
/// of the line after `:` and `blank` is the value reported by `blank_lines`
/// for the input that follows.
#[inline]
fn parse_keyword_internal<'a, E: ParseError<&'a str>>(
    input: &'a str,
) -> IResult<&str, (&str, Option<&str>, &str, usize), E> {
    // Optional leading spaces, then the `#+` prefix.
    let (rest, _) = space0(input)?;
    let (rest, _) = tag("#+")(rest)?;

    // The key stops at whitespace, `:` or the `[` of the optional part.
    let (rest, key) = take_till(|c: char| c.is_ascii_whitespace() || c == ':' || c == '[')(rest)?;

    // `[...]` right after the key; it may not span lines.
    let (rest, optional) = opt(delimited(
        tag("["),
        take_till(|c| c == ']' || c == '\n'),
        tag("]"),
    ))(rest)?;

    let (rest, _) = tag(":")(rest)?;
    let (rest, raw_value) = line(rest)?;
    let (rest, blank) = blank_lines(rest);

    let value = raw_value.trim();
    Ok((rest, (key, optional, value, blank)))
}
#[test] #[test]
fn parse() { fn parse() {
use nom::error::VerboseError; use nom::error::VerboseError;
assert_eq!( assert_eq!(
parse_keyword_internal::<VerboseError<&str>>("#+KEY:"), RawKeyword::parse_internal::<VerboseError<&str>>("#+KEY:"),
Ok(("", ("KEY", None, "", 0))) Ok((
"",
RawKeyword {
key: "KEY",
optional: None,
value: "",
post_blank: 0
}
))
); );
assert_eq!( assert_eq!(
parse_keyword_internal::<VerboseError<&str>>("#+KEY: VALUE"), RawKeyword::parse_internal::<VerboseError<&str>>("#+KEY: VALUE"),
Ok(("", ("KEY", None, "VALUE", 0))) Ok((
"",
RawKeyword {
key: "KEY",
optional: None,
value: "VALUE",
post_blank: 0
}
))
); );
assert_eq!( assert_eq!(
parse_keyword_internal::<VerboseError<&str>>("#+K_E_Y: VALUE"), RawKeyword::parse_internal::<VerboseError<&str>>("#+K_E_Y: VALUE"),
Ok(("", ("K_E_Y", None, "VALUE", 0))) Ok((
"",
RawKeyword {
key: "K_E_Y",
optional: None,
value: "VALUE",
post_blank: 0
}
))
); );
assert_eq!( assert_eq!(
parse_keyword_internal::<VerboseError<&str>>("#+KEY:VALUE\n"), RawKeyword::parse_internal::<VerboseError<&str>>("#+KEY:VALUE\n"),
Ok(("", ("KEY", None, "VALUE", 0))) Ok((
"",
RawKeyword {
key: "KEY",
optional: None,
value: "VALUE",
post_blank: 0
}
))
); );
assert!(parse_keyword_internal::<VerboseError<&str>>("#+KE Y: VALUE").is_err()); assert!(RawKeyword::parse_internal::<VerboseError<&str>>("#+KE Y: VALUE").is_err());
assert!(parse_keyword_internal::<VerboseError<&str>>("#+ KEY: VALUE").is_err()); assert!(RawKeyword::parse_internal::<VerboseError<&str>>("#+ KEY: VALUE").is_err());
assert_eq!( assert_eq!(
parse_keyword_internal::<VerboseError<&str>>("#+RESULTS:"), RawKeyword::parse_internal::<VerboseError<&str>>("#+RESULTS:"),
Ok(("", ("RESULTS", None, "", 0))) Ok((
"",
RawKeyword {
key: "RESULTS",
optional: None,
value: "",
post_blank: 0
}
))
); );
assert_eq!( assert_eq!(
parse_keyword_internal::<VerboseError<&str>>("#+ATTR_LATEX: :width 5cm\n"), RawKeyword::parse_internal::<VerboseError<&str>>("#+ATTR_LATEX: :width 5cm\n"),
Ok(("", ("ATTR_LATEX", None, ":width 5cm", 0))) Ok((
"",
RawKeyword {
key: "ATTR_LATEX",
optional: None,
value: ":width 5cm",
post_blank: 0
}
))
); );
assert_eq!( assert_eq!(
parse_keyword_internal::<VerboseError<&str>>("#+CALL: double(n=4)"), RawKeyword::parse_internal::<VerboseError<&str>>("#+CALL: double(n=4)"),
Ok(("", ("CALL", None, "double(n=4)", 0))) Ok((
"",
RawKeyword {
key: "CALL",
optional: None,
value: "double(n=4)",
post_blank: 0
}
))
); );
assert_eq!( assert_eq!(
parse_keyword_internal::<VerboseError<&str>>("#+CAPTION[Short caption]: Longer caption."), RawKeyword::parse_internal::<VerboseError<&str>>(
Ok(("", ("CAPTION", Some("Short caption"), "Longer caption.", 0))) "#+CAPTION[Short caption]: Longer caption."
),
Ok((
"",
RawKeyword {
key: "CAPTION",
optional: Some("Short caption"),
value: "Longer caption.",
post_blank: 0
}
))
); );
} }

View file

@ -2,7 +2,7 @@ use nom::{
bytes::complete::take_while_m_n, character::complete::space0, error::ParseError, IResult, bytes::complete::take_while_m_n, character::complete::space0, error::ParseError, IResult,
}; };
use crate::parsers::{blank_lines, eol}; use crate::parse::combinators::{blank_lines_count, eol};
#[derive(Debug, Default)] #[derive(Debug, Default)]
#[cfg_attr(test, derive(PartialEq))] #[cfg_attr(test, derive(PartialEq))]
@ -19,12 +19,15 @@ impl Rule {
} }
} }
fn parse_rule<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&str, Rule, E> { fn parse_rule<'a, E>(input: &'a str) -> IResult<&str, Rule, E>
where
E: ParseError<&'a str>,
{
let (input, _) = space0(input)?; let (input, _) = space0(input)?;
let (input, _) = take_while_m_n(5, usize::max_value(), |c| c == '-')(input)?; let (input, _) = take_while_m_n(5, usize::max_value(), |c| c == '-')(input)?;
let (input, _) = eol(input)?; let (input, _) = eol(input)?;
let (input, blank) = blank_lines(input); let (input, post_blank) = blank_lines_count(input)?;
Ok((input, Rule { post_blank: blank })) Ok((input, Rule { post_blank }))
} }
#[test] #[test]

View file

@ -1,8 +1,11 @@
use std::borrow::Cow; use std::borrow::Cow;
use memchr::memchr; use nom::{
error::{ErrorKind, ParseError},
Err, IResult,
};
use crate::parsers::{blank_lines, take_lines_while}; use crate::parse::combinators::{blank_lines_count, line, lines_while};
/// Table Element /// Table Element
#[derive(Debug)] #[derive(Debug)]
@ -31,33 +34,42 @@ pub enum Table<'a> {
} }
impl Table<'_> { impl Table<'_> {
pub fn parse_table_el(input: &str) -> Option<(&str, Table<'_>)> { pub fn parse_table_el(input: &str) -> Option<(&str, Table)> {
let first_line = memchr(b'\n', input.as_bytes()) Self::parse_table_el_internal::<()>(input).ok()
.map(|i| input[0..i].trim()) }
.unwrap_or_else(|| input.trim());
// first line must be the "+-" string and followed by plus or minus signs fn parse_table_el_internal<'a, E>(input: &'a str) -> IResult<&str, Table, E>
where
E: ParseError<&'a str>,
{
let (_, first_line) = line(input)?;
let first_line = first_line.trim();
// Table.el tables start at lines beginning with "+-" string and followed by plus or minus signs
if !first_line.starts_with("+-") if !first_line.starts_with("+-")
|| first_line || first_line
.as_bytes() .as_bytes()
.iter() .iter()
.any(|&c| c != b'+' && c != b'-') .any(|&c| c != b'+' && c != b'-')
{ {
return None; // TODO: better error kind
return Err(Err::Error(E::from_error_kind(input, ErrorKind::Many0)));
} }
let (input, content) = take_lines_while(|line| { // Table.el tables end at the first line not starting with either a vertical line or a plus sign.
let (input, content) = lines_while(|line| {
let line = line.trim_start(); let line = line.trim_start();
line.starts_with('|') || line.starts_with('+') line.starts_with('|') || line.starts_with('+')
})(input); })(input)?;
let (input, blank) = blank_lines(input); let (input, post_blank) = blank_lines_count(input)?;
Some(( Ok((
input, input,
Table::TableEl { Table::TableEl {
value: content.into(), value: content.into(),
post_blank: blank, post_blank,
}, },
)) ))
} }
@ -70,12 +82,12 @@ impl Table<'_> {
has_header, has_header,
} => Table::Org { } => Table::Org {
tblfm: tblfm.map(Into::into).map(Cow::Owned), tblfm: tblfm.map(Into::into).map(Cow::Owned),
post_blank: post_blank, post_blank,
has_header: has_header, has_header,
}, },
Table::TableEl { value, post_blank } => Table::TableEl { Table::TableEl { value, post_blank } => Table::TableEl {
value: value.into_owned().into(), value: value.into_owned().into(),
post_blank: post_blank, post_blank,
}, },
} }
} }

View file

@ -17,7 +17,7 @@ use nom::{
use crate::{ use crate::{
config::ParseConfig, config::ParseConfig,
elements::{drawer::parse_drawer_without_blank, Planning, Timestamp}, elements::{drawer::parse_drawer_without_blank, Planning, Timestamp},
parsers::{blank_lines, line, skip_empty_lines, take_one_word}, parse::combinators::{blank_lines_count, line, one_word},
}; };
/// Title Element /// Title Element
@ -123,17 +123,20 @@ impl Default for Title<'_> {
} }
#[inline] #[inline]
fn parse_title<'a, E: ParseError<&'a str>>( fn parse_title<'a, E>(
input: &'a str, input: &'a str,
config: &ParseConfig, config: &ParseConfig,
) -> IResult<&'a str, (Title<'a>, &'a str), E> { ) -> IResult<&'a str, (Title<'a>, &'a str), E>
where
E: ParseError<&'a str>,
{
let (input, level) = map(take_while(|c: char| c == '*'), |s: &str| s.len())(input)?; let (input, level) = map(take_while(|c: char| c == '*'), |s: &str| s.len())(input)?;
debug_assert!(level > 0); debug_assert!(level > 0);
let (input, keyword) = opt(preceded( let (input, keyword) = opt(preceded(
space1, space1,
verify(take_one_word, |s: &str| { verify(one_word, |s: &str| {
config.todo_keywords.0.iter().any(|x| x == s) config.todo_keywords.0.iter().any(|x| x == s)
|| config.todo_keywords.1.iter().any(|x| x == s) || config.todo_keywords.1.iter().any(|x| x == s)
}), }),
@ -142,7 +145,7 @@ fn parse_title<'a, E: ParseError<&'a str>>(
let (input, priority) = opt(preceded( let (input, priority) = opt(preceded(
space1, space1,
map_parser( map_parser(
take_one_word, one_word,
delimited( delimited(
tag("[#"), tag("[#"),
verify(anychar, |c: &char| c.is_ascii_uppercase()), verify(anychar, |c: &char| c.is_ascii_uppercase()),
@ -168,7 +171,7 @@ fn parse_title<'a, E: ParseError<&'a str>>(
.unwrap_or((input, None)); .unwrap_or((input, None));
let (input, properties) = opt(parse_properties_drawer)(input)?; let (input, properties) = opt(parse_properties_drawer)(input)?;
let (input, blank) = blank_lines(input); let (input, post_blank) = blank_lines_count(input)?;
Ok(( Ok((
input, input,
@ -181,7 +184,7 @@ fn parse_title<'a, E: ParseError<&'a str>>(
tags, tags,
raw: raw.into(), raw: raw.into(),
planning, planning,
post_blank: blank, post_blank,
}, },
raw, raw,
), ),
@ -211,7 +214,8 @@ fn parse_properties_drawer<'a, E: ParseError<&'a str>>(
fn parse_node_property<'a, E: ParseError<&'a str>>( fn parse_node_property<'a, E: ParseError<&'a str>>(
input: &'a str, input: &'a str,
) -> IResult<&str, (&str, &str), E> { ) -> IResult<&str, (&str, &str), E> {
let input = skip_empty_lines(input).trim_start(); let (input, _) = blank_lines_count(input)?;
let input = input.trim_start();
let (input, name) = map(delimited(tag(":"), take_until(":"), tag(":")), |s: &str| { let (input, name) = map(delimited(tag(":"), take_until(":"), tag(":")), |s: &str| {
s.trim_end_matches('+') s.trim_end_matches('+')
})(input)?; })(input)?;

View file

@ -225,6 +225,7 @@ pub mod elements;
pub mod export; pub mod export;
mod headline; mod headline;
mod org; mod org;
mod parse;
mod parsers; mod parsers;
mod validate; mod validate;

View file

@ -6,7 +6,7 @@ use crate::{
config::{ParseConfig, DEFAULT_CONFIG}, config::{ParseConfig, DEFAULT_CONFIG},
elements::{Element, Keyword}, elements::{Element, Keyword},
export::{DefaultHtmlHandler, DefaultOrgHandler, HtmlHandler, OrgHandler}, export::{DefaultHtmlHandler, DefaultOrgHandler, HtmlHandler, OrgHandler},
parsers::{blank_lines, parse_container, Container, OwnedArena}, parsers::{blank_lines_count, parse_container, Container, OwnedArena},
}; };
pub struct Org<'a> { pub struct Org<'a> {
@ -41,7 +41,7 @@ impl<'a> Org<'a> {
/// Parses string `text` into `Org` struct with custom `ParseConfig`. /// Parses string `text` into `Org` struct with custom `ParseConfig`.
pub fn parse_custom(text: &'a str, config: &ParseConfig) -> Org<'a> { pub fn parse_custom(text: &'a str, config: &ParseConfig) -> Org<'a> {
let mut arena = Arena::new(); let mut arena = Arena::new();
let (text, pre_blank) = blank_lines(text); let (text, pre_blank) = blank_lines_count(text);
let root = arena.new_node(Element::Document { pre_blank }); let root = arena.new_node(Element::Document { pre_blank });
let mut org = Org { arena, root }; let mut org = Org { arena, root };
@ -62,7 +62,7 @@ impl<'a> Org<'a> {
/// Likes `parse_custom`, but accepts `String`. /// Likes `parse_custom`, but accepts `String`.
pub fn parse_string_custom(text: String, config: &ParseConfig) -> Org<'static> { pub fn parse_string_custom(text: String, config: &ParseConfig) -> Org<'static> {
let mut arena = Arena::new(); let mut arena = Arena::new();
let (text, pre_blank) = blank_lines(&text); let (text, pre_blank) = blank_lines_count(&text);
let root = arena.new_node(Element::Document { pre_blank }); let root = arena.new_node(Element::Document { pre_blank });
let mut org = Org { arena, root }; let mut org = Org { arena, root };

View file

@ -1,20 +1,19 @@
use std::borrow::Cow;
use std::iter::once; use std::iter::once;
use std::marker::PhantomData; use std::marker::PhantomData;
use indextree::{Arena, NodeId}; use indextree::{Arena, NodeId};
use jetscii::{bytes, BytesConst}; use jetscii::{bytes, BytesConst};
use memchr::{memchr, memchr_iter}; use memchr::{memchr, memchr_iter};
use nom::{bytes::complete::take_while1, combinator::verify, error::ParseError, IResult}; use nom::bytes::complete::take_while1;
use crate::config::ParseConfig; use crate::config::ParseConfig;
use crate::elements::{ use crate::elements::{
block::parse_block_element, emphasis::parse_emphasis, keyword::parse_keyword, block::RawBlock, emphasis::Emphasis, keyword::RawKeyword, radio_target::parse_radio_target,
radio_target::parse_radio_target, BabelCall, CenterBlock, Clock, Comment, CommentBlock, Cookie, Clock, Comment, Cookie, Drawer, DynBlock, Element, FixedWidth, FnDef, FnRef, InlineCall,
Drawer, DynBlock, Element, ExampleBlock, ExportBlock, FixedWidth, FnDef, FnRef, InlineCall, InlineSrc, Link, List, ListItem, Macros, Rule, Snippet, Table, TableCell, TableRow, Target,
InlineSrc, Keyword, Link, List, ListItem, Macros, QuoteBlock, Rule, Snippet, SourceBlock, Timestamp, Title,
SpecialBlock, Table, TableCell, TableRow, Target, Timestamp, Title, VerseBlock,
}; };
use crate::parse::combinators::lines_while;
pub trait ElementArena<'a> { pub trait ElementArena<'a> {
fn append<T>(&mut self, element: T, parent: NodeId) -> NodeId fn append<T>(&mut self, element: T, parent: NodeId) -> NodeId
@ -28,7 +27,9 @@ pub trait ElementArena<'a> {
T: Into<Element<'a>>; T: Into<Element<'a>>;
} }
impl<'a> ElementArena<'a> for Arena<Element<'a>> { pub type BorrowedArena<'a> = Arena<Element<'a>>;
impl<'a> ElementArena<'a> for BorrowedArena<'a> {
fn append<T>(&mut self, element: T, parent: NodeId) -> NodeId fn append<T>(&mut self, element: T, parent: NodeId) -> NodeId
where where
T: Into<Element<'a>>, T: Into<Element<'a>>,
@ -153,7 +154,8 @@ pub fn parse_section_and_headlines<'a, T: ElementArena<'a>>(
parent: NodeId, parent: NodeId,
containers: &mut Vec<Container<'a>>, containers: &mut Vec<Container<'a>>,
) { ) {
let content = skip_empty_lines(content); let content = blank_lines_count(content).0;
if content.is_empty() { if content.is_empty() {
return; return;
} }
@ -194,10 +196,10 @@ pub fn parse_blocks<'a, T: ElementArena<'a>>(
parent: NodeId, parent: NodeId,
containers: &mut Vec<Container<'a>>, containers: &mut Vec<Container<'a>>,
) { ) {
let mut tail = skip_empty_lines(content); let mut tail = blank_lines_count(content).0;
if let Some(new_tail) = parse_block(content, arena, parent, containers) { if let Some(new_tail) = parse_block(content, arena, parent, containers) {
tail = skip_empty_lines(new_tail); tail = blank_lines_count(new_tail).0;
} }
let mut text = tail; let mut text = tail;
@ -208,13 +210,13 @@ pub fn parse_blocks<'a, T: ElementArena<'a>>(
.map(|i| i + 1) .map(|i| i + 1)
.unwrap_or_else(|| tail.len()); .unwrap_or_else(|| tail.len());
if tail.as_bytes()[0..i].iter().all(u8::is_ascii_whitespace) { if tail.as_bytes()[0..i].iter().all(u8::is_ascii_whitespace) {
let (tail_, blank) = blank_lines(&tail[i..]); let (tail_, blank) = blank_lines_count(&tail[i..]);
debug_assert_ne!(tail, tail_); debug_assert_ne!(tail, tail_);
tail = tail_; tail = tail_;
let node = arena.append( let node = arena.append(
Element::Paragraph { Element::Paragraph {
// including current line (&tail[0..i]) // including the current line (&tail[0..i])
post_blank: blank + 1, post_blank: blank + 1,
}, },
parent, parent,
@ -239,8 +241,8 @@ pub fn parse_blocks<'a, T: ElementArena<'a>>(
pos = 0; pos = 0;
} }
debug_assert_ne!(tail, skip_empty_lines(new_tail)); debug_assert_ne!(tail, blank_lines_count(new_tail).0);
tail = skip_empty_lines(new_tail); tail = blank_lines_count(new_tail).0;
text = tail; text = tail;
} else { } else {
debug_assert_ne!(tail, &tail[i..]); debug_assert_ne!(tail, &tail[i..]);
@ -323,41 +325,27 @@ pub fn parse_block<'a, T: ElementArena<'a>>(
} }
} }
b'#' => { b'#' => {
if let Some((tail, (name, args, content, blank))) = parse_block_element(contents) { if let Some((tail, block)) = RawBlock::parse(contents) {
match_block( let (element, content) = block.into_element();
arena, // avoid use after free
parent, let is_block_container = match element {
containers, Element::CenterBlock(_)
name.into(), | Element::QuoteBlock(_)
args.map(Into::into), | Element::VerseBlock(_)
content, | Element::SpecialBlock(_) => true,
blank, _ => false,
); };
let node = arena.append(element, parent);
if is_block_container {
containers.push(Container::Block { content, node });
}
Some(tail) Some(tail)
} else if let Some((tail, (dyn_block, content))) = DynBlock::parse(contents) { } else if let Some((tail, (dyn_block, content))) = DynBlock::parse(contents) {
let node = arena.append(dyn_block, parent); let node = arena.append(dyn_block, parent);
containers.push(Container::Block { content, node }); containers.push(Container::Block { content, node });
Some(tail) Some(tail)
} else if let Some((tail, (key, optional, value, blank))) = parse_keyword(contents) { } else if let Some((tail, keyword)) = RawKeyword::parse(contents) {
if (&*key).eq_ignore_ascii_case("CALL") { arena.append(keyword.into_element(), parent);
arena.append(
BabelCall {
value: value.into(),
post_blank: blank,
},
parent,
);
} else {
arena.append(
Keyword {
key: key.into(),
optional: optional.map(Into::into),
value: value.into(),
post_blank: blank,
},
parent,
);
}
Some(tail) Some(tail)
} else { } else {
let (tail, comment) = Comment::parse(contents)?; let (tail, comment) = Comment::parse(contents)?;
@ -369,118 +357,6 @@ pub fn parse_block<'a, T: ElementArena<'a>>(
} }
} }
pub fn match_block<'a, T: ElementArena<'a>>(
arena: &mut T,
parent: NodeId,
containers: &mut Vec<Container<'a>>,
name: Cow<'a, str>,
parameters: Option<Cow<'a, str>>,
content: &'a str,
post_blank: usize,
) {
match &*name.to_uppercase() {
"CENTER" => {
let (content, pre_blank) = blank_lines(content);
let node = arena.append(
CenterBlock {
parameters,
pre_blank,
post_blank,
},
parent,
);
containers.push(Container::Block { content, node });
}
"QUOTE" => {
let (content, pre_blank) = blank_lines(content);
let node = arena.append(
QuoteBlock {
parameters,
pre_blank,
post_blank,
},
parent,
);
containers.push(Container::Block { content, node });
}
"VERSE" => {
let (content, pre_blank) = blank_lines(content);
let node = arena.append(
VerseBlock {
parameters,
pre_blank,
post_blank,
},
parent,
);
containers.push(Container::Block { content, node });
}
"COMMENT" => {
arena.append(
CommentBlock {
data: parameters,
contents: content.into(),
post_blank,
},
parent,
);
}
"EXAMPLE" => {
arena.append(
ExampleBlock {
data: parameters,
contents: content.into(),
post_blank,
},
parent,
);
}
"EXPORT" => {
arena.append(
ExportBlock {
data: parameters.unwrap_or_default(),
contents: content.into(),
post_blank,
},
parent,
);
}
"SRC" => {
let (language, arguments) = match &parameters {
Some(Cow::Borrowed(args)) => {
let (language, arguments) =
args.split_at(args.find(' ').unwrap_or_else(|| args.len()));
(language.into(), arguments.into())
}
None => (Cow::Borrowed(""), Cow::Borrowed("")),
_ => unreachable!("`parse_block_element` returns `Some(Cow::Borrowed)` or `None`"),
};
arena.append(
SourceBlock {
arguments,
language,
contents: content.into(),
post_blank,
},
parent,
);
}
_ => {
let (content, pre_blank) = blank_lines(content);
let node = arena.append(
SpecialBlock {
parameters,
name,
pre_blank,
post_blank,
},
parent,
);
containers.push(Container::Block { content, node });
}
}
}
struct InlinePositions<'a> { struct InlinePositions<'a> {
bytes: &'a [u8], bytes: &'a [u8],
pos: usize, pos: usize,
@ -565,7 +441,9 @@ pub fn parse_inline<'a, T: ElementArena<'a>>(
return None; return None;
} }
match contents.as_bytes()[0] { let byte = contents.as_bytes()[0];
match byte {
b'@' => { b'@' => {
let (tail, snippet) = Snippet::parse(contents)?; let (tail, snippet) = Snippet::parse(contents)?;
arena.append(snippet, parent); arena.append(snippet, parent);
@ -608,40 +486,17 @@ pub fn parse_inline<'a, T: ElementArena<'a>>(
Some(tail) Some(tail)
} }
} }
b'*' => { b'*' | b'+' | b'/' | b'_' | b'=' | b'~' => {
let (tail, content) = parse_emphasis(contents, b'*')?; let (tail, emphasis) = Emphasis::parse(contents, byte)?;
let node = arena.append(Element::Bold, parent); let (element, content) = emphasis.into_element();
let is_inline_container = match element {
Element::Bold | Element::Strike | Element::Italic | Element::Underline => true,
_ => false,
};
let node = arena.append(element, parent);
if is_inline_container {
containers.push(Container::Inline { content, node }); containers.push(Container::Inline { content, node });
Some(tail)
} }
b'+' => {
let (tail, content) = parse_emphasis(contents, b'+')?;
let node = arena.append(Element::Strike, parent);
containers.push(Container::Inline { content, node });
Some(tail)
}
b'/' => {
let (tail, content) = parse_emphasis(contents, b'/')?;
let node = arena.append(Element::Italic, parent);
containers.push(Container::Inline { content, node });
Some(tail)
}
b'_' => {
let (tail, content) = parse_emphasis(contents, b'_')?;
let node = arena.append(Element::Underline, parent);
containers.push(Container::Inline { content, node });
Some(tail)
}
b'=' => {
let (tail, value) = parse_emphasis(contents, b'=')?;
let value = value.into();
arena.append(Element::Verbatim { value }, parent);
Some(tail)
}
b'~' => {
let (tail, value) = parse_emphasis(contents, b'~')?;
let value = value.into();
arena.append(Element::Code { value }, parent);
Some(tail) Some(tail)
} }
b's' => { b's' => {
@ -684,14 +539,14 @@ pub fn parse_list<'a, T: ElementArena<'a>>(
} }
} }
let (tail, blank) = blank_lines(tail); let (tail, post_blank) = blank_lines_count(tail);
arena.set( arena.set(
parent, parent,
List { List {
indent: first_item_indent, indent: first_item_indent,
ordered: first_item_ordered, ordered: first_item_ordered,
post_blank: blank, post_blank,
}, },
); );
@ -704,8 +559,10 @@ pub fn parse_org_table<'a, T: ElementArena<'a>>(
containers: &mut Vec<Container<'a>>, containers: &mut Vec<Container<'a>>,
parent: NodeId, parent: NodeId,
) -> &'a str { ) -> &'a str {
let (tail, contents) = take_lines_while(|line| line.trim_start().starts_with('|'))(contents); let (tail, contents) =
let (tail, blank) = blank_lines(tail); lines_while::<_, ()>(|line| line.trim_start().starts_with('|'))(contents)
.unwrap_or((contents, ""));
let (tail, post_blank) = blank_lines_count(tail);
let mut iter = contents.trim_end().lines().peekable(); let mut iter = contents.trim_end().lines().peekable();
@ -735,7 +592,7 @@ pub fn parse_org_table<'a, T: ElementArena<'a>>(
let parent = arena.append( let parent = arena.append(
Table::Org { Table::Org {
tblfm: None, tblfm: None,
post_blank: blank, post_blank,
has_header, has_header,
}, },
parent, parent,
@ -775,56 +632,18 @@ pub fn parse_org_table<'a, T: ElementArena<'a>>(
tail tail
} }
pub fn line<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&str, &str, E> { pub fn blank_lines_count(input: &str) -> (&str, usize) {
if let Some(i) = memchr(b'\n', input.as_bytes()) { crate::parse::combinators::blank_lines_count::<()>(input).unwrap_or((input, 0))
if i > 0 && input.as_bytes()[i - 1] == b'\r' {
Ok((&input[i + 1..], &input[0..i - 1]))
} else {
Ok((&input[i + 1..], &input[0..i]))
}
} else {
Ok(("", input))
}
}
pub fn eol<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&str, &str, E> {
verify(line, |s: &str| {
s.as_bytes().iter().all(|c| c.is_ascii_whitespace())
})(input)
}
pub fn take_lines_while(predicate: impl Fn(&str) -> bool) -> impl Fn(&str) -> (&str, &str) {
move |input| {
let mut last_end = 0;
for i in memchr_iter(b'\n', input.as_bytes()) {
if i > 0 && input.as_bytes()[i - 1] == b'\r' {
if !predicate(&input[last_end..i - 1]) {
return (&input[last_end..], &input[0..last_end]);
}
} else if !predicate(&input[last_end..i]) {
return (&input[last_end..], &input[0..last_end]);
}
last_end = i + 1;
}
if !predicate(&input[last_end..]) {
(&input[last_end..], &input[0..last_end])
} else {
("", input)
}
}
}
pub fn skip_empty_lines(input: &str) -> &str {
take_lines_while(|line| line.as_bytes().iter().all(|c| c.is_ascii_whitespace()))(input).0
} }
pub fn parse_headline(input: &str) -> Option<(&str, (&str, usize))> { pub fn parse_headline(input: &str) -> Option<(&str, (&str, usize))> {
let (input_, level) = parse_headline_level(input)?; let (input_, level) = parse_headline_level(input)?;
let (input_, content) = take_lines_while(move |line| { let (input_, content) = lines_while::<_, ()>(move |line| {
parse_headline_level(line) parse_headline_level(line)
.map(|(_, l)| l > level) .map(|(_, l)| l > level)
.unwrap_or(true) .unwrap_or(true)
})(input_); })(input_)
.unwrap_or((input_, ""));
Some((input_, (&input[0..level + content.len()], level))) Some((input_, (&input[0..level + content.len()], level)))
} }
@ -837,41 +656,3 @@ pub fn parse_headline_level(input: &str) -> Option<(&str, usize)> {
None None
} }
} }
pub fn take_one_word<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&str, &str, E> {
take_while1(|c: char| !c.is_ascii_whitespace())(input)
}
#[test]
pub fn test_skip_empty_lines() {
assert_eq!(skip_empty_lines("foo"), "foo");
assert_eq!(skip_empty_lines(" foo"), " foo");
assert_eq!(skip_empty_lines(" \nfoo\n"), "foo\n");
assert_eq!(skip_empty_lines(" \n\n\nfoo\n"), "foo\n");
assert_eq!(skip_empty_lines(" \n \n\nfoo\n"), "foo\n");
assert_eq!(skip_empty_lines(" \n \n\n foo\n"), " foo\n");
}
pub fn blank_lines(input: &str) -> (&str, usize) {
let bytes = input.as_bytes();
let mut blank = 0;
let mut last_end = 0;
for i in memchr_iter(b'\n', bytes) {
if bytes[last_end..i].iter().all(u8::is_ascii_whitespace) {
blank += 1;
} else {
break;
}
last_end = 1 + i;
}
(&input[last_end..], blank)
}
#[test]
pub fn test_blank_lines() {
assert_eq!(blank_lines("foo"), ("foo", 0));
assert_eq!(blank_lines(" foo"), (" foo", 0));
assert_eq!(blank_lines(" \t\nfoo\n"), ("foo\n", 1));
assert_eq!(blank_lines("\n \r\n\nfoo\n"), ("foo\n", 3));
assert_eq!(blank_lines("\r\n \n \r\n foo\n"), (" foo\n", 3));
}