refactor(elements): rewrite most parsers with nom

This commit is contained in:
PoiScript 2019-08-04 17:46:10 +08:00
parent 8d18fb04c1
commit 37c33a82f0
22 changed files with 336 additions and 402 deletions

View file

@ -1,4 +1,6 @@
use memchr::{memchr, memchr_iter}; use nom::{bytes::complete::tag_no_case, character::complete::alpha1, sequence::preceded, IResult};
use crate::parsers::{take_lines_till, take_until_eol};
#[cfg_attr(test, derive(PartialEq))] #[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)] #[derive(Debug)]
@ -9,40 +11,23 @@ pub struct Block<'a> {
impl Block<'_> { impl Block<'_> {
#[inline] #[inline]
pub(crate) fn parse(text: &str) -> Option<(&str, Block<'_>, &str)> { pub(crate) fn parse(input: &str) -> IResult<&str, (Block<'_>, &str)> {
debug_assert!(text.starts_with("#+")); let (input, name) = preceded(tag_no_case("#+BEGIN_"), alpha1)(input)?;
let (input, args) = take_until_eol(input)?;
let end_line = format!(r"#+END_{}", name);
let (input, contents) =
take_lines_till(|line| line.eq_ignore_ascii_case(&end_line))(input)?;
if text.len() <= 8 || text[2..8].to_uppercase() != "BEGIN_" { Ok((
return None; input,
} (
Block {
let mut lines = memchr_iter(b'\n', text.as_bytes()); name,
args: if args.is_empty() { None } else { Some(args) },
let (name, args, off) = lines },
.next() contents,
.map(|i| { ),
memchr(b' ', &text.as_bytes()[8..i]) ))
.map(|x| (&text[8..8 + x], Some(text[8 + x..i].trim()), i + 1))
.unwrap_or((&text[8..i], None, i + 1))
})
.filter(|(name, _, _)| name.as_bytes().iter().all(|&c| c.is_ascii_alphabetic()))?;
let mut pos = off;
let end = format!(r"#+END_{}", name.to_uppercase());
for i in lines {
if text[pos..i].trim().eq_ignore_ascii_case(&end) {
return Some((&text[i + 1..], Block { name, args }, &text[off..pos]));
}
pos = i + 1;
}
if text[pos..].trim().eq_ignore_ascii_case(&end) {
Some(("", Block { name, args }, &text[off..pos]))
} else {
None
}
} }
} }
@ -50,24 +35,28 @@ impl Block<'_> {
fn parse() { fn parse() {
assert_eq!( assert_eq!(
Block::parse("#+BEGIN_SRC\n#+END_SRC"), Block::parse("#+BEGIN_SRC\n#+END_SRC"),
Some(( Ok((
"", "",
Block { (
name: "SRC", Block {
args: None, name: "SRC",
}, args: None,
"" },
""
)
)) ))
); );
assert_eq!( assert_eq!(
Block::parse("#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n#+END_SRC\n"), Block::parse("#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n#+END_SRC\n"),
Some(( Ok((
"", "",
Block { (
name: "SRC", Block {
args: Some("javascript"), name: "SRC",
}, args: Some("javascript"),
"console.log('Hello World!');\n" },
"console.log('Hello World!');\n"
)
)) ))
); );
// TODO: more testing // TODO: more testing

View file

@ -1,5 +1,13 @@
use nom::sequence::separated_pair;
use nom::{
bytes::complete::tag,
character::complete::{char, digit1, space0},
combinator::{peek, recognize},
IResult,
};
use crate::elements::{Datetime, Element, Timestamp}; use crate::elements::{Datetime, Element, Timestamp};
use memchr::memchr; use crate::parsers::eol;
/// clock elements /// clock elements
/// ///
@ -25,24 +33,11 @@ pub enum Clock<'a> {
} }
impl Clock<'_> { impl Clock<'_> {
pub(crate) fn parse(text: &str) -> Option<(&str, Element<'_>)> { pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> {
let (text, eol) = memchr(b'\n', text.as_bytes()) let (input, _) = tag("CLOCK:")(input)?;
.map(|i| (text[..i].trim(), i + 1)) let (input, _) = space0(input)?;
.unwrap_or_else(|| (text.trim(), text.len())); let (input, _) = peek(tag("["))(input)?;
let (input, timestamp) = Timestamp::parse_inactive(input)?;
if !text.starts_with("CLOCK:") {
return None;
}
let tail = &text["CLOCK:".len()..].trim_start();
if !tail.starts_with('[') {
return None;
}
let (tail, timestamp) = Timestamp::parse_inactive(tail).ok()?;
let tail = tail.trim();
match timestamp { match timestamp {
Timestamp::InactiveRange { Timestamp::InactiveRange {
@ -51,50 +46,39 @@ impl Clock<'_> {
repeater, repeater,
delay, delay,
} => { } => {
if tail.starts_with("=>") { let (input, _) = space0(input)?;
let duration = &tail[3..].trim(); let (input, _) = tag("=>")(input)?;
let colon = memchr(b':', duration.as_bytes())?; let (input, _) = space0(input)?;
if duration.as_bytes()[0..colon].iter().all(u8::is_ascii_digit) let (input, duration) =
&& colon == duration.len() - 3 recognize(separated_pair(digit1, char(':'), digit1))(input)?;
&& duration.as_bytes()[colon + 1].is_ascii_digit() let (input, _) = eol(input)?;
&& duration.as_bytes()[colon + 2].is_ascii_digit() Ok((
{ input,
Some(( Element::Clock(Clock::Closed {
&text[eol..], start,
Element::Clock(Clock::Closed { end,
start, repeater,
end, delay,
repeater, duration,
delay, }),
duration, ))
}),
))
} else {
None
}
} else {
None
}
} }
Timestamp::Inactive { Timestamp::Inactive {
start, start,
repeater, repeater,
delay, delay,
} => { } => {
if tail.is_empty() { let (input, _) = eol(input)?;
Some(( Ok((
&text[eol..], input,
Element::Clock(Clock::Running { Element::Clock(Clock::Running {
start, start,
repeater, repeater,
delay, delay,
}), }),
)) ))
} else {
None
}
} }
_ => None, _ => unreachable!(),
} }
} }
@ -154,7 +138,7 @@ impl Clock<'_> {
fn parse() { fn parse() {
assert_eq!( assert_eq!(
Clock::parse("CLOCK: [2003-09-16 Tue 09:39]"), Clock::parse("CLOCK: [2003-09-16 Tue 09:39]"),
Some(( Ok((
"", "",
Element::Clock(Clock::Running { Element::Clock(Clock::Running {
start: Datetime { start: Datetime {
@ -172,7 +156,7 @@ fn parse() {
); );
assert_eq!( assert_eq!(
Clock::parse("CLOCK: [2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39] => 1:00"), Clock::parse("CLOCK: [2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39] => 1:00"),
Some(( Ok((
"", "",
Element::Clock(Clock::Closed { Element::Clock(Clock::Closed {
start: Datetime { start: Datetime {

View file

@ -1,4 +1,11 @@
use memchr::{memchr, memchr2}; use nom::{
branch::alt,
bytes::complete::tag,
character::complete::digit0,
combinator::recognize,
sequence::{delimited, pair, separated_pair},
IResult,
};
#[cfg_attr(test, derive(PartialEq))] #[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "serde", derive(serde::Serialize))] #[cfg_attr(feature = "serde", derive(serde::Serialize))]
@ -9,31 +16,17 @@ pub struct Cookie<'a> {
impl Cookie<'_> { impl Cookie<'_> {
#[inline] #[inline]
pub(crate) fn parse(src: &str) -> Option<(&str, Cookie<'_>)> { pub(crate) fn parse(input: &str) -> IResult<&str, Cookie<'_>> {
debug_assert!(src.starts_with('[')); let (input, value) = recognize(delimited(
tag("["),
alt((
separated_pair(digit0, tag("/"), digit0),
pair(digit0, tag("%")),
)),
tag("]"),
))(input)?;
let bytes = src.as_bytes(); Ok((input, Cookie { value }))
let num1 =
memchr2(b'%', b'/', bytes).filter(|&i| bytes[1..i].iter().all(u8::is_ascii_digit))?;
if bytes[num1] == b'%' && *bytes.get(num1 + 1)? == b']' {
Some((
&src[num1 + 2..],
Cookie {
value: &src[0..num1 + 2],
},
))
} else {
let num2 = memchr(b']', bytes)
.filter(|&i| bytes[num1 + 1..i].iter().all(u8::is_ascii_digit))?;
Some((
&src[num2 + 1..],
Cookie {
value: &src[0..num2 + 1],
},
))
}
} }
} }
@ -41,29 +34,26 @@ impl Cookie<'_> {
fn parse() { fn parse() {
assert_eq!( assert_eq!(
Cookie::parse("[1/10]"), Cookie::parse("[1/10]"),
Some(("", Cookie { value: "[1/10]" })) Ok(("", Cookie { value: "[1/10]" }))
); );
assert_eq!( assert_eq!(
Cookie::parse("[1/1000]"), Cookie::parse("[1/1000]"),
Some(("", Cookie { value: "[1/1000]" })) Ok(("", Cookie { value: "[1/1000]" }))
); );
assert_eq!( assert_eq!(Cookie::parse("[10%]"), Ok(("", Cookie { value: "[10%]" })));
Cookie::parse("[10%]"), assert_eq!(Cookie::parse("[%]"), Ok(("", Cookie { value: "[%]" })));
Some(("", Cookie { value: "[10%]" })) assert_eq!(Cookie::parse("[/]"), Ok(("", Cookie { value: "[/]" })));
);
assert_eq!(Cookie::parse("[%]"), Some(("", Cookie { value: "[%]" })));
assert_eq!(Cookie::parse("[/]"), Some(("", Cookie { value: "[/]" })));
assert_eq!( assert_eq!(
Cookie::parse("[100/]"), Cookie::parse("[100/]"),
Some(("", Cookie { value: "[100/]" })) Ok(("", Cookie { value: "[100/]" }))
); );
assert_eq!( assert_eq!(
Cookie::parse("[/100]"), Cookie::parse("[/100]"),
Some(("", Cookie { value: "[/100]" })) Ok(("", Cookie { value: "[/100]" }))
); );
assert_eq!(Cookie::parse("[10% ]"), None); assert!(Cookie::parse("[10% ]").is_err());
assert_eq!(Cookie::parse("[1//100]"), None); assert!(Cookie::parse("[1//100]").is_err());
assert_eq!(Cookie::parse("[1\\100]"), None); assert!(Cookie::parse("[1\\100]").is_err());
assert_eq!(Cookie::parse("[10%%]"), None); assert!(Cookie::parse("[10%%]").is_err());
} }

View file

@ -1,6 +1,11 @@
use memchr::memchr_iter;
use crate::elements::Element; use crate::elements::Element;
use crate::parsers::{eol, take_lines_till};
use nom::{
bytes::complete::{tag, take_while1},
sequence::delimited,
IResult,
};
#[cfg_attr(test, derive(PartialEq))] #[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "serde", derive(serde::Serialize))] #[cfg_attr(feature = "serde", derive(serde::Serialize))]
@ -11,47 +16,16 @@ pub struct Drawer<'a> {
impl Drawer<'_> { impl Drawer<'_> {
#[inline] #[inline]
pub(crate) fn parse(text: &str) -> Option<(&str, Element<'_>, &str)> { pub(crate) fn parse(input: &str) -> IResult<&str, (Element<'_>, &str)> {
debug_assert!(text.starts_with(':')); let (input, name) = delimited(
tag(":"),
take_while1(|c: char| c.is_ascii_alphabetic() || c == '-' || c == '_'),
tag(":"),
)(input)?;
let (input, _) = eol(input)?;
let (input, contents) = take_lines_till(|line| line.eq_ignore_ascii_case(":END:"))(input)?;
let mut lines = memchr_iter(b'\n', text.as_bytes()); Ok((input, (Element::Drawer(Drawer { name }), contents)))
let (name, off) = lines
.next()
.map(|i| (text[1..i].trim_end(), i + 1))
.filter(|(name, _)| {
name.ends_with(':')
&& name[0..name.len() - 1]
.as_bytes()
.iter()
.all(|&c| c.is_ascii_alphabetic() || c == b'-' || c == b'_')
})?;
let mut pos = off;
for i in lines {
if text[pos..i].trim().eq_ignore_ascii_case(":END:") {
return Some((
&text[i + 1..],
Element::Drawer(Drawer {
name: &name[0..name.len() - 1],
}),
&text[off..pos],
));
}
pos = i + 1;
}
if text[pos..].trim().eq_ignore_ascii_case(":END:") {
Some((
"",
Element::Drawer(Drawer {
name: &name[0..name.len() - 1],
}),
&text[off..pos],
))
} else {
None
}
} }
} }
@ -59,10 +33,12 @@ impl Drawer<'_> {
fn parse() { fn parse() {
assert_eq!( assert_eq!(
Drawer::parse(":PROPERTIES:\n :CUSTOM_ID: id\n :END:"), Drawer::parse(":PROPERTIES:\n :CUSTOM_ID: id\n :END:"),
Some(( Ok((
"", "",
Element::Drawer(Drawer { name: "PROPERTIES" }), (
" :CUSTOM_ID: id\n" Element::Drawer(Drawer { name: "PROPERTIES" }),
" :CUSTOM_ID: id\n"
)
)) ))
) )
} }

View file

@ -1,6 +1,11 @@
use crate::elements::Element; use crate::elements::Element;
use crate::parsers::{take_lines_till, take_until_eol};
use memchr::{memchr, memchr_iter}; use nom::{
bytes::complete::tag_no_case,
character::complete::{alpha1, space1},
IResult,
};
#[cfg_attr(test, derive(PartialEq))] #[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "serde", derive(serde::Serialize))] #[cfg_attr(feature = "serde", derive(serde::Serialize))]
@ -13,60 +18,24 @@ pub struct DynBlock<'a> {
impl DynBlock<'_> { impl DynBlock<'_> {
#[inline] #[inline]
pub(crate) fn parse(text: &str) -> Option<(&str, Element<'_>, &str)> { pub(crate) fn parse(input: &str) -> IResult<&str, (Element<'_>, &str)> {
debug_assert!(text.starts_with("#+")); let (input, _) = tag_no_case("#+BEGIN:")(input)?;
let (input, _) = space1(input)?;
let (input, name) = alpha1(input)?;
let (input, args) = take_until_eol(input)?;
if text.len() <= "#+BEGIN: ".len() || !text[2..9].eq_ignore_ascii_case("BEGIN: ") { let (input, contents) = take_lines_till(|line| line.eq_ignore_ascii_case("#+END:"))(input)?;
return None;
}
let bytes = text.as_bytes(); Ok((
let mut lines = memchr_iter(b'\n', bytes); input,
(
let (name, para, off) = lines
.next()
.map(|i| {
memchr(b' ', &bytes["#+BEGIN: ".len()..i])
.map(|x| {
(
&text["#+BEGIN: ".len().."#+BEGIN: ".len() + x],
Some(text["#+BEGIN: ".len() + x..i].trim()),
i + 1,
)
})
.unwrap_or((&text["#+BEGIN: ".len()..i], None, i + 1))
})
.filter(|(name, _, _)| name.as_bytes().iter().all(|&c| c.is_ascii_alphabetic()))?;
let mut pos = off;
for i in lines {
if text[pos..i].trim().eq_ignore_ascii_case("#+END:") {
return Some((
&text[i + 1..],
Element::DynBlock(DynBlock {
block_name: name,
arguments: para,
}),
&text[off..pos],
));
}
pos = i + 1;
}
if text[pos..].trim().eq_ignore_ascii_case("#+END:") {
Some((
"",
Element::DynBlock(DynBlock { Element::DynBlock(DynBlock {
block_name: name, block_name: name,
arguments: para, arguments: if args.is_empty() { None } else { Some(args) },
}), }),
&text[off..pos], contents,
)) ),
} else { ))
None
}
} }
} }
@ -75,13 +44,15 @@ fn parse() {
// TODO: testing // TODO: testing
assert_eq!( assert_eq!(
DynBlock::parse("#+BEGIN: clocktable :scope file\nCONTENTS\n#+END:\n"), DynBlock::parse("#+BEGIN: clocktable :scope file\nCONTENTS\n#+END:\n"),
Some(( Ok((
"", "",
Element::DynBlock(DynBlock { (
block_name: "clocktable", Element::DynBlock(DynBlock {
arguments: Some(":scope file"), block_name: "clocktable",
}), arguments: Some(":scope file"),
"CONTENTS\n" }),
"CONTENTS\n"
)
)) ))
); );
} }

View file

@ -1,5 +1,5 @@
use bytecount::count; use bytecount::count;
use memchr::memchr; use memchr::memchr_iter;
#[inline] #[inline]
pub(crate) fn parse(text: &str, marker: u8) -> Option<(&str, &str)> { pub(crate) fn parse(text: &str, marker: u8) -> Option<(&str, &str)> {
@ -11,31 +11,27 @@ pub(crate) fn parse(text: &str, marker: u8) -> Option<(&str, &str)> {
return None; return None;
} }
let end = memchr(marker, &bytes[1..]).filter(|&i| count(&bytes[1..=i], b'\n') < 2)?; for i in memchr_iter(marker, bytes).skip(1) {
if count(&bytes[1..i], b'\n') >= 2 {
if bytes[end].is_ascii_whitespace() { break;
return None; } else if validate_marker(i, text) {
return Some((&text[i + 1..], &text[1..i]));
}
} }
if let Some(&post) = bytes.get(end + 2) { None
if post == b' ' }
|| post == b'-'
|| post == b'.' fn validate_marker(pos: usize, text: &str) -> bool {
|| post == b',' if text.as_bytes()[pos - 1].is_ascii_whitespace() {
|| post == b':' false
|| post == b'!' } else if let Some(&post) = text.as_bytes().get(pos + 1) {
|| post == b'?' match post {
|| post == b'\'' b' ' | b'-' | b'.' | b',' | b':' | b'!' | b'?' | b'\'' | b'\n' | b')' | b'}' => true,
|| post == b'\n' _ => false,
|| post == b')'
|| post == b'}'
{
Some((&text[end + 2..], &text[1..end + 1]))
} else {
None
} }
} else { } else {
Some((&text[end + 2..], &text[1..end + 1])) true
} }
} }
@ -46,6 +42,7 @@ mod tests {
use super::parse; use super::parse;
assert_eq!(parse("*bold*", b'*'), Some(("", "bold"))); assert_eq!(parse("*bold*", b'*'), Some(("", "bold")));
assert_eq!(parse("*bo*ld*", b'*'), Some(("", "bo*ld")));
assert_eq!(parse("*bo\nld*", b'*'), Some(("", "bo\nld"))); assert_eq!(parse("*bo\nld*", b'*'), Some(("", "bo\nld")));
assert_eq!(parse("*bold*a", b'*'), None); assert_eq!(parse("*bold*a", b'*'), None);
assert_eq!(parse("*bold*", b'/'), None); assert_eq!(parse("*bold*", b'/'), None);

View file

@ -1,6 +1,7 @@
use memchr::memchr; use memchr::memchr;
use nom::{ use nom::{
bytes::complete::{tag, take_while1}, bytes::complete::{tag, take_while1},
sequence::delimited,
IResult, IResult,
}; };
@ -12,10 +13,12 @@ pub struct FnDef<'a> {
} }
fn parse_label(input: &str) -> IResult<&str, &str> { fn parse_label(input: &str) -> IResult<&str, &str> {
let (input, _) = tag("[fn:")(input)?; let (input, label) = delimited(
let (input, label) = tag("[fn:"),
take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_')(input)?; take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'),
let (input, _) = tag("]")(input)?; tag("]"),
)(input)?;
Ok((input, label)) Ok((input, label))
} }

View file

@ -1,55 +1,46 @@
use memchr::{memchr2, memchr2_iter}; use memchr::memchr2_iter;
use nom::{
bytes::complete::{tag, take_while},
combinator::opt,
error::ErrorKind,
error_position,
sequence::preceded,
Err, IResult,
};
#[cfg_attr(test, derive(PartialEq))] #[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "serde", derive(serde::Serialize))] #[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[derive(Debug)] #[derive(Debug)]
pub struct FnRef<'a> { pub struct FnRef<'a> {
#[cfg_attr(feature = "serde", serde(skip_serializing_if = "Option::is_none"))] pub label: &'a str,
pub label: Option<&'a str>,
#[cfg_attr(feature = "serde", serde(skip_serializing_if = "Option::is_none"))] #[cfg_attr(feature = "serde", serde(skip_serializing_if = "Option::is_none"))]
pub definition: Option<&'a str>, pub definition: Option<&'a str>,
} }
/// Splits `input` at the first `]` that has no matching `[` before it.
///
/// On success the parsed value is everything in front of that `]`, and the
/// remaining input starts at the `]` itself (it is not consumed). Nested
/// `[...]` pairs inside the value are skipped over. Fails with
/// `ErrorKind::Tag` when no unmatched `]` exists.
fn balanced_brackets(input: &str) -> IResult<&str, &str> {
    let bytes = input.as_bytes();
    // Depth starts at one: the bracket that closes the surrounding construct
    // is the one we are looking for, and its opener is not part of `input`.
    let mut depth = 1;
    for pos in memchr2_iter(b'[', b']', bytes) {
        match bytes[pos] {
            b'[' => depth += 1,
            // A `]` seen at depth one is the unmatched closer: stop here.
            _ if depth == 1 => return Ok((&input[pos..], &input[..pos])),
            _ => depth -= 1,
        }
    }
    Err(Err::Error(error_position!(input, ErrorKind::Tag)))
}
impl FnRef<'_> { impl FnRef<'_> {
#[inline] #[inline]
pub(crate) fn parse(text: &str) -> Option<(&str, FnRef<'_>)> { pub(crate) fn parse(input: &str) -> IResult<&str, FnRef<'_>> {
debug_assert!(text.starts_with("[fn:")); let (input, _) = tag("[fn:")(input)?;
let (input, label) =
take_while(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_')(input)?;
let (input, definition) = opt(preceded(tag(":"), balanced_brackets))(input)?;
let (input, _) = tag("]")(input)?;
let bytes = text.as_bytes(); Ok((input, FnRef { label, definition }))
let (label, off) = memchr2(b']', b':', &bytes["[fn:".len()..])
.filter(|&i| {
bytes["[fn:".len().."[fn:".len() + i]
.iter()
.all(|&c| c.is_ascii_alphanumeric() || c == b'-' || c == b'_')
})
.map(|i| {
(
if i == 0 {
None
} else {
Some(&text["[fn:".len().."[fn:".len() + i])
},
"[fn:".len() + i,
)
})?;
let (definition, off) = if bytes[off] == b':' {
let mut pairs = 1;
memchr2_iter(b'[', b']', &bytes[off..])
.find(|&i| {
if bytes[i + off] == b'[' {
pairs += 1;
} else {
pairs -= 1;
}
pairs == 0
})
.map(|i| (Some(&text[off + 1..off + i]), i + off + 1))?
} else {
(None, off + 1)
};
Some((&text[off..], FnRef { label, definition }))
} }
} }
@ -57,43 +48,44 @@ impl FnRef<'_> {
fn parse() { fn parse() {
assert_eq!( assert_eq!(
FnRef::parse("[fn:1]"), FnRef::parse("[fn:1]"),
Some(( Ok((
"", "",
FnRef { FnRef {
label: Some("1"), label: "1",
definition: None definition: None
}, },
)) ))
); );
assert_eq!( assert_eq!(
FnRef::parse("[fn:1:2]"), FnRef::parse("[fn:1:2]"),
Some(( Ok((
"", "",
FnRef { FnRef {
label: Some("1"), label: "1",
definition: Some("2") definition: Some("2")
}, },
)) ))
); );
assert_eq!( assert_eq!(
FnRef::parse("[fn::2]"), FnRef::parse("[fn::2]"),
Some(( Ok((
"", "",
FnRef { FnRef {
label: None, label: "",
definition: Some("2") definition: Some("2")
}, },
)) ))
); );
assert_eq!( assert_eq!(
FnRef::parse("[fn::[]]"), FnRef::parse("[fn::[]]"),
Some(( Ok((
"", "",
FnRef { FnRef {
label: None, label: "",
definition: Some("[]") definition: Some("[]")
}, },
)) ))
); );
assert_eq!(FnRef::parse("[fn::[]"), None);
assert!(FnRef::parse("[fn::[]").is_err());
} }

View file

@ -29,9 +29,8 @@ impl<'a> InlineCall<'a> {
let (input, _) = tag("call_")(input)?; let (input, _) = tag("call_")(input)?;
let (input, name) = take_till(|c| c == '[' || c == '\n' || c == '(' || c == ')')(input)?; let (input, name) = take_till(|c| c == '[' || c == '\n' || c == '(' || c == ')')(input)?;
let (input, inside_header) = opt(header)(input)?; let (input, inside_header) = opt(header)(input)?;
let (input, _) = tag("(")(input)?; let (input, arguments) =
let (input, arguments) = take_till(|c| c == ')' || c == '\n')(input)?; delimited(tag("("), take_till(|c| c == ')' || c == '\n'), tag(")"))(input)?;
let (input, _) = tag(")")(input)?;
let (input, end_header) = opt(header)(input)?; let (input, end_header) = opt(header)(input)?;
Ok(( Ok((

View file

@ -28,9 +28,8 @@ impl InlineSrc<'_> {
take_till(|c| c == '\n' || c == ']'), take_till(|c| c == '\n' || c == ']'),
tag("]"), tag("]"),
))(input)?; ))(input)?;
let (input, _) = tag("{")(input)?; let (input, body) =
let (input, body) = take_till(|c| c == '\n' || c == '}')(input)?; delimited(tag("{"), take_till(|c| c == '\n' || c == '}'), tag("}"))(input)?;
let (input, _) = tag("}")(input)?;
Ok(( Ok((
input, input,

View file

@ -1,11 +1,12 @@
use nom::{ use nom::{
bytes::complete::{tag, take_till, take_while}, bytes::complete::{tag, take_till},
combinator::{map, opt}, combinator::opt,
sequence::delimited, sequence::delimited,
IResult, IResult,
}; };
use crate::elements::Element; use crate::elements::Element;
use crate::parsers::take_until_eol;
#[cfg_attr(test, derive(PartialEq))] #[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "serde", derive(serde::Serialize))] #[cfg_attr(feature = "serde", derive(serde::Serialize))]
@ -24,20 +25,19 @@ pub struct BabelCall<'a> {
pub value: &'a str, pub value: &'a str,
} }
fn optional(input: &str) -> IResult<&str, &str> {
delimited(tag("["), take_till(|c| c == ']' || c == '\n'), tag("]"))(input)
}
impl Keyword<'_> { impl Keyword<'_> {
#[inline] #[inline]
pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> { pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> {
let (input, _) = tag("#+")(input)?; let (input, _) = tag("#+")(input)?;
let (input, key) = let (input, key) =
take_till(|c: char| c.is_ascii_whitespace() || c == ':' || c == '[')(input)?; take_till(|c: char| c.is_ascii_whitespace() || c == ':' || c == '[')(input)?;
let (input, optional) = opt(optional)(input)?; let (input, optional) = opt(delimited(
tag("["),
take_till(|c| c == ']' || c == '\n'),
tag("]"),
))(input)?;
let (input, _) = tag(":")(input)?; let (input, _) = tag(":")(input)?;
let (input, value) = map(take_while(|c| c != '\n'), str::trim)(input)?; let (input, value) = take_until_eol(input)?;
let (input, _) = opt(tag("\n"))(input)?;
if key.eq_ignore_ascii_case("CALL") { if key.eq_ignore_ascii_case("CALL") {
Ok((input, Element::BabelCall(BabelCall { value }))) Ok((input, Element::BabelCall(BabelCall { value })))

View file

@ -1,6 +1,7 @@
use nom::{ use nom::{
bytes::complete::{tag, take_while}, bytes::complete::{tag, take_while},
combinator::opt, combinator::opt,
sequence::delimited,
IResult, IResult,
}; };
@ -18,16 +19,16 @@ pub struct Link<'a> {
impl Link<'_> { impl Link<'_> {
#[inline] #[inline]
pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> { pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> {
let (input, _) = tag("[[")(input)?; let (input, path) = delimited(
let (input, path) = tag("[["),
take_while(|c: char| c != '<' && c != '>' && c != '\n' && c != ']')(input)?; take_while(|c: char| c != '<' && c != '>' && c != '\n' && c != ']'),
let (input, _) = tag("]")(input)?; tag("]"),
let (input, desc) = opt(|input| { )(input)?;
let (input, _) = tag("[")(input)?; let (input, desc) = opt(delimited(
let (input, desc) = take_while(|c: char| c != '[' && c != ']')(input)?; tag("["),
let (input, _) = tag("]")(input)?; take_while(|c: char| c != '[' && c != ']'),
Ok((input, desc)) tag("]"),
})(input)?; ))(input)?;
let (input, _) = tag("]")(input)?; let (input, _) = tag("]")(input)?;
Ok((input, Element::Link(Link { path, desc }))) Ok((input, Element::Link(Link { path, desc })))
} }

View file

@ -1,6 +1,7 @@
use nom::{ use nom::{
bytes::complete::{tag, take, take_until, take_while1}, bytes::complete::{tag, take, take_until, take_while1},
combinator::{opt, verify}, combinator::{opt, verify},
sequence::delimited,
IResult, IResult,
}; };
@ -23,12 +24,7 @@ impl Macros<'_> {
take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'), take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'),
|s: &str| s.starts_with(|c: char| c.is_ascii_alphabetic()), |s: &str| s.starts_with(|c: char| c.is_ascii_alphabetic()),
)(input)?; )(input)?;
let (input, arguments) = opt(|input| { let (input, arguments) = opt(delimited(tag("("), take_until(")}}}"), take(1usize)))(input)?;
let (input, _) = tag("(")(input)?;
let (input, args) = take_until(")}}}")(input)?;
let (input, _) = take(1usize)(input)?;
Ok((input, args))
})(input)?;
let (input, _) = tag("}}}")(input)?; let (input, _) = tag("}}}")(input)?;
Ok((input, Element::Macros(Macros { name, arguments }))) Ok((input, Element::Macros(Macros { name, arguments })))

View file

@ -1,6 +1,7 @@
use nom::{ use nom::{
bytes::complete::{tag, take_while}, bytes::complete::{tag, take_while},
combinator::verify, combinator::verify,
sequence::delimited,
IResult, IResult,
}; };
@ -15,12 +16,14 @@ pub struct RadioTarget;
impl RadioTarget { impl RadioTarget {
#[inline] #[inline]
pub(crate) fn parse(input: &str) -> IResult<&str, (Element, &str)> { pub(crate) fn parse(input: &str) -> IResult<&str, (Element, &str)> {
let (input, _) = tag("<<<")(input)?; let (input, contents) = delimited(
let (input, contents) = verify( tag("<<<"),
take_while(|c: char| c != '<' && c != '\n' && c != '>'), verify(
|s: &str| s.starts_with(|c| c != ' ') && s.ends_with(|c| c != ' '), take_while(|c: char| c != '<' && c != '\n' && c != '>'),
|s: &str| s.starts_with(|c| c != ' ') && s.ends_with(|c| c != ' '),
),
tag(">>>"),
)(input)?; )(input)?;
let (input, _) = tag(">>>")(input)?;
Ok((input, (Element::RadioTarget(RadioTarget), contents))) Ok((input, (Element::RadioTarget(RadioTarget), contents)))
} }

View file

@ -1,13 +1,8 @@
use nom::{ use nom::{bytes::complete::take_while_m_n, character::complete::space0, IResult};
branch::alt,
bytes::complete::{tag, take_while_m_n},
character::complete::space0,
error::ErrorKind,
Err, IResult,
};
use std::usize; use std::usize;
use crate::elements::Element; use crate::elements::Element;
use crate::parsers::eol;
pub struct Rule; pub struct Rule;
@ -16,20 +11,11 @@ impl Rule {
pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> { pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> {
let (input, _) = space0(input)?; let (input, _) = space0(input)?;
let (input, _) = take_while_m_n(5, usize::MAX, |c| c == '-')(input)?; let (input, _) = take_while_m_n(5, usize::MAX, |c| c == '-')(input)?;
let (input, _) = space0(input)?; let (input, _) = eol(input)?;
let (input, _) = alt((tag("\n"), eof))(input)?;
Ok((input, Element::Rule)) Ok((input, Element::Rule))
} }
} }
fn eof(input: &str) -> IResult<&str, &str> {
if input.is_empty() {
Ok(("", ""))
} else {
Err(Err::Error(("", ErrorKind::Tag)))
}
}
#[test] #[test]
fn parse() { fn parse() {
assert_eq!(Rule::parse("-----"), Ok(("", Element::Rule))); assert_eq!(Rule::parse("-----"), Ok(("", Element::Rule)));

View file

@ -1,5 +1,6 @@
use nom::{ use nom::{
bytes::complete::{tag, take, take_until, take_while1}, bytes::complete::{tag, take, take_until, take_while1},
sequence::{delimited, separated_pair},
IResult, IResult,
}; };
@ -16,11 +17,15 @@ pub struct Snippet<'a> {
impl Snippet<'_> { impl Snippet<'_> {
#[inline] #[inline]
pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> { pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> {
let (input, _) = tag("@@")(input)?; let (input, (name, value)) = delimited(
let (input, name) = take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-')(input)?; tag("@@"),
let (input, _) = tag(":")(input)?; separated_pair(
let (input, value) = take_until("@@")(input)?; take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-'),
let (input, _) = take(2usize)(input)?; tag(":"),
take_until("@@"),
),
take(2usize),
)(input)?;
Ok((input, Element::Snippet(Snippet { name, value }))) Ok((input, Element::Snippet(Snippet { name, value })))
} }

View file

@ -1,6 +1,7 @@
use nom::{ use nom::{
bytes::complete::{tag, take_while}, bytes::complete::{tag, take_while},
combinator::verify, combinator::verify,
sequence::delimited,
IResult, IResult,
}; };
@ -16,12 +17,14 @@ pub struct Target<'a> {
impl Target<'_> { impl Target<'_> {
#[inline] #[inline]
pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> { pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> {
let (input, _) = tag("<<")(input)?; let (input, target) = delimited(
let (input, target) = verify( tag("<<"),
take_while(|c: char| c != '<' && c != '\n' && c != '>'), verify(
|s: &str| s.starts_with(|c| c != ' ') && s.ends_with(|c| c != ' '), take_while(|c: char| c != '<' && c != '\n' && c != '>'),
|s: &str| s.starts_with(|c| c != ' ') && s.ends_with(|c| c != ' '),
),
tag(">>"),
)(input)?; )(input)?;
let (input, _) = tag(">>")(input)?;
Ok((input, Element::Target(Target { target }))) Ok((input, Element::Target(Target { target })))
} }

View file

@ -2,6 +2,7 @@ use nom::{
bytes::complete::{tag, take, take_till, take_while, take_while_m_n}, bytes::complete::{tag, take, take_till, take_while, take_while_m_n},
character::complete::{space0, space1}, character::complete::{space0, space1},
combinator::{map, map_res, opt}, combinator::{map, map_res, opt},
sequence::preceded,
IResult, IResult,
}; };
@ -51,13 +52,9 @@ fn parse_datetime(input: &str) -> IResult<&str, Datetime<'_>> {
&& c != ']' && c != ']'
&& c != '>' && c != '>'
})(input)?; })(input)?;
let (input, (hour, minute)) = map( let (input, (hour, minute)) = map(opt(preceded(space1, parse_time)), |time| {
opt(|input| { (time.map(|t| t.0), time.map(|t| t.1))
let (input, _) = space1(input)?; })(input)?;
parse_time(input)
}),
|time| (time.map(|t| t.0), time.map(|t| t.1)),
)(input)?;
Ok(( Ok((
input, input,

View file

@ -56,10 +56,7 @@ pub trait OrgHandler<E: From<Error>> {
} }
Code { value } => write!(w, "~{}~", value)?, Code { value } => write!(w, "~{}~", value)?,
FnRef(fn_ref) => { FnRef(fn_ref) => {
write!(&mut w, "[fn:")?; write!(&mut w, "[fn:{}", fn_ref.label)?;
if let Some(label) = fn_ref.label {
write!(&mut w, "{}", label)?;
}
if let Some(definition) = fn_ref.definition { if let Some(definition) = fn_ref.definition {
write!(&mut w, ":{}", definition)?; write!(&mut w, ":{}", definition)?;
} }

View file

@ -221,6 +221,7 @@ pub mod elements;
pub mod export; pub mod export;
mod iter; mod iter;
mod org; mod org;
mod parsers;
#[cfg(feature = "serde")] #[cfg(feature = "serde")]
mod serde; mod serde;

View file

@ -175,7 +175,7 @@ fn is_headline(text: &str) -> Option<usize> {
} else { } else {
None None
} }
} else if text.len() > 0 && text.as_bytes().iter().all(|&c| c == b'*') { } else if !text.is_empty() && text.as_bytes().iter().all(|&c| c == b'*') {
Some(text.len()) Some(text.len())
} else { } else {
None None
@ -291,7 +291,7 @@ fn parse_block<'a>(
let tail = contents.trim_start(); let tail = contents.trim_start();
if let Some((tail, clock)) = Clock::parse(tail) { if let Ok((tail, clock)) = Clock::parse(tail) {
return Some((tail, arena.new_node(clock))); return Some((tail, arena.new_node(clock)));
} }
@ -305,7 +305,7 @@ fn parse_block<'a>(
} }
if tail.starts_with(':') { if tail.starts_with(':') {
if let Some((tail, drawer, _content)) = Drawer::parse(tail) { if let Ok((tail, (drawer, _content))) = Drawer::parse(tail) {
return Some((tail, arena.new_node(drawer))); return Some((tail, arena.new_node(drawer)));
} }
} }
@ -349,7 +349,7 @@ fn parse_block<'a>(
} }
if tail.starts_with("#+") { if tail.starts_with("#+") {
if let Some((tail, block, content)) = Block::parse(tail) { if let Ok((tail, (block, content))) = Block::parse(tail) {
match &*block.name.to_uppercase() { match &*block.name.to_uppercase() {
"CENTER" => { "CENTER" => {
let node = arena.new_node(Element::CenterBlock(CenterBlock { let node = arena.new_node(Element::CenterBlock(CenterBlock {
@ -414,7 +414,7 @@ fn parse_block<'a>(
Some((tail, node)) Some((tail, node))
} }
} }
} else if let Some((tail, dyn_block, content)) = DynBlock::parse(tail) { } else if let Ok((tail, (dyn_block, content))) = DynBlock::parse(tail) {
let node = arena.new_node(dyn_block); let node = arena.new_node(dyn_block);
containers.push(Container::Block { content, node }); containers.push(Container::Block { content, node });
Some((tail, node)) Some((tail, node))
@ -546,8 +546,8 @@ fn parse_inline<'a>(
b'[' => { b'[' => {
if contents[1..].starts_with("fn:") { if contents[1..].starts_with("fn:") {
FnRef::parse(contents) FnRef::parse(contents)
.map(|(tail, fn_ref)| (tail, fn_ref.into())) .ok()
.map(|(tail, element)| (tail, arena.new_node(element))) .map(|(tail, fn_ref)| (tail, arena.new_node(fn_ref.into())))
} else if bytes[1] == b'[' { } else if bytes[1] == b'[' {
Link::parse(contents) Link::parse(contents)
.ok() .ok()
@ -555,11 +555,11 @@ fn parse_inline<'a>(
} else { } else {
Cookie::parse(contents) Cookie::parse(contents)
.map(|(tail, cookie)| (tail, cookie.into())) .map(|(tail, cookie)| (tail, cookie.into()))
.or_else(|| { .or_else(|_| {
Timestamp::parse_inactive(contents) Timestamp::parse_inactive(contents)
.map(|(tail, timestamp)| (tail, timestamp.into())) .map(|(tail, timestamp)| (tail, timestamp.into()))
.ok()
}) })
.ok()
.map(|(tail, element)| (tail, arena.new_node(element))) .map(|(tail, element)| (tail, arena.new_node(element)))
} }
} }

45
src/parsers.rs Normal file
View file

@ -0,0 +1,45 @@
// reused nom parsers
use memchr::{memchr, memchr_iter};
use nom::{
bytes::complete::tag, character::complete::space0, error::ErrorKind, error_position, Err,
IResult,
};
/// Matches the end of a line: optional trailing blanks (whatever `space0`
/// accepts) followed by either a `\n` or the end of the input.
///
/// Returns the input after the newline, or the empty tail of `input` when
/// the end of input was reached. Fails if anything other than blanks
/// precedes the newline / end of input.
pub(crate) fn eol(input: &str) -> IResult<&str, ()> {
    let (rest, _) = space0(input)?;
    if rest.is_empty() {
        // Hand back the (empty) tail of the original input rather than a
        // `""` literal, so the remainder still points into the caller's
        // buffer and offset-based combinators such as `recognize` keep working.
        Ok((rest, ()))
    } else {
        let (rest, _) = tag("\n")(rest)?;
        Ok((rest, ()))
    }
}
/// Takes the rest of the current line.
///
/// The parsed value is the text in front of the first `\n`, with surrounding
/// whitespace trimmed; the remaining input starts right after that `\n`.
/// When `input` contains no `\n`, the whole input (trimmed) is the value and
/// the remainder is empty. This parser never fails.
pub(crate) fn take_until_eol(input: &str) -> IResult<&str, &str> {
    match memchr(b'\n', input.as_bytes()) {
        Some(i) => Ok((&input[i + 1..], input[..i].trim())),
        // Return the empty tail of `input` instead of a `""` literal so the
        // remainder is always a sub-slice of the original input.
        None => Ok((&input[input.len()..], input.trim())),
    }
}
/// Builds a parser that consumes whole lines until one satisfies `predicate`.
///
/// Each line is trimmed before being passed to `predicate`. On success the
/// parsed value is every line in front of the matching one (including their
/// trailing newlines), and the remaining input starts after the matching
/// line. The matching line itself is consumed but not returned.
///
/// Fails with `ErrorKind::TakeTill1` when no line, including a final line
/// without a trailing newline, satisfies `predicate`.
pub(crate) fn take_lines_till(
    predicate: impl Fn(&str) -> bool,
) -> impl Fn(&str) -> IResult<&str, &str> {
    move |input| {
        // Byte offset at which the line currently being examined begins.
        let mut start = 0;
        for i in memchr_iter(b'\n', input.as_bytes()) {
            if predicate(input[start..i].trim()) {
                return Ok((&input[i + 1..], &input[..start]));
            }
            start = i + 1;
        }

        // Whatever follows the last newline (possibly nothing) is the final line.
        if predicate(input[start..].trim()) {
            // Use the empty tail of `input`, not a `""` literal, so the
            // remainder stays inside the caller's buffer.
            Ok((&input[input.len()..], &input[..start]))
        } else {
            Err(Err::Error(error_position!(input, ErrorKind::TakeTill1)))
        }
    }
}