refactor(elements): rewrite most parsers with nom

This commit is contained in:
PoiScript 2019-08-04 17:46:10 +08:00
parent 8d18fb04c1
commit 37c33a82f0
22 changed files with 336 additions and 402 deletions

View file

@ -1,4 +1,6 @@
use memchr::{memchr, memchr_iter};
use nom::{bytes::complete::tag_no_case, character::complete::alpha1, sequence::preceded, IResult};
use crate::parsers::{take_lines_till, take_until_eol};
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
@ -9,40 +11,23 @@ pub struct Block<'a> {
impl Block<'_> {
#[inline]
pub(crate) fn parse(text: &str) -> Option<(&str, Block<'_>, &str)> {
debug_assert!(text.starts_with("#+"));
pub(crate) fn parse(input: &str) -> IResult<&str, (Block<'_>, &str)> {
let (input, name) = preceded(tag_no_case("#+BEGIN_"), alpha1)(input)?;
let (input, args) = take_until_eol(input)?;
let end_line = format!(r"#+END_{}", name);
let (input, contents) =
take_lines_till(|line| line.eq_ignore_ascii_case(&end_line))(input)?;
if text.len() <= 8 || text[2..8].to_uppercase() != "BEGIN_" {
return None;
}
let mut lines = memchr_iter(b'\n', text.as_bytes());
let (name, args, off) = lines
.next()
.map(|i| {
memchr(b' ', &text.as_bytes()[8..i])
.map(|x| (&text[8..8 + x], Some(text[8 + x..i].trim()), i + 1))
.unwrap_or((&text[8..i], None, i + 1))
})
.filter(|(name, _, _)| name.as_bytes().iter().all(|&c| c.is_ascii_alphabetic()))?;
let mut pos = off;
let end = format!(r"#+END_{}", name.to_uppercase());
for i in lines {
if text[pos..i].trim().eq_ignore_ascii_case(&end) {
return Some((&text[i + 1..], Block { name, args }, &text[off..pos]));
}
pos = i + 1;
}
if text[pos..].trim().eq_ignore_ascii_case(&end) {
Some(("", Block { name, args }, &text[off..pos]))
} else {
None
}
Ok((
input,
(
Block {
name,
args: if args.is_empty() { None } else { Some(args) },
},
contents,
),
))
}
}
@ -50,24 +35,28 @@ impl Block<'_> {
fn parse() {
assert_eq!(
Block::parse("#+BEGIN_SRC\n#+END_SRC"),
Some((
Ok((
"",
Block {
name: "SRC",
args: None,
},
""
(
Block {
name: "SRC",
args: None,
},
""
)
))
);
assert_eq!(
Block::parse("#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n#+END_SRC\n"),
Some((
Ok((
"",
Block {
name: "SRC",
args: Some("javascript"),
},
"console.log('Hello World!');\n"
(
Block {
name: "SRC",
args: Some("javascript"),
},
"console.log('Hello World!');\n"
)
))
);
// TODO: more testing

View file

@ -1,5 +1,13 @@
use nom::sequence::separated_pair;
use nom::{
bytes::complete::tag,
character::complete::{char, digit1, space0},
combinator::{peek, recognize},
IResult,
};
use crate::elements::{Datetime, Element, Timestamp};
use memchr::memchr;
use crate::parsers::eol;
/// clock elements
///
@ -25,24 +33,11 @@ pub enum Clock<'a> {
}
impl Clock<'_> {
pub(crate) fn parse(text: &str) -> Option<(&str, Element<'_>)> {
let (text, eol) = memchr(b'\n', text.as_bytes())
.map(|i| (text[..i].trim(), i + 1))
.unwrap_or_else(|| (text.trim(), text.len()));
if !text.starts_with("CLOCK:") {
return None;
}
let tail = &text["CLOCK:".len()..].trim_start();
if !tail.starts_with('[') {
return None;
}
let (tail, timestamp) = Timestamp::parse_inactive(tail).ok()?;
let tail = tail.trim();
pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> {
let (input, _) = tag("CLOCK:")(input)?;
let (input, _) = space0(input)?;
let (input, _) = peek(tag("["))(input)?;
let (input, timestamp) = Timestamp::parse_inactive(input)?;
match timestamp {
Timestamp::InactiveRange {
@ -51,50 +46,39 @@ impl Clock<'_> {
repeater,
delay,
} => {
if tail.starts_with("=>") {
let duration = &tail[3..].trim();
let colon = memchr(b':', duration.as_bytes())?;
if duration.as_bytes()[0..colon].iter().all(u8::is_ascii_digit)
&& colon == duration.len() - 3
&& duration.as_bytes()[colon + 1].is_ascii_digit()
&& duration.as_bytes()[colon + 2].is_ascii_digit()
{
Some((
&text[eol..],
Element::Clock(Clock::Closed {
start,
end,
repeater,
delay,
duration,
}),
))
} else {
None
}
} else {
None
}
let (input, _) = space0(input)?;
let (input, _) = tag("=>")(input)?;
let (input, _) = space0(input)?;
let (input, duration) =
recognize(separated_pair(digit1, char(':'), digit1))(input)?;
let (input, _) = eol(input)?;
Ok((
input,
Element::Clock(Clock::Closed {
start,
end,
repeater,
delay,
duration,
}),
))
}
Timestamp::Inactive {
start,
repeater,
delay,
} => {
if tail.is_empty() {
Some((
&text[eol..],
Element::Clock(Clock::Running {
start,
repeater,
delay,
}),
))
} else {
None
}
let (input, _) = eol(input)?;
Ok((
input,
Element::Clock(Clock::Running {
start,
repeater,
delay,
}),
))
}
_ => None,
_ => unreachable!(),
}
}
@ -154,7 +138,7 @@ impl Clock<'_> {
fn parse() {
assert_eq!(
Clock::parse("CLOCK: [2003-09-16 Tue 09:39]"),
Some((
Ok((
"",
Element::Clock(Clock::Running {
start: Datetime {
@ -172,7 +156,7 @@ fn parse() {
);
assert_eq!(
Clock::parse("CLOCK: [2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39] => 1:00"),
Some((
Ok((
"",
Element::Clock(Clock::Closed {
start: Datetime {

View file

@ -1,4 +1,11 @@
use memchr::{memchr, memchr2};
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::digit0,
combinator::recognize,
sequence::{delimited, pair, separated_pair},
IResult,
};
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
@ -9,31 +16,17 @@ pub struct Cookie<'a> {
impl Cookie<'_> {
#[inline]
pub(crate) fn parse(src: &str) -> Option<(&str, Cookie<'_>)> {
debug_assert!(src.starts_with('['));
pub(crate) fn parse(input: &str) -> IResult<&str, Cookie<'_>> {
let (input, value) = recognize(delimited(
tag("["),
alt((
separated_pair(digit0, tag("/"), digit0),
pair(digit0, tag("%")),
)),
tag("]"),
))(input)?;
let bytes = src.as_bytes();
let num1 =
memchr2(b'%', b'/', bytes).filter(|&i| bytes[1..i].iter().all(u8::is_ascii_digit))?;
if bytes[num1] == b'%' && *bytes.get(num1 + 1)? == b']' {
Some((
&src[num1 + 2..],
Cookie {
value: &src[0..num1 + 2],
},
))
} else {
let num2 = memchr(b']', bytes)
.filter(|&i| bytes[num1 + 1..i].iter().all(u8::is_ascii_digit))?;
Some((
&src[num2 + 1..],
Cookie {
value: &src[0..num2 + 1],
},
))
}
Ok((input, Cookie { value }))
}
}
@ -41,29 +34,26 @@ impl Cookie<'_> {
fn parse() {
assert_eq!(
Cookie::parse("[1/10]"),
Some(("", Cookie { value: "[1/10]" }))
Ok(("", Cookie { value: "[1/10]" }))
);
assert_eq!(
Cookie::parse("[1/1000]"),
Some(("", Cookie { value: "[1/1000]" }))
Ok(("", Cookie { value: "[1/1000]" }))
);
assert_eq!(
Cookie::parse("[10%]"),
Some(("", Cookie { value: "[10%]" }))
);
assert_eq!(Cookie::parse("[%]"), Some(("", Cookie { value: "[%]" })));
assert_eq!(Cookie::parse("[/]"), Some(("", Cookie { value: "[/]" })));
assert_eq!(Cookie::parse("[10%]"), Ok(("", Cookie { value: "[10%]" })));
assert_eq!(Cookie::parse("[%]"), Ok(("", Cookie { value: "[%]" })));
assert_eq!(Cookie::parse("[/]"), Ok(("", Cookie { value: "[/]" })));
assert_eq!(
Cookie::parse("[100/]"),
Some(("", Cookie { value: "[100/]" }))
Ok(("", Cookie { value: "[100/]" }))
);
assert_eq!(
Cookie::parse("[/100]"),
Some(("", Cookie { value: "[/100]" }))
Ok(("", Cookie { value: "[/100]" }))
);
assert_eq!(Cookie::parse("[10% ]"), None);
assert_eq!(Cookie::parse("[1//100]"), None);
assert_eq!(Cookie::parse("[1\\100]"), None);
assert_eq!(Cookie::parse("[10%%]"), None);
assert!(Cookie::parse("[10% ]").is_err());
assert!(Cookie::parse("[1//100]").is_err());
assert!(Cookie::parse("[1\\100]").is_err());
assert!(Cookie::parse("[10%%]").is_err());
}

View file

@ -1,6 +1,11 @@
use memchr::memchr_iter;
use crate::elements::Element;
use crate::parsers::{eol, take_lines_till};
use nom::{
bytes::complete::{tag, take_while1},
sequence::delimited,
IResult,
};
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
@ -11,47 +16,16 @@ pub struct Drawer<'a> {
impl Drawer<'_> {
#[inline]
pub(crate) fn parse(text: &str) -> Option<(&str, Element<'_>, &str)> {
debug_assert!(text.starts_with(':'));
pub(crate) fn parse(input: &str) -> IResult<&str, (Element<'_>, &str)> {
let (input, name) = delimited(
tag(":"),
take_while1(|c: char| c.is_ascii_alphabetic() || c == '-' || c == '_'),
tag(":"),
)(input)?;
let (input, _) = eol(input)?;
let (input, contents) = take_lines_till(|line| line.eq_ignore_ascii_case(":END:"))(input)?;
let mut lines = memchr_iter(b'\n', text.as_bytes());
let (name, off) = lines
.next()
.map(|i| (text[1..i].trim_end(), i + 1))
.filter(|(name, _)| {
name.ends_with(':')
&& name[0..name.len() - 1]
.as_bytes()
.iter()
.all(|&c| c.is_ascii_alphabetic() || c == b'-' || c == b'_')
})?;
let mut pos = off;
for i in lines {
if text[pos..i].trim().eq_ignore_ascii_case(":END:") {
return Some((
&text[i + 1..],
Element::Drawer(Drawer {
name: &name[0..name.len() - 1],
}),
&text[off..pos],
));
}
pos = i + 1;
}
if text[pos..].trim().eq_ignore_ascii_case(":END:") {
Some((
"",
Element::Drawer(Drawer {
name: &name[0..name.len() - 1],
}),
&text[off..pos],
))
} else {
None
}
Ok((input, (Element::Drawer(Drawer { name }), contents)))
}
}
@ -59,10 +33,12 @@ impl Drawer<'_> {
fn parse() {
assert_eq!(
Drawer::parse(":PROPERTIES:\n :CUSTOM_ID: id\n :END:"),
Some((
Ok((
"",
Element::Drawer(Drawer { name: "PROPERTIES" }),
" :CUSTOM_ID: id\n"
(
Element::Drawer(Drawer { name: "PROPERTIES" }),
" :CUSTOM_ID: id\n"
)
))
)
}

View file

@ -1,6 +1,11 @@
use crate::elements::Element;
use crate::parsers::{take_lines_till, take_until_eol};
use memchr::{memchr, memchr_iter};
use nom::{
bytes::complete::tag_no_case,
character::complete::{alpha1, space1},
IResult,
};
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
@ -13,60 +18,24 @@ pub struct DynBlock<'a> {
impl DynBlock<'_> {
#[inline]
pub(crate) fn parse(text: &str) -> Option<(&str, Element<'_>, &str)> {
debug_assert!(text.starts_with("#+"));
pub(crate) fn parse(input: &str) -> IResult<&str, (Element<'_>, &str)> {
let (input, _) = tag_no_case("#+BEGIN:")(input)?;
let (input, _) = space1(input)?;
let (input, name) = alpha1(input)?;
let (input, args) = take_until_eol(input)?;
if text.len() <= "#+BEGIN: ".len() || !text[2..9].eq_ignore_ascii_case("BEGIN: ") {
return None;
}
let (input, contents) = take_lines_till(|line| line.eq_ignore_ascii_case("#+END:"))(input)?;
let bytes = text.as_bytes();
let mut lines = memchr_iter(b'\n', bytes);
let (name, para, off) = lines
.next()
.map(|i| {
memchr(b' ', &bytes["#+BEGIN: ".len()..i])
.map(|x| {
(
&text["#+BEGIN: ".len().."#+BEGIN: ".len() + x],
Some(text["#+BEGIN: ".len() + x..i].trim()),
i + 1,
)
})
.unwrap_or((&text["#+BEGIN: ".len()..i], None, i + 1))
})
.filter(|(name, _, _)| name.as_bytes().iter().all(|&c| c.is_ascii_alphabetic()))?;
let mut pos = off;
for i in lines {
if text[pos..i].trim().eq_ignore_ascii_case("#+END:") {
return Some((
&text[i + 1..],
Element::DynBlock(DynBlock {
block_name: name,
arguments: para,
}),
&text[off..pos],
));
}
pos = i + 1;
}
if text[pos..].trim().eq_ignore_ascii_case("#+END:") {
Some((
"",
Ok((
input,
(
Element::DynBlock(DynBlock {
block_name: name,
arguments: para,
arguments: if args.is_empty() { None } else { Some(args) },
}),
&text[off..pos],
))
} else {
None
}
contents,
),
))
}
}
@ -75,13 +44,15 @@ fn parse() {
// TODO: testing
assert_eq!(
DynBlock::parse("#+BEGIN: clocktable :scope file\nCONTENTS\n#+END:\n"),
Some((
Ok((
"",
Element::DynBlock(DynBlock {
block_name: "clocktable",
arguments: Some(":scope file"),
}),
"CONTENTS\n"
(
Element::DynBlock(DynBlock {
block_name: "clocktable",
arguments: Some(":scope file"),
}),
"CONTENTS\n"
)
))
);
}

View file

@ -1,5 +1,5 @@
use bytecount::count;
use memchr::memchr;
use memchr::memchr_iter;
#[inline]
pub(crate) fn parse(text: &str, marker: u8) -> Option<(&str, &str)> {
@ -11,31 +11,27 @@ pub(crate) fn parse(text: &str, marker: u8) -> Option<(&str, &str)> {
return None;
}
let end = memchr(marker, &bytes[1..]).filter(|&i| count(&bytes[1..=i], b'\n') < 2)?;
if bytes[end].is_ascii_whitespace() {
return None;
for i in memchr_iter(marker, bytes).skip(1) {
if count(&bytes[1..i], b'\n') >= 2 {
break;
} else if validate_marker(i, text) {
return Some((&text[i + 1..], &text[1..i]));
}
}
if let Some(&post) = bytes.get(end + 2) {
if post == b' '
|| post == b'-'
|| post == b'.'
|| post == b','
|| post == b':'
|| post == b'!'
|| post == b'?'
|| post == b'\''
|| post == b'\n'
|| post == b')'
|| post == b'}'
{
Some((&text[end + 2..], &text[1..end + 1]))
} else {
None
None
}
fn validate_marker(pos: usize, text: &str) -> bool {
if text.as_bytes()[pos - 1].is_ascii_whitespace() {
false
} else if let Some(&post) = text.as_bytes().get(pos + 1) {
match post {
b' ' | b'-' | b'.' | b',' | b':' | b'!' | b'?' | b'\'' | b'\n' | b')' | b'}' => true,
_ => false,
}
} else {
Some((&text[end + 2..], &text[1..end + 1]))
true
}
}
@ -46,6 +42,7 @@ mod tests {
use super::parse;
assert_eq!(parse("*bold*", b'*'), Some(("", "bold")));
assert_eq!(parse("*bo*ld*", b'*'), Some(("", "bo*ld")));
assert_eq!(parse("*bo\nld*", b'*'), Some(("", "bo\nld")));
assert_eq!(parse("*bold*a", b'*'), None);
assert_eq!(parse("*bold*", b'/'), None);

View file

@ -1,6 +1,7 @@
use memchr::memchr;
use nom::{
bytes::complete::{tag, take_while1},
sequence::delimited,
IResult,
};
@ -12,10 +13,12 @@ pub struct FnDef<'a> {
}
fn parse_label(input: &str) -> IResult<&str, &str> {
let (input, _) = tag("[fn:")(input)?;
let (input, label) =
take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_')(input)?;
let (input, _) = tag("]")(input)?;
let (input, label) = delimited(
tag("[fn:"),
take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'),
tag("]"),
)(input)?;
Ok((input, label))
}

View file

@ -1,55 +1,46 @@
use memchr::{memchr2, memchr2_iter};
use memchr::memchr2_iter;
use nom::{
bytes::complete::{tag, take_while},
combinator::opt,
error::ErrorKind,
error_position,
sequence::preceded,
Err, IResult,
};
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[derive(Debug)]
pub struct FnRef<'a> {
#[cfg_attr(feature = "serde", serde(skip_serializing_if = "Option::is_none"))]
pub label: Option<&'a str>,
pub label: &'a str,
#[cfg_attr(feature = "serde", serde(skip_serializing_if = "Option::is_none"))]
pub definition: Option<&'a str>,
}
/// Takes everything up to, but not including, the `]` that closes a bracket
/// which is treated as already open when this parser starts.
///
/// Nested `[` … `]` pairs inside the taken text are skipped over. On success
/// the remaining input starts at the closing `]` itself. Fails with
/// `ErrorKind::Tag` when no such closing bracket exists.
fn balanced_brackets(input: &str) -> IResult<&str, &str> {
    let bytes = input.as_bytes();
    // number of `[` seen so far that still wait for their own `]`
    let mut depth = 0usize;

    for pos in memchr2_iter(b'[', b']', bytes) {
        match bytes[pos] {
            b'[' => depth += 1,
            // a `]` that closes a nested pair
            _ if depth > 0 => depth -= 1,
            // a `]` with nothing nested left open: this is the one we want
            _ => {
                let (inner, rest) = input.split_at(pos);
                return Ok((rest, inner));
            }
        }
    }

    Err(Err::Error(error_position!(input, ErrorKind::Tag)))
}
impl FnRef<'_> {
#[inline]
pub(crate) fn parse(text: &str) -> Option<(&str, FnRef<'_>)> {
debug_assert!(text.starts_with("[fn:"));
pub(crate) fn parse(input: &str) -> IResult<&str, FnRef<'_>> {
let (input, _) = tag("[fn:")(input)?;
let (input, label) =
take_while(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_')(input)?;
let (input, definition) = opt(preceded(tag(":"), balanced_brackets))(input)?;
let (input, _) = tag("]")(input)?;
let bytes = text.as_bytes();
let (label, off) = memchr2(b']', b':', &bytes["[fn:".len()..])
.filter(|&i| {
bytes["[fn:".len().."[fn:".len() + i]
.iter()
.all(|&c| c.is_ascii_alphanumeric() || c == b'-' || c == b'_')
})
.map(|i| {
(
if i == 0 {
None
} else {
Some(&text["[fn:".len().."[fn:".len() + i])
},
"[fn:".len() + i,
)
})?;
let (definition, off) = if bytes[off] == b':' {
let mut pairs = 1;
memchr2_iter(b'[', b']', &bytes[off..])
.find(|&i| {
if bytes[i + off] == b'[' {
pairs += 1;
} else {
pairs -= 1;
}
pairs == 0
})
.map(|i| (Some(&text[off + 1..off + i]), i + off + 1))?
} else {
(None, off + 1)
};
Some((&text[off..], FnRef { label, definition }))
Ok((input, FnRef { label, definition }))
}
}
@ -57,43 +48,44 @@ impl FnRef<'_> {
fn parse() {
assert_eq!(
FnRef::parse("[fn:1]"),
Some((
Ok((
"",
FnRef {
label: Some("1"),
label: "1",
definition: None
},
))
);
assert_eq!(
FnRef::parse("[fn:1:2]"),
Some((
Ok((
"",
FnRef {
label: Some("1"),
label: "1",
definition: Some("2")
},
))
);
assert_eq!(
FnRef::parse("[fn::2]"),
Some((
Ok((
"",
FnRef {
label: None,
label: "",
definition: Some("2")
},
))
);
assert_eq!(
FnRef::parse("[fn::[]]"),
Some((
Ok((
"",
FnRef {
label: None,
label: "",
definition: Some("[]")
},
))
);
assert_eq!(FnRef::parse("[fn::[]"), None);
assert!(FnRef::parse("[fn::[]").is_err());
}

View file

@ -29,9 +29,8 @@ impl<'a> InlineCall<'a> {
let (input, _) = tag("call_")(input)?;
let (input, name) = take_till(|c| c == '[' || c == '\n' || c == '(' || c == ')')(input)?;
let (input, inside_header) = opt(header)(input)?;
let (input, _) = tag("(")(input)?;
let (input, arguments) = take_till(|c| c == ')' || c == '\n')(input)?;
let (input, _) = tag(")")(input)?;
let (input, arguments) =
delimited(tag("("), take_till(|c| c == ')' || c == '\n'), tag(")"))(input)?;
let (input, end_header) = opt(header)(input)?;
Ok((

View file

@ -28,9 +28,8 @@ impl InlineSrc<'_> {
take_till(|c| c == '\n' || c == ']'),
tag("]"),
))(input)?;
let (input, _) = tag("{")(input)?;
let (input, body) = take_till(|c| c == '\n' || c == '}')(input)?;
let (input, _) = tag("}")(input)?;
let (input, body) =
delimited(tag("{"), take_till(|c| c == '\n' || c == '}'), tag("}"))(input)?;
Ok((
input,

View file

@ -1,11 +1,12 @@
use nom::{
bytes::complete::{tag, take_till, take_while},
combinator::{map, opt},
bytes::complete::{tag, take_till},
combinator::opt,
sequence::delimited,
IResult,
};
use crate::elements::Element;
use crate::parsers::take_until_eol;
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
@ -24,20 +25,19 @@ pub struct BabelCall<'a> {
pub value: &'a str,
}
fn optional(input: &str) -> IResult<&str, &str> {
delimited(tag("["), take_till(|c| c == ']' || c == '\n'), tag("]"))(input)
}
impl Keyword<'_> {
#[inline]
pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> {
let (input, _) = tag("#+")(input)?;
let (input, key) =
take_till(|c: char| c.is_ascii_whitespace() || c == ':' || c == '[')(input)?;
let (input, optional) = opt(optional)(input)?;
let (input, optional) = opt(delimited(
tag("["),
take_till(|c| c == ']' || c == '\n'),
tag("]"),
))(input)?;
let (input, _) = tag(":")(input)?;
let (input, value) = map(take_while(|c| c != '\n'), str::trim)(input)?;
let (input, _) = opt(tag("\n"))(input)?;
let (input, value) = take_until_eol(input)?;
if key.eq_ignore_ascii_case("CALL") {
Ok((input, Element::BabelCall(BabelCall { value })))

View file

@ -1,6 +1,7 @@
use nom::{
bytes::complete::{tag, take_while},
combinator::opt,
sequence::delimited,
IResult,
};
@ -18,16 +19,16 @@ pub struct Link<'a> {
impl Link<'_> {
#[inline]
pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> {
let (input, _) = tag("[[")(input)?;
let (input, path) =
take_while(|c: char| c != '<' && c != '>' && c != '\n' && c != ']')(input)?;
let (input, _) = tag("]")(input)?;
let (input, desc) = opt(|input| {
let (input, _) = tag("[")(input)?;
let (input, desc) = take_while(|c: char| c != '[' && c != ']')(input)?;
let (input, _) = tag("]")(input)?;
Ok((input, desc))
})(input)?;
let (input, path) = delimited(
tag("[["),
take_while(|c: char| c != '<' && c != '>' && c != '\n' && c != ']'),
tag("]"),
)(input)?;
let (input, desc) = opt(delimited(
tag("["),
take_while(|c: char| c != '[' && c != ']'),
tag("]"),
))(input)?;
let (input, _) = tag("]")(input)?;
Ok((input, Element::Link(Link { path, desc })))
}

View file

@ -1,6 +1,7 @@
use nom::{
bytes::complete::{tag, take, take_until, take_while1},
combinator::{opt, verify},
sequence::delimited,
IResult,
};
@ -23,12 +24,7 @@ impl Macros<'_> {
take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'),
|s: &str| s.starts_with(|c: char| c.is_ascii_alphabetic()),
)(input)?;
let (input, arguments) = opt(|input| {
let (input, _) = tag("(")(input)?;
let (input, args) = take_until(")}}}")(input)?;
let (input, _) = take(1usize)(input)?;
Ok((input, args))
})(input)?;
let (input, arguments) = opt(delimited(tag("("), take_until(")}}}"), take(1usize)))(input)?;
let (input, _) = tag("}}}")(input)?;
Ok((input, Element::Macros(Macros { name, arguments })))

View file

@ -1,6 +1,7 @@
use nom::{
bytes::complete::{tag, take_while},
combinator::verify,
sequence::delimited,
IResult,
};
@ -15,12 +16,14 @@ pub struct RadioTarget;
impl RadioTarget {
#[inline]
pub(crate) fn parse(input: &str) -> IResult<&str, (Element, &str)> {
let (input, _) = tag("<<<")(input)?;
let (input, contents) = verify(
take_while(|c: char| c != '<' && c != '\n' && c != '>'),
|s: &str| s.starts_with(|c| c != ' ') && s.ends_with(|c| c != ' '),
let (input, contents) = delimited(
tag("<<<"),
verify(
take_while(|c: char| c != '<' && c != '\n' && c != '>'),
|s: &str| s.starts_with(|c| c != ' ') && s.ends_with(|c| c != ' '),
),
tag(">>>"),
)(input)?;
let (input, _) = tag(">>>")(input)?;
Ok((input, (Element::RadioTarget(RadioTarget), contents)))
}

View file

@ -1,13 +1,8 @@
use nom::{
branch::alt,
bytes::complete::{tag, take_while_m_n},
character::complete::space0,
error::ErrorKind,
Err, IResult,
};
use nom::{bytes::complete::take_while_m_n, character::complete::space0, IResult};
use std::usize;
use crate::elements::Element;
use crate::parsers::eol;
pub struct Rule;
@ -16,20 +11,11 @@ impl Rule {
pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> {
let (input, _) = space0(input)?;
let (input, _) = take_while_m_n(5, usize::MAX, |c| c == '-')(input)?;
let (input, _) = space0(input)?;
let (input, _) = alt((tag("\n"), eof))(input)?;
let (input, _) = eol(input)?;
Ok((input, Element::Rule))
}
}
fn eof(input: &str) -> IResult<&str, &str> {
if input.is_empty() {
Ok(("", ""))
} else {
Err(Err::Error(("", ErrorKind::Tag)))
}
}
#[test]
fn parse() {
assert_eq!(Rule::parse("-----"), Ok(("", Element::Rule)));

View file

@ -1,5 +1,6 @@
use nom::{
bytes::complete::{tag, take, take_until, take_while1},
sequence::{delimited, separated_pair},
IResult,
};
@ -16,11 +17,15 @@ pub struct Snippet<'a> {
impl Snippet<'_> {
#[inline]
pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> {
let (input, _) = tag("@@")(input)?;
let (input, name) = take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-')(input)?;
let (input, _) = tag(":")(input)?;
let (input, value) = take_until("@@")(input)?;
let (input, _) = take(2usize)(input)?;
let (input, (name, value)) = delimited(
tag("@@"),
separated_pair(
take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-'),
tag(":"),
take_until("@@"),
),
take(2usize),
)(input)?;
Ok((input, Element::Snippet(Snippet { name, value })))
}

View file

@ -1,6 +1,7 @@
use nom::{
bytes::complete::{tag, take_while},
combinator::verify,
sequence::delimited,
IResult,
};
@ -16,12 +17,14 @@ pub struct Target<'a> {
impl Target<'_> {
#[inline]
pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> {
let (input, _) = tag("<<")(input)?;
let (input, target) = verify(
take_while(|c: char| c != '<' && c != '\n' && c != '>'),
|s: &str| s.starts_with(|c| c != ' ') && s.ends_with(|c| c != ' '),
let (input, target) = delimited(
tag("<<"),
verify(
take_while(|c: char| c != '<' && c != '\n' && c != '>'),
|s: &str| s.starts_with(|c| c != ' ') && s.ends_with(|c| c != ' '),
),
tag(">>"),
)(input)?;
let (input, _) = tag(">>")(input)?;
Ok((input, Element::Target(Target { target })))
}

View file

@ -2,6 +2,7 @@ use nom::{
bytes::complete::{tag, take, take_till, take_while, take_while_m_n},
character::complete::{space0, space1},
combinator::{map, map_res, opt},
sequence::preceded,
IResult,
};
@ -51,13 +52,9 @@ fn parse_datetime(input: &str) -> IResult<&str, Datetime<'_>> {
&& c != ']'
&& c != '>'
})(input)?;
let (input, (hour, minute)) = map(
opt(|input| {
let (input, _) = space1(input)?;
parse_time(input)
}),
|time| (time.map(|t| t.0), time.map(|t| t.1)),
)(input)?;
let (input, (hour, minute)) = map(opt(preceded(space1, parse_time)), |time| {
(time.map(|t| t.0), time.map(|t| t.1))
})(input)?;
Ok((
input,

View file

@ -56,10 +56,7 @@ pub trait OrgHandler<E: From<Error>> {
}
Code { value } => write!(w, "~{}~", value)?,
FnRef(fn_ref) => {
write!(&mut w, "[fn:")?;
if let Some(label) = fn_ref.label {
write!(&mut w, "{}", label)?;
}
write!(&mut w, "[fn:{}", fn_ref.label)?;
if let Some(definition) = fn_ref.definition {
write!(&mut w, ":{}", definition)?;
}

View file

@ -221,6 +221,7 @@ pub mod elements;
pub mod export;
mod iter;
mod org;
mod parsers;
#[cfg(feature = "serde")]
mod serde;

View file

@ -175,7 +175,7 @@ fn is_headline(text: &str) -> Option<usize> {
} else {
None
}
} else if text.len() > 0 && text.as_bytes().iter().all(|&c| c == b'*') {
} else if !text.is_empty() && text.as_bytes().iter().all(|&c| c == b'*') {
Some(text.len())
} else {
None
@ -291,7 +291,7 @@ fn parse_block<'a>(
let tail = contents.trim_start();
if let Some((tail, clock)) = Clock::parse(tail) {
if let Ok((tail, clock)) = Clock::parse(tail) {
return Some((tail, arena.new_node(clock)));
}
@ -305,7 +305,7 @@ fn parse_block<'a>(
}
if tail.starts_with(':') {
if let Some((tail, drawer, _content)) = Drawer::parse(tail) {
if let Ok((tail, (drawer, _content))) = Drawer::parse(tail) {
return Some((tail, arena.new_node(drawer)));
}
}
@ -349,7 +349,7 @@ fn parse_block<'a>(
}
if tail.starts_with("#+") {
if let Some((tail, block, content)) = Block::parse(tail) {
if let Ok((tail, (block, content))) = Block::parse(tail) {
match &*block.name.to_uppercase() {
"CENTER" => {
let node = arena.new_node(Element::CenterBlock(CenterBlock {
@ -414,7 +414,7 @@ fn parse_block<'a>(
Some((tail, node))
}
}
} else if let Some((tail, dyn_block, content)) = DynBlock::parse(tail) {
} else if let Ok((tail, (dyn_block, content))) = DynBlock::parse(tail) {
let node = arena.new_node(dyn_block);
containers.push(Container::Block { content, node });
Some((tail, node))
@ -546,8 +546,8 @@ fn parse_inline<'a>(
b'[' => {
if contents[1..].starts_with("fn:") {
FnRef::parse(contents)
.map(|(tail, fn_ref)| (tail, fn_ref.into()))
.map(|(tail, element)| (tail, arena.new_node(element)))
.ok()
.map(|(tail, fn_ref)| (tail, arena.new_node(fn_ref.into())))
} else if bytes[1] == b'[' {
Link::parse(contents)
.ok()
@ -555,11 +555,11 @@ fn parse_inline<'a>(
} else {
Cookie::parse(contents)
.map(|(tail, cookie)| (tail, cookie.into()))
.or_else(|| {
.or_else(|_| {
Timestamp::parse_inactive(contents)
.map(|(tail, timestamp)| (tail, timestamp.into()))
.ok()
})
.ok()
.map(|(tail, element)| (tail, arena.new_node(element)))
}
}

45
src/parsers.rs Normal file
View file

@ -0,0 +1,45 @@
// reused nom parsers
use memchr::{memchr, memchr_iter};
use nom::{
bytes::complete::tag, character::complete::space0, error::ErrorKind, error_position, Err,
IResult,
};
/// Matches the end of a line: optional horizontal whitespace (as accepted by
/// `space0`) followed by either the end of the input or a single `"\n"`.
///
/// Returns the input after the newline, or `""` when the input ran out.
pub(crate) fn eol(input: &str) -> IResult<&str, ()> {
    let (rest, _) = space0(input)?;

    // nothing left after the trailing blanks: end of input counts as end of line
    if rest.is_empty() {
        return Ok(("", ()));
    }

    tag("\n")(rest).map(|(rest, _)| (rest, ()))
}
/// Takes the rest of the current line and returns it with surrounding
/// whitespace trimmed.
///
/// The first `'\n'`, when present, is consumed but is not part of the returned
/// line. Without a newline the whole input is the line and the remaining input
/// is `""`. This parser never fails.
pub(crate) fn take_until_eol(input: &str) -> IResult<&str, &str> {
    match memchr(b'\n', input.as_bytes()) {
        Some(newline) => {
            let (line, rest) = input.split_at(newline);
            // `rest` still begins with the '\n'; step over it
            Ok((&rest[1..], line.trim()))
        }
        None => Ok(("", input.trim())),
    }
}
pub(crate) fn take_lines_till(
predicate: impl Fn(&str) -> bool,
) -> impl Fn(&str) -> IResult<&str, &str> {
move |input| {
let mut start = 0;
for i in memchr_iter(b'\n', input.as_bytes()) {
if predicate(input[start..i].trim()) {
return Ok((&input[i + 1..], &input[0..start]));
}
start = i + 1;
}
if predicate(input[start..].trim()) {
Ok(("", &input[0..start]))
} else {
Err(Err::Error(error_position!(input, ErrorKind::TakeTill1)))
}
}
}