refactor(elements): rewrite most parsers with nom
This commit is contained in:
parent
8d18fb04c1
commit
37c33a82f0
|
@ -1,4 +1,6 @@
|
|||
use memchr::{memchr, memchr_iter};
|
||||
use nom::{bytes::complete::tag_no_case, character::complete::alpha1, sequence::preceded, IResult};
|
||||
|
||||
use crate::parsers::{take_lines_till, take_until_eol};
|
||||
|
||||
#[cfg_attr(test, derive(PartialEq))]
|
||||
#[derive(Debug)]
|
||||
|
@ -9,40 +11,23 @@ pub struct Block<'a> {
|
|||
|
||||
impl Block<'_> {
|
||||
#[inline]
|
||||
pub(crate) fn parse(text: &str) -> Option<(&str, Block<'_>, &str)> {
|
||||
debug_assert!(text.starts_with("#+"));
|
||||
pub(crate) fn parse(input: &str) -> IResult<&str, (Block<'_>, &str)> {
|
||||
let (input, name) = preceded(tag_no_case("#+BEGIN_"), alpha1)(input)?;
|
||||
let (input, args) = take_until_eol(input)?;
|
||||
let end_line = format!(r"#+END_{}", name);
|
||||
let (input, contents) =
|
||||
take_lines_till(|line| line.eq_ignore_ascii_case(&end_line))(input)?;
|
||||
|
||||
if text.len() <= 8 || text[2..8].to_uppercase() != "BEGIN_" {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut lines = memchr_iter(b'\n', text.as_bytes());
|
||||
|
||||
let (name, args, off) = lines
|
||||
.next()
|
||||
.map(|i| {
|
||||
memchr(b' ', &text.as_bytes()[8..i])
|
||||
.map(|x| (&text[8..8 + x], Some(text[8 + x..i].trim()), i + 1))
|
||||
.unwrap_or((&text[8..i], None, i + 1))
|
||||
})
|
||||
.filter(|(name, _, _)| name.as_bytes().iter().all(|&c| c.is_ascii_alphabetic()))?;
|
||||
|
||||
let mut pos = off;
|
||||
let end = format!(r"#+END_{}", name.to_uppercase());
|
||||
|
||||
for i in lines {
|
||||
if text[pos..i].trim().eq_ignore_ascii_case(&end) {
|
||||
return Some((&text[i + 1..], Block { name, args }, &text[off..pos]));
|
||||
}
|
||||
|
||||
pos = i + 1;
|
||||
}
|
||||
|
||||
if text[pos..].trim().eq_ignore_ascii_case(&end) {
|
||||
Some(("", Block { name, args }, &text[off..pos]))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
Ok((
|
||||
input,
|
||||
(
|
||||
Block {
|
||||
name,
|
||||
args: if args.is_empty() { None } else { Some(args) },
|
||||
},
|
||||
contents,
|
||||
),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -50,24 +35,28 @@ impl Block<'_> {
|
|||
fn parse() {
|
||||
assert_eq!(
|
||||
Block::parse("#+BEGIN_SRC\n#+END_SRC"),
|
||||
Some((
|
||||
Ok((
|
||||
"",
|
||||
(
|
||||
Block {
|
||||
name: "SRC",
|
||||
args: None,
|
||||
},
|
||||
""
|
||||
)
|
||||
))
|
||||
);
|
||||
assert_eq!(
|
||||
Block::parse("#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n#+END_SRC\n"),
|
||||
Some((
|
||||
Ok((
|
||||
"",
|
||||
(
|
||||
Block {
|
||||
name: "SRC",
|
||||
args: Some("javascript"),
|
||||
},
|
||||
"console.log('Hello World!');\n"
|
||||
)
|
||||
))
|
||||
);
|
||||
// TODO: more testing
|
||||
|
|
|
@ -1,5 +1,13 @@
|
|||
use nom::sequence::separated_pair;
|
||||
use nom::{
|
||||
bytes::complete::tag,
|
||||
character::complete::{char, digit1, space0},
|
||||
combinator::{peek, recognize},
|
||||
IResult,
|
||||
};
|
||||
|
||||
use crate::elements::{Datetime, Element, Timestamp};
|
||||
use memchr::memchr;
|
||||
use crate::parsers::eol;
|
||||
|
||||
/// clock elements
|
||||
///
|
||||
|
@ -25,24 +33,11 @@ pub enum Clock<'a> {
|
|||
}
|
||||
|
||||
impl Clock<'_> {
|
||||
pub(crate) fn parse(text: &str) -> Option<(&str, Element<'_>)> {
|
||||
let (text, eol) = memchr(b'\n', text.as_bytes())
|
||||
.map(|i| (text[..i].trim(), i + 1))
|
||||
.unwrap_or_else(|| (text.trim(), text.len()));
|
||||
|
||||
if !text.starts_with("CLOCK:") {
|
||||
return None;
|
||||
}
|
||||
|
||||
let tail = &text["CLOCK:".len()..].trim_start();
|
||||
|
||||
if !tail.starts_with('[') {
|
||||
return None;
|
||||
}
|
||||
|
||||
let (tail, timestamp) = Timestamp::parse_inactive(tail).ok()?;
|
||||
|
||||
let tail = tail.trim();
|
||||
pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> {
|
||||
let (input, _) = tag("CLOCK:")(input)?;
|
||||
let (input, _) = space0(input)?;
|
||||
let (input, _) = peek(tag("["))(input)?;
|
||||
let (input, timestamp) = Timestamp::parse_inactive(input)?;
|
||||
|
||||
match timestamp {
|
||||
Timestamp::InactiveRange {
|
||||
|
@ -51,16 +46,14 @@ impl Clock<'_> {
|
|||
repeater,
|
||||
delay,
|
||||
} => {
|
||||
if tail.starts_with("=>") {
|
||||
let duration = &tail[3..].trim();
|
||||
let colon = memchr(b':', duration.as_bytes())?;
|
||||
if duration.as_bytes()[0..colon].iter().all(u8::is_ascii_digit)
|
||||
&& colon == duration.len() - 3
|
||||
&& duration.as_bytes()[colon + 1].is_ascii_digit()
|
||||
&& duration.as_bytes()[colon + 2].is_ascii_digit()
|
||||
{
|
||||
Some((
|
||||
&text[eol..],
|
||||
let (input, _) = space0(input)?;
|
||||
let (input, _) = tag("=>")(input)?;
|
||||
let (input, _) = space0(input)?;
|
||||
let (input, duration) =
|
||||
recognize(separated_pair(digit1, char(':'), digit1))(input)?;
|
||||
let (input, _) = eol(input)?;
|
||||
Ok((
|
||||
input,
|
||||
Element::Clock(Clock::Closed {
|
||||
start,
|
||||
end,
|
||||
|
@ -69,32 +62,23 @@ impl Clock<'_> {
|
|||
duration,
|
||||
}),
|
||||
))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
Timestamp::Inactive {
|
||||
start,
|
||||
repeater,
|
||||
delay,
|
||||
} => {
|
||||
if tail.is_empty() {
|
||||
Some((
|
||||
&text[eol..],
|
||||
let (input, _) = eol(input)?;
|
||||
Ok((
|
||||
input,
|
||||
Element::Clock(Clock::Running {
|
||||
start,
|
||||
repeater,
|
||||
delay,
|
||||
}),
|
||||
))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -154,7 +138,7 @@ impl Clock<'_> {
|
|||
fn parse() {
|
||||
assert_eq!(
|
||||
Clock::parse("CLOCK: [2003-09-16 Tue 09:39]"),
|
||||
Some((
|
||||
Ok((
|
||||
"",
|
||||
Element::Clock(Clock::Running {
|
||||
start: Datetime {
|
||||
|
@ -172,7 +156,7 @@ fn parse() {
|
|||
);
|
||||
assert_eq!(
|
||||
Clock::parse("CLOCK: [2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39] => 1:00"),
|
||||
Some((
|
||||
Ok((
|
||||
"",
|
||||
Element::Clock(Clock::Closed {
|
||||
start: Datetime {
|
||||
|
|
|
@ -1,4 +1,11 @@
|
|||
use memchr::{memchr, memchr2};
|
||||
use nom::{
|
||||
branch::alt,
|
||||
bytes::complete::tag,
|
||||
character::complete::digit0,
|
||||
combinator::recognize,
|
||||
sequence::{delimited, pair, separated_pair},
|
||||
IResult,
|
||||
};
|
||||
|
||||
#[cfg_attr(test, derive(PartialEq))]
|
||||
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
|
||||
|
@ -9,31 +16,17 @@ pub struct Cookie<'a> {
|
|||
|
||||
impl Cookie<'_> {
|
||||
#[inline]
|
||||
pub(crate) fn parse(src: &str) -> Option<(&str, Cookie<'_>)> {
|
||||
debug_assert!(src.starts_with('['));
|
||||
pub(crate) fn parse(input: &str) -> IResult<&str, Cookie<'_>> {
|
||||
let (input, value) = recognize(delimited(
|
||||
tag("["),
|
||||
alt((
|
||||
separated_pair(digit0, tag("/"), digit0),
|
||||
pair(digit0, tag("%")),
|
||||
)),
|
||||
tag("]"),
|
||||
))(input)?;
|
||||
|
||||
let bytes = src.as_bytes();
|
||||
let num1 =
|
||||
memchr2(b'%', b'/', bytes).filter(|&i| bytes[1..i].iter().all(u8::is_ascii_digit))?;
|
||||
|
||||
if bytes[num1] == b'%' && *bytes.get(num1 + 1)? == b']' {
|
||||
Some((
|
||||
&src[num1 + 2..],
|
||||
Cookie {
|
||||
value: &src[0..num1 + 2],
|
||||
},
|
||||
))
|
||||
} else {
|
||||
let num2 = memchr(b']', bytes)
|
||||
.filter(|&i| bytes[num1 + 1..i].iter().all(u8::is_ascii_digit))?;
|
||||
|
||||
Some((
|
||||
&src[num2 + 1..],
|
||||
Cookie {
|
||||
value: &src[0..num2 + 1],
|
||||
},
|
||||
))
|
||||
}
|
||||
Ok((input, Cookie { value }))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -41,29 +34,26 @@ impl Cookie<'_> {
|
|||
fn parse() {
|
||||
assert_eq!(
|
||||
Cookie::parse("[1/10]"),
|
||||
Some(("", Cookie { value: "[1/10]" }))
|
||||
Ok(("", Cookie { value: "[1/10]" }))
|
||||
);
|
||||
assert_eq!(
|
||||
Cookie::parse("[1/1000]"),
|
||||
Some(("", Cookie { value: "[1/1000]" }))
|
||||
Ok(("", Cookie { value: "[1/1000]" }))
|
||||
);
|
||||
assert_eq!(
|
||||
Cookie::parse("[10%]"),
|
||||
Some(("", Cookie { value: "[10%]" }))
|
||||
);
|
||||
assert_eq!(Cookie::parse("[%]"), Some(("", Cookie { value: "[%]" })));
|
||||
assert_eq!(Cookie::parse("[/]"), Some(("", Cookie { value: "[/]" })));
|
||||
assert_eq!(Cookie::parse("[10%]"), Ok(("", Cookie { value: "[10%]" })));
|
||||
assert_eq!(Cookie::parse("[%]"), Ok(("", Cookie { value: "[%]" })));
|
||||
assert_eq!(Cookie::parse("[/]"), Ok(("", Cookie { value: "[/]" })));
|
||||
assert_eq!(
|
||||
Cookie::parse("[100/]"),
|
||||
Some(("", Cookie { value: "[100/]" }))
|
||||
Ok(("", Cookie { value: "[100/]" }))
|
||||
);
|
||||
assert_eq!(
|
||||
Cookie::parse("[/100]"),
|
||||
Some(("", Cookie { value: "[/100]" }))
|
||||
Ok(("", Cookie { value: "[/100]" }))
|
||||
);
|
||||
|
||||
assert_eq!(Cookie::parse("[10% ]"), None);
|
||||
assert_eq!(Cookie::parse("[1//100]"), None);
|
||||
assert_eq!(Cookie::parse("[1\\100]"), None);
|
||||
assert_eq!(Cookie::parse("[10%%]"), None);
|
||||
assert!(Cookie::parse("[10% ]").is_err());
|
||||
assert!(Cookie::parse("[1//100]").is_err());
|
||||
assert!(Cookie::parse("[1\\100]").is_err());
|
||||
assert!(Cookie::parse("[10%%]").is_err());
|
||||
}
|
||||
|
|
|
@ -1,6 +1,11 @@
|
|||
use memchr::memchr_iter;
|
||||
|
||||
use crate::elements::Element;
|
||||
use crate::parsers::{eol, take_lines_till};
|
||||
|
||||
use nom::{
|
||||
bytes::complete::{tag, take_while1},
|
||||
sequence::delimited,
|
||||
IResult,
|
||||
};
|
||||
|
||||
#[cfg_attr(test, derive(PartialEq))]
|
||||
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
|
||||
|
@ -11,47 +16,16 @@ pub struct Drawer<'a> {
|
|||
|
||||
impl Drawer<'_> {
|
||||
#[inline]
|
||||
pub(crate) fn parse(text: &str) -> Option<(&str, Element<'_>, &str)> {
|
||||
debug_assert!(text.starts_with(':'));
|
||||
pub(crate) fn parse(input: &str) -> IResult<&str, (Element<'_>, &str)> {
|
||||
let (input, name) = delimited(
|
||||
tag(":"),
|
||||
take_while1(|c: char| c.is_ascii_alphabetic() || c == '-' || c == '_'),
|
||||
tag(":"),
|
||||
)(input)?;
|
||||
let (input, _) = eol(input)?;
|
||||
let (input, contents) = take_lines_till(|line| line.eq_ignore_ascii_case(":END:"))(input)?;
|
||||
|
||||
let mut lines = memchr_iter(b'\n', text.as_bytes());
|
||||
|
||||
let (name, off) = lines
|
||||
.next()
|
||||
.map(|i| (text[1..i].trim_end(), i + 1))
|
||||
.filter(|(name, _)| {
|
||||
name.ends_with(':')
|
||||
&& name[0..name.len() - 1]
|
||||
.as_bytes()
|
||||
.iter()
|
||||
.all(|&c| c.is_ascii_alphabetic() || c == b'-' || c == b'_')
|
||||
})?;
|
||||
|
||||
let mut pos = off;
|
||||
for i in lines {
|
||||
if text[pos..i].trim().eq_ignore_ascii_case(":END:") {
|
||||
return Some((
|
||||
&text[i + 1..],
|
||||
Element::Drawer(Drawer {
|
||||
name: &name[0..name.len() - 1],
|
||||
}),
|
||||
&text[off..pos],
|
||||
));
|
||||
}
|
||||
pos = i + 1;
|
||||
}
|
||||
|
||||
if text[pos..].trim().eq_ignore_ascii_case(":END:") {
|
||||
Some((
|
||||
"",
|
||||
Element::Drawer(Drawer {
|
||||
name: &name[0..name.len() - 1],
|
||||
}),
|
||||
&text[off..pos],
|
||||
))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
Ok((input, (Element::Drawer(Drawer { name }), contents)))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -59,10 +33,12 @@ impl Drawer<'_> {
|
|||
fn parse() {
|
||||
assert_eq!(
|
||||
Drawer::parse(":PROPERTIES:\n :CUSTOM_ID: id\n :END:"),
|
||||
Some((
|
||||
Ok((
|
||||
"",
|
||||
(
|
||||
Element::Drawer(Drawer { name: "PROPERTIES" }),
|
||||
" :CUSTOM_ID: id\n"
|
||||
)
|
||||
))
|
||||
)
|
||||
}
|
||||
|
|
|
@ -1,6 +1,11 @@
|
|||
use crate::elements::Element;
|
||||
use crate::parsers::{take_lines_till, take_until_eol};
|
||||
|
||||
use memchr::{memchr, memchr_iter};
|
||||
use nom::{
|
||||
bytes::complete::tag_no_case,
|
||||
character::complete::{alpha1, space1},
|
||||
IResult,
|
||||
};
|
||||
|
||||
#[cfg_attr(test, derive(PartialEq))]
|
||||
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
|
||||
|
@ -13,60 +18,24 @@ pub struct DynBlock<'a> {
|
|||
|
||||
impl DynBlock<'_> {
|
||||
#[inline]
|
||||
pub(crate) fn parse(text: &str) -> Option<(&str, Element<'_>, &str)> {
|
||||
debug_assert!(text.starts_with("#+"));
|
||||
pub(crate) fn parse(input: &str) -> IResult<&str, (Element<'_>, &str)> {
|
||||
let (input, _) = tag_no_case("#+BEGIN:")(input)?;
|
||||
let (input, _) = space1(input)?;
|
||||
let (input, name) = alpha1(input)?;
|
||||
let (input, args) = take_until_eol(input)?;
|
||||
|
||||
if text.len() <= "#+BEGIN: ".len() || !text[2..9].eq_ignore_ascii_case("BEGIN: ") {
|
||||
return None;
|
||||
}
|
||||
let (input, contents) = take_lines_till(|line| line.eq_ignore_ascii_case("#+END:"))(input)?;
|
||||
|
||||
let bytes = text.as_bytes();
|
||||
let mut lines = memchr_iter(b'\n', bytes);
|
||||
|
||||
let (name, para, off) = lines
|
||||
.next()
|
||||
.map(|i| {
|
||||
memchr(b' ', &bytes["#+BEGIN: ".len()..i])
|
||||
.map(|x| {
|
||||
Ok((
|
||||
input,
|
||||
(
|
||||
&text["#+BEGIN: ".len().."#+BEGIN: ".len() + x],
|
||||
Some(text["#+BEGIN: ".len() + x..i].trim()),
|
||||
i + 1,
|
||||
)
|
||||
})
|
||||
.unwrap_or((&text["#+BEGIN: ".len()..i], None, i + 1))
|
||||
})
|
||||
.filter(|(name, _, _)| name.as_bytes().iter().all(|&c| c.is_ascii_alphabetic()))?;
|
||||
|
||||
let mut pos = off;
|
||||
|
||||
for i in lines {
|
||||
if text[pos..i].trim().eq_ignore_ascii_case("#+END:") {
|
||||
return Some((
|
||||
&text[i + 1..],
|
||||
Element::DynBlock(DynBlock {
|
||||
block_name: name,
|
||||
arguments: para,
|
||||
arguments: if args.is_empty() { None } else { Some(args) },
|
||||
}),
|
||||
&text[off..pos],
|
||||
));
|
||||
}
|
||||
|
||||
pos = i + 1;
|
||||
}
|
||||
|
||||
if text[pos..].trim().eq_ignore_ascii_case("#+END:") {
|
||||
Some((
|
||||
"",
|
||||
Element::DynBlock(DynBlock {
|
||||
block_name: name,
|
||||
arguments: para,
|
||||
}),
|
||||
&text[off..pos],
|
||||
contents,
|
||||
),
|
||||
))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -75,13 +44,15 @@ fn parse() {
|
|||
// TODO: testing
|
||||
assert_eq!(
|
||||
DynBlock::parse("#+BEGIN: clocktable :scope file\nCONTENTS\n#+END:\n"),
|
||||
Some((
|
||||
Ok((
|
||||
"",
|
||||
(
|
||||
Element::DynBlock(DynBlock {
|
||||
block_name: "clocktable",
|
||||
arguments: Some(":scope file"),
|
||||
}),
|
||||
"CONTENTS\n"
|
||||
)
|
||||
))
|
||||
);
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use bytecount::count;
|
||||
use memchr::memchr;
|
||||
use memchr::memchr_iter;
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn parse(text: &str, marker: u8) -> Option<(&str, &str)> {
|
||||
|
@ -11,31 +11,27 @@ pub(crate) fn parse(text: &str, marker: u8) -> Option<(&str, &str)> {
|
|||
return None;
|
||||
}
|
||||
|
||||
let end = memchr(marker, &bytes[1..]).filter(|&i| count(&bytes[1..=i], b'\n') < 2)?;
|
||||
|
||||
if bytes[end].is_ascii_whitespace() {
|
||||
return None;
|
||||
for i in memchr_iter(marker, bytes).skip(1) {
|
||||
if count(&bytes[1..i], b'\n') >= 2 {
|
||||
break;
|
||||
} else if validate_marker(i, text) {
|
||||
return Some((&text[i + 1..], &text[1..i]));
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(&post) = bytes.get(end + 2) {
|
||||
if post == b' '
|
||||
|| post == b'-'
|
||||
|| post == b'.'
|
||||
|| post == b','
|
||||
|| post == b':'
|
||||
|| post == b'!'
|
||||
|| post == b'?'
|
||||
|| post == b'\''
|
||||
|| post == b'\n'
|
||||
|| post == b')'
|
||||
|| post == b'}'
|
||||
{
|
||||
Some((&text[end + 2..], &text[1..end + 1]))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
|
||||
fn validate_marker(pos: usize, text: &str) -> bool {
|
||||
if text.as_bytes()[pos - 1].is_ascii_whitespace() {
|
||||
false
|
||||
} else if let Some(&post) = text.as_bytes().get(pos + 1) {
|
||||
match post {
|
||||
b' ' | b'-' | b'.' | b',' | b':' | b'!' | b'?' | b'\'' | b'\n' | b')' | b'}' => true,
|
||||
_ => false,
|
||||
}
|
||||
} else {
|
||||
Some((&text[end + 2..], &text[1..end + 1]))
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -46,6 +42,7 @@ mod tests {
|
|||
use super::parse;
|
||||
|
||||
assert_eq!(parse("*bold*", b'*'), Some(("", "bold")));
|
||||
assert_eq!(parse("*bo*ld*", b'*'), Some(("", "bo*ld")));
|
||||
assert_eq!(parse("*bo\nld*", b'*'), Some(("", "bo\nld")));
|
||||
assert_eq!(parse("*bold*a", b'*'), None);
|
||||
assert_eq!(parse("*bold*", b'/'), None);
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
use memchr::memchr;
|
||||
use nom::{
|
||||
bytes::complete::{tag, take_while1},
|
||||
sequence::delimited,
|
||||
IResult,
|
||||
};
|
||||
|
||||
|
@ -12,10 +13,12 @@ pub struct FnDef<'a> {
|
|||
}
|
||||
|
||||
fn parse_label(input: &str) -> IResult<&str, &str> {
|
||||
let (input, _) = tag("[fn:")(input)?;
|
||||
let (input, label) =
|
||||
take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_')(input)?;
|
||||
let (input, _) = tag("]")(input)?;
|
||||
let (input, label) = delimited(
|
||||
tag("[fn:"),
|
||||
take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'),
|
||||
tag("]"),
|
||||
)(input)?;
|
||||
|
||||
Ok((input, label))
|
||||
}
|
||||
|
||||
|
|
|
@ -1,55 +1,46 @@
|
|||
use memchr::{memchr2, memchr2_iter};
|
||||
use memchr::memchr2_iter;
|
||||
use nom::{
|
||||
bytes::complete::{tag, take_while},
|
||||
combinator::opt,
|
||||
error::ErrorKind,
|
||||
error_position,
|
||||
sequence::preceded,
|
||||
Err, IResult,
|
||||
};
|
||||
|
||||
#[cfg_attr(test, derive(PartialEq))]
|
||||
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
|
||||
#[derive(Debug)]
|
||||
pub struct FnRef<'a> {
|
||||
#[cfg_attr(feature = "serde", serde(skip_serializing_if = "Option::is_none"))]
|
||||
pub label: Option<&'a str>,
|
||||
pub label: &'a str,
|
||||
#[cfg_attr(feature = "serde", serde(skip_serializing_if = "Option::is_none"))]
|
||||
pub definition: Option<&'a str>,
|
||||
}
|
||||
|
||||
/// Splits `input` at the `]` that closes a bracket assumed to be already open.
///
/// The scan visits every `[` and `]` byte in `input` while keeping a depth
/// counter that starts at 1. On success the result is
/// `(rest, inside)`, where `inside` is everything before the closing `]` and
/// `rest` starts at that `]` (it is not consumed here).
///
/// # Errors
///
/// Returns a recoverable nom error (`ErrorKind::Tag`, positioned at `input`)
/// when the scan finishes without finding the closing `]`.
fn balanced_brackets(input: &str) -> IResult<&str, &str> {
|
||||
// Depth 1: one bracket is treated as open before scanning begins.
let mut pairs = 1;
|
||||
for i in memchr2_iter(b'[', b']', input.as_bytes()) {
|
||||
if input.as_bytes()[i] == b'[' {
|
||||
pairs += 1;
|
||||
} else if pairs != 1 {
|
||||
// A `]` that closes a nested bracket, not the outermost one.
pairs -= 1;
|
||||
} else {
|
||||
// A `]` at depth 1: split here and leave the `]` in the remainder.
return Ok((&input[i..], &input[0..i]));
|
||||
}
|
||||
}
|
||||
Err(Err::Error(error_position!(input, ErrorKind::Tag)))
|
||||
}
|
||||
|
||||
impl FnRef<'_> {
|
||||
#[inline]
|
||||
pub(crate) fn parse(text: &str) -> Option<(&str, FnRef<'_>)> {
|
||||
debug_assert!(text.starts_with("[fn:"));
|
||||
pub(crate) fn parse(input: &str) -> IResult<&str, FnRef<'_>> {
|
||||
let (input, _) = tag("[fn:")(input)?;
|
||||
let (input, label) =
|
||||
take_while(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_')(input)?;
|
||||
let (input, definition) = opt(preceded(tag(":"), balanced_brackets))(input)?;
|
||||
let (input, _) = tag("]")(input)?;
|
||||
|
||||
let bytes = text.as_bytes();
|
||||
let (label, off) = memchr2(b']', b':', &bytes["[fn:".len()..])
|
||||
.filter(|&i| {
|
||||
bytes["[fn:".len().."[fn:".len() + i]
|
||||
.iter()
|
||||
.all(|&c| c.is_ascii_alphanumeric() || c == b'-' || c == b'_')
|
||||
})
|
||||
.map(|i| {
|
||||
(
|
||||
if i == 0 {
|
||||
None
|
||||
} else {
|
||||
Some(&text["[fn:".len().."[fn:".len() + i])
|
||||
},
|
||||
"[fn:".len() + i,
|
||||
)
|
||||
})?;
|
||||
|
||||
let (definition, off) = if bytes[off] == b':' {
|
||||
let mut pairs = 1;
|
||||
memchr2_iter(b'[', b']', &bytes[off..])
|
||||
.find(|&i| {
|
||||
if bytes[i + off] == b'[' {
|
||||
pairs += 1;
|
||||
} else {
|
||||
pairs -= 1;
|
||||
}
|
||||
pairs == 0
|
||||
})
|
||||
.map(|i| (Some(&text[off + 1..off + i]), i + off + 1))?
|
||||
} else {
|
||||
(None, off + 1)
|
||||
};
|
||||
|
||||
Some((&text[off..], FnRef { label, definition }))
|
||||
Ok((input, FnRef { label, definition }))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -57,43 +48,44 @@ impl FnRef<'_> {
|
|||
fn parse() {
|
||||
assert_eq!(
|
||||
FnRef::parse("[fn:1]"),
|
||||
Some((
|
||||
Ok((
|
||||
"",
|
||||
FnRef {
|
||||
label: Some("1"),
|
||||
label: "1",
|
||||
definition: None
|
||||
},
|
||||
))
|
||||
);
|
||||
assert_eq!(
|
||||
FnRef::parse("[fn:1:2]"),
|
||||
Some((
|
||||
Ok((
|
||||
"",
|
||||
FnRef {
|
||||
label: Some("1"),
|
||||
label: "1",
|
||||
definition: Some("2")
|
||||
},
|
||||
))
|
||||
);
|
||||
assert_eq!(
|
||||
FnRef::parse("[fn::2]"),
|
||||
Some((
|
||||
Ok((
|
||||
"",
|
||||
FnRef {
|
||||
label: None,
|
||||
label: "",
|
||||
definition: Some("2")
|
||||
},
|
||||
))
|
||||
);
|
||||
assert_eq!(
|
||||
FnRef::parse("[fn::[]]"),
|
||||
Some((
|
||||
Ok((
|
||||
"",
|
||||
FnRef {
|
||||
label: None,
|
||||
label: "",
|
||||
definition: Some("[]")
|
||||
},
|
||||
))
|
||||
);
|
||||
assert_eq!(FnRef::parse("[fn::[]"), None);
|
||||
|
||||
assert!(FnRef::parse("[fn::[]").is_err());
|
||||
}
|
||||
|
|
|
@ -29,9 +29,8 @@ impl<'a> InlineCall<'a> {
|
|||
let (input, _) = tag("call_")(input)?;
|
||||
let (input, name) = take_till(|c| c == '[' || c == '\n' || c == '(' || c == ')')(input)?;
|
||||
let (input, inside_header) = opt(header)(input)?;
|
||||
let (input, _) = tag("(")(input)?;
|
||||
let (input, arguments) = take_till(|c| c == ')' || c == '\n')(input)?;
|
||||
let (input, _) = tag(")")(input)?;
|
||||
let (input, arguments) =
|
||||
delimited(tag("("), take_till(|c| c == ')' || c == '\n'), tag(")"))(input)?;
|
||||
let (input, end_header) = opt(header)(input)?;
|
||||
|
||||
Ok((
|
||||
|
|
|
@ -28,9 +28,8 @@ impl InlineSrc<'_> {
|
|||
take_till(|c| c == '\n' || c == ']'),
|
||||
tag("]"),
|
||||
))(input)?;
|
||||
let (input, _) = tag("{")(input)?;
|
||||
let (input, body) = take_till(|c| c == '\n' || c == '}')(input)?;
|
||||
let (input, _) = tag("}")(input)?;
|
||||
let (input, body) =
|
||||
delimited(tag("{"), take_till(|c| c == '\n' || c == '}'), tag("}"))(input)?;
|
||||
|
||||
Ok((
|
||||
input,
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
use nom::{
|
||||
bytes::complete::{tag, take_till, take_while},
|
||||
combinator::{map, opt},
|
||||
bytes::complete::{tag, take_till},
|
||||
combinator::opt,
|
||||
sequence::delimited,
|
||||
IResult,
|
||||
};
|
||||
|
||||
use crate::elements::Element;
|
||||
use crate::parsers::take_until_eol;
|
||||
|
||||
#[cfg_attr(test, derive(PartialEq))]
|
||||
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
|
||||
|
@ -24,20 +25,19 @@ pub struct BabelCall<'a> {
|
|||
pub value: &'a str,
|
||||
}
|
||||
|
||||
/// Parses a `[...]` group and yields the text between the brackets.
///
/// The inner text stops at the first `]` or newline, so a group that reaches
/// a newline before its `]` fails on the closing `tag("]")`.
fn optional(input: &str) -> IResult<&str, &str> {
|
||||
delimited(tag("["), take_till(|c| c == ']' || c == '\n'), tag("]"))(input)
|
||||
}
|
||||
|
||||
impl Keyword<'_> {
|
||||
#[inline]
|
||||
pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> {
|
||||
let (input, _) = tag("#+")(input)?;
|
||||
let (input, key) =
|
||||
take_till(|c: char| c.is_ascii_whitespace() || c == ':' || c == '[')(input)?;
|
||||
let (input, optional) = opt(optional)(input)?;
|
||||
let (input, optional) = opt(delimited(
|
||||
tag("["),
|
||||
take_till(|c| c == ']' || c == '\n'),
|
||||
tag("]"),
|
||||
))(input)?;
|
||||
let (input, _) = tag(":")(input)?;
|
||||
let (input, value) = map(take_while(|c| c != '\n'), str::trim)(input)?;
|
||||
let (input, _) = opt(tag("\n"))(input)?;
|
||||
let (input, value) = take_until_eol(input)?;
|
||||
|
||||
if key.eq_ignore_ascii_case("CALL") {
|
||||
Ok((input, Element::BabelCall(BabelCall { value })))
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
use nom::{
|
||||
bytes::complete::{tag, take_while},
|
||||
combinator::opt,
|
||||
sequence::delimited,
|
||||
IResult,
|
||||
};
|
||||
|
||||
|
@ -18,16 +19,16 @@ pub struct Link<'a> {
|
|||
impl Link<'_> {
|
||||
#[inline]
|
||||
pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> {
|
||||
let (input, _) = tag("[[")(input)?;
|
||||
let (input, path) =
|
||||
take_while(|c: char| c != '<' && c != '>' && c != '\n' && c != ']')(input)?;
|
||||
let (input, _) = tag("]")(input)?;
|
||||
let (input, desc) = opt(|input| {
|
||||
let (input, _) = tag("[")(input)?;
|
||||
let (input, desc) = take_while(|c: char| c != '[' && c != ']')(input)?;
|
||||
let (input, _) = tag("]")(input)?;
|
||||
Ok((input, desc))
|
||||
})(input)?;
|
||||
let (input, path) = delimited(
|
||||
tag("[["),
|
||||
take_while(|c: char| c != '<' && c != '>' && c != '\n' && c != ']'),
|
||||
tag("]"),
|
||||
)(input)?;
|
||||
let (input, desc) = opt(delimited(
|
||||
tag("["),
|
||||
take_while(|c: char| c != '[' && c != ']'),
|
||||
tag("]"),
|
||||
))(input)?;
|
||||
let (input, _) = tag("]")(input)?;
|
||||
Ok((input, Element::Link(Link { path, desc })))
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
use nom::{
|
||||
bytes::complete::{tag, take, take_until, take_while1},
|
||||
combinator::{opt, verify},
|
||||
sequence::delimited,
|
||||
IResult,
|
||||
};
|
||||
|
||||
|
@ -23,12 +24,7 @@ impl Macros<'_> {
|
|||
take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'),
|
||||
|s: &str| s.starts_with(|c: char| c.is_ascii_alphabetic()),
|
||||
)(input)?;
|
||||
let (input, arguments) = opt(|input| {
|
||||
let (input, _) = tag("(")(input)?;
|
||||
let (input, args) = take_until(")}}}")(input)?;
|
||||
let (input, _) = take(1usize)(input)?;
|
||||
Ok((input, args))
|
||||
})(input)?;
|
||||
let (input, arguments) = opt(delimited(tag("("), take_until(")}}}"), take(1usize)))(input)?;
|
||||
let (input, _) = tag("}}}")(input)?;
|
||||
|
||||
Ok((input, Element::Macros(Macros { name, arguments })))
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
use nom::{
|
||||
bytes::complete::{tag, take_while},
|
||||
combinator::verify,
|
||||
sequence::delimited,
|
||||
IResult,
|
||||
};
|
||||
|
||||
|
@ -15,12 +16,14 @@ pub struct RadioTarget;
|
|||
impl RadioTarget {
|
||||
#[inline]
|
||||
pub(crate) fn parse(input: &str) -> IResult<&str, (Element, &str)> {
|
||||
let (input, _) = tag("<<<")(input)?;
|
||||
let (input, contents) = verify(
|
||||
let (input, contents) = delimited(
|
||||
tag("<<<"),
|
||||
verify(
|
||||
take_while(|c: char| c != '<' && c != '\n' && c != '>'),
|
||||
|s: &str| s.starts_with(|c| c != ' ') && s.ends_with(|c| c != ' '),
|
||||
),
|
||||
tag(">>>"),
|
||||
)(input)?;
|
||||
let (input, _) = tag(">>>")(input)?;
|
||||
|
||||
Ok((input, (Element::RadioTarget(RadioTarget), contents)))
|
||||
}
|
||||
|
|
|
@ -1,13 +1,8 @@
|
|||
use nom::{
|
||||
branch::alt,
|
||||
bytes::complete::{tag, take_while_m_n},
|
||||
character::complete::space0,
|
||||
error::ErrorKind,
|
||||
Err, IResult,
|
||||
};
|
||||
use nom::{bytes::complete::take_while_m_n, character::complete::space0, IResult};
|
||||
use std::usize;
|
||||
|
||||
use crate::elements::Element;
|
||||
use crate::parsers::eol;
|
||||
|
||||
pub struct Rule;
|
||||
|
||||
|
@ -16,20 +11,11 @@ impl Rule {
|
|||
pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> {
|
||||
let (input, _) = space0(input)?;
|
||||
let (input, _) = take_while_m_n(5, usize::MAX, |c| c == '-')(input)?;
|
||||
let (input, _) = space0(input)?;
|
||||
let (input, _) = alt((tag("\n"), eof))(input)?;
|
||||
let (input, _) = eol(input)?;
|
||||
Ok((input, Element::Rule))
|
||||
}
|
||||
}
|
||||
|
||||
/// Succeeds only when `input` is empty.
///
/// On success both the remainder and the output are the empty string. On any
/// non-empty input it returns a recoverable nom error of kind
/// `ErrorKind::Tag` whose input position is the empty string.
fn eof(input: &str) -> IResult<&str, &str> {
|
||||
if input.is_empty() {
|
||||
Ok(("", ""))
|
||||
} else {
|
||||
Err(Err::Error(("", ErrorKind::Tag)))
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse() {
|
||||
assert_eq!(Rule::parse("-----"), Ok(("", Element::Rule)));
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
use nom::{
|
||||
bytes::complete::{tag, take, take_until, take_while1},
|
||||
sequence::{delimited, separated_pair},
|
||||
IResult,
|
||||
};
|
||||
|
||||
|
@ -16,11 +17,15 @@ pub struct Snippet<'a> {
|
|||
impl Snippet<'_> {
|
||||
#[inline]
|
||||
pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> {
|
||||
let (input, _) = tag("@@")(input)?;
|
||||
let (input, name) = take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-')(input)?;
|
||||
let (input, _) = tag(":")(input)?;
|
||||
let (input, value) = take_until("@@")(input)?;
|
||||
let (input, _) = take(2usize)(input)?;
|
||||
let (input, (name, value)) = delimited(
|
||||
tag("@@"),
|
||||
separated_pair(
|
||||
take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-'),
|
||||
tag(":"),
|
||||
take_until("@@"),
|
||||
),
|
||||
take(2usize),
|
||||
)(input)?;
|
||||
|
||||
Ok((input, Element::Snippet(Snippet { name, value })))
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
use nom::{
|
||||
bytes::complete::{tag, take_while},
|
||||
combinator::verify,
|
||||
sequence::delimited,
|
||||
IResult,
|
||||
};
|
||||
|
||||
|
@ -16,12 +17,14 @@ pub struct Target<'a> {
|
|||
impl Target<'_> {
|
||||
#[inline]
|
||||
pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> {
|
||||
let (input, _) = tag("<<")(input)?;
|
||||
let (input, target) = verify(
|
||||
let (input, target) = delimited(
|
||||
tag("<<"),
|
||||
verify(
|
||||
take_while(|c: char| c != '<' && c != '\n' && c != '>'),
|
||||
|s: &str| s.starts_with(|c| c != ' ') && s.ends_with(|c| c != ' '),
|
||||
),
|
||||
tag(">>"),
|
||||
)(input)?;
|
||||
let (input, _) = tag(">>")(input)?;
|
||||
|
||||
Ok((input, Element::Target(Target { target })))
|
||||
}
|
||||
|
|
|
@ -2,6 +2,7 @@ use nom::{
|
|||
bytes::complete::{tag, take, take_till, take_while, take_while_m_n},
|
||||
character::complete::{space0, space1},
|
||||
combinator::{map, map_res, opt},
|
||||
sequence::preceded,
|
||||
IResult,
|
||||
};
|
||||
|
||||
|
@ -51,13 +52,9 @@ fn parse_datetime(input: &str) -> IResult<&str, Datetime<'_>> {
|
|||
&& c != ']'
|
||||
&& c != '>'
|
||||
})(input)?;
|
||||
let (input, (hour, minute)) = map(
|
||||
opt(|input| {
|
||||
let (input, _) = space1(input)?;
|
||||
parse_time(input)
|
||||
}),
|
||||
|time| (time.map(|t| t.0), time.map(|t| t.1)),
|
||||
)(input)?;
|
||||
let (input, (hour, minute)) = map(opt(preceded(space1, parse_time)), |time| {
|
||||
(time.map(|t| t.0), time.map(|t| t.1))
|
||||
})(input)?;
|
||||
|
||||
Ok((
|
||||
input,
|
||||
|
|
|
@ -56,10 +56,7 @@ pub trait OrgHandler<E: From<Error>> {
|
|||
}
|
||||
Code { value } => write!(w, "~{}~", value)?,
|
||||
FnRef(fn_ref) => {
|
||||
write!(&mut w, "[fn:")?;
|
||||
if let Some(label) = fn_ref.label {
|
||||
write!(&mut w, "{}", label)?;
|
||||
}
|
||||
write!(&mut w, "[fn:{}", fn_ref.label)?;
|
||||
if let Some(definition) = fn_ref.definition {
|
||||
write!(&mut w, ":{}", definition)?;
|
||||
}
|
||||
|
|
|
@ -221,6 +221,7 @@ pub mod elements;
|
|||
pub mod export;
|
||||
mod iter;
|
||||
mod org;
|
||||
mod parsers;
|
||||
#[cfg(feature = "serde")]
|
||||
mod serde;
|
||||
|
||||
|
|
18
src/org.rs
18
src/org.rs
|
@ -175,7 +175,7 @@ fn is_headline(text: &str) -> Option<usize> {
|
|||
} else {
|
||||
None
|
||||
}
|
||||
} else if text.len() > 0 && text.as_bytes().iter().all(|&c| c == b'*') {
|
||||
} else if !text.is_empty() && text.as_bytes().iter().all(|&c| c == b'*') {
|
||||
Some(text.len())
|
||||
} else {
|
||||
None
|
||||
|
@ -291,7 +291,7 @@ fn parse_block<'a>(
|
|||
|
||||
let tail = contents.trim_start();
|
||||
|
||||
if let Some((tail, clock)) = Clock::parse(tail) {
|
||||
if let Ok((tail, clock)) = Clock::parse(tail) {
|
||||
return Some((tail, arena.new_node(clock)));
|
||||
}
|
||||
|
||||
|
@ -305,7 +305,7 @@ fn parse_block<'a>(
|
|||
}
|
||||
|
||||
if tail.starts_with(':') {
|
||||
if let Some((tail, drawer, _content)) = Drawer::parse(tail) {
|
||||
if let Ok((tail, (drawer, _content))) = Drawer::parse(tail) {
|
||||
return Some((tail, arena.new_node(drawer)));
|
||||
}
|
||||
}
|
||||
|
@ -349,7 +349,7 @@ fn parse_block<'a>(
|
|||
}
|
||||
|
||||
if tail.starts_with("#+") {
|
||||
if let Some((tail, block, content)) = Block::parse(tail) {
|
||||
if let Ok((tail, (block, content))) = Block::parse(tail) {
|
||||
match &*block.name.to_uppercase() {
|
||||
"CENTER" => {
|
||||
let node = arena.new_node(Element::CenterBlock(CenterBlock {
|
||||
|
@ -414,7 +414,7 @@ fn parse_block<'a>(
|
|||
Some((tail, node))
|
||||
}
|
||||
}
|
||||
} else if let Some((tail, dyn_block, content)) = DynBlock::parse(tail) {
|
||||
} else if let Ok((tail, (dyn_block, content))) = DynBlock::parse(tail) {
|
||||
let node = arena.new_node(dyn_block);
|
||||
containers.push(Container::Block { content, node });
|
||||
Some((tail, node))
|
||||
|
@ -546,8 +546,8 @@ fn parse_inline<'a>(
|
|||
b'[' => {
|
||||
if contents[1..].starts_with("fn:") {
|
||||
FnRef::parse(contents)
|
||||
.map(|(tail, fn_ref)| (tail, fn_ref.into()))
|
||||
.map(|(tail, element)| (tail, arena.new_node(element)))
|
||||
.ok()
|
||||
.map(|(tail, fn_ref)| (tail, arena.new_node(fn_ref.into())))
|
||||
} else if bytes[1] == b'[' {
|
||||
Link::parse(contents)
|
||||
.ok()
|
||||
|
@ -555,11 +555,11 @@ fn parse_inline<'a>(
|
|||
} else {
|
||||
Cookie::parse(contents)
|
||||
.map(|(tail, cookie)| (tail, cookie.into()))
|
||||
.or_else(|| {
|
||||
.or_else(|_| {
|
||||
Timestamp::parse_inactive(contents)
|
||||
.map(|(tail, timestamp)| (tail, timestamp.into()))
|
||||
.ok()
|
||||
})
|
||||
.ok()
|
||||
.map(|(tail, element)| (tail, arena.new_node(element)))
|
||||
}
|
||||
}
|
||||
|
|
45
src/parsers.rs
Normal file
45
src/parsers.rs
Normal file
|
@ -0,0 +1,45 @@
|
|||
// reused nom parsers
|
||||
|
||||
use memchr::{memchr, memchr_iter};
|
||||
use nom::{
|
||||
bytes::complete::tag, character::complete::space0, error::ErrorKind, error_position, Err,
|
||||
IResult,
|
||||
};
|
||||
|
||||
/// Matches the end of the current line: optional `space0` whitespace followed
/// by either the end of the input or a single `"\n"`.
///
/// Returns the input after the newline (or `""` at end of input) and `()` as
/// the output.
///
/// # Errors
///
/// Fails with the error produced by `tag("\n")` when anything other than a
/// newline follows the skipped whitespace.
pub(crate) fn eol(input: &str) -> IResult<&str, ()> {
|
||||
let (input, _) = space0(input)?;
|
||||
// End of input counts as end of line; no newline is required.
if input.is_empty() {
|
||||
Ok(("", ()))
|
||||
} else {
|
||||
let (input, _) = tag("\n")(input)?;
|
||||
Ok((input, ()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Takes the rest of the current line and returns it trimmed.
///
/// When `input` contains a `\n`, the output is the text before the first
/// newline with `str::trim` applied, and the remainder starts just after that
/// newline. Without a newline the whole input is trimmed and returned, and the
/// remainder is `""`.
///
/// Every path in the body returns `Ok`; the output is empty for a blank line.
pub(crate) fn take_until_eol(input: &str) -> IResult<&str, &str> {
|
||||
if let Some(i) = memchr(b'\n', input.as_bytes()) {
|
||||
// `i + 1` skips the newline itself.
Ok((&input[i + 1..], input[0..i].trim()))
|
||||
} else {
|
||||
Ok(("", input.trim()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a parser that collects whole lines until one satisfies `predicate`.
///
/// The returned parser walks `input` line by line, calling `predicate` on each
/// line with `str::trim` applied. At the first line for which it returns
/// `true`:
///
/// * the output is everything before that line (earlier lines, including their
///   trailing newlines);
/// * the remainder starts after that line's newline, or is `""` when the
///   matching line is the last one and has no trailing newline.
///
/// The matching line itself appears in neither the output nor the remainder.
///
/// # Errors
///
/// Returns a recoverable nom error (`ErrorKind::TakeTill1`, positioned at
/// `input`) when no line satisfies `predicate`.
pub(crate) fn take_lines_till(
|
||||
predicate: impl Fn(&str) -> bool,
|
||||
) -> impl Fn(&str) -> IResult<&str, &str> {
|
||||
move |input| {
|
||||
// Byte offset where the line currently being examined begins.
let mut start = 0;
|
||||
for i in memchr_iter(b'\n', input.as_bytes()) {
|
||||
if predicate(input[start..i].trim()) {
|
||||
return Ok((&input[i + 1..], &input[0..start]));
|
||||
}
|
||||
start = i + 1;
|
||||
}
|
||||
|
||||
// Final line: the text after the last newline (or the whole input if
// there is none).
if predicate(input[start..].trim()) {
|
||||
Ok(("", &input[0..start]))
|
||||
} else {
|
||||
Err(Err::Error(error_position!(input, ErrorKind::TakeTill1)))
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue