From 37c33a82f08efb589decee61a6a122f8619b2948 Mon Sep 17 00:00:00 2001 From: PoiScript Date: Sun, 4 Aug 2019 17:46:10 +0800 Subject: [PATCH] refactor(elements): rewrite most parsers with nom --- src/elements/block.rs | 81 ++++++++++++---------------- src/elements/clock.rs | 100 +++++++++++++++-------------------- src/elements/cookie.rs | 68 ++++++++++-------------- src/elements/drawer.rs | 66 ++++++++--------------- src/elements/dyn_block.rs | 83 ++++++++++------------------- src/elements/emphasis.rs | 41 +++++++------- src/elements/fn_def.rs | 11 ++-- src/elements/fn_ref.rs | 90 ++++++++++++++----------------- src/elements/inline_call.rs | 5 +- src/elements/inline_src.rs | 5 +- src/elements/keyword.rs | 18 +++---- src/elements/link.rs | 21 ++++---- src/elements/macros.rs | 8 +-- src/elements/radio_target.rs | 13 +++-- src/elements/rule.rs | 20 ++----- src/elements/snippet.rs | 15 ++++-- src/elements/target.rs | 13 +++-- src/elements/timestamp.rs | 11 ++-- src/export/org.rs | 5 +- src/lib.rs | 1 + src/org.rs | 18 +++---- src/parsers.rs | 45 ++++++++++++++++ 22 files changed, 336 insertions(+), 402 deletions(-) create mode 100644 src/parsers.rs diff --git a/src/elements/block.rs b/src/elements/block.rs index 8963a2e..375c75c 100644 --- a/src/elements/block.rs +++ b/src/elements/block.rs @@ -1,4 +1,6 @@ -use memchr::{memchr, memchr_iter}; +use nom::{bytes::complete::tag_no_case, character::complete::alpha1, sequence::preceded, IResult}; + +use crate::parsers::{take_lines_till, take_until_eol}; #[cfg_attr(test, derive(PartialEq))] #[derive(Debug)] @@ -9,40 +11,23 @@ pub struct Block<'a> { impl Block<'_> { #[inline] - pub(crate) fn parse(text: &str) -> Option<(&str, Block<'_>, &str)> { - debug_assert!(text.starts_with("#+")); + pub(crate) fn parse(input: &str) -> IResult<&str, (Block<'_>, &str)> { + let (input, name) = preceded(tag_no_case("#+BEGIN_"), alpha1)(input)?; + let (input, args) = take_until_eol(input)?; + let end_line = format!(r"#+END_{}", name); + let (input, contents) = + take_lines_till(|line| line.eq_ignore_ascii_case(&end_line))(input)?; - if text.len() <= 8 || text[2..8].to_uppercase() != "BEGIN_" { - return None; - } - - let mut lines = memchr_iter(b'\n', text.as_bytes()); - - let (name, args, off) = lines - .next() - .map(|i| { - memchr(b' ', &text.as_bytes()[8..i]) - .map(|x| (&text[8..8 + x], Some(text[8 + x..i].trim()), i + 1)) - .unwrap_or((&text[8..i], None, i + 1)) - }) - .filter(|(name, _, _)| name.as_bytes().iter().all(|&c| c.is_ascii_alphabetic()))?; - - let mut pos = off; - let end = format!(r"#+END_{}", name.to_uppercase()); - - for i in lines { - if text[pos..i].trim().eq_ignore_ascii_case(&end) { - return Some((&text[i + 1..], Block { name, args }, &text[off..pos])); - } - - pos = i + 1; - } - - if text[pos..].trim().eq_ignore_ascii_case(&end) { - Some(("", Block { name, args }, &text[off..pos])) - } else { - None - } + Ok(( + input, + ( + Block { + name, + args: if args.is_empty() { None } else { Some(args) }, + }, + contents, + ), + )) } } @@ -50,24 +35,28 @@ impl Block<'_> { fn parse() { assert_eq!( Block::parse("#+BEGIN_SRC\n#+END_SRC"), - Some(( + Ok(( "", - Block { - name: "SRC", - args: None, - }, - "" + ( + Block { + name: "SRC", + args: None, + }, + "" + ) )) ); assert_eq!( Block::parse("#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n#+END_SRC\n"), - Some(( + Ok(( "", - Block { - name: "SRC", - args: Some("javascript"), - }, - "console.log('Hello World!');\n" + ( + Block { + name: "SRC", + args: Some("javascript"), + }, + "console.log('Hello World!');\n" + ) )) ); // TODO: more testing diff --git a/src/elements/clock.rs b/src/elements/clock.rs index 79455e9..39e9542 100644 --- a/src/elements/clock.rs +++ b/src/elements/clock.rs @@ -1,5 +1,13 @@ +use nom::sequence::separated_pair; +use nom::{ + bytes::complete::tag, + character::complete::{char, digit1, space0}, + combinator::{peek, recognize}, + IResult, +}; + use crate::elements::{Datetime, Element, Timestamp}; -use memchr::memchr; +use crate::parsers::eol; /// clock elements /// @@ -25,24 +33,11 @@ pub enum Clock<'a> { } impl Clock<'_> { - pub(crate) fn parse(text: &str) -> Option<(&str, Element<'_>)> { - let (text, eol) = memchr(b'\n', text.as_bytes()) - .map(|i| (text[..i].trim(), i + 1)) - .unwrap_or_else(|| (text.trim(), text.len())); - - if !text.starts_with("CLOCK:") { - return None; - } - - let tail = &text["CLOCK:".len()..].trim_start(); - - if !tail.starts_with('[') { - return None; - } - - let (tail, timestamp) = Timestamp::parse_inactive(tail).ok()?; - - let tail = tail.trim(); + pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> { + let (input, _) = tag("CLOCK:")(input)?; + let (input, _) = space0(input)?; + let (input, _) = peek(tag("["))(input)?; + let (input, timestamp) = Timestamp::parse_inactive(input)?; match timestamp { Timestamp::InactiveRange { @@ -51,50 +46,39 @@ impl Clock<'_> { repeater, delay, } => { - if tail.starts_with("=>") { - let duration = &tail[3..].trim(); - let colon = memchr(b':', duration.as_bytes())?; - if duration.as_bytes()[0..colon].iter().all(u8::is_ascii_digit) - && colon == duration.len() - 3 - && duration.as_bytes()[colon + 1].is_ascii_digit() - && duration.as_bytes()[colon + 2].is_ascii_digit() - { - Some(( - &text[eol..], - Element::Clock(Clock::Closed { - start, - end, - repeater, - delay, - duration, - }), - )) - } else { - None - } - } else { - None - } + let (input, _) = space0(input)?; + let (input, _) = tag("=>")(input)?; + let (input, _) = space0(input)?; + let (input, duration) = + recognize(separated_pair(digit1, char(':'), digit1))(input)?; + let (input, _) = eol(input)?; + Ok(( + input, + Element::Clock(Clock::Closed { + start, + end, + repeater, + delay, + duration, + }), + )) } Timestamp::Inactive { start, repeater, delay, } => { - if tail.is_empty() { - Some(( - &text[eol..], - Element::Clock(Clock::Running { - start, - repeater, - delay, - }), - )) - } else { - None - } + let (input, _) = eol(input)?; + Ok(( + input, + Element::Clock(Clock::Running { + start, + repeater, + delay, + }), + )) } - _ => None, + _ => unreachable!(), } } @@ -154,7 +138,7 @@ impl Clock<'_> { fn parse() { assert_eq!( Clock::parse("CLOCK: [2003-09-16 Tue 09:39]"), - Some(( + Ok(( "", Element::Clock(Clock::Running { start: Datetime { @@ -172,7 +156,7 @@ fn parse() { ); assert_eq!( Clock::parse("CLOCK: [2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39] => 1:00"), - Some(( + Ok(( "", Element::Clock(Clock::Closed { start: Datetime { diff --git a/src/elements/cookie.rs b/src/elements/cookie.rs index be03e9f..6cfbbce 100644 --- a/src/elements/cookie.rs +++ b/src/elements/cookie.rs @@ -1,4 +1,11 @@ -use memchr::{memchr, memchr2}; +use nom::{ + branch::alt, + bytes::complete::tag, + character::complete::digit0, + combinator::recognize, + sequence::{delimited, pair, separated_pair}, + IResult, +}; #[cfg_attr(test, derive(PartialEq))] #[cfg_attr(feature = "serde", derive(serde::Serialize))] @@ -9,31 +16,17 @@ pub struct Cookie<'a> { impl Cookie<'_> { #[inline] - pub(crate) fn parse(src: &str) -> Option<(&str, Cookie<'_>)> { - debug_assert!(src.starts_with('[')); + pub(crate) fn parse(input: &str) -> IResult<&str, Cookie<'_>> { + let (input, value) = recognize(delimited( + tag("["), + alt(( + separated_pair(digit0, tag("/"), digit0), + pair(digit0, tag("%")), + )), + tag("]"), + ))(input)?; - let bytes = src.as_bytes(); - let num1 = - memchr2(b'%', b'/', bytes).filter(|&i| bytes[1..i].iter().all(u8::is_ascii_digit))?; - - if bytes[num1] == b'%' && *bytes.get(num1 + 1)? == b']' { - Some(( - &src[num1 + 2..], - Cookie { - value: &src[0..num1 + 2], - }, - )) - } else { - let num2 = memchr(b']', bytes) - .filter(|&i| bytes[num1 + 1..i].iter().all(u8::is_ascii_digit))?; - - Some(( - &src[num2 + 1..], - Cookie { - value: &src[0..num2 + 1], - }, - )) - } + Ok((input, Cookie { value })) } } @@ -41,29 +34,26 @@ impl Cookie<'_> { fn parse() { assert_eq!( Cookie::parse("[1/10]"), - Some(("", Cookie { value: "[1/10]" })) + Ok(("", Cookie { value: "[1/10]" })) ); assert_eq!( Cookie::parse("[1/1000]"), - Some(("", Cookie { value: "[1/1000]" })) + Ok(("", Cookie { value: "[1/1000]" })) ); - assert_eq!( - Cookie::parse("[10%]"), - Some(("", Cookie { value: "[10%]" })) - ); - assert_eq!(Cookie::parse("[%]"), Some(("", Cookie { value: "[%]" }))); - assert_eq!(Cookie::parse("[/]"), Some(("", Cookie { value: "[/]" }))); + assert_eq!(Cookie::parse("[10%]"), Ok(("", Cookie { value: "[10%]" }))); + assert_eq!(Cookie::parse("[%]"), Ok(("", Cookie { value: "[%]" }))); + assert_eq!(Cookie::parse("[/]"), Ok(("", Cookie { value: "[/]" }))); assert_eq!( Cookie::parse("[100/]"), - Some(("", Cookie { value: "[100/]" })) + Ok(("", Cookie { value: "[100/]" })) ); assert_eq!( Cookie::parse("[/100]"), - Some(("", Cookie { value: "[/100]" })) + Ok(("", Cookie { value: "[/100]" })) ); - assert_eq!(Cookie::parse("[10% ]"), None); - assert_eq!(Cookie::parse("[1//100]"), None); - assert_eq!(Cookie::parse("[1\\100]"), None); - assert_eq!(Cookie::parse("[10%%]"), None); + assert!(Cookie::parse("[10% ]").is_err()); + assert!(Cookie::parse("[1//100]").is_err()); + assert!(Cookie::parse("[1\\100]").is_err()); + assert!(Cookie::parse("[10%%]").is_err()); } diff --git a/src/elements/drawer.rs b/src/elements/drawer.rs index 69c528f..a7228f7 100644 --- a/src/elements/drawer.rs +++ b/src/elements/drawer.rs @@ -1,6 +1,11 @@ -use memchr::memchr_iter; - use crate::elements::Element; +use crate::parsers::{eol, take_lines_till}; + +use nom::{ + bytes::complete::{tag, take_while1}, + sequence::delimited, + IResult, +}; #[cfg_attr(test, derive(PartialEq))] #[cfg_attr(feature = "serde", derive(serde::Serialize))] @@ -11,47 +16,16 @@ pub struct Drawer<'a> { impl Drawer<'_> { #[inline] - pub(crate) fn parse(text: &str) -> Option<(&str, Element<'_>, &str)> { - debug_assert!(text.starts_with(':')); + pub(crate) fn parse(input: &str) -> IResult<&str, (Element<'_>, &str)> { + let (input, name) = delimited( + tag(":"), + take_while1(|c: char| c.is_ascii_alphabetic() || c == '-' || c == '_'), + tag(":"), + )(input)?; + let (input, _) = eol(input)?; + let (input, contents) = take_lines_till(|line| line.eq_ignore_ascii_case(":END:"))(input)?; - let mut lines = memchr_iter(b'\n', text.as_bytes()); - - let (name, off) = lines - .next() - .map(|i| (text[1..i].trim_end(), i + 1)) - .filter(|(name, _)| { - name.ends_with(':') - && name[0..name.len() - 1] - .as_bytes() - .iter() - .all(|&c| c.is_ascii_alphabetic() || c == b'-' || c == b'_') - })?; - - let mut pos = off; - for i in lines { - if text[pos..i].trim().eq_ignore_ascii_case(":END:") { - return Some(( - &text[i + 1..], - Element::Drawer(Drawer { - name: &name[0..name.len() - 1], - }), - &text[off..pos], - )); - } - pos = i + 1; - } - - if text[pos..].trim().eq_ignore_ascii_case(":END:") { - Some(( - "", - Element::Drawer(Drawer { - name: &name[0..name.len() - 1], - }), - &text[off..pos], - )) - } else { - None - } + Ok((input, (Element::Drawer(Drawer { name }), contents))) } } @@ -59,10 +33,12 @@ impl Drawer<'_> { fn parse() { assert_eq!( Drawer::parse(":PROPERTIES:\n :CUSTOM_ID: id\n :END:"), - Some(( + Ok(( "", - Element::Drawer(Drawer { name: "PROPERTIES" }), - " :CUSTOM_ID: id\n" + ( + Element::Drawer(Drawer { name: "PROPERTIES" }), + " :CUSTOM_ID: id\n" + ) )) ) } diff --git a/src/elements/dyn_block.rs b/src/elements/dyn_block.rs index ced6b5c..72ca856 100644 --- a/src/elements/dyn_block.rs +++ b/src/elements/dyn_block.rs @@ -1,6 +1,11 @@ use crate::elements::Element; +use crate::parsers::{take_lines_till, take_until_eol}; -use memchr::{memchr, memchr_iter}; +use nom::{ + bytes::complete::tag_no_case, + character::complete::{alpha1, space1}, + IResult, +}; #[cfg_attr(test, derive(PartialEq))] #[cfg_attr(feature = "serde", derive(serde::Serialize))] @@ -13,60 +18,24 @@ pub struct DynBlock<'a> { impl DynBlock<'_> { #[inline] - pub(crate) fn parse(text: &str) -> Option<(&str, Element<'_>, &str)> { - debug_assert!(text.starts_with("#+")); + pub(crate) fn parse(input: &str) -> IResult<&str, (Element<'_>, &str)> { + let (input, _) = tag_no_case("#+BEGIN:")(input)?; + let (input, _) = space1(input)?; + let (input, name) = alpha1(input)?; + let (input, args) = take_until_eol(input)?; - if text.len() <= "#+BEGIN: ".len() || !text[2..9].eq_ignore_ascii_case("BEGIN: ") { - return None; - } + let (input, contents) = take_lines_till(|line| line.eq_ignore_ascii_case("#+END:"))(input)?; - let bytes = text.as_bytes(); - let mut lines = memchr_iter(b'\n', bytes); - - let (name, para, off) = lines - .next() - .map(|i| { - memchr(b' ', &bytes["#+BEGIN: ".len()..i]) - .map(|x| { - ( - &text["#+BEGIN: ".len().."#+BEGIN: ".len() + x], - Some(text["#+BEGIN: ".len() + x..i].trim()), - i + 1, - ) - }) - .unwrap_or((&text["#+BEGIN: ".len()..i], None, i + 1)) - }) - .filter(|(name, _, _)| name.as_bytes().iter().all(|&c| c.is_ascii_alphabetic()))?; - - let mut pos = off; - - for i in lines { - if text[pos..i].trim().eq_ignore_ascii_case("#+END:") { - return Some(( - &text[i + 1..], - Element::DynBlock(DynBlock { - block_name: name, - arguments: para, - }), - &text[off..pos], - )); - } - - pos = i + 1; - } - - if text[pos..].trim().eq_ignore_ascii_case("#+END:") { - Some(( - "", + Ok(( + input, + ( Element::DynBlock(DynBlock { block_name: name, - arguments: para, + arguments: if args.is_empty() { None } else { Some(args) }, }), - &text[off..pos], - )) - } else { - None - } + contents, + ), + )) } } @@ -75,13 +44,15 @@ fn parse() { // TODO: testing assert_eq!( DynBlock::parse("#+BEGIN: clocktable :scope file\nCONTENTS\n#+END:\n"), - Some(( + Ok(( "", - Element::DynBlock(DynBlock { - block_name: "clocktable", - arguments: Some(":scope file"), - }), - "CONTENTS\n" + ( + Element::DynBlock(DynBlock { + block_name: "clocktable", + arguments: Some(":scope file"), + }), + "CONTENTS\n" + ) )) ); } diff --git a/src/elements/emphasis.rs b/src/elements/emphasis.rs index 1be953e..5d580c5 100644 --- a/src/elements/emphasis.rs +++ b/src/elements/emphasis.rs @@ -1,5 +1,5 @@ use bytecount::count; -use memchr::memchr; +use memchr::memchr_iter; #[inline] pub(crate) fn parse(text: &str, marker: u8) -> Option<(&str, &str)> { @@ -11,31 +11,27 @@ pub(crate) fn parse(text: &str, marker: u8) -> Option<(&str, &str)> { return None; } - let end = memchr(marker, &bytes[1..]).filter(|&i| count(&bytes[1..=i], b'\n') < 2)?; - - if bytes[end].is_ascii_whitespace() { - return None; + for i in memchr_iter(marker, bytes).skip(1) { + if count(&bytes[1..i], b'\n') >= 2 { + break; + } else if validate_marker(i, text) { + return Some((&text[i + 1..], &text[1..i])); + } } - if let Some(&post) = bytes.get(end + 2) { - if post == b' ' - || post == b'-' - || post == b'.' - || post == b',' - || post == b':' - || post == b'!' - || post == b'?' - || post == b'\'' - || post == b'\n' - || post == b')' - || post == b'}' - { - Some((&text[end + 2..], &text[1..end + 1])) - } else { - None + None +} + +fn validate_marker(pos: usize, text: &str) -> bool { + if text.as_bytes()[pos - 1].is_ascii_whitespace() { + false + } else if let Some(&post) = text.as_bytes().get(pos + 1) { + match post { + b' ' | b'-' | b'.' | b',' | b':' | b'!' | b'?' | b'\'' | b'\n' | b')' | b'}' => true, + _ => false, } } else { - Some((&text[end + 2..], &text[1..end + 1])) + true } } @@ -46,6 +42,7 @@ mod tests { use super::parse; assert_eq!(parse("*bold*", b'*'), Some(("", "bold"))); + assert_eq!(parse("*bo*ld*", b'*'), Some(("", "bo*ld"))); assert_eq!(parse("*bo\nld*", b'*'), Some(("", "bo\nld"))); assert_eq!(parse("*bold*a", b'*'), None); assert_eq!(parse("*bold*", b'/'), None); diff --git a/src/elements/fn_def.rs b/src/elements/fn_def.rs index c3680fa..8d0ed1e 100644 --- a/src/elements/fn_def.rs +++ b/src/elements/fn_def.rs @@ -1,6 +1,7 @@ use memchr::memchr; use nom::{ bytes::complete::{tag, take_while1}, + sequence::delimited, IResult, }; @@ -12,10 +13,12 @@ pub struct FnDef<'a> { } fn parse_label(input: &str) -> IResult<&str, &str> { - let (input, _) = tag("[fn:")(input)?; - let (input, label) = - take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_')(input)?; - let (input, _) = tag("]")(input)?; + let (input, label) = delimited( + tag("[fn:"), + take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'), + tag("]"), + )(input)?; + Ok((input, label)) } diff --git a/src/elements/fn_ref.rs b/src/elements/fn_ref.rs index e90b392..8e11f3e 100644 --- a/src/elements/fn_ref.rs +++ b/src/elements/fn_ref.rs @@ -1,55 +1,46 @@ -use memchr::{memchr2, memchr2_iter}; +use memchr::memchr2_iter; +use nom::{ + bytes::complete::{tag, take_while}, + combinator::opt, + error::ErrorKind, + error_position, + sequence::preceded, + Err, IResult, +}; #[cfg_attr(test, derive(PartialEq))] #[cfg_attr(feature = "serde", derive(serde::Serialize))] #[derive(Debug)] pub struct FnRef<'a> { - #[cfg_attr(feature = "serde", serde(skip_serializing_if = "Option::is_none"))] - pub label: Option<&'a str>, + pub label: &'a str, #[cfg_attr(feature = "serde", serde(skip_serializing_if = "Option::is_none"))] pub definition: Option<&'a str>, } +fn balanced_brackets(input: &str) -> IResult<&str, &str> { + let mut pairs = 1; + for i in memchr2_iter(b'[', b']', input.as_bytes()) { + if input.as_bytes()[i] == b'[' { + pairs += 1; + } else if pairs != 1 { + pairs -= 1; + } else { + return Ok((&input[i..], &input[0..i])); + } + } + Err(Err::Error(error_position!(input, ErrorKind::Tag))) +} + impl FnRef<'_> { #[inline] - pub(crate) fn parse(text: &str) -> Option<(&str, FnRef<'_>)> { - debug_assert!(text.starts_with("[fn:")); + pub(crate) fn parse(input: &str) -> IResult<&str, FnRef<'_>> { + let (input, _) = tag("[fn:")(input)?; + let (input, label) = + take_while(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_')(input)?; + let (input, definition) = opt(preceded(tag(":"), balanced_brackets))(input)?; + let (input, _) = tag("]")(input)?; - let bytes = text.as_bytes(); - let (label, off) = memchr2(b']', b':', &bytes["[fn:".len()..]) - .filter(|&i| { - bytes["[fn:".len().."[fn:".len() + i] - .iter() - .all(|&c| c.is_ascii_alphanumeric() || c == b'-' || c == b'_') - }) - .map(|i| { - ( - if i == 0 { - None - } else { - Some(&text["[fn:".len().."[fn:".len() + i]) - }, - "[fn:".len() + i, - ) - })?; - - let (definition, off) = if bytes[off] == b':' { - let mut pairs = 1; - memchr2_iter(b'[', b']', &bytes[off..]) - .find(|&i| { - if bytes[i + off] == b'[' { - pairs += 1; - } else { - pairs -= 1; - } - pairs == 0 - }) - .map(|i| (Some(&text[off + 1..off + i]), i + off + 1))? - } else { - (None, off + 1) - }; - - Some((&text[off..], FnRef { label, definition })) + Ok((input, FnRef { label, definition })) } } @@ -57,43 +48,44 @@ impl FnRef<'_> { fn parse() { assert_eq!( FnRef::parse("[fn:1]"), - Some(( + Ok(( "", FnRef { - label: Some("1"), + label: "1", definition: None }, )) ); assert_eq!( FnRef::parse("[fn:1:2]"), - Some(( + Ok(( "", FnRef { - label: Some("1"), + label: "1", definition: Some("2") }, )) ); assert_eq!( FnRef::parse("[fn::2]"), - Some(( + Ok(( "", FnRef { - label: None, + label: "", definition: Some("2") }, )) ); assert_eq!( FnRef::parse("[fn::[]]"), - Some(( + Ok(( "", FnRef { - label: None, + label: "", definition: Some("[]") }, )) ); - assert_eq!(FnRef::parse("[fn::[]"), None); + + assert!(FnRef::parse("[fn::[]").is_err()); } diff --git a/src/elements/inline_call.rs b/src/elements/inline_call.rs index 9afbc50..662e61a 100644 --- a/src/elements/inline_call.rs +++ b/src/elements/inline_call.rs @@ -29,9 +29,8 @@ impl<'a> InlineCall<'a> { let (input, _) = tag("call_")(input)?; let (input, name) = take_till(|c| c == '[' || c == '\n' || c == '(' || c == ')')(input)?; let (input, inside_header) = opt(header)(input)?; - let (input, _) = tag("(")(input)?; - let (input, arguments) = take_till(|c| c == ')' || c == '\n')(input)?; - let (input, _) = tag(")")(input)?; + let (input, arguments) = + delimited(tag("("), take_till(|c| c == ')' || c == '\n'), tag(")"))(input)?; let (input, end_header) = opt(header)(input)?; Ok(( diff --git a/src/elements/inline_src.rs b/src/elements/inline_src.rs index 71aa386..18d7c0f 100644 --- a/src/elements/inline_src.rs +++ b/src/elements/inline_src.rs @@ -28,9 +28,8 @@ impl InlineSrc<'_> { take_till(|c| c == '\n' || c == ']'), tag("]"), ))(input)?; - let (input, _) = tag("{")(input)?; - let (input, body) = take_till(|c| c == '\n' || c == '}')(input)?; - let (input, _) = tag("}")(input)?; + let (input, body) = + delimited(tag("{"), take_till(|c| c == '\n' || c == '}'), tag("}"))(input)?; Ok(( input, diff --git a/src/elements/keyword.rs b/src/elements/keyword.rs index 4173fa5..7b61d25 100644 --- a/src/elements/keyword.rs +++ b/src/elements/keyword.rs @@ -1,11 +1,12 @@ use nom::{ - bytes::complete::{tag, take_till, take_while}, - combinator::{map, opt}, + bytes::complete::{tag, take_till}, + combinator::opt, sequence::delimited, IResult, }; use crate::elements::Element; +use crate::parsers::take_until_eol; #[cfg_attr(test, derive(PartialEq))] #[cfg_attr(feature = "serde", derive(serde::Serialize))] @@ -24,20 +25,19 @@ pub struct BabelCall<'a> { pub value: &'a str, } -fn optional(input: &str) -> IResult<&str, &str> { - delimited(tag("["), take_till(|c| c == ']' || c == '\n'), tag("]"))(input) -} - impl Keyword<'_> { #[inline] pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> { let (input, _) = tag("#+")(input)?; let (input, key) = take_till(|c: char| c.is_ascii_whitespace() || c == ':' || c == '[')(input)?; - let (input, optional) = opt(optional)(input)?; + let (input, optional) = opt(delimited( + tag("["), + take_till(|c| c == ']' || c == '\n'), + tag("]"), + ))(input)?; let (input, _) = tag(":")(input)?; - let (input, value) = map(take_while(|c| c != '\n'), str::trim)(input)?; - let (input, _) = opt(tag("\n"))(input)?; + let (input, value) = take_until_eol(input)?; if key.eq_ignore_ascii_case("CALL") { Ok((input, Element::BabelCall(BabelCall { value }))) diff --git a/src/elements/link.rs b/src/elements/link.rs index 2f457ed..454b24d 100644 --- a/src/elements/link.rs +++ b/src/elements/link.rs @@ -1,6 +1,7 @@ use nom::{ bytes::complete::{tag, take_while}, combinator::opt, + sequence::delimited, IResult, }; @@ -18,16 +19,16 @@ pub struct Link<'a> { impl Link<'_> { #[inline] pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> { - let (input, _) = tag("[[")(input)?; - let (input, path) = - take_while(|c: char| c != '<' && c != '>' && c != '\n' && c != ']')(input)?; - let (input, _) = tag("]")(input)?; - let (input, desc) = opt(|input| { - let (input, _) = tag("[")(input)?; - let (input, desc) = take_while(|c: char| c != '[' && c != ']')(input)?; - let (input, _) = tag("]")(input)?; - Ok((input, desc)) - })(input)?; + let (input, path) = delimited( + tag("[["), + take_while(|c: char| c != '<' && c != '>' && c != '\n' && c != ']'), + tag("]"), + )(input)?; + let (input, desc) = opt(delimited( + tag("["), + take_while(|c: char| c != '[' && c != ']'), + tag("]"), + ))(input)?; let (input, _) = tag("]")(input)?; Ok((input, Element::Link(Link { path, desc }))) } diff --git a/src/elements/macros.rs b/src/elements/macros.rs index 949e453..e62e34d 100644 --- a/src/elements/macros.rs +++ b/src/elements/macros.rs @@ -1,6 +1,7 @@ use nom::{ bytes::complete::{tag, take, take_until, take_while1}, combinator::{opt, verify}, + sequence::delimited, IResult, }; @@ -23,12 +24,7 @@ impl Macros<'_> { take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_'), |s: &str| s.starts_with(|c: char| c.is_ascii_alphabetic()), )(input)?; - let (input, arguments) = opt(|input| { - let (input, _) = tag("(")(input)?; - let (input, args) = take_until(")}}}")(input)?; - let (input, _) = take(1usize)(input)?; - Ok((input, args)) - })(input)?; + let (input, arguments) = opt(delimited(tag("("), take_until(")}}}"), take(1usize)))(input)?; let (input, _) = tag("}}}")(input)?; Ok((input, Element::Macros(Macros { name, arguments }))) diff --git a/src/elements/radio_target.rs b/src/elements/radio_target.rs index 71a3f68..7f09ea7 100644 --- a/src/elements/radio_target.rs +++ b/src/elements/radio_target.rs @@ -1,6 +1,7 @@ use nom::{ bytes::complete::{tag, take_while}, combinator::verify, + sequence::delimited, IResult, }; @@ -15,12 +16,14 @@ pub struct RadioTarget; impl RadioTarget { #[inline] pub(crate) fn parse(input: &str) -> IResult<&str, (Element, &str)> { - let (input, _) = tag("<<<")(input)?; - let (input, contents) = verify( - take_while(|c: char| c != '<' && c != '\n' && c != '>'), - |s: &str| s.starts_with(|c| c != ' ') && s.ends_with(|c| c != ' '), + let (input, contents) = delimited( + tag("<<<"), + verify( + take_while(|c: char| c != '<' && c != '\n' && c != '>'), + |s: &str| s.starts_with(|c| c != ' ') && s.ends_with(|c| c != ' '), + ), + tag(">>>"), )(input)?; - let (input, _) = tag(">>>")(input)?; Ok((input, (Element::RadioTarget(RadioTarget), contents))) } diff --git a/src/elements/rule.rs b/src/elements/rule.rs index a66b778..7c51266 100644 --- a/src/elements/rule.rs +++ b/src/elements/rule.rs @@ -1,13 +1,8 @@ -use nom::{ - branch::alt, - bytes::complete::{tag, take_while_m_n}, - character::complete::space0, - error::ErrorKind, - Err, IResult, -}; +use nom::{bytes::complete::take_while_m_n, character::complete::space0, IResult}; use std::usize; use crate::elements::Element; +use crate::parsers::eol; pub struct Rule; @@ -16,20 +11,11 @@ impl Rule { pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> { let (input, _) = space0(input)?; let (input, _) = take_while_m_n(5, usize::MAX, |c| c == '-')(input)?; - let (input, _) = space0(input)?; - let (input, _) = alt((tag("\n"), eof))(input)?; + let (input, _) = eol(input)?; Ok((input, Element::Rule)) } } -fn eof(input: &str) -> IResult<&str, &str> { - if input.is_empty() { - Ok(("", "")) - } else { - Err(Err::Error(("", ErrorKind::Tag))) - } -} - #[test] fn parse() { assert_eq!(Rule::parse("-----"), Ok(("", Element::Rule))); diff --git a/src/elements/snippet.rs b/src/elements/snippet.rs index bf24a00..b5ab4f0 100644 --- a/src/elements/snippet.rs +++ b/src/elements/snippet.rs @@ -1,5 +1,6 @@ use nom::{ bytes::complete::{tag, take, take_until, take_while1}, + sequence::{delimited, separated_pair}, IResult, }; @@ -16,11 +17,15 @@ pub struct Snippet<'a> { impl Snippet<'_> { #[inline] pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> { - let (input, _) = tag("@@")(input)?; - let (input, name) = take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-')(input)?; - let (input, _) = tag(":")(input)?; - let (input, value) = take_until("@@")(input)?; - let (input, _) = take(2usize)(input)?; + let (input, (name, value)) = delimited( + tag("@@"), + separated_pair( + take_while1(|c: char| c.is_ascii_alphanumeric() || c == '-'), + tag(":"), + take_until("@@"), + ), + take(2usize), + )(input)?; Ok((input, Element::Snippet(Snippet { name, value }))) } diff --git a/src/elements/target.rs b/src/elements/target.rs index d5887d6..129f59b 100644 --- a/src/elements/target.rs +++ b/src/elements/target.rs @@ -1,6 +1,7 @@ use nom::{ bytes::complete::{tag, take_while}, combinator::verify, + sequence::delimited, IResult, }; @@ -16,12 +17,14 @@ pub struct Target<'a> { impl Target<'_> { #[inline] pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> { - let (input, _) = tag("<<")(input)?; - let (input, target) = verify( - take_while(|c: char| c != '<' && c != '\n' && c != '>'), - |s: &str| s.starts_with(|c| c != ' ') && s.ends_with(|c| c != ' '), + let (input, target) = delimited( + tag("<<"), + verify( + take_while(|c: char| c != '<' && c != '\n' && c != '>'), + |s: &str| s.starts_with(|c| c != ' ') && s.ends_with(|c| c != ' '), + ), + tag(">>"), )(input)?; - let (input, _) = tag(">>")(input)?; Ok((input, Element::Target(Target { target }))) } diff --git a/src/elements/timestamp.rs b/src/elements/timestamp.rs index b0f1401..a45e138 100644 --- a/src/elements/timestamp.rs +++ b/src/elements/timestamp.rs @@ -2,6 +2,7 @@ use nom::{ bytes::complete::{tag, take, take_till, take_while, take_while_m_n}, character::complete::{space0, space1}, combinator::{map, map_res, opt}, + sequence::preceded, IResult, }; @@ -51,13 +52,9 @@ fn parse_datetime(input: &str) -> IResult<&str, Datetime<'_>> { && c != ']' && c != '>' })(input)?; - let (input, (hour, minute)) = map( - opt(|input| { - let (input, _) = space1(input)?; - parse_time(input) - }), - |time| (time.map(|t| t.0), time.map(|t| t.1)), - )(input)?; + let (input, (hour, minute)) = map(opt(preceded(space1, parse_time)), |time| { + (time.map(|t| t.0), time.map(|t| t.1)) + })(input)?; Ok(( input, diff --git a/src/export/org.rs b/src/export/org.rs index 4f519ec..6becb02 100644 --- a/src/export/org.rs +++ b/src/export/org.rs @@ -56,10 +56,7 @@ pub trait OrgHandler> { } Code { value } => write!(w, "~{}~", value)?, FnRef(fn_ref) => { - write!(&mut w, "[fn:")?; - if let Some(label) = fn_ref.label { - write!(&mut w, "{}", label)?; - } + write!(&mut w, "[fn:{}", fn_ref.label)?; if let Some(definition) = fn_ref.definition { write!(&mut w, ":{}", definition)?; } diff --git a/src/lib.rs b/src/lib.rs index 9fe7eb1..bf70979 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -221,6 +221,7 @@ pub mod elements; pub mod export; mod iter; mod org; +mod parsers; #[cfg(feature = "serde")] mod serde; diff --git a/src/org.rs b/src/org.rs index e880698..d98f8fd 100644 --- a/src/org.rs +++ b/src/org.rs @@ -175,7 +175,7 @@ fn is_headline(text: &str) -> Option { } else { None } - } else if text.len() > 0 && text.as_bytes().iter().all(|&c| c == b'*') { + } else if !text.is_empty() && text.as_bytes().iter().all(|&c| c == b'*') { Some(text.len()) } else { None @@ -291,7 +291,7 @@ fn parse_block<'a>( let tail = contents.trim_start(); - if let Some((tail, clock)) = Clock::parse(tail) { + if let Ok((tail, clock)) = Clock::parse(tail) { return Some((tail, arena.new_node(clock))); } @@ -305,7 +305,7 @@ fn parse_block<'a>( } if tail.starts_with(':') { - if let Some((tail, drawer, _content)) = Drawer::parse(tail) { + if let Ok((tail, (drawer, _content))) = Drawer::parse(tail) { return Some((tail, arena.new_node(drawer))); } } @@ -349,7 +349,7 @@ fn parse_block<'a>( } if tail.starts_with("#+") { - if let Some((tail, block, content)) = Block::parse(tail) { + if let Ok((tail, (block, content))) = Block::parse(tail) { match &*block.name.to_uppercase() { "CENTER" => { let node = arena.new_node(Element::CenterBlock(CenterBlock { @@ -414,7 +414,7 @@ fn parse_block<'a>( Some((tail, node)) } } - } else if let Some((tail, dyn_block, content)) = DynBlock::parse(tail) { + } else if let Ok((tail, (dyn_block, content))) = DynBlock::parse(tail) { let node = arena.new_node(dyn_block); containers.push(Container::Block { content, node }); Some((tail, node)) @@ -546,8 +546,8 @@ fn parse_inline<'a>( b'[' => { if contents[1..].starts_with("fn:") { FnRef::parse(contents) - .map(|(tail, fn_ref)| (tail, fn_ref.into())) - .map(|(tail, element)| (tail, arena.new_node(element))) + .ok() + .map(|(tail, fn_ref)| (tail, arena.new_node(fn_ref.into()))) } else if bytes[1] == b'[' { Link::parse(contents) .ok() @@ -555,11 +555,11 @@ fn parse_inline<'a>( } else { Cookie::parse(contents) .map(|(tail, cookie)| (tail, cookie.into())) - .or_else(|| { + .or_else(|_| { Timestamp::parse_inactive(contents) .map(|(tail, timestamp)| (tail, timestamp.into())) - .ok() }) + .ok() .map(|(tail, element)| (tail, arena.new_node(element))) } } diff --git a/src/parsers.rs b/src/parsers.rs new file mode 100644 index 0000000..cfea23d --- /dev/null +++ b/src/parsers.rs @@ -0,0 +1,45 @@ +// resued nom parsers + +use memchr::{memchr, memchr_iter}; +use nom::{ + bytes::complete::tag, character::complete::space0, error::ErrorKind, error_position, Err, + IResult, +}; + +pub(crate) fn eol(input: &str) -> IResult<&str, ()> { + let (input, _) = space0(input)?; + if input.is_empty() { + Ok(("", ())) + } else { + let (input, _) = tag("\n")(input)?; + Ok((input, ())) + } +} + +pub(crate) fn take_until_eol(input: &str) -> IResult<&str, &str> { + if let Some(i) = memchr(b'\n', input.as_bytes()) { + Ok((&input[i + 1..], input[0..i].trim())) + } else { + Ok(("", input.trim())) + } +} + +pub(crate) fn take_lines_till( + predicate: impl Fn(&str) -> bool, +) -> impl Fn(&str) -> IResult<&str, &str> { + move |input| { + let mut start = 0; + for i in memchr_iter(b'\n', input.as_bytes()) { + if predicate(input[start..i].trim()) { + return Ok((&input[i + 1..], &input[0..start])); + } + start = i + 1; + } + + if predicate(input[start..].trim()) { + Ok(("", &input[0..start])) + } else { + Err(Err::Error(error_position!(input, ErrorKind::TakeTill1))) + } + } +}