From 0b355b498cb4c2a2447a587c6bbcf989f3d923eb Mon Sep 17 00:00:00 2001 From: PoiScript Date: Thu, 7 Feb 2019 15:54:16 +0800 Subject: [PATCH] refactor: cleanup utils macros --- README.md | 28 ++++++++ src/elements/block.rs | 9 ++- src/elements/dyn_block.rs | 32 ++++++--- src/elements/fn_def.rs | 22 +++---- src/elements/keyword.rs | 22 ++++--- src/elements/list.rs | 2 +- src/objects/cookie.rs | 19 ++++-- src/objects/emphasis.rs | 13 ++-- src/objects/fn_ref.rs | 34 ++++++---- src/objects/inline_call.rs | 25 ++++--- src/objects/inline_src.rs | 25 +++---- src/objects/link.rs | 22 ++++--- src/objects/macros.rs | 78 +++++++++++++++------- src/objects/mod.rs | 52 +++++++-------- src/objects/snippet.rs | 17 +++-- src/objects/target.rs | 51 ++++++++------- src/parser.rs | 2 + src/utils.rs | 129 +++---------------------------------- 18 files changed, 285 insertions(+), 297 deletions(-) diff --git a/README.md b/README.md index 9abb908..d7c665b 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,34 @@ _Section 2_ Alternatively, you can use the built-in render. +```rust +use orgize::{HtmlHandler, Render}; +use std::io::Cursor; + +fn main() { + let contents = r#"* Title 1 +*Section 1* +** Title 2 +_Section 2_ +* Title 3 +/Section 3/ +* Title 4 +=Section 4="#; + + let cursor = Cursor::new(Vec::new()); + let mut render = Render::new(HtmlHandler, cursor, &contents); + + render + .render() + .expect("something went wrong rendering the file"); + + println!( + "{}", + String::from_utf8(render.into_wirter().into_inner()).expect("invalid utf-8") + ); +} +``` + ## License MIT diff --git a/src/elements/block.rs b/src/elements/block.rs index 186d82c..fa7edf4 100644 --- a/src/elements/block.rs +++ b/src/elements/block.rs @@ -1,4 +1,5 @@ use lines::Lines; +use memchr::memchr2; #[cfg_attr(test, derive(PartialEq))] #[derive(Debug)] @@ -7,12 +8,14 @@ pub struct Block; impl Block { // return (name, args, contents-begin, contents-end, end) pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> { - if src.len() < 17 || !src[0..8].eq_ignore_ascii_case("#+BEGIN_") { + debug_assert!(src.starts_with("#+")); + + if !src[2..8].eq_ignore_ascii_case("BEGIN_") { return None; } - let name = until_while!(src, 8, |c| c == b' ' || c == b'\n', |c: u8| c - .is_ascii_alphabetic())?; + let name = memchr2(b' ', b'\n', src.as_bytes()) + .filter(|&i| src.as_bytes()[8..i].iter().all(|c| c.is_ascii_alphabetic()))?; let mut lines = Lines::new(src); let (pre_cont_end, cont_beg, _) = lines.next()?; let args = if pre_cont_end == name { diff --git a/src/elements/dyn_block.rs b/src/elements/dyn_block.rs index 804be27..5b84159 100644 --- a/src/elements/dyn_block.rs +++ b/src/elements/dyn_block.rs @@ -1,3 +1,6 @@ +use lines::Lines; +use memchr::memchr2; + #[cfg_attr(test, derive(PartialEq))] #[derive(Debug)] pub struct DynBlock; @@ -5,17 +8,26 @@ pub struct DynBlock; impl DynBlock { // return (name, parameters, contents-begin, contents-end, end) pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> { - if src.len() < 17 || !src[0..9].eq_ignore_ascii_case("#+BEGIN: ") { + debug_assert!(src.starts_with("#+")); + + if !src[2..9].eq_ignore_ascii_case("BEGIN: ") { return None; } + let bytes = src.as_bytes(); let args = eol!(src); - let name = until_while!(src, 9, |c| c == b' ' || c == b'\n', |c: u8| c - .is_ascii_alphabetic())?; + let name = memchr2(b' ', b'\n', &bytes[9..]) + .map(|i| i + 9) + .filter(|&i| { + src.as_bytes()[9..i] + .iter() + .all(|&c| c.is_ascii_alphabetic()) + })?; + let mut lines = Lines::new(src); + let (mut pre_cont_end, _, _) = lines.next()?; - let mut pos = 0; - for line_end in lines!(src) { - if src[pos..line_end].trim().eq_ignore_ascii_case("#+END:") { + while let Some((cont_end, end, line)) = lines.next() { + if line.trim().eq_ignore_ascii_case("#+END:") { return Some(( &src[8..name].trim(), if name == args { @@ -24,11 +36,11 @@ impl DynBlock { Some(&src[name..args].trim()) }, args, - pos, - line_end, + pre_cont_end, + end, )); } - pos = line_end; + pre_cont_end = cont_end; } None @@ -45,6 +57,6 @@ CONTENTS #+END: " ), - Some(("clocktable", Some(":scope file"), 31, 41, 48)) + Some(("clocktable", Some(":scope file"), 31, 40, 48)) ) } diff --git a/src/elements/fn_def.rs b/src/elements/fn_def.rs index f7af320..6d4b3eb 100644 --- a/src/elements/fn_def.rs +++ b/src/elements/fn_def.rs @@ -1,23 +1,19 @@ +use memchr::memchr; + #[cfg_attr(test, derive(PartialEq))] #[derive(Debug)] pub struct FnDef; -#[inline] -fn valid_label(ch: u8) -> bool { - ch.is_ascii_alphanumeric() || ch == b'-' || ch == b'_' -} - impl FnDef { pub fn parse(src: &str) -> Option<(&str, &str, usize)> { - if cfg!(test) { - starts_with!(src, "[fn:"); - } + debug_assert!(src.starts_with("[fn:")); - let label = until_while!(src, 4, b']', valid_label)?; - - if label == 4 { - return None; - } + let label = memchr(b']', src.as_bytes()).filter(|&i| { + i != 4 + && src.as_bytes()[4..i] + .iter() + .all(|&c| c.is_ascii_alphanumeric() || c == b'-' || c == b'_') + })?; let end = eol!(src); diff --git a/src/elements/keyword.rs b/src/elements/keyword.rs index c8f1190..4bf7fd2 100644 --- a/src/elements/keyword.rs +++ b/src/elements/keyword.rs @@ -1,3 +1,5 @@ +use memchr::{memchr, memchr2}; + pub struct Keyword; #[cfg_attr(test, derive(PartialEq))] @@ -25,16 +27,18 @@ pub enum Key<'a> { impl Keyword { // return (key, value, offset) pub fn parse(src: &str) -> Option<(Key<'_>, &str, usize)> { - if cfg!(test) { - starts_with!(src, "#+"); - } + debug_assert!(src.starts_with("#+")); - let key_end = until_while!(src, 2, |c| c == b':' || c == b'[', |c: u8| c - .is_ascii_alphabetic() - || c == b'_')?; + let bytes = src.as_bytes(); + let key_end = memchr2(b':', b'[', bytes).filter(|&i| { + bytes[2..i] + .iter() + .all(|&c| c.is_ascii_alphabetic() || c == b'_') + })?; - let option = if src.as_bytes()[key_end] == b'[' { - let option = until_while!(src, key_end, b']', |c: u8| c != b'\n')?; + let option = if bytes[key_end] == b'[' { + let option = + memchr(b']', bytes).filter(|&i| bytes[key_end..i].iter().all(|&c| c != b'\n'))?; expect!(src, option + 1, b':')?; option + 1 } else { @@ -100,8 +104,6 @@ fn parse() { ); assert!(Keyword::parse("#+KE Y: VALUE").is_none()); assert!(Keyword::parse("#+ KEY: VALUE").is_none()); - assert!(Keyword::parse("# +KEY: VALUE").is_none()); - assert!(Keyword::parse(" #+KEY: VALUE").is_none()); assert_eq!( Keyword::parse("#+RESULTS:"), diff --git a/src/elements/list.rs b/src/elements/list.rs index 6b97533..77dfd83 100644 --- a/src/elements/list.rs +++ b/src/elements/list.rs @@ -15,7 +15,7 @@ impl List { let i = bytes .iter() .position(|&c| !c.is_ascii_digit()) - .unwrap_or_else(|| src.len()); + .unwrap_or_else(|| src.len() - 1); let c = bytes[i]; if !(c == b'.' || c == b')') { return (false, false); diff --git a/src/objects/cookie.rs b/src/objects/cookie.rs index 4e13c1d..fd152f5 100644 --- a/src/objects/cookie.rs +++ b/src/objects/cookie.rs @@ -1,3 +1,5 @@ +use memchr::{memchr, memchr2}; + #[cfg_attr(test, derive(PartialEq))] #[derive(Debug)] pub struct Cookie<'a> { @@ -6,22 +8,25 @@ pub struct Cookie<'a> { impl<'a> Cookie<'a> { pub fn parse(src: &'a str) -> Option<(Cookie<'a>, usize)> { - if cfg!(test) { - starts_with!(src, '['); - } + debug_assert!(src.starts_with("[")); - let num1 = until_while!(src, 1, |c| c == b'%' || c == b'/', |c: u8| c - .is_ascii_digit())?; + let num1 = memchr2(b'%', b'/', src.as_bytes()) + .filter(|&i| src.as_bytes()[1..i].iter().all(|c| c.is_ascii_digit()))?; if src.as_bytes()[num1] == b'%' && *src.as_bytes().get(num1 + 1)? == b']' { Some(( Cookie { - value: &src[0..num1 + 2], + value: &src[0..=num1 + 1], }, num1 + 2, )) } else { - let num2 = until_while!(src, num1 + 1, b']', |c: u8| c.is_ascii_digit())?; + let num2 = memchr(b']', src.as_bytes()).filter(|&i| { + src.as_bytes()[num1 + 1..i] + .iter() + .all(|c| c.is_ascii_digit()) + })?; + Some(( Cookie { value: &src[0..=num2], diff --git a/src/objects/emphasis.rs b/src/objects/emphasis.rs index 7ec452d..75b1602 100644 --- a/src/objects/emphasis.rs +++ b/src/objects/emphasis.rs @@ -1,3 +1,5 @@ +use memchr::memchr; + pub struct Emphasis; impl Emphasis { @@ -5,13 +7,10 @@ impl Emphasis { pub fn parse(src: &str, marker: u8) -> Option { expect!(src, 1, |c: u8| !c.is_ascii_whitespace())?; - let mut lines = 0; - let end = until_while!(src, 1, marker, |c| { - if c == b'\n' { - lines += 1; - } - lines < 2 - })?; + let bytes = src.as_bytes(); + let end = memchr(marker, &bytes[1..]) + .map(|i| i + 1) + .filter(|&i| bytes[1..i].iter().filter(|&&c| c == b'\n').count() < 2)?; expect!(src, end - 1, |c: u8| !c.is_ascii_whitespace())?; diff --git a/src/objects/fn_ref.rs b/src/objects/fn_ref.rs index 36e3931..bdffa90 100644 --- a/src/objects/fn_ref.rs +++ b/src/objects/fn_ref.rs @@ -1,3 +1,5 @@ +use memchr::{memchr2, memchr2_iter}; + #[cfg_attr(test, derive(PartialEq))] #[derive(Debug)] pub struct FnRef<'a> { @@ -5,26 +7,32 @@ pub struct FnRef<'a> { definition: Option<&'a str>, } -fn valid_label(ch: u8) -> bool { - ch.is_ascii_alphanumeric() || ch == b'-' || ch == b'_' +fn valid_label(ch: &u8) -> bool { + ch.is_ascii_alphanumeric() || *ch == b'-' || *ch == b'_' } impl<'a> FnRef<'a> { pub fn parse(src: &'a str) -> Option<(FnRef<'a>, usize)> { - starts_with!(src, "[fn:"); + debug_assert!(src.starts_with("[fn:")); - let label = until_while!(src, 4, |c| c == b']' || c == b':', valid_label)?; + let bytes = src.as_bytes(); + let label = memchr2(b']', b':', &bytes[4..]) + .map(|i| i + 4) + .filter(|&i| bytes[4..i].iter().all(valid_label))?; - if src.as_bytes()[label] == b':' { + if bytes[label] == b':' { let mut pairs = 1; - let def = until!(src[label..], |c| { - if c == b'[' { - pairs += 1; - } else if c == b']' { - pairs -= 1; - } - c == b']' && pairs == 0 - })? + label; + let def = memchr2_iter(b'[', b']', &bytes[label..]) + .map(|i| i + label) + .filter(|&i| { + if bytes[i] == b'[' { + pairs += 1; + } else { + pairs -= 1; + } + pairs == 0 + }) + .next()?; Some(( FnRef { diff --git a/src/objects/inline_call.rs b/src/objects/inline_call.rs index aa92318..fabeba2 100644 --- a/src/objects/inline_call.rs +++ b/src/objects/inline_call.rs @@ -1,3 +1,5 @@ +use memchr::{memchr, memchr2}; + #[cfg_attr(test, derive(PartialEq))] #[derive(Debug)] pub struct InlineCall<'a> { @@ -11,17 +13,21 @@ pub struct InlineCall<'a> { impl<'a> InlineCall<'a> { pub fn parse(src: &'a str) -> Option<(InlineCall, usize)> { - starts_with!(src, "call_"); + debug_assert!(src.starts_with("call_")); - let mut pos = until_while!(src, 5, |c| c == b'[' || c == b'(', |c: u8| c - .is_ascii_graphic())?; + let bytes = src.as_bytes(); + let mut pos = memchr2(b'[', b'(', bytes) + .filter(|&i| bytes[5..i].iter().all(|c| c.is_ascii_graphic()))?; let mut pos_; let name = &src[5..pos]; - let inside_header = if src.as_bytes()[pos] == b'[' { + let inside_header = if bytes[pos] == b'[' { pos_ = pos; - pos = until_while!(src, pos, b']', |c: u8| c != b'\n')? + 1; + pos = memchr(b']', &bytes[pos..]) + .map(|i| i + pos) + .filter(|&i| bytes[pos..i].iter().all(|&c| c != b'\n'))? + + 1; expect!(src, pos, b'(')?; Some(&src[pos_ + 1..pos - 1]) } else { @@ -29,13 +35,16 @@ impl<'a> InlineCall<'a> { }; pos_ = pos; - pos = until_while!(src, pos, b')', |c| c != b'\n')?; + pos = memchr(b')', &bytes[pos..]) + .map(|i| i + pos) + .filter(|&i| bytes[pos..i].iter().all(|&c| c != b'\n'))?; let args = &src[pos_ + 1..pos]; let end_header = if src.len() > pos + 1 && src.as_bytes()[pos + 1] == b'[' { pos_ = pos; - pos = until_while!(src, pos_ + 1, |c| c == b']', |c: u8| c != b'\n' - && c != b')')?; + pos = memchr(b']', &bytes[pos_ + 1..]) + .map(|i| i + pos_ + 1) + .filter(|&i| bytes[pos_ + 1..i].iter().all(|&c| c != b'\n' && c != b')'))?; Some(&src[pos_ + 2..pos]) } else { None diff --git a/src/objects/inline_src.rs b/src/objects/inline_src.rs index 5482b39..a481593 100644 --- a/src/objects/inline_src.rs +++ b/src/objects/inline_src.rs @@ -1,3 +1,5 @@ +use memchr::{memchr, memchr2}; + #[cfg_attr(test, derive(PartialEq))] #[derive(Debug)] pub struct InlineSrc<'a> { @@ -8,18 +10,18 @@ pub struct InlineSrc<'a> { impl<'a> InlineSrc<'a> { pub fn parse(src: &'a str) -> Option<(InlineSrc, usize)> { - starts_with!(src, "src_"); + debug_assert!(src.starts_with("src_")); - let lang = until_while!(src, 4, |c| c == b'[' || c == b'{', |c: u8| !c - .is_ascii_whitespace())?; + let bytes = src.as_bytes(); + let lang = memchr2(b'[', b'{', bytes) + .filter(|&i| i != 4 && bytes[4..i].iter().all(|c| !c.is_ascii_whitespace()))?; - if lang == 4 { - return None; - } - - if src.as_bytes()[lang] == b'[' { - let option = until_while!(src, lang, b']', |c| c != b'\n')?; - let body = until_while!(src, option, b'}', |c| c != b'\n')?; + if bytes[lang] == b'[' { + let option = + memchr(b']', bytes).filter(|&i| bytes[lang..i].iter().all(|c| *c != b'\n'))?; + let body = memchr(b'}', &bytes[option..]) + .map(|i| i + option) + .filter(|&i| bytes[option..i].iter().all(|c| *c != b'\n'))?; Some(( InlineSrc { @@ -30,7 +32,8 @@ impl<'a> InlineSrc<'a> { body + 1, )) } else { - let body = until_while!(src, lang, b'}', |c| c != b'\n')?; + let body = + memchr(b'}', bytes).filter(|&i| bytes[lang..i].iter().all(|c| *c != b'\n'))?; Some(( InlineSrc { diff --git a/src/objects/link.rs b/src/objects/link.rs index e9600f2..70a96b8 100644 --- a/src/objects/link.rs +++ b/src/objects/link.rs @@ -1,3 +1,5 @@ +use memchr::memchr; + #[cfg_attr(test, derive(PartialEq))] #[derive(Debug)] pub struct Link<'a> { @@ -7,13 +9,16 @@ pub struct Link<'a> { impl<'a> Link<'a> { pub fn parse(src: &'a str) -> Option<(Link<'a>, usize)> { - if cfg!(test) { - starts_with!(src, "[["); - } + debug_assert!(src.starts_with("[[")); - let path = until_while!(src, 2, b']', |c| c != b'<' && c != b'>' && c != b'\n')?; + let bytes = src.as_bytes(); + let path = memchr(b']', bytes).filter(|&i| { + bytes[2..i] + .iter() + .all(|&c| c != b'<' && c != b'>' && c != b'\n') + })?; - if cond_eq!(src, path + 1, b']') { + if *bytes.get(path + 1)? == b']' { Some(( Link { path: &src[2..path], @@ -21,8 +26,10 @@ impl<'a> Link<'a> { }, path + 2, )) - } else if src.as_bytes()[path + 1] == b'[' { - let desc = until_while!(src, path + 2, b']', |c| c != b'[')?; + } else if bytes[path + 1] == b'[' { + let desc = memchr(b']', &bytes[path + 2..]) + .map(|i| i + path + 2) + .filter(|&i| bytes[path + 2..i].iter().all(|&c| c != b'['))?; expect!(src, desc + 1, b']')?; Some(( @@ -61,5 +68,4 @@ fn parse() { ) ); assert!(Link::parse("[[#id][desc]").is_none()); - assert!(Link::parse("[#id][desc]]").is_none()); } diff --git a/src/objects/macros.rs b/src/objects/macros.rs index ffc22ef..88befa8 100644 --- a/src/objects/macros.rs +++ b/src/objects/macros.rs @@ -1,4 +1,5 @@ use jetscii::Substring; +use memchr::memchr2; #[cfg_attr(test, derive(PartialEq))] #[derive(Debug)] @@ -7,54 +8,83 @@ pub struct Macros<'a> { pub args: Option<&'a str>, } -fn valid_name(ch: u8) -> bool { - ch.is_ascii_alphanumeric() || ch == b'-' || ch == b'_' -} - impl<'a> Macros<'a> { pub fn parse(src: &'a str) -> Option<(Macros<'a>, usize)> { - starts_with!(src, "{{{"); + debug_assert!(src.starts_with("{{{")); expect!(src, 3, |c: u8| c.is_ascii_alphabetic())?; - let name = until_while!(src, 3, |c| c == b'}' || c == b'(', valid_name)?; + let bytes = src.as_bytes(); + let name = memchr2(b'}', b'(', bytes).filter(|&i| { + bytes[3..i] + .iter() + .all(|&c| c.is_ascii_alphanumeric() || c == b'-' || c == b'_') + })?; - if src.as_bytes()[name] == b'}' { + Some(if bytes[name] == b'}' { expect!(src, name + 1, b'}')?; expect!(src, name + 2, b'}')?; - Some(( + ( Macros { name: &src[3..name], args: None, }, name + 3, - )) + ) } else { - let end = Substring::new("}}}").find(&src[name..]).map(|i| i + name)?; - expect!(src, end - 1, b')')?; - Some(( + let end = Substring::new(")}}}") + .find(&src[name..]) + .map(|i| i + name)?; + ( Macros { name: &src[3..name], args: if name == end { None } else { - Some(&src[name + 1..end - 1]) + Some(&src[name + 1..end]) }, }, - end + 3, - )) - } + end + 4, + ) + }) } } #[test] fn parse() { - parse_succ!(Macros, "{{{poem(red,blue)}}}", name: "poem", args: Some("red,blue")); - parse_succ!(Macros, "{{{poem())}}}", name: "poem", args: Some(")")); - parse_succ!(Macros, "{{{author}}}", name: "author", args: None); - parse_fail!(Macros, "{{author}}}"); - parse_fail!(Macros, "{{{0uthor}}}"); - parse_fail!(Macros, "{{{author}}"); - parse_fail!(Macros, "{{{poem(}}}"); - parse_fail!(Macros, "{{{poem)}}}"); + assert_eq!( + Macros::parse("{{{poem(red,blue)}}}"), + Some(( + Macros { + name: "poem", + args: Some("red,blue") + }, + "{{{poem(red,blue)}}}".len() + )) + ); + assert_eq!( + Macros::parse("{{{poem())}}}"), + Some(( + Macros { + name: "poem", + args: Some(")") + }, + "{{{poem())}}}".len() + )) + ); + assert_eq!( + Macros::parse("{{{author}}}"), + Some(( + Macros { + name: "author", + args: None + }, + "{{{author}}}".len() + )) + ); + + assert_eq!(Macros::parse("{{{0uthor}}}"), None); + assert_eq!(Macros::parse("{{{author}}"), None); + assert_eq!(Macros::parse("{{{poem(}}}"), None); + assert_eq!(Macros::parse("{{{poem)}}}"), None); } diff --git a/src/objects/mod.rs b/src/objects/mod.rs index 0fdfac6..b47c861 100644 --- a/src/objects/mod.rs +++ b/src/objects/mod.rs @@ -1,8 +1,6 @@ mod cookie; mod emphasis; -mod entity; mod fn_ref; -mod fragment; mod inline_call; mod inline_src; mod link; @@ -67,51 +65,47 @@ impl<'a> Object<'a> { let mut pre = pos; - match (bytes[pos], bytes[pos + 1], bytes[pos + 2]) { - (b'@', b'@', _) => { + match bytes[pos] { + b'@' if bytes[pos + 1] == b'@' => { if let Some((snippet, off)) = Snippet::parse(&src[pos..]) { brk!(Object::Snippet(snippet), off, pos); } } - (b'{', b'{', b'{') => { + b'{' if bytes[pos + 1] == b'{' && bytes[pos + 2] == b'{' => { if let Some((macros, off)) = Macros::parse(&src[pos..]) { brk!(Object::Macros(macros), off, pos); } } - (b'<', b'<', b'<') => { - if let Some((target, off)) = RadioTarget::parse(&src[pos..]) { - brk!(Object::RadioTarget(target), off, pos); - } - } - (b'<', b'<', third) => { - if third != b'\n' { + b'<' if bytes[pos + 1] == b'<' => { + if bytes[pos + 2] == b'<' { + if let Some((target, off)) = RadioTarget::parse(&src[pos..]) { + brk!(Object::RadioTarget(target), off, pos); + } + } else if bytes[pos + 2] != b'\n' { if let Some((target, off)) = Target::parse(&src[pos..]) { brk!(Object::Target(target), off, pos); } } } - (b'[', b'f', b'n') => { - if let Some((fn_ref, off)) = FnRef::parse(&src[pos..]) { - brk!(Object::FnRef(fn_ref), off, pos); + b'[' => { + if bytes[pos + 1..].starts_with(b"fn:") { + if let Some((fn_ref, off)) = FnRef::parse(&src[pos..]) { + brk!(Object::FnRef(fn_ref), off, pos); + } } - } - (b'[', b'[', _) => { - if let Some((link, off)) = Link::parse(&src[pos..]) { - brk!(Object::Link(link), off, pos); + + if bytes[pos + 1] == b'[' { + if let Some((link, off)) = Link::parse(&src[pos..]) { + brk!(Object::Link(link), off, pos); + } } - } - (b'[', _, _) => { + if let Some((cookie, off)) = Cookie::parse(&src[pos..]) { brk!(Object::Cookie(cookie), off, pos); } // TODO: Timestamp } - (b'{', _, _) - | (b' ', _, _) - | (b'"', _, _) - | (b',', _, _) - | (b'(', _, _) - | (b'\n', _, _) => pre += 1, + b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => pre += 1, _ => (), } @@ -146,12 +140,12 @@ impl<'a> Object<'a> { brk!(Object::Code(&src[pre + 1..pre + end]), end + 1, pre); } } - b'c' => { + b'c' if src[pre..].starts_with("call_") => { if let Some((call, off)) = InlineCall::parse(&src[pre..]) { brk!(Object::InlineCall(call), off, pre); } } - b's' => { + b's' if src[pre..].starts_with("src_") => { if let Some((src, off)) = InlineSrc::parse(&src[pre..]) { brk!(Object::InlineSrc(src), off, pre); } diff --git a/src/objects/snippet.rs b/src/objects/snippet.rs index 0c2ddc4..c782133 100644 --- a/src/objects/snippet.rs +++ b/src/objects/snippet.rs @@ -1,4 +1,5 @@ use jetscii::Substring; +use memchr::memchr; #[cfg_attr(test, derive(PartialEq))] #[derive(Debug)] @@ -9,15 +10,14 @@ pub struct Snippet<'a> { impl<'a> Snippet<'a> { pub fn parse(src: &'a str) -> Option<(Snippet<'a>, usize)> { - if cfg!(test) { - starts_with!(src, "@@"); - } + debug_assert!(src.starts_with("@@")); - let name = until_while!(src, 2, b':', |c: u8| c.is_ascii_alphanumeric() || c == b'-')?; - - if name == 2 { - return None; - } + let name = memchr(b':', src.as_bytes()).filter(|&i| { + i != 2 + && src.as_bytes()[2..i] + .iter() + .all(|&c| c.is_ascii_alphanumeric() || c == b'-') + })?; let end = Substring::new("@@") .find(&src[name + 1..]) @@ -66,7 +66,6 @@ fn parse() { ) ); assert!(Snippet::parse("@@html:@").is_none()); - assert!(Snippet::parse("@html:@@").is_none()); assert!(Snippet::parse("@@html@@").is_none()); assert!(Snippet::parse("@@:@@").is_none()); } diff --git a/src/objects/target.rs b/src/objects/target.rs index e5ca8ec..24a8bca 100644 --- a/src/objects/target.rs +++ b/src/objects/target.rs @@ -1,3 +1,5 @@ +use jetscii::Substring; + #[cfg_attr(test, derive(PartialEq))] #[derive(Debug)] // TODO: text-markup, entities, latex-fragments, subscript and superscript @@ -5,17 +7,17 @@ pub struct RadioTarget<'a>(&'a str); impl<'a> RadioTarget<'a> { pub fn parse(src: &'a str) -> Option<(RadioTarget<'a>, usize)> { - if cfg!(test) { - starts_with!(src, "<<<"); - } + debug_assert!(src.starts_with("<<<")); expect!(src, 3, |c| c != b' ')?; - let end = until_while!(src, 3, b'>', |c| c != b'<' && c != b'\n')?; + let end = Substring::new(">>>").find(src).filter(|&i| { + src.as_bytes()[3..i] + .iter() + .all(|&c| c != b'<' && c != b'\n' && c != b'>') + })?; expect!(src, end - 1, |c| c != b' ')?; - expect!(src, end + 1, b'>')?; - expect!(src, end + 2, b'>')?; Some((RadioTarget(&src[3..end]), end + 3)) } @@ -27,16 +29,17 @@ pub struct Target<'a>(&'a str); impl<'a> Target<'a> { pub fn parse(src: &'a str) -> Option<(Target<'a>, usize)> { - if cfg!(test) { - starts_with!(src, "<<"); - } + debug_assert!(src.starts_with("<<")); expect!(src, 2, |c| c != b' ')?; - let end = until_while!(src, 2, b'>', |c| c != b'<' && c != b'\n')?; + let end = Substring::new(">>").find(src).filter(|&i| { + src.as_bytes()[2..i] + .iter() + .all(|&c| c != b'<' && c != b'\n' && c != b'>') + })?; expect!(src, end - 1, |c| c != b' ')?; - expect!(src, end + 1, b'>')?; Some((Target(&src[2..end]), end + 2)) } @@ -52,13 +55,12 @@ fn parse() { RadioTarget::parse("<<>>").unwrap(), (RadioTarget("tar get"), "<<>>".len()) ); - parse_fail!(RadioTarget, "<<>>"); - parse_fail!(RadioTarget, "<<< target>>>"); - parse_fail!(RadioTarget, "<<>>"); - parse_fail!(RadioTarget, "<<get>>>"); - parse_fail!(RadioTarget, "<<>>"); - parse_fail!(RadioTarget, "<>>"); - parse_fail!(RadioTarget, "<<>"); + assert_eq!(RadioTarget::parse("<<>>"), None); + assert_eq!(RadioTarget::parse("<<< target>>>"), None); + assert_eq!(RadioTarget::parse("<<>>"), None); + assert_eq!(RadioTarget::parse("<<get>>>"), None); + assert_eq!(RadioTarget::parse("<<>>"), None); + assert_eq!(RadioTarget::parse("<<>"), None); assert_eq!( Target::parse("<>").unwrap(), @@ -68,11 +70,10 @@ fn parse() { Target::parse("<>").unwrap(), (Target("tar get"), "<>".len()) ); - parse_fail!(Target, "<>"); - parse_fail!(Target, "<< target>>"); - parse_fail!(Target, "<>"); - parse_fail!(Target, "<get>>"); - parse_fail!(Target, "<>"); - parse_fail!(Target, ">"); - parse_fail!(Target, "<"); + assert_eq!(Target::parse("<>"), None); + assert_eq!(Target::parse("<< target>>"), None); + assert_eq!(Target::parse("<>"), None); + assert_eq!(Target::parse("<get>>"), None); + assert_eq!(Target::parse("<>"), None); + assert_eq!(Target::parse("<"), None); } diff --git a/src/parser.rs b/src/parser.rs index d383a96..1a011db 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -414,6 +414,8 @@ impl<'a> Iterator for Parser<'a> { .cloned() .map(|x| match x { Container::Headline { beg, end } => { + debug_assert!(self.off >= beg); + debug_assert!(self.off <= end); if self.off >= end { self.end() } else if self.off == beg { diff --git a/src/utils.rs b/src/utils.rs index 241f276..a052273 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -22,99 +22,21 @@ macro_rules! eol { }; } -#[macro_export] -macro_rules! until { - ($src:expr, $until:tt) => {{ - let mut pos = 0; - loop { - if pos >= $src.len() { - break None; - } - - if $until == $src.as_bytes()[pos] { - break Some(pos); - } else { - pos += 1; - } - } - }}; - ($src:expr, $until:expr) => {{ - let mut pos = 0; - loop { - if pos >= $src.len() { - break None; - } - - if $until($src.as_bytes()[pos]) { - break Some(pos); - } else { - pos += 1; - } - } - }}; -} - -#[macro_export] -macro_rules! until_while { - ($src:expr, $start:expr, $until:tt, $while:expr) => {{ - let mut pos = $start; - loop { - if pos >= $src.len() { - break None; - } else if $until == $src.as_bytes()[pos] { - break Some(pos); - } else if $while($src.as_bytes()[pos]) { - pos += 1; - continue; - } else { - break None; - } - } - }}; - ($src:expr, $start:expr, $until:expr, $while:expr) => {{ - let mut pos = $start; - loop { - if pos >= $src.len() { - break None; - } else if $until($src.as_bytes()[pos]) { - break Some(pos); - } else if $while($src.as_bytes()[pos]) { - pos += 1; - continue; - } else { - break None; - } - } - }}; -} - -#[macro_export] -macro_rules! cond_eq { - ($s:ident, $i:expr, $p:expr) => { - if $i >= $s.len() { - return None; - } else { - $s.as_bytes()[$i] == $p - } - }; -} - -#[macro_export] -macro_rules! starts_with { - ($s:ident, $p:expr) => { - if !$s.starts_with($p) { - return None; - } - }; -} - #[macro_export] macro_rules! skip_space { ($src:ident) => { - until!($src, |c| c != b' ' && c != b'\t').unwrap_or(0) + $src.as_bytes() + .iter() + .position(|c| c != b' ' && c != b'\t') + .unwrap_or(0) }; ($src:ident, $from:expr) => { - until!($src[$from..], |c| c != b' ' && c != b'\t').unwrap_or(0) + $from + $src[$from..] + .as_bytes() + .iter() + .position(|&c| c != b' ' && c != b'\t') + .map(|i| i + $from) + .unwrap_or(0) }; } @@ -131,34 +53,3 @@ macro_rules! skip_empty_line { } }}; } - -#[macro_export] -macro_rules! parse_fail { - ($ty:ident, $src:expr) => { - assert_eq!($ty::parse($src), None); - }; -} - -#[macro_export] -macro_rules! parse_succ { - ($ty:ident, $src:expr, $($field:ident : $value:expr),* ) => { - assert_eq!( - $ty::parse($src), - Some(( - $ty { - $( $field : $value ),* - }, - $src.len() - )), - ); - }; -} - -#[macro_export] -macro_rules! lines { - ($src:ident) => { - memchr::memchr_iter(b'\n', $src.as_bytes()) - .map(|i| i + 1) - .chain(std::iter::once($src.len())) - }; -}