From 6f7fa9c92087ca3fc9c2bbde1aadfa595b382a95 Mon Sep 17 00:00:00 2001 From: PoiScript Date: Thu, 10 Jan 2019 20:58:13 +0800 Subject: [PATCH] update --- .gitignore | 4 + benches/parse.rs | 31 ++++++ src/elements/keyword.rs | 26 ++--- src/elements/mod.rs | 145 +++++++++++++++++++++++++- src/elements/rule.rs | 9 +- src/headline.rs | 22 ++-- src/lib.rs | 2 + src/objects/emphasis.rs | 16 ++- src/objects/entity.rs | 2 +- src/objects/link.rs | 2 +- src/objects/macros.rs | 46 +++------ src/objects/mod.rs | 195 ++++++++++++++++++++++++++--------- src/objects/snippet.rs | 4 +- src/objects/target.rs | 44 ++++---- src/parser.rs | 223 +++++++++++++++++++++++++++++++++------- src/utils.rs | 80 ++++++++------ 16 files changed, 622 insertions(+), 229 deletions(-) create mode 100644 benches/parse.rs diff --git a/.gitignore b/.gitignore index 6936990..4ca2515 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ /target **/*.rs.bk Cargo.lock + +benches/*.org +.gdb_history +perf.data* diff --git a/benches/parse.rs b/benches/parse.rs new file mode 100644 index 0000000..c678c2f --- /dev/null +++ b/benches/parse.rs @@ -0,0 +1,31 @@ +#![feature(test)] + +extern crate org; +extern crate test; + +use org::Parser; +use test::Bencher; + +#[bench] +fn org_syntax(b: &mut Bencher) { + // wget https://orgmode.org/worg/sources/dev/org-syntax.org + b.iter(|| { + let _ = Parser::new(include_str!("org-syntax.org")).collect::>(); + }) +} + +#[bench] +fn doc(b: &mut Bencher) { + // wget https://orgmode.org/worg/sources/doc.org + b.iter(|| { + let _ = Parser::new(include_str!("doc.org")).collect::>(); + }) +} + +#[bench] +fn org_faq(b: &mut Bencher) { + // wget https://orgmode.org/worg/sources/org-faq.org + b.iter(|| { + let _ = Parser::new(include_str!("org-faq.org")).collect::>(); + }) +} diff --git a/src/elements/keyword.rs b/src/elements/keyword.rs index 3df61f1..d58beb6 100644 --- a/src/elements/keyword.rs +++ b/src/elements/keyword.rs @@ -12,25 +12,13 @@ impl<'a> Keyword<'a> { let end = eol!(src); - if end == key + 1 { - Some(( - Keyword { - key: &src[2..key], - value: "", - }, - end, - )) - } else { - let space = position!(src, key + 1, |c| !c.is_ascii_whitespace()); - - Some(( - Keyword { - key: &src[2..key], - value: &src[space..end], - }, - end, - )) - } + Some(( + Keyword { + key: &src[2..key], + value: &src[key + 1..end].trim(), + }, + end, + )) } } diff --git a/src/elements/mod.rs b/src/elements/mod.rs index 437e372..15455db 100644 --- a/src/elements/mod.rs +++ b/src/elements/mod.rs @@ -6,13 +6,150 @@ pub use self::fn_def::FnDef; pub use self::keyword::Keyword; pub use self::rule::Rule; +#[cfg_attr(test, derive(PartialEq, Debug))] pub enum Element<'a> { - Paragraph(&'a str), + Paragraph { + // end of the contents + end: usize, + // trailing space + trailing: usize, + }, + Keyword(Keyword<'a>), + FnDef(FnDef<'a>), + + Rule, + Comment(&'a str), } impl<'a> Element<'a> { - pub fn find_elem(src: &'a str) -> (Element<'a>, usize) { - // TODO - (Element::Paragraph(src), src.len()) + pub fn next_2(src: &'a str) -> (usize, Option>, Option<(Element<'a>, usize)>) { + let bytes = src.as_bytes(); + + let mut pos = skip_empty_line!(src, 0); + let start = pos; + + if start == src.len() { + return (start, None, None); + } + + loop { + if pos >= src.len() { + return ( + start, + Some(Element::Paragraph { + end: if bytes[pos - 1] == b'\n' { + pos - 1 + } else { + pos + }, + trailing: pos, + }), + None, + ); + } + + // TODO: refactor with src[..].find('\n') + if pos == start || bytes[pos - 1] == b'\n' { + // Unlike other element, footnote definition must starts at column 0 + if bytes[pos] == b'[' { + if let Some((fd, off)) = FnDef::parse(&src[pos..]) { + return if pos == start { + (off + 1, Some(Element::FnDef(fd)), None) + } else { + ( + start, + Some(Element::Paragraph { + end: if pos == start { pos } else { pos - 1 }, + trailing: pos, + }), + Some((Element::FnDef(fd), off + 1)), + ) + }; + } + } + + let end = pos; + pos = skip_space!(src, pos); + + if pos <= src.len() { + if bytes[pos] == b'\n' { + return ( + start, + Some(Element::Paragraph { + end: if pos == start { end } else { end - 1 }, + trailing: pos, + }), + None, + ); + } + + // TODO: LaTeX environment + if bytes[pos] == b'\\' {} + + // Rule + if bytes[pos] == b'-' { + if let Some(off) = Rule::parse(&src[pos..]) { + return if pos == start { + (off, Some(Element::Rule), None) + } else { + ( + start, + Some(Element::Paragraph { + end: if pos == start { end } else { end - 1 }, + trailing: pos, + }), + Some((Element::Rule, off)), + ) + }; + } + } + + if bytes[pos] == b'#' { + // Keyword + if bytes[pos + 1] == b'+' { + if let Some((kw, off)) = Keyword::parse(&src[pos..]) { + return if pos == start { + (off, Some(Element::Keyword(kw)), None) + } else { + ( + start, + Some(Element::Paragraph { + end: if pos == start { end } else { end - 1 }, + trailing: pos - 1, + }), + Some((Element::Keyword(kw), off)), + ) + }; + } + } + + // Comment + if src.as_bytes()[pos + 1] == b' ' { + let eol = eol!(src, pos); + return if pos == start { + (eol, Some(Element::Comment(&src[pos + 1..eol])), None) + } else { + ( + start, + Some(Element::Paragraph { + end: if pos == start { end } else { end - 1 }, + trailing: pos - 1, + }), + Some((Element::Comment(&src[pos + 1..eol]), eol)), + ) + }; + } + } + } + } + + pos += 1 + } } } + +#[test] +fn next_2() { + // TODO: more tests + assert_eq!(Element::next_2("\n\n\n\n"), (4, None, None)); +} diff --git a/src/elements/rule.rs b/src/elements/rule.rs index 4623941..ee1f2fc 100644 --- a/src/elements/rule.rs +++ b/src/elements/rule.rs @@ -1,3 +1,4 @@ +#[cfg_attr(test, derive(PartialEq, Debug))] pub struct Rule; impl Rule { @@ -14,10 +15,10 @@ impl Rule { #[test] fn parse() { - assert!(Rule::parse("-----").is_some()); - assert!(Rule::parse("--------").is_some()); - assert!(Rule::parse(" -----").is_some()); - assert!(Rule::parse("\t\t-----").is_some()); + assert_eq!(Rule::parse("-----").unwrap(), "-----".len()); + assert_eq!(Rule::parse("--------").unwrap(), "--------".len()); + assert_eq!(Rule::parse(" -----").unwrap(), " -----".len()); + assert_eq!(Rule::parse("\t\t-----").unwrap(), "\t\t-----".len()); assert!(Rule::parse("").is_none()); assert!(Rule::parse("----").is_none()); diff --git a/src/headline.rs b/src/headline.rs index 755e27d..308143a 100644 --- a/src/headline.rs +++ b/src/headline.rs @@ -1,4 +1,4 @@ -#[derive(PartialEq, Debug)] +#[cfg_attr(test, derive(PartialEq, Debug))] pub struct Headline<'a> { pub level: usize, pub priority: Option, @@ -69,7 +69,7 @@ impl<'a> Headline<'a> { let eol = eol!(src); let end = Headline::find_level(&src[eol..], level) + eol; - let mut title_start = skip_whitespace!(src, level); + let mut title_start = skip_space!(src, level); let keyword = match Headline::parse_keyword(&src[title_start..eol]) { Some((k, l)) => { @@ -79,7 +79,7 @@ impl<'a> Headline<'a> { None => None, }; - title_start = skip_whitespace!(src, title_start); + title_start = skip_space!(src, title_start); let priority = match Headline::parse_priority(&src[title_start..eol]) { Some(p) => { @@ -89,7 +89,7 @@ impl<'a> Headline<'a> { None => None, }; - title_start = skip_whitespace!(src, title_start); + title_start = skip_space!(src, title_start); let (tags, title_off) = Headline::parse_tags(&src[title_start..eol]); @@ -112,35 +112,29 @@ impl<'a> Headline<'a> { // TODO: optimize pub fn find_level(src: &str, level: usize) -> usize { let mut pos = 0; - let end; 'outer: loop { if pos >= src.len() { - end = src.len(); - break; + return src.len(); } if src.as_bytes()[pos] == b'*' && (pos == 0 || src.as_bytes()[pos - 1] == b'\n') { let pos_ = pos; 'inner: loop { if pos >= src.len() { - end = src.len(); - break 'outer; + return src.len(); } if src.as_bytes()[pos] == b'*' { pos += 1; } else if src.as_bytes()[pos] == b' ' && pos - pos_ <= level { - end = pos_; - break 'outer; + return pos_; } else { break 'inner; } } } - pos += 1; + pos += 1 } - - end } pub fn is_commented(&self) -> bool { diff --git a/src/lib.rs b/src/lib.rs index 79e2992..e2940e9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,3 +5,5 @@ mod elements; mod headline; mod objects; mod parser; + +pub use parser::Parser; diff --git a/src/objects/emphasis.rs b/src/objects/emphasis.rs index 4aae3e0..2c0d6c8 100644 --- a/src/objects/emphasis.rs +++ b/src/objects/emphasis.rs @@ -1,7 +1,8 @@ pub struct Emphasis; impl Emphasis { - pub fn parse(src: &str, marker: u8) -> Option<(&'_ str, usize)> { + // TODO: return usize instead of Option + pub fn parse(src: &str, marker: u8) -> Option { expect!(src, 1, |c: u8| !c.is_ascii_whitespace()); let mut lines = 0; @@ -23,24 +24,19 @@ impl Emphasis { || ch == b'!' || ch == b'?' || ch == b'\'' + || ch == b'\n' || ch == b')' || ch == b'}'); } - Some((&src[1..end], end + 1)) + Some(end - 1) } } #[test] fn parse() { - assert_eq!( - Emphasis::parse("*bold*", b'*').unwrap(), - ("bold", "*bold*".len()) - ); - assert_eq!( - Emphasis::parse("*bo\nld*", b'*').unwrap(), - ("bo\nld", "*bo\nld*".len()) - ); + assert_eq!(Emphasis::parse("*bold*", b'*').unwrap(), "bold".len()); + assert_eq!(Emphasis::parse("*bo\nld*", b'*').unwrap(), "bo\nld".len()); assert!(Emphasis::parse("*bold*a", b'*').is_none()); assert!(Emphasis::parse("*bold*", b'/').is_none()); assert!(Emphasis::parse("*bold *", b'*').is_none()); diff --git a/src/objects/entity.rs b/src/objects/entity.rs index 4c92ef9..1358bda 100644 --- a/src/objects/entity.rs +++ b/src/objects/entity.rs @@ -5,7 +5,7 @@ pub struct Entity<'a> { impl<'a> Entity<'a> { pub fn parse(src: &'a str) -> Option<(Entity<'a>, usize)> { - expect!(src, 0, b'\\'); + expect!(src, 0, b'\\')?; let name = position!(src, 1, |c| !c.is_ascii_alphabetic()); diff --git a/src/objects/link.rs b/src/objects/link.rs index 11796b6..de33b08 100644 --- a/src/objects/link.rs +++ b/src/objects/link.rs @@ -26,7 +26,7 @@ impl<'a> Link<'a> { && c != b'[' && c != b'\n'); - expect!(src, desc + 1, b']'); + expect!(src, desc + 1, b']')?; Some(( Link { diff --git a/src/objects/macros.rs b/src/objects/macros.rs index 4e35539..95553d9 100644 --- a/src/objects/macros.rs +++ b/src/objects/macros.rs @@ -5,7 +5,7 @@ pub struct Macros<'a> { } fn valid_name(ch: u8) -> bool { - ch.is_ascii_alphanumeric() || ch == b'-' && ch == b'_' + ch.is_ascii_alphanumeric() || ch == b'-' || ch == b'_' } impl<'a> Macros<'a> { @@ -17,8 +17,8 @@ impl<'a> Macros<'a> { let name = until_while!(src, 3, |c| c == b'}' || c == b'(', valid_name); if src.as_bytes()[name] == b'}' { - expect!(src, name + 1, b'}'); - expect!(src, name + 2, b'}'); + expect!(src, name + 1, b'}')?; + expect!(src, name + 2, b'}')?; Some(( Macros { name: &src[3..name], @@ -27,12 +27,12 @@ impl<'a> Macros<'a> { name + 3, )) } else { - let end = find!(src, name, "}}}"); - expect!(src, end - 1, b')'); + let end = &src[name..].find("}}}").map(|i| i + name)?; + expect!(src, end - 1, b')')?; Some(( Macros { name: &src[3..name], - args: if name == end { + args: if name == *end { None } else { Some(&src[name + 1..end - 1]) @@ -46,30 +46,12 @@ impl<'a> Macros<'a> { #[test] fn parse() { - assert_eq!( - Macros::parse("{{{poem(red,blue)}}}").unwrap(), - ( - Macros { - name: "poem", - args: Some("red,blue") - }, - "{{{poem(red,blue)}}}".len() - ) - ); - assert_eq!( - Macros::parse("{{{author}}}").unwrap(), - ( - Macros { - name: "author", - args: None, - }, - "{{{author}}}".len() - ) - ); - assert!(Macros::parse("{{author}}}").is_none()); - assert!(Macros::parse("{{{0uthor}}}").is_none()); - assert!(Macros::parse("{{{author}}").is_none()); - assert!(Macros::parse("{{{poem(}}}").is_none()); - assert!(Macros::parse("{{{poem)}}}").is_none()); - // FIXME: assert_eq!(Macros::parse("{{{poem())}}}"), None); + parse_succ!(Macros, "{{{poem(red,blue)}}}", name: "poem", args: Some("red,blue")); + parse_succ!(Macros, "{{{poem())}}}", name: "poem", args: Some(")")); + parse_succ!(Macros, "{{{author}}}", name: "author", args: None); + parse_fail!(Macros, "{{author}}}"); + parse_fail!(Macros, "{{{0uthor}}}"); + parse_fail!(Macros, "{{{author}}"); + parse_fail!(Macros, "{{{poem(}}}"); + parse_fail!(Macros, "{{{poem)}}}"); } diff --git a/src/objects/mod.rs b/src/objects/mod.rs index c3afcfb..4024960 100644 --- a/src/objects/mod.rs +++ b/src/objects/mod.rs @@ -20,20 +20,7 @@ pub use self::macros::Macros; pub use self::snippet::Snippet; pub use self::target::{RadioTarget, Target}; -const ACTIVE_TAB: [u8; 6] = [b' ', b'"', b'(', b'{', b'\'', b'\n']; - #[cfg_attr(test, derive(PartialEq, Debug))] -pub struct Objects<'a> { - text: &'a str, - off: usize, -} - -impl<'a> Objects<'a> { - pub fn new(text: &'a str) -> Objects<'a> { - Objects { text, off: 0 } - } -} - pub enum Object<'a> { Cookie(Cookie<'a>), FnRef(FnRef<'a>), @@ -45,47 +32,163 @@ pub enum Object<'a> { Snippet(Snippet<'a>), Target(Target<'a>), - Bold(&'a str), - Verbatim(&'a str), - Italic(&'a str), - Strike(&'a str), - Underline(&'a str), - Code(&'a str), + Bold { end: usize }, + Italic { end: usize }, + Strike { end: usize }, + Underline { end: usize }, + Verbatim(&'a str), + Code(&'a str), Text(&'a str), } impl<'a> Object<'a> { - pub fn parse(src: &'a str) -> (Object<'a>, usize) { - macro_rules! parse { - ($ty:ident) => { - $ty::parse(src).map(|(s, l)| (Object::$ty(s), l)) - }; + pub fn next_2(src: &'a str) -> (Object<'a>, usize, Option<(Object<'a>, usize)>) { + let bytes = src.as_bytes(); + + if src.len() < 2 { + return (Object::Text(src), src.len(), None); } - macro_rules! parse_emphasis { - ($mk:tt, $ty:ident) => { - Emphasis::parse(src, $mk).map(|(s, l)| (Object::$ty(s), l)) - }; + // TODO: refactor with src[..].find(..) + for pos in 0..src.len() - 2 { + macro_rules! parse { + ($obj:ident) => { + if let Some((obj, off)) = $obj::parse(&src[pos..]) { + return if pos == 0 { + (Object::$obj(obj), off, None) + } else { + ( + Object::Text(&src[0..pos]), + pos, + Some((Object::$obj(obj), off)), + ) + }; + } + }; + } + + let first = bytes[pos]; + let second = bytes[pos + 1]; + let third = bytes[pos + 2]; + + if first == b'@' && second == b'@' { + parse!(Snippet); + } + + if first == b'[' { + if second == b'f' && third == b'n' { + parse!(FnRef); + } else if second == b'[' { + parse!(Link); + } else { + parse!(Cookie); + // TODO: Timestamp + } + } + + if first == b'{' && second == b'{' && third == b'{' { + parse!(Macros); + } + + if first == b'<' && second == b'<' { + if third == b'<' { + parse!(RadioTarget); + } else if third != b'<' && third != b'\n' { + parse!(Target); + } + } + + if pos == 0 + || bytes[pos - 1] == b' ' + || bytes[pos - 1] == b'"' + || bytes[pos - 1] == b'(' + || bytes[pos - 1] == b',' + || bytes[pos - 1] == b'\n' + || bytes[pos - 1] == b'{' + { + if (first == b'*' + || first == b'+' + || first == b'/' + || first == b'=' + || first == b'_' + || first == b'~') + && !second.is_ascii_whitespace() + { + if let Some(end) = Emphasis::parse(&src[pos..], first).map(|i| i + pos) { + macro_rules! emph { + ($obj:ident) => { + return if pos == 0 { + (Object::$obj { end }, 1, None) + } else { + ( + Object::Text(&src[0..pos]), + pos, + Some((Object::$obj { end }, end)), + ) + }; + }; + } + + match first { + b'*' => emph!(Bold), + b'+' => emph!(Strike), + b'/' => emph!(Italic), + b'_' => emph!(Underline), + b'~' => { + return if pos == 0 { + (Object::Code(&src[1..end + 1]), end + 2, None) + } else { + ( + Object::Text(&src[0..pos]), + pos, + Some((Object::Code(&src[pos + 1..end + 1]), end - pos + 2)), + ) + }; + } + b'=' => { + return if pos == 0 { + (Object::Verbatim(&src[1..end + 1]), end + 2, None) + } else { + ( + Object::Text(&src[0..pos]), + pos, + Some(( + Object::Verbatim(&src[pos + 1..end + 1]), + end - pos + 2, + )), + ) + }; + } + _ => unreachable!(), + } + } + } + + if first == b'c' && second == b'a' && third == b'l' { + parse!(InlineCall); + } + + if first == b's' && second == b'r' && third == b'c' { + parse!(InlineSrc); + } + } } - (match src.as_bytes()[0] { - b'@' => parse!(Snippet), - b'[' => parse!(FnRef) - .or_else(|| parse!(Link)) - .or_else(|| parse!(Cookie)), - b's' => parse!(InlineSrc), - b'c' => parse!(InlineCall), - b'{' => parse!(Macros), - b'<' => parse!(RadioTarget).or_else(|| parse!(Target)), - b'*' => parse_emphasis!(b'*', Bold), - b'=' => parse_emphasis!(b'=', Verbatim), - b'/' => parse_emphasis!(b'/', Italic), - b'+' => parse_emphasis!(b'+', Strike), - b'_' => parse_emphasis!(b'_', Underline), - b'~' => parse_emphasis!(b'~', Code), - _ => None, - }) - .unwrap_or((Object::Text(&src[0..1]), 1)) + (Object::Text(src), src.len(), None) } } + +#[test] +fn next_2() { + // TODO: more tests + assert_eq!(Object::next_2("*bold*"), (Object::Bold { end: 4 }, 1, None)); + assert_eq!( + Object::next_2("Normal =verbatim="), + ( + Object::Text("Normal "), + "Normal ".len(), + Some((Object::Verbatim("verbatim"), "=verbatim=".len())) + ) + ); +} diff --git a/src/objects/snippet.rs b/src/objects/snippet.rs index aa6e68f..6f8897a 100644 --- a/src/objects/snippet.rs +++ b/src/objects/snippet.rs @@ -14,12 +14,12 @@ impl<'a> Snippet<'a> { return None; } - let end = find!(src, name + 1, "@@"); + let end = &src[name + 1..].find("@@").map(|i| i + name + 1)?; Some(( Snippet { name: &src[2..name], - value: &src[name + 1..end], + value: &src[name + 1..*end], }, end + 2, )) diff --git a/src/objects/target.rs b/src/objects/target.rs index 2131e96..2f8bf92 100644 --- a/src/objects/target.rs +++ b/src/objects/target.rs @@ -1,8 +1,6 @@ -use objects::Objects; - #[cfg_attr(test, derive(PartialEq, Debug))] // TODO: text-markup, entities, latex-fragments, subscript and superscript -pub struct RadioTarget<'a>(Objects<'a>); +pub struct RadioTarget<'a>(&'a str); impl<'a> RadioTarget<'a> { pub fn parse(src: &'a str) -> Option<(RadioTarget<'a>, usize)> { @@ -12,10 +10,10 @@ impl<'a> RadioTarget<'a> { let end = until_while!(src, 3, b'>', |c| c != b'<' && c != b'\n'); expect!(src, end - 1, |c| c != b' '); - expect!(src, end + 1, b'>'); - expect!(src, end + 2, b'>'); + expect!(src, end + 1, b'>')?; + expect!(src, end + 2, b'>')?; - Some((RadioTarget(Objects::new(&src[3..end])), end + 3)) + Some((RadioTarget(&src[3..end]), end + 3)) } } @@ -30,7 +28,7 @@ impl<'a> Target<'a> { let end = until_while!(src, 2, b'>', |c| c != b'<' && c != b'\n'); expect!(src, end - 1, |c| c != b' '); - expect!(src, end + 1, b'>'); + expect!(src, end + 1, b'>')?; Some((Target(&src[2..end]), end + 2)) } @@ -40,19 +38,19 @@ impl<'a> Target<'a> { fn parse() { assert_eq!( RadioTarget::parse("<<>>").unwrap(), - (RadioTarget(Objects::new("target")), "<<>>".len()) + (RadioTarget("target"), "<<>>".len()) ); assert_eq!( RadioTarget::parse("<<>>").unwrap(), - (RadioTarget(Objects::new("tar get")), "<<>>".len()) + (RadioTarget("tar get"), "<<>>".len()) ); - assert!(RadioTarget::parse("<<>>").is_none()); - assert!(RadioTarget::parse("<<< target>>>").is_none()); - assert!(RadioTarget::parse("<<>>").is_none()); - assert!(RadioTarget::parse("<<get>>>").is_none()); - assert!(RadioTarget::parse("<<>>").is_none()); - assert!(RadioTarget::parse("<>>").is_none()); - assert!(RadioTarget::parse("<<>").is_none()); + parse_fail!(RadioTarget, "<<>>"); + parse_fail!(RadioTarget, "<<< target>>>"); + parse_fail!(RadioTarget, "<<>>"); + parse_fail!(RadioTarget, "<<get>>>"); + parse_fail!(RadioTarget, "<<>>"); + parse_fail!(RadioTarget, "<>>"); + parse_fail!(RadioTarget, "<<>"); assert_eq!( Target::parse("<>").unwrap(), @@ -62,11 +60,11 @@ fn parse() { Target::parse("<>").unwrap(), (Target("tar get"), "<>".len()) ); - assert!(Target::parse("<>").is_none()); - assert!(Target::parse("<< target>>").is_none()); - assert!(Target::parse("<>").is_none()); - assert!(Target::parse("<get>>").is_none()); - assert!(Target::parse("<>").is_none()); - assert!(Target::parse(">").is_none()); - assert!(Target::parse("<").is_none()); + parse_fail!(Target, "<>"); + parse_fail!(Target, "<< target>>"); + parse_fail!(Target, "<>"); + parse_fail!(Target, "<get>>"); + parse_fail!(Target, "<>"); + parse_fail!(Target, ">"); + parse_fail!(Target, "<"); } diff --git a/src/parser.rs b/src/parser.rs index c0b66a5..ff5f32c 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -5,18 +5,21 @@ use objects::*; #[cfg_attr(test, derive(PartialEq))] #[derive(Copy, Clone, Debug)] pub enum Container { - Block, - Bold, - Drawer, Headline { beg: usize, end: usize }, - Italic, + Section { end: usize }, + + Paragraph { end: usize, trailing: usize }, + + Block, + Drawer, LatexEnv, List, - Paragraph, - Section { end: usize }, - StrikeThrough, Table, - Underline, + + Italic { end: usize }, + Strike { end: usize }, + Bold { end: usize }, + Underline { end: usize }, } #[cfg_attr(test, derive(PartialEq, Debug))] @@ -27,7 +30,9 @@ pub enum Event<'a> { StartSection, EndSection, - Paragraph, + StartParagraph, + EndParagraph, + BlockStart, BlockEnd, DynBlockStart, @@ -43,17 +48,17 @@ pub enum Event<'a> { Clock, - Comment, + Comment(&'a str), TableStart, TableEnd, TableCell, LatexEnv, - StrikeThrough, FnDef(FnDef<'a>), Keyword(Keyword<'a>), Rule, + Cookie(Cookie<'a>), FnRef(FnRef<'a>), InlineCall(InlineCall<'a>), @@ -63,13 +68,18 @@ pub enum Event<'a> { RadioTarget(RadioTarget<'a>), Snippet(Snippet<'a>), Target(Target<'a>), - Bold(&'a str), - Verbatim(&'a str), - Italic(&'a str), - Strike(&'a str), - Underline(&'a str), - Code(&'a str), + StartBold, + EndBold, + StartItalic, + EndItalic, + StartStrike, + EndStrike, + StartUnderline, + EndUnderline, + + Verbatim(&'a str), + Code(&'a str), Text(&'a str), } @@ -77,6 +87,8 @@ pub struct Parser<'a> { text: &'a str, stack: Vec, off: usize, + ele_buf: Option<(Element<'a>, usize)>, + obj_buf: Option<(Object<'a>, usize)>, } impl<'a> Parser<'a> { @@ -85,6 +97,8 @@ impl<'a> Parser<'a> { text, stack: Vec::new(), off: 0, + ele_buf: None, + obj_buf: None, } } @@ -100,11 +114,6 @@ impl<'a> Parser<'a> { } } - fn end_section(&mut self) -> Event<'a> { - self.stack.pop(); - Event::EndSection - } - fn start_headline(&mut self, tail: &'a str) -> Event<'a> { let (hdl, off, end) = Headline::parse(tail); self.stack.push(Container::Headline { @@ -115,9 +124,71 @@ impl<'a> Parser<'a> { Event::StartHeadline(hdl) } - fn end_headline(&mut self) -> Event<'a> { - self.stack.pop(); - Event::EndHeadline + fn next_ele(&mut self, end: usize) -> Event<'a> { + let (ele, off) = if let Some((ele, off)) = std::mem::replace(&mut self.ele_buf, None) { + (Some(ele), off) + } else { + let (off, ele, next_2) = Element::next_2(&self.text[self.off..end]); + self.ele_buf = next_2; + (ele, off) + }; + + self.off += off; + + if let Some(ele) = ele { + if let Element::Paragraph { end, trailing } = ele { + self.stack.push(Container::Paragraph { + end: end + self.off - off, + trailing: trailing + self.off - off, + }); + } + ele.into() + } else { + self.end() + } + } + + fn next_obj(&mut self, end: usize) -> Event<'a> { + let (obj, off) = if let Some((obj, off)) = std::mem::replace(&mut self.obj_buf, None) { + (obj, off) + } else { + let (obj, off, next_2) = Object::next_2(&self.text[self.off..end]); + self.obj_buf = next_2; + (obj, off) + }; + + self.off += off; + + match obj { + Object::Underline { end } => self.stack.push(Container::Underline { + end: self.off + end, + }), + Object::Strike { end } => self.stack.push(Container::Strike { + end: self.off + end, + }), + Object::Italic { end } => self.stack.push(Container::Italic { + end: self.off + end, + }), + Object::Bold { end } => self.stack.push(Container::Bold { + end: self.off + end, + }), + _ => (), + } + + obj.into() + } + + fn end(&mut self) -> Event<'a> { + match self.stack.pop().unwrap() { + Container::Paragraph { .. } => Event::EndParagraph, + Container::Underline { .. } => Event::EndUnderline, + Container::Section { .. } => Event::EndSection, + Container::Strike { .. } => Event::EndStrike, + Container::Headline { .. } => Event::EndHeadline, + Container::Italic { .. } => Event::EndItalic, + Container::Bold { .. } => Event::EndBold, + _ => unimplemented!(), + } } } @@ -139,7 +210,7 @@ impl<'a> Iterator for Parser<'a> { Some(match last { Container::Headline { beg, end } => { if self.off >= end { - self.end_headline() + self.end() } else if self.off == beg { self.start_section_or_headline(tail) } else { @@ -148,14 +219,28 @@ impl<'a> Iterator for Parser<'a> { } Container::Section { end } => { if self.off >= end { - self.end_section() + self.end() } else { - match Element::find_elem(&self.text[self.off..end]) { - (Element::Paragraph(_), off) => { - self.off += off; - Event::Paragraph - } - } + self.next_ele(end) + } + } + Container::Paragraph { end, trailing } => { + if self.off >= end { + self.off = trailing; + self.end() + } else { + self.next_obj(end) + } + } + Container::Bold { end } + | Container::Underline { end } + | Container::Italic { end } + | Container::Strike { end } => { + if self.off >= end { + self.off += 1; + self.end() + } else { + self.next_obj(end) } } _ => unimplemented!(), @@ -164,6 +249,41 @@ impl<'a> Iterator for Parser<'a> { } } +impl<'a> From> for Event<'a> { + fn from(obj: Object<'a>) -> Self { + match obj { + Object::Bold { .. } => Event::StartBold, + Object::Code(c) => Event::Code(c), + Object::Cookie(c) => Event::Cookie(c), + Object::FnRef(f) => Event::FnRef(f), + Object::InlineCall(i) => Event::InlineCall(i), + Object::InlineSrc(i) => Event::InlineSrc(i), + Object::Italic { .. } => Event::StartItalic, + Object::Link(l) => Event::Link(l), + Object::Macros(m) => Event::Macros(m), + Object::RadioTarget(r) => Event::RadioTarget(r), + Object::Snippet(s) => Event::Snippet(s), + Object::Strike { .. } => Event::StartStrike, + Object::Target(t) => Event::Target(t), + Object::Text(t) => Event::Text(t), + Object::Underline { .. } => Event::StartUnderline, + Object::Verbatim(v) => Event::Verbatim(v), + } + } +} + +impl<'a> From> for Event<'a> { + fn from(ele: Element<'a>) -> Self { + match ele { + Element::Comment(c) => Event::Comment(c), + Element::FnDef(fd) => Event::FnDef(fd), + Element::Keyword(kw) => Event::Keyword(kw), + Element::Paragraph { .. } => Event::StartParagraph, + Element::Rule => Event::Rule, + } + } +} + #[test] fn parse() { use self::Event::*; @@ -171,29 +291,52 @@ fn parse() { let expected = vec![ StartHeadline(Headline::new(1, None, None, "Title 1", None)), StartSection, - Paragraph, + StartParagraph, + StartBold, + Text("Section 1"), + EndBold, + EndParagraph, EndSection, StartHeadline(Headline::new(2, None, None, "Title 2", None)), StartSection, - Paragraph, + StartParagraph, + StartUnderline, + Text("Section 2"), + EndUnderline, + EndParagraph, EndSection, EndHeadline, EndHeadline, StartHeadline(Headline::new(1, None, None, "Title 3", None)), StartSection, - Paragraph, + StartParagraph, + StartItalic, + Text("Section 3"), + EndItalic, + EndParagraph, EndSection, EndHeadline, - StartHeadline(Headline::new(1, None, None, "Title 4 ", None)), + StartHeadline(Headline::new(1, None, None, "Title 4", None)), StartSection, - Paragraph, + StartParagraph, + Verbatim("Section 4"), + EndParagraph, EndSection, EndHeadline, ]; assert_eq!( - Parser::new("* Title 1\nSection 1\n** Title 2\nSection 2\n* Title 3\nSection 3\n* Title 4 \nSection 4") - .collect::>(), + Parser::new( + r#"* Title 1 +*Section 1* +** Title 2 +_Section 2_ +* Title 3 +/Section 3/ +* Title 4 +=Section 4="# + ) + .collect::>(), expected ); } diff --git a/src/utils.rs b/src/utils.rs index b869456..ee8662a 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -3,11 +3,10 @@ #[macro_export] macro_rules! expect { ($src:ident, $index:expr, $expect:tt) => { - if $index >= $src.len() || $src.as_bytes()[$index] != $expect { - return None; - } + $src.as_bytes().get($index).filter(|&b| b == &$expect) }; ($src:ident, $index:expr, $expect:expr) => { + // $src.as_bytes().get($index).filter($expect) if $index >= $src.len() || !$expect($src.as_bytes()[$index]) { return None; } @@ -16,16 +15,15 @@ macro_rules! expect { #[macro_export] macro_rules! eol { - ($src:expr) => {{ - let mut pos = 0; - while pos < $src.len() { - if $src.as_bytes()[pos] == b'\n' { - break; - } - pos += 1; - } - pos - }}; + ($src:expr) => { + $src.find('\n').unwrap_or($src.len()) + }; + ($src:expr, $from:expr) => { + $src[$from..] + .find('\n') + .map(|i| i + $from) + .unwrap_or($src.len()) + }; } #[macro_export] @@ -105,7 +103,7 @@ macro_rules! until_while { #[macro_export] macro_rules! cond_eq { ($s:ident, $i:expr, $p:expr) => { - if $i > $s.len() { + if $i >= $s.len() { return None; } else { $s.as_bytes()[$i] == $p @@ -123,16 +121,6 @@ macro_rules! position { }; } -#[macro_export] -macro_rules! find { - ($s:ident, $i:expr, $p:expr) => { - match $s[$i..].find($p) { - Some(x) => x + $i, - None => return None, - } - }; -} - #[macro_export] macro_rules! starts_with { ($s:ident, $p:expr) => { @@ -143,18 +131,44 @@ macro_rules! starts_with { } #[macro_export] -macro_rules! next_line { - ($s:ident, $p:expr) => { - self.chars().position(|c| c == ch).unwrap_or(self.len()) - if !$s.starts_with($p) { - return None; - } +macro_rules! skip_space { + ($src:ident, $from:expr) => { + until!($src[$from..], |c| c != b' ').unwrap_or(0) + $from }; } #[macro_export] -macro_rules! skip_whitespace { - ($src:ident, $from:ident) => { - until!($src[$from..], |c| c != b' ').unwrap_or(0) + $from +macro_rules! skip_empty_line { + ($src:ident, $from:expr) => {{ + let mut pos = $from; + while pos < $src.len() { + if $src.as_bytes()[pos] != b'\n' { + break; + } + pos += 1; + } + pos + }}; +} + +#[macro_export] +macro_rules! parse_fail { + ($ty:ident, $src:expr) => { + assert_eq!($ty::parse($src), None); + }; +} + +#[macro_export] +macro_rules! parse_succ { + ($ty:ident, $src:expr, $($field:ident : $value:expr),* ) => { + assert_eq!( + $ty::parse($src), + Some(( + $ty { + $( $field : $value ),* + }, + $src.len() + )), + ); }; }