This commit is contained in:
PoiScript 2019-01-10 20:58:13 +08:00
parent a85efe2056
commit 6f7fa9c920
16 changed files with 622 additions and 229 deletions

4
.gitignore vendored
View file

@ -1,3 +1,7 @@
/target /target
**/*.rs.bk **/*.rs.bk
Cargo.lock Cargo.lock
benches/*.org
.gdb_history
perf.data*

31
benches/parse.rs Normal file
View file

@ -0,0 +1,31 @@
#![feature(test)]
extern crate org;
extern crate test;
use org::Parser;
use test::Bencher;
/// Benchmarks a full parse of the Org syntax reference document.
#[bench]
fn org_syntax(b: &mut Bencher) {
    // The fixture is git-ignored (`benches/*.org`); download it into `benches/` first:
    // wget https://orgmode.org/worg/sources/dev/org-syntax.org
    let text = include_str!("org-syntax.org");
    // Return the collected events instead of discarding them: `Bencher::iter`
    // feeds the closure's return value to `black_box`, so the optimizer cannot
    // treat the parsing work as dead code.
    b.iter(|| Parser::new(text).collect::<Vec<_>>());
}
/// Benchmarks a full parse of Worg's `doc.org`.
#[bench]
fn doc(b: &mut Bencher) {
    // The fixture is git-ignored (`benches/*.org`); download it into `benches/` first:
    // wget https://orgmode.org/worg/sources/doc.org
    let text = include_str!("doc.org");
    // Return the collected events instead of discarding them: `Bencher::iter`
    // feeds the closure's return value to `black_box`, so the optimizer cannot
    // treat the parsing work as dead code.
    b.iter(|| Parser::new(text).collect::<Vec<_>>());
}
/// Benchmarks a full parse of the Org FAQ document.
#[bench]
fn org_faq(b: &mut Bencher) {
    // The fixture is git-ignored (`benches/*.org`); download it into `benches/` first:
    // wget https://orgmode.org/worg/sources/org-faq.org
    let text = include_str!("org-faq.org");
    // Return the collected events instead of discarding them: `Bencher::iter`
    // feeds the closure's return value to `black_box`, so the optimizer cannot
    // treat the parsing work as dead code.
    b.iter(|| Parser::new(text).collect::<Vec<_>>());
}

View file

@ -12,25 +12,13 @@ impl<'a> Keyword<'a> {
let end = eol!(src); let end = eol!(src);
if end == key + 1 {
Some(( Some((
Keyword { Keyword {
key: &src[2..key], key: &src[2..key],
value: "", value: &src[key + 1..end].trim(),
}, },
end, end,
)) ))
} else {
let space = position!(src, key + 1, |c| !c.is_ascii_whitespace());
Some((
Keyword {
key: &src[2..key],
value: &src[space..end],
},
end,
))
}
} }
} }

View file

@ -6,13 +6,150 @@ pub use self::fn_def::FnDef;
pub use self::keyword::Keyword; pub use self::keyword::Keyword;
pub use self::rule::Rule; pub use self::rule::Rule;
#[cfg_attr(test, derive(PartialEq, Debug))]
pub enum Element<'a> { pub enum Element<'a> {
Paragraph(&'a str), Paragraph {
// end of the contents
end: usize,
// trailing space
trailing: usize,
},
Keyword(Keyword<'a>),
FnDef(FnDef<'a>),
Rule,
Comment(&'a str),
} }
impl<'a> Element<'a> { impl<'a> Element<'a> {
pub fn find_elem(src: &'a str) -> (Element<'a>, usize) { pub fn next_2(src: &'a str) -> (usize, Option<Element<'a>>, Option<(Element<'a>, usize)>) {
// TODO let bytes = src.as_bytes();
(Element::Paragraph(src), src.len())
let mut pos = skip_empty_line!(src, 0);
let start = pos;
if start == src.len() {
return (start, None, None);
}
loop {
if pos >= src.len() {
return (
start,
Some(Element::Paragraph {
end: if bytes[pos - 1] == b'\n' {
pos - 1
} else {
pos
},
trailing: pos,
}),
None,
);
}
// TODO: refactor with src[..].find('\n')
if pos == start || bytes[pos - 1] == b'\n' {
// Unlike other elements, a footnote definition must start at column 0
if bytes[pos] == b'[' {
if let Some((fd, off)) = FnDef::parse(&src[pos..]) {
return if pos == start {
(off + 1, Some(Element::FnDef(fd)), None)
} else {
(
start,
Some(Element::Paragraph {
end: if pos == start { pos } else { pos - 1 },
trailing: pos,
}),
Some((Element::FnDef(fd), off + 1)),
)
};
} }
} }
let end = pos;
pos = skip_space!(src, pos);
if pos <= src.len() {
if bytes[pos] == b'\n' {
return (
start,
Some(Element::Paragraph {
end: if pos == start { end } else { end - 1 },
trailing: pos,
}),
None,
);
}
// TODO: LaTeX environment
if bytes[pos] == b'\\' {}
// Rule
if bytes[pos] == b'-' {
if let Some(off) = Rule::parse(&src[pos..]) {
return if pos == start {
(off, Some(Element::Rule), None)
} else {
(
start,
Some(Element::Paragraph {
end: if pos == start { end } else { end - 1 },
trailing: pos,
}),
Some((Element::Rule, off)),
)
};
}
}
if bytes[pos] == b'#' {
// Keyword
if bytes[pos + 1] == b'+' {
if let Some((kw, off)) = Keyword::parse(&src[pos..]) {
return if pos == start {
(off, Some(Element::Keyword(kw)), None)
} else {
(
start,
Some(Element::Paragraph {
end: if pos == start { end } else { end - 1 },
trailing: pos - 1,
}),
Some((Element::Keyword(kw), off)),
)
};
}
}
// Comment
if src.as_bytes()[pos + 1] == b' ' {
let eol = eol!(src, pos);
return if pos == start {
(eol, Some(Element::Comment(&src[pos + 1..eol])), None)
} else {
(
start,
Some(Element::Paragraph {
end: if pos == start { end } else { end - 1 },
trailing: pos - 1,
}),
Some((Element::Comment(&src[pos + 1..eol]), eol)),
)
};
}
}
}
}
pos += 1
}
}
}
#[test]
fn next_2() {
    // TODO: more tests
    // Input made only of newlines: the reported offset equals the input
    // length and neither an element nor a buffered follow-up is produced.
    let (offset, element, buffered) = Element::next_2("\n\n\n\n");
    assert_eq!(offset, 4);
    assert_eq!(element, None);
    assert_eq!(buffered, None);
}

View file

@ -1,3 +1,4 @@
#[cfg_attr(test, derive(PartialEq, Debug))]
pub struct Rule; pub struct Rule;
impl Rule { impl Rule {
@ -14,10 +15,10 @@ impl Rule {
#[test] #[test]
fn parse() { fn parse() {
assert!(Rule::parse("-----").is_some()); assert_eq!(Rule::parse("-----").unwrap(), "-----".len());
assert!(Rule::parse("--------").is_some()); assert_eq!(Rule::parse("--------").unwrap(), "--------".len());
assert!(Rule::parse(" -----").is_some()); assert_eq!(Rule::parse(" -----").unwrap(), " -----".len());
assert!(Rule::parse("\t\t-----").is_some()); assert_eq!(Rule::parse("\t\t-----").unwrap(), "\t\t-----".len());
assert!(Rule::parse("").is_none()); assert!(Rule::parse("").is_none());
assert!(Rule::parse("----").is_none()); assert!(Rule::parse("----").is_none());

View file

@ -1,4 +1,4 @@
#[derive(PartialEq, Debug)] #[cfg_attr(test, derive(PartialEq, Debug))]
pub struct Headline<'a> { pub struct Headline<'a> {
pub level: usize, pub level: usize,
pub priority: Option<char>, pub priority: Option<char>,
@ -69,7 +69,7 @@ impl<'a> Headline<'a> {
let eol = eol!(src); let eol = eol!(src);
let end = Headline::find_level(&src[eol..], level) + eol; let end = Headline::find_level(&src[eol..], level) + eol;
let mut title_start = skip_whitespace!(src, level); let mut title_start = skip_space!(src, level);
let keyword = match Headline::parse_keyword(&src[title_start..eol]) { let keyword = match Headline::parse_keyword(&src[title_start..eol]) {
Some((k, l)) => { Some((k, l)) => {
@ -79,7 +79,7 @@ impl<'a> Headline<'a> {
None => None, None => None,
}; };
title_start = skip_whitespace!(src, title_start); title_start = skip_space!(src, title_start);
let priority = match Headline::parse_priority(&src[title_start..eol]) { let priority = match Headline::parse_priority(&src[title_start..eol]) {
Some(p) => { Some(p) => {
@ -89,7 +89,7 @@ impl<'a> Headline<'a> {
None => None, None => None,
}; };
title_start = skip_whitespace!(src, title_start); title_start = skip_space!(src, title_start);
let (tags, title_off) = Headline::parse_tags(&src[title_start..eol]); let (tags, title_off) = Headline::parse_tags(&src[title_start..eol]);
@ -112,35 +112,29 @@ impl<'a> Headline<'a> {
// TODO: optimize // TODO: optimize
pub fn find_level(src: &str, level: usize) -> usize { pub fn find_level(src: &str, level: usize) -> usize {
let mut pos = 0; let mut pos = 0;
let end;
'outer: loop { 'outer: loop {
if pos >= src.len() { if pos >= src.len() {
end = src.len(); return src.len();
break;
} }
if src.as_bytes()[pos] == b'*' && (pos == 0 || src.as_bytes()[pos - 1] == b'\n') { if src.as_bytes()[pos] == b'*' && (pos == 0 || src.as_bytes()[pos - 1] == b'\n') {
let pos_ = pos; let pos_ = pos;
'inner: loop { 'inner: loop {
if pos >= src.len() { if pos >= src.len() {
end = src.len(); return src.len();
break 'outer;
} }
if src.as_bytes()[pos] == b'*' { if src.as_bytes()[pos] == b'*' {
pos += 1; pos += 1;
} else if src.as_bytes()[pos] == b' ' && pos - pos_ <= level { } else if src.as_bytes()[pos] == b' ' && pos - pos_ <= level {
end = pos_; return pos_;
break 'outer;
} else { } else {
break 'inner; break 'inner;
} }
} }
} }
pos += 1; pos += 1
} }
end
} }
pub fn is_commented(&self) -> bool { pub fn is_commented(&self) -> bool {

View file

@ -5,3 +5,5 @@ mod elements;
mod headline; mod headline;
mod objects; mod objects;
mod parser; mod parser;
pub use parser::Parser;

View file

@ -1,7 +1,8 @@
pub struct Emphasis; pub struct Emphasis;
impl Emphasis { impl Emphasis {
pub fn parse(src: &str, marker: u8) -> Option<(&'_ str, usize)> { // TODO: return usize instead of Option<usize>
pub fn parse(src: &str, marker: u8) -> Option<usize> {
expect!(src, 1, |c: u8| !c.is_ascii_whitespace()); expect!(src, 1, |c: u8| !c.is_ascii_whitespace());
let mut lines = 0; let mut lines = 0;
@ -23,24 +24,19 @@ impl Emphasis {
|| ch == b'!' || ch == b'!'
|| ch == b'?' || ch == b'?'
|| ch == b'\'' || ch == b'\''
|| ch == b'\n'
|| ch == b')' || ch == b')'
|| ch == b'}'); || ch == b'}');
} }
Some((&src[1..end], end + 1)) Some(end - 1)
} }
} }
#[test] #[test]
fn parse() { fn parse() {
assert_eq!( assert_eq!(Emphasis::parse("*bold*", b'*').unwrap(), "bold".len());
Emphasis::parse("*bold*", b'*').unwrap(), assert_eq!(Emphasis::parse("*bo\nld*", b'*').unwrap(), "bo\nld".len());
("bold", "*bold*".len())
);
assert_eq!(
Emphasis::parse("*bo\nld*", b'*').unwrap(),
("bo\nld", "*bo\nld*".len())
);
assert!(Emphasis::parse("*bold*a", b'*').is_none()); assert!(Emphasis::parse("*bold*a", b'*').is_none());
assert!(Emphasis::parse("*bold*", b'/').is_none()); assert!(Emphasis::parse("*bold*", b'/').is_none());
assert!(Emphasis::parse("*bold *", b'*').is_none()); assert!(Emphasis::parse("*bold *", b'*').is_none());

View file

@ -5,7 +5,7 @@ pub struct Entity<'a> {
impl<'a> Entity<'a> { impl<'a> Entity<'a> {
pub fn parse(src: &'a str) -> Option<(Entity<'a>, usize)> { pub fn parse(src: &'a str) -> Option<(Entity<'a>, usize)> {
expect!(src, 0, b'\\'); expect!(src, 0, b'\\')?;
let name = position!(src, 1, |c| !c.is_ascii_alphabetic()); let name = position!(src, 1, |c| !c.is_ascii_alphabetic());

View file

@ -26,7 +26,7 @@ impl<'a> Link<'a> {
&& c != b'[' && c != b'['
&& c != b'\n'); && c != b'\n');
expect!(src, desc + 1, b']'); expect!(src, desc + 1, b']')?;
Some(( Some((
Link { Link {

View file

@ -5,7 +5,7 @@ pub struct Macros<'a> {
} }
fn valid_name(ch: u8) -> bool { fn valid_name(ch: u8) -> bool {
ch.is_ascii_alphanumeric() || ch == b'-' && ch == b'_' ch.is_ascii_alphanumeric() || ch == b'-' || ch == b'_'
} }
impl<'a> Macros<'a> { impl<'a> Macros<'a> {
@ -17,8 +17,8 @@ impl<'a> Macros<'a> {
let name = until_while!(src, 3, |c| c == b'}' || c == b'(', valid_name); let name = until_while!(src, 3, |c| c == b'}' || c == b'(', valid_name);
if src.as_bytes()[name] == b'}' { if src.as_bytes()[name] == b'}' {
expect!(src, name + 1, b'}'); expect!(src, name + 1, b'}')?;
expect!(src, name + 2, b'}'); expect!(src, name + 2, b'}')?;
Some(( Some((
Macros { Macros {
name: &src[3..name], name: &src[3..name],
@ -27,12 +27,12 @@ impl<'a> Macros<'a> {
name + 3, name + 3,
)) ))
} else { } else {
let end = find!(src, name, "}}}"); let end = &src[name..].find("}}}").map(|i| i + name)?;
expect!(src, end - 1, b')'); expect!(src, end - 1, b')')?;
Some(( Some((
Macros { Macros {
name: &src[3..name], name: &src[3..name],
args: if name == end { args: if name == *end {
None None
} else { } else {
Some(&src[name + 1..end - 1]) Some(&src[name + 1..end - 1])
@ -46,30 +46,12 @@ impl<'a> Macros<'a> {
#[test] #[test]
fn parse() { fn parse() {
assert_eq!( parse_succ!(Macros, "{{{poem(red,blue)}}}", name: "poem", args: Some("red,blue"));
Macros::parse("{{{poem(red,blue)}}}").unwrap(), parse_succ!(Macros, "{{{poem())}}}", name: "poem", args: Some(")"));
( parse_succ!(Macros, "{{{author}}}", name: "author", args: None);
Macros { parse_fail!(Macros, "{{author}}}");
name: "poem", parse_fail!(Macros, "{{{0uthor}}}");
args: Some("red,blue") parse_fail!(Macros, "{{{author}}");
}, parse_fail!(Macros, "{{{poem(}}}");
"{{{poem(red,blue)}}}".len() parse_fail!(Macros, "{{{poem)}}}");
)
);
assert_eq!(
Macros::parse("{{{author}}}").unwrap(),
(
Macros {
name: "author",
args: None,
},
"{{{author}}}".len()
)
);
assert!(Macros::parse("{{author}}}").is_none());
assert!(Macros::parse("{{{0uthor}}}").is_none());
assert!(Macros::parse("{{{author}}").is_none());
assert!(Macros::parse("{{{poem(}}}").is_none());
assert!(Macros::parse("{{{poem)}}}").is_none());
// FIXME: assert_eq!(Macros::parse("{{{poem())}}}"), None);
} }

View file

@ -20,20 +20,7 @@ pub use self::macros::Macros;
pub use self::snippet::Snippet; pub use self::snippet::Snippet;
pub use self::target::{RadioTarget, Target}; pub use self::target::{RadioTarget, Target};
const ACTIVE_TAB: [u8; 6] = [b' ', b'"', b'(', b'{', b'\'', b'\n'];
#[cfg_attr(test, derive(PartialEq, Debug))] #[cfg_attr(test, derive(PartialEq, Debug))]
pub struct Objects<'a> {
text: &'a str,
off: usize,
}
impl<'a> Objects<'a> {
pub fn new(text: &'a str) -> Objects<'a> {
Objects { text, off: 0 }
}
}
pub enum Object<'a> { pub enum Object<'a> {
Cookie(Cookie<'a>), Cookie(Cookie<'a>),
FnRef(FnRef<'a>), FnRef(FnRef<'a>),
@ -45,47 +32,163 @@ pub enum Object<'a> {
Snippet(Snippet<'a>), Snippet(Snippet<'a>),
Target(Target<'a>), Target(Target<'a>),
Bold(&'a str), Bold { end: usize },
Verbatim(&'a str), Italic { end: usize },
Italic(&'a str), Strike { end: usize },
Strike(&'a str), Underline { end: usize },
Underline(&'a str),
Code(&'a str),
Verbatim(&'a str),
Code(&'a str),
Text(&'a str), Text(&'a str),
} }
impl<'a> Object<'a> { impl<'a> Object<'a> {
pub fn parse(src: &'a str) -> (Object<'a>, usize) { pub fn next_2(src: &'a str) -> (Object<'a>, usize, Option<(Object<'a>, usize)>) {
let bytes = src.as_bytes();
if src.len() < 2 {
return (Object::Text(src), src.len(), None);
}
// TODO: refactor with src[..].find(..)
for pos in 0..src.len() - 2 {
macro_rules! parse { macro_rules! parse {
($ty:ident) => { ($obj:ident) => {
$ty::parse(src).map(|(s, l)| (Object::$ty(s), l)) if let Some((obj, off)) = $obj::parse(&src[pos..]) {
return if pos == 0 {
(Object::$obj(obj), off, None)
} else {
(
Object::Text(&src[0..pos]),
pos,
Some((Object::$obj(obj), off)),
)
};
}
}; };
} }
macro_rules! parse_emphasis { let first = bytes[pos];
($mk:tt, $ty:ident) => { let second = bytes[pos + 1];
Emphasis::parse(src, $mk).map(|(s, l)| (Object::$ty(s), l)) let third = bytes[pos + 2];
if first == b'@' && second == b'@' {
parse!(Snippet);
}
if first == b'[' {
if second == b'f' && third == b'n' {
parse!(FnRef);
} else if second == b'[' {
parse!(Link);
} else {
parse!(Cookie);
// TODO: Timestamp
}
}
if first == b'{' && second == b'{' && third == b'{' {
parse!(Macros);
}
if first == b'<' && second == b'<' {
if third == b'<' {
parse!(RadioTarget);
} else if third != b'<' && third != b'\n' {
parse!(Target);
}
}
if pos == 0
|| bytes[pos - 1] == b' '
|| bytes[pos - 1] == b'"'
|| bytes[pos - 1] == b'('
|| bytes[pos - 1] == b','
|| bytes[pos - 1] == b'\n'
|| bytes[pos - 1] == b'{'
{
if (first == b'*'
|| first == b'+'
|| first == b'/'
|| first == b'='
|| first == b'_'
|| first == b'~')
&& !second.is_ascii_whitespace()
{
if let Some(end) = Emphasis::parse(&src[pos..], first).map(|i| i + pos) {
macro_rules! emph {
($obj:ident) => {
return if pos == 0 {
(Object::$obj { end }, 1, None)
} else {
(
Object::Text(&src[0..pos]),
pos,
Some((Object::$obj { end }, end)),
)
};
}; };
} }
(match src.as_bytes()[0] { match first {
b'@' => parse!(Snippet), b'*' => emph!(Bold),
b'[' => parse!(FnRef) b'+' => emph!(Strike),
.or_else(|| parse!(Link)) b'/' => emph!(Italic),
.or_else(|| parse!(Cookie)), b'_' => emph!(Underline),
b's' => parse!(InlineSrc), b'~' => {
b'c' => parse!(InlineCall), return if pos == 0 {
b'{' => parse!(Macros), (Object::Code(&src[1..end + 1]), end + 2, None)
b'<' => parse!(RadioTarget).or_else(|| parse!(Target)), } else {
b'*' => parse_emphasis!(b'*', Bold), (
b'=' => parse_emphasis!(b'=', Verbatim), Object::Text(&src[0..pos]),
b'/' => parse_emphasis!(b'/', Italic), pos,
b'+' => parse_emphasis!(b'+', Strike), Some((Object::Code(&src[pos + 1..end + 1]), end - pos + 2)),
b'_' => parse_emphasis!(b'_', Underline), )
b'~' => parse_emphasis!(b'~', Code), };
_ => None, }
}) b'=' => {
.unwrap_or((Object::Text(&src[0..1]), 1)) return if pos == 0 {
(Object::Verbatim(&src[1..end + 1]), end + 2, None)
} else {
(
Object::Text(&src[0..pos]),
pos,
Some((
Object::Verbatim(&src[pos + 1..end + 1]),
end - pos + 2,
)),
)
};
}
_ => unreachable!(),
} }
} }
}
if first == b'c' && second == b'a' && third == b'l' {
parse!(InlineCall);
}
if first == b's' && second == b'r' && third == b'c' {
parse!(InlineSrc);
}
}
}
(Object::Text(src), src.len(), None)
}
}
#[test]
fn next_2() {
    // TODO: more tests
    // Emphasis at the very start of the input: no leading text, nothing buffered.
    let leading_bold = Object::next_2("*bold*");
    assert_eq!(leading_bold, (Object::Bold { end: 4 }, 1, None));

    // Plain text followed by markup: the text comes back first and the
    // verbatim object is handed over as the buffered second result.
    let prefix = "Normal ";
    let markup = "=verbatim=";
    let text_then_verbatim = Object::next_2("Normal =verbatim=");
    let expected = (
        Object::Text(prefix),
        prefix.len(),
        Some((Object::Verbatim("verbatim"), markup.len())),
    );
    assert_eq!(text_then_verbatim, expected);
}

View file

@ -14,12 +14,12 @@ impl<'a> Snippet<'a> {
return None; return None;
} }
let end = find!(src, name + 1, "@@"); let end = &src[name + 1..].find("@@").map(|i| i + name + 1)?;
Some(( Some((
Snippet { Snippet {
name: &src[2..name], name: &src[2..name],
value: &src[name + 1..end], value: &src[name + 1..*end],
}, },
end + 2, end + 2,
)) ))

View file

@ -1,8 +1,6 @@
use objects::Objects;
#[cfg_attr(test, derive(PartialEq, Debug))] #[cfg_attr(test, derive(PartialEq, Debug))]
// TODO: text-markup, entities, latex-fragments, subscript and superscript // TODO: text-markup, entities, latex-fragments, subscript and superscript
pub struct RadioTarget<'a>(Objects<'a>); pub struct RadioTarget<'a>(&'a str);
impl<'a> RadioTarget<'a> { impl<'a> RadioTarget<'a> {
pub fn parse(src: &'a str) -> Option<(RadioTarget<'a>, usize)> { pub fn parse(src: &'a str) -> Option<(RadioTarget<'a>, usize)> {
@ -12,10 +10,10 @@ impl<'a> RadioTarget<'a> {
let end = until_while!(src, 3, b'>', |c| c != b'<' && c != b'\n'); let end = until_while!(src, 3, b'>', |c| c != b'<' && c != b'\n');
expect!(src, end - 1, |c| c != b' '); expect!(src, end - 1, |c| c != b' ');
expect!(src, end + 1, b'>'); expect!(src, end + 1, b'>')?;
expect!(src, end + 2, b'>'); expect!(src, end + 2, b'>')?;
Some((RadioTarget(Objects::new(&src[3..end])), end + 3)) Some((RadioTarget(&src[3..end]), end + 3))
} }
} }
@ -30,7 +28,7 @@ impl<'a> Target<'a> {
let end = until_while!(src, 2, b'>', |c| c != b'<' && c != b'\n'); let end = until_while!(src, 2, b'>', |c| c != b'<' && c != b'\n');
expect!(src, end - 1, |c| c != b' '); expect!(src, end - 1, |c| c != b' ');
expect!(src, end + 1, b'>'); expect!(src, end + 1, b'>')?;
Some((Target(&src[2..end]), end + 2)) Some((Target(&src[2..end]), end + 2))
} }
@ -40,19 +38,19 @@ impl<'a> Target<'a> {
fn parse() { fn parse() {
assert_eq!( assert_eq!(
RadioTarget::parse("<<<target>>>").unwrap(), RadioTarget::parse("<<<target>>>").unwrap(),
(RadioTarget(Objects::new("target")), "<<<target>>>".len()) (RadioTarget("target"), "<<<target>>>".len())
); );
assert_eq!( assert_eq!(
RadioTarget::parse("<<<tar get>>>").unwrap(), RadioTarget::parse("<<<tar get>>>").unwrap(),
(RadioTarget(Objects::new("tar get")), "<<<tar get>>>".len()) (RadioTarget("tar get"), "<<<tar get>>>".len())
); );
assert!(RadioTarget::parse("<<<target >>>").is_none()); parse_fail!(RadioTarget, "<<<target >>>");
assert!(RadioTarget::parse("<<< target>>>").is_none()); parse_fail!(RadioTarget, "<<< target>>>");
assert!(RadioTarget::parse("<<<ta<get>>>").is_none()); parse_fail!(RadioTarget, "<<<ta<get>>>");
assert!(RadioTarget::parse("<<<ta>get>>>").is_none()); parse_fail!(RadioTarget, "<<<ta>get>>>");
assert!(RadioTarget::parse("<<<ta\nget>>>").is_none()); parse_fail!(RadioTarget, "<<<ta\nget>>>");
assert!(RadioTarget::parse("<<target>>>").is_none()); parse_fail!(RadioTarget, "<<target>>>");
assert!(RadioTarget::parse("<<<target>>").is_none()); parse_fail!(RadioTarget, "<<<target>>");
assert_eq!( assert_eq!(
Target::parse("<<target>>").unwrap(), Target::parse("<<target>>").unwrap(),
@ -62,11 +60,11 @@ fn parse() {
Target::parse("<<tar get>>").unwrap(), Target::parse("<<tar get>>").unwrap(),
(Target("tar get"), "<<tar get>>".len()) (Target("tar get"), "<<tar get>>".len())
); );
assert!(Target::parse("<<target >>").is_none()); parse_fail!(Target, "<<target >>");
assert!(Target::parse("<< target>>").is_none()); parse_fail!(Target, "<< target>>");
assert!(Target::parse("<<ta<get>>").is_none()); parse_fail!(Target, "<<ta<get>>");
assert!(Target::parse("<<ta>get>>").is_none()); parse_fail!(Target, "<<ta>get>>");
assert!(Target::parse("<<ta\nget>>").is_none()); parse_fail!(Target, "<<ta\nget>>");
assert!(Target::parse("<target>>").is_none()); parse_fail!(Target, "<target>>");
assert!(Target::parse("<<target>").is_none()); parse_fail!(Target, "<<target>");
} }

View file

@ -5,18 +5,21 @@ use objects::*;
#[cfg_attr(test, derive(PartialEq))] #[cfg_attr(test, derive(PartialEq))]
#[derive(Copy, Clone, Debug)] #[derive(Copy, Clone, Debug)]
pub enum Container { pub enum Container {
Block,
Bold,
Drawer,
Headline { beg: usize, end: usize }, Headline { beg: usize, end: usize },
Italic, Section { end: usize },
Paragraph { end: usize, trailing: usize },
Block,
Drawer,
LatexEnv, LatexEnv,
List, List,
Paragraph,
Section { end: usize },
StrikeThrough,
Table, Table,
Underline,
Italic { end: usize },
Strike { end: usize },
Bold { end: usize },
Underline { end: usize },
} }
#[cfg_attr(test, derive(PartialEq, Debug))] #[cfg_attr(test, derive(PartialEq, Debug))]
@ -27,7 +30,9 @@ pub enum Event<'a> {
StartSection, StartSection,
EndSection, EndSection,
Paragraph, StartParagraph,
EndParagraph,
BlockStart, BlockStart,
BlockEnd, BlockEnd,
DynBlockStart, DynBlockStart,
@ -43,17 +48,17 @@ pub enum Event<'a> {
Clock, Clock,
Comment, Comment(&'a str),
TableStart, TableStart,
TableEnd, TableEnd,
TableCell, TableCell,
LatexEnv, LatexEnv,
StrikeThrough,
FnDef(FnDef<'a>), FnDef(FnDef<'a>),
Keyword(Keyword<'a>), Keyword(Keyword<'a>),
Rule, Rule,
Cookie(Cookie<'a>), Cookie(Cookie<'a>),
FnRef(FnRef<'a>), FnRef(FnRef<'a>),
InlineCall(InlineCall<'a>), InlineCall(InlineCall<'a>),
@ -63,13 +68,18 @@ pub enum Event<'a> {
RadioTarget(RadioTarget<'a>), RadioTarget(RadioTarget<'a>),
Snippet(Snippet<'a>), Snippet(Snippet<'a>),
Target(Target<'a>), Target(Target<'a>),
Bold(&'a str),
Verbatim(&'a str),
Italic(&'a str),
Strike(&'a str),
Underline(&'a str),
Code(&'a str),
StartBold,
EndBold,
StartItalic,
EndItalic,
StartStrike,
EndStrike,
StartUnderline,
EndUnderline,
Verbatim(&'a str),
Code(&'a str),
Text(&'a str), Text(&'a str),
} }
@ -77,6 +87,8 @@ pub struct Parser<'a> {
text: &'a str, text: &'a str,
stack: Vec<Container>, stack: Vec<Container>,
off: usize, off: usize,
ele_buf: Option<(Element<'a>, usize)>,
obj_buf: Option<(Object<'a>, usize)>,
} }
impl<'a> Parser<'a> { impl<'a> Parser<'a> {
@ -85,6 +97,8 @@ impl<'a> Parser<'a> {
text, text,
stack: Vec::new(), stack: Vec::new(),
off: 0, off: 0,
ele_buf: None,
obj_buf: None,
} }
} }
@ -100,11 +114,6 @@ impl<'a> Parser<'a> {
} }
} }
fn end_section(&mut self) -> Event<'a> {
self.stack.pop();
Event::EndSection
}
fn start_headline(&mut self, tail: &'a str) -> Event<'a> { fn start_headline(&mut self, tail: &'a str) -> Event<'a> {
let (hdl, off, end) = Headline::parse(tail); let (hdl, off, end) = Headline::parse(tail);
self.stack.push(Container::Headline { self.stack.push(Container::Headline {
@ -115,9 +124,71 @@ impl<'a> Parser<'a> {
Event::StartHeadline(hdl) Event::StartHeadline(hdl)
} }
fn end_headline(&mut self) -> Event<'a> { fn next_ele(&mut self, end: usize) -> Event<'a> {
self.stack.pop(); let (ele, off) = if let Some((ele, off)) = std::mem::replace(&mut self.ele_buf, None) {
Event::EndHeadline (Some(ele), off)
} else {
let (off, ele, next_2) = Element::next_2(&self.text[self.off..end]);
self.ele_buf = next_2;
(ele, off)
};
self.off += off;
if let Some(ele) = ele {
if let Element::Paragraph { end, trailing } = ele {
self.stack.push(Container::Paragraph {
end: end + self.off - off,
trailing: trailing + self.off - off,
});
}
ele.into()
} else {
self.end()
}
}
/// Produces the event for the next inline object in `self.text[self.off..end]`.
///
/// An object left in `obj_buf` by the previous call is consumed first.
/// Otherwise `Object::next_2` is run on the remaining text and the optional
/// second object it returns is stored in `obj_buf` for the following call.
/// The cursor `self.off` is advanced by the offset that accompanies the
/// object. For bold, italic, strike and underline objects a container is
/// pushed whose `end` is the object's `end` added to the advanced cursor.
fn next_obj(&mut self, end: usize) -> Event<'a> {
    // `Option::take` moves the buffered pair out and leaves `None` behind,
    // which is what `mem::replace(&mut buf, None)` spelled out by hand.
    let (obj, off) = match self.obj_buf.take() {
        Some(buffered) => buffered,
        None => {
            let (obj, off, lookahead) = Object::next_2(&self.text[self.off..end]);
            self.obj_buf = lookahead;
            (obj, off)
        }
    };

    // Advance before computing container ends: they are relative to the new cursor.
    self.off += off;

    let container = match obj {
        Object::Underline { end } => Some(Container::Underline {
            end: self.off + end,
        }),
        Object::Strike { end } => Some(Container::Strike {
            end: self.off + end,
        }),
        Object::Italic { end } => Some(Container::Italic {
            end: self.off + end,
        }),
        Object::Bold { end } => Some(Container::Bold {
            end: self.off + end,
        }),
        _ => None,
    };
    if let Some(container) = container {
        self.stack.push(container);
    }

    obj.into()
}
fn end(&mut self) -> Event<'a> {
match self.stack.pop().unwrap() {
Container::Paragraph { .. } => Event::EndParagraph,
Container::Underline { .. } => Event::EndUnderline,
Container::Section { .. } => Event::EndSection,
Container::Strike { .. } => Event::EndStrike,
Container::Headline { .. } => Event::EndHeadline,
Container::Italic { .. } => Event::EndItalic,
Container::Bold { .. } => Event::EndBold,
_ => unimplemented!(),
}
} }
} }
@ -139,7 +210,7 @@ impl<'a> Iterator for Parser<'a> {
Some(match last { Some(match last {
Container::Headline { beg, end } => { Container::Headline { beg, end } => {
if self.off >= end { if self.off >= end {
self.end_headline() self.end()
} else if self.off == beg { } else if self.off == beg {
self.start_section_or_headline(tail) self.start_section_or_headline(tail)
} else { } else {
@ -148,14 +219,28 @@ impl<'a> Iterator for Parser<'a> {
} }
Container::Section { end } => { Container::Section { end } => {
if self.off >= end { if self.off >= end {
self.end_section() self.end()
} else { } else {
match Element::find_elem(&self.text[self.off..end]) { self.next_ele(end)
(Element::Paragraph(_), off) => {
self.off += off;
Event::Paragraph
} }
} }
Container::Paragraph { end, trailing } => {
if self.off >= end {
self.off = trailing;
self.end()
} else {
self.next_obj(end)
}
}
Container::Bold { end }
| Container::Underline { end }
| Container::Italic { end }
| Container::Strike { end } => {
if self.off >= end {
self.off += 1;
self.end()
} else {
self.next_obj(end)
} }
} }
_ => unimplemented!(), _ => unimplemented!(),
@ -164,6 +249,41 @@ impl<'a> Iterator for Parser<'a> {
} }
} }
/// Maps a parsed inline object onto the event emitted for it.
///
/// Bold, italic, strike and underline objects become their `Start*` event and
/// their `end` field is ignored; every other object moves its payload into the
/// event variant of the same name.
impl<'a> From<Object<'a>> for Event<'a> {
    fn from(object: Object<'a>) -> Self {
        match object {
            // Emphasis objects: only the opening event is produced here.
            Object::Bold { .. } => Event::StartBold,
            Object::Italic { .. } => Event::StartItalic,
            Object::Strike { .. } => Event::StartStrike,
            Object::Underline { .. } => Event::StartUnderline,

            // Objects carrying a string slice.
            Object::Code(text) => Event::Code(text),
            Object::Verbatim(text) => Event::Verbatim(text),
            Object::Text(text) => Event::Text(text),

            // Objects carrying a parsed structure.
            Object::Cookie(cookie) => Event::Cookie(cookie),
            Object::FnRef(fn_ref) => Event::FnRef(fn_ref),
            Object::InlineCall(call) => Event::InlineCall(call),
            Object::InlineSrc(inline_src) => Event::InlineSrc(inline_src),
            Object::Link(link) => Event::Link(link),
            Object::Macros(macros) => Event::Macros(macros),
            Object::RadioTarget(radio) => Event::RadioTarget(radio),
            Object::Snippet(snippet) => Event::Snippet(snippet),
            Object::Target(target) => Event::Target(target),
        }
    }
}
/// Maps a parsed element onto the event emitted for it.
///
/// A paragraph becomes `StartParagraph` and its fields are ignored; a rule
/// becomes `Rule`; the remaining elements move their payload into the event
/// variant of the same name.
impl<'a> From<Element<'a>> for Event<'a> {
    fn from(element: Element<'a>) -> Self {
        match element {
            Element::Paragraph { .. } => Event::StartParagraph,
            Element::Rule => Event::Rule,
            Element::Comment(text) => Event::Comment(text),
            Element::FnDef(fn_def) => Event::FnDef(fn_def),
            Element::Keyword(keyword) => Event::Keyword(keyword),
        }
    }
}
#[test] #[test]
fn parse() { fn parse() {
use self::Event::*; use self::Event::*;
@ -171,28 +291,51 @@ fn parse() {
let expected = vec![ let expected = vec![
StartHeadline(Headline::new(1, None, None, "Title 1", None)), StartHeadline(Headline::new(1, None, None, "Title 1", None)),
StartSection, StartSection,
Paragraph, StartParagraph,
StartBold,
Text("Section 1"),
EndBold,
EndParagraph,
EndSection, EndSection,
StartHeadline(Headline::new(2, None, None, "Title 2", None)), StartHeadline(Headline::new(2, None, None, "Title 2", None)),
StartSection, StartSection,
Paragraph, StartParagraph,
StartUnderline,
Text("Section 2"),
EndUnderline,
EndParagraph,
EndSection, EndSection,
EndHeadline, EndHeadline,
EndHeadline, EndHeadline,
StartHeadline(Headline::new(1, None, None, "Title 3", None)), StartHeadline(Headline::new(1, None, None, "Title 3", None)),
StartSection, StartSection,
Paragraph, StartParagraph,
StartItalic,
Text("Section 3"),
EndItalic,
EndParagraph,
EndSection, EndSection,
EndHeadline, EndHeadline,
StartHeadline(Headline::new(1, None, None, "Title 4", None)), StartHeadline(Headline::new(1, None, None, "Title 4", None)),
StartSection, StartSection,
Paragraph, StartParagraph,
Verbatim("Section 4"),
EndParagraph,
EndSection, EndSection,
EndHeadline, EndHeadline,
]; ];
assert_eq!( assert_eq!(
Parser::new("* Title 1\nSection 1\n** Title 2\nSection 2\n* Title 3\nSection 3\n* Title 4 \nSection 4") Parser::new(
r#"* Title 1
*Section 1*
** Title 2
_Section 2_
* Title 3
/Section 3/
* Title 4
=Section 4="#
)
.collect::<Vec<_>>(), .collect::<Vec<_>>(),
expected expected
); );

View file

@ -3,11 +3,10 @@
#[macro_export] #[macro_export]
macro_rules! expect { macro_rules! expect {
($src:ident, $index:expr, $expect:tt) => { ($src:ident, $index:expr, $expect:tt) => {
if $index >= $src.len() || $src.as_bytes()[$index] != $expect { $src.as_bytes().get($index).filter(|&b| b == &$expect)
return None;
}
}; };
($src:ident, $index:expr, $expect:expr) => { ($src:ident, $index:expr, $expect:expr) => {
// $src.as_bytes().get($index).filter($expect)
if $index >= $src.len() || !$expect($src.as_bytes()[$index]) { if $index >= $src.len() || !$expect($src.as_bytes()[$index]) {
return None; return None;
} }
@ -16,16 +15,15 @@ macro_rules! expect {
#[macro_export] #[macro_export]
macro_rules! eol { macro_rules! eol {
($src:expr) => {{ ($src:expr) => {
let mut pos = 0; $src.find('\n').unwrap_or($src.len())
while pos < $src.len() { };
if $src.as_bytes()[pos] == b'\n' { ($src:expr, $from:expr) => {
break; $src[$from..]
} .find('\n')
pos += 1; .map(|i| i + $from)
} .unwrap_or($src.len())
pos };
}};
} }
#[macro_export] #[macro_export]
@ -105,7 +103,7 @@ macro_rules! until_while {
#[macro_export] #[macro_export]
macro_rules! cond_eq { macro_rules! cond_eq {
($s:ident, $i:expr, $p:expr) => { ($s:ident, $i:expr, $p:expr) => {
if $i > $s.len() { if $i >= $s.len() {
return None; return None;
} else { } else {
$s.as_bytes()[$i] == $p $s.as_bytes()[$i] == $p
@ -123,16 +121,6 @@ macro_rules! position {
}; };
} }
#[macro_export]
macro_rules! find {
($s:ident, $i:expr, $p:expr) => {
match $s[$i..].find($p) {
Some(x) => x + $i,
None => return None,
}
};
}
#[macro_export] #[macro_export]
macro_rules! starts_with { macro_rules! starts_with {
($s:ident, $p:expr) => { ($s:ident, $p:expr) => {
@ -143,18 +131,44 @@ macro_rules! starts_with {
} }
#[macro_export] #[macro_export]
macro_rules! next_line { macro_rules! skip_space {
($s:ident, $p:expr) => { ($src:ident, $from:expr) => {
self.chars().position(|c| c == ch).unwrap_or(self.len()) until!($src[$from..], |c| c != b' ').unwrap_or(0) + $from
if !$s.starts_with($p) {
return None;
}
}; };
} }
#[macro_export] #[macro_export]
macro_rules! skip_whitespace { macro_rules! skip_empty_line {
($src:ident, $from:ident) => { ($src:ident, $from:expr) => {{
until!($src[$from..], |c| c != b' ').unwrap_or(0) + $from let mut pos = $from;
while pos < $src.len() {
if $src.as_bytes()[pos] != b'\n' {
break;
}
pos += 1;
}
pos
}};
}
/// Asserts that `$ty::parse($src)` returns `None`.
///
/// The parse result must implement `PartialEq` and `Debug`, because the check
/// goes through `assert_eq!`.
#[macro_export]
macro_rules! parse_fail {
    ($ty:ident, $src:expr) => {{
        let outcome = $ty::parse($src);
        assert_eq!(outcome, None);
    }};
}
#[macro_export]
macro_rules! parse_succ {
($ty:ident, $src:expr, $($field:ident : $value:expr),* ) => {
assert_eq!(
$ty::parse($src),
Some((
$ty {
$( $field : $value ),*
},
$src.len()
)),
);
}; };
} }