feat(parser): improve inline object parsing

This commit is contained in:
PoiScript 2019-05-18 21:34:29 +08:00
parent c4041aefb6
commit 3beabcedfa
3 changed files with 98 additions and 76 deletions

View file

@ -2,7 +2,6 @@ use bytecount::count;
use memchr::memchr;
#[inline]
/// returns offset
pub fn parse(text: &str, marker: u8) -> Option<usize> {
debug_assert!(text.len() >= 3);
@ -12,7 +11,7 @@ pub fn parse(text: &str, marker: u8) -> Option<usize> {
return None;
}
let end = memchr(marker, &bytes[1..]).filter(|&i| count(&bytes[1..i + 1], b'\n') < 2)?;
let end = memchr(marker, &bytes[1..]).filter(|&i| count(&bytes[1..=i], b'\n') < 2)?;
if bytes[end].is_ascii_whitespace() {
return None;

View file

@ -436,31 +436,47 @@ impl<'a> Parser<'a> {
}
fn next_obj(&mut self, text: &'a str) -> Event<'a> {
let bytes = text.as_bytes();
let (obj, off, limit, end) = self
.obj_buf
.take()
.or_else(|| self.real_next_obj(text))
.or_else(|| match bytes[0] {
b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => {
if let Some(buf) = self.real_next_obj(&text[1..]) {
self.obj_buf = Some(buf);
Some((Event::Text(&text[0..1]), 1, 0, 0))
} else {
None
}
}
_ => self.real_next_obj(text),
})
.unwrap_or_else(|| {
let bs = bytes!(b'@', b' ', b'"', b'(', b'\n', b'{', b'<', b'[');
let bytes = text.as_bytes();
let mut pos = 0;
while let Some(off) = bs.find(&bytes[pos..]) {
pos += off;
match bytes[pos] {
b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => {
if let Some(buf) = self.real_next_obj(&text[pos + 1..]) {
self.obj_buf = Some(buf);
return (Event::Text(&text[0..=pos]), pos + 1, 0, 0);
}
}
_ => {
if let Some(buf) = self.real_next_obj(&text[pos..]) {
self.obj_buf = Some(buf);
return (Event::Text(&text[0..pos]), pos, 0, 0);
}
}
}
pos += 1;
}
(Event::Text(text), text.len(), 0, 0)
});
debug_assert!(
(limit == 0 && end == 0)
|| (limit == 1 && end == 1)
|| (off <= limit && limit <= end && end <= text.len()),
(limit == 0 && end == 0) || (off <= limit && limit <= end && end <= text.len()),
"{} <= {} <= {} <= {}",
off,
limit,
@ -481,11 +497,10 @@ impl<'a> Parser<'a> {
obj
}
fn real_next_obj(&mut self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> {
fn real_next_obj(&self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> {
if text.len() < 3 {
return None;
}
None
} else {
let bytes = text.as_bytes();
match bytes[0] {
b'@' if bytes[1] == b'@' => {
@ -499,44 +514,35 @@ impl<'a> Parser<'a> {
radio_target::parse(text)
.map(|(target, off)| (Event::RadioTarget { target }, off, 0, 0))
} else {
target::parse(text).map(|(target, off)| (Event::Target { target }, off, 0, 0))
target::parse(text)
.map(|(target, off)| (Event::Target { target }, off, 0, 0))
}
}
b'<' => Timestamp::parse_active(text)
.or_else(|| Timestamp::parse_diary(text))
.map(|(timestamp, off)| (Event::Timestamp(timestamp), off, 0, 0)),
.map(|(ts, off)| (Event::Timestamp(ts), off, 0, 0)),
b'[' => {
if text[1..].starts_with("fn:") {
FnRef::parse(text).map(|(fn_ref, off)| (Event::FnRef(fn_ref), off, 0, 0))
} else if bytes[1] == b'[' {
Link::parse(text).map(|(link, off)| (Event::Link(link), off, 0, 0))
} else if let Some((cookie, off)) = Cookie::parse(text) {
Some((Event::Cookie(cookie), off, 0, 0))
} else {
Cookie::parse(text)
.map(|(cookie, off)| (Event::Cookie(cookie), off, 0, 0))
.or_else(|| {
Timestamp::parse_inactive(text)
.map(|(timestamp, off)| (Event::Timestamp(timestamp), off, 0, 0))
})
.map(|(ts, off)| (Event::Timestamp(ts), off, 0, 0))
}
}
b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => self
.next_inline(&text[1..])
.map(|(event, off, limit, end)| (event, off + 1, limit + 1, end + 1)),
_ => self.next_inline(text),
}
}
fn next_inline(&self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> {
match text.as_bytes()[0] {
b'*' => emphasis::parse(text, b'*').map(|end| (Event::BoldBeg, 1, end - 1, end)),
b'+' => emphasis::parse(text, b'+').map(|end| (Event::StrikeBeg, 1, end - 1, end)),
b'/' => emphasis::parse(text, b'/').map(|end| (Event::ItalicBeg, 1, end - 1, end)),
b'_' => emphasis::parse(text, b'_').map(|end| (Event::UnderlineBeg, 1, end - 1, end)),
b'_' => {
emphasis::parse(text, b'_').map(|end| (Event::UnderlineBeg, 1, end - 1, end))
}
b'=' => emphasis::parse(text, b'=')
.map(|end| (Event::Verbatim(&text[1..end - 1]), end, 0, 0)),
b'~' => {
emphasis::parse(text, b'~').map(|end| (Event::Code(&text[1..end - 1]), end, 0, 0))
}
b'~' => emphasis::parse(text, b'~')
.map(|end| (Event::Code(&text[1..end - 1]), end, 0, 0)),
b's' if text.starts_with("src_") => {
InlineSrc::parse(text).map(|(src, off)| (Event::InlineSrc(src), off, 0, 0))
}
@ -546,6 +552,7 @@ impl<'a> Parser<'a> {
_ => None,
}
}
}
fn next_list_item(&self, text: &'a str, indent: usize) -> (&'a str, usize, usize, usize) {
use std::iter::once;
@ -559,7 +566,7 @@ impl<'a> Parser<'a> {
.chain(once(text.len()));
let mut pos = lines.next().unwrap();
while let Some(i) = lines.next() {
for i in lines {
let line = &text[pos..i];
if let Some(line_indent) = line.find(|c: char| !c.is_whitespace()) {
if line_indent == indent {

View file

@ -18,6 +18,12 @@ macro_rules! html_test {
// Checks that each inline emphasis marker is rendered to its HTML tag:
// `*…*` -> <b>, `/…/` -> <i>, `_…_` -> <u>, and both `=…=` and `~…~` -> <code>.
// Note that `_underlined_` directly follows a comma with no space in between,
// so this case also covers emphasis that starts immediately after `,`.
html_test!(
emphasis,
"*bold*, /italic/,_underlined_, =verbatim= and ~code~",
"<section><p><b>bold</b>, <i>italic</i>,<u>underlined</u>, <code>verbatim</code> and <code>code</code></p></section>"
);
html_test!(
section_and_headline,
r#"* Title 1
*Section 1*
** Title 2
@ -26,10 +32,14 @@ _Section 2_
/Section 3/
* Title 4
=Section 4="#,
"<h1>Title 1</h1><section><p><b>Section 1</b></p></section>\
<h2>Title 2</h2><section><p><u>Section 2</u></p></section>\
<h1>Title 3</h1><section><p><i>Section 3</i></p></section>\
<h1>Title 4</h1><section><p><code>Section 4</code></p></section>"
"<h1>Title 1</h1>\
<section><p><b>Section 1</b></p></section>\
<h2>Title 2</h2>\
<section><p><u>Section 2</u></p></section>\
<h1>Title 3</h1>\
<section><p><i>Section 3</i></p></section>\
<h1>Title 4</h1>\
<section><p><code>Section 4</code></p></section>"
);
html_test!(
@ -49,3 +59,9 @@ html_test!(
<li><p>5</p></li>\
</ul></section>"
);
// Checks that the contents of `@@html:…@@` snippets are written to the output
// verbatim (the `<del>` / `</del>` tags are not escaped), while the text between
// the two snippets is rendered as ordinary paragraph text.
html_test!(
snippet,
"@@html:<del>@@delete this@@html:</del>@@",
"<section><p><del>delete this</del></p></section>"
);