feat(parser): improve inline object parsing
This commit is contained in:
parent
c4041aefb6
commit
3beabcedfa
|
@ -2,7 +2,6 @@ use bytecount::count;
|
|||
use memchr::memchr;
|
||||
|
||||
#[inline]
|
||||
/// Returns the byte offset just past the closing `marker`, or `None` if there is no match.
|
||||
pub fn parse(text: &str, marker: u8) -> Option<usize> {
|
||||
debug_assert!(text.len() >= 3);
|
||||
|
||||
|
@ -12,7 +11,7 @@ pub fn parse(text: &str, marker: u8) -> Option<usize> {
|
|||
return None;
|
||||
}
|
||||
|
||||
let end = memchr(marker, &bytes[1..]).filter(|&i| count(&bytes[1..i + 1], b'\n') < 2)?;
|
||||
let end = memchr(marker, &bytes[1..]).filter(|&i| count(&bytes[1..=i], b'\n') < 2)?;
|
||||
|
||||
if bytes[end].is_ascii_whitespace() {
|
||||
return None;
|
||||
|
|
|
@ -436,31 +436,47 @@ impl<'a> Parser<'a> {
|
|||
}
|
||||
|
||||
fn next_obj(&mut self, text: &'a str) -> Event<'a> {
|
||||
let bytes = text.as_bytes();
|
||||
let (obj, off, limit, end) = self
|
||||
.obj_buf
|
||||
.take()
|
||||
.or_else(|| self.real_next_obj(text))
|
||||
.or_else(|| match bytes[0] {
|
||||
b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => {
|
||||
if let Some(buf) = self.real_next_obj(&text[1..]) {
|
||||
self.obj_buf = Some(buf);
|
||||
Some((Event::Text(&text[0..1]), 1, 0, 0))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
_ => self.real_next_obj(text),
|
||||
})
|
||||
.unwrap_or_else(|| {
|
||||
let bs = bytes!(b'@', b' ', b'"', b'(', b'\n', b'{', b'<', b'[');
|
||||
let bytes = text.as_bytes();
|
||||
let mut pos = 0;
|
||||
|
||||
while let Some(off) = bs.find(&bytes[pos..]) {
|
||||
pos += off;
|
||||
match bytes[pos] {
|
||||
b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => {
|
||||
if let Some(buf) = self.real_next_obj(&text[pos + 1..]) {
|
||||
self.obj_buf = Some(buf);
|
||||
return (Event::Text(&text[0..=pos]), pos + 1, 0, 0);
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
if let Some(buf) = self.real_next_obj(&text[pos..]) {
|
||||
self.obj_buf = Some(buf);
|
||||
return (Event::Text(&text[0..pos]), pos, 0, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
pos += 1;
|
||||
}
|
||||
|
||||
(Event::Text(text), text.len(), 0, 0)
|
||||
});
|
||||
|
||||
debug_assert!(
|
||||
(limit == 0 && end == 0)
|
||||
|| (limit == 1 && end == 1)
|
||||
|| (off <= limit && limit <= end && end <= text.len()),
|
||||
(limit == 0 && end == 0) || (off <= limit && limit <= end && end <= text.len()),
|
||||
"{} <= {} <= {} <= {}",
|
||||
off,
|
||||
limit,
|
||||
|
@ -481,11 +497,10 @@ impl<'a> Parser<'a> {
|
|||
obj
|
||||
}
|
||||
|
||||
fn real_next_obj(&mut self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> {
|
||||
fn real_next_obj(&self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> {
|
||||
if text.len() < 3 {
|
||||
return None;
|
||||
}
|
||||
|
||||
None
|
||||
} else {
|
||||
let bytes = text.as_bytes();
|
||||
match bytes[0] {
|
||||
b'@' if bytes[1] == b'@' => {
|
||||
|
@ -499,44 +514,35 @@ impl<'a> Parser<'a> {
|
|||
radio_target::parse(text)
|
||||
.map(|(target, off)| (Event::RadioTarget { target }, off, 0, 0))
|
||||
} else {
|
||||
target::parse(text).map(|(target, off)| (Event::Target { target }, off, 0, 0))
|
||||
target::parse(text)
|
||||
.map(|(target, off)| (Event::Target { target }, off, 0, 0))
|
||||
}
|
||||
}
|
||||
b'<' => Timestamp::parse_active(text)
|
||||
.or_else(|| Timestamp::parse_diary(text))
|
||||
.map(|(timestamp, off)| (Event::Timestamp(timestamp), off, 0, 0)),
|
||||
.map(|(ts, off)| (Event::Timestamp(ts), off, 0, 0)),
|
||||
b'[' => {
|
||||
if text[1..].starts_with("fn:") {
|
||||
FnRef::parse(text).map(|(fn_ref, off)| (Event::FnRef(fn_ref), off, 0, 0))
|
||||
} else if bytes[1] == b'[' {
|
||||
Link::parse(text).map(|(link, off)| (Event::Link(link), off, 0, 0))
|
||||
} else if let Some((cookie, off)) = Cookie::parse(text) {
|
||||
Some((Event::Cookie(cookie), off, 0, 0))
|
||||
} else {
|
||||
Cookie::parse(text)
|
||||
.map(|(cookie, off)| (Event::Cookie(cookie), off, 0, 0))
|
||||
.or_else(|| {
|
||||
Timestamp::parse_inactive(text)
|
||||
.map(|(timestamp, off)| (Event::Timestamp(timestamp), off, 0, 0))
|
||||
})
|
||||
.map(|(ts, off)| (Event::Timestamp(ts), off, 0, 0))
|
||||
}
|
||||
}
|
||||
b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => self
|
||||
.next_inline(&text[1..])
|
||||
.map(|(event, off, limit, end)| (event, off + 1, limit + 1, end + 1)),
|
||||
_ => self.next_inline(text),
|
||||
}
|
||||
}
|
||||
|
||||
fn next_inline(&self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> {
|
||||
match text.as_bytes()[0] {
|
||||
b'*' => emphasis::parse(text, b'*').map(|end| (Event::BoldBeg, 1, end - 1, end)),
|
||||
b'+' => emphasis::parse(text, b'+').map(|end| (Event::StrikeBeg, 1, end - 1, end)),
|
||||
b'/' => emphasis::parse(text, b'/').map(|end| (Event::ItalicBeg, 1, end - 1, end)),
|
||||
b'_' => emphasis::parse(text, b'_').map(|end| (Event::UnderlineBeg, 1, end - 1, end)),
|
||||
b'_' => {
|
||||
emphasis::parse(text, b'_').map(|end| (Event::UnderlineBeg, 1, end - 1, end))
|
||||
}
|
||||
b'=' => emphasis::parse(text, b'=')
|
||||
.map(|end| (Event::Verbatim(&text[1..end - 1]), end, 0, 0)),
|
||||
b'~' => {
|
||||
emphasis::parse(text, b'~').map(|end| (Event::Code(&text[1..end - 1]), end, 0, 0))
|
||||
}
|
||||
b'~' => emphasis::parse(text, b'~')
|
||||
.map(|end| (Event::Code(&text[1..end - 1]), end, 0, 0)),
|
||||
b's' if text.starts_with("src_") => {
|
||||
InlineSrc::parse(text).map(|(src, off)| (Event::InlineSrc(src), off, 0, 0))
|
||||
}
|
||||
|
@ -546,6 +552,7 @@ impl<'a> Parser<'a> {
|
|||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn next_list_item(&self, text: &'a str, indent: usize) -> (&'a str, usize, usize, usize) {
|
||||
use std::iter::once;
|
||||
|
@ -559,7 +566,7 @@ impl<'a> Parser<'a> {
|
|||
.chain(once(text.len()));
|
||||
let mut pos = lines.next().unwrap();
|
||||
|
||||
while let Some(i) = lines.next() {
|
||||
for i in lines {
|
||||
let line = &text[pos..i];
|
||||
if let Some(line_indent) = line.find(|c: char| !c.is_whitespace()) {
|
||||
if line_indent == indent {
|
||||
|
|
|
@ -18,6 +18,12 @@ macro_rules! html_test {
|
|||
|
||||
// Renders bold, italic, underline, verbatim and code markers within a single
// paragraph. The underline span directly follows a comma with no space in
// between, and both `=verbatim=` and `~code~` are expected to come out as
// `<code>` elements.
html_test!(
|
||||
emphasis,
|
||||
"*bold*, /italic/,_underlined_, =verbatim= and ~code~",
|
||||
"<section><p><b>bold</b>, <i>italic</i>,<u>underlined</u>, <code>verbatim</code> and <code>code</code></p></section>"
|
||||
);
|
||||
|
||||
html_test!(
|
||||
section_and_headline,
|
||||
r#"* Title 1
|
||||
*Section 1*
|
||||
** Title 2
|
||||
|
@ -26,10 +32,14 @@ _Section 2_
|
|||
/Section 3/
|
||||
* Title 4
|
||||
=Section 4="#,
|
||||
"<h1>Title 1</h1><section><p><b>Section 1</b></p></section>\
|
||||
<h2>Title 2</h2><section><p><u>Section 2</u></p></section>\
|
||||
<h1>Title 3</h1><section><p><i>Section 3</i></p></section>\
|
||||
<h1>Title 4</h1><section><p><code>Section 4</code></p></section>"
|
||||
"<h1>Title 1</h1>\
|
||||
<section><p><b>Section 1</b></p></section>\
|
||||
<h2>Title 2</h2>\
|
||||
<section><p><u>Section 2</u></p></section>\
|
||||
<h1>Title 3</h1>\
|
||||
<section><p><i>Section 3</i></p></section>\
|
||||
<h1>Title 4</h1>\
|
||||
<section><p><code>Section 4</code></p></section>"
|
||||
);
|
||||
|
||||
html_test!(
|
||||
|
@ -49,3 +59,9 @@ html_test!(
|
|||
<li><p>5</p></li>\
|
||||
</ul></section>"
|
||||
);
|
||||
|
||||
// Two `@@html:...@@` snippets wrapped around plain text: the expected output
// carries the snippet contents through as raw HTML tags around that text.
html_test!(
|
||||
snippet,
|
||||
"@@html:<del>@@delete this@@html:</del>@@",
|
||||
"<section><p><del>delete this</del></p></section>"
|
||||
);
|
||||
|
|
Loading…
Reference in a new issue