feat(parser): improve inline object parsing
This commit is contained in:
parent
c4041aefb6
commit
3beabcedfa
|
@ -2,7 +2,6 @@ use bytecount::count;
|
||||||
use memchr::memchr;
|
use memchr::memchr;
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
/// returns offset
|
|
||||||
pub fn parse(text: &str, marker: u8) -> Option<usize> {
|
pub fn parse(text: &str, marker: u8) -> Option<usize> {
|
||||||
debug_assert!(text.len() >= 3);
|
debug_assert!(text.len() >= 3);
|
||||||
|
|
||||||
|
@ -12,7 +11,7 @@ pub fn parse(text: &str, marker: u8) -> Option<usize> {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
let end = memchr(marker, &bytes[1..]).filter(|&i| count(&bytes[1..i + 1], b'\n') < 2)?;
|
let end = memchr(marker, &bytes[1..]).filter(|&i| count(&bytes[1..=i], b'\n') < 2)?;
|
||||||
|
|
||||||
if bytes[end].is_ascii_whitespace() {
|
if bytes[end].is_ascii_whitespace() {
|
||||||
return None;
|
return None;
|
||||||
|
|
|
@ -436,31 +436,47 @@ impl<'a> Parser<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn next_obj(&mut self, text: &'a str) -> Event<'a> {
|
fn next_obj(&mut self, text: &'a str) -> Event<'a> {
|
||||||
|
let bytes = text.as_bytes();
|
||||||
let (obj, off, limit, end) = self
|
let (obj, off, limit, end) = self
|
||||||
.obj_buf
|
.obj_buf
|
||||||
.take()
|
.take()
|
||||||
.or_else(|| self.real_next_obj(text))
|
.or_else(|| match bytes[0] {
|
||||||
|
b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => {
|
||||||
|
if let Some(buf) = self.real_next_obj(&text[1..]) {
|
||||||
|
self.obj_buf = Some(buf);
|
||||||
|
Some((Event::Text(&text[0..1]), 1, 0, 0))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => self.real_next_obj(text),
|
||||||
|
})
|
||||||
.unwrap_or_else(|| {
|
.unwrap_or_else(|| {
|
||||||
let bs = bytes!(b'@', b' ', b'"', b'(', b'\n', b'{', b'<', b'[');
|
let bs = bytes!(b'@', b' ', b'"', b'(', b'\n', b'{', b'<', b'[');
|
||||||
let bytes = text.as_bytes();
|
|
||||||
let mut pos = 0;
|
let mut pos = 0;
|
||||||
|
|
||||||
while let Some(off) = bs.find(&bytes[pos..]) {
|
while let Some(off) = bs.find(&bytes[pos..]) {
|
||||||
pos += off;
|
pos += off;
|
||||||
|
match bytes[pos] {
|
||||||
|
b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => {
|
||||||
|
if let Some(buf) = self.real_next_obj(&text[pos + 1..]) {
|
||||||
|
self.obj_buf = Some(buf);
|
||||||
|
return (Event::Text(&text[0..=pos]), pos + 1, 0, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
if let Some(buf) = self.real_next_obj(&text[pos..]) {
|
if let Some(buf) = self.real_next_obj(&text[pos..]) {
|
||||||
self.obj_buf = Some(buf);
|
self.obj_buf = Some(buf);
|
||||||
return (Event::Text(&text[0..pos]), pos, 0, 0);
|
return (Event::Text(&text[0..pos]), pos, 0, 0);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
pos += 1;
|
pos += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
(Event::Text(text), text.len(), 0, 0)
|
(Event::Text(text), text.len(), 0, 0)
|
||||||
});
|
});
|
||||||
|
|
||||||
debug_assert!(
|
debug_assert!(
|
||||||
(limit == 0 && end == 0)
|
(limit == 0 && end == 0) || (off <= limit && limit <= end && end <= text.len()),
|
||||||
|| (limit == 1 && end == 1)
|
|
||||||
|| (off <= limit && limit <= end && end <= text.len()),
|
|
||||||
"{} <= {} <= {} <= {}",
|
"{} <= {} <= {} <= {}",
|
||||||
off,
|
off,
|
||||||
limit,
|
limit,
|
||||||
|
@ -481,11 +497,10 @@ impl<'a> Parser<'a> {
|
||||||
obj
|
obj
|
||||||
}
|
}
|
||||||
|
|
||||||
fn real_next_obj(&mut self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> {
|
fn real_next_obj(&self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> {
|
||||||
if text.len() < 3 {
|
if text.len() < 3 {
|
||||||
return None;
|
None
|
||||||
}
|
} else {
|
||||||
|
|
||||||
let bytes = text.as_bytes();
|
let bytes = text.as_bytes();
|
||||||
match bytes[0] {
|
match bytes[0] {
|
||||||
b'@' if bytes[1] == b'@' => {
|
b'@' if bytes[1] == b'@' => {
|
||||||
|
@ -499,44 +514,35 @@ impl<'a> Parser<'a> {
|
||||||
radio_target::parse(text)
|
radio_target::parse(text)
|
||||||
.map(|(target, off)| (Event::RadioTarget { target }, off, 0, 0))
|
.map(|(target, off)| (Event::RadioTarget { target }, off, 0, 0))
|
||||||
} else {
|
} else {
|
||||||
target::parse(text).map(|(target, off)| (Event::Target { target }, off, 0, 0))
|
target::parse(text)
|
||||||
|
.map(|(target, off)| (Event::Target { target }, off, 0, 0))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
b'<' => Timestamp::parse_active(text)
|
b'<' => Timestamp::parse_active(text)
|
||||||
.or_else(|| Timestamp::parse_diary(text))
|
.or_else(|| Timestamp::parse_diary(text))
|
||||||
.map(|(timestamp, off)| (Event::Timestamp(timestamp), off, 0, 0)),
|
.map(|(ts, off)| (Event::Timestamp(ts), off, 0, 0)),
|
||||||
b'[' => {
|
b'[' => {
|
||||||
if text[1..].starts_with("fn:") {
|
if text[1..].starts_with("fn:") {
|
||||||
FnRef::parse(text).map(|(fn_ref, off)| (Event::FnRef(fn_ref), off, 0, 0))
|
FnRef::parse(text).map(|(fn_ref, off)| (Event::FnRef(fn_ref), off, 0, 0))
|
||||||
} else if bytes[1] == b'[' {
|
} else if bytes[1] == b'[' {
|
||||||
Link::parse(text).map(|(link, off)| (Event::Link(link), off, 0, 0))
|
Link::parse(text).map(|(link, off)| (Event::Link(link), off, 0, 0))
|
||||||
|
} else if let Some((cookie, off)) = Cookie::parse(text) {
|
||||||
|
Some((Event::Cookie(cookie), off, 0, 0))
|
||||||
} else {
|
} else {
|
||||||
Cookie::parse(text)
|
|
||||||
.map(|(cookie, off)| (Event::Cookie(cookie), off, 0, 0))
|
|
||||||
.or_else(|| {
|
|
||||||
Timestamp::parse_inactive(text)
|
Timestamp::parse_inactive(text)
|
||||||
.map(|(timestamp, off)| (Event::Timestamp(timestamp), off, 0, 0))
|
.map(|(ts, off)| (Event::Timestamp(ts), off, 0, 0))
|
||||||
})
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => self
|
|
||||||
.next_inline(&text[1..])
|
|
||||||
.map(|(event, off, limit, end)| (event, off + 1, limit + 1, end + 1)),
|
|
||||||
_ => self.next_inline(text),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn next_inline(&self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> {
|
|
||||||
match text.as_bytes()[0] {
|
|
||||||
b'*' => emphasis::parse(text, b'*').map(|end| (Event::BoldBeg, 1, end - 1, end)),
|
b'*' => emphasis::parse(text, b'*').map(|end| (Event::BoldBeg, 1, end - 1, end)),
|
||||||
b'+' => emphasis::parse(text, b'+').map(|end| (Event::StrikeBeg, 1, end - 1, end)),
|
b'+' => emphasis::parse(text, b'+').map(|end| (Event::StrikeBeg, 1, end - 1, end)),
|
||||||
b'/' => emphasis::parse(text, b'/').map(|end| (Event::ItalicBeg, 1, end - 1, end)),
|
b'/' => emphasis::parse(text, b'/').map(|end| (Event::ItalicBeg, 1, end - 1, end)),
|
||||||
b'_' => emphasis::parse(text, b'_').map(|end| (Event::UnderlineBeg, 1, end - 1, end)),
|
b'_' => {
|
||||||
|
emphasis::parse(text, b'_').map(|end| (Event::UnderlineBeg, 1, end - 1, end))
|
||||||
|
}
|
||||||
b'=' => emphasis::parse(text, b'=')
|
b'=' => emphasis::parse(text, b'=')
|
||||||
.map(|end| (Event::Verbatim(&text[1..end - 1]), end, 0, 0)),
|
.map(|end| (Event::Verbatim(&text[1..end - 1]), end, 0, 0)),
|
||||||
b'~' => {
|
b'~' => emphasis::parse(text, b'~')
|
||||||
emphasis::parse(text, b'~').map(|end| (Event::Code(&text[1..end - 1]), end, 0, 0))
|
.map(|end| (Event::Code(&text[1..end - 1]), end, 0, 0)),
|
||||||
}
|
|
||||||
b's' if text.starts_with("src_") => {
|
b's' if text.starts_with("src_") => {
|
||||||
InlineSrc::parse(text).map(|(src, off)| (Event::InlineSrc(src), off, 0, 0))
|
InlineSrc::parse(text).map(|(src, off)| (Event::InlineSrc(src), off, 0, 0))
|
||||||
}
|
}
|
||||||
|
@ -546,6 +552,7 @@ impl<'a> Parser<'a> {
|
||||||
_ => None,
|
_ => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn next_list_item(&self, text: &'a str, indent: usize) -> (&'a str, usize, usize, usize) {
|
fn next_list_item(&self, text: &'a str, indent: usize) -> (&'a str, usize, usize, usize) {
|
||||||
use std::iter::once;
|
use std::iter::once;
|
||||||
|
@ -559,7 +566,7 @@ impl<'a> Parser<'a> {
|
||||||
.chain(once(text.len()));
|
.chain(once(text.len()));
|
||||||
let mut pos = lines.next().unwrap();
|
let mut pos = lines.next().unwrap();
|
||||||
|
|
||||||
while let Some(i) = lines.next() {
|
for i in lines {
|
||||||
let line = &text[pos..i];
|
let line = &text[pos..i];
|
||||||
if let Some(line_indent) = line.find(|c: char| !c.is_whitespace()) {
|
if let Some(line_indent) = line.find(|c: char| !c.is_whitespace()) {
|
||||||
if line_indent == indent {
|
if line_indent == indent {
|
||||||
|
|
|
@ -18,6 +18,12 @@ macro_rules! html_test {
|
||||||
|
|
||||||
html_test!(
|
html_test!(
|
||||||
emphasis,
|
emphasis,
|
||||||
|
"*bold*, /italic/,_underlined_, =verbatim= and ~code~",
|
||||||
|
"<section><p><b>bold</b>, <i>italic</i>,<u>underlined</u>, <code>verbatim</code> and <code>code</code></p></section>"
|
||||||
|
);
|
||||||
|
|
||||||
|
html_test!(
|
||||||
|
section_and_headline,
|
||||||
r#"* Title 1
|
r#"* Title 1
|
||||||
*Section 1*
|
*Section 1*
|
||||||
** Title 2
|
** Title 2
|
||||||
|
@ -26,10 +32,14 @@ _Section 2_
|
||||||
/Section 3/
|
/Section 3/
|
||||||
* Title 4
|
* Title 4
|
||||||
=Section 4="#,
|
=Section 4="#,
|
||||||
"<h1>Title 1</h1><section><p><b>Section 1</b></p></section>\
|
"<h1>Title 1</h1>\
|
||||||
<h2>Title 2</h2><section><p><u>Section 2</u></p></section>\
|
<section><p><b>Section 1</b></p></section>\
|
||||||
<h1>Title 3</h1><section><p><i>Section 3</i></p></section>\
|
<h2>Title 2</h2>\
|
||||||
<h1>Title 4</h1><section><p><code>Section 4</code></p></section>"
|
<section><p><u>Section 2</u></p></section>\
|
||||||
|
<h1>Title 3</h1>\
|
||||||
|
<section><p><i>Section 3</i></p></section>\
|
||||||
|
<h1>Title 4</h1>\
|
||||||
|
<section><p><code>Section 4</code></p></section>"
|
||||||
);
|
);
|
||||||
|
|
||||||
html_test!(
|
html_test!(
|
||||||
|
@ -49,3 +59,9 @@ html_test!(
|
||||||
<li><p>5</p></li>\
|
<li><p>5</p></li>\
|
||||||
</ul></section>"
|
</ul></section>"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
html_test!(
|
||||||
|
snippet,
|
||||||
|
"@@html:<del>@@delete this@@html:</del>@@",
|
||||||
|
"<section><p><del>delete this</del></p></section>"
|
||||||
|
);
|
||||||
|
|
Loading…
Reference in a new issue