feat(parser): drawer parsing

This commit is contained in:
PoiScript 2019-04-07 20:10:43 +08:00
parent fe591d2143
commit 6fa43f7571
8 changed files with 202 additions and 85 deletions

View file

@ -1,37 +1,42 @@
use crate::lines::Lines;
use memchr::memchr2;
use memchr::{memchr, memchr_iter};
// return (name, args, contents-begin, contents-end, end)
#[inline]
pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> {
debug_assert!(src.starts_with("#+"));
pub fn parse(text: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> {
debug_assert!(text.starts_with("#+"));
if src.len() <= 8 || src[2..8].to_uppercase() != "BEGIN_" {
if text.len() <= 8 || text[2..8].to_uppercase() != "BEGIN_" {
return None;
}
let name = memchr2(b' ', b'\n', src.as_bytes())
.filter(|&i| src.as_bytes()[8..i].iter().all(u8::is_ascii_alphabetic))?;
let mut lines = Lines::new(src);
let (pre_limit, begin, _) = lines.next()?;
let args = if pre_limit == name {
None
} else {
Some(&src[name..pre_limit])
};
let name = &src[8..name];
let end_line = format!(r"#+END_{}", name.to_uppercase());
let mut pre_end = begin;
let bytes = text.as_bytes();
let mut lines = memchr_iter(b'\n', text.as_bytes());
for (_, end, line) in lines {
if line.trim() == end_line {
return Some((name, args, begin, pre_end, end));
} else {
pre_end = end;
let (name, para, off) = lines
.next()
.map(|i| {
memchr(b' ', &bytes[8..i])
.map(|x| (&text[8..8 + x], Some(text[8 + x..i].trim()), i + 1))
.unwrap_or((&text[8..i], None, i + 1))
})
.filter(|(name, _, _)| name.as_bytes().iter().all(|&c| c.is_ascii_alphabetic()))?;
let mut pos = off;
let end = format!(r"#+END_{}", name.to_uppercase());
for i in lines {
if text[pos..i].trim().eq_ignore_ascii_case(&end) {
return Some((name, para, off, pos, i + 1));
}
pos = i + 1;
}
None
if text[pos..].trim().eq_ignore_ascii_case(&end) {
Some((name, para, off, pos, text.len()))
} else {
None
}
}
#[cfg(test)]
@ -42,19 +47,23 @@ mod tests {
assert_eq!(
parse("#+BEGIN_SRC\n#+END_SRC"),
Some(("SRC", None, 12, 12, 21))
Some((
"SRC",
None,
"#+BEGIN_SRC\n".len(),
"#+BEGIN_SRC\n".len(),
"#+BEGIN_SRC\n#+END_SRC".len()
))
);
assert_eq!(
parse(
r#"#+BEGIN_SRC rust
fn main() {
// print "Hello World!" to the console
println!("Hello World!");
}
#+END_SRC
"#
),
Some(("SRC", Some(" rust"), 17, 104, 114))
parse("#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n#+END_SRC\n"),
Some((
"SRC",
Some("javascript"),
"#+BEGIN_SRC javascript \n".len(),
"#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n".len(),
"#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n#+END_SRC\n".len()
))
);
// TODO: more testing
}

51
src/elements/drawer.rs Normal file
View file

@ -0,0 +1,51 @@
use memchr::memchr_iter;
/// Parses a drawer (`:NAME:` ... `:END:`) that begins at the start of `text`.
///
/// `text` is expected to start with the `:` that opens the drawer's name line;
/// this is only checked with a `debug_assert!`.
///
/// On success returns `(name, offset, limit, end)`, all byte indices into `text`:
/// - `name`: the drawer name without its surrounding colons,
/// - `offset`: start of the first line after the name line,
/// - `limit`: start of the closing `:END:` line,
/// - `end`: just past the closing line (including its newline, if it has one).
///
/// Returns `None` when the first line is not newline-terminated, when it is not
/// a `:NAME:` line whose name is non-empty and made only of ASCII letters, `-`
/// and `_`, or when no later line equals `:END:` (compared case-insensitively
/// after trimming surrounding whitespace).
pub(crate) fn parse<'a>(text: &'a str) -> Option<(&'a str, usize, usize, usize)> {
    debug_assert!(text.starts_with(':'));

    // Byte positions of every '\n' in `text`; the first one ends the name line.
    let mut lines = memchr_iter(b'\n', text.as_bytes());

    let (name, off) = lines
        .next()
        // Drop the leading ':' and any trailing whitespace of the name line.
        .map(|i| (text[1..i].trim_end(), i + 1))
        .filter(|(line, _)| line.ends_with(':'))
        // Strip the closing ':' once so the bare name is validated and returned.
        .map(|(line, off)| (&line[..line.len() - 1], off))
        .filter(|(name, _)| {
            // An empty name ("::") is not a drawer; without this check the
            // `all` below would be vacuously true and accept it.
            !name.is_empty()
                && name
                    .bytes()
                    .all(|c| c.is_ascii_alphabetic() || c == b'-' || c == b'_')
        })?;

    // `pos` always points at the start of the line currently being examined.
    let mut pos = off;

    for i in lines {
        if text[pos..i].trim().eq_ignore_ascii_case(":END:") {
            return Some((name, off, pos, i + 1));
        }
        pos = i + 1;
    }

    // The closing line may be the last line of the input without a trailing '\n'.
    if text[pos..].trim().eq_ignore_ascii_case(":END:") {
        Some((name, off, pos, text.len()))
    } else {
        None
    }
}
#[cfg(test)]
mod tests {
    /// A `:PROPERTIES:` drawer whose closing `:END:` line is the last line of
    /// the input and has no trailing newline.
    #[test]
    fn parse() {
        let input = ":PROPERTIES:\n :CUSTOM_ID: id\n :END:";

        // Expected offsets are spelled out as prefix lengths of the input so
        // they stay readable without counting bytes by hand.
        let contents_begin = ":PROPERTIES:\n".len();
        let contents_end = ":PROPERTIES:\n :CUSTOM_ID: id\n".len();
        let drawer_end = ":PROPERTIES:\n :CUSTOM_ID: id\n :END:".len();

        let expected = Some(("PROPERTIES", contents_begin, contents_end, drawer_end));

        assert_eq!(super::parse(input), expected);
    }
}

View file

@ -1,40 +1,41 @@
use crate::lines::Lines;
use memchr::{memchr, memchr2};
use memchr::{memchr, memchr_iter};
/// return (name, parameters, contents-begin, contents-end, end)
// return (name, parameters, offset, limit, end)
#[inline]
pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> {
debug_assert!(src.starts_with("#+"));
pub fn parse(text: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> {
debug_assert!(text.starts_with("#+"));
if src.len() <= 9 || !src[2..9].eq_ignore_ascii_case("BEGIN: ") {
if text.len() <= 9 || !text[2..9].eq_ignore_ascii_case("BEGIN: ") {
return None;
}
let mut lines = Lines::new(src);
let (mut pre_limit, _, _) = lines.next()?;
let bytes = text.as_bytes();
let mut lines = memchr_iter(b'\n', bytes);
for (limit, end, line) in lines {
if line.trim().eq_ignore_ascii_case("#+END:") {
let bytes = src.as_bytes();
let (name, para, off) = lines
.next()
.map(|i| {
memchr(b' ', &bytes[9..i])
.map(|x| (&text[9..9 + x], Some(text[9 + x..i].trim()), i + 1))
.unwrap_or((&text[9..i], None, i + 1))
})
.filter(|(name, _, _)| name.as_bytes().iter().all(|&c| c.is_ascii_alphabetic()))?;
let i = memchr2(b' ', b'\n', &bytes[9..])
.map(|i| i + 9)
.filter(|&i| bytes[9..i].iter().all(|&c| c.is_ascii_alphabetic()))?;
let name = &src[8..i].trim();
let mut pos = off;
return Some(if bytes[i] == b'\n' {
(name, None, i, pre_limit, end)
} else {
let begin = memchr(b'\n', bytes)
.map(|i| i + 1)
.unwrap_or_else(|| src.len());
(name, Some(&src[i..begin].trim()), begin, pre_limit, end)
});
for i in lines {
if text[pos..i].trim().eq_ignore_ascii_case("#+END:") {
return Some((name, para, off, pos, i + 1));
}
pre_limit = limit;
pos = i + 1;
}
None
if text[pos..].trim().eq_ignore_ascii_case("#+END:") {
Some((name, para, off, pos, text.len()))
} else {
None
}
}
#[cfg(test)]
@ -45,13 +46,14 @@ mod tests {
// TODO: testing
assert_eq!(
parse(
r"#+BEGIN: clocktable :scope file
CONTENTS
#+END:
"
),
Some(("clocktable", Some(":scope file"), 32, 40, 48))
parse("#+BEGIN: clocktable :scope file\nCONTENTS\n#+END:\n"),
Some((
"clocktable",
Some(":scope file"),
"#+BEGIN: clocktable :scope file\n".len(),
"#+BEGIN: clocktable :scope file\nCONTENTS\n".len(),
"#+BEGIN: clocktable :scope file\nCONTENTS\n#+END:\n".len(),
))
);
}
}

View file

@ -1,4 +1,5 @@
use crate::lines::Lines;
use memchr::memchr;
#[inline]
pub fn is_item(text: &str) -> Option<bool> {
@ -33,7 +34,7 @@ pub fn is_item(text: &str) -> Option<bool> {
}
}
// returns (bullets, contents begin, contents end, end, has more)
// return (bullets, offset, limit, end, has more)
#[inline]
pub fn parse(src: &str, ident: usize) -> (&str, usize, usize, usize, bool) {
debug_assert!(
@ -50,7 +51,7 @@ pub fn parse(src: &str, ident: usize) -> (&str, usize, usize, usize, bool) {
let mut lines = Lines::new(src);
let (mut pre_limit, mut pre_end, first_line) = lines.next().unwrap();
let begin = match memchr::memchr(b' ', &first_line.as_bytes()[ident..]) {
let begin = match memchr(b' ', &first_line.as_bytes()[ident..]) {
Some(i) => i + ident + 1,
None => {
let len = first_line.len();

View file

@ -1,5 +1,6 @@
pub(crate) mod block;
pub(crate) mod clock;
pub(crate) mod drawer;
pub(crate) mod dyn_block;
pub(crate) mod fn_def;
pub(crate) mod keyword;

View file

@ -28,6 +28,12 @@ pub trait HtmlHandler<W: Write, E: From<Error>> {
fn section_end(&mut self, w: &mut W) -> Result<(), E> {
Ok(write!(w, "</section>")?)
}
fn drawer_beg(&mut self, w: &mut W, name: &str) -> Result<(), E> {
Ok(())
}
fn drawer_end(&mut self, w: &mut W) -> Result<(), E> {
Ok(())
}
fn paragraph_beg(&mut self, w: &mut W) -> Result<(), E> {
Ok(write!(w, "<p>")?)
}

View file

@ -10,6 +10,8 @@ macro_rules! handle_event {
SectionEnd => $handler.section_end($writer)?,
ParagraphBeg => $handler.paragraph_beg($writer)?,
ParagraphEnd => $handler.paragraph_end($writer)?,
DrawerBeg(n) => $handler.drawer_beg($writer, n)?,
DrawerEnd => $handler.drawer_end($writer)?,
CtrBlockBeg => $handler.ctr_block_beg($writer)?,
CtrBlockEnd => $handler.ctr_block_end($writer)?,
QteBlockBeg => $handler.qte_block_beg($writer)?,

View file

@ -9,6 +9,7 @@ use memchr::memchr_iter;
enum Container {
Headline(usize),
Section(usize),
Drawer,
Paragraph,
CtrBlock,
QteBlock,
@ -92,6 +93,9 @@ pub enum Event<'a> {
Planning(Planning<'a>),
DrawerBeg(&'a str),
DrawerEnd,
TableStart,
TableEnd,
TableCell,
@ -243,16 +247,16 @@ impl<'a> Parser<'a> {
.unwrap_or_else(|| {
let mut pos = 0;
for off in memchr_iter(b'\n', tail.as_bytes()) {
if tail.as_bytes()[pos + 1..off]
if tail.as_bytes()[pos..off]
.iter()
.all(u8::is_ascii_whitespace)
{
return (Event::ParagraphBeg, 0, pos + start, off + start);
} else if let Some(buf) = self.real_next_ele(&tail[pos + 1..]) {
} else if let Some(buf) = self.real_next_ele(&tail[pos..]) {
self.ele_buf = Some(buf);
return (Event::ParagraphBeg, 0, pos + start, pos + start);
}
pos = off;
pos = off + 1;
}
let len = text.len();
(
@ -263,9 +267,17 @@ impl<'a> Parser<'a> {
)
});
debug_assert!(off <= text.len() && limit <= text.len() && end <= text.len());
debug_assert!(
(limit == 0 && end == 0) || (off <= limit && limit <= end && end <= text.len()),
"{} <= {} <= {} <= {}",
off,
limit,
end,
text.len()
);
match ele {
Event::DrawerBeg(_) => self.push_stack(Container::Drawer, limit, end),
Event::ParagraphBeg => self.push_stack(Container::Paragraph, limit, end),
Event::QteBlockBeg => self.push_stack(Container::QteBlock, limit, end),
Event::CtrBlockBeg => self.push_stack(Container::CtrBlock, limit, end),
@ -309,7 +321,18 @@ impl<'a> Parser<'a> {
if tail.starts_with("-----") {
let off = rule::parse(tail);
if off != 0 {
return Some((Event::Rule, off, 0, 0));
return Some((Event::Rule, off + line_begin, 0, 0));
}
}
if tail.starts_with(':') {
if let Some((name, off, limit, end)) = drawer::parse(tail) {
return Some((
Event::DrawerBeg(name),
off + line_begin,
limit + line_begin,
end + line_begin,
));
}
}
@ -344,27 +367,47 @@ impl<'a> Parser<'a> {
.map(|(name, args, begin, limit, end)| {
let cont = &tail[begin..limit];
match &*name.to_uppercase() {
"COMMENT" => (Event::CommentBlock { args, cont }, end, 0, 0),
"EXAMPLE" => (Event::ExampleBlock { args, cont }, end, 0, 0),
"EXPORT" => (Event::ExportBlock { args, cont }, end, 0, 0),
"SRC" => (Event::SrcBlock { args, cont }, end, 0, 0),
"VERSE" => (Event::VerseBlock { args, cont }, end, 0, 0),
"CENTER" => (Event::CtrBlockBeg, begin, limit, end),
"QUOTE" => (Event::QteBlockBeg, begin, limit, end),
_ => (Event::SplBlockBeg { name, args }, begin, limit, end),
"COMMENT" => (Event::CommentBlock { args, cont }, end + line_begin, 0, 0),
"EXAMPLE" => (Event::ExampleBlock { args, cont }, end + line_begin, 0, 0),
"EXPORT" => (Event::ExportBlock { args, cont }, end + line_begin, 0, 0),
"SRC" => (Event::SrcBlock { args, cont }, end + line_begin, 0, 0),
"VERSE" => (Event::VerseBlock { args, cont }, end + line_begin, 0, 0),
"CENTER" => (
Event::CtrBlockBeg,
begin + line_begin,
limit + line_begin,
end + line_begin,
),
"QUOTE" => (
Event::QteBlockBeg,
begin + line_begin,
limit + line_begin,
end + line_begin,
),
_ => (
Event::SplBlockBeg { name, args },
begin + line_begin,
limit + line_begin,
end + line_begin,
),
}
})
.or_else(|| {
dyn_block::parse(tail).map(|(name, args, begin, limit, end)| {
(Event::DynBlockBeg { name, args }, begin, limit, end)
(
Event::DynBlockBeg { name, args },
begin + line_begin,
limit + line_begin,
end + line_begin,
)
})
})
.or_else(|| {
keyword::parse(tail).map(|(key, value, off)| {
if let Key::Call = key {
(Event::Call { value }, off, 0, 0)
(Event::Call { value }, off + line_begin, 0, 0)
} else {
(Event::Keyword { key, value }, off, 0, 0)
(Event::Keyword { key, value }, off + line_begin, 0, 0)
}
})
})
@ -510,6 +553,7 @@ impl<'a> Parser<'a> {
let (container, _, _) = self.stack.pop().unwrap();
match container {
Container::Bold => Event::BoldEnd,
Container::Drawer => Event::DrawerEnd,
Container::CtrBlock => Event::CtrBlockEnd,
Container::DynBlock => Event::DynBlockEnd,
Container::Headline(_) => Event::HeadlineEnd,
@ -553,7 +597,8 @@ impl<'a> Iterator for Parser<'a> {
self.next_headline(tail)
}
}
Container::DynBlock
Container::Drawer
| Container::DynBlock
| Container::CtrBlock
| Container::QteBlock
| Container::SplBlock