feat(parser): timestamp parsing

This commit is contained in:
PoiScript 2019-04-04 21:08:23 +08:00
parent 1f52e75d3d
commit 1bb5286dd3
12 changed files with 887 additions and 1023 deletions

View file

@ -32,7 +32,7 @@ impl From<FromUtf8Error> for Error {
type Result = std::result::Result<(), Error>; type Result = std::result::Result<(), Error>;
impl<W: Write> HtmlHandler<W, Error> for CustomHtmlHandler { impl<W: Write> HtmlHandler<W, Error> for CustomHtmlHandler {
fn handle_headline_beg(&mut self, w: &mut W, hdl: Headline) -> Result { fn headline_beg(&mut self, w: &mut W, hdl: Headline) -> Result {
if hdl.level > 6 { if hdl.level > 6 {
Err(Error::Heading) Err(Error::Heading)
} else { } else {

View file

@ -1,429 +1,8 @@
pub mod block; pub(crate) mod block;
pub mod dyn_block; pub(crate) mod dyn_block;
pub mod fn_def; pub(crate) mod fn_def;
pub mod keyword; pub(crate) mod keyword;
pub mod list; pub(crate) mod list;
pub mod rule; pub(crate) mod rule;
pub use self::keyword::Key; pub use self::keyword::Key;
use memchr::memchr_iter;
/// A block-level element recognised by [`parse`].
///
/// Variants borrow from the parsed text, so an `Element<'a>` cannot outlive
/// the source string it was produced from.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub enum Element<'a> {
    /// Plain text lines. `parse` returns this together with the position of
    /// the paragraph's first character; both fields are relative to that
    /// position.
    Paragraph {
        /// End of the paragraph's content.
        cont_end: usize,
        /// End of the whole paragraph element.
        end: usize,
    },
    /// A `#+` keyword line whose key is anything other than `Key::Call`.
    Keyword {
        key: Key<'a>,
        value: &'a str,
    },
    /// A `#+` keyword line whose key is `Key::Call`.
    Call {
        value: &'a str,
    },
    /// A footnote definition, recognised on lines starting with `[fn:`.
    FnDef {
        label: &'a str,
        cont: &'a str,
    },
    /// A block named `CENTER` (compared case-insensitively). `cont_end` and
    /// `end` are relative to the beginning of the block's content.
    CtrBlock {
        args: Option<&'a str>,
        cont_end: usize,
        end: usize,
    },
    /// A block named `QUOTE` (compared case-insensitively). `cont_end` and
    /// `end` are relative to the beginning of the block's content.
    QteBlock {
        args: Option<&'a str>,
        cont_end: usize,
        end: usize,
    },
    /// A block whose name is none of the names handled explicitly by `parse`.
    /// `cont_end` and `end` are relative to the beginning of the block's
    /// content.
    SplBlock {
        args: Option<&'a str>,
        name: &'a str,
        cont_end: usize,
        end: usize,
    },
    /// A block named `COMMENT`; `cont` is the text between the block's
    /// content boundaries.
    CommentBlock {
        args: Option<&'a str>,
        cont: &'a str,
    },
    /// A block named `EXAMPLE`; `cont` is the text between the block's
    /// content boundaries.
    ExampleBlock {
        args: Option<&'a str>,
        cont: &'a str,
    },
    /// A block named `EXPORT`; `cont` is the text between the block's
    /// content boundaries.
    ExportBlock {
        args: Option<&'a str>,
        cont: &'a str,
    },
    /// A block named `SRC`; `cont` is the text between the block's content
    /// boundaries.
    SrcBlock {
        args: Option<&'a str>,
        cont: &'a str,
    },
    /// A block named `VERSE`; `cont` is the text between the block's content
    /// boundaries.
    VerseBlock {
        args: Option<&'a str>,
        cont: &'a str,
    },
    /// A dynamic block as recognised by `dyn_block::parse`. `cont_end` and
    /// `end` are relative to the beginning of the block's content.
    DynBlock {
        args: Option<&'a str>,
        name: &'a str,
        cont_end: usize,
        end: usize,
    },
    /// A horizontal rule (a line starting with `-----` accepted by
    /// `rule::parse`).
    Rule,
    /// The raw text of a run of comment lines (starting with `# ` or a bare `#`).
    Comment(&'a str),
    /// The raw text of a run of fixed-width lines (starting with `: ` or a
    /// bare `:`), leading colons included.
    FixedWidth(&'a str),
    /// The start of a list.
    List {
        /// Number of bytes between the beginning of the line and the list item.
        ident: usize,
        /// The `ordered` flag reported by `list::is_item`.
        ordered: bool,
    },
    /// No element at all: `parse` returns this when the input contains nothing
    /// but newlines. It plays the role of `Option::<Element>::None`.
    Empty,
}
/// Parses the first element of `src`, skipping any leading blank lines.
///
/// Returns `(element, offset, next)`:
///
/// - `(Element::Empty, src.len(), None)` when `src` contains nothing but
///   newlines.
/// - `(element, offset, None)` when the first non-blank line starts an
///   element; `offset` is an absolute position in `src`.
/// - `(Element::Paragraph { .. }, start, next)` otherwise, where `start` is
///   the position of the first non-newline character and the paragraph's
///   `cont_end` / `end` are relative to `start`. When the paragraph is
///   terminated by another element, `next` carries that element and its own
///   offset, which is meant to be relative to the end of the paragraph.
pub fn parse(src: &str) -> (Element<'_>, usize, Option<(Element<'_>, usize)>) {
// Skip leading empty lines. Every skipped character is a one-byte '\n', so
// the char index returned by `position` is also a valid byte index.
let mut pos = match src.chars().position(|c| c != '\n') {
Some(pos) => pos,
None => return (Element::Empty, src.len(), None),
};
// `start` marks the first non-newline character; `pos` walks forward line by line.
let start = pos;
let bytes = src.as_bytes();
// Absolute positions of every '\n' at or after `start`.
let mut line_ends = memchr_iter(b'\n', &bytes[start..]).map(|i| i + start);
// Each iteration inspects the line beginning at `pos`.
loop {
let line_beg = pos;
// Leaves the loop with element `$ele`, whose offset measured from `pos` is `$off`.
// If the element sits on the first line it is returned alone, with the
// absolute offset `pos + $off`. Otherwise the lines before `line_beg` are
// returned as a paragraph (its content excludes the newline just before
// `line_beg`) and the element is handed back as the "next" element.
// NOTE(review): `pos == start` implies `line_beg == start`, so the second
// half of the condition looks redundant.
// NOTE(review): in the paragraph case `$off` is measured from `pos`, which
// lies after any indentation, while the paragraph ends at `line_beg` —
// confirm callers account for indented elements.
macro_rules! brk {
($ele:expr, $off:expr) => {
break if line_beg == start || pos == start {
($ele, pos + $off, None)
} else {
(
Element::Paragraph {
cont_end: line_beg - start - 1,
end: line_beg - start,
},
start,
Some(($ele, $off)),
)
};
};
}
let tail = &src[pos..];
// Unlike other elements, a footnote definition must start at column 0,
// so it is checked before any indentation is skipped.
if tail.starts_with("[fn:") {
if let Some((label, cont, off)) = fn_def::parse(tail) {
brk!(Element::FnDef { label, cont }, off + 1);
}
}
// An empty line terminates the paragraph: the content stops before the
// previous newline and the element end includes this line's newline.
// (The first line is never empty, so `pos > start` here.)
if bytes[pos] == b'\n' {
break (
Element::Paragraph {
cont_end: pos - start - 1,
end: pos - start + 1,
},
start,
None,
);
}
// `skip_space!` is defined outside this file; presumably it advances `pos`
// past the line's leading whitespace — TODO confirm.
pos = skip_space!(src, pos);
let tail = &src[pos..];
// list item: `ident` is the number of bytes skipped before the item
let (is_item, ordered) = list::is_item(tail);
if is_item {
let list = Element::List {
ident: pos - line_beg,
ordered,
};
// Unlike `brk!`, a list reports the position of its line (`start`) rather
// than a position after the element, and a next-offset of 0.
break if line_beg == start {
(list, start, None)
} else {
(
Element::Paragraph {
cont_end: line_beg - start - 1,
end: line_beg - start,
},
start,
Some((list, 0)),
)
};
}
// TODO: LaTeX environment (currently detected but ignored)
if tail.starts_with("\\begin{") {}
// rule: a zero return from `rule::parse` is treated as "not a rule"
if tail.starts_with("-----") {
let off = rule::parse(tail);
if off != 0 {
brk!(Element::Rule, off);
}
}
// fixed width: extend over every following line that also starts with ": " or ":\n"
if tail.starts_with(": ") || tail.starts_with(":\n") {
// `skip_while` takes `line_ends` by value; that is fine because `brk!`
// below unconditionally leaves the loop.
// NOTE(review): continuation lines are tested at the very start of the
// line, without skipping indentation — confirm this is intended.
let end = line_ends
.skip_while(|&i| src[i + 1..].starts_with(": ") || src[i + 1..].starts_with(":\n"))
.next()
.map(|i| i + 1)
.unwrap_or_else(|| src.len());
let off = end - pos;
brk!(Element::FixedWidth(&tail[0..off]), off);
}
// comment: same scheme as fixed width, for lines starting with "# " or "#\n"
if tail.starts_with("# ") || tail.starts_with("#\n") {
let end = line_ends
.skip_while(|&i| src[i + 1..].starts_with("# ") || src[i + 1..].starts_with("#\n"))
.next()
.map(|i| i + 1)
.unwrap_or_else(|| src.len());
let off = end - pos;
brk!(Element::Comment(&tail[0..off]), off);
}
// "#+" introduces blocks, dynamic blocks and keywords; they are tried in that order
if tail.starts_with("#+") {
if let Some((name, args, cont_beg, cont_end, end)) = block::parse(tail) {
let cont = &tail[cont_beg..cont_end];
// Blocks that carry their content as a slice are reported with the offset
// `end`. CENTER, QUOTE and all other names are reported with the offset
// `cont_beg`, and their `cont_end` / `end` are rebased onto `cont_beg`.
match &*name.to_uppercase() {
"COMMENT" => brk!(Element::CommentBlock { args, cont }, end),
"EXAMPLE" => brk!(Element::ExampleBlock { args, cont }, end),
"EXPORT" => brk!(Element::ExportBlock { args, cont }, end),
"SRC" => brk!(Element::SrcBlock { args, cont }, end),
"VERSE" => brk!(Element::VerseBlock { args, cont }, end),
"CENTER" => brk!(
Element::CtrBlock {
args,
cont_end: cont_end - cont_beg,
end: end - cont_beg,
},
cont_beg
),
"QUOTE" => brk!(
Element::QteBlock {
args,
cont_end: cont_end - cont_beg,
end: end - cont_beg,
},
cont_beg
),
_ => brk!(
Element::SplBlock {
name,
args,
cont_end: cont_end - cont_beg,
end: end - cont_beg,
},
cont_beg
),
};
}
// dynamic block: reported like CENTER / QUOTE, with offsets rebased onto `cont_beg`
if let Some((name, args, cont_beg, cont_end, end)) = dyn_block::parse(tail) {
brk!(
Element::DynBlock {
name,
args,
cont_end: cont_end - cont_beg,
end: end - cont_beg,
},
cont_beg
)
}
// keyword: a `Key::Call` key becomes `Element::Call`, anything else `Element::Keyword`
if let Some((key, value, off)) = keyword::parse(tail) {
brk!(
if let Key::Call = key {
Element::Call { value }
} else {
Element::Keyword { key, value }
},
off
)
}
}
// Nothing matched on this line: move to the beginning of the next line.
if let Some(off) = line_ends.next() {
pos = off + 1;
// That newline was the last character of `src`: everything from `start`
// is a paragraph whose content excludes the trailing newline.
if pos >= src.len() {
break (
Element::Paragraph {
cont_end: src.len() - start - 1,
end: src.len() - start,
},
start,
None,
);
}
} else {
// No newline left: the paragraph runs to the very end of `src`.
break (
Element::Paragraph {
cont_end: src.len() - start,
end: src.len() - start,
},
start,
None,
);
}
}
}
#[cfg(test)]
mod tests {
    /// Checks the `(element, offset, next)` triple returned by `parse` for
    /// paragraphs, fixed-width areas, lists, quote blocks and keywords.
    #[test]
    fn parse() {
        // Imported inside the function so that `super::parse` does not clash
        // with the name of this test.
        use super::keyword::Key;
        use super::parse;
        use super::Element::*;

        let len = "Lorem ipsum dolor sit amet.".len();

        // Input consisting only of newlines.
        let src = "\n\n\n";
        let expected = (Empty, 3, None);
        assert_eq!(parse(src), expected);

        // Paragraph followed by several blank lines.
        let src = "\nLorem ipsum dolor sit amet.\n\n\n";
        let expected = (
            Paragraph {
                cont_end: len,
                end: len + 2,
            },
            1,
            None,
        );
        assert_eq!(parse(src), expected);

        // Paragraph followed by exactly one blank line.
        let src = "\n\nLorem ipsum dolor sit amet.\n\n";
        let expected = (
            Paragraph {
                cont_end: len,
                end: len + 2,
            },
            2,
            None,
        );
        assert_eq!(parse(src), expected);

        // Paragraph ending with the final newline of the input.
        let src = "\nLorem ipsum dolor sit amet.\n";
        let expected = (
            Paragraph {
                cont_end: len,
                end: len + 1,
            },
            1,
            None,
        );
        assert_eq!(parse(src), expected);

        // Paragraph without a trailing newline.
        let src = "\n\n\nLorem ipsum dolor sit amet.";
        let expected = (
            Paragraph {
                cont_end: len,
                end: len,
            },
            3,
            None,
        );
        assert_eq!(parse(src), expected);

        // Fixed-width line with a trailing newline.
        let src = "\n\n\n: Lorem ipsum dolor sit amet.\n";
        let expected = (
            FixedWidth(": Lorem ipsum dolor sit amet.\n"),
            src.len(),
            None,
        );
        assert_eq!(parse(src), expected);

        // Fixed-width line without a trailing newline.
        let src = "\n\n\n: Lorem ipsum dolor sit amet.";
        let expected = (
            FixedWidth(": Lorem ipsum dolor sit amet."),
            src.len(),
            None,
        );
        assert_eq!(parse(src), expected);

        // Paragraph terminated by a single fixed-width line.
        let src = "\n\nLorem ipsum dolor sit amet.\n: Lorem ipsum dolor sit amet.\n";
        let expected = (
            Paragraph {
                cont_end: len,
                end: len + 1,
            },
            2,
            Some((FixedWidth(": Lorem ipsum dolor sit amet.\n"), 30)),
        );
        assert_eq!(parse(src), expected);

        // Paragraph terminated by a multi-line fixed-width area.
        let src = "\n\nLorem ipsum dolor sit amet.\n: Lorem ipsum dolor sit amet.\n:\n: Lorem ipsum dolor sit amet.";
        let expected = (
            Paragraph {
                cont_end: len,
                end: len + 1,
            },
            2,
            Some((
                FixedWidth(": Lorem ipsum dolor sit amet.\n:\n: Lorem ipsum dolor sit amet."),
                61,
            )),
        );
        assert_eq!(parse(src), expected);

        // Paragraph terminated by a list item.
        let src = "\n\nLorem ipsum dolor sit amet.\n+ Lorem ipsum dolor sit amet.\n";
        let list = List {
            ident: 0,
            ordered: false,
        };
        let expected = (
            Paragraph {
                cont_end: len,
                end: len + 1,
            },
            2,
            Some((list, 0)),
        );
        assert_eq!(parse(src), expected);

        // Paragraph terminated by a quote block.
        let src = "\n\nLorem ipsum dolor sit amet.\n#+BEGIN_QUOTE\nLorem ipsum dolor sit amet.\n#+END_QUOTE\n";
        let quote = QteBlock {
            args: None,
            cont_end: len + 1,
            end: len + 1 + "#+END_QUOTE\n".len(),
        };
        let expected = (
            Paragraph {
                cont_end: len,
                end: len + 1,
            },
            2,
            Some((quote, "#+BEGIN_QUOTE\n".len())),
        );
        assert_eq!(parse(src), expected);

        // Indented keyword on the first non-blank line.
        let src = "\n #+ATTR_HTML: :width 200px";
        let keyword = Keyword {
            key: Key::Attr { backend: "HTML" },
            value: ":width 200px",
        };
        let expected = (keyword, src.len(), None);
        assert_eq!(parse(src), expected);

        // TODO: more tests
    }
}

View file

@ -2,7 +2,7 @@
use crate::elements::Key; use crate::elements::Key;
use crate::headline::Headline; use crate::headline::Headline;
use crate::objects::Cookie; use crate::objects::{Cookie, Timestamp};
use crate::parser::Parser; use crate::parser::Parser;
use jetscii::ascii_chars; use jetscii::ascii_chars;
use std::convert::From; use std::convert::From;
@ -11,94 +11,94 @@ use std::io::{Error, Write};
use std::marker::PhantomData; use std::marker::PhantomData;
pub trait HtmlHandler<W: Write, E: From<Error>> { pub trait HtmlHandler<W: Write, E: From<Error>> {
fn handle_headline_beg(&mut self, w: &mut W, hdl: Headline) -> Result<(), E> { fn headline_beg(&mut self, w: &mut W, hdl: Headline) -> Result<(), E> {
let level = if hdl.level <= 6 { hdl.level } else { 6 }; let level = if hdl.level <= 6 { hdl.level } else { 6 };
Ok(write!(w, "<h{0}>{1}</h{0}>", level, Escape(hdl.title))?) Ok(write!(w, "<h{0}>{1}</h{0}>", level, Escape(hdl.title))?)
} }
fn handle_headline_end(&mut self, w: &mut W) -> Result<(), E> { fn headline_end(&mut self, w: &mut W) -> Result<(), E> {
Ok(()) Ok(())
} }
fn handle_section_beg(&mut self, w: &mut W) -> Result<(), E> { fn section_beg(&mut self, w: &mut W) -> Result<(), E> {
Ok(write!(w, "<section>")?) Ok(write!(w, "<section>")?)
} }
fn handle_section_end(&mut self, w: &mut W) -> Result<(), E> { fn section_end(&mut self, w: &mut W) -> Result<(), E> {
Ok(write!(w, "</section>")?) Ok(write!(w, "</section>")?)
} }
fn handle_paragraph_beg(&mut self, w: &mut W) -> Result<(), E> { fn paragraph_beg(&mut self, w: &mut W) -> Result<(), E> {
Ok(write!(w, "<p>")?) Ok(write!(w, "<p>")?)
} }
fn handle_paragraph_end(&mut self, w: &mut W) -> Result<(), E> { fn paragraph_end(&mut self, w: &mut W) -> Result<(), E> {
Ok(write!(w, "</p>")?) Ok(write!(w, "</p>")?)
} }
fn handle_ctr_block_beg(&mut self, w: &mut W) -> Result<(), E> { fn ctr_block_beg(&mut self, w: &mut W) -> Result<(), E> {
Ok(write!(w, r#"<div style="text-align: center">"#)?) Ok(write!(w, r#"<div style="text-align: center">"#)?)
} }
fn handle_ctr_block_end(&mut self, w: &mut W) -> Result<(), E> { fn ctr_block_end(&mut self, w: &mut W) -> Result<(), E> {
Ok(write!(w, "</div>")?) Ok(write!(w, "</div>")?)
} }
fn handle_qte_block_beg(&mut self, w: &mut W) -> Result<(), E> { fn qte_block_beg(&mut self, w: &mut W) -> Result<(), E> {
Ok(write!(w, "<blockquote>")?) Ok(write!(w, "<blockquote>")?)
} }
fn handle_qte_block_end(&mut self, w: &mut W) -> Result<(), E> { fn qte_block_end(&mut self, w: &mut W) -> Result<(), E> {
Ok(write!(w, "</blockquote>")?) Ok(write!(w, "</blockquote>")?)
} }
fn handle_spl_block_beg(&mut self, w: &mut W, name: &str, args: Option<&str>) -> Result<(), E> { fn spl_block_beg(&mut self, w: &mut W, name: &str, args: Option<&str>) -> Result<(), E> {
Ok(write!(w, "<div>")?) Ok(write!(w, "<div>")?)
} }
fn handle_spl_block_end(&mut self, w: &mut W) -> Result<(), E> { fn spl_block_end(&mut self, w: &mut W) -> Result<(), E> {
Ok(write!(w, "</div>")?) Ok(write!(w, "</div>")?)
} }
fn handle_comment_block(&mut self, w: &mut W, cont: &str, args: Option<&str>) -> Result<(), E> { fn comment_block(&mut self, w: &mut W, cont: &str, args: Option<&str>) -> Result<(), E> {
Ok(()) Ok(())
} }
fn handle_example_block(&mut self, w: &mut W, cont: &str, args: Option<&str>) -> Result<(), E> { fn example_block(&mut self, w: &mut W, cont: &str, args: Option<&str>) -> Result<(), E> {
Ok(write!(w, "<pre><code>{}</code></pre>", Escape(cont))?) Ok(write!(w, "<pre><code>{}</code></pre>", Escape(cont))?)
} }
fn handle_export_block(&mut self, w: &mut W, cont: &str, args: Option<&str>) -> Result<(), E> { fn export_block(&mut self, w: &mut W, cont: &str, args: Option<&str>) -> Result<(), E> {
Ok(()) Ok(())
} }
fn handle_src_block(&mut self, w: &mut W, cont: &str, args: Option<&str>) -> Result<(), E> { fn src_block(&mut self, w: &mut W, cont: &str, args: Option<&str>) -> Result<(), E> {
Ok(write!(w, "<pre><code>{}</code></pre>", Escape(cont))?) Ok(write!(w, "<pre><code>{}</code></pre>", Escape(cont))?)
} }
fn handle_verse_block(&mut self, w: &mut W, cont: &str, args: Option<&str>) -> Result<(), E> { fn verse_block(&mut self, w: &mut W, cont: &str, args: Option<&str>) -> Result<(), E> {
Ok(()) Ok(())
} }
fn handle_dyn_block_beg(&mut self, w: &mut W, name: &str, args: Option<&str>) -> Result<(), E> { fn dyn_block_beg(&mut self, w: &mut W, name: &str, args: Option<&str>) -> Result<(), E> {
Ok(()) Ok(())
} }
fn handle_dyn_block_end(&mut self, w: &mut W) -> Result<(), E> { fn dyn_block_end(&mut self, w: &mut W) -> Result<(), E> {
Ok(()) Ok(())
} }
fn handle_list_beg(&mut self, w: &mut W, ordered: bool) -> Result<(), E> { fn list_beg(&mut self, w: &mut W, ordered: bool) -> Result<(), E> {
if ordered { if ordered {
Ok(write!(w, "<ol>")?) Ok(write!(w, "<ol>")?)
} else { } else {
Ok(write!(w, "<ul>")?) Ok(write!(w, "<ul>")?)
} }
} }
fn handle_list_end(&mut self, w: &mut W, ordered: bool) -> Result<(), E> { fn list_end(&mut self, w: &mut W, ordered: bool) -> Result<(), E> {
if ordered { if ordered {
Ok(write!(w, "</ol>")?) Ok(write!(w, "</ol>")?)
} else { } else {
Ok(write!(w, "</ul>")?) Ok(write!(w, "</ul>")?)
} }
} }
fn handle_list_beg_item(&mut self, w: &mut W, bullet: &str) -> Result<(), E> { fn list_beg_item(&mut self, w: &mut W, bullet: &str) -> Result<(), E> {
Ok(write!(w, "<li>")?) Ok(write!(w, "<li>")?)
} }
fn handle_list_end_item(&mut self, w: &mut W) -> Result<(), E> { fn list_end_item(&mut self, w: &mut W) -> Result<(), E> {
Ok(write!(w, "</li>")?) Ok(write!(w, "</li>")?)
} }
fn handle_call(&mut self, w: &mut W, value: &str) -> Result<(), E> { fn call(&mut self, w: &mut W, value: &str) -> Result<(), E> {
Ok(()) Ok(())
} }
fn handle_clock(&mut self, w: &mut W) -> Result<(), E> { fn clock(&mut self, w: &mut W) -> Result<(), E> {
Ok(()) Ok(())
} }
fn handle_comment(&mut self, w: &mut W, cont: &str) -> Result<(), E> { fn comment(&mut self, w: &mut W, cont: &str) -> Result<(), E> {
Ok(()) Ok(())
} }
fn handle_fixed_width(&mut self, w: &mut W, cont: &str) -> Result<(), E> { fn fixed_width(&mut self, w: &mut W, cont: &str) -> Result<(), E> {
for line in cont.lines() { for line in cont.lines() {
// remove leading colon // remove leading colon
write!(w, "<pre>{}</pre>", Escape(&line[1..]))?; write!(w, "<pre>{}</pre>", Escape(&line[1..]))?;
@ -106,39 +106,34 @@ pub trait HtmlHandler<W: Write, E: From<Error>> {
Ok(()) Ok(())
} }
fn handle_table_start(&mut self, w: &mut W) -> Result<(), E> { fn table_start(&mut self, w: &mut W) -> Result<(), E> {
Ok(()) Ok(())
} }
fn handle_table_end(&mut self, w: &mut W) -> Result<(), E> { fn table_end(&mut self, w: &mut W) -> Result<(), E> {
Ok(()) Ok(())
} }
fn handle_table_cell(&mut self, w: &mut W) -> Result<(), E> { fn table_cell(&mut self, w: &mut W) -> Result<(), E> {
Ok(()) Ok(())
} }
fn handle_latex_env(&mut self, w: &mut W) -> Result<(), E> { fn latex_env(&mut self, w: &mut W) -> Result<(), E> {
Ok(()) Ok(())
} }
fn handle_fn_def(&mut self, w: &mut W, label: &str, cont: &str) -> Result<(), E> { fn fn_def(&mut self, w: &mut W, label: &str, cont: &str) -> Result<(), E> {
Ok(()) Ok(())
} }
fn handle_keyword(&mut self, w: &mut W, key: Key<'_>, value: &str) -> Result<(), E> { fn keyword(&mut self, w: &mut W, key: Key<'_>, value: &str) -> Result<(), E> {
Ok(()) Ok(())
} }
fn handle_rule(&mut self, w: &mut W) -> Result<(), E> { fn rule(&mut self, w: &mut W) -> Result<(), E> {
Ok(write!(w, "<hr>")?) Ok(write!(w, "<hr>")?)
} }
fn handle_cookie(&mut self, w: &mut W, cookie: Cookie) -> Result<(), E> { fn cookie(&mut self, w: &mut W, cookie: Cookie) -> Result<(), E> {
Ok(()) Ok(())
} }
fn handle_fn_ref( fn fn_ref(&mut self, w: &mut W, label: Option<&str>, def: Option<&str>) -> Result<(), E> {
&mut self,
w: &mut W,
label: Option<&str>,
def: Option<&str>,
) -> Result<(), E> {
Ok(()) Ok(())
} }
fn handle_inline_call( fn inline_call(
&mut self, &mut self,
w: &mut W, w: &mut W,
name: &str, name: &str,
@ -148,7 +143,7 @@ pub trait HtmlHandler<W: Write, E: From<Error>> {
) -> Result<(), E> { ) -> Result<(), E> {
Ok(()) Ok(())
} }
fn handle_inline_src( fn inline_src(
&mut self, &mut self,
w: &mut W, w: &mut W,
lang: &str, lang: &str,
@ -157,7 +152,7 @@ pub trait HtmlHandler<W: Write, E: From<Error>> {
) -> Result<(), E> { ) -> Result<(), E> {
Ok(write!(w, "<code>{}</code>", Escape(body))?) Ok(write!(w, "<code>{}</code>", Escape(body))?)
} }
fn handle_link(&mut self, w: &mut W, path: &str, desc: Option<&str>) -> Result<(), E> { fn link(&mut self, w: &mut W, path: &str, desc: Option<&str>) -> Result<(), E> {
if let Some(desc) = desc { if let Some(desc) = desc {
Ok(write!( Ok(write!(
w, w,
@ -169,53 +164,56 @@ pub trait HtmlHandler<W: Write, E: From<Error>> {
Ok(write!(w, r#"<a href="{0}">{0}</a>"#, Escape(path))?) Ok(write!(w, r#"<a href="{0}">{0}</a>"#, Escape(path))?)
} }
} }
fn handle_macros(&mut self, w: &mut W, name: &str, args: Option<&str>) -> Result<(), E> { fn macros(&mut self, w: &mut W, name: &str, args: Option<&str>) -> Result<(), E> {
Ok(()) Ok(())
} }
fn handle_radio_target(&mut self, w: &mut W, target: &str) -> Result<(), E> { fn radio_target(&mut self, w: &mut W, target: &str) -> Result<(), E> {
Ok(()) Ok(())
} }
fn handle_snippet(&mut self, w: &mut W, name: &str, value: &str) -> Result<(), E> { fn snippet(&mut self, w: &mut W, name: &str, value: &str) -> Result<(), E> {
if name.eq_ignore_ascii_case("HTML") { if name.eq_ignore_ascii_case("HTML") {
Ok(write!(w, "{}", value)?) Ok(write!(w, "{}", value)?)
} else { } else {
Ok(()) Ok(())
} }
} }
fn handle_target(&mut self, w: &mut W, target: &str) -> Result<(), E> { fn target(&mut self, w: &mut W, target: &str) -> Result<(), E> {
Ok(()) Ok(())
} }
fn handle_bold_beg(&mut self, w: &mut W) -> Result<(), E> { fn timestamp(&mut self, w: &mut W, timestamp: Timestamp) -> Result<(), E> {
Ok(())
}
fn bold_beg(&mut self, w: &mut W) -> Result<(), E> {
Ok(write!(w, "<b>")?) Ok(write!(w, "<b>")?)
} }
fn handle_bold_end(&mut self, w: &mut W) -> Result<(), E> { fn bold_end(&mut self, w: &mut W) -> Result<(), E> {
Ok(write!(w, "</b>")?) Ok(write!(w, "</b>")?)
} }
fn handle_italic_beg(&mut self, w: &mut W) -> Result<(), E> { fn italic_beg(&mut self, w: &mut W) -> Result<(), E> {
Ok(write!(w, "<i>")?) Ok(write!(w, "<i>")?)
} }
fn handle_italic_end(&mut self, w: &mut W) -> Result<(), E> { fn italic_end(&mut self, w: &mut W) -> Result<(), E> {
Ok(write!(w, "</i>")?) Ok(write!(w, "</i>")?)
} }
fn handle_strike_beg(&mut self, w: &mut W) -> Result<(), E> { fn strike_beg(&mut self, w: &mut W) -> Result<(), E> {
Ok(write!(w, "<s>")?) Ok(write!(w, "<s>")?)
} }
fn handle_strike_end(&mut self, w: &mut W) -> Result<(), E> { fn strike_end(&mut self, w: &mut W) -> Result<(), E> {
Ok(write!(w, "</s>")?) Ok(write!(w, "</s>")?)
} }
fn handle_underline_beg(&mut self, w: &mut W) -> Result<(), E> { fn underline_beg(&mut self, w: &mut W) -> Result<(), E> {
Ok(write!(w, "<u>")?) Ok(write!(w, "<u>")?)
} }
fn handle_underline_end(&mut self, w: &mut W) -> Result<(), E> { fn underline_end(&mut self, w: &mut W) -> Result<(), E> {
Ok(write!(w, "</u>")?) Ok(write!(w, "</u>")?)
} }
fn handle_verbatim(&mut self, w: &mut W, cont: &str) -> Result<(), E> { fn verbatim(&mut self, w: &mut W, cont: &str) -> Result<(), E> {
Ok(write!(w, "<code>{}</code>", Escape(cont))?) Ok(write!(w, "<code>{}</code>", Escape(cont))?)
} }
fn handle_code(&mut self, w: &mut W, cont: &str) -> Result<(), E> { fn code(&mut self, w: &mut W, cont: &str) -> Result<(), E> {
Ok(write!(w, "<code>{}</code>", Escape(cont))?) Ok(write!(w, "<code>{}</code>", Escape(cont))?)
} }
fn handle_text(&mut self, w: &mut W, cont: &str) -> Result<(), E> { fn text(&mut self, w: &mut W, cont: &str) -> Result<(), E> {
Ok(write!(w, "{}", Escape(cont))?) Ok(write!(w, "{}", Escape(cont))?)
} }
} }

View file

@ -4,67 +4,66 @@ macro_rules! handle_event {
use crate::parser::Event::*; use crate::parser::Event::*;
match $event { match $event {
HeadlineBeg(hdl) => $handler.handle_headline_beg($writer, hdl)?, HeadlineBeg(hdl) => $handler.headline_beg($writer, hdl)?,
HeadlineEnd => $handler.handle_headline_end($writer)?, HeadlineEnd => $handler.headline_end($writer)?,
SectionBeg => $handler.handle_section_beg($writer)?, SectionBeg => $handler.section_beg($writer)?,
SectionEnd => $handler.handle_section_end($writer)?, SectionEnd => $handler.section_end($writer)?,
ParagraphBeg => $handler.handle_paragraph_beg($writer)?, ParagraphBeg => $handler.paragraph_beg($writer)?,
ParagraphEnd => $handler.handle_paragraph_end($writer)?, ParagraphEnd => $handler.paragraph_end($writer)?,
CtrBlockBeg => $handler.handle_ctr_block_beg($writer)?, CtrBlockBeg => $handler.ctr_block_beg($writer)?,
CtrBlockEnd => $handler.handle_ctr_block_end($writer)?, CtrBlockEnd => $handler.ctr_block_end($writer)?,
QteBlockBeg => $handler.handle_qte_block_beg($writer)?, QteBlockBeg => $handler.qte_block_beg($writer)?,
QteBlockEnd => $handler.handle_qte_block_end($writer)?, QteBlockEnd => $handler.qte_block_end($writer)?,
SplBlockBeg { name, args } => $handler.handle_spl_block_beg($writer, name, args)?, SplBlockBeg { name, args } => $handler.spl_block_beg($writer, name, args)?,
SplBlockEnd => $handler.handle_spl_block_end($writer)?, SplBlockEnd => $handler.spl_block_end($writer)?,
CommentBlock { cont, args } => $handler.handle_comment_block($writer, cont, args)?, CommentBlock { cont, args } => $handler.comment_block($writer, cont, args)?,
ExampleBlock { cont, args } => $handler.handle_example_block($writer, cont, args)?, ExampleBlock { cont, args } => $handler.example_block($writer, cont, args)?,
ExportBlock { cont, args } => $handler.handle_export_block($writer, cont, args)?, ExportBlock { cont, args } => $handler.export_block($writer, cont, args)?,
SrcBlock { cont, args } => $handler.handle_src_block($writer, cont, args)?, SrcBlock { cont, args } => $handler.src_block($writer, cont, args)?,
VerseBlock { cont, args } => $handler.handle_verse_block($writer, cont, args)?, VerseBlock { cont, args } => $handler.verse_block($writer, cont, args)?,
DynBlockBeg { name, args } => $handler.handle_dyn_block_beg($writer, name, args)?, DynBlockBeg { name, args } => $handler.dyn_block_beg($writer, name, args)?,
DynBlockEnd => $handler.handle_dyn_block_end($writer)?, DynBlockEnd => $handler.dyn_block_end($writer)?,
ListBeg { ordered } => $handler.handle_list_beg($writer, ordered)?, ListBeg { ordered } => $handler.list_beg($writer, ordered)?,
ListEnd { ordered } => $handler.handle_list_end($writer, ordered)?, ListEnd { ordered } => $handler.list_end($writer, ordered)?,
ListItemBeg { bullet } => $handler.handle_list_beg_item($writer, bullet)?, ListItemBeg { bullet } => $handler.list_beg_item($writer, bullet)?,
ListItemEnd => $handler.handle_list_end_item($writer)?, ListItemEnd => $handler.list_end_item($writer)?,
Call { value } => $handler.handle_call($writer, value)?, Call { value } => $handler.call($writer, value)?,
Clock => $handler.handle_clock($writer)?, Clock => $handler.clock($writer)?,
Comment(c) => $handler.handle_comment($writer, c)?, Timestamp(t) => $handler.timestamp($writer, t)?,
FixedWidth(f) => $handler.handle_fixed_width($writer, f)?, Comment(c) => $handler.comment($writer, c)?,
TableStart => $handler.handle_table_start($writer)?, FixedWidth(f) => $handler.fixed_width($writer, f)?,
TableEnd => $handler.handle_table_end($writer)?, TableStart => $handler.table_start($writer)?,
TableCell => $handler.handle_table_cell($writer)?, TableEnd => $handler.table_end($writer)?,
LatexEnv => $handler.handle_latex_env($writer)?, TableCell => $handler.table_cell($writer)?,
FnDef { label, cont } => $handler.handle_fn_def($writer, label, cont)?, LatexEnv => $handler.latex_env($writer)?,
Keyword { key, value } => $handler.handle_keyword($writer, key, value)?, FnDef { label, cont } => $handler.fn_def($writer, label, cont)?,
Rule => $handler.handle_rule($writer)?, Keyword { key, value } => $handler.keyword($writer, key, value)?,
Cookie(cookie) => $handler.handle_cookie($writer, cookie)?, Rule => $handler.rule($writer)?,
FnRef { label, def } => $handler.handle_fn_ref($writer, label, def)?, Cookie(cookie) => $handler.cookie($writer, cookie)?,
InlineSrc { lang, option, body } => { FnRef { label, def } => $handler.fn_ref($writer, label, def)?,
$handler.handle_inline_src($writer, lang, option, body)? InlineSrc { lang, option, body } => $handler.inline_src($writer, lang, option, body)?,
}
InlineCall { InlineCall {
name, name,
args, args,
inside_header, inside_header,
end_header, end_header,
} => $handler.handle_inline_call($writer, name, args, inside_header, end_header)?, } => $handler.inline_call($writer, name, args, inside_header, end_header)?,
Link { path, desc } => $handler.handle_link($writer, path, desc)?, Link { path, desc } => $handler.link($writer, path, desc)?,
Macros { name, args } => $handler.handle_macros($writer, name, args)?, Macros { name, args } => $handler.macros($writer, name, args)?,
RadioTarget { target } => $handler.handle_radio_target($writer, target)?, RadioTarget { target } => $handler.radio_target($writer, target)?,
Snippet { name, value } => $handler.handle_snippet($writer, name, value)?, Snippet { name, value } => $handler.snippet($writer, name, value)?,
Target { target } => $handler.handle_target($writer, target)?, Target { target } => $handler.target($writer, target)?,
BoldBeg => $handler.handle_bold_beg($writer)?, BoldBeg => $handler.bold_beg($writer)?,
BoldEnd => $handler.handle_bold_end($writer)?, BoldEnd => $handler.bold_end($writer)?,
ItalicBeg => $handler.handle_italic_beg($writer)?, ItalicBeg => $handler.italic_beg($writer)?,
ItalicEnd => $handler.handle_italic_end($writer)?, ItalicEnd => $handler.italic_end($writer)?,
StrikeBeg => $handler.handle_strike_beg($writer)?, StrikeBeg => $handler.strike_beg($writer)?,
StrikeEnd => $handler.handle_strike_end($writer)?, StrikeEnd => $handler.strike_end($writer)?,
UnderlineBeg => $handler.handle_underline_beg($writer)?, UnderlineBeg => $handler.underline_beg($writer)?,
UnderlineEnd => $handler.handle_underline_end($writer)?, UnderlineEnd => $handler.underline_end($writer)?,
Verbatim(cont) => $handler.handle_verbatim($writer, cont)?, Verbatim(cont) => $handler.verbatim($writer, cont)?,
Code(cont) => $handler.handle_code($writer, cont)?, Code(cont) => $handler.code($writer, cont)?,
Text(cont) => $handler.handle_text($writer, cont)?, Text(cont) => $handler.text($writer, cont)?,
} }
}; };
} }

View file

@ -2,7 +2,7 @@
use memchr::{memchr, memchr2, memrchr}; use memchr::{memchr, memchr2, memrchr};
const HEADLINE_DEFAULT_KEYWORDS: &[&str] = pub(crate) const DEFAULT_KEYWORDS: &[&str] =
&["TODO", "DONE", "NEXT", "WAITING", "LATER", "CANCELLED"]; &["TODO", "DONE", "NEXT", "WAITING", "LATER", "CANCELLED"];
#[cfg_attr(test, derive(PartialEq))] #[cfg_attr(test, derive(PartialEq))]
@ -21,28 +21,7 @@ pub struct Headline<'a> {
} }
impl<'a> Headline<'a> { impl<'a> Headline<'a> {
/// parsing the input string and returning the parsed headline pub(crate) fn parse(text: &'a str, keywords: &'a [&'a str]) -> (Headline<'a>, usize, usize) {
/// and the content-begin and the end of headline container.
///
/// ```rust
/// use orgize::headline::Headline;
///
/// let (hdl, _, _) = Headline::parse("* DONE [#A] COMMENT Title :tag:a2%:");
///
/// assert_eq!(hdl.level, 1);
/// assert_eq!(hdl.priority, Some('A'));
/// assert_eq!(hdl.tags, Some(":tag:a2%:"));
/// assert_eq!(hdl.title, "COMMENT Title");
/// assert_eq!(hdl.keyword, Some("DONE"));
/// ```
pub fn parse(text: &'a str) -> (Headline<'a>, usize, usize) {
Self::parse_with_keywords(text, HEADLINE_DEFAULT_KEYWORDS)
}
pub fn parse_with_keywords(
text: &'a str,
keywords: &'a [&'a str],
) -> (Headline<'a>, usize, usize) {
let level = memchr2(b'\n', b' ', text.as_bytes()).unwrap_or_else(|| text.len()); let level = memchr2(b'\n', b' ', text.as_bytes()).unwrap_or_else(|| text.len());
debug_assert!(level > 0); debug_assert!(level > 0);
@ -118,7 +97,7 @@ impl<'a> Headline<'a> {
) )
} }
pub fn find_level(text: &str, level: usize) -> usize { pub(crate) fn find_level(text: &str, level: usize) -> usize {
use jetscii::ByteSubstring; use jetscii::ByteSubstring;
let bytes = text.as_bytes(); let bytes = text.as_bytes();
@ -159,12 +138,12 @@ impl<'a> Headline<'a> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::Headline; use super::*;
#[test] #[test]
fn parse() { fn parse() {
assert_eq!( assert_eq!(
Headline::parse("**** TODO [#A] COMMENT Title :tag:a2%:").0, Headline::parse("**** TODO [#A] COMMENT Title :tag:a2%:", DEFAULT_KEYWORDS).0,
Headline { Headline {
level: 4, level: 4,
priority: Some('A'), priority: Some('A'),
@ -174,7 +153,7 @@ mod tests {
}, },
); );
assert_eq!( assert_eq!(
Headline::parse("**** ToDO [#A] COMMENT Title :tag:a2%:").0, Headline::parse("**** ToDO [#A] COMMENT Title :tag:a2%:", DEFAULT_KEYWORDS).0,
Headline { Headline {
level: 4, level: 4,
priority: None, priority: None,
@ -184,7 +163,7 @@ mod tests {
}, },
); );
assert_eq!( assert_eq!(
Headline::parse("**** T0DO [#A] COMMENT Title :tag:a2%:").0, Headline::parse("**** T0DO [#A] COMMENT Title :tag:a2%:", DEFAULT_KEYWORDS).0,
Headline { Headline {
level: 4, level: 4,
priority: None, priority: None,
@ -194,7 +173,7 @@ mod tests {
}, },
); );
assert_eq!( assert_eq!(
Headline::parse("**** TODO [#1] COMMENT Title :tag:a2%:").0, Headline::parse("**** TODO [#1] COMMENT Title :tag:a2%:", DEFAULT_KEYWORDS).0,
Headline { Headline {
level: 4, level: 4,
priority: None, priority: None,
@ -204,7 +183,7 @@ mod tests {
}, },
); );
assert_eq!( assert_eq!(
Headline::parse("**** TODO [#a] COMMENT Title :tag:a2%:").0, Headline::parse("**** TODO [#a] COMMENT Title :tag:a2%:", DEFAULT_KEYWORDS).0,
Headline { Headline {
level: 4, level: 4,
priority: None, priority: None,
@ -214,7 +193,7 @@ mod tests {
}, },
); );
assert_eq!( assert_eq!(
Headline::parse("**** TODO [#A] COMMENT Title :tag:a2%").0, Headline::parse("**** TODO [#A] COMMENT Title :tag:a2%", DEFAULT_KEYWORDS).0,
Headline { Headline {
level: 4, level: 4,
priority: Some('A'), priority: Some('A'),
@ -224,7 +203,7 @@ mod tests {
}, },
); );
assert_eq!( assert_eq!(
Headline::parse("**** TODO [#A] COMMENT Title tag:a2%:").0, Headline::parse("**** TODO [#A] COMMENT Title tag:a2%:", DEFAULT_KEYWORDS).0,
Headline { Headline {
level: 4, level: 4,
priority: Some('A'), priority: Some('A'),
@ -234,7 +213,7 @@ mod tests {
}, },
); );
assert_eq!( assert_eq!(
Headline::parse("**** COMMENT Title tag:a2%:").0, Headline::parse("**** COMMENT Title tag:a2%:", DEFAULT_KEYWORDS).0,
Headline { Headline {
level: 4, level: 4,
priority: None, priority: None,
@ -245,7 +224,7 @@ mod tests {
); );
assert_eq!( assert_eq!(
Headline::parse_with_keywords("**** TODO [#A] COMMENT Title :tag:a2%:", &[]).0, Headline::parse("**** TODO [#A] COMMENT Title :tag:a2%:", &[]).0,
Headline { Headline {
level: 4, level: 4,
priority: None, priority: None,
@ -255,7 +234,7 @@ mod tests {
}, },
); );
assert_eq!( assert_eq!(
Headline::parse_with_keywords("**** TASK [#A] COMMENT Title :tag:a2%:", &["TASK"]).0, Headline::parse("**** TASK [#A] COMMENT Title :tag:a2%:", &["TASK"]).0,
Headline { Headline {
level: 4, level: 4,
priority: Some('A'), priority: Some('A'),
@ -268,21 +247,43 @@ mod tests {
#[test] #[test]
fn is_commented() { fn is_commented() {
assert!(Headline::parse("* COMMENT Title").0.is_commented()); assert!(Headline::parse("* COMMENT Title", DEFAULT_KEYWORDS)
assert!(!Headline::parse("* Title").0.is_commented()); .0
assert!(!Headline::parse("* C0MMENT Title").0.is_commented()); .is_commented());
assert!(!Headline::parse("* comment Title").0.is_commented()); assert!(!Headline::parse("* Title", DEFAULT_KEYWORDS)
.0
.is_commented());
assert!(!Headline::parse("* C0MMENT Title", DEFAULT_KEYWORDS)
.0
.is_commented());
assert!(!Headline::parse("* comment Title", DEFAULT_KEYWORDS)
.0
.is_commented());
} }
#[test] #[test]
fn is_archived() { fn is_archived() {
assert!(Headline::parse("* Title :ARCHIVE:").0.is_archived()); assert!(Headline::parse("* Title :ARCHIVE:", DEFAULT_KEYWORDS)
assert!(Headline::parse("* Title :tag:ARCHIVE:").0.is_archived()); .0
assert!(Headline::parse("* Title :ARCHIVE:tag:").0.is_archived()); .is_archived());
assert!(!Headline::parse("* Title").0.is_commented()); assert!(Headline::parse("* Title :tag:ARCHIVE:", DEFAULT_KEYWORDS)
assert!(!Headline::parse("* Title :ARCHIVED:").0.is_archived()); .0
assert!(!Headline::parse("* Title :ARCHIVES:").0.is_archived()); .is_archived());
assert!(!Headline::parse("* Title :archive:").0.is_archived()); assert!(Headline::parse("* Title :ARCHIVE:tag:", DEFAULT_KEYWORDS)
.0
.is_archived());
assert!(!Headline::parse("* Title", DEFAULT_KEYWORDS)
.0
.is_commented());
assert!(!Headline::parse("* Title :ARCHIVED:", DEFAULT_KEYWORDS)
.0
.is_archived());
assert!(!Headline::parse("* Title :ARCHIVES:", DEFAULT_KEYWORDS)
.0
.is_archived());
assert!(!Headline::parse("* Title :archive:", DEFAULT_KEYWORDS)
.0
.is_archived());
} }
#[test] #[test]

View file

@ -59,7 +59,7 @@
//! struct CustomHtmlHandler; //! struct CustomHtmlHandler;
//! //!
//! impl<W: Write> HtmlHandler<W, Error> for CustomHtmlHandler { //! impl<W: Write> HtmlHandler<W, Error> for CustomHtmlHandler {
//! fn handle_headline_beg(&mut self, w: &mut W, hdl: Headline) -> Result<()> { //! fn headline_beg(&mut self, w: &mut W, hdl: Headline) -> Result<()> {
//! write!( //! write!(
//! w, //! w,
//! r##"<h{0}><a class="anchor" href="#{1}">{2}</a></h{0}>"##, //! r##"<h{0}><a class="anchor" href="#{1}">{2}</a></h{0}>"##,
@ -90,9 +90,6 @@
//! let result = String::from_utf8(cursor.into_inner()).expect("invalid utf-8"); //! let result = String::from_utf8(cursor.into_inner()).expect("invalid utf-8");
//! ``` //! ```
#[macro_use]
mod utils;
pub mod elements; pub mod elements;
pub mod export; pub mod export;
pub mod headline; pub mod headline;

View file

@ -6,9 +6,11 @@ use memchr::memchr2;
pub fn parse(text: &str) -> Option<(&str, Option<&str>, usize)> { pub fn parse(text: &str) -> Option<(&str, Option<&str>, usize)> {
debug_assert!(text.starts_with("{{{")); debug_assert!(text.starts_with("{{{"));
expect!(text, 3, |c: u8| c.is_ascii_alphabetic())?;
let bytes = text.as_bytes(); let bytes = text.as_bytes();
if text.len() <= 3 || !bytes[3].is_ascii_alphabetic() {
return None;
}
let (name, off) = memchr2(b'}', b'(', bytes) let (name, off) = memchr2(b'}', b'(', bytes)
.filter(|&i| { .filter(|&i| {
bytes[3..i] bytes[3..i]
@ -18,8 +20,9 @@ pub fn parse(text: &str) -> Option<(&str, Option<&str>, usize)> {
.map(|i| (&text[3..i], i))?; .map(|i| (&text[3..i], i))?;
let (args, off) = if bytes[off] == b'}' { let (args, off) = if bytes[off] == b'}' {
expect!(text, off + 1, b'}')?; if text.len() <= off + 2 || bytes[off + 1] != b'}' || bytes[off + 2] != b'}' {
expect!(text, off + 2, b'}')?; return None;
}
(None, off + 3 /* }}} */) (None, off + 3 /* }}} */)
} else { } else {
Substring::new(")}}}") Substring::new(")}}}")

View file

@ -1,200 +1,14 @@
mod cookie; pub(crate) mod cookie;
mod emphasis; pub(crate) mod emphasis;
mod fn_ref; pub(crate) mod fn_ref;
mod inline_call; pub(crate) mod inline_call;
mod inline_src; pub(crate) mod inline_src;
mod link; pub(crate) mod link;
mod macros; pub(crate) mod macros;
mod radio_target; pub(crate) mod radio_target;
mod snippet; pub(crate) mod snippet;
mod target; pub(crate) mod target;
pub(crate) mod timestamp;
pub use self::cookie::Cookie; pub use self::cookie::Cookie;
use jetscii::bytes; pub use self::timestamp::*;
/// An inline object produced by this module's `parse` function.
///
/// All string fields borrow from the text handed to the parser.
#[cfg_attr(test, derive(PartialEq, Debug))]
pub enum Object<'a> {
/// Value returned by `cookie::parse`, tried on text starting with `[`.
Cookie(Cookie<'a>),
/// Footnote reference; tried on text starting with `[fn:`.
FnRef {
label: Option<&'a str>,
def: Option<&'a str>,
},
/// Inline call; tried on text starting with `call_`.
InlineCall {
name: &'a str,
args: &'a str,
inside_header: Option<&'a str>,
end_header: Option<&'a str>,
},
/// Inline source block; tried on text starting with `src_`.
InlineSrc {
lang: &'a str,
option: Option<&'a str>,
body: &'a str,
},
/// Link; tried on text starting with `[[`.
Link {
path: &'a str,
desc: Option<&'a str>,
},
/// Macro; tried on text starting with `{{{`.
Macros {
name: &'a str,
args: Option<&'a str>,
},
/// Radio target; tried on text starting with `<<<`.
RadioTarget {
target: &'a str,
},
/// Snippet; tried on text starting with `@@`.
Snippet {
name: &'a str,
value: &'a str,
},
/// Target; tried on text starting with `<<` (but not `<<<` or `<<\n`).
Target {
target: &'a str,
},
// `end` indicates the position of the second marker
/// Emphasis opened by `*`.
Bold {
end: usize,
},
/// Emphasis opened by `/`.
Italic {
end: usize,
},
/// Emphasis opened by `+`.
Strike {
end: usize,
},
/// Emphasis opened by `_`.
Underline {
end: usize,
},
/// Text between a pair of `=` markers (markers excluded).
Verbatim(&'a str),
/// Text between a pair of `~` markers (markers excluded).
Code(&'a str),
/// Plain text that matched no other object.
Text(&'a str),
}
/// Finds the first inline object in `src`.
///
/// Returns `(object, offset, next)`:
/// * when an object starts at position 0, it is returned with its own offset
///   and `next` is `None`;
/// * when an object starts later, the text before it is returned as
///   `Object::Text` (offset = length of that text) and the object plus its
///   offset are handed back in `next`;
/// * when nothing matches, the whole input is returned as `Object::Text`.
pub fn parse(src: &str) -> (Object<'_>, usize, Option<(Object<'_>, usize)>) {
let bytes = src.as_bytes();
// Bytes at which an object may start (or which may precede text markup).
let bs = bytes!(b'@', b' ', b'"', b'(', b'\n', b'{', b'<', b'[');
let mut pos = 0;
// Position 0 is always examined; afterwards jump to the next candidate byte.
while let Some(off) = if pos == 0 {
Some(0)
} else {
bs.find(&bytes[pos..])
} {
pos += off;
// The arms below read up to `bytes[pos + 2]`, so with fewer than three
// bytes left the whole input is treated as plain text.
if src.len() - pos < 3 {
return (Object::Text(src), src.len(), None);
}
// Returns the found object, preceded by the text before `$pos` if any.
macro_rules! brk {
($obj:expr, $off:expr, $pos:expr) => {
return if $pos == 0 {
($obj, $off, None)
} else {
(Object::Text(&src[0..$pos]), $pos, Some(($obj, $off)))
};
};
}
let tail = &src[pos..];
match bytes[pos] {
b'@' if bytes[pos + 1] == b'@' => {
if let Some((name, value, off)) = snippet::parse(tail) {
brk!(Object::Snippet { name, value }, off, pos);
}
}
b'{' if bytes[pos + 1] == b'{' && bytes[pos + 2] == b'{' => {
if let Some((name, args, off)) = macros::parse(tail) {
brk!(Object::Macros { name, args }, off, pos);
}
}
b'<' if bytes[pos + 1] == b'<' => {
if bytes[pos + 2] == b'<' {
if let Some((target, off)) = radio_target::parse(tail) {
brk!(Object::RadioTarget { target }, off, pos);
}
} else if bytes[pos + 2] != b'\n' {
if let Some((target, off)) = target::parse(tail) {
brk!(Object::Target { target }, off, pos);
}
}
}
// `[` can open a footnote reference, a link or a cookie; each is tried
// in turn and the first success returns.
b'[' => {
if tail[1..].starts_with("fn:") {
if let Some((label, def, off)) = fn_ref::parse(tail) {
brk!(Object::FnRef { label, def }, off, pos);
}
}
if bytes[pos + 1] == b'[' {
if let Some((path, desc, off)) = link::parse(tail) {
brk!(Object::Link { path, desc }, off, pos);
}
}
if let Some((cookie, off)) = cookie::parse(tail) {
brk!(Object::Cookie(cookie), off, pos);
}
// TODO: Timestamp
}
// Markup is attempted on the text *after* this byte, so the byte itself
// stays in the preceding text (hence `pos + 1`).
// NOTE(review): `,` is not in the `bs` search set above, so this arm can
// only see `,` at position 0 — confirm whether that is intended.
b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => {
if let Some((obj, off)) = parse_text_markup(&tail[1..]) {
brk!(obj, off, pos + 1);
}
}
_ => {
if let Some((obj, off)) = parse_text_markup(tail) {
brk!(obj, off, pos);
}
}
}
// Nothing matched here; resume the search one byte further on.
pos += 1;
}
(Object::Text(src), src.len(), None)
}
/// Tries to read emphasis, verbatim/code, an inline source block or an inline
/// call at the very start of `src`.
///
/// Returns the object together with its offset: `1` for the container-style
/// emphasis objects (just past the opening marker), `end + 1` for verbatim
/// and code, and whatever the sub-parser reports for `src_`/`call_`.
///
/// `src` must not be empty: the first byte is read unconditionally.
fn parse_text_markup(src: &str) -> Option<(Object<'_>, usize)> {
    let marker = src.as_bytes()[0];

    match marker {
        // Emphasis whose contents are parsed further by the caller.
        b'*' | b'+' | b'/' | b'_' => {
            let end = emphasis::parse(src, marker)?;
            let object = match marker {
                b'*' => Object::Bold { end },
                b'+' => Object::Strike { end },
                b'/' => Object::Italic { end },
                _ => Object::Underline { end },
            };
            Some((object, 1))
        }
        // Markup whose contents are taken literally.
        b'=' | b'~' => {
            let end = emphasis::parse(src, marker)?;
            let body = &src[1..end];
            let object = if marker == b'=' {
                Object::Verbatim(body)
            } else {
                Object::Code(body)
            };
            Some((object, end + 1))
        }
        b's' if src.starts_with("src_") => {
            let (lang, option, body, off) = inline_src::parse(src)?;
            Some((Object::InlineSrc { lang, option, body }, off))
        }
        b'c' if src.starts_with("call_") => {
            let (name, args, inside_header, end_header, off) = inline_call::parse(src)?;
            let object = Object::InlineCall {
                name,
                args,
                inside_header,
                end_header,
            };
            Some((object, off))
        }
        _ => None,
    }
}
// Unit tests for the object-level `parse` function.
#[cfg(test)]
mod tests {
#[test]
fn parse() {
use super::*;
// An object at position 0 is returned directly, with no buffered follow-up.
assert_eq!(parse("*bold*"), (Object::Bold { end: 5 }, 1, None));
// Leading text comes first; the object found after it is returned as `next`.
assert_eq!(
parse("Normal =verbatim="),
(
Object::Text("Normal "),
"Normal ".len(),
Some((Object::Verbatim("verbatim"), "=verbatim=".len()))
)
);
// TODO: more tests
}
}

View file

@ -1,9 +1,401 @@
use memchr::memchr;
#[cfg_attr(test, derive(PartialEq))] #[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)] #[derive(Debug)]
pub struct Time<'a> { pub struct Datetime {
pub date: &'a str, pub date: (u16, u8, u8),
pub time: Option<(u8, u8)>,
} }
pub enum Timestamp<'a> { #[cfg_attr(test, derive(PartialEq))]
ActiveRange, #[derive(Debug)]
/// Kind of repeater attached to a timestamp.
// NOTE(review): presumably these map to Org's `+`, `++` and `.+` repeater
// marks respectively — nothing in this file parses them yet, confirm when the
// repeater parser is written.
pub enum RepeaterType {
Cumulate,
CatchUp,
Restart,
}
/// Kind of warning delay attached to a timestamp.
// NOTE(review): presumably `All` is Org's `-` mark and `First` is `--` —
// nothing in this file parses them yet, confirm when the delay parser is
// written.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub enum DelayType {
All,
First,
}
/// Unit in which a [`Repeater`] or [`Delay`] value is expressed.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub enum TimeUnit {
Hour,
Day,
Week,
Month,
Year,
}
/// Repeater of a timestamp: a repeater kind plus an interval of `value` `unit`s.
///
/// The parsers in this file do not produce repeaters yet; they always set the
/// `repeater` field of a timestamp to `None`.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Repeater {
pub ty: RepeaterType,
pub value: usize,
pub unit: TimeUnit,
}
/// Delay of a timestamp: a delay kind plus an interval of `value` `unit`s.
///
/// The parsers in this file do not produce delays yet; they always set the
/// `delay` field of a timestamp to `None`.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Delay {
pub ty: DelayType,
pub value: usize,
pub unit: TimeUnit,
}
/// A parsed timestamp.
///
/// `repeater` and `delay` are currently always `None`: parsing them is still
/// a TODO in `parse_datetime`.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub enum Timestamp<'a> {
/// `<...>` timestamp with a single date/time (see `parse_active`).
Active {
start: Datetime,
repeater: Option<Repeater>,
delay: Option<Delay>,
},
/// `[...]` timestamp with a single date/time (see `parse_inactive`).
Inactive {
start: Datetime,
repeater: Option<Repeater>,
delay: Option<Delay>,
},
/// `<...>` timestamp with an end, given either as a `TIME-TIME` range inside
/// the brackets or as a second `--<...>` timestamp.
ActiveRange {
start: Datetime,
end: Datetime,
repeater: Option<Repeater>,
delay: Option<Delay>,
},
/// `[...]` timestamp with an end, given either as a `TIME-TIME` range inside
/// the brackets or as a second `--[...]` timestamp.
InactiveRange {
start: Datetime,
end: Datetime,
repeater: Option<Repeater>,
delay: Option<Delay>,
},
/// Text found between `<%%(` and `)>` (see `parse_diary`).
Diary(&'a str),
}
/// Parses an active timestamp (`<...>`) at the start of `text`.
///
/// `text` must start with `<`. On success returns the timestamp and the
/// number of bytes consumed, including the final `>`.
///
/// A range is produced either from a `TIME-TIME` pair inside the brackets or,
/// failing that, from an immediately following `--<...>` timestamp. If that
/// second timestamp does not parse, only the first one is returned.
pub fn parse_active(text: &str) -> Option<(Timestamp<'_>, usize)> {
    debug_assert!(text.starts_with('<'));

    let bytes = text.as_bytes();
    let close = memchr(b'>', bytes)?;
    let (start, inner_end) = parse_datetime(&bytes[1..close])?;

    // `last` is the index of the final `>` consumed; `end` the range end, if any.
    let mut last = close;
    let mut end = inner_end;

    if end.is_none()
        && close <= text.len() - 14 /* --<YYYY-MM-DD> */
        && text[close + 1..].starts_with("--<")
    {
        let second = memchr(b'>', &bytes[close + 1..]).and_then(|rel| {
            // Skip the `--<` prefix; a time range inside the second timestamp is ignored.
            parse_datetime(&bytes[close + 4..close + 1 + rel]).map(|(datetime, _)| (datetime, rel))
        });
        if let Some((datetime, rel)) = second {
            end = Some(datetime);
            last = close + rel + 1;
        }
    }

    let timestamp = match end {
        Some(end) => Timestamp::ActiveRange {
            start,
            end,
            repeater: None,
            delay: None,
        },
        None => Timestamp::Active {
            start,
            repeater: None,
            delay: None,
        },
    };

    Some((timestamp, last + 1))
}
/// Parses an inactive timestamp (`[...]`) at the start of `text`.
///
/// `text` must start with `[`. On success returns the timestamp and the
/// number of bytes consumed, including the final `]`.
///
/// A range is produced either from a `TIME-TIME` pair inside the brackets or,
/// failing that, from an immediately following `--[...]` timestamp. If that
/// second timestamp does not parse, only the first one is returned.
pub fn parse_inactive(text: &str) -> Option<(Timestamp<'_>, usize)> {
    debug_assert!(text.starts_with('['));

    let bytes = text.as_bytes();
    let close = memchr(b']', bytes)?;
    let (start, inner_end) = parse_datetime(&bytes[1..close])?;

    // `last` is the index of the final `]` consumed; `end` the range end, if any.
    let mut last = close;
    let mut end = inner_end;

    if end.is_none()
        && close <= text.len() - 14 /* --[YYYY-MM-DD] */
        && text[close + 1..].starts_with("--[")
    {
        let second = memchr(b']', &bytes[close + 1..]).and_then(|rel| {
            // Skip the `--[` prefix; a time range inside the second timestamp is ignored.
            parse_datetime(&bytes[close + 4..close + 1 + rel]).map(|(datetime, _)| (datetime, rel))
        });
        if let Some((datetime, rel)) = second {
            end = Some(datetime);
            last = close + rel + 1;
        }
    }

    let timestamp = match end {
        Some(end) => Timestamp::InactiveRange {
            start,
            end,
            repeater: None,
            delay: None,
        },
        None => Timestamp::Inactive {
            start,
            repeater: None,
            delay: None,
        },
    };

    Some((timestamp, last + 1))
}
/// Parses the inside of a timestamp, i.e. the bytes between the brackets.
///
/// The accepted layout is `YYYY-MM-DD DAYNAME [TIME[-TIME]]`, with the words
/// separated by ASCII whitespace and `TIME` written as `H:MM` or `HH:MM`.
///
/// Returns `(start, end)`. `end` is `Some` only when a `TIME-TIME` range was
/// given; it then shares the date of `start`. Returns `None` for anything
/// else, including an empty slice.
///
/// Only the shape of the input is validated: numeric values are not
/// range-checked (a month of `99` is accepted).
fn parse_datetime(bytes: &[u8]) -> Option<(Datetime, Option<Datetime>)> {
    // `first`/`last` rather than indexing: the callers pass an empty slice for
    // input such as `<>` or `[]`, which must be rejected instead of panicking.
    let starts_with_digit = bytes.first().map_or(false, u8::is_ascii_digit);
    let ends_with_alnum = bytes.last().map_or(false, u8::is_ascii_alphanumeric);
    if !starts_with_digit || !ends_with_alnum {
        return None;
    }

    // similar to str::split_ascii_whitespace, but for &[u8]
    let mut words = bytes
        .split(u8::is_ascii_whitespace)
        .filter(|s| !s.is_empty());

    let date = words
        .next()
        .filter(|word| {
            word.len() == 10 /* YYYY-MM-DD */
                && word[0..4].iter().all(u8::is_ascii_digit)
                && word[4] == b'-'
                && word[5..7].iter().all(u8::is_ascii_digit)
                && word[7] == b'-'
                && word[8..10].iter().all(u8::is_ascii_digit)
        })
        .map(|word| {
            (
                (u16::from(word[0]) - u16::from(b'0')) * 1000
                    + (u16::from(word[1]) - u16::from(b'0')) * 100
                    + (u16::from(word[2]) - u16::from(b'0')) * 10
                    + (u16::from(word[3]) - u16::from(b'0')),
                (word[5] - b'0') * 10 + (word[6] - b'0'),
                (word[8] - b'0') * 10 + (word[9] - b'0'),
            )
        })?;

    // The day name is mandatory but its value is not kept; it only has to be
    // free of characters that could belong to another part of the timestamp.
    let _dayname = words.next().filter(|word| {
        word.iter().all(|&c| {
            !(c == b'+' || c == b'-' || c == b']' || c == b'>' || c.is_ascii_digit() || c == b'\n')
        })
    })?;

    let (start, end) = match words.next() {
        Some(word) => {
            // `TIME` or `TIME-TIME`: at most one `-` may separate two clock times.
            let mut parts = word.splitn(2, |&c| c == b'-');
            let start_time = parts.next().and_then(parse_clock_time)?;
            let end_time = match parts.next() {
                Some(part) => Some(parse_clock_time(part)?),
                None => None,
            };
            (
                Datetime {
                    date,
                    time: Some(start_time),
                },
                end_time.map(|time| Datetime {
                    date,
                    time: Some(time),
                }),
            )
        }
        None => (Datetime { date, time: None }, None),
    };

    // TODO: repeater and delay
    if words.next().is_some() {
        None
    } else {
        Some((start, end))
    }
}

/// Parses a clock time written as `H:MM` or `HH:MM` into `(hour, minute)`.
///
/// Only the shape is checked; the values themselves are not range-checked.
fn parse_clock_time(word: &[u8]) -> Option<(u8, u8)> {
    // The total length decides where the colon has to be.
    let colon = match word.len() {
        4 => 1,
        5 => 2,
        _ => return None,
    };
    let (hour, minute) = (&word[..colon], &word[colon + 1..]);
    if word[colon] != b':' || !hour.iter().chain(minute).all(u8::is_ascii_digit) {
        return None;
    }
    // At most two decimal digits, so the result always fits in a `u8`.
    let number = |digits: &[u8]| digits.iter().fold(0u8, |acc, &d| acc * 10 + (d - b'0'));
    Some((number(hour), number(minute)))
}
/// Parses a diary timestamp (`<%%(...)>`) at the start of `text`.
///
/// `text` must start with `<`. On success returns `Timestamp::Diary` holding
/// the text between `<%%(` and `)>`, together with the number of bytes
/// consumed, including the closing `>` (the same convention as
/// `parse_active`).
///
/// Returns `None` when the prefix is not `<%%(`, when the first `>` is not
/// directly preceded by `)`, when the content contains a newline, or when the
/// input is no longer than the empty form `<%%()>`.
pub fn parse_diary(text: &str) -> Option<(Timestamp<'_>, usize)> {
    debug_assert!(text.starts_with('<'));

    let bytes = text.as_bytes();

    // Compare bytes rather than slicing the `str`: `&text[1..4]` panics when
    // index 4 falls inside a multi-byte character.
    if bytes.len() <= 6 /* <%%()> */ || !bytes[1..].starts_with(b"%%(") {
        return None;
    }

    // Search after the `<%%(` prefix so `close - 1` can never underflow.
    let close = memchr(b'>', &bytes[4..])? + 4;
    if bytes[close - 1] != b')' || bytes[4..close - 1].contains(&b'\n') {
        return None;
    }

    // `close + 1`: the offset must step past the closing `>`.
    Some((Timestamp::Diary(&text[4..close - 1]), close + 1))
}
#[cfg(test)]
mod tests {
// Checks `parse_inactive` / `parse_active` on a plain timestamp, on a
// `--`-joined pair of timestamps, and on a `TIME-TIME` range inside one pair of
// brackets. In every case the returned offset must cover the whole input.
#[test]
fn parse_range() {
use super::*;
// Single inactive timestamp without a time.
assert_eq!(
parse_inactive("[2003-09-16 Tue]"),
Some((
Timestamp::Inactive {
start: Datetime {
date: (2003, 9, 16),
time: None
},
repeater: None,
delay: None,
},
"[2003-09-16 Tue]".len()
))
);
// Two timestamps joined by `--` form a range.
assert_eq!(
parse_inactive("[2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39]"),
Some((
Timestamp::InactiveRange {
start: Datetime {
date: (2003, 9, 16),
time: Some((9, 39))
},
end: Datetime {
date: (2003, 9, 16),
time: Some((10, 39))
},
repeater: None,
delay: None
},
"[2003-09-16 Tue 09:39]--[2003-09-16 Tue 10:39]".len()
))
);
// A `TIME-TIME` pair inside one timestamp also forms a range.
assert_eq!(
parse_active("<2003-09-16 Tue 09:39-10:39>"),
Some((
Timestamp::ActiveRange {
start: Datetime {
date: (2003, 9, 16),
time: Some((9, 39))
},
end: Datetime {
date: (2003, 9, 16),
time: Some((10, 39))
},
repeater: None,
delay: None
},
"<2003-09-16 Tue 09:39-10:39>".len()
))
);
}
// Checks `parse_datetime` on the bracket-less timestamp body: accepted
// date/time layouts first, then inputs that must be rejected.
#[test]
fn parse_datetime() {
use super::*;
// Date and day name only.
assert_eq!(
parse_datetime(b"2003-09-16 Tue"),
Some((
Datetime {
date: (2003, 9, 16),
time: None
},
None
))
);
// Single-digit hour.
assert_eq!(
parse_datetime(b"2003-09-16 Tue 9:39"),
Some((
Datetime {
date: (2003, 9, 16),
time: Some((9, 39))
},
None
))
);
// Two-digit hour.
assert_eq!(
parse_datetime(b"2003-09-16 Tue 09:39"),
Some((
Datetime {
date: (2003, 9, 16),
time: Some((9, 39))
},
None
))
);
// Time range: the end shares the date of the start.
assert_eq!(
parse_datetime(b"2003-09-16 Tue 9:39-10:39"),
Some((
Datetime {
date: (2003, 9, 16),
time: Some((9, 39))
},
Some(Datetime {
date: (2003, 9, 16),
time: Some((10, 39))
}),
))
);
// Rejected: one-digit month, missing day name (twice), time without a colon.
assert_eq!(parse_datetime(b"2003-9-16 Tue"), None);
assert_eq!(parse_datetime(b"2003-09-16"), None);
assert_eq!(parse_datetime(b"2003-09-16 09:39"), None);
assert_eq!(parse_datetime(b"2003-09-16 Tue 0939"), None);
}
} }

View file

@ -1,8 +1,8 @@
//! Parser //! Parser
use crate::elements::{self, *}; use crate::{elements::*, headline::*, objects::*};
use crate::headline::*; use jetscii::bytes;
use crate::objects::{self, *}; use memchr::memchr_iter;
#[cfg_attr(test, derive(PartialEq))] #[cfg_attr(test, derive(PartialEq))]
#[derive(Copy, Clone, Debug)] #[derive(Copy, Clone, Debug)]
@ -105,6 +105,7 @@ pub enum Event<'a> {
}, },
Rule, Rule,
Timestamp(Timestamp<'a>),
Cookie(Cookie<'a>), Cookie(Cookie<'a>),
FnRef { FnRef {
label: Option<&'a str>, label: Option<&'a str>,
@ -158,10 +159,9 @@ pub struct Parser<'a> {
text: &'a str, text: &'a str,
stack: Vec<(Container, usize, usize)>, stack: Vec<(Container, usize, usize)>,
off: usize, off: usize,
ele_buf: Option<(Element<'a>, usize)>, ele_buf: Option<(Event<'a>, usize, usize, usize)>,
obj_buf: Option<(Object<'a>, usize)>, obj_buf: Option<(Event<'a>, usize, usize, usize)>,
keywords: Option<&'a [&'a str]>, keywords: &'a [&'a str],
list_more_item: bool, list_more_item: bool,
} }
@ -175,7 +175,7 @@ impl<'a> Parser<'a> {
ele_buf: None, ele_buf: None,
obj_buf: None, obj_buf: None,
list_more_item: false, list_more_item: false,
keywords: None, keywords: DEFAULT_KEYWORDS,
} }
} }
@ -190,15 +190,14 @@ impl<'a> Parser<'a> {
} }
pub fn set_keywords(&mut self, keywords: &'a [&'a str]) { pub fn set_keywords(&mut self, keywords: &'a [&'a str]) {
self.keywords = Some(keywords) self.keywords = keywords;
} }
fn next_section_or_headline(&mut self) -> Event<'a> { fn next_section_or_headline(&mut self) -> Event<'a> {
let end = Headline::find_level(&self.text[self.off..], std::usize::MAX); let end = Headline::find_level(&self.text[self.off..], std::usize::MAX);
debug_assert!(end <= self.text[self.off..].len()); debug_assert!(end <= self.text[self.off..].len());
if end != 0 { if end != 0 {
self.stack self.push_stack(Container::Section, end, end);
.push((Container::Section, self.off + end, self.off + end));
Event::SectionBeg Event::SectionBeg
} else { } else {
self.next_headline() self.next_headline()
@ -206,165 +205,286 @@ impl<'a> Parser<'a> {
} }
fn next_headline(&mut self) -> Event<'a> { fn next_headline(&mut self) -> Event<'a> {
let (hdl, off, end) = if let Some(keywords) = self.keywords { let (hdl, off, end) = Headline::parse(&self.text[self.off..], self.keywords);
Headline::parse_with_keywords(&self.text[self.off..], keywords)
} else {
Headline::parse(&self.text[self.off..])
};
debug_assert!(end <= self.text[self.off..].len()); debug_assert!(end <= self.text[self.off..].len());
self.stack.push(( self.push_stack(Container::Headline(self.off + off), end, end);
Container::Headline(self.off + off),
self.off + end,
self.off + end,
));
self.off += off; self.off += off;
Event::HeadlineBeg(hdl) Event::HeadlineBeg(hdl)
} }
fn next_ele(&mut self, end: usize) -> Event<'a> { fn next_ele(&mut self, text: &'a str) -> Event<'a> {
let text = &self.text[self.off..end]; let (ele, off, limit, end) = self
let (ele, off) = self.ele_buf.take().unwrap_or_else(|| { .ele_buf
let (ele, off, next_ele) = elements::parse(text); .take()
self.ele_buf = next_ele; .or_else(|| self.real_next_ele(text))
(ele, off) .unwrap_or_else(|| {
}); let len = text.len();
let start = text.find(|c| c != '\n').unwrap_or(0);
if start == len - 1 {
(self.end(), len, 0, 0)
} else {
let mut pos = start;
for off in memchr_iter(b'\n', &text.as_bytes()[start..]) {
if text[pos..off + start].trim().is_empty() {
return (Event::ParagraphBeg, start, pos, off + start);
} else {
pos = off + start;
if let Some(buf) = self.real_next_ele(&text[pos + 1..]) {
self.ele_buf = Some(buf);
return (Event::ParagraphBeg, start, pos, pos);
}
}
}
(
Event::ParagraphBeg,
start,
if text.ends_with('\n') { len - 1 } else { len },
len,
)
}
});
debug_assert!(off <= text.len()); debug_assert!(off <= text.len() && limit <= text.len() && end <= text.len());
match ele {
Event::ParagraphBeg => self.push_stack(Container::Paragraph, limit, end),
Event::QteBlockBeg => self.push_stack(Container::QteBlock, limit, end),
Event::CtrBlockBeg => self.push_stack(Container::CtrBlock, limit, end),
Event::SplBlockBeg { .. } => self.push_stack(Container::SplBlock, limit, end),
Event::DynBlockBeg { .. } => self.push_stack(Container::DynBlock, limit, end),
Event::ListBeg { ordered, .. } => {
self.push_stack(Container::List(limit, ordered), end, end);
self.list_more_item = true;
}
_ => (),
}
self.off += off; self.off += off;
match ele { ele
Element::Paragraph { cont_end, end } => { }
debug_assert!(cont_end <= text.len() && end <= text.len());
self.stack // returns (event, offset, container limit, container end)
.push((Container::Paragraph, cont_end + self.off, end + self.off)); fn real_next_ele(&mut self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> {
Event::ParagraphBeg if text.starts_with("[fn:") {
if let Some((label, cont, off)) = fn_def::parse(text) {
return Some((Event::FnDef { label, cont }, off + 1, 0, 0));
} }
Element::QteBlock { end, cont_end, .. } => { }
debug_assert!(cont_end <= text.len() && end <= text.len());
self.stack let (tail, line_begin) = text
.push((Container::QteBlock, cont_end + self.off, end + self.off)); .find(|c| c != ' ')
Event::QteBlockBeg .map(|off| (&text[off..], off))
.unwrap_or((text, 0));
let (is_item, ordered) = list::is_item(tail);
if is_item {
return Some((Event::ListBeg { ordered }, 0, line_begin, text.len()));
}
// TODO: LaTeX environment
if tail.starts_with("\\begin{") {}
// rule
if tail.starts_with("-----") {
let off = rule::parse(tail);
if off != 0 {
return Some((Event::Rule, off, 0, 0));
} }
Element::CtrBlock { end, cont_end, .. } => { }
debug_assert!(cont_end <= text.len() && end <= text.len());
self.stack // fixed width
.push((Container::CtrBlock, cont_end + self.off, end + self.off)); if tail.starts_with(": ") || tail.starts_with(":\n") {
Event::CtrBlockBeg // let end = line_ends
} // .skip_while(|&i| {
Element::SplBlock { // text[i + 1..].starts_with(": ") || text[i + 1..].starts_with(":\n")
name, // })
args, // .next()
end, // .map(|i| i + 1)
cont_end, // .unwrap_or_else(|| text.len());
} => { // let off = end - pos;
debug_assert!(cont_end <= text.len() && end <= text.len()); // brk!(Element::FixedWidth(&tail[0..off]), off);
self.stack }
.push((Container::SplBlock, cont_end + self.off, end + self.off));
Event::SplBlockBeg { name, args } // comment
} if tail.starts_with("# ") || tail.starts_with("#\n") {
Element::DynBlock { // let end = line_ends
name, // .skip_while(|&i| {
args, // text[i + 1..].starts_with("# ") || text[i + 1..].starts_with("#\n")
cont_end, // })
end, // .next()
} => { // .map(|i| i + 1)
debug_assert!(cont_end <= text.len() && end <= text.len()); // .unwrap_or_else(|| text.len());
self.stack // let off = end - pos;
.push((Container::DynBlock, cont_end + self.off, end + self.off)); // brk!(Element::Comment(&tail[0..off]), off);
Event::DynBlockBeg { name, args } }
}
Element::List { ident, ordered } => { if tail.starts_with("#+") {
self.stack.push((Container::List(ident, ordered), end, end)); block::parse(tail)
self.list_more_item = true; .map(|(name, args, begin, limit, end)| {
Event::ListBeg { ordered } let cont = &tail[begin..limit];
} match &*name.to_uppercase() {
Element::Call { value } => Event::Call { value }, "COMMENT" => (Event::CommentBlock { args, cont }, end, 0, 0),
Element::Comment(c) => Event::Comment(c), "EXAMPLE" => (Event::ExampleBlock { args, cont }, end, 0, 0),
Element::CommentBlock { args, cont } => Event::CommentBlock { args, cont }, "EXPORT" => (Event::ExportBlock { args, cont }, end, 0, 0),
Element::ExampleBlock { args, cont } => Event::ExampleBlock { args, cont }, "SRC" => (Event::SrcBlock { args, cont }, end, 0, 0),
Element::ExportBlock { args, cont } => Event::ExportBlock { args, cont }, "VERSE" => (Event::VerseBlock { args, cont }, end, 0, 0),
Element::FixedWidth(f) => Event::FixedWidth(f), "CENTER" => (Event::CtrBlockBeg, begin, limit, end),
Element::FnDef { label, cont } => Event::FnDef { label, cont }, "QUOTE" => (Event::QteBlockBeg, begin, limit, end),
Element::Keyword { key, value } => Event::Keyword { key, value }, _ => (Event::SplBlockBeg { name, args }, begin, limit, end),
Element::Rule => Event::Rule, }
Element::SrcBlock { args, cont } => Event::SrcBlock { args, cont }, })
Element::VerseBlock { args, cont } => Event::VerseBlock { args, cont }, .or_else(|| {
Element::Empty => self.end(), dyn_block::parse(tail).map(|(name, args, begin, limit, end)| {
(Event::DynBlockBeg { name, args }, begin, limit, end)
})
})
.or_else(|| {
keyword::parse(tail).map(|(key, value, off)| {
if let Key::Call = key {
(Event::Call { value }, off, 0, 0)
} else {
(Event::Keyword { key, value }, off, 0, 0)
}
})
})
} else {
None
} }
} }
fn next_obj(&mut self, end: usize) -> Event<'a> { fn next_obj(&mut self, text: &'a str) -> Event<'a> {
let text = &self.text[self.off..end]; let (obj, off, limit, end) = self
let (obj, off) = self.obj_buf.take().unwrap_or_else(|| { .obj_buf
let (obj, off, next_obj) = objects::parse(text); .take()
self.obj_buf = next_obj; .or_else(|| self.real_next_obj(text))
(obj, off) .unwrap_or_else(|| {
}); let bs = bytes!(b'@', b' ', b'"', b'(', b'\n', b'{', b'<', b'[');
let bytes = text.as_bytes();
let mut pos = 0;
debug_assert!(off <= text.len()); while let Some(off) = bs.find(&bytes[pos..]) {
pos += off + 1;
if let Some(buf) = self.real_next_obj(&text[pos..]) {
self.obj_buf = Some(buf);
return (Event::Text(&text[0..pos]), pos, 0, 0);
}
}
(Event::Text(text), text.len(), 0, 0)
});
debug_assert!(off <= text.len() && limit <= text.len() && end <= text.len());
self.off += off; self.off += off;
match obj { match obj {
Object::Underline { end } => { Event::UnderlineBeg => self.push_stack(Container::Underline, limit, end),
debug_assert!(end <= text.len()); Event::StrikeBeg => self.push_stack(Container::Strike, limit, end),
self.stack Event::ItalicBeg => self.push_stack(Container::Italic, limit, end),
.push((Container::Underline, end + self.off - 1, end + self.off)); Event::BoldBeg => self.push_stack(Container::Bold, limit, end),
Event::UnderlineBeg _ => (),
}
obj
}
fn real_next_obj(&mut self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> {
if text.len() < 3 {
return None;
}
let bytes = text.as_bytes();
match bytes[0] {
b'@' if bytes[1] == b'@' => snippet::parse(text)
.map(|(name, value, off)| (Event::Snippet { name, value }, off, 0, 0)),
b'{' if bytes[1] == b'{' && bytes[2] == b'{' => macros::parse(text)
.map(|(name, args, off)| (Event::Macros { name, args }, off, 0, 0)),
b'<' if bytes[1] == b'<' => {
if bytes[2] == b'<' {
radio_target::parse(text)
.map(|(target, off)| (Event::RadioTarget { target }, off, 0, 0))
} else {
target::parse(text).map(|(target, off)| (Event::Target { target }, off, 0, 0))
}
} }
Object::Strike { end } => { b'<' => timestamp::parse_active(text)
debug_assert!(end <= text.len()); .map(|(timestamp, off)| (Event::Timestamp(timestamp), off, 0, 0))
self.stack .or_else(|| {
.push((Container::Strike, end + self.off - 1, end + self.off)); timestamp::parse_diary(text)
Event::StrikeBeg .map(|(timestamp, off)| (Event::Timestamp(timestamp), off, 0, 0))
}),
b'[' => {
if text[1..].starts_with("fn:") {
fn_ref::parse(text)
.map(|(label, def, off)| (Event::FnRef { label, def }, off, 0, 0))
} else if bytes[1] == b'[' {
link::parse(text)
.map(|(path, desc, off)| (Event::Link { path, desc }, off, 0, 0))
} else {
cookie::parse(text)
.map(|(cookie, off)| (Event::Cookie(cookie), off, 0, 0))
.or_else(|| {
timestamp::parse_inactive(text)
.map(|(timestamp, off)| (Event::Timestamp(timestamp), off, 0, 0))
})
}
} }
Object::Italic { end } => { b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => self.next_inline(&text[1..]),
debug_assert!(end <= text.len()); _ => self.next_inline(text),
self.stack
.push((Container::Italic, end + self.off - 1, end + self.off));
Event::ItalicBeg
}
Object::Bold { end } => {
debug_assert!(end <= text.len());
self.stack
.push((Container::Bold, end + self.off - 1, end + self.off));
Event::BoldBeg
}
Object::Code(c) => Event::Code(c),
Object::Cookie(c) => Event::Cookie(c),
Object::FnRef { label, def } => Event::FnRef { label, def },
Object::InlineCall {
name,
args,
inside_header,
end_header,
} => Event::InlineCall {
name,
args,
inside_header,
end_header,
},
Object::InlineSrc { lang, option, body } => Event::InlineSrc { lang, option, body },
Object::Link { path, desc } => Event::Link { path, desc },
Object::Macros { name, args } => Event::Macros { name, args },
Object::RadioTarget { target } => Event::RadioTarget { target },
Object::Snippet { name, value } => Event::Snippet { name, value },
Object::Target { target } => Event::Target { target },
Object::Text(t) => Event::Text(t),
Object::Verbatim(v) => Event::Verbatim(v),
} }
} }
fn next_list_item(&mut self, ident: usize, end: usize) -> Event<'a> { fn next_inline(&mut self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> {
let (bullet, off, cont_end, end, has_more) = list::parse(&self.text[self.off..end], ident); match text.as_bytes()[0] {
self.stack b'*' => emphasis::parse(text, b'*').map(|end| (Event::BoldBeg, 1, end - 1, end)),
.push((Container::ListItem, cont_end + self.off, end + self.off)); b'+' => emphasis::parse(text, b'+').map(|end| (Event::StrikeBeg, 1, end - 1, end)),
b'/' => emphasis::parse(text, b'/').map(|end| (Event::ItalicBeg, 1, end - 1, end)),
b'_' => emphasis::parse(text, b'_').map(|end| (Event::UnderlineBeg, 1, end - 1, end)),
b'=' => emphasis::parse(text, b'=')
.map(|end| (Event::Verbatim(&text[1..end]), end + 1, 0, 0)),
b'~' => {
emphasis::parse(text, b'~').map(|end| (Event::Code(&text[1..end]), end + 1, 0, 0))
}
b's' if text.starts_with("src_") => {
inline_src::parse(text).map(|(lang, option, body, off)| {
(Event::InlineSrc { lang, option, body }, off, 0, 0)
})
}
b'c' if text.starts_with("call_") => {
inline_call::parse(text).map(|(name, args, inside_header, end_header, off)| {
(
Event::InlineCall {
name,
args,
inside_header,
end_header,
},
off,
0,
0,
)
})
}
_ => None,
}
}
fn next_list_item(&mut self, ident: usize, text: &'a str) -> Event<'a> {
let (bullet, off, limit, end, has_more) = list::parse(text, ident);
self.push_stack(Container::ListItem, limit, end);
self.off += off; self.off += off;
self.list_more_item = has_more; self.list_more_item = has_more;
Event::ListItemBeg { bullet } Event::ListItemBeg { bullet }
} }
#[inline]
fn push_stack(&mut self, container: Container, limit: usize, end: usize) {
self.stack
.push((container, self.off + limit, self.off + end));
}
#[inline] #[inline]
fn end(&mut self) -> Event<'a> { fn end(&mut self) -> Event<'a> {
let (container, _, _) = self.stack.pop().unwrap(); let (container, _, _) = self.stack.pop().unwrap();
@ -390,53 +510,46 @@ impl<'a> Iterator for Parser<'a> {
type Item = Event<'a>; type Item = Event<'a>;
fn next(&mut self) -> Option<Event<'a>> { fn next(&mut self) -> Option<Event<'a>> {
self.stack if let Some(&(container, limit, end)) = self.stack.last() {
.last() Some(if self.off >= limit {
.cloned() debug_assert!(self.off <= limit && self.off <= end);
.map(|(container, cont_end, end)| { self.off = end;
if self.off >= cont_end { self.end()
debug_assert!(self.off <= cont_end); } else {
debug_assert!(self.off <= end); match container {
self.off = end; Container::Headline(beg) => {
self.end() debug_assert!(self.off >= beg);
} else { if self.off == beg {
match container { self.next_section_or_headline()
Container::Headline(beg) => { } else {
debug_assert!(self.off >= beg); self.next_headline()
if self.off == beg {
self.next_section_or_headline()
} else {
self.next_headline()
}
} }
Container::DynBlock
| Container::CtrBlock
| Container::QteBlock
| Container::SplBlock
| Container::ListItem
| Container::Section => self.next_ele(end),
Container::List(ident, _) => {
if self.list_more_item {
self.next_list_item(ident, end)
} else {
self.end()
}
}
Container::Paragraph
| Container::Bold
| Container::Underline
| Container::Italic
| Container::Strike => self.next_obj(cont_end),
} }
Container::DynBlock
| Container::CtrBlock
| Container::QteBlock
| Container::SplBlock
| Container::ListItem
| Container::Section => self.next_ele(&self.text[self.off..limit]),
Container::List(ident, _) => {
if self.list_more_item {
self.next_list_item(ident, &self.text[self.off..limit])
} else {
self.end()
}
}
Container::Paragraph
| Container::Bold
| Container::Underline
| Container::Italic
| Container::Strike => self.next_obj(&self.text[self.off..limit]),
} }
}) })
.or_else(|| { } else if self.off < self.text.len() {
if self.off >= self.text.len() { Some(self.next_section_or_headline())
None } else {
} else { None
Some(self.next_section_or_headline()) }
}
})
} }
} }
@ -454,6 +567,7 @@ fn parse() {
}), }),
SectionBeg, SectionBeg,
ParagraphBeg, ParagraphBeg,
Text("test "),
BoldBeg, BoldBeg,
Text("Section 1"), Text("Section 1"),
BoldEnd, BoldEnd,
@ -507,14 +621,10 @@ fn parse() {
assert_eq!( assert_eq!(
Parser::new( Parser::new(
r#"* Title 1 r#"#+OPTIONS: H:3 num:nil toc:t \n:nil ::t |:t ^:t -:t f:t *:t tex:t d:(HIDE) tags:not-in-toc
*Section 1*
** Title 2 * Definitions
_Section 2_ "#
* Title 3
/Section 3/
* Title 4
=Section 4="#
) )
.collect::<Vec<_>>(), .collect::<Vec<_>>(),
expected expected

View file

@ -1,5 +1,5 @@
use crate::elements::{fn_def, keyword, Key}; use crate::elements::{fn_def, keyword, Key};
use crate::headline::Headline; use crate::headline::{Headline, DEFAULT_KEYWORDS};
use memchr::memchr; use memchr::memchr;
type Headlines<'a> = Vec<Headline<'a>>; type Headlines<'a> = Vec<Headline<'a>>;
@ -15,7 +15,7 @@ pub fn metadata(src: &str) -> (Headlines<'_>, Keywords<'_>, Footnotes<'_>) {
if line.starts_with('*') { if line.starts_with('*') {
let level = memchr(b' ', line.as_bytes()).unwrap_or_else(|| line.len()); let level = memchr(b' ', line.as_bytes()).unwrap_or_else(|| line.len());
if line.as_bytes()[0..level].iter().all(|&c| c == b'*') { if line.as_bytes()[0..level].iter().all(|&c| c == b'*') {
headlines.push(Headline::parse(line).0) headlines.push(Headline::parse(line, DEFAULT_KEYWORDS).0)
} }
} else if line.starts_with("#+") { } else if line.starts_with("#+") {
if let Some((key, value, _)) = keyword::parse(line) { if let Some((key, value, _)) = keyword::parse(line) {

View file

@ -1,29 +0,0 @@
//! Utils macros
/// Looks at the byte of `$src` at `$index` and yields `Option<&u8>`: `Some`
/// when the byte exists and matches `$expect`, `None` otherwise (including
/// when `$index` is out of bounds).
///
/// `$expect` is either a single token compared with `==` (e.g. `b'}'`) or an
/// expression called with the byte as a predicate (e.g. a closure).
// NOTE(review): the `tt` arm is tried first, so any single-token argument —
// including a bare function name — is compared with `==` rather than called.
#[macro_export]
macro_rules! expect {
($src:ident, $index:expr, $expect:tt) => {
$src.as_bytes().get($index).filter(|&&b| b == $expect)
};
($src:ident, $index:expr, $expect:expr) => {
$src.as_bytes().get($index).filter(|&&b| $expect(b))
};
}
/// Yields the byte index of the first character of `$src` that is neither a
/// space nor a tab.
///
/// With a second argument the search starts at byte `$from`; the result is
/// still an index into the whole of `$src`.
// NOTE(review): both forms yield 0 when no such character is found (e.g. an
// all-blank string) — confirm callers expect 0 rather than the string length.
#[macro_export]
macro_rules! skip_space {
    ($src:ident) => {
        $src.as_bytes()
            .iter()
            // `|&c|`: the iterator yields `&u8`, which cannot be compared with a
            // `u8` literal directly.
            .position(|&c| c != b' ' && c != b'\t')
            .unwrap_or(0)
    };
    ($src:ident, $from:expr) => {
        $src[$from..]
            .as_bytes()
            .iter()
            .position(|&c| c != b' ' && c != b'\t')
            .map(|i| i + $from)
            .unwrap_or(0)
    };
}