feat: indextree-based org parser

PoiScript 2019-06-26 21:53:08 +08:00
parent 3beabcedfa
commit f786233852
34 changed files with 1919 additions and 1282 deletions


@@ -12,11 +12,18 @@ keywords = ["orgmode","emacs","parser"]
[badges]
travis-ci = { repository = "PoiScript/orgize" }
[features]
default = ["serde", "chrono"]
extra-serde-info = []
[dependencies]
bytecount = "0.5"
chrono = { version = "0.4", optional = true }
indextree = "3.2.0"
jetscii = "0.4"
memchr = "2"
serde = { version = "1.0.93", features = ["derive"], optional = true }
serde_json = "1.0.39"
[dev-dependencies]
slugify = "0.1.0"


@@ -3,14 +3,14 @@
extern crate orgize;
extern crate test;
use orgize::Parser;
use orgize::Org;
use test::Bencher;
#[bench]
fn org_syntax(b: &mut Bencher) {
// wget https://orgmode.org/worg/sources/dev/org-syntax.org
b.iter(|| {
let _ = Parser::new(include_str!("org-syntax.org")).collect::<Vec<_>>();
Org::new(include_str!("org-syntax.org")).parse();
})
}
@@ -18,7 +18,7 @@ fn org_syntax(b: &mut Bencher) {
fn doc(b: &mut Bencher) {
// wget https://orgmode.org/worg/sources/doc.org
b.iter(|| {
let _ = Parser::new(include_str!("doc.org")).collect::<Vec<_>>();
Org::new(include_str!("doc.org")).parse();
})
}
@@ -26,6 +26,6 @@ fn doc(b: &mut Bencher) {
fn org_faq(b: &mut Bencher) {
// wget https://orgmode.org/worg/sources/org-faq.org
b.iter(|| {
let _ = Parser::new(include_str!("org-faq.org")).collect::<Vec<_>>();
Org::new(include_str!("org-faq.org")).parse();
})
}
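
The bench changes above capture the new entry point introduced by this commit: instead of draining the old streaming Parser iterator, callers construct an Org value and let it build the tree in place. A minimal sketch of that call sequence, using only the methods defined in src/org.rs further down (finish() simply reports whether the document node already has children):

use orgize::Org;

fn main() {
    let mut org = Org::new("* Heading\nSome paragraph text.");
    org.parse();            // builds the indextree arena in place
    assert!(org.finish());  // the Document node now has children
}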


@@ -1,8 +1,16 @@
use memchr::{memchr, memchr_iter};
// return (name, args, contents-begin, contents-end, end)
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Block<'a> {
pub name: &'a str,
pub args: Option<&'a str>,
}
impl Block<'_> {
#[inline]
pub fn parse(text: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> {
// return (block, contents-begin, contents-end, end)
pub fn parse(text: &str) -> Option<(Block<'_>, usize, usize, usize)> {
debug_assert!(text.starts_with("#+"));
if text.len() <= 8 || text[2..8].to_uppercase() != "BEGIN_" {
@@ -11,7 +19,7 @@ pub fn parse(text: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> {
let mut lines = memchr_iter(b'\n', text.as_bytes());
let (name, para, off) = lines
let (name, args, off) = lines
.next()
.map(|i| {
memchr(b' ', &text.as_bytes()[8..i])
@@ -25,40 +33,41 @@ pub fn parse(text: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> {
for i in lines {
if text[pos..i].trim().eq_ignore_ascii_case(&end) {
return Some((name, para, off, pos, i + 1));
return Some((Block { name, args }, off, pos, i + 1));
}
pos = i + 1;
}
if text[pos..].trim().eq_ignore_ascii_case(&end) {
Some((name, para, off, pos, text.len()))
Some((Block { name, args }, off, pos, text.len()))
} else {
None
}
}
}
#[cfg(test)]
mod tests {
#[test]
fn parse() {
use super::parse;
assert_eq!(
parse("#+BEGIN_SRC\n#+END_SRC"),
Block::parse("#+BEGIN_SRC\n#+END_SRC"),
Some((
"SRC",
None,
Block {
name: "SRC",
args: None,
},
"#+BEGIN_SRC\n".len(),
"#+BEGIN_SRC\n".len(),
"#+BEGIN_SRC\n#+END_SRC".len()
))
);
assert_eq!(
parse("#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n#+END_SRC\n"),
Block::parse("#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n#+END_SRC\n"),
Some((
"SRC",
Some("javascript"),
Block {
name: "SRC",
args: Some("javascript"),
},
"#+BEGIN_SRC javascript \n".len(),
"#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n".len(),
"#+BEGIN_SRC javascript \nconsole.log('Hello World!');\n#+END_SRC\n".len()
@@ -66,4 +75,3 @@ mod tests {
);
// TODO: more testing
}
}
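
Block::parse now returns the struct plus three byte offsets instead of a bare five-element tuple. A usage sketch against the re-exported path orgize::elements::Block (the path and the meaning of the offsets are read off the new elements/mod.rs and the tests above, so treat it as illustrative rather than canonical):

use orgize::elements::Block;

fn main() {
    let text = "#+BEGIN_SRC rust\nfn main() {}\n#+END_SRC\n";
    if let Some((block, contents_begin, contents_end, end)) = Block::parse(text) {
        assert_eq!(block.name, "SRC");
        assert_eq!(block.args, Some("rust"));
        // the block body sits between the two content offsets
        assert_eq!(&text[contents_begin..contents_end], "fn main() {}\n");
        // `end` is where parsing resumes after #+END_SRC
        assert_eq!(end, text.len());
    }
}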


@@ -1,4 +1,4 @@
use crate::objects::timestamp::{Datetime, Timestamp};
use crate::elements::{Datetime, Timestamp};
use memchr::memchr;
/// clock elements
@@ -23,8 +23,8 @@ pub enum Clock<'a> {
},
}
impl<'a> Clock<'a> {
pub(crate) fn parse(text: &'a str) -> Option<(Clock<'a>, usize)> {
impl Clock<'_> {
pub(crate) fn parse(text: &str) -> Option<(Clock<'_>, usize)> {
let (text, eol) = memchr(b'\n', text.as_bytes())
.map(|i| (text[..i].trim(), i + 1))
.unwrap_or_else(|| (text.trim(), text.len()));
@@ -104,7 +104,7 @@ impl<'a> Clock<'a> {
}
/// returns `Some` if the clock is closed, `None` if running
pub fn duration(&self) -> Option<&'a str> {
pub fn duration(&self) -> Option<&str> {
match self {
Clock::Closed { duration, .. } => Some(duration),
Clock::Running { .. } => None,


@@ -7,8 +7,9 @@ pub enum Cookie<'a> {
Slash(&'a str, &'a str),
}
impl<'a> Cookie<'a> {
impl Cookie<'_> {
#[inline]
// return (cookie, offset)
pub(crate) fn parse(src: &str) -> Option<(Cookie<'_>, usize)> {
debug_assert!(src.starts_with('['));


@@ -1,7 +1,15 @@
use memchr::memchr_iter;
// return (name, offset, limit, end)
pub(crate) fn parse(text: &str) -> Option<(&str, usize, usize, usize)> {
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Drawer<'a> {
pub name: &'a str,
}
impl<'a> Drawer<'a> {
#[inline]
// return (drawer, contents-begin, contents-end, end)
pub(crate) fn parse(text: &'a str) -> Option<(Drawer<'a>, usize, usize, usize)> {
debug_assert!(text.starts_with(':'));
let mut lines = memchr_iter(b'\n', text.as_bytes());
@@ -20,32 +28,42 @@ pub(crate) fn parse(text: &str) -> Option<(&str, usize, usize, usize)> {
let mut pos = off;
for i in lines {
if text[pos..i].trim().eq_ignore_ascii_case(":END:") {
return Some((&name[0..name.len() - 1], off, pos, i + 1));
return Some((
Drawer {
name: &name[0..name.len() - 1],
},
off,
pos,
i + 1,
));
}
pos = i + 1;
}
if text[pos..].trim().eq_ignore_ascii_case(":END:") {
Some((&name[0..name.len() - 1], off, pos, text.len()))
Some((
Drawer {
name: &name[0..name.len() - 1],
},
off,
pos,
text.len(),
))
} else {
None
}
}
}
#[cfg(test)]
mod tests {
#[test]
fn parse() {
use super::parse;
assert_eq!(
parse(":PROPERTIES:\n :CUSTOM_ID: id\n :END:"),
Drawer::parse(":PROPERTIES:\n :CUSTOM_ID: id\n :END:"),
Some((
"PROPERTIES",
Drawer { name: "PROPERTIES" },
":PROPERTIES:\n".len(),
":PROPERTIES:\n :CUSTOM_ID: id\n".len(),
":PROPERTIES:\n :CUSTOM_ID: id\n :END:".len()
))
)
}
}
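
Drawer::parse follows the same struct-plus-offsets shape. Since the function is pub(crate), this is a crate-internal sketch of how the tree builder consumes it; the LOGBOOK input is only an illustration:

use crate::elements::Drawer;

fn drawer_sketch() {
    let text = ":LOGBOOK:\nCLOCK: [2019-06-26 Wed 21:00]\n:END:\n";
    if let Some((drawer, contents_begin, contents_end, end)) = Drawer::parse(text) {
        assert_eq!(drawer.name, "LOGBOOK");
        // the lines between the offsets become the drawer's children
        assert_eq!(&text[contents_begin..contents_end], "CLOCK: [2019-06-26 Wed 21:00]\n");
        assert_eq!(end, text.len());
    }
}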

View file

@@ -1,8 +1,16 @@
use memchr::{memchr, memchr_iter};
// return (name, parameters, offset, limit, end)
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct DynBlock<'a> {
pub block_name: &'a str,
pub arguments: Option<&'a str>,
}
impl DynBlock<'_> {
#[inline]
pub fn parse(text: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> {
// return (dyn_block, contents-begin, contents-end, end)
pub(crate) fn parse(text: &str) -> Option<(DynBlock<'_>, usize, usize, usize)> {
debug_assert!(text.starts_with("#+"));
if text.len() <= "#+BEGIN: ".len() || !text[2..9].eq_ignore_ascii_case("BEGIN: ") {
@@ -31,35 +39,49 @@ pub fn parse(text: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> {
for i in lines {
if text[pos..i].trim().eq_ignore_ascii_case("#+END:") {
return Some((name, para, off, pos, i + 1));
return Some((
DynBlock {
block_name: name,
arguments: para,
},
off,
pos,
i + 1,
));
}
pos = i + 1;
}
if text[pos..].trim().eq_ignore_ascii_case("#+END:") {
Some((name, para, off, pos, text.len()))
Some((
DynBlock {
block_name: name,
arguments: para,
},
off,
pos,
text.len(),
))
} else {
None
}
}
}
#[cfg(test)]
mod tests {
#[test]
fn parse() {
use super::parse;
// TODO: testing
assert_eq!(
parse("#+BEGIN: clocktable :scope file\nCONTENTS\n#+END:\n"),
DynBlock::parse("#+BEGIN: clocktable :scope file\nCONTENTS\n#+END:\n"),
Some((
"clocktable",
Some(":scope file"),
DynBlock {
block_name: "clocktable",
arguments: Some(":scope file"),
},
"#+BEGIN: clocktable :scope file\n".len(),
"#+BEGIN: clocktable :scope file\nCONTENTS\n".len(),
"#+BEGIN: clocktable :scope file\nCONTENTS\n#+END:\n".len(),
))
);
}
}
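
DynBlock::parse gets the same treatment. Another crate-internal sketch (the function is pub(crate)); the columnview block is an assumed example input, with the offset meanings taken from the test above:

use crate::elements::DynBlock;

fn dyn_block_sketch() {
    let text = "#+BEGIN: columnview :id global\n| a | b |\n#+END:\n";
    if let Some((dyn_block, contents_begin, contents_end, end)) = DynBlock::parse(text) {
        assert_eq!(dyn_block.block_name, "columnview");
        assert_eq!(dyn_block.arguments, Some(":id global"));
        assert_eq!(&text[contents_begin..contents_end], "| a | b |\n");
        assert_eq!(end, text.len());
    }
}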


@@ -1,7 +1,14 @@
use memchr::memchr;
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct FnDef<'a> {
pub label: &'a str,
}
impl FnDef<'_> {
#[inline]
pub fn parse(text: &str) -> Option<(&str, &str, usize)> {
pub fn parse(text: &str) -> Option<(FnDef<'_>, usize, usize)> {
if text.starts_with("[fn:") {
let (label, off) = memchr(b']', text.as_bytes())
.filter(|&i| {
@@ -12,49 +19,50 @@ pub fn parse(text: &str) -> Option<(&str, &str, usize)> {
})
.map(|i| (&text["[fn:".len()..i], i + 1))?;
let (content, off) = memchr(b'\n', text.as_bytes())
.map(|i| (&text[off..i], i))
.unwrap_or_else(|| (&text[off..], text.len()));
let end = memchr(b'\n', text.as_bytes()).unwrap_or_else(|| text.len());
Some((label, content, off))
Some((FnDef { label }, off, end))
} else {
None
}
}
}
#[cfg(test)]
mod tests {
#[test]
fn parse() {
use super::parse;
assert_eq!(
parse("[fn:1] https://orgmode.org"),
FnDef::parse("[fn:1] https://orgmode.org"),
Some((
"1",
" https://orgmode.org",
FnDef { label: "1" },
"[fn:1]".len(),
"[fn:1] https://orgmode.org".len()
))
);
assert_eq!(
parse("[fn:word_1] https://orgmode.org"),
FnDef::parse("[fn:word_1] https://orgmode.org"),
Some((
"word_1",
" https://orgmode.org",
FnDef { label: "word_1" },
"[fn:word_1]".len(),
"[fn:word_1] https://orgmode.org".len()
))
);
assert_eq!(
parse("[fn:WORD-1] https://orgmode.org"),
FnDef::parse("[fn:WORD-1] https://orgmode.org"),
Some((
"WORD-1",
" https://orgmode.org",
FnDef { label: "WORD-1" },
"[fn:WORD-1]".len(),
"[fn:WORD-1] https://orgmode.org".len()
))
);
assert_eq!(parse("[fn:WORD]"), Some(("WORD", "", "[fn:WORD]".len())));
assert_eq!(parse("[fn:] https://orgmode.org"), None);
assert_eq!(parse("[fn:wor d] https://orgmode.org"), None);
assert_eq!(parse("[fn:WORD https://orgmode.org"), None);
}
assert_eq!(
FnDef::parse("[fn:WORD]"),
Some((
FnDef { label: "WORD" },
"[fn:WORD]".len(),
"[fn:WORD]".len()
))
);
assert_eq!(FnDef::parse("[fn:] https://orgmode.org"), None);
assert_eq!(FnDef::parse("[fn:wor d] https://orgmode.org"), None);
assert_eq!(FnDef::parse("[fn:WORD https://orgmode.org"), None);
}
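
FnDef::parse no longer hands back the definition text itself: it returns the label plus the start and end offsets of the rest of the line, and callers recover the content by slicing. A short sketch, with offset meanings taken from the updated tests above:

use orgize::elements::FnDef;

fn main() {
    let text = "[fn:1] See https://orgmode.org\nnext line";
    if let Some((fn_def, contents_begin, end)) = FnDef::parse(text) {
        assert_eq!(fn_def.label, "1");
        // the definition body is whatever follows the label on the same line
        assert_eq!(&text[contents_begin..end], " See https://orgmode.org");
    }
}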


@@ -7,8 +7,9 @@ pub struct FnRef<'a> {
pub definition: Option<&'a str>,
}
impl<'a> FnRef<'a> {
impl FnRef<'_> {
#[inline]
// return (fn_ref, offset)
pub fn parse(text: &str) -> Option<(FnRef<'_>, usize)> {
debug_assert!(text.starts_with("[fn:"));


@@ -21,8 +21,8 @@ pub struct Headline<'a> {
pub keyword: Option<&'a str>,
}
impl<'a> Headline<'a> {
pub(crate) fn parse(text: &'a str, keywords: &'a [&'a str]) -> (Headline<'a>, usize, usize) {
impl Headline<'_> {
pub(crate) fn parse<'a>(text: &'a str, keywords: &[&str]) -> (Headline<'a>, usize, usize) {
let level = memchr2(b'\n', b' ', text.as_bytes()).unwrap_or_else(|| text.len());
debug_assert!(level > 0);


@@ -1,59 +1,16 @@
use memchr::{memchr, memchr2};
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub enum Key<'a> {
// Affiliated Keywords
// Only "CAPTION" and "RESULTS" keywords can have an optional value.
Caption { option: Option<&'a str> },
Header,
Name,
Plot,
Results { option: Option<&'a str> },
Attr { backend: &'a str },
// Keywords
Author,
Date,
Title,
Custom(&'a str),
}
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Keyword<'a> {
pub key: Key<'a>,
pub key: &'a str,
pub option: Option<&'a str>,
pub value: &'a str,
}
impl<'a> Keyword<'a> {
#[inline]
pub(crate) fn new(key: &'a str, option: Option<&'a str>, value: &'a str) -> Keyword<'a> {
Keyword {
key: match &*key.to_uppercase() {
"AUTHOR" => Key::Author,
"DATE" => Key::Date,
"HEADER" => Key::Header,
"NAME" => Key::Name,
"PLOT" => Key::Plot,
"TITLE" => Key::Title,
"RESULTS" => Key::Results { option },
"CAPTION" => Key::Caption { option },
k => {
if k.starts_with("ATTR_") {
Key::Attr {
backend: &key["ATTR_".len()..],
}
} else {
Key::Custom(key)
}
}
},
value,
}
}
impl Keyword<'_> {
#[inline]
// return (key, option, value, offset)
pub(crate) fn parse(text: &str) -> Option<(&str, Option<&str>, &str, usize)> {
debug_assert!(text.starts_with("#+"));
@@ -79,11 +36,11 @@ impl<'a> Keyword<'a> {
(None, off)
};
let (value, off) = memchr(b'\n', bytes)
.map(|i| (&text[off..i], i + 1))
.unwrap_or_else(|| (&text[off..], text.len()));
let end = memchr(b'\n', bytes)
.map(|i| i + 1)
.unwrap_or_else(|| text.len());
Some((key, option, value.trim(), off))
Some((key, option, &text[off..end].trim(), end))
}
}
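
With the Key enum gone, Keyword::parse (pub(crate)) just hands back the raw key text, and classification moves to the call site: the tree builder in src/org.rs below checks for CALL to emit a BabelCall element and keeps everything else as a plain Keyword. A crate-internal sketch of the new return shape (the exact option handling is not shown in this hunk, so treat it as an assumption):

use crate::elements::Keyword;

fn keyword_sketch() {
    let text = "#+TITLE: orgize\n";
    if let Some((key, option, value, end)) = Keyword::parse(text) {
        assert_eq!(key, "TITLE");
        assert_eq!(option, None);
        assert_eq!(value, "orgize");
        assert_eq!(end, text.len());
        // "CALL" keywords are special-cased into Element::BabelCall by the caller
        assert!(!key.eq_ignore_ascii_case("CALL"));
    }
}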


@@ -8,8 +8,9 @@ pub struct Link<'a> {
pub desc: Option<&'a str>,
}
impl<'a> Link<'a> {
impl Link<'_> {
#[inline]
// return (link, offset)
pub(crate) fn parse(text: &str) -> Option<(Link<'_>, usize)> {
debug_assert!(text.starts_with("[["));


@@ -1,9 +1,17 @@
use memchr::memchr_iter;
use std::iter::once;
// (indentation, ordered, limit, end)
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct List {
pub indent: usize,
pub ordered: bool,
}
impl List {
#[inline]
pub fn parse(text: &str) -> Option<(usize, bool, usize, usize)> {
// return (list, begin, end)
pub(crate) fn parse(text: &str) -> Option<(List, usize, usize)> {
let (indent, tail) = text
.find(|c| c != ' ')
.map(|off| (off, &text[off..]))
@@ -23,7 +31,7 @@ pub fn parse(text: &str) -> Option<(usize, bool, usize, usize)> {
if line_indent < indent
|| (line_indent == indent && is_item(&line[line_indent..]).is_none())
{
Some((indent, ordered, pos, pos))
Some((List { indent, ordered }, pos, pos))
} else {
pos = i;
continue;
@@ -35,20 +43,62 @@ pub fn parse(text: &str) -> Option<(usize, bool, usize, usize)> {
if line_indent < indent
|| (line_indent == indent && is_item(&line[line_indent..]).is_none())
{
Some((indent, ordered, pos, pos))
Some((List { indent, ordered }, pos, pos))
} else {
pos = next_i;
continue;
}
} else {
Some((indent, ordered, pos, next_i))
Some((List { indent, ordered }, pos, next_i))
}
} else {
Some((indent, ordered, pos, i))
Some((List { indent, ordered }, pos, i))
};
}
Some((indent, ordered, pos, pos))
Some((List { indent, ordered }, pos, pos))
}
}
pub struct ListItem<'a> {
pub bullet: &'a str,
}
impl ListItem<'_> {
pub fn parse(text: &str, indent: usize) -> (ListItem<'_>, usize, usize) {
debug_assert!(&text[0..indent].trim().is_empty());
let off = &text[indent..].find(' ').unwrap() + 1 + indent;
let bytes = text.as_bytes();
let mut lines = memchr_iter(b'\n', bytes)
.map(|i| i + 1)
.chain(once(text.len()));
let mut pos = lines.next().unwrap();
for i in lines {
let line = &text[pos..i];
if let Some(line_indent) = line.find(|c: char| !c.is_whitespace()) {
if line_indent == indent {
return (
ListItem {
bullet: &text[indent..off],
},
off,
pos,
);
}
}
pos = i;
}
(
ListItem {
bullet: &text[indent..off],
},
off,
text.len(),
)
}
}
#[inline]
@@ -97,60 +147,91 @@ fn test_is_item() {
}
#[test]
fn test_parse() {
fn list_parse() {
assert_eq!(
parse("+ item1\n+ item2"),
Some((0, false, "+ item1\n+ item2".len(), "+ item1\n+ item2".len()))
List::parse("+ item1\n+ item2"),
Some((
List {
indent: 0,
ordered: false,
},
"+ item1\n+ item2".len(),
"+ item1\n+ item2".len()
))
);
assert_eq!(
parse("* item1\n \n* item2"),
List::parse("* item1\n \n* item2"),
Some((
0,
false,
List {
indent: 0,
ordered: false
},
"* item1\n \n* item2".len(),
"* item1\n \n* item2".len()
))
);
assert_eq!(
parse("* item1\n \n \n* item2"),
Some((0, false, "* item1\n".len(), "* item1\n \n \n".len()))
);
assert_eq!(
parse("* item1\n \n "),
Some((0, false, "+ item1\n".len(), "* item1\n \n ".len()))
);
assert_eq!(
parse("+ item1\n + item2\n "),
List::parse("* item1\n \n \n* item2"),
Some((
0,
false,
List {
indent: 0,
ordered: false,
},
"* item1\n".len(),
"* item1\n \n \n".len()
))
);
assert_eq!(
List::parse("* item1\n \n "),
Some((
List {
indent: 0,
ordered: false,
},
"+ item1\n".len(),
"* item1\n \n ".len()
))
);
assert_eq!(
List::parse("+ item1\n + item2\n "),
Some((
List {
indent: 0,
ordered: false,
},
"+ item1\n + item2\n".len(),
"+ item1\n + item2\n ".len()
))
);
assert_eq!(
parse("+ item1\n \n + item2\n \n+ item 3"),
List::parse("+ item1\n \n + item2\n \n+ item 3"),
Some((
0,
false,
List {
indent: 0,
ordered: false,
},
"+ item1\n \n + item2\n \n+ item 3".len(),
"+ item1\n \n + item2\n \n+ item 3".len()
))
);
assert_eq!(
parse(" + item1\n \n + item2"),
List::parse(" + item1\n \n + item2"),
Some((
2,
false,
List {
indent: 2,
ordered: false,
},
" + item1\n \n + item2".len(),
" + item1\n \n + item2".len()
))
);
assert_eq!(
parse("+ 1\n\n - 2\n\n - 3\n\n+ 4"),
List::parse("+ 1\n\n - 2\n\n - 3\n\n+ 4"),
Some((
0,
false,
List {
indent: 0,
ordered: false,
},
"+ 1\n\n - 2\n\n - 3\n\n+ 4".len(),
"+ 1\n\n - 2\n\n - 3\n\n+ 4".len()
))
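
List::parse now reports the list's indent and ordering together with where its contents stop, and the new ListItem::parse takes that indent to split off one item at a time; parse_list_items in src/org.rs below drives exactly this pairing. Since List::parse is pub(crate), this sketch lives inside the crate, with the offset interpretation taken from the tests above:

use crate::elements::{List, ListItem};

fn list_sketch() {
    let text = "+ item1\n+ item2";
    let (list, contents_end, end) = List::parse(text).unwrap();
    assert_eq!((list.indent, list.ordered), (0, false));
    assert_eq!((contents_end, end), (text.len(), text.len()));
    // each call peels off one item and reports where the next one starts
    let (item, contents_begin, item_end) = ListItem::parse(text, list.indent);
    assert_eq!(item.bullet, "+ ");
    assert_eq!(&text[contents_begin..item_end], "item1\n");
}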


@@ -8,9 +8,9 @@ pub struct Macros<'a> {
pub arguments: Option<&'a str>,
}
impl<'a> Macros<'a> {
impl Macros<'_> {
#[inline]
pub fn parse(text: &str) -> Option<(Macros<'_>, usize)> {
pub(crate) fn parse(text: &str) -> Option<(Macros<'_>, usize)> {
debug_assert!(text.starts_with("{{{"));
let bytes = text.as_bytes();


@@ -1,16 +1,224 @@
/// elements
///
/// elements means some syntactical parts that have the same level with paragraph.
pub(crate) mod block;
pub(crate) mod clock;
pub(crate) mod drawer;
pub(crate) mod dyn_block;
pub(crate) mod fn_def;
pub(crate) mod keyword;
pub(crate) mod list;
pub(crate) mod planning;
pub(crate) mod rule;
mod block;
mod clock;
mod cookie;
mod drawer;
mod dyn_block;
mod fn_def;
mod fn_ref;
mod fragment;
mod headline;
mod inline_call;
mod inline_src;
mod keyword;
mod link;
mod list;
mod macros;
mod planning;
mod radio_target;
mod rule;
mod snippet;
mod target;
mod timestamp;
pub use self::clock::Clock;
pub use self::keyword::{Key, Keyword};
pub use self::planning::Planning;
pub mod emphasis;
pub use self::{
block::Block,
clock::Clock,
cookie::Cookie,
drawer::Drawer,
dyn_block::DynBlock,
fn_def::FnDef,
fn_ref::FnRef,
headline::Headline,
inline_call::InlineCall,
inline_src::InlineSrc,
keyword::Keyword,
link::Link,
list::{List, ListItem},
macros::Macros,
planning::Planning,
radio_target::RadioTarget,
rule::Rule,
snippet::Snippet,
target::Target,
timestamp::*,
};
#[derive(Debug)]
pub enum Element<'a> {
Block {
block: Block<'a>,
begin: usize,
end: usize,
contents_begin: usize,
contents_end: usize,
},
BabelCall {
value: &'a str,
begin: usize,
end: usize,
},
Section {
begin: usize,
end: usize,
contents_begin: usize,
contents_end: usize,
},
Clock {
clock: Clock<'a>,
begin: usize,
end: usize,
},
Cookie {
cookie: Cookie<'a>,
begin: usize,
end: usize,
},
RadioTarget {
radio_target: RadioTarget<'a>,
begin: usize,
end: usize,
},
Drawer {
drawer: Drawer<'a>,
begin: usize,
end: usize,
contents_begin: usize,
contents_end: usize,
},
Document {
begin: usize,
end: usize,
},
DynBlock {
dyn_block: DynBlock<'a>,
begin: usize,
end: usize,
contents_begin: usize,
contents_end: usize,
},
FnDef {
begin: usize,
end: usize,
contents_begin: usize,
contents_end: usize,
fn_def: FnDef<'a>,
},
FnRef {
fn_ref: FnRef<'a>,
begin: usize,
end: usize,
},
Headline {
begin: usize,
end: usize,
contents_begin: usize,
contents_end: usize,
headline: Headline<'a>,
},
InlineCall {
inline_call: InlineCall<'a>,
begin: usize,
end: usize,
},
InlineSrc {
inline_src: InlineSrc<'a>,
begin: usize,
end: usize,
},
Keyword {
keyword: Keyword<'a>,
begin: usize,
end: usize,
},
Link {
link: Link<'a>,
begin: usize,
end: usize,
},
List {
list: List,
begin: usize,
end: usize,
contents_begin: usize,
contents_end: usize,
},
ListItem {
list_item: ListItem<'a>,
begin: usize,
end: usize,
contents_begin: usize,
contents_end: usize,
},
Macros {
macros: Macros<'a>,
begin: usize,
end: usize,
},
Planning(Planning<'a>),
Snippet {
begin: usize,
end: usize,
snippet: Snippet<'a>,
},
Text {
value: &'a str,
begin: usize,
end: usize,
},
Paragraph {
begin: usize,
end: usize,
contents_begin: usize,
contents_end: usize,
},
Rule {
begin: usize,
end: usize,
},
Timestamp {
begin: usize,
end: usize,
timestamp: Timestamp<'a>,
},
Target {
target: Target<'a>,
begin: usize,
end: usize,
},
Bold {
begin: usize,
end: usize,
contents_begin: usize,
contents_end: usize,
},
Strike {
begin: usize,
end: usize,
contents_begin: usize,
contents_end: usize,
},
Italic {
begin: usize,
end: usize,
contents_begin: usize,
contents_end: usize,
},
Underline {
begin: usize,
end: usize,
contents_begin: usize,
contents_end: usize,
},
Verbatim {
begin: usize,
end: usize,
value: &'a str,
},
Code {
begin: usize,
end: usize,
value: &'a str,
},
}
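
Each tree node is one of these Element variants, carrying byte offsets into the original input (plus contents_* offsets when the node can have children). A sketch of how downstream code might read a node back out; the helper itself is hypothetical, but the variant and field names come straight from the enum above:

use orgize::elements::Element;

fn describe(element: &Element<'_>, text: &str) -> String {
    match element {
        Element::Headline { begin, end, .. } => format!("headline: {:?}", &text[*begin..*end]),
        Element::Paragraph { contents_begin, contents_end, .. } => {
            format!("paragraph: {:?}", &text[*contents_begin..*contents_end])
        }
        Element::Text { value, .. } => format!("text: {:?}", value),
        _ => String::from("something else"),
    }
}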


@@ -1,4 +1,4 @@
use crate::objects::Timestamp;
use crate::elements::Timestamp;
use memchr::memchr;
/// planning elements
@@ -13,8 +13,9 @@ pub struct Planning<'a> {
pub closed: Option<Timestamp<'a>>,
}
impl<'a> Planning<'a> {
pub(crate) fn parse(text: &'a str) -> Option<(Planning<'a>, usize)> {
impl Planning<'_> {
#[inline]
pub(crate) fn parse(text: &str) -> Option<(Planning<'_>, usize)> {
let (mut deadline, mut scheduled, mut closed) = (None, None, None);
let (mut tail, off) = memchr(b'\n', text.as_bytes())
.map(|i| (text[..i].trim(), i + 1))
@@ -60,7 +61,7 @@ impl<'a> Planning<'a> {
#[test]
fn prase() {
use crate::objects::Datetime;
use crate::elements::Datetime;
assert_eq!(
Planning::parse("SCHEDULED: <2019-04-08 Mon>\n"),


@@ -0,0 +1,53 @@
use jetscii::Substring;
// TODO: text-markup, entities, latex-fragments, subscript and superscript
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct RadioTarget<'a> {
contents: &'a str,
}
impl RadioTarget<'_> {
#[inline]
// return (radio_target, offset)
pub(crate) fn parse(src: &str) -> Option<(RadioTarget<'_>, usize)> {
debug_assert!(src.starts_with("<<<"));
let bytes = src.as_bytes();
let (contents, off) = Substring::new(">>>")
.find(src)
.filter(|&i| {
bytes[3] != b' '
&& bytes[i - 1] != b' '
&& bytes[3..i]
.iter()
.all(|&c| c != b'<' && c != b'\n' && c != b'>')
})
.map(|i| (&src[3..i], i + ">>>".len()))?;
Some((RadioTarget { contents }, off))
}
}
#[test]
fn parse() {
assert_eq!(
RadioTarget::parse("<<<target>>>"),
Some((RadioTarget { contents: "target" }, "<<<target>>>".len()))
);
assert_eq!(
RadioTarget::parse("<<<tar get>>>"),
Some((
RadioTarget {
contents: "tar get"
},
"<<<tar get>>>".len()
))
);
assert_eq!(RadioTarget::parse("<<<target >>>"), None);
assert_eq!(RadioTarget::parse("<<< target>>>"), None);
assert_eq!(RadioTarget::parse("<<<ta<get>>>"), None);
assert_eq!(RadioTarget::parse("<<<ta>get>>>"), None);
assert_eq!(RadioTarget::parse("<<<ta\nget>>>"), None);
assert_eq!(RadioTarget::parse("<<<target>>"), None);
}


@@ -1,37 +1,37 @@
pub struct Rule;
impl Rule {
#[inline]
pub fn parse(text: &str) -> usize {
// return offset
pub(crate) fn parse(text: &str) -> Option<usize> {
let (text, off) = memchr::memchr(b'\n', text.as_bytes())
.map(|i| (text[..i].trim(), i + 1))
.unwrap_or_else(|| (text.trim(), text.len()));
if text.len() >= 5 && text.as_bytes().iter().all(|&c| c == b'-') {
off
Some(off)
} else {
0
None
}
}
}
#[cfg(test)]
mod tests {
#[test]
fn parse() {
use super::parse;
assert_eq!(parse("-----"), "-----".len());
assert_eq!(parse("--------"), "--------".len());
assert_eq!(parse(" -----"), " -----".len());
assert_eq!(parse("\t\t-----"), "\t\t-----".len());
assert_eq!(parse("\t\t-----\n"), "\t\t-----\n".len());
assert_eq!(parse("\t\t----- \n"), "\t\t----- \n".len());
assert_eq!(parse(""), 0);
assert_eq!(parse("----"), 0);
assert_eq!(parse(" ----"), 0);
assert_eq!(parse(" 0----"), 0);
assert_eq!(parse("0 ----"), 0);
assert_eq!(parse("0------"), 0);
assert_eq!(parse("----0----"), 0);
assert_eq!(parse("\t\t----"), 0);
assert_eq!(parse("------0"), 0);
assert_eq!(parse("----- 0"), 0);
}
assert_eq!(Rule::parse("-----"), Some("-----".len()));
assert_eq!(Rule::parse("--------"), Some("--------".len()));
assert_eq!(Rule::parse(" -----"), Some(" -----".len()));
assert_eq!(Rule::parse("\t\t-----"), Some("\t\t-----".len()));
assert_eq!(Rule::parse("\t\t-----\n"), Some("\t\t-----\n".len()));
assert_eq!(Rule::parse("\t\t----- \n"), Some("\t\t----- \n".len()));
assert_eq!(Rule::parse(""), None);
assert_eq!(Rule::parse("----"), None);
assert_eq!(Rule::parse(" ----"), None);
assert_eq!(Rule::parse(" None----"), None);
assert_eq!(Rule::parse("None ----"), None);
assert_eq!(Rule::parse("None------"), None);
assert_eq!(Rule::parse("----None----"), None);
assert_eq!(Rule::parse("\t\t----"), None);
assert_eq!(Rule::parse("------None"), None);
assert_eq!(Rule::parse("----- None"), None);
}
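
Rule::parse now reports a non-match as None rather than a zero offset, so the caller (parse_element in src/org.rs below) can use if let instead of comparing against 0. A crate-internal sketch:

use crate::elements::Rule;

fn rule_sketch() {
    // five or more dashes on their own line form a horizontal rule
    assert_eq!(Rule::parse("-----\nrest"), Some("-----\n".len()));
    assert_eq!(Rule::parse("---"), None);
}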


@@ -8,8 +8,9 @@ pub struct Snippet<'a> {
pub value: &'a str,
}
impl<'a> Snippet<'a> {
impl Snippet<'_> {
#[inline]
// return (snippet, offset)
pub(crate) fn parse(text: &str) -> Option<(Snippet<'_>, usize)> {
debug_assert!(text.starts_with("@@"));

src/elements/target.rs (new file, 53 lines)

@@ -0,0 +1,53 @@
use jetscii::Substring;
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Target<'a> {
pub target: &'a str,
}
impl Target<'_> {
#[inline]
// return (target, offset)
pub(crate) fn parse(text: &str) -> Option<(Target<'_>, usize)> {
debug_assert!(text.starts_with("<<"));
let bytes = text.as_bytes();
Substring::new(">>")
.find(text)
.filter(|&i| {
bytes[2] != b' '
&& bytes[i - 1] != b' '
&& bytes[2..i]
.iter()
.all(|&c| c != b'<' && c != b'\n' && c != b'>')
})
.map(|i| {
(
Target {
target: &text[2..i],
},
i + ">>".len(),
)
})
}
}
#[test]
fn parse() {
assert_eq!(
Target::parse("<<target>>"),
Some((Target { target: "target" }, "<<target>>".len()))
);
assert_eq!(
Target::parse("<<tar get>>"),
Some((Target { target: "tar get" }, "<<tar get>>".len()))
);
assert_eq!(Target::parse("<<target >>"), None);
assert_eq!(Target::parse("<< target>>"), None);
assert_eq!(Target::parse("<<ta<get>>"), None);
assert_eq!(Target::parse("<<ta>get>>"), None);
assert_eq!(Target::parse("<<ta\nget>>"), None);
assert_eq!(Target::parse("<<target>"), None);
}


@@ -9,7 +9,7 @@ pub struct Datetime<'a> {
pub(crate) dayname: &'a str,
}
impl<'a> Datetime<'a> {
impl Datetime<'_> {
pub fn year(&self) -> u32 {
u32::from_str(&self.date[0..4]).unwrap()
}
@@ -145,8 +145,8 @@ pub enum Timestamp<'a> {
Diary(&'a str),
}
impl<'a> Timestamp<'a> {
pub(crate) fn parse(text: &'a str) -> Option<(Timestamp<'a>, usize)> {
impl Timestamp<'_> {
pub(crate) fn parse(text: &str) -> Option<(Timestamp<'_>, usize)> {
if text.starts_with('<') {
Timestamp::parse_active(text).or_else(|| Timestamp::parse_diary(text))
} else if text.starts_with('[') {
@@ -156,7 +156,7 @@ impl<'a> Timestamp<'a> {
}
}
pub(crate) fn parse_active(text: &'a str) -> Option<(Timestamp<'a>, usize)> {
pub(crate) fn parse_active(text: &str) -> Option<(Timestamp<'_>, usize)> {
debug_assert!(text.starts_with('<'));
let bytes = text.as_bytes();
@@ -194,7 +194,7 @@ impl<'a> Timestamp<'a> {
))
}
pub(crate) fn parse_inactive(text: &'a str) -> Option<(Timestamp<'a>, usize)> {
pub(crate) fn parse_inactive(text: &str) -> Option<(Timestamp<'_>, usize)> {
debug_assert!(text.starts_with('['));
let bytes = text.as_bytes();
@@ -231,7 +231,7 @@ impl<'a> Timestamp<'a> {
))
}
fn parse_datetime(text: &'a str) -> Option<(Datetime<'a>, Option<Datetime<'a>>)> {
fn parse_datetime(text: &str) -> Option<(Datetime<'_>, Option<Datetime<'_>>)> {
if text.is_empty()
|| !text.starts_with(|c: char| c.is_ascii_digit())
|| !text.ends_with(|c: char| c.is_ascii_alphanumeric())


@@ -140,10 +140,8 @@
//! ```
pub mod elements;
pub mod export;
pub mod headline;
pub mod objects;
mod parser;
pub mod tools;
pub mod org;
#[cfg(feature = "serde")]
mod serde;
pub use parser::{Event, Parser};
pub use org::Org;


@@ -1,38 +0,0 @@
pub struct Entity<'a> {
pub name: &'a str,
pub contents: Option<&'a str>,
}
impl<'a> Entity<'a> {
pub fn parse(src: &'a str) -> Option<(Entity<'a>, usize)> {
expect!(src, 0, b'\\')?;
let name = 0;
if src.as_bytes()[name] == b'[' {
Some((
Entity {
name: &src[1..name],
contents: None,
},
name,
))
} else if src.as_bytes()[name] == b'{' {
Some((
Entity {
name: &src[1..name],
contents: None,
},
name,
))
} else {
Some((
Entity {
name: &src[1..name],
contents: None,
},
name,
))
}
}
}


@@ -1,23 +0,0 @@
/// objects
///
/// objects is something that included in an element.
pub(crate) mod cookie;
pub(crate) mod emphasis;
pub(crate) mod fn_ref;
pub(crate) mod inline_call;
pub(crate) mod inline_src;
pub(crate) mod link;
pub(crate) mod macros;
pub(crate) mod radio_target;
pub(crate) mod snippet;
pub(crate) mod target;
pub(crate) mod timestamp;
pub use self::cookie::Cookie;
pub use self::fn_ref::FnRef;
pub use self::inline_call::InlineCall;
pub use self::inline_src::InlineSrc;
pub use self::link::Link;
pub use self::macros::Macros;
pub use self::snippet::Snippet;
pub use self::timestamp::*;


@@ -1,44 +0,0 @@
use jetscii::Substring;
// TODO: text-markup, entities, latex-fragments, subscript and superscript
#[inline]
pub fn parse(src: &str) -> Option<(&str, usize)> {
debug_assert!(src.starts_with("<<<"));
let bytes = src.as_bytes();
let (target, off) = Substring::new(">>>")
.find(src)
.filter(|&i| {
bytes[3] != b' '
&& bytes[i - 1] != b' '
&& bytes[3..i]
.iter()
.all(|&c| c != b'<' && c != b'\n' && c != b'>')
})
.map(|i| (&src[3..i], i + ">>>".len()))?;
Some((target, off))
}
#[cfg(test)]
mod tests {
#[test]
fn parse() {
use super::parse;
assert_eq!(
parse("<<<target>>>"),
Some(("target", "<<<target>>>".len()))
);
assert_eq!(
parse("<<<tar get>>>"),
Some(("tar get", "<<<tar get>>>".len()))
);
assert_eq!(parse("<<<target >>>"), None);
assert_eq!(parse("<<< target>>>"), None);
assert_eq!(parse("<<<ta<get>>>"), None);
assert_eq!(parse("<<<ta>get>>>"), None);
assert_eq!(parse("<<<ta\nget>>>"), None);
assert_eq!(parse("<<<target>>"), None);
}
}


@@ -1,36 +0,0 @@
use jetscii::Substring;
#[inline]
pub fn parse(text: &str) -> Option<(&str, usize)> {
debug_assert!(text.starts_with("<<"));
let bytes = text.as_bytes();
Substring::new(">>")
.find(text)
.filter(|&i| {
bytes[2] != b' '
&& bytes[i - 1] != b' '
&& bytes[2..i]
.iter()
.all(|&c| c != b'<' && c != b'\n' && c != b'>')
})
.map(|i| (&text[2..i], i + ">>".len()))
}
#[cfg(test)]
mod tests {
#[test]
fn parse() {
use super::parse;
assert_eq!(parse("<<target>>"), Some(("target", "<<target>>".len())));
assert_eq!(parse("<<tar get>>"), Some(("tar get", "<<tar get>>".len())));
assert_eq!(parse("<<target >>"), None);
assert_eq!(parse("<< target>>"), None);
assert_eq!(parse("<<ta<get>>"), None);
assert_eq!(parse("<<ta>get>>"), None);
assert_eq!(parse("<<ta\nget>>"), None);
assert_eq!(parse("<<target>"), None);
}
}

src/org.rs (new file, 647 lines)

@@ -0,0 +1,647 @@
use crate::elements::*;
use indextree::{Arena, NodeId};
use jetscii::bytes;
use memchr::{memchr_iter, memrchr_iter};
pub struct Org<'a> {
pub(crate) arena: Arena<Element<'a>>,
pub(crate) root: NodeId,
text: &'a str,
}
impl<'a> Org<'a> {
pub fn new(text: &'a str) -> Self {
let mut arena = Arena::new();
let root = arena.new_node(Element::Document {
begin: 0,
end: text.len(),
});
Org { arena, root, text }
}
pub fn finish(&self) -> bool {
self.arena[self.root].first_child().is_some()
}
pub fn parse(&mut self) {
if self.finish() {
return;
}
let mut node = self.root;
loop {
match self.arena[node].data {
Element::Document { begin, end, .. }
| Element::Headline {
contents_begin: begin,
contents_end: end,
..
} => {
let mut begin = begin;
if begin < end {
let off = Headline::find_level(&self.text[begin..end], std::usize::MAX);
if off != 0 {
let (contents_begin, contents_end) =
skip_empty_lines(&self.text[begin..begin + off]);
let section = Element::Section {
begin,
end: begin + off,
contents_begin: begin + contents_begin,
contents_end: begin + contents_end,
};
let new_node = self.arena.new_node(section);
node.append(new_node, &mut self.arena).unwrap();
begin += off;
}
}
while begin < end {
let (headline, off, end) = Headline::parse(&self.text[begin..end], &[]);
let headline = Element::Headline {
headline,
begin,
end: begin + end,
contents_begin: begin + off,
contents_end: begin + end,
};
let new_node = self.arena.new_node(headline);
node.append(new_node, &mut self.arena).unwrap();
begin += end;
}
}
Element::Section {
contents_begin,
contents_end,
..
}
| Element::Block {
contents_begin,
contents_end,
..
}
| Element::ListItem {
contents_begin,
contents_end,
..
} => {
self.parse_elements_children(contents_begin, contents_end, node);
}
Element::Paragraph {
contents_begin,
contents_end,
..
}
| Element::Bold {
contents_begin,
contents_end,
..
}
| Element::Underline {
contents_begin,
contents_end,
..
}
| Element::Italic {
contents_begin,
contents_end,
..
}
| Element::Strike {
contents_begin,
contents_end,
..
} => {
self.parse_objects_children(contents_begin, contents_end, node);
}
Element::List {
list: List { indent, .. },
contents_begin,
contents_end,
..
} => {
self.parse_list_items(contents_begin, contents_end, indent, node);
}
_ => (),
}
if let Some(next_node) = self.next_node(node) {
node = next_node;
} else {
break;
}
}
}
fn next_node(&self, mut node: NodeId) -> Option<NodeId> {
if let Some(child) = self.arena[node].first_child() {
return Some(child);
}
loop {
if let Some(sibling) = self.arena[node].next_sibling() {
return Some(sibling);
} else if let Some(parent) = self.arena[node].parent() {
node = parent;
} else {
return None;
}
}
}
fn parse_elements_children(&mut self, mut begin: usize, end: usize, node: NodeId) {
'out: while begin < end {
let text = &self.text[begin..end];
let mut pos = 0;
for i in memchr_iter(b'\n', text.as_bytes()) {
if text.as_bytes()[pos..i].iter().all(u8::is_ascii_whitespace) {
let (end, _) = skip_empty_lines(&text[i..]);
let new_node = self.arena.new_node(Element::Paragraph {
begin,
end: begin + i + end,
contents_begin: begin,
contents_end: begin + pos,
});
node.append(new_node, &mut self.arena).unwrap();
begin += i + end;
continue 'out;
} else if let Some((ty, off)) = self.parse_element(begin + pos, end) {
let new_node = self.arena.new_node(Element::Paragraph {
begin,
end: begin + pos,
contents_begin: begin,
contents_end: begin + pos,
});
node.append(new_node, &mut self.arena).unwrap();
let new_node = self.arena.new_node(ty);
node.append(new_node, &mut self.arena).unwrap();
begin += pos + off;
continue 'out;
}
pos = i + 1;
}
let new_node = self.arena.new_node(Element::Paragraph {
begin,
end,
contents_begin: begin,
contents_end: if text.ends_with('\n') { end - 1 } else { end },
});
begin = end;
node.append(new_node, &mut self.arena).unwrap();
}
}
fn parse_element(&self, begin: usize, end: usize) -> Option<(Element<'a>, usize)> {
let text = &self.text[begin..end];
if let Some((fn_def, off, end)) = FnDef::parse(text) {
let fn_def = Element::FnDef {
begin,
end: begin + end,
contents_begin: begin + off,
contents_end: begin + end,
fn_def,
};
return Some((fn_def, end));
} else if let Some((list, limit, end)) = List::parse(text) {
let list = Element::List {
list,
begin,
end: begin + end,
contents_begin: begin,
contents_end: begin + limit,
};
return Some((list, end));
}
let line_begin = text.find(|c: char| !c.is_ascii_whitespace()).unwrap_or(0);
let tail = &text[line_begin..];
if let Some((clock, end)) = Clock::parse(tail) {
let clock = Element::Clock {
clock,
begin,
end: begin + line_begin + end,
};
return Some((clock, line_begin + end));
}
// TODO: LaTeX environment
if tail.starts_with("\\begin{") {}
// rule
if tail.starts_with("-----") {
if let Some(end) = Rule::parse(tail) {
let rule = Element::Rule {
begin,
end: begin + line_begin + end,
};
return Some((rule, line_begin + end));
}
}
if tail.starts_with(':') {
if let Some((drawer, off, limit, end)) = Drawer::parse(tail) {
let drawer = Element::Drawer {
drawer,
begin,
end: begin + line_begin + end,
contents_begin: begin + line_begin + off,
contents_end: begin + line_begin + limit,
};
return Some((drawer, line_begin + end));
}
}
// fixed width
if tail.starts_with(": ") || tail.starts_with(":\n") {
// let end = line_ends
// .skip_while(|&i| {
// text[i + 1..].starts_with(": ") || text[i + 1..].starts_with(":\n")
// })
// .next()
// .map(|i| i + 1)
// .unwrap_or_else(|| text.len());
// let off = end - pos;
// brk!(Element::FixedWidth(&tail[0..off]), off);
}
// comment
if tail.starts_with("# ") || tail.starts_with("#\n") {
// let end = line_ends
// .skip_while(|&i| {
// text[i + 1..].starts_with("# ") || text[i + 1..].starts_with("#\n")
// })
// .next()
// .map(|i| i + 1)
// .unwrap_or_else(|| text.len());
// let off = end - pos;
// brk!(Element::Comment(&tail[0..off]), off);
}
if tail.starts_with("#+") {
if let Some((block, off, limit, end)) = Block::parse(tail) {
let block = Element::Block {
block,
begin,
end: begin + line_begin + end,
contents_begin: begin + line_begin + off,
contents_end: begin + line_begin + limit,
};
return Some((block, line_begin + end));
} else if let Some((dyn_block, off, limit, end)) = DynBlock::parse(tail) {
let dyn_block = Element::DynBlock {
dyn_block,
begin,
end: begin + line_begin + end,
contents_begin: begin + line_begin + off,
contents_end: begin + line_begin + limit,
};
return Some((dyn_block, line_begin + end));
} else if let Some((key, option, value, end)) = Keyword::parse(tail) {
if key.eq_ignore_ascii_case("CALL") {
let call = Element::BabelCall {
value,
begin,
end: begin + line_begin + end,
};
return Some((call, line_begin + end));
} else {
let kw = Element::Keyword {
keyword: Keyword { key, option, value },
begin,
end: begin + line_begin + end,
};
return Some((kw, line_begin + end));
}
}
}
None
}
fn parse_objects_children(&mut self, mut begin: usize, end: usize, node: NodeId) {
'out: while begin < end {
let bytes = self.text[begin..end].as_bytes();
match bytes[0] {
b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => {
if let Some((ty, off)) = self.parse_object(begin + 1, end) {
let new_node = self.arena.new_node(Element::Text {
value: &self.text[begin..=begin],
begin,
end,
});
node.append(new_node, &mut self.arena).unwrap();
let new_node = self.arena.new_node(ty);
node.append(new_node, &mut self.arena).unwrap();
begin += 1 + off;
continue;
}
}
_ => {
if let Some((ty, off)) = self.parse_object(begin, end) {
let new_node = self.arena.new_node(ty);
node.append(new_node, &mut self.arena).unwrap();
begin += off;
continue;
}
}
}
let bs = bytes!(b'@', b' ', b'"', b'(', b'\n', b'{', b'<', b'[');
let mut pos = 0;
while let Some(off) = bs.find(&bytes[pos..]) {
pos += off;
assert!(begin + pos <= end);
match bytes[pos] {
b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => {
if let Some((ty, off)) = self.parse_object(begin + pos + 1, end) {
let new_node = self.arena.new_node(Element::Text {
value: &self.text[begin..=begin + pos],
begin,
end,
});
node.append(new_node, &mut self.arena).unwrap();
let new_node = self.arena.new_node(ty);
node.append(new_node, &mut self.arena).unwrap();
begin += pos + 1 + off;
continue 'out;
}
}
_ => {
if let Some((ty, off)) = self.parse_object(begin + pos, end) {
let new_node = self.arena.new_node(Element::Text {
value: &self.text[begin..begin + pos],
begin,
end,
});
node.append(new_node, &mut self.arena).unwrap();
let new_node = self.arena.new_node(ty);
node.append(new_node, &mut self.arena).unwrap();
begin += pos + off;
continue 'out;
}
}
}
pos += 1;
}
let new_node = self.arena.new_node(Element::Text {
value: &self.text[begin..end],
begin,
end,
});
node.append(new_node, &mut self.arena).unwrap();
begin = end;
}
}
fn parse_object(&self, begin: usize, end: usize) -> Option<(Element<'a>, usize)> {
let text = &self.text[begin..end];
if text.len() < 3 {
None
} else {
let bytes = text.as_bytes();
match bytes[0] {
b'@' if bytes[1] == b'@' => Snippet::parse(text).map(|(snippet, off)| {
(
Element::Snippet {
snippet,
begin,
end: begin + off,
},
off,
)
}),
b'{' if bytes[1] == b'{' && bytes[2] == b'{' => {
Macros::parse(text).map(|(macros, off)| {
(
Element::Macros {
macros,
begin,
end: begin + off,
},
off,
)
})
}
b'<' if bytes[1] == b'<' => {
if bytes[2] == b'<' {
RadioTarget::parse(text).map(|(radio_target, off)| {
(
Element::RadioTarget {
radio_target,
begin,
end: begin + off,
},
off,
)
})
} else {
Target::parse(text).map(|(target, off)| {
(
Element::Target {
target,
begin,
end: begin + off,
},
off,
)
})
}
}
b'<' => Timestamp::parse_active(text)
.or_else(|| (Timestamp::parse_diary(text)))
.map(|(timestamp, off)| {
(
Element::Timestamp {
timestamp,
begin,
end: begin + off,
},
off,
)
}),
b'[' => {
if text[1..].starts_with("fn:") {
FnRef::parse(text).map(|(fn_ref, off)| {
(
Element::FnRef {
fn_ref,
begin,
end: begin + off,
},
off,
)
})
} else if bytes[1] == b'[' {
Link::parse(text).map(|(link, off)| {
(
Element::Link {
link,
begin,
end: begin + off,
},
off,
)
})
} else {
Cookie::parse(text)
.map(|(cookie, off)| {
(
Element::Cookie {
cookie,
begin,
end: begin + off,
},
off,
)
})
.or_else(|| {
Timestamp::parse_inactive(text).map(|(timestamp, off)| {
(
Element::Timestamp {
timestamp,
begin,
end: begin + off,
},
off,
)
})
})
}
}
b'*' => emphasis::parse(text, b'*').map(|off| {
(
Element::Bold {
begin,
contents_begin: begin + 1,
contents_end: begin + off - 1,
end: begin + off,
},
off,
)
}),
b'+' => emphasis::parse(text, b'+').map(|off| {
(
Element::Strike {
begin,
contents_begin: begin + 1,
contents_end: begin + off - 1,
end: begin + off,
},
off,
)
}),
b'/' => emphasis::parse(text, b'/').map(|off| {
(
Element::Italic {
begin,
contents_begin: begin + 1,
contents_end: begin + off - 1,
end: begin + off,
},
off,
)
}),
b'_' => emphasis::parse(text, b'_').map(|off| {
(
Element::Underline {
begin,
contents_begin: begin + 1,
contents_end: begin + off - 1,
end: begin + off,
},
off,
)
}),
b'=' => emphasis::parse(text, b'=').map(|off| {
(
Element::Verbatim {
begin,
end: begin + off,
value: &text[1..off - 1],
},
off,
)
}),
b'~' => emphasis::parse(text, b'~').map(|off| {
(
Element::Code {
begin,
end: begin + off,
value: &text[1..off - 1],
},
off,
)
}),
b's' if text.starts_with("src_") => {
InlineSrc::parse(text).map(|(inline_src, off)| {
(
Element::InlineSrc {
inline_src,
begin,
end: begin + off,
},
off,
)
})
}
b'c' if text.starts_with("call_") => {
InlineCall::parse(text).map(|(inline_call, off)| {
(
Element::InlineCall {
inline_call,
begin,
end: begin + off,
},
off,
)
})
}
_ => None,
}
}
}
fn parse_list_items(&mut self, mut begin: usize, end: usize, indent: usize, node: NodeId) {
while begin < end {
let text = &self.text[begin..end];
let (list_item, off, end) = ListItem::parse(text, indent);
let list_item = Element::ListItem {
list_item,
begin,
end: begin + end,
contents_begin: begin + off,
contents_end: begin + end,
};
let new_node = self.arena.new_node(list_item);
node.append(new_node, &mut self.arena).unwrap();
begin += end;
}
}
}
fn skip_empty_lines(text: &str) -> (usize, usize) {
let mut i = 0;
let mut j = text.len();
for pos in memchr_iter(b'\n', text.as_bytes()) {
if text.as_bytes()[i..pos].iter().all(u8::is_ascii_whitespace) {
i = pos + 1;
} else {
break;
}
}
for pos in memrchr_iter(b'\n', text.as_bytes()) {
if text.as_bytes()[pos..j].iter().all(u8::is_ascii_whitespace) {
j = pos;
} else {
break;
}
}
(i, j)
}
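
Taken together: Org::new seeds the indextree arena with a single Document node, parse() grows the tree in one pass (next_node is a hand-rolled depth-first walk over the arena), and with the default serde feature the finished tree can be serialized through the impl in src/serde.rs below. An end-to-end sketch; serde_json is assumed to be a dependency of the consuming crate, and the exact JSON field layout is whatever that Serialize impl emits:

use orgize::Org;

fn main() {
    let mut org = Org::new("* TODO Write docs\nSome body text.\n");
    org.parse();
    assert!(org.finish());

    // `Org` implements Serialize when the default `serde` feature is enabled
    let json = serde_json::to_string_pretty(&org).unwrap();
    println!("{}", json);
}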


@@ -1,703 +0,0 @@
//! Parser
use crate::{elements::*, headline::*, objects::*};
use jetscii::bytes;
use memchr::memchr_iter;
#[derive(Copy, Clone, Debug)]
enum Container {
Headline(usize),
Section(usize),
Drawer,
Paragraph,
CtrBlock,
QteBlock,
SplBlock,
DynBlock,
List(usize, bool),
ListItem,
Italic,
Strike,
Bold,
Underline,
}
#[derive(Debug)]
pub enum Event<'a> {
HeadlineBeg(Headline<'a>),
HeadlineEnd,
SectionBeg,
SectionEnd,
ParagraphBeg,
ParagraphEnd,
CtrBlockBeg,
CtrBlockEnd,
QteBlockBeg,
QteBlockEnd,
SplBlockBeg {
name: &'a str,
args: Option<&'a str>,
},
SplBlockEnd,
DynBlockBeg {
name: &'a str,
args: Option<&'a str>,
},
DynBlockEnd,
CommentBlock {
args: Option<&'a str>,
cont: &'a str,
},
ExampleBlock {
args: Option<&'a str>,
cont: &'a str,
},
ExportBlock {
args: Option<&'a str>,
cont: &'a str,
},
SrcBlock {
args: Option<&'a str>,
cont: &'a str,
},
VerseBlock {
args: Option<&'a str>,
cont: &'a str,
},
ListBeg {
indent: usize,
ordered: bool,
},
ListEnd {
indent: usize,
ordered: bool,
},
ListItemBeg {
bullet: &'a str,
},
ListItemEnd,
Call {
value: &'a str,
},
Clock(Clock<'a>),
Comment(&'a str),
FixedWidth(&'a str),
Planning(Planning<'a>),
DrawerBeg(&'a str),
DrawerEnd,
TableStart,
TableEnd,
TableCell,
LatexEnv,
FnDef {
label: &'a str,
cont: &'a str,
},
Keyword(Keyword<'a>),
Rule,
Timestamp(Timestamp<'a>),
Cookie(Cookie<'a>),
FnRef(FnRef<'a>),
InlineCall(InlineCall<'a>),
InlineSrc(InlineSrc<'a>),
Link(Link<'a>),
Macros(Macros<'a>),
RadioTarget {
target: &'a str,
},
Snippet(Snippet<'a>),
Target {
target: &'a str,
},
BoldBeg,
BoldEnd,
ItalicBeg,
ItalicEnd,
StrikeBeg,
StrikeEnd,
UnderlineBeg,
UnderlineEnd,
Verbatim(&'a str),
Code(&'a str),
Text(&'a str),
}
pub struct Parser<'a> {
text: &'a str,
stack: Vec<(Container, usize, usize)>,
off: usize,
ele_buf: Option<(Event<'a>, usize, usize, usize)>,
obj_buf: Option<(Event<'a>, usize, usize, usize)>,
todo_keywords: &'a [&'a str],
}
impl<'a> Parser<'a> {
/// creates a new parser from string
pub fn new(text: &'a str) -> Parser<'a> {
Parser {
text,
stack: Vec::new(),
off: 0,
ele_buf: None,
obj_buf: None,
todo_keywords: DEFAULT_TODO_KEYWORDS,
}
}
/// creates a new parser from string, with the specified keywords
pub fn with_todo_keywrods(text: &'a str, todo_keywords: &'a [&'a str]) -> Parser<'a> {
Parser {
text,
stack: Vec::new(),
off: 0,
ele_buf: None,
obj_buf: None,
todo_keywords,
}
}
/// returns current offset
pub fn offset(&self) -> usize {
self.off
}
/// returns current stack depth
pub fn stack_depth(&self) -> usize {
self.stack.len()
}
/// set todo keywords
pub fn set_todo_keywords(&mut self, todo_keywords: &'a [&'a str]) {
self.todo_keywords = todo_keywords;
}
/// set text
pub fn set_text(&mut self, text: &'a str) {
self.off = 0;
self.stack.clear();
self.ele_buf = None;
self.obj_buf = None;
self.text = text;
}
/// skip the current container if exists and return its Event
pub fn skip_container(&mut self) -> Option<Event<'a>> {
let (container, _, end) = self.stack.pop()?;
self.off = end;
Some(match container {
Container::Bold => Event::BoldEnd,
Container::Drawer => Event::DrawerEnd,
Container::CtrBlock => Event::CtrBlockEnd,
Container::DynBlock => Event::DynBlockEnd,
Container::Headline(_) => Event::HeadlineEnd,
Container::Italic => Event::ItalicEnd,
Container::List(indent, ordered) => Event::ListEnd { indent, ordered },
Container::ListItem => Event::ListItemEnd,
Container::Paragraph => Event::ParagraphEnd,
Container::QteBlock => Event::QteBlockEnd,
Container::Section(_) => Event::SectionEnd,
Container::SplBlock => Event::SplBlockEnd,
Container::Strike => Event::StrikeEnd,
Container::Underline => Event::UnderlineEnd,
})
}
fn next_section_or_headline(&mut self, text: &'a str) -> Event<'a> {
let end = Headline::find_level(text, std::usize::MAX);
if end != 0 {
self.push_stack(Container::Section(self.off), end, end);
Event::SectionBeg
} else {
self.next_headline(text)
}
}
fn next_headline(&mut self, text: &'a str) -> Event<'a> {
let (hdl, off, end) = Headline::parse(text, self.todo_keywords);
self.push_stack(Container::Headline(self.off + off), end, end);
self.off += off;
Event::HeadlineBeg(hdl)
}
fn next_ele(&mut self, text: &'a str) -> Event<'a> {
fn skip_empty_lines(text: &str) -> usize {
let mut i = 0;
for pos in memchr_iter(b'\n', text.as_bytes()) {
if text.as_bytes()[i..pos].iter().all(u8::is_ascii_whitespace) {
i = pos + 1;
} else {
return i;
}
}
if text.as_bytes()[i..].iter().all(u8::is_ascii_whitespace) {
text.len()
} else {
i
}
}
let start = skip_empty_lines(text);
if start == text.len() {
self.off += text.len();
return self.end();
};
let tail = &text[start..];
let (ele, off, limit, end) = self
.ele_buf
.take()
.or_else(|| self.real_next_ele(tail))
.unwrap_or_else(|| {
let mut pos = 0;
for i in memchr_iter(b'\n', tail.as_bytes()) {
if tail.as_bytes()[pos..i].iter().all(u8::is_ascii_whitespace) {
return (Event::ParagraphBeg, 0, pos - 1 + start, i + 1 + start);
} else if let Some(buf) = self.real_next_ele(&tail[pos..]) {
self.ele_buf = Some(buf);
return (Event::ParagraphBeg, 0, pos - 1 + start, pos + start);
}
pos = i + 1;
}
let len = text.len();
(
Event::ParagraphBeg,
0,
if text.ends_with('\n') { len - 1 } else { len },
len,
)
});
debug_assert!(
(limit == 0 && end == 0) || (off <= limit && limit <= end && end <= text.len()),
"{} <= {} <= {} <= {}",
off,
limit,
end,
text.len()
);
match ele {
Event::DrawerBeg(_) => self.push_stack(Container::Drawer, limit, end),
Event::ParagraphBeg => self.push_stack(Container::Paragraph, limit, end),
Event::QteBlockBeg => self.push_stack(Container::QteBlock, limit, end),
Event::CtrBlockBeg => self.push_stack(Container::CtrBlock, limit, end),
Event::SplBlockBeg { .. } => self.push_stack(Container::SplBlock, limit, end),
Event::DynBlockBeg { .. } => self.push_stack(Container::DynBlock, limit, end),
Event::ListBeg { ordered, indent } => {
self.push_stack(Container::List(indent, ordered), limit, end)
}
_ => (),
}
self.off += off + start;
ele
}
// returns (event, offset, container limit, container end)
fn real_next_ele(&mut self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> {
debug_assert!(!text.starts_with('\n'));
if let Some((label, cont, off)) = fn_def::parse(text) {
return Some((Event::FnDef { label, cont }, off + 1, 0, 0));
} else if let Some((indent, ordered, limit, end)) = list::parse(text) {
return Some((Event::ListBeg { indent, ordered }, 0, limit, end));
}
let (tail, line_begin) = text
.find(|c| c != ' ')
.map(|off| (&text[off..], off))
.unwrap_or((text, 0));
if let Some((clock, off)) = Clock::parse(tail) {
return Some((Event::Clock(clock), off + line_begin, 0, 0));
}
// TODO: LaTeX environment
if tail.starts_with("\\begin{") {}
// rule
if tail.starts_with("-----") {
let off = rule::parse(tail);
if off != 0 {
return Some((Event::Rule, off + line_begin, 0, 0));
}
}
if tail.starts_with(':') {
if let Some((name, off, limit, end)) = drawer::parse(tail) {
return Some((
Event::DrawerBeg(name),
off + line_begin,
limit + line_begin,
end + line_begin,
));
}
}
// fixed width
if tail.starts_with(": ") || tail.starts_with(":\n") {
// let end = line_ends
// .skip_while(|&i| {
// text[i + 1..].starts_with(": ") || text[i + 1..].starts_with(":\n")
// })
// .next()
// .map(|i| i + 1)
// .unwrap_or_else(|| text.len());
// let off = end - pos;
// brk!(Element::FixedWidth(&tail[0..off]), off);
}
// comment
if tail.starts_with("# ") || tail.starts_with("#\n") {
// let end = line_ends
// .skip_while(|&i| {
// text[i + 1..].starts_with("# ") || text[i + 1..].starts_with("#\n")
// })
// .next()
// .map(|i| i + 1)
// .unwrap_or_else(|| text.len());
// let off = end - pos;
// brk!(Element::Comment(&tail[0..off]), off);
}
if tail.starts_with("#+") {
block::parse(tail)
.map(|(name, args, begin, limit, end)| {
let cont = &tail[begin..limit];
match &*name.to_uppercase() {
"COMMENT" => (Event::CommentBlock { args, cont }, end + line_begin, 0, 0),
"EXAMPLE" => (Event::ExampleBlock { args, cont }, end + line_begin, 0, 0),
"EXPORT" => (Event::ExportBlock { args, cont }, end + line_begin, 0, 0),
"SRC" => (Event::SrcBlock { args, cont }, end + line_begin, 0, 0),
"VERSE" => (Event::VerseBlock { args, cont }, end + line_begin, 0, 0),
"CENTER" => (
Event::CtrBlockBeg,
begin + line_begin,
limit + line_begin,
end + line_begin,
),
"QUOTE" => (
Event::QteBlockBeg,
begin + line_begin,
limit + line_begin,
end + line_begin,
),
_ => (
Event::SplBlockBeg { name, args },
begin + line_begin,
limit + line_begin,
end + line_begin,
),
}
})
.or_else(|| {
dyn_block::parse(tail).map(|(name, args, begin, limit, end)| {
(
Event::DynBlockBeg { name, args },
begin + line_begin,
limit + line_begin,
end + line_begin,
)
})
})
.or_else(|| {
Keyword::parse(tail).map(|(key, option, value, off)| {
(
if key.eq_ignore_ascii_case("CALL") {
Event::Call { value }
} else {
Event::Keyword(Keyword::new(key, option, value))
},
off + line_begin,
0,
0,
)
})
})
} else {
None
}
}
fn next_obj(&mut self, text: &'a str) -> Event<'a> {
let bytes = text.as_bytes();
let (obj, off, limit, end) = self
.obj_buf
.take()
.or_else(|| match bytes[0] {
b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => {
if let Some(buf) = self.real_next_obj(&text[1..]) {
self.obj_buf = Some(buf);
Some((Event::Text(&text[0..1]), 1, 0, 0))
} else {
None
}
}
_ => self.real_next_obj(text),
})
.unwrap_or_else(|| {
let bs = bytes!(b'@', b' ', b'"', b'(', b'\n', b'{', b'<', b'[');
let mut pos = 0;
while let Some(off) = bs.find(&bytes[pos..]) {
pos += off;
match bytes[pos] {
b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => {
if let Some(buf) = self.real_next_obj(&text[pos + 1..]) {
self.obj_buf = Some(buf);
return (Event::Text(&text[0..=pos]), pos + 1, 0, 0);
}
}
_ => {
if let Some(buf) = self.real_next_obj(&text[pos..]) {
self.obj_buf = Some(buf);
return (Event::Text(&text[0..pos]), pos, 0, 0);
}
}
}
pos += 1;
}
(Event::Text(text), text.len(), 0, 0)
});
debug_assert!(
(limit == 0 && end == 0) || (off <= limit && limit <= end && end <= text.len()),
"{} <= {} <= {} <= {}",
off,
limit,
end,
text.len()
);
match obj {
Event::UnderlineBeg => self.push_stack(Container::Underline, limit, end),
Event::StrikeBeg => self.push_stack(Container::Strike, limit, end),
Event::ItalicBeg => self.push_stack(Container::Italic, limit, end),
Event::BoldBeg => self.push_stack(Container::Bold, limit, end),
_ => (),
}
self.off += off;
obj
}
fn real_next_obj(&self, text: &'a str) -> Option<(Event<'a>, usize, usize, usize)> {
if text.len() < 3 {
None
} else {
let bytes = text.as_bytes();
match bytes[0] {
b'@' if bytes[1] == b'@' => {
Snippet::parse(text).map(|(snippet, off)| (Event::Snippet(snippet), off, 0, 0))
}
b'{' if bytes[1] == b'{' && bytes[2] == b'{' => {
Macros::parse(text).map(|(macros, off)| (Event::Macros(macros), off, 0, 0))
}
b'<' if bytes[1] == b'<' => {
if bytes[2] == b'<' {
radio_target::parse(text)
.map(|(target, off)| (Event::RadioTarget { target }, off, 0, 0))
} else {
target::parse(text)
.map(|(target, off)| (Event::Target { target }, off, 0, 0))
}
}
b'<' => Timestamp::parse_active(text)
.or_else(|| Timestamp::parse_diary(text))
.map(|(ts, off)| (Event::Timestamp(ts), off, 0, 0)),
b'[' => {
if text[1..].starts_with("fn:") {
FnRef::parse(text).map(|(fn_ref, off)| (Event::FnRef(fn_ref), off, 0, 0))
} else if bytes[1] == b'[' {
Link::parse(text).map(|(link, off)| (Event::Link(link), off, 0, 0))
} else if let Some((cookie, off)) = Cookie::parse(text) {
Some((Event::Cookie(cookie), off, 0, 0))
} else {
Timestamp::parse_inactive(text)
.map(|(ts, off)| (Event::Timestamp(ts), off, 0, 0))
}
}
b'*' => emphasis::parse(text, b'*').map(|end| (Event::BoldBeg, 1, end - 1, end)),
b'+' => emphasis::parse(text, b'+').map(|end| (Event::StrikeBeg, 1, end - 1, end)),
b'/' => emphasis::parse(text, b'/').map(|end| (Event::ItalicBeg, 1, end - 1, end)),
b'_' => {
emphasis::parse(text, b'_').map(|end| (Event::UnderlineBeg, 1, end - 1, end))
}
b'=' => emphasis::parse(text, b'=')
.map(|end| (Event::Verbatim(&text[1..end - 1]), end, 0, 0)),
b'~' => emphasis::parse(text, b'~')
.map(|end| (Event::Code(&text[1..end - 1]), end, 0, 0)),
b's' if text.starts_with("src_") => {
InlineSrc::parse(text).map(|(src, off)| (Event::InlineSrc(src), off, 0, 0))
}
b'c' if text.starts_with("call_") => {
InlineCall::parse(text).map(|(call, off)| (Event::InlineCall(call), off, 0, 0))
}
_ => None,
}
}
}
fn next_list_item(&self, text: &'a str, indent: usize) -> (&'a str, usize, usize, usize) {
use std::iter::once;
debug_assert!(&text[0..indent].trim().is_empty());
let off = &text[indent..].find(' ').unwrap() + 1 + indent;
let bytes = text.as_bytes();
let mut lines = memchr_iter(b'\n', bytes)
.map(|i| i + 1)
.chain(once(text.len()));
let mut pos = lines.next().unwrap();
for i in lines {
let line = &text[pos..i];
if let Some(line_indent) = line.find(|c: char| !c.is_whitespace()) {
if line_indent == indent {
return (&text[indent..off], off, pos, pos);
}
}
pos = i;
}
(&text[indent..off], off, text.len(), text.len())
}
#[inline]
fn push_stack(&mut self, container: Container, limit: usize, end: usize) {
self.stack
.push((container, self.off + limit, self.off + end));
}
#[inline]
fn end(&mut self) -> Event<'a> {
let (container, _, _) = self.stack.pop().unwrap();
match container {
Container::Bold => Event::BoldEnd,
Container::Drawer => Event::DrawerEnd,
Container::CtrBlock => Event::CtrBlockEnd,
Container::DynBlock => Event::DynBlockEnd,
Container::Headline(_) => Event::HeadlineEnd,
Container::Italic => Event::ItalicEnd,
Container::List(indent, ordered) => Event::ListEnd { indent, ordered },
Container::ListItem => Event::ListItemEnd,
Container::Paragraph => Event::ParagraphEnd,
Container::QteBlock => Event::QteBlockEnd,
Container::Section(_) => Event::SectionEnd,
Container::SplBlock => Event::SplBlockEnd,
Container::Strike => Event::StrikeEnd,
Container::Underline => Event::UnderlineEnd,
}
}
}
impl<'a> Iterator for Parser<'a> {
type Item = Event<'a>;
fn next(&mut self) -> Option<Event<'a>> {
if let Some(&(container, limit, end)) = self.stack.last() {
// eprint!("{:1$}", ' ', self.stack_depth());
debug_assert!(
self.off <= limit && limit <= end && end <= self.text.len(),
"{} <= {} <= {} <= {}",
self.off,
limit,
end,
self.text.len()
);
let tail = &self.text[self.off..limit];
// eprintln!("{:?} {:?}", container, tail);
Some(match container {
Container::Headline(beg) => {
if self.off >= limit {
self.off = end;
self.stack.pop();
Event::HeadlineEnd
} else if self.off == beg {
self.next_section_or_headline(tail)
} else {
self.next_headline(tail)
}
}
Container::Drawer
| Container::DynBlock
| Container::CtrBlock
| Container::QteBlock
| Container::SplBlock
| Container::ListItem => {
if self.off >= limit {
self.off = end;
self.end()
} else {
self.next_ele(tail)
}
}
Container::Section(beg) => {
                        // planning info, if any, should be the first line of a section
if self.off >= limit {
self.off = end;
self.stack.pop();
Event::SectionEnd
} else if self.off == beg {
if let Some((planning, off)) = Planning::parse(tail) {
self.off += off;
Event::Planning(planning)
} else {
self.next_ele(tail)
}
} else {
self.next_ele(tail)
}
}
Container::List(indent, ordered) => {
if self.off < limit {
let (bullet, off, limit, end) = self.next_list_item(tail, indent);
self.push_stack(Container::ListItem, limit, end);
self.off += off;
Event::ListItemBeg { bullet }
} else {
self.off = end;
self.stack.pop();
Event::ListEnd { indent, ordered }
}
}
Container::Paragraph
| Container::Bold
| Container::Underline
| Container::Italic
| Container::Strike => {
if self.off >= limit {
self.off = end;
self.end()
} else {
self.next_obj(tail)
}
}
})
} else if self.off < self.text.len() {
Some(self.next_section_or_headline(&self.text[self.off..]))
} else {
None
}
}
}
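
For reference, a minimal sketch (not part of this commit) of driving the event iterator above, assuming the `Parser` type and `Event` enum are exported from the crate root:

use orgize::{Event, Parser};

// collect the contents of every inline code span (~...~) in a document
fn collect_inline_code(src: &str) -> Vec<&str> {
    Parser::new(src)
        .filter_map(|event| match event {
            // Event::Code carries the text between the tilde markers
            Event::Code(value) => Some(value),
            _ => None,
        })
        .collect()
}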

458
src/serde.rs Normal file
View file

@ -0,0 +1,458 @@
use indextree::{Arena, NodeId};
use serde::ser::{SerializeSeq, SerializeStruct, Serializer};
use serde::Serialize;
use crate::elements::Element;
use crate::org::Org;
impl Serialize for Org<'_> {
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
serializer.serialize_newtype_struct(
"Element",
&ElementNode {
node: self.root,
arena: &self.arena,
},
)
}
}
struct ElementNode<'a> {
node: NodeId,
arena: &'a Arena<Element<'a>>,
}
impl Serialize for ElementNode<'_> {
#[allow(unused_variables)]
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
let mut state;
match &self.arena[self.node].data {
Element::Document { begin, end } => {
state = serializer.serialize_struct("Element::Document", 2)?;
state.serialize_field("type", "document")?;
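                // begin/end offsets (and contents_begin/contents_end below) are only
                // emitted when the optional "extra-serde-info" feature is enabled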
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
}
}
Element::Block {
block,
begin,
end,
contents_begin,
contents_end,
} => {
state = serializer.serialize_struct("Element::Block", 2)?;
state.serialize_field("type", "block")?;
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
state.serialize_field("contents_begin", contents_begin)?;
state.serialize_field("contents_end", contents_end)?;
}
}
Element::Section {
begin,
end,
contents_begin,
contents_end,
} => {
state = serializer.serialize_struct("Element::Section", 2)?;
state.serialize_field("type", "section")?;
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
state.serialize_field("contents_begin", contents_begin)?;
state.serialize_field("contents_end", contents_end)?;
}
}
Element::Drawer {
drawer,
begin,
end,
contents_begin,
contents_end,
} => {
state = serializer.serialize_struct("Element::Drawer", 2)?;
state.serialize_field("type", "drawer")?;
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
state.serialize_field("contents_begin", contents_begin)?;
state.serialize_field("contents_end", contents_end)?;
}
}
Element::DynBlock {
dyn_block,
begin,
end,
contents_begin,
contents_end,
} => {
state = serializer.serialize_struct("Element::DynBlock", 2)?;
state.serialize_field("type", "dynamic_block")?;
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
state.serialize_field("contents_begin", contents_begin)?;
state.serialize_field("contents_end", contents_end)?;
}
}
Element::FnDef {
begin,
end,
contents_begin,
contents_end,
fn_def,
} => {
state = serializer.serialize_struct("Element::FnDef", 2)?;
state.serialize_field("type", "footnote_definition")?;
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
state.serialize_field("contents_begin", contents_begin)?;
state.serialize_field("contents_end", contents_end)?;
}
}
Element::Headline {
begin,
end,
contents_begin,
contents_end,
headline,
} => {
state = serializer.serialize_struct("Element::Headline", 2)?;
state.serialize_field("type", "headline")?;
state.serialize_field("level", &headline.level)?;
state.serialize_field("title", &headline.title)?;
if let Some(prior) = &headline.priority {
state.serialize_field("priority", prior)?;
}
if let Some(kw) = &headline.keyword {
state.serialize_field("keyword", kw)?;
}
if !headline.tags.is_empty() {
state.serialize_field("tags", &headline.tags)?;
}
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
state.serialize_field("contents_begin", contents_begin)?;
state.serialize_field("contents_end", contents_end)?;
}
}
Element::List {
list,
begin,
end,
contents_begin,
contents_end,
} => {
state = serializer.serialize_struct("Element::List", 2)?;
state.serialize_field("type", "list")?;
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
state.serialize_field("contents_begin", contents_begin)?;
state.serialize_field("contents_end", contents_end)?;
}
}
Element::ListItem {
list_item,
begin,
end,
contents_begin,
contents_end,
} => {
state = serializer.serialize_struct("Element::ListItem", 2)?;
state.serialize_field("type", "list_item")?;
state.serialize_field("bullet", list_item.bullet)?;
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
state.serialize_field("contents_begin", contents_begin)?;
state.serialize_field("contents_end", contents_end)?;
}
}
Element::Paragraph {
begin,
end,
contents_begin,
contents_end,
} => {
state = serializer.serialize_struct("Element::Paragraph", 2)?;
state.serialize_field("type", "paragraph")?;
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
state.serialize_field("contents_begin", contents_begin)?;
state.serialize_field("contents_end", contents_end)?;
}
}
Element::Clock { clock, begin, end } => {
state = serializer.serialize_struct("Element::Clock", 2)?;
state.serialize_field("type", "clock")?;
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
}
}
Element::BabelCall { value, begin, end } => {
state = serializer.serialize_struct("Element::BabelCall", 2)?;
state.serialize_field("type", "babel_call")?;
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
}
}
Element::Cookie { cookie, begin, end } => {
state = serializer.serialize_struct("Element::Cookie", 2)?;
state.serialize_field("type", "cookie")?;
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
}
}
Element::FnRef { fn_ref, begin, end } => {
state = serializer.serialize_struct("Element::FnRef", 2)?;
state.serialize_field("type", "footnote_reference")?;
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
}
}
Element::InlineCall {
inline_call,
begin,
end,
} => {
state = serializer.serialize_struct("Element::InlineCall", 2)?;
state.serialize_field("type", "inline_call")?;
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
}
}
Element::InlineSrc {
inline_src,
begin,
end,
} => {
state = serializer.serialize_struct("Element::InlineSrc", 2)?;
state.serialize_field("type", "inlne_source_block")?;
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
}
}
Element::Keyword {
keyword,
begin,
end,
} => {
state = serializer.serialize_struct("Element::Keyword", 2)?;
state.serialize_field("type", "keyword")?;
state.serialize_field("key", keyword.key)?;
if let Some(option) = keyword.option {
state.serialize_field("option", option)?;
}
state.serialize_field("value", keyword.value)?;
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
}
}
Element::Link { link, begin, end } => {
state = serializer.serialize_struct("Element::Link", 2)?;
state.serialize_field("type", "link")?;
state.serialize_field("path", link.path)?;
if let Some(desc) = link.desc {
state.serialize_field("desc", desc)?;
}
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
}
}
Element::Macros { macros, begin, end } => {
state = serializer.serialize_struct("Element::Macros", 2)?;
state.serialize_field("type", "macros")?;
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
}
}
Element::Planning(_) => {
state = serializer.serialize_struct("Element::Planning", 2)?;
state.serialize_field("type", "planning")?;
}
Element::Snippet {
begin,
end,
snippet,
} => {
state = serializer.serialize_struct("Element::Snippet", 2)?;
state.serialize_field("type", "snippet")?;
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
}
}
Element::Text { value, begin, end } => {
state = serializer.serialize_struct("Element::Text", 2)?;
state.serialize_field("type", "text")?;
state.serialize_field("value", value)?;
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
}
}
Element::Rule { begin, end } => {
state = serializer.serialize_struct("Element::Rule", 2)?;
state.serialize_field("type", "rule")?;
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
}
}
Element::Timestamp {
begin,
end,
timestamp,
} => {
state = serializer.serialize_struct("Element::Timestamp", 2)?;
state.serialize_field("type", "timestamp")?;
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
}
}
Element::Bold {
begin,
end,
contents_begin,
contents_end,
} => {
state = serializer.serialize_struct("Element::Bold", 2)?;
state.serialize_field("type", "bold")?;
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
state.serialize_field("contents_begin", contents_begin)?;
state.serialize_field("contents_end", contents_end)?;
}
}
Element::Strike {
begin,
end,
contents_begin,
contents_end,
} => {
state = serializer.serialize_struct("Element::Strike", 2)?;
state.serialize_field("type", "strike")?;
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
state.serialize_field("contents_begin", contents_begin)?;
state.serialize_field("contents_end", contents_end)?;
}
}
Element::Italic {
begin,
end,
contents_begin,
contents_end,
} => {
state = serializer.serialize_struct("Element::Italic", 2)?;
state.serialize_field("type", "italic")?;
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
state.serialize_field("contents_begin", contents_begin)?;
state.serialize_field("contents_end", contents_end)?;
}
}
Element::Underline {
begin,
end,
contents_begin,
contents_end,
} => {
state = serializer.serialize_struct("Element::Underline", 2)?;
state.serialize_field("type", "underline")?;
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
state.serialize_field("contents_begin", contents_begin)?;
state.serialize_field("contents_end", contents_end)?;
}
}
Element::Code { begin, end, value } => {
state = serializer.serialize_struct("Element::Code", 2)?;
state.serialize_field("type", "code")?;
state.serialize_field("value", value)?;
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
}
}
Element::Verbatim { begin, end, value } => {
state = serializer.serialize_struct("Element::Verbatim", 2)?;
state.serialize_field("type", "verbatim")?;
state.serialize_field("value", value)?;
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
}
}
Element::RadioTarget {
radio_target,
begin,
end,
} => {
state = serializer.serialize_struct("Element::RadioTarget", 2)?;
state.serialize_field("type", "radio_target")?;
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
}
}
Element::Target { target, begin, end } => {
state = serializer.serialize_struct("Element::Target", 2)?;
state.serialize_field("type", "target")?;
if cfg!(feature = "extra-serde-info") {
state.serialize_field("begin", begin)?;
state.serialize_field("end", end)?;
}
}
}
if let Some(first) = self.arena[self.node].first_child() {
state.serialize_field(
"children",
&ElementChildrenNode {
first,
arena: self.arena,
},
)?;
}
state.end()
}
}
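// serializes `first` and all of its following siblings (i.e. every child of the
// parent node) as a sequence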
struct ElementChildrenNode<'a> {
first: NodeId,
arena: &'a Arena<Element<'a>>,
}
impl Serialize for ElementChildrenNode<'_> {
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
let mut seq = serializer.serialize_seq(None)?;
for node in self.first.following_siblings(&self.arena) {
seq.serialize_element(&ElementNode {
node,
arena: &self.arena,
})?;
}
seq.end()
}
}
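
For reference, a minimal sketch (not part of this diff) of exercising the Serialize impl above, assuming an `Org::new(&str)` constructor, a `parse(&mut self)` method, and the bundled serde_json dependency:

use orgize::Org;

fn org_to_json(src: &str) -> serde_json::Result<String> {
    // hypothetical driver: `parse` is assumed here to take `&mut self`
    let mut org = Org::new(src);
    org.parse();
    // the arena-backed tree serializes as nested { "type": ..., "children": [...] } objects
    serde_json::to_string_pretty(&org)
}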

View file

@ -1,42 +0,0 @@
use crate::elements::{fn_def, Keyword};
use crate::headline::{Headline, DEFAULT_TODO_KEYWORDS};
use memchr::memchr;
type Headlines<'a> = Vec<Headline<'a>>;
type Keywords<'a> = Vec<(&'a str, &'a str)>;
type Footnotes<'a> = Vec<&'a str>;
pub fn metadata(src: &str) -> (Headlines<'_>, Keywords<'_>, Footnotes<'_>) {
let (mut headlines, mut keywords, mut footnotes) = (Vec::new(), Vec::new(), Vec::new());
for line in src.lines().filter(|l| !l.is_empty()) {
if line.starts_with('*') {
let level = memchr(b' ', line.as_bytes()).unwrap_or_else(|| line.len());
if line.as_bytes()[0..level].iter().all(|&c| c == b'*') {
headlines.push(Headline::parse(line, DEFAULT_TODO_KEYWORDS).0)
}
} else if line.starts_with("#+") {
if let Some((key, _, value, _)) = Keyword::parse(line) {
keywords.push((key, value))
}
} else if line.starts_with("[fn:") {
if let Some((label, _, _)) = fn_def::parse(line) {
footnotes.push(label)
}
}
}
(headlines, keywords, footnotes)
}
pub fn toc(src: &str) -> Headlines<'_> {
metadata(src).0
}
pub fn keywords(src: &str) -> Keywords<'_> {
metadata(src).1
}
pub fn fn_def(src: &str) -> Footnotes<'_> {
metadata(src).2
}