refactor(org): store content in separate Vec

This commit is contained in:
PoiScript 2019-07-28 11:33:34 +08:00
parent 50f6b9f52a
commit 3e82172dfe
14 changed files with 520 additions and 573 deletions

View file

@ -17,19 +17,18 @@ travis-ci = { repository = "PoiScript/orgize" }
[features]
default = ["serde"]
extra-serde-info = ["serde"]
[dependencies]
bytecount = "0.5.1"
chrono = { version = "0.4.7", optional = true }
indextree = "3.2.0"
indextree = "3.3.0"
jetscii = "0.4.4"
memchr = "2.2.0"
serde = { version = "1.0.94", optional = true, features = ["derive"] }
memchr = "2.2.1"
serde = { version = "1.0.97", optional = true, features = ["derive"] }
nom = "5.0.0"
[dev-dependencies]
lazy_static = "1.3.0"
pretty_assertions = "0.6.1"
serde_json = "1.0.39"
serde_json = "1.0.40"
slugify = "0.1.0"

View file

@ -190,8 +190,6 @@ By now, orgize provides three features:
+ `serde`: adds the ability to serialize `Org` and other elements using `serde`, enabled by default.
+ `extra-serde-info`: includes the position information while serializing, disabled by default.
+ `chrono`: adds the ability to convert `Datetime` into `chrono` struct, disabled by default.
## License

View file

@ -9,13 +9,11 @@ pub struct Block<'a> {
pub name: &'a str,
#[cfg_attr(feature = "serde", serde(skip_serializing_if = "Option::is_none"))]
pub args: Option<&'a str>,
#[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))]
pub contents: &'a str,
}
impl Block<'_> {
#[inline]
pub(crate) fn parse(text: &str) -> Option<(&str, Element<'_>)> {
pub(crate) fn parse(text: &str) -> Option<(&str, Element<'_>, &str)> {
debug_assert!(text.starts_with("#+"));
if text.len() <= 8 || text[2..8].to_uppercase() != "BEGIN_" {
@ -40,11 +38,8 @@ impl Block<'_> {
if text[pos..i].trim().eq_ignore_ascii_case(&end) {
return Some((
&text[i + 1..],
Element::Block(Block {
name,
args,
contents: &text[off..pos],
}),
Element::Block(Block { name, args }),
&text[off..pos],
));
}
@ -52,14 +47,7 @@ impl Block<'_> {
}
if text[pos..].trim().eq_ignore_ascii_case(&end) {
Some((
"",
Element::Block(Block {
name,
args,
contents: &text[off..pos],
}),
))
Some(("", Element::Block(Block { name, args }), &text[off..pos]))
} else {
None
}
@ -75,8 +63,8 @@ fn parse() {
Element::Block(Block {
name: "SRC",
args: None,
contents: ""
}),
""
))
);
assert_eq!(
@ -86,8 +74,8 @@ fn parse() {
Element::Block(Block {
name: "SRC",
args: Some("javascript"),
contents: "console.log('Hello World!');\n"
}),
"console.log('Hello World!');\n"
))
);
// TODO: more testing

View file

@ -7,13 +7,11 @@ use crate::elements::Element;
#[derive(Debug)]
pub struct Drawer<'a> {
pub name: &'a str,
#[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))]
pub contents: &'a str,
}
impl Drawer<'_> {
#[inline]
pub(crate) fn parse(text: &str) -> Option<(&str, Element<'_>)> {
pub(crate) fn parse(text: &str) -> Option<(&str, Element<'_>, &str)> {
debug_assert!(text.starts_with(':'));
let mut lines = memchr_iter(b'\n', text.as_bytes());
@ -36,8 +34,8 @@ impl Drawer<'_> {
&text[i + 1..],
Element::Drawer(Drawer {
name: &name[0..name.len() - 1],
contents: &text[off..pos],
}),
&text[off..pos],
));
}
pos = i + 1;
@ -48,8 +46,8 @@ impl Drawer<'_> {
"",
Element::Drawer(Drawer {
name: &name[0..name.len() - 1],
contents: &text[off..pos],
}),
&text[off..pos],
))
} else {
None
@ -63,10 +61,8 @@ fn parse() {
Drawer::parse(":PROPERTIES:\n :CUSTOM_ID: id\n :END:"),
Some((
"",
Element::Drawer(Drawer {
name: "PROPERTIES",
contents: " :CUSTOM_ID: id\n"
})
Element::Drawer(Drawer { name: "PROPERTIES" }),
" :CUSTOM_ID: id\n"
))
)
}

View file

@ -9,14 +9,11 @@ pub struct DynBlock<'a> {
pub block_name: &'a str,
#[cfg_attr(feature = "serde", serde(skip_serializing_if = "Option::is_none"))]
pub arguments: Option<&'a str>,
#[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))]
pub contents: &'a str,
}
impl DynBlock<'_> {
#[inline]
// return (dyn_block, contents-begin, contents-end, end)
pub(crate) fn parse(text: &str) -> Option<(&str, Element<'_>)> {
pub(crate) fn parse(text: &str) -> Option<(&str, Element<'_>, &str)> {
debug_assert!(text.starts_with("#+"));
if text.len() <= "#+BEGIN: ".len() || !text[2..9].eq_ignore_ascii_case("BEGIN: ") {
@ -50,8 +47,8 @@ impl DynBlock<'_> {
Element::DynBlock(DynBlock {
block_name: name,
arguments: para,
contents: &text[off..pos],
}),
&text[off..pos],
));
}
@ -64,8 +61,8 @@ impl DynBlock<'_> {
Element::DynBlock(DynBlock {
block_name: name,
arguments: para,
contents: &text[off..pos],
}),
&text[off..pos],
))
} else {
None
@ -83,8 +80,8 @@ fn parse() {
Element::DynBlock(DynBlock {
block_name: "clocktable",
arguments: Some(":scope file"),
contents: "CONTENTS\n"
},)
}),
"CONTENTS\n"
))
);
}

View file

@ -1,39 +1,32 @@
use memchr::memchr;
use nom::{
bytes::complete::{tag, take_while1},
IResult,
};
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[derive(Debug)]
pub struct FnDef<'a> {
pub label: &'a str,
#[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))]
pub contents: &'a str,
}
/// Recognises the `[fn:LABEL]` prefix of a footnote definition.
///
/// `LABEL` must be at least one character long and may contain only ASCII
/// alphanumerics, `-` and `_`. On success the returned pair holds the text
/// following the closing `]` and the label itself; any mismatch is reported
/// as a nom error.
fn parse_label(input: &str) -> IResult<&str, &str> {
    // Characters accepted inside the label.
    let is_label_char = |c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_';

    let (rest, _) = tag("[fn:")(input)?;
    let (rest, label) = take_while1(is_label_char)(rest)?;
    let (rest, _) = tag("]")(rest)?;

    Ok((rest, label))
}
impl FnDef<'_> {
#[inline]
pub(crate) fn parse(text: &str) -> Option<(&str, FnDef<'_>)> {
if text.starts_with("[fn:") {
let (label, off) = memchr(b']', text.as_bytes())
.filter(|&i| {
i != 4
&& text.as_bytes()["[fn:".len()..i]
.iter()
.all(|&c| c.is_ascii_alphanumeric() || c == b'-' || c == b'_')
})
.map(|i| (&text["[fn:".len()..i], i + 1))?;
pub(crate) fn parse(text: &str) -> Option<(&str, FnDef<'_>, &str)> {
let (tail, label) = parse_label(text).ok()?;
let end = memchr(b'\n', text.as_bytes()).unwrap_or_else(|| text.len());
let end = memchr(b'\n', tail.as_bytes()).unwrap_or_else(|| tail.len());
Some((
&text[end..],
FnDef {
label,
contents: &text[off..end],
},
))
} else {
None
}
Some((&tail[end..], FnDef { label }, &tail[0..end]))
}
}
@ -41,43 +34,19 @@ impl FnDef<'_> {
fn parse() {
assert_eq!(
FnDef::parse("[fn:1] https://orgmode.org"),
Some((
"",
FnDef {
label: "1",
contents: " https://orgmode.org"
},
))
Some(("", FnDef { label: "1" }, " https://orgmode.org"))
);
assert_eq!(
FnDef::parse("[fn:word_1] https://orgmode.org"),
Some((
"",
FnDef {
label: "word_1",
contents: " https://orgmode.org"
},
))
Some(("", FnDef { label: "word_1" }, " https://orgmode.org"))
);
assert_eq!(
FnDef::parse("[fn:WORD-1] https://orgmode.org"),
Some((
"",
FnDef {
label: "WORD-1",
contents: " https://orgmode.org"
},
))
Some(("", FnDef { label: "WORD-1" }, " https://orgmode.org"))
);
assert_eq!(
FnDef::parse("[fn:WORD]"),
Some((
"",
FnDef {
label: "WORD",
contents: ""
},
))
Some(("", FnDef { label: "WORD" }, ""))
);
assert_eq!(FnDef::parse("[fn:] https://orgmode.org"), None);
assert_eq!(FnDef::parse("[fn:wor d] https://orgmode.org"), None);

View file

@ -22,12 +22,13 @@ pub struct Headline<'a> {
/// headline keyword
#[cfg_attr(feature = "serde", serde(skip_serializing_if = "Option::is_none"))]
pub keyword: Option<&'a str>,
#[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))]
pub contents: &'a str,
}
impl Headline<'_> {
pub(crate) fn parse<'a>(text: &'a str, config: &ParseConfig<'_>) -> (&'a str, Headline<'a>) {
pub(crate) fn parse<'a>(
text: &'a str,
config: &ParseConfig<'_>,
) -> (&'a str, Headline<'a>, &'a str) {
let level = memchr2(b'\n', b' ', text.as_bytes()).unwrap_or_else(|| text.len());
debug_assert!(level > 0);
@ -55,8 +56,8 @@ impl Headline<'_> {
priority: None,
title: "",
tags: Vec::new(),
contents: &text[off..end],
},
&text[off..end],
);
}
@ -108,8 +109,8 @@ impl Headline<'_> {
priority,
title,
tags: tags.split(':').filter(|s| !s.is_empty()).collect(),
contents: &text[off..end],
},
&text[off..end],
)
}
@ -165,8 +166,8 @@ fn parse() {
keyword: Some("DONE"),
title: "COMMENT Title",
tags: vec!["tag", "a2%"],
contents: ""
},
""
)
);
assert_eq!(
@ -179,8 +180,8 @@ fn parse() {
tags: vec!["tag", "a2%"],
title: "ToDO [#A] COMMENT Title",
keyword: None,
contents: ""
},
""
)
);
assert_eq!(
@ -193,8 +194,8 @@ fn parse() {
tags: vec!["tag", "a2%"],
title: "T0DO [#A] COMMENT Title",
keyword: None,
contents: ""
},
""
)
);
assert_eq!(
@ -207,8 +208,8 @@ fn parse() {
tags: vec!["tag", "a2%"],
title: "[#1] COMMENT Title",
keyword: Some("DONE"),
contents: "",
},
""
)
);
assert_eq!(
@ -221,8 +222,8 @@ fn parse() {
tags: vec!["tag", "a2%"],
title: "[#a] COMMENT Title",
keyword: Some("DONE"),
contents: "",
},
""
)
);
assert_eq!(
@ -235,8 +236,8 @@ fn parse() {
tags: Vec::new(),
title: "COMMENT Title :tag:a2%",
keyword: Some("DONE"),
contents: ""
},
""
)
);
assert_eq!(
@ -249,8 +250,8 @@ fn parse() {
tags: Vec::new(),
title: "COMMENT Title tag:a2%:",
keyword: Some("DONE"),
contents: ""
},
""
)
);
assert_eq!(
@ -263,8 +264,8 @@ fn parse() {
tags: Vec::new(),
title: "COMMENT Title tag:a2%:",
keyword: None,
contents: ""
},
""
)
);
}
@ -287,8 +288,8 @@ fn parse_todo_keywords() {
keyword: None,
title: "DONE [#A] COMMENT Title",
tags: vec!["tag", "a2%"],
contents: ""
},
""
)
);
assert_eq!(
@ -307,8 +308,8 @@ fn parse_todo_keywords() {
keyword: Some("TASK"),
title: "COMMENT Title",
tags: vec!["tag", "a2%"],
contents: ""
},
""
)
);
}

View file

@ -4,16 +4,14 @@ use std::iter::once;
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[derive(Debug)]
pub struct List<'a> {
pub struct List {
pub indent: usize,
pub ordered: bool,
#[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))]
pub contents: &'a str,
}
impl List<'_> {
impl List {
#[inline]
pub(crate) fn parse(text: &str) -> Option<(&str, List<'_>)> {
pub(crate) fn parse(text: &str) -> Option<(&str, List, &str)> {
let (indent, tail) = text
.find(|c| c != ' ')
.map(|off| (off, &text[off..]))
@ -33,14 +31,7 @@ impl List<'_> {
if line_indent < indent
|| (line_indent == indent && is_item(&line[line_indent..]).is_none())
{
Some((
&text[pos..],
List {
indent,
ordered,
contents: &text[0..pos],
},
))
Some((&text[pos..], List { indent, ordered }, &text[0..pos]))
} else {
pos = i;
continue;
@ -52,48 +43,20 @@ impl List<'_> {
if line_indent < indent
|| (line_indent == indent && is_item(&line[line_indent..]).is_none())
{
Some((
&text[pos..],
List {
indent,
ordered,
contents: &text[0..pos],
},
))
Some((&text[pos..], List { indent, ordered }, &text[0..pos]))
} else {
pos = next_i;
continue;
}
} else {
Some((
&text[next_i..],
List {
indent,
ordered,
contents: &text[0..pos],
},
))
Some((&text[next_i..], List { indent, ordered }, &text[0..pos]))
}
} else {
Some((
&text[i..],
List {
indent,
ordered,
contents: &text[0..pos],
},
))
Some((&text[i..], List { indent, ordered }, &text[0..pos]))
};
}
Some((
&text[pos..],
List {
indent,
ordered,
contents: &text[0..pos],
},
))
Some((&text[pos..], List { indent, ordered }, &text[0..pos]))
}
}
@ -102,12 +65,11 @@ impl List<'_> {
#[derive(Debug)]
pub struct ListItem<'a> {
pub bullet: &'a str,
#[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))]
pub contents: &'a str,
}
impl ListItem<'_> {
pub(crate) fn parse(text: &str, indent: usize) -> (&str, ListItem<'_>) {
#[inline]
pub(crate) fn parse(text: &str, indent: usize) -> (&str, ListItem<'_>, &str) {
debug_assert!(&text[0..indent].trim().is_empty());
let off = &text[indent..].find(' ').unwrap() + 1 + indent;
@ -125,8 +87,8 @@ impl ListItem<'_> {
&text[pos..],
ListItem {
bullet: &text[indent..off],
contents: &text[off..pos],
},
&text[off..pos],
);
}
}
@ -137,8 +99,8 @@ impl ListItem<'_> {
"",
ListItem {
bullet: &text[indent..off],
contents: &text[off..],
},
&text[off..],
)
}
}
@ -197,8 +159,8 @@ fn list_parse() {
List {
indent: 0,
ordered: false,
contents: "+ item1\n+ item2"
},
"+ item1\n+ item2"
))
);
assert_eq!(
@ -208,8 +170,8 @@ fn list_parse() {
List {
indent: 0,
ordered: false,
contents: "* item1\n \n* item2"
},
"* item1\n \n* item2"
))
);
assert_eq!(
@ -219,8 +181,8 @@ fn list_parse() {
List {
indent: 0,
ordered: false,
contents: "* item1\n"
},
"* item1\n"
))
);
assert_eq!(
@ -230,8 +192,8 @@ fn list_parse() {
List {
indent: 0,
ordered: false,
contents: "* item1\n"
},
"* item1\n"
))
);
assert_eq!(
@ -241,8 +203,8 @@ fn list_parse() {
List {
indent: 0,
ordered: false,
contents: "+ item1\n + item2\n"
},
"+ item1\n + item2\n"
))
);
assert_eq!(
@ -252,8 +214,8 @@ fn list_parse() {
List {
indent: 0,
ordered: false,
contents: "+ item1\n \n + item2\n \n+ item 3"
},
"+ item1\n \n + item2\n \n+ item 3"
))
);
assert_eq!(
@ -263,8 +225,8 @@ fn list_parse() {
List {
indent: 2,
ordered: false,
contents: " + item1\n \n + item2"
},
" + item1\n \n + item2"
))
);
assert_eq!(
@ -274,8 +236,8 @@ fn list_parse() {
List {
indent: 0,
ordered: false,
contents: "+ 1\n\n - 2\n\n - 3\n\n+ 4"
},
"+ 1\n\n - 2\n\n - 3\n\n+ 4"
))
);
}

View file

@ -44,7 +44,7 @@ fn parse() {
Element::Macros(Macros {
name: "poem",
arguments: Some("red,blue")
},)
})
))
);
assert_eq!(
@ -54,7 +54,7 @@ fn parse() {
Element::Macros(Macros {
name: "poem",
arguments: Some(")")
},)
})
))
);
assert_eq!(
@ -64,7 +64,7 @@ fn parse() {
Element::Macros(Macros {
name: "author",
arguments: None
},)
})
))
);
assert!(Macros::parse("{{{0uthor}}}").is_err());

View file

@ -56,23 +56,16 @@ pub use self::{
#[derive(Debug)]
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[cfg_attr(feature = "serde", serde(tag = "type"))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[cfg_attr(feature = "serde", serde(tag = "type", rename_all = "snake_case"))]
pub enum Element<'a> {
Block(Block<'a>),
BabelCall(BabelCall<'a>),
Section {
#[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))]
contents: &'a str,
},
Section,
Clock(Clock<'a>),
Cookie(Cookie<'a>),
RadioTarget(RadioTarget<'a>),
RadioTarget(RadioTarget),
Drawer(Drawer<'a>),
Document {
#[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))]
contents: &'a str,
},
Document,
DynBlock(DynBlock<'a>),
FnDef(FnDef<'a>),
FnRef(FnRef<'a>),
@ -81,49 +74,24 @@ pub enum Element<'a> {
InlineSrc(InlineSrc<'a>),
Keyword(Keyword<'a>),
Link(Link<'a>),
List(List<'a>),
List(List),
ListItem(ListItem<'a>),
Macros(Macros<'a>),
Planning(Planning<'a>),
Snippet(Snippet<'a>),
Text {
value: &'a str,
},
Paragraph {
#[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))]
contents: &'a str,
},
Text { value: &'a str },
Paragraph,
Rule,
Timestamp(Timestamp<'a>),
Target(Target<'a>),
Bold {
#[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))]
contents: &'a str,
},
Strike {
#[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))]
contents: &'a str,
},
Italic {
#[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))]
contents: &'a str,
},
Underline {
#[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))]
contents: &'a str,
},
Verbatim {
value: &'a str,
},
Code {
value: &'a str,
},
Comment {
value: &'a str,
},
FixedWidth {
value: &'a str,
},
Bold,
Strike,
Italic,
Underline,
Verbatim { value: &'a str },
Code { value: &'a str },
Comment { value: &'a str },
FixedWidth { value: &'a str },
}
macro_rules! impl_from {
@ -140,7 +108,6 @@ impl_from!(Block);
impl_from!(BabelCall);
impl_from!(Clock);
impl_from!(Cookie);
impl_from!(RadioTarget);
impl_from!(Drawer);
impl_from!(DynBlock);
impl_from!(FnDef);
@ -150,7 +117,6 @@ impl_from!(InlineCall);
impl_from!(InlineSrc);
impl_from!(Keyword);
impl_from!(Link);
impl_from!(List);
impl_from!(ListItem);
impl_from!(Macros);
impl_from!(Planning);

View file

@ -10,14 +10,11 @@ use crate::elements::Element;
#[cfg_attr(test, derive(PartialEq))]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[derive(Debug)]
pub struct RadioTarget<'a> {
#[cfg_attr(all(feature = "serde", not(feature = "extra-serde-info")), serde(skip))]
contents: &'a str,
}
pub struct RadioTarget;
impl RadioTarget<'_> {
impl RadioTarget {
#[inline]
pub(crate) fn parse(input: &str) -> IResult<&str, Element<'_>> {
pub(crate) fn parse(input: &str) -> IResult<&str, (Element, &str)> {
let (input, _) = tag("<<<")(input)?;
let (input, contents) = verify(
take_while(|c: char| c != '<' && c != '\n' && c != '>'),
@ -25,7 +22,7 @@ impl RadioTarget<'_> {
)(input)?;
let (input, _) = tag(">>>")(input)?;
Ok((input, Element::RadioTarget(RadioTarget { contents })))
Ok((input, (Element::RadioTarget(RadioTarget), contents)))
}
}
@ -33,16 +30,11 @@ impl RadioTarget<'_> {
fn parse() {
assert_eq!(
RadioTarget::parse("<<<target>>>"),
Ok(("", Element::RadioTarget(RadioTarget { contents: "target" })))
Ok(("", (Element::RadioTarget(RadioTarget), "target")))
);
assert_eq!(
RadioTarget::parse("<<<tar get>>>"),
Ok((
"",
Element::RadioTarget(RadioTarget {
contents: "tar get"
},)
))
Ok(("", (Element::RadioTarget(RadioTarget), "tar get")))
);
assert!(RadioTarget::parse("<<<target >>>").is_err());
assert!(RadioTarget::parse("<<< target>>>").is_err());

View file

@ -1,3 +1,5 @@
pub mod html;
pub mod org;
pub use html::{DefaultHtmlHandler, HtmlHandler};
pub use org::{DefaultOrgHandler, OrgHandler};

View file

@ -201,8 +201,6 @@
//!
//! + `serde`: adds the ability to serialize `Org` and other elements using `serde`, enabled by default.
//!
//! + `extra-serde-info`: includes the position information while serializing, disabled by default.
//!
//! + `chrono`: adds the ability to convert `Datetime` into `chrono` struct, disabled by default.
//!
//! # License

View file

@ -5,7 +5,7 @@ use std::io::{Error, Write};
use crate::config::ParseConfig;
use crate::elements::*;
use crate::export::{DefaultHtmlHandler, HtmlHandler};
use crate::export::*;
use crate::iter::Iter;
pub struct Org<'a> {
@ -13,19 +13,104 @@ pub struct Org<'a> {
pub(crate) document: NodeId,
}
// A pending unit of parsing work: a node that already exists in the arena
// together with the slice of text that still has to be turned into its
// children. `Org::parse_with_config` keeps these in a `Vec`, pops them one at
// a time and dispatches on the variant to pick the parsing routine.
//
// In every variant `content` is the not-yet-parsed text and `node` is the
// arena node that the parsed children are appended to.
enum Container<'a> {
// List: `content` is handed to `parse_list_items` together with `indent`.
List {
content: &'a str,
node: NodeId,
indent: usize,
},
// Block, List Item: `content` is parsed with `parse_elements_children`.
Block {
content: &'a str,
node: NodeId,
},
// Paragraph, Inline Markup: `content` is parsed with `parse_objects_children`.
Inline {
content: &'a str,
node: NodeId,
},
// Headline, Document: text before the first headline (if any) becomes a
// `Section` child, then the remainder is split into child headlines.
Headline {
content: &'a str,
node: NodeId,
},
// Section: whatever `Planning::parse` consumes at the start is skipped (its
// result is currently discarded), the rest is parsed with
// `parse_elements_children`.
Section {
content: &'a str,
node: NodeId,
},
}
impl<'a> Org<'a> {
pub fn parse(text: &'a str) -> Self {
Org::parse_with_config(text, ParseConfig::default())
}
pub fn parse_with_config(text: &'a str, config: ParseConfig<'_>) -> Self {
pub fn parse_with_config(content: &'a str, config: ParseConfig<'_>) -> Self {
let mut arena = Arena::new();
let document = arena.new_node(Element::Document { contents: text });
let document = arena.new_node(Element::Document);
let mut org = Org { arena, document };
org.parse_internal(config);
let mut containers = vec![Container::Headline {
content,
node: document,
}];
org
while let Some(container) = containers.pop() {
match container {
Container::Headline {
mut content,
node: parent,
} => {
if !content.is_empty() {
let off = Headline::find_level(content, std::usize::MAX);
if off != 0 {
let node = arena.new_node(Element::Section);
parent.append(node, &mut arena).unwrap();
containers.push(Container::Section {
content: &content[0..off],
node,
});
content = &content[off..];
}
}
while !content.is_empty() {
let (tail, headline, headline_content) = Headline::parse(content, &config);
let headline = Element::Headline(headline);
let node = arena.new_node(headline);
parent.append(node, &mut arena).unwrap();
containers.push(Container::Headline {
content: headline_content,
node,
});
content = tail;
}
}
Container::Section { content, node } => {
// TODO
if let Some((tail, _planning)) = Planning::parse(content) {
parse_elements_children(&mut arena, tail, node, &mut containers);
} else {
parse_elements_children(&mut arena, content, node, &mut containers);
}
}
Container::Block { content, node } => {
parse_elements_children(&mut arena, content, node, &mut containers);
}
Container::Inline { content, node } => {
parse_objects_children(&mut arena, content, node, &mut containers);
}
Container::List {
content,
node,
indent,
} => {
parse_list_items(&mut arena, content, indent, node, &mut containers);
}
}
}
Org { arena, document }
}
pub fn iter(&'a self) -> Iter<'a> {
@ -49,376 +134,370 @@ impl<'a> Org<'a> {
for event in self.iter() {
match event {
Start(e) => handler.start(&mut writer, e)?,
End(e) => handler.end(&mut writer, e)?,
Start(element) => handler.start(&mut writer, element)?,
End(element) => handler.end(&mut writer, element)?,
}
}
Ok(())
}
}
fn parse_internal(&mut self, config: ParseConfig<'_>) {
let mut node = self.document;
loop {
match self.arena[node].data {
Element::Document { mut contents }
| Element::Headline(Headline { mut contents, .. }) => {
if !contents.is_empty() {
let off = Headline::find_level(contents, std::usize::MAX);
if off != 0 {
let section = Element::Section {
contents: &contents[0..off],
};
let new_node = self.arena.new_node(section);
node.append(new_node, &mut self.arena).unwrap();
contents = &contents[off..];
}
}
while !contents.is_empty() {
let (tail, headline) = Headline::parse(contents, &config);
let headline = Element::Headline(headline);
let new_node = self.arena.new_node(headline);
node.append(new_node, &mut self.arena).unwrap();
contents = tail;
}
}
Element::Section { contents } => {
// TODO
if let Some((tail, _planning)) = Planning::parse(contents) {
self.parse_elements_children(tail, node);
} else {
self.parse_elements_children(contents, node);
}
}
Element::Block(Block { contents, .. })
| Element::ListItem(ListItem { contents, .. }) => {
self.parse_elements_children(contents, node);
}
Element::Paragraph { contents }
| Element::Bold { contents }
| Element::Underline { contents }
| Element::Italic { contents }
| Element::Strike { contents } => {
self.parse_objects_children(contents, node);
}
Element::List(List {
contents, indent, ..
}) => {
self.parse_list_items(contents, indent, node);
}
_ => (),
}
fn parse_elements_children<'a>(
arena: &mut Arena<Element<'a>>,
content: &'a str,
parent: NodeId,
containers: &mut Vec<Container<'a>>,
) {
let mut tail = skip_empty_lines(content);
if let Some(next_node) = self.next_node(node) {
node = next_node;
} else {
break;
}
}
if let Some((new_tail, element)) = parse_element(content, arena, containers) {
parent.append(element, arena).unwrap();
tail = skip_empty_lines(new_tail);
}
fn next_node(&self, mut node: NodeId) -> Option<NodeId> {
if let Some(child) = self.arena[node].first_child() {
return Some(child);
}
let mut text = tail;
let mut pos = 0;
loop {
if let Some(sibling) = self.arena[node].next_sibling() {
return Some(sibling);
} else if let Some(parent) = self.arena[node].parent() {
node = parent;
} else {
return None;
}
}
}
fn parse_elements_children(&mut self, input: &'a str, node: NodeId) {
let mut tail = skip_empty_lines(input);
if let Some((new_tail, element)) = self.parse_element(input) {
let new_node = self.arena.new_node(element);
node.append(new_node, &mut self.arena).unwrap();
tail = skip_empty_lines(new_tail);
}
let mut text = tail;
let mut pos = 0;
while !tail.is_empty() {
let i = memchr(b'\n', tail.as_bytes())
.map(|i| i + 1)
.unwrap_or_else(|| tail.len());
if tail.as_bytes()[0..i].iter().all(u8::is_ascii_whitespace) {
tail = skip_empty_lines(&tail[i..]);
let new_node = self.arena.new_node(Element::Paragraph {
contents: if text.as_bytes()[pos - 1] == b'\n' {
&text[0..pos - 1]
} else {
&text[0..pos]
},
});
node.append(new_node, &mut self.arena).unwrap();
text = tail;
pos = 0;
} else if let Some((new_tail, element)) = self.parse_element(tail) {
if pos != 0 {
let new_node = self.arena.new_node(Element::Paragraph {
contents: if text.as_bytes()[pos - 1] == b'\n' {
&text[0..pos - 1]
} else {
&text[0..pos]
},
});
node.append(new_node, &mut self.arena).unwrap();
pos = 0;
}
let new_node = self.arena.new_node(element);
node.append(new_node, &mut self.arena).unwrap();
tail = skip_empty_lines(new_tail);
text = tail;
} else {
tail = &tail[i..];
pos += i;
}
}
if !text.is_empty() {
let new_node = self.arena.new_node(Element::Paragraph {
contents: if text.as_bytes()[pos - 1] == b'\n' {
&text[0..pos - 1]
} else {
&text[0..pos]
},
while !tail.is_empty() {
let i = memchr(b'\n', tail.as_bytes())
.map(|i| i + 1)
.unwrap_or_else(|| tail.len());
if tail.as_bytes()[0..i].iter().all(u8::is_ascii_whitespace) {
tail = skip_empty_lines(&tail[i..]);
let node = arena.new_node(Element::Paragraph);
parent.append(node, arena).unwrap();
containers.push(Container::Inline {
content: &text[0..pos].trim_end_matches('\n'),
node,
});
node.append(new_node, &mut self.arena).unwrap();
}
}
fn parse_element(&self, contents: &'a str) -> Option<(&'a str, Element<'a>)> {
if let Some((tail, fn_def)) = FnDef::parse(contents) {
let fn_def = Element::FnDef(fn_def);
return Some((tail, fn_def));
} else if let Some((tail, list)) = List::parse(contents) {
let list = Element::List(list);
return Some((tail, list));
}
let tail = contents.trim_start();
if let Some((tail, clock)) = Clock::parse(tail) {
return Some((tail, clock));
}
// TODO: LaTeX environment
if tail.starts_with("\\begin{") {}
if tail.starts_with('-') {
if let Ok((tail, rule)) = Rule::parse(tail) {
return Some((tail, rule));
text = tail;
pos = 0;
} else if let Some((new_tail, element)) = parse_element(tail, arena, containers) {
if pos != 0 {
let node = arena.new_node(Element::Paragraph);
parent.append(node, arena).unwrap();
containers.push(Container::Inline {
content: &text[0..pos].trim_end_matches('\n'),
node,
});
pos = 0;
}
}
if tail.starts_with(':') {
if let Some((tail, drawer)) = Drawer::parse(tail) {
return Some((tail, drawer));
}
}
if tail == ":" || tail.starts_with(": ") || tail.starts_with(":\n") {
let mut last_end = 1; // ":"
for i in memchr_iter(b'\n', contents.as_bytes()) {
last_end = i + 1;
let line = &contents[last_end..];
if !(line == ":" || line.starts_with(": ") || line.starts_with(":\n")) {
let fixed_width = Element::FixedWidth {
value: &contents[0..i + 1],
};
return Some((&contents[i + 1..], fixed_width));
}
}
let fixed_width = Element::FixedWidth {
value: &contents[0..last_end],
};
return Some((&contents[last_end..], fixed_width));
}
if tail == "#" || tail.starts_with("# ") || tail.starts_with("#\n") {
let mut last_end = 1; // "#"
for i in memchr_iter(b'\n', contents.as_bytes()) {
last_end = i + 1;
let line = &contents[last_end..];
if !(line == "#" || line.starts_with("# ") || line.starts_with("#\n")) {
let fixed_width = Element::Comment {
value: &contents[0..i + 1],
};
return Some((&contents[i + 1..], fixed_width));
}
}
let fixed_width = Element::Comment {
value: &contents[0..last_end],
};
return Some((&contents[last_end..], fixed_width));
}
if tail.starts_with("#+") {
Block::parse(tail)
.or_else(|| DynBlock::parse(tail))
.or_else(|| Keyword::parse(tail).ok())
parent.append(element, arena).unwrap();
tail = skip_empty_lines(new_tail);
text = tail;
} else {
None
tail = &tail[i..];
pos += i;
}
}
fn parse_objects_children(&mut self, contents: &'a str, node: NodeId) {
let mut tail = contents;
if !text.is_empty() {
let node = arena.new_node(Element::Paragraph);
parent.append(node, arena).unwrap();
containers.push(Container::Inline {
content: &text[0..pos].trim_end_matches('\n'),
node,
});
}
}
if let Some((new_tail, obj)) = self.parse_object(tail) {
let new_node = self.arena.new_node(obj);
node.append(new_node, &mut self.arena).unwrap();
tail = new_tail;
fn parse_element<'a>(
contents: &'a str,
arena: &mut Arena<Element<'a>>,
containers: &mut Vec<Container<'a>>,
) -> Option<(&'a str, NodeId)> {
if let Some((tail, fn_def, content)) = FnDef::parse(contents) {
let node = arena.new_node(Element::FnDef(fn_def));
containers.push(Container::Block { content, node });
return Some((tail, node));
} else if let Some((tail, list, content)) = List::parse(contents) {
let indent = list.indent;
let node = arena.new_node(Element::List(list));
containers.push(Container::List {
content,
node,
indent,
});
return Some((tail, node));
}
let tail = contents.trim_start();
if let Some((tail, clock)) = Clock::parse(tail) {
return Some((tail, arena.new_node(clock)));
}
// TODO: LaTeX environment
if tail.starts_with("\\begin{") {}
if tail.starts_with('-') {
if let Ok((tail, rule)) = Rule::parse(tail) {
return Some((tail, arena.new_node(rule)));
}
}
let mut text = tail;
let mut pos = 0;
if tail.starts_with(':') {
if let Some((tail, drawer, content)) = Drawer::parse(tail) {
return Some((tail, arena.new_node(drawer)));
}
}
let bs = bytes!(b'@', b'<', b'[', b' ', b'(', b'{', b'\'', b'"', b'\n');
// FixedWidth
if tail == ":" || tail.starts_with(": ") || tail.starts_with(":\n") {
let mut last_end = 1; // ":"
for i in memchr_iter(b'\n', contents.as_bytes()) {
last_end = i + 1;
let line = &contents[last_end..];
if !(line == ":" || line.starts_with(": ") || line.starts_with(":\n")) {
let fixed_width = arena.new_node(Element::FixedWidth {
value: &contents[0..i + 1],
});
return Some((&contents[i + 1..], fixed_width));
}
}
let fixed_width = arena.new_node(Element::FixedWidth {
value: &contents[0..last_end],
});
return Some((&contents[last_end..], fixed_width));
}
while let Some(off) = bs.find(tail.as_bytes()) {
match tail.as_bytes()[off] {
b'{' => {
if let Some((new_tail, obj)) = self.parse_object(&tail[off..]) {
if pos != 0 {
let new_node = self.arena.new_node(Element::Text {
value: &text[0..pos + off],
});
node.append(new_node, &mut self.arena).unwrap();
pos = 0;
}
let new_node = self.arena.new_node(obj);
node.append(new_node, &mut self.arena).unwrap();
tail = new_tail;
text = new_tail;
} else if let Some((new_tail, obj)) = self.parse_object(&tail[off + 1..]) {
let new_node = self.arena.new_node(Element::Text {
value: &text[0..pos + off + 1],
// Comment
if tail == "#" || tail.starts_with("# ") || tail.starts_with("#\n") {
let mut last_end = 1; // "#"
for i in memchr_iter(b'\n', contents.as_bytes()) {
last_end = i + 1;
let line = &contents[last_end..];
if !(line == "#" || line.starts_with("# ") || line.starts_with("#\n")) {
let comment = arena.new_node(Element::Comment {
value: &contents[0..i + 1],
});
return Some((&contents[i + 1..], comment));
}
}
let comment = arena.new_node(Element::Comment {
value: &contents[0..last_end],
});
return Some((&contents[last_end..], comment));
}
if tail.starts_with("#+") {
if let Some((tail, block, content)) = Block::parse(tail) {
let node = arena.new_node(block);
containers.push(Container::Block { content, node });
Some((tail, node))
} else if let Some((tail, dyn_block, content)) = DynBlock::parse(tail) {
let node = arena.new_node(dyn_block);
containers.push(Container::Block { content, node });
Some((tail, node))
} else {
Keyword::parse(tail)
.ok()
.map(|(tail, kw)| (tail, arena.new_node(kw)))
}
} else {
None
}
}
fn parse_objects_children<'a>(
arena: &mut Arena<Element<'a>>,
content: &'a str,
parent: NodeId,
containers: &mut Vec<Container<'a>>,
) {
let mut tail = content;
if let Some((new_tail, obj)) = parse_object(tail, arena, containers) {
parent.append(obj, arena).unwrap();
tail = new_tail;
}
let mut text = tail;
let mut pos = 0;
let bs = bytes!(b'@', b'<', b'[', b' ', b'(', b'{', b'\'', b'"', b'\n');
while let Some(off) = bs.find(tail.as_bytes()) {
match tail.as_bytes()[off] {
b'{' => {
if let Some((new_tail, obj)) = parse_object(&tail[off..], arena, containers) {
if pos != 0 {
let node = arena.new_node(Element::Text {
value: &text[0..pos + off],
});
node.append(new_node, &mut self.arena).unwrap();
parent.append(node, arena).unwrap();
pos = 0;
let new_node = self.arena.new_node(obj);
node.append(new_node, &mut self.arena).unwrap();
tail = new_tail;
text = new_tail;
} else {
tail = &tail[off + 1..];
pos += off + 1;
}
parent.append(obj, arena).unwrap();
tail = new_tail;
text = new_tail;
continue;
} else if let Some((new_tail, obj)) =
parse_object(&tail[off + 1..], arena, containers)
{
let node = arena.new_node(Element::Text {
value: &text[0..pos + off + 1],
});
parent.append(node, arena).unwrap();
pos = 0;
parent.append(obj, arena).unwrap();
tail = new_tail;
text = new_tail;
continue;
}
b' ' | b'(' | b'\'' | b'"' | b'\n' => {
if let Some((new_tail, obj)) = self.parse_object(&tail[off + 1..]) {
let new_node = self.arena.new_node(Element::Text {
value: &text[0..pos + off + 1],
}
b' ' | b'(' | b'\'' | b'"' | b'\n' => {
if let Some((new_tail, obj)) = parse_object(&tail[off + 1..], arena, containers) {
let node = arena.new_node(Element::Text {
value: &text[0..pos + off + 1],
});
parent.append(node, arena).unwrap();
pos = 0;
parent.append(obj, arena).unwrap();
tail = new_tail;
text = new_tail;
continue;
}
}
_ => {
if let Some((new_tail, obj)) = parse_object(&tail[off..], arena, containers) {
if pos != 0 {
let node = arena.new_node(Element::Text {
value: &text[0..pos + off],
});
node.append(new_node, &mut self.arena).unwrap();
parent.append(node, arena).unwrap();
pos = 0;
let new_node = self.arena.new_node(obj);
node.append(new_node, &mut self.arena).unwrap();
tail = new_tail;
text = new_tail;
} else {
tail = &tail[off + 1..];
pos += off + 1;
}
}
_ => {
if let Some((new_tail, obj)) = self.parse_object(&tail[off..]) {
if pos != 0 {
let new_node = self.arena.new_node(Element::Text {
value: &text[0..pos + off],
});
node.append(new_node, &mut self.arena).unwrap();
pos = 0;
}
let new_node = self.arena.new_node(obj);
node.append(new_node, &mut self.arena).unwrap();
tail = new_tail;
text = new_tail;
} else {
tail = &tail[off + 1..];
pos += off + 1;
}
parent.append(obj, arena).unwrap();
tail = new_tail;
text = new_tail;
continue;
}
}
}
if !text.is_empty() {
let new_node = self.arena.new_node(Element::Text { value: text });
node.append(new_node, &mut self.arena).unwrap();
}
tail = &tail[off + 1..];
pos += off + 1;
}
fn parse_object(&self, contents: &'a str) -> Option<(&'a str, Element<'a>)> {
if contents.len() < 3 {
return None;
}
if !text.is_empty() {
let node = arena.new_node(Element::Text { value: text });
parent.append(node, arena).unwrap();
}
}
let bytes = contents.as_bytes();
match bytes[0] {
b'@' => Snippet::parse(contents).ok(),
b'{' => Macros::parse(contents).ok(),
b'<' => RadioTarget::parse(contents)
.or_else(|_| Target::parse(contents))
.or_else(|_| {
Timestamp::parse_active(contents)
.map(|(tail, timestamp)| (tail, timestamp.into()))
})
.or_else(|_| {
Timestamp::parse_diary(contents)
.map(|(tail, timestamp)| (tail, timestamp.into()))
})
.ok(),
b'[' => {
if contents[1..].starts_with("fn:") {
FnRef::parse(contents).map(|(tail, fn_ref)| (tail, fn_ref.into()))
} else if bytes[1] == b'[' {
Link::parse(contents).ok()
} else {
Cookie::parse(contents)
.map(|(tail, cookie)| (tail, cookie.into()))
.or_else(|| {
Timestamp::parse_inactive(contents)
.map(|(tail, timestamp)| (tail, timestamp.into()))
.ok()
})
}
}
b'*' => parse_emphasis(contents, b'*')
.map(|(tail, contents)| (tail, Element::Bold { contents })),
b'+' => parse_emphasis(contents, b'+')
.map(|(tail, contents)| (tail, Element::Strike { contents })),
b'/' => parse_emphasis(contents, b'/')
.map(|(tail, contents)| (tail, Element::Italic { contents })),
b'_' => parse_emphasis(contents, b'_')
.map(|(tail, contents)| (tail, Element::Underline { contents })),
b'=' => parse_emphasis(contents, b'=')
.map(|(tail, value)| (tail, Element::Verbatim { value })),
b'~' => {
parse_emphasis(contents, b'~').map(|(tail, value)| (tail, Element::Code { value }))
}
b's' if contents.starts_with("src_") => InlineSrc::parse(contents).ok(),
b'c' if contents.starts_with("call_") => InlineCall::parse(contents).ok(),
_ => None,
}
fn parse_object<'a>(
contents: &'a str,
arena: &mut Arena<Element<'a>>,
containers: &mut Vec<Container<'a>>,
) -> Option<(&'a str, NodeId)> {
if contents.len() < 3 {
return None;
}
fn parse_list_items(&mut self, mut contents: &'a str, indent: usize, node: NodeId) {
while !contents.is_empty() {
let (tail, list_item) = ListItem::parse(contents, indent);
let list_item = Element::ListItem(list_item);
let new_node = self.arena.new_node(list_item);
node.append(new_node, &mut self.arena).unwrap();
contents = tail;
let bytes = contents.as_bytes();
match bytes[0] {
b'@' => Snippet::parse(contents)
.ok()
.map(|(tail, element)| (tail, arena.new_node(element))),
b'{' => Macros::parse(contents)
.ok()
.map(|(tail, element)| (tail, arena.new_node(element))),
b'<' => RadioTarget::parse(contents)
.map(|(tail, (radio, content))| (tail, radio))
.or_else(|_| Target::parse(contents))
.or_else(|_| {
Timestamp::parse_active(contents).map(|(tail, timestamp)| (tail, timestamp.into()))
})
.or_else(|_| {
Timestamp::parse_diary(contents).map(|(tail, timestamp)| (tail, timestamp.into()))
})
.ok()
.map(|(tail, element)| (tail, arena.new_node(element))),
b'[' => {
if contents[1..].starts_with("fn:") {
FnRef::parse(contents)
.map(|(tail, fn_ref)| (tail, fn_ref.into()))
.map(|(tail, element)| (tail, arena.new_node(element)))
} else if bytes[1] == b'[' {
Link::parse(contents)
.ok()
.map(|(tail, element)| (tail, arena.new_node(element)))
} else {
Cookie::parse(contents)
.map(|(tail, cookie)| (tail, cookie.into()))
.or_else(|| {
Timestamp::parse_inactive(contents)
.map(|(tail, timestamp)| (tail, timestamp.into()))
.ok()
})
.map(|(tail, element)| (tail, arena.new_node(element)))
}
}
b'*' => {
if let Some((tail, content)) = parse_emphasis(contents, b'*') {
let node = arena.new_node(Element::Bold);
containers.push(Container::Inline { content, node });
Some((tail, node))
} else {
None
}
}
b'+' => {
if let Some((tail, content)) = parse_emphasis(contents, b'+') {
let node = arena.new_node(Element::Strike);
containers.push(Container::Inline { content, node });
Some((tail, node))
} else {
None
}
}
b'/' => {
if let Some((tail, content)) = parse_emphasis(contents, b'/') {
let node = arena.new_node(Element::Italic);
containers.push(Container::Inline { content, node });
Some((tail, node))
} else {
None
}
}
b'_' => {
if let Some((tail, content)) = parse_emphasis(contents, b'_') {
let node = arena.new_node(Element::Underline);
containers.push(Container::Inline { content, node });
Some((tail, node))
} else {
None
}
}
b'=' => parse_emphasis(contents, b'=')
.map(|(tail, value)| (tail, arena.new_node(Element::Verbatim { value }))),
b'~' => parse_emphasis(contents, b'~')
.map(|(tail, value)| (tail, arena.new_node(Element::Code { value }))),
b's' => InlineSrc::parse(contents)
.ok()
.map(|(tail, element)| (tail, arena.new_node(element))),
b'c' => InlineCall::parse(contents)
.ok()
.map(|(tail, element)| (tail, arena.new_node(element))),
_ => None,
}
}
/// Consumes `contents` one list item at a time until nothing is left.
///
/// For every item returned by `ListItem::parse` (called with the given
/// `indent`), a `Element::ListItem` node is created in `arena`, appended to
/// `parent`, and registered in `containers` as a `Container::Block` together
/// with the item's content.
///
/// # Panics
///
/// Panics if `NodeId::append` reports an error.
fn parse_list_items<'a>(
    arena: &mut Arena<Element<'a>>,
    mut contents: &'a str,
    indent: usize,
    parent: NodeId,
    containers: &mut Vec<Container<'a>>,
) {
    loop {
        if contents.is_empty() {
            break;
        }

        let (rest, item, body) = ListItem::parse(contents, indent);
        let item_node = arena.new_node(Element::ListItem(item));
        parent.append(item_node, arena).unwrap();
        containers.push(Container::Block {
            content: body,
            node: item_node,
        });

        // Continue with whatever follows the item just handled.
        contents = rest;
    }
}