diff --git a/src/elements/mod.rs b/src/elements/mod.rs index ad6d127..7f54147 100644 --- a/src/elements/mod.rs +++ b/src/elements/mod.rs @@ -18,6 +18,7 @@ mod planning; mod radio_target; mod rule; mod snippet; +mod table; mod target; mod timestamp; mod title; @@ -46,6 +47,7 @@ pub use self::{ radio_target::RadioTarget, rule::Rule, snippet::Snippet, + table::{Table, TableRow}, target::Target, timestamp::{Datetime, Timestamp}, title::Title, @@ -98,6 +100,9 @@ pub enum Element<'a> { Comment { value: &'a str }, FixedWidth { value: &'a str }, Title(Title<'a>), + Table(Table<'a>), + TableRow(TableRow), + TableCell, } impl Element<'_> { @@ -118,7 +123,10 @@ impl Element<'_> { | Element::Section | Element::Strike | Element::Underline - | Element::Title(_) => true, + | Element::Title(_) + | Element::Table(_) + | Element::TableRow(_) + | Element::TableCell => true, _ => false, } } @@ -167,7 +175,9 @@ impl_from!( SpecialBlock, Target, Timestamp, + Table, VerseBlock; RadioTarget, - List + List, + TableRow ); diff --git a/src/elements/table.rs b/src/elements/table.rs new file mode 100644 index 0000000..6eacffc --- /dev/null +++ b/src/elements/table.rs @@ -0,0 +1,34 @@ +#[derive(Debug)] +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "serde", derive(serde::Serialize))] +#[cfg_attr(feature = "serde", serde(tag = "table_type"))] +pub enum Table<'a> { + #[cfg_attr(feature = "serde", serde(rename = "org"))] + Org { tblfm: Option<&'a str> }, + #[cfg_attr(feature = "serde", serde(rename = "table.el"))] + TableEl { value: &'a str }, +} + +#[derive(Debug)] +#[cfg_attr(test, derive(PartialEq))] +#[cfg_attr(feature = "serde", derive(serde::Serialize))] +#[cfg_attr( + feature = "serde", + serde(tag = "table_row_type", rename_all = "kebab-case") +)] +pub enum TableRow { + Standard, + Rule, +} + +impl TableRow { + pub(crate) fn parse(input: &str) -> Option { + if input.starts_with("|-") { + Some(TableRow::Rule) + } else if input.starts_with('|') { + Some(TableRow::Standard) + } else { + None + } + } +} diff --git a/src/export/html.rs b/src/export/html.rs index 7adaa02..a07a126 100644 --- a/src/export/html.rs +++ b/src/export/html.rs @@ -158,6 +158,9 @@ pub trait HtmlHandler> { Rule => write!(w, "
")?, Cookie(cookie) => write!(w, "{}", cookie.value)?, Title(title) => write!(w, "", if title.level <= 6 { title.level } else { 6 })?, + Table(_) => (), + TableRow(_) => (), + TableCell => (), } Ok(()) @@ -189,6 +192,9 @@ pub trait HtmlHandler> { Strike => write!(w, "")?, Underline => write!(w, "")?, Title(title) => write!(w, "", if title.level <= 6 { title.level } else { 6 })?, + Table(_) => (), + TableRow(_) => (), + TableCell => (), // non-container elements _ => debug_assert!(!element.is_container()), } diff --git a/src/export/org.rs b/src/export/org.rs index 9fcabb8..fff11fe 100644 --- a/src/export/org.rs +++ b/src/export/org.rs @@ -149,6 +149,9 @@ pub trait OrgHandler> { } write!(&mut w, " ")?; } + Table(_) => (), + TableRow(_) => (), + TableCell => (), } Ok(()) @@ -184,6 +187,9 @@ pub trait OrgHandler> { } writeln!(&mut w)?; } + Table(_) => (), + TableRow(_) => (), + TableCell => (), // non-container elements _ => debug_assert!(!element.is_container()), } diff --git a/src/org.rs b/src/org.rs index b21ffd0..f41fd74 100644 --- a/src/org.rs +++ b/src/org.rs @@ -1,47 +1,16 @@ use indextree::{Arena, NodeEdge, NodeId}; -use jetscii::bytes; -use memchr::{memchr, memchr2, memchr_iter}; use std::io::{Error, Write}; use crate::config::ParseConfig; use crate::elements::*; use crate::export::*; -use crate::parsers::skip_empty_lines; +use crate::parsers::*; pub struct Org<'a> { pub(crate) arena: Arena>, pub(crate) document: NodeId, } -enum Container<'a> { - // List - List { - content: &'a str, - node: NodeId, - indent: usize, - }, - // Block, List Item - Block { - content: &'a str, - node: NodeId, - }, - // Pargraph, Inline Markup - Inline { - content: &'a str, - node: NodeId, - }, - // Headline - Headline { - content: &'a str, - node: NodeId, - }, - // Document - Document { - content: &'a str, - node: NodeId, - }, -} - #[derive(Debug)] pub enum Event<'a> { Start(&'a Element<'a>), @@ -57,69 +26,32 @@ impl Org<'_> { let mut arena = Arena::new(); let document = arena.new_node(Element::Document); - let mut containers = vec![Container::Document { + let containers = &mut vec![Container::Document { content, node: document, }]; while let Some(container) = containers.pop() { match container { - Container::Document { - content, - node: parent, - } => { - let mut tail = skip_empty_lines(content); - if let Some((new_tail, content)) = parse_section(tail) { - let node = arena.new_node(Element::Section); - parent.append(node, &mut arena).unwrap(); - containers.push(Container::Block { content, node }); - tail = new_tail; - } - while !tail.is_empty() { - let (new_tail, content) = parse_headline(tail); - let node = arena.new_node(Element::Headline); - parent.append(node, &mut arena).unwrap(); - containers.push(Container::Headline { content, node }); - tail = new_tail; - } + Container::Document { content, node } => { + parse_section_and_headlines(&mut arena, content, node, containers); } - Container::Headline { - content, - node: parent, - } => { - let mut tail = content; - let (new_tail, title) = Title::parse(tail, config).unwrap(); - let content = title.raw; - let node = arena.new_node(Element::Title(title)); - parent.append(node, &mut arena).unwrap(); - containers.push(Container::Inline { content, node }); - tail = skip_empty_lines(new_tail); - if let Some((new_tail, content)) = parse_section(tail) { - let node = arena.new_node(Element::Section); - parent.append(node, &mut arena).unwrap(); - containers.push(Container::Block { content, node }); - tail = new_tail; - } - while !tail.is_empty() { - let (new_tail, content) = parse_headline(tail); - let node = arena.new_node(Element::Headline); - parent.append(node, &mut arena).unwrap(); - containers.push(Container::Headline { content, node }); - tail = new_tail; - } + Container::Headline { content, node } => { + let content = parse_title(&mut arena, content, node, containers, config); + parse_section_and_headlines(&mut arena, content, node, containers); } Container::Block { content, node } => { - parse_blocks(&mut arena, content, node, &mut containers); + parse_blocks(&mut arena, content, node, containers); } Container::Inline { content, node } => { - parse_inlines(&mut arena, content, node, &mut containers); + parse_inlines(&mut arena, content, node, containers); } Container::List { content, node, indent, } => { - parse_list_items(&mut arena, content, indent, node, &mut containers); + parse_list_items(&mut arena, content, indent, node, containers); } } } @@ -176,467 +108,3 @@ impl Org<'_> { Ok(()) } } - -fn is_headline(text: &str) -> Option { - if let Some(off) = memchr2(b'\n', b' ', text.as_bytes()) { - if off > 0 && text[0..off].as_bytes().iter().all(|&c| c == b'*') { - Some(off) - } else { - None - } - } else if !text.is_empty() && text.as_bytes().iter().all(|&c| c == b'*') { - Some(text.len()) - } else { - None - } -} - -fn parse_section(text: &str) -> Option<(&str, &str)> { - if text.is_empty() || is_headline(text).is_some() { - return None; - } - - for i in memchr_iter(b'\n', text.as_bytes()) { - if is_headline(&text[i + 1..]).is_some() { - return Some((&text[i + 1..], &text[0..i + 1])); - } - } - - Some(("", text)) -} - -fn parse_headline(text: &str) -> (&str, &str) { - let level = is_headline(text).unwrap(); - - for i in memchr_iter(b'\n', text.as_bytes()) { - if let Some(l) = is_headline(&text[i + 1..]) { - if l <= level { - return (&text[i + 1..], &text[0..i + 1]); - } - } - } - - ("", text) -} - -fn parse_blocks<'a>( - arena: &mut Arena>, - content: &'a str, - parent: NodeId, - containers: &mut Vec>, -) { - let mut tail = skip_empty_lines(content); - - if let Some((new_tail, element)) = parse_block(content, arena, containers) { - parent.append(element, arena).unwrap(); - tail = skip_empty_lines(new_tail); - } - - let mut text = tail; - let mut pos = 0; - - while !tail.is_empty() { - let i = memchr(b'\n', tail.as_bytes()) - .map(|i| i + 1) - .unwrap_or_else(|| tail.len()); - if tail.as_bytes()[0..i].iter().all(u8::is_ascii_whitespace) { - tail = skip_empty_lines(&tail[i..]); - let node = arena.new_node(Element::Paragraph); - parent.append(node, arena).unwrap(); - containers.push(Container::Inline { - content: &text[0..pos].trim_end_matches('\n'), - node, - }); - text = tail; - pos = 0; - } else if let Some((new_tail, element)) = parse_block(tail, arena, containers) { - if pos != 0 { - let node = arena.new_node(Element::Paragraph); - parent.append(node, arena).unwrap(); - containers.push(Container::Inline { - content: &text[0..pos].trim_end_matches('\n'), - node, - }); - pos = 0; - } - parent.append(element, arena).unwrap(); - tail = skip_empty_lines(new_tail); - text = tail; - } else { - tail = &tail[i..]; - pos += i; - } - } - - if !text.is_empty() { - let node = arena.new_node(Element::Paragraph); - parent.append(node, arena).unwrap(); - containers.push(Container::Inline { - content: &text[0..pos].trim_end_matches('\n'), - node, - }); - } -} - -fn parse_block<'a>( - contents: &'a str, - arena: &mut Arena>, - containers: &mut Vec>, -) -> Option<(&'a str, NodeId)> { - if let Some((tail, fn_def, content)) = FnDef::parse(contents) { - let node = arena.new_node(Element::FnDef(fn_def)); - containers.push(Container::Block { content, node }); - return Some((tail, node)); - } else if let Some((tail, list, content)) = List::parse(contents) { - let indent = list.indent; - let node = arena.new_node(Element::List(list)); - containers.push(Container::List { - content, - node, - indent, - }); - return Some((tail, node)); - } - - let tail = contents.trim_start(); - - if let Ok((tail, clock)) = Clock::parse(tail) { - return Some((tail, arena.new_node(clock))); - } - - // TODO: LaTeX environment - if tail.starts_with("\\begin{") {} - - if tail.starts_with('-') { - if let Ok((tail, rule)) = Rule::parse(tail) { - return Some((tail, arena.new_node(rule))); - } - } - - if tail.starts_with(':') { - if let Ok((tail, (drawer, content))) = Drawer::parse(tail) { - let node = arena.new_node(drawer.into()); - containers.push(Container::Block { content, node }); - return Some((tail, node)); - } - } - - // FixedWidth - if tail == ":" || tail.starts_with(": ") || tail.starts_with(":\n") { - let mut last_end = 1; // ":" - for i in memchr_iter(b'\n', contents.as_bytes()) { - last_end = i + 1; - let tail = contents[last_end..].trim_start(); - if !(tail == ":" || tail.starts_with(": ") || tail.starts_with(":\n")) { - let fixed_width = arena.new_node(Element::FixedWidth { - value: &contents[0..last_end], - }); - return Some((&contents[last_end..], fixed_width)); - } - } - let fixed_width = arena.new_node(Element::FixedWidth { - value: &contents[0..last_end], - }); - return Some((&contents[last_end..], fixed_width)); - } - - // Comment - if tail == "#" || tail.starts_with("# ") || tail.starts_with("#\n") { - let mut last_end = 1; // "#" - for i in memchr_iter(b'\n', contents.as_bytes()) { - last_end = i + 1; - let line = contents[last_end..].trim_start(); - if !(line == "#" || line.starts_with("# ") || line.starts_with("#\n")) { - let comment = arena.new_node(Element::Comment { - value: &contents[0..last_end], - }); - return Some((&contents[last_end..], comment)); - } - } - let comment = arena.new_node(Element::Comment { - value: &contents[0..last_end], - }); - return Some((&contents[last_end..], comment)); - } - - if tail.starts_with("#+") { - if let Ok((tail, (block, content))) = Block::parse(tail) { - match &*block.name.to_uppercase() { - "CENTER" => { - let node = arena.new_node(Element::CenterBlock(CenterBlock { - parameters: block.args, - })); - containers.push(Container::Block { content, node }); - Some((tail, node)) - } - "QUOTE" => { - let node = arena.new_node(Element::QuoteBlock(QuoteBlock { - parameters: block.args, - })); - containers.push(Container::Block { content, node }); - Some((tail, node)) - } - "COMMENT" => { - let node = arena.new_node(Element::CommentBlock(CommentBlock { - data: block.args, - contents: content, - })); - Some((tail, node)) - } - "EXAMPLE" => { - let node = arena.new_node(Element::ExampleBlock(ExampleBlock { - data: block.args, - contents: content, - })); - Some((tail, node)) - } - "EXPORT" => { - let node = arena.new_node(Element::ExportBlock(ExportBlock { - data: block.args.unwrap_or(""), - contents: content, - })); - Some((tail, node)) - } - "SRC" => { - let (language, arguments) = block - .args - .map(|args| args.split_at(args.find(' ').unwrap_or_else(|| args.len()))) - .unwrap_or(("", "")); - let node = arena.new_node(Element::SourceBlock(SourceBlock { - arguments, - language, - contents: content, - })); - Some((tail, node)) - } - "VERSE" => { - let node = arena.new_node(Element::VerseBlock(VerseBlock { - parameters: block.args, - })); - containers.push(Container::Block { content, node }); - Some((tail, node)) - } - _ => { - let node = arena.new_node(Element::SpecialBlock(SpecialBlock { - parameters: block.args, - name: block.name, - })); - containers.push(Container::Block { content, node }); - Some((tail, node)) - } - } - } else if let Ok((tail, (dyn_block, content))) = DynBlock::parse(tail) { - let node = arena.new_node(dyn_block); - containers.push(Container::Block { content, node }); - Some((tail, node)) - } else { - Keyword::parse(tail) - .ok() - .map(|(tail, kw)| (tail, arena.new_node(kw))) - } - } else { - None - } -} - -fn parse_inlines<'a>( - arena: &mut Arena>, - content: &'a str, - parent: NodeId, - containers: &mut Vec>, -) { - let mut tail = content; - - if let Some((new_tail, element)) = parse_inline(tail, arena, containers) { - parent.append(element, arena).unwrap(); - tail = new_tail; - } - - let mut text = tail; - let mut pos = 0; - - let bs = bytes!(b'@', b'<', b'[', b' ', b'(', b'{', b'\'', b'"', b'\n'); - - while let Some(off) = bs.find(tail.as_bytes()) { - match tail.as_bytes()[off] { - b'{' => { - if let Some((new_tail, element)) = parse_inline(&tail[off..], arena, containers) { - if pos != 0 { - let node = arena.new_node(Element::Text { - value: &text[0..pos + off], - }); - parent.append(node, arena).unwrap(); - pos = 0; - } - parent.append(element, arena).unwrap(); - tail = new_tail; - text = new_tail; - continue; - } else if let Some((new_tail, element)) = - parse_inline(&tail[off + 1..], arena, containers) - { - let node = arena.new_node(Element::Text { - value: &text[0..pos + off + 1], - }); - parent.append(node, arena).unwrap(); - pos = 0; - parent.append(element, arena).unwrap(); - tail = new_tail; - text = new_tail; - continue; - } - } - b' ' | b'(' | b'\'' | b'"' | b'\n' => { - if let Some((new_tail, element)) = parse_inline(&tail[off + 1..], arena, containers) - { - let node = arena.new_node(Element::Text { - value: &text[0..pos + off + 1], - }); - parent.append(node, arena).unwrap(); - pos = 0; - parent.append(element, arena).unwrap(); - tail = new_tail; - text = new_tail; - continue; - } - } - _ => { - if let Some((new_tail, element)) = parse_inline(&tail[off..], arena, containers) { - if pos != 0 { - let node = arena.new_node(Element::Text { - value: &text[0..pos + off], - }); - parent.append(node, arena).unwrap(); - pos = 0; - } - parent.append(element, arena).unwrap(); - tail = new_tail; - text = new_tail; - continue; - } - } - } - tail = &tail[off + 1..]; - pos += off + 1; - } - - if !text.is_empty() { - let node = arena.new_node(Element::Text { value: text }); - parent.append(node, arena).unwrap(); - } -} - -fn parse_inline<'a>( - contents: &'a str, - arena: &mut Arena>, - containers: &mut Vec>, -) -> Option<(&'a str, NodeId)> { - if contents.len() < 3 { - return None; - } - - let bytes = contents.as_bytes(); - match bytes[0] { - b'@' => Snippet::parse(contents) - .ok() - .map(|(tail, element)| (tail, arena.new_node(element))), - b'{' => Macros::parse(contents) - .ok() - .map(|(tail, element)| (tail, arena.new_node(element))), - b'<' => RadioTarget::parse(contents) - .map(|(tail, (radio, _content))| (tail, radio)) - .or_else(|_| Target::parse(contents)) - .or_else(|_| { - Timestamp::parse_active(contents).map(|(tail, timestamp)| (tail, timestamp.into())) - }) - .or_else(|_| { - Timestamp::parse_diary(contents).map(|(tail, timestamp)| (tail, timestamp.into())) - }) - .ok() - .map(|(tail, element)| (tail, arena.new_node(element))), - b'[' => { - if contents[1..].starts_with("fn:") { - FnRef::parse(contents) - .ok() - .map(|(tail, fn_ref)| (tail, arena.new_node(fn_ref.into()))) - } else if bytes[1] == b'[' { - Link::parse(contents) - .ok() - .map(|(tail, element)| (tail, arena.new_node(element))) - } else { - Cookie::parse(contents) - .map(|(tail, cookie)| (tail, cookie.into())) - .or_else(|_| { - Timestamp::parse_inactive(contents) - .map(|(tail, timestamp)| (tail, timestamp.into())) - }) - .ok() - .map(|(tail, element)| (tail, arena.new_node(element))) - } - } - b'*' => { - if let Some((tail, content)) = parse_emphasis(contents, b'*') { - let node = arena.new_node(Element::Bold); - containers.push(Container::Inline { content, node }); - Some((tail, node)) - } else { - None - } - } - b'+' => { - if let Some((tail, content)) = parse_emphasis(contents, b'+') { - let node = arena.new_node(Element::Strike); - containers.push(Container::Inline { content, node }); - Some((tail, node)) - } else { - None - } - } - b'/' => { - if let Some((tail, content)) = parse_emphasis(contents, b'/') { - let node = arena.new_node(Element::Italic); - containers.push(Container::Inline { content, node }); - Some((tail, node)) - } else { - None - } - } - b'_' => { - if let Some((tail, content)) = parse_emphasis(contents, b'_') { - let node = arena.new_node(Element::Underline); - containers.push(Container::Inline { content, node }); - Some((tail, node)) - } else { - None - } - } - b'=' => parse_emphasis(contents, b'=') - .map(|(tail, value)| (tail, arena.new_node(Element::Verbatim { value }))), - b'~' => parse_emphasis(contents, b'~') - .map(|(tail, value)| (tail, arena.new_node(Element::Code { value }))), - b's' => InlineSrc::parse(contents) - .ok() - .map(|(tail, element)| (tail, arena.new_node(element))), - b'c' => InlineCall::parse(contents) - .ok() - .map(|(tail, element)| (tail, arena.new_node(element))), - _ => None, - } -} - -fn parse_list_items<'a>( - arena: &mut Arena>, - mut contents: &'a str, - indent: usize, - parent: NodeId, - containers: &mut Vec>, -) { - while !contents.is_empty() { - let (tail, list_item, content) = ListItem::parse(contents, indent); - let list_item = Element::ListItem(list_item); - let node = arena.new_node(list_item); - parent.append(node, arena).unwrap(); - containers.push(Container::Block { content, node }); - contents = tail; - } -} diff --git a/src/parsers.rs b/src/parsers.rs index a1ca1e6..d5f7d40 100644 --- a/src/parsers.rs +++ b/src/parsers.rs @@ -1,6 +1,8 @@ -// resued nom parsers +// parser related functions -use memchr::{memchr, memchr_iter}; +use indextree::{Arena, NodeId}; +use jetscii::bytes; +use memchr::{memchr, memchr2, memchr_iter}; use nom::{ branch::alt, bytes::complete::{tag, take_till}, @@ -9,7 +11,618 @@ use nom::{ error_position, Err, IResult, }; -pub(crate) fn eol(input: &str) -> IResult<&str, ()> { +use crate::config::ParseConfig; +use crate::elements::*; + +pub enum Container<'a> { + // List + List { + content: &'a str, + node: NodeId, + indent: usize, + }, + // Block, List Item + Block { + content: &'a str, + node: NodeId, + }, + // Pargraph, Inline Markup + Inline { + content: &'a str, + node: NodeId, + }, + // Headline + Headline { + content: &'a str, + node: NodeId, + }, + // Document + Document { + content: &'a str, + node: NodeId, + }, +} + +pub fn parse_title<'a>( + arena: &mut Arena>, + content: &'a str, + parent: NodeId, + containers: &mut Vec>, + config: &ParseConfig, +) -> &'a str { + let (tail, title) = Title::parse(content, config).unwrap(); + let content = title.raw; + let node = arena.new_node(Element::Title(title)); + parent.append(node, arena).unwrap(); + containers.push(Container::Inline { content, node }); + tail +} + +pub fn parse_section_and_headlines<'a>( + arena: &mut Arena>, + content: &'a str, + parent: NodeId, + containers: &mut Vec>, +) { + let content = skip_empty_lines(content); + if content.is_empty() { + return; + } + + let mut last_end = 0; + for i in memchr_iter(b'\n', content.as_bytes()) { + if let Some((mut tail, headline_content)) = parse_headline(&content[last_end..]) { + if last_end != 0 { + let node = arena.new_node(Element::Section); + parent.append(node, arena).unwrap(); + containers.push(Container::Block { + content: &content[0..last_end], + node, + }); + } + let node = arena.new_node(Element::Headline); + parent.append(node, arena).unwrap(); + containers.push(Container::Headline { + content: headline_content, + node, + }); + while let Some((new_tail, content)) = parse_headline(tail) { + let node = arena.new_node(Element::Headline); + parent.append(node, arena).unwrap(); + containers.push(Container::Headline { content, node }); + tail = new_tail; + } + return; + } + last_end = i + 1; + } + + let node = arena.new_node(Element::Section); + parent.append(node, arena).unwrap(); + containers.push(Container::Block { content, node }); +} + +pub fn parse_headline(text: &str) -> Option<(&str, &str)> { + let level = get_headline_level(text)?; + + for i in memchr_iter(b'\n', text.as_bytes()) { + if let Some(l) = get_headline_level(&text[i + 1..]) { + if l <= level { + return Some((&text[i + 1..], &text[0..i + 1])); + } + } + } + + Some(("", text)) +} + +pub fn get_headline_level(text: &str) -> Option { + if let Some(off) = memchr2(b'\n', b' ', text.as_bytes()) { + if off > 0 && text[0..off].as_bytes().iter().all(|&c| c == b'*') { + Some(off) + } else { + None + } + } else if !text.is_empty() && text.as_bytes().iter().all(|&c| c == b'*') { + Some(text.len()) + } else { + None + } +} + +pub fn parse_blocks<'a>( + arena: &mut Arena>, + content: &'a str, + parent: NodeId, + containers: &mut Vec>, +) { + let mut tail = skip_empty_lines(content); + + if let Some((new_tail, element)) = parse_block(content, arena, containers) { + parent.append(element, arena).unwrap(); + tail = skip_empty_lines(new_tail); + } + + let mut text = tail; + let mut pos = 0; + + while !tail.is_empty() { + let i = memchr(b'\n', tail.as_bytes()) + .map(|i| i + 1) + .unwrap_or_else(|| tail.len()); + if tail.as_bytes()[0..i].iter().all(u8::is_ascii_whitespace) { + tail = skip_empty_lines(&tail[i..]); + let node = arena.new_node(Element::Paragraph); + parent.append(node, arena).unwrap(); + containers.push(Container::Inline { + content: &text[0..pos].trim_end_matches('\n'), + node, + }); + text = tail; + pos = 0; + } else if let Some((new_tail, element)) = parse_block(tail, arena, containers) { + if pos != 0 { + let node = arena.new_node(Element::Paragraph); + parent.append(node, arena).unwrap(); + containers.push(Container::Inline { + content: &text[0..pos].trim_end_matches('\n'), + node, + }); + pos = 0; + } + parent.append(element, arena).unwrap(); + tail = skip_empty_lines(new_tail); + text = tail; + } else { + tail = &tail[i..]; + pos += i; + } + } + + if !text.is_empty() { + let node = arena.new_node(Element::Paragraph); + parent.append(node, arena).unwrap(); + containers.push(Container::Inline { + content: &text[0..pos].trim_end_matches('\n'), + node, + }); + } +} + +pub fn parse_block<'a>( + contents: &'a str, + arena: &mut Arena>, + containers: &mut Vec>, +) -> Option<(&'a str, NodeId)> { + if let Some((tail, node)) = prase_table(arena, contents, containers) { + return Some((tail, node)); + } + + if let Some((tail, fn_def, content)) = FnDef::parse(contents) { + let node = arena.new_node(Element::FnDef(fn_def)); + containers.push(Container::Block { content, node }); + return Some((tail, node)); + } else if let Some((tail, list, content)) = List::parse(contents) { + let indent = list.indent; + let node = arena.new_node(Element::List(list)); + containers.push(Container::List { + content, + node, + indent, + }); + return Some((tail, node)); + } + + let tail = contents.trim_start(); + + if let Ok((tail, clock)) = Clock::parse(tail) { + return Some((tail, arena.new_node(clock))); + } + + // TODO: LaTeX environment + if tail.starts_with("\\begin{") {} + + if tail.starts_with('-') { + if let Ok((tail, rule)) = Rule::parse(tail) { + return Some((tail, arena.new_node(rule))); + } + } + + if tail.starts_with(':') { + if let Ok((tail, (drawer, content))) = Drawer::parse(tail) { + let node = arena.new_node(drawer.into()); + containers.push(Container::Block { content, node }); + return Some((tail, node)); + } + } + + // FixedWidth + if tail == ":" || tail.starts_with(": ") || tail.starts_with(":\n") { + let mut last_end = 1; // ":" + for i in memchr_iter(b'\n', contents.as_bytes()) { + last_end = i + 1; + let tail = contents[last_end..].trim_start(); + if !(tail == ":" || tail.starts_with(": ") || tail.starts_with(":\n")) { + let fixed_width = arena.new_node(Element::FixedWidth { + value: &contents[0..last_end], + }); + return Some((&contents[last_end..], fixed_width)); + } + } + let fixed_width = arena.new_node(Element::FixedWidth { + value: &contents[0..last_end], + }); + return Some((&contents[last_end..], fixed_width)); + } + + // Comment + if tail == "#" || tail.starts_with("# ") || tail.starts_with("#\n") { + let mut last_end = 1; // "#" + for i in memchr_iter(b'\n', contents.as_bytes()) { + last_end = i + 1; + let line = contents[last_end..].trim_start(); + if !(line == "#" || line.starts_with("# ") || line.starts_with("#\n")) { + let comment = arena.new_node(Element::Comment { + value: &contents[0..last_end], + }); + return Some((&contents[last_end..], comment)); + } + } + let comment = arena.new_node(Element::Comment { + value: &contents[0..last_end], + }); + return Some((&contents[last_end..], comment)); + } + + if tail.starts_with("#+") { + if let Ok((tail, (block, content))) = Block::parse(tail) { + match &*block.name.to_uppercase() { + "CENTER" => { + let node = arena.new_node(Element::CenterBlock(CenterBlock { + parameters: block.args, + })); + containers.push(Container::Block { content, node }); + Some((tail, node)) + } + "QUOTE" => { + let node = arena.new_node(Element::QuoteBlock(QuoteBlock { + parameters: block.args, + })); + containers.push(Container::Block { content, node }); + Some((tail, node)) + } + "COMMENT" => { + let node = arena.new_node(Element::CommentBlock(CommentBlock { + data: block.args, + contents: content, + })); + Some((tail, node)) + } + "EXAMPLE" => { + let node = arena.new_node(Element::ExampleBlock(ExampleBlock { + data: block.args, + contents: content, + })); + Some((tail, node)) + } + "EXPORT" => { + let node = arena.new_node(Element::ExportBlock(ExportBlock { + data: block.args.unwrap_or(""), + contents: content, + })); + Some((tail, node)) + } + "SRC" => { + let (language, arguments) = block + .args + .map(|args| args.split_at(args.find(' ').unwrap_or_else(|| args.len()))) + .unwrap_or(("", "")); + let node = arena.new_node(Element::SourceBlock(SourceBlock { + arguments, + language, + contents: content, + })); + Some((tail, node)) + } + "VERSE" => { + let node = arena.new_node(Element::VerseBlock(VerseBlock { + parameters: block.args, + })); + containers.push(Container::Block { content, node }); + Some((tail, node)) + } + _ => { + let node = arena.new_node(Element::SpecialBlock(SpecialBlock { + parameters: block.args, + name: block.name, + })); + containers.push(Container::Block { content, node }); + Some((tail, node)) + } + } + } else if let Ok((tail, (dyn_block, content))) = DynBlock::parse(tail) { + let node = arena.new_node(dyn_block); + containers.push(Container::Block { content, node }); + Some((tail, node)) + } else { + Keyword::parse(tail) + .ok() + .map(|(tail, kw)| (tail, arena.new_node(kw))) + } + } else { + None + } +} + +pub fn parse_inlines<'a>( + arena: &mut Arena>, + content: &'a str, + parent: NodeId, + containers: &mut Vec>, +) { + let mut tail = content; + + if let Some((new_tail, element)) = parse_inline(tail, arena, containers) { + parent.append(element, arena).unwrap(); + tail = new_tail; + } + + let mut text = tail; + let mut pos = 0; + + let bs = bytes!(b'@', b'<', b'[', b' ', b'(', b'{', b'\'', b'"', b'\n'); + + while let Some(off) = bs.find(tail.as_bytes()) { + match tail.as_bytes()[off] { + b'{' => { + if let Some((new_tail, element)) = parse_inline(&tail[off..], arena, containers) { + if pos != 0 { + let node = arena.new_node(Element::Text { + value: &text[0..pos + off], + }); + parent.append(node, arena).unwrap(); + pos = 0; + } + parent.append(element, arena).unwrap(); + tail = new_tail; + text = new_tail; + continue; + } else if let Some((new_tail, element)) = + parse_inline(&tail[off + 1..], arena, containers) + { + let node = arena.new_node(Element::Text { + value: &text[0..pos + off + 1], + }); + parent.append(node, arena).unwrap(); + pos = 0; + parent.append(element, arena).unwrap(); + tail = new_tail; + text = new_tail; + continue; + } + } + b' ' | b'(' | b'\'' | b'"' | b'\n' => { + if let Some((new_tail, element)) = parse_inline(&tail[off + 1..], arena, containers) + { + let node = arena.new_node(Element::Text { + value: &text[0..pos + off + 1], + }); + parent.append(node, arena).unwrap(); + pos = 0; + parent.append(element, arena).unwrap(); + tail = new_tail; + text = new_tail; + continue; + } + } + _ => { + if let Some((new_tail, element)) = parse_inline(&tail[off..], arena, containers) { + if pos != 0 { + let node = arena.new_node(Element::Text { + value: &text[0..pos + off], + }); + parent.append(node, arena).unwrap(); + pos = 0; + } + parent.append(element, arena).unwrap(); + tail = new_tail; + text = new_tail; + continue; + } + } + } + tail = &tail[off + 1..]; + pos += off + 1; + } + + if !text.is_empty() { + let node = arena.new_node(Element::Text { value: text }); + parent.append(node, arena).unwrap(); + } +} + +pub fn parse_inline<'a>( + contents: &'a str, + arena: &mut Arena>, + containers: &mut Vec>, +) -> Option<(&'a str, NodeId)> { + if contents.len() < 3 { + return None; + } + + let bytes = contents.as_bytes(); + match bytes[0] { + b'@' => Snippet::parse(contents) + .ok() + .map(|(tail, element)| (tail, arena.new_node(element))), + b'{' => Macros::parse(contents) + .ok() + .map(|(tail, element)| (tail, arena.new_node(element))), + b'<' => RadioTarget::parse(contents) + .map(|(tail, (radio, _content))| (tail, radio)) + .or_else(|_| Target::parse(contents)) + .or_else(|_| { + Timestamp::parse_active(contents).map(|(tail, timestamp)| (tail, timestamp.into())) + }) + .or_else(|_| { + Timestamp::parse_diary(contents).map(|(tail, timestamp)| (tail, timestamp.into())) + }) + .ok() + .map(|(tail, element)| (tail, arena.new_node(element))), + b'[' => { + if contents[1..].starts_with("fn:") { + FnRef::parse(contents) + .ok() + .map(|(tail, fn_ref)| (tail, arena.new_node(fn_ref.into()))) + } else if bytes[1] == b'[' { + Link::parse(contents) + .ok() + .map(|(tail, element)| (tail, arena.new_node(element))) + } else { + Cookie::parse(contents) + .map(|(tail, cookie)| (tail, cookie.into())) + .or_else(|_| { + Timestamp::parse_inactive(contents) + .map(|(tail, timestamp)| (tail, timestamp.into())) + }) + .ok() + .map(|(tail, element)| (tail, arena.new_node(element))) + } + } + b'*' => { + if let Some((tail, content)) = parse_emphasis(contents, b'*') { + let node = arena.new_node(Element::Bold); + containers.push(Container::Inline { content, node }); + Some((tail, node)) + } else { + None + } + } + b'+' => { + if let Some((tail, content)) = parse_emphasis(contents, b'+') { + let node = arena.new_node(Element::Strike); + containers.push(Container::Inline { content, node }); + Some((tail, node)) + } else { + None + } + } + b'/' => { + if let Some((tail, content)) = parse_emphasis(contents, b'/') { + let node = arena.new_node(Element::Italic); + containers.push(Container::Inline { content, node }); + Some((tail, node)) + } else { + None + } + } + b'_' => { + if let Some((tail, content)) = parse_emphasis(contents, b'_') { + let node = arena.new_node(Element::Underline); + containers.push(Container::Inline { content, node }); + Some((tail, node)) + } else { + None + } + } + b'=' => parse_emphasis(contents, b'=') + .map(|(tail, value)| (tail, arena.new_node(Element::Verbatim { value }))), + b'~' => parse_emphasis(contents, b'~') + .map(|(tail, value)| (tail, arena.new_node(Element::Code { value }))), + b's' => InlineSrc::parse(contents) + .ok() + .map(|(tail, element)| (tail, arena.new_node(element))), + b'c' => InlineCall::parse(contents) + .ok() + .map(|(tail, element)| (tail, arena.new_node(element))), + _ => None, + } +} + +pub fn parse_list_items<'a>( + arena: &mut Arena>, + mut contents: &'a str, + indent: usize, + parent: NodeId, + containers: &mut Vec>, +) { + while !contents.is_empty() { + let (tail, list_item, content) = ListItem::parse(contents, indent); + let list_item = Element::ListItem(list_item); + let node = arena.new_node(list_item); + parent.append(node, arena).unwrap(); + containers.push(Container::Block { content, node }); + contents = tail; + } +} + +pub fn prase_table<'a>( + arena: &mut Arena>, + contents: &'a str, + containers: &mut Vec>, +) -> Option<(&'a str, NodeId)> { + if contents.trim_start().starts_with('|') { + let table_node = arena.new_node(Element::Table(Table::Org { tblfm: None })); + + let mut last_end = 0; + for start in memchr_iter(b'\n', contents.as_bytes()) { + let line = contents[last_end..start].trim(); + match TableRow::parse(line) { + Some(TableRow::Standard) => { + let row_node = arena.new_node(Element::TableRow(TableRow::Standard)); + table_node.append(row_node, arena).unwrap(); + for cell in line[1..].split_terminator('|') { + let cell_node = arena.new_node(Element::TableCell); + row_node.append(cell_node, arena).unwrap(); + containers.push(Container::Inline { + content: cell.trim(), + node: cell_node, + }); + } + } + Some(TableRow::Rule) => { + let row_node = arena.new_node(Element::TableRow(TableRow::Rule)); + table_node.append(row_node, arena).unwrap(); + } + None => return Some((&contents[last_end..], table_node)), + } + last_end = start + 1; + } + + Some(("", table_node)) + } else if contents.trim_start().starts_with("+-") + && contents[0..memchr(b'\n', contents.as_bytes()).unwrap_or_else(|| contents.len())] + .trim() + .as_bytes() + .iter() + .any(|&c| c != b'+' || c != b'-') + { + let mut last_end = 0; + for start in memchr_iter(b'\n', contents.as_bytes()) { + let line = contents[last_end..start].trim(); + if !line.starts_with('|') && !line.starts_with('+') { + return { + Some(( + &contents[last_end..], + arena.new_node(Element::Table(Table::TableEl { + value: &contents[0..last_end], + })), + )) + }; + } + last_end = start + 1; + } + + Some(( + "", + arena.new_node(Element::Table(Table::TableEl { value: contents })), + )) + } else { + None + } +} + +pub fn eol(input: &str) -> IResult<&str, ()> { let (input, _) = space0(input)?; if input.is_empty() { Ok(("", ())) @@ -19,7 +632,7 @@ pub(crate) fn eol(input: &str) -> IResult<&str, ()> { } } -pub(crate) fn take_until_eol(input: &str) -> IResult<&str, &str> { +pub fn take_until_eol(input: &str) -> IResult<&str, &str> { if let Some(i) = memchr(b'\n', input.as_bytes()) { Ok((&input[i + 1..], input[0..i].trim())) } else { @@ -27,9 +640,7 @@ pub(crate) fn take_until_eol(input: &str) -> IResult<&str, &str> { } } -pub(crate) fn take_lines_till( - predicate: impl Fn(&str) -> bool, -) -> impl Fn(&str) -> IResult<&str, &str> { +pub fn take_lines_till(predicate: impl Fn(&str) -> bool) -> impl Fn(&str) -> IResult<&str, &str> { move |input| { let mut start = 0; for i in memchr_iter(b'\n', input.as_bytes()) { @@ -47,13 +658,13 @@ pub(crate) fn take_lines_till( } } -pub(crate) fn take_one_word(input: &str) -> IResult<&str, &str> { +pub fn take_one_word(input: &str) -> IResult<&str, &str> { alt((take_till(|c: char| c == ' ' || c == '\t'), |input| { Ok(("", input)) }))(input) } -pub(crate) fn skip_empty_lines(contents: &str) -> &str { +pub fn skip_empty_lines(contents: &str) -> &str { let mut i = 0; for pos in memchr_iter(b'\n', contents.as_bytes()) { if contents.as_bytes()[i..pos]