refactor: cleanup parse function

This commit is contained in:
PoiScript 2019-02-08 21:34:58 +08:00
parent c1154a1853
commit c5b14256f0
25 changed files with 1299 additions and 1234 deletions

View file

@ -1,6 +1,6 @@
[package]
name = "orgize"
version = "0.1.2"
version = "0.1.3"
authors = ["PoiScript <poiscript@gmail.com>"]
description = "A Rust library for parsing orgmode files."
repository = "https://github.com/PoiScript/orgize"

33
examples/convert.rs Normal file
View file

@ -0,0 +1,33 @@
use std::env;
use std::fs::File;
use std::io::Cursor;
use std::io::Read;
use orgize::export::{HtmlHandler, Render};
/// Renders the org file named by the first command-line argument to HTML
/// and prints the result to stdout.
///
/// Prints a usage line and returns when no file argument is given.
///
/// # Panics
///
/// Panics if the file cannot be opened or read, if rendering fails, or if
/// the rendered output is not valid UTF-8.
fn main() {
    let args: Vec<String> = env::args().collect();

    if args.len() < 2 {
        // The argument list may be empty, so `args[0]` is not safe to index
        // here; fall back to the example's name for the usage line.
        let program = args.first().map_or("convert", String::as_str);
        println!("Usage: {} <org-file>", program);
        return;
    }

    let path = &args[1];

    // Build the panic message only on failure and keep the underlying I/O
    // error: the open can fail for reasons other than a missing file.
    let mut file = File::open(path)
        .unwrap_or_else(|err| panic!("failed to open file {}: {}", path, err));

    let mut contents = String::new();
    file.read_to_string(&mut contents)
        .expect("something went wrong reading the file");

    // Render into an in-memory buffer so the output can be printed as a string.
    let cursor = Cursor::new(Vec::new());
    let handler = HtmlHandler;
    let mut render = Render::new(handler, cursor, &contents);

    render
        .render()
        .expect("something went wrong rendering the file");

    println!(
        "{}",
        String::from_utf8(render.into_wirter().into_inner()).expect("invalid utf-8")
    );
}

View file

@ -1,16 +1,12 @@
use crate::lines::Lines;
use memchr::memchr2;
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Block;
impl Block {
// return (name, args, contents-begin, contents-end, end)
pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> {
// return (name, args, contents-begin, contents-end, end)
#[inline]
pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> {
debug_assert!(src.starts_with("#+"));
if !src[2..8].eq_ignore_ascii_case("BEGIN_") {
if src[2..8].to_uppercase() != "BEGIN_" {
return None;
}
@ -24,11 +20,11 @@ impl Block {
Some(&src[name..pre_cont_end])
};
let name = &src[8..name];
let end_line = format!(r"#+END_{}", name);
let end_line = format!(r"#+END_{}", name.to_uppercase());
let mut pre_end = cont_beg;
for (_, end, line) in lines {
if line.trim().eq_ignore_ascii_case(&end_line) {
if line.trim() == end_line {
return Some((name, args, cont_beg, pre_end, end));
} else {
pre_end = end;
@ -36,17 +32,20 @@ impl Block {
}
None
}
}
#[test]
fn parse() {
#[cfg(test)]
mod tests {
#[test]
fn parse() {
use super::parse;
assert_eq!(
Block::parse("#+BEGIN_SRC\n#+END_SRC"),
parse("#+BEGIN_SRC\n#+END_SRC"),
Some(("SRC", None, 12, 12, 21))
);
assert_eq!(
Block::parse(
parse(
r#"#+BEGIN_SRC rust
fn main() {
// print "Hello World!" to the console
@ -58,4 +57,5 @@ fn main() {
Some(("SRC", Some(" rust"), 17, 104, 114))
);
// TODO: more testing
}
}

View file

@ -1,13 +1,9 @@
use crate::lines::Lines;
use memchr::memchr2;
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct DynBlock;
impl DynBlock {
// return (name, parameters, contents-begin, contents-end, end)
pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> {
// return (name, parameters, contents-begin, contents-end, end)
#[inline]
pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> {
debug_assert!(src.starts_with("#+"));
if !src[2..9].eq_ignore_ascii_case("BEGIN: ") {
@ -44,14 +40,17 @@ impl DynBlock {
}
None
}
}
#[test]
fn parse() {
#[cfg(test)]
mod tests {
#[test]
fn parse() {
use super::parse;
// TODO: testing
assert_eq!(
DynBlock::parse(
parse(
r"#+BEGIN: clocktable :scope file
CONTENTS
#+END:
@ -59,4 +58,5 @@ CONTENTS
),
Some(("clocktable", Some(":scope file"), 31, 40, 48))
)
}
}

View file

@ -1,11 +1,7 @@
use memchr::memchr;
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct FnDef;
impl FnDef {
pub fn parse(src: &str) -> Option<(&str, &str, usize)> {
#[inline]
pub fn parse(src: &str) -> Option<(&str, &str, usize)> {
debug_assert!(src.starts_with("[fn:"));
let label = memchr(b']', src.as_bytes()).filter(|&i| {
@ -18,13 +14,16 @@ impl FnDef {
let end = eol!(src);
Some((&src[4..label], &src[label + 1..end], end))
}
}
#[test]
fn parse() {
#[cfg(test)]
mod tests {
#[test]
fn parse() {
use super::parse;
assert_eq!(
FnDef::parse("[fn:1] https://orgmode.org").unwrap(),
parse("[fn:1] https://orgmode.org").unwrap(),
(
"1",
" https://orgmode.org",
@ -32,7 +31,7 @@ fn parse() {
)
);
assert_eq!(
FnDef::parse("[fn:word_1] https://orgmode.org").unwrap(),
parse("[fn:word_1] https://orgmode.org").unwrap(),
(
"word_1",
" https://orgmode.org",
@ -40,18 +39,16 @@ fn parse() {
)
);
assert_eq!(
FnDef::parse("[fn:WORD-1] https://orgmode.org").unwrap(),
parse("[fn:WORD-1] https://orgmode.org").unwrap(),
(
"WORD-1",
" https://orgmode.org",
"[fn:WORD-1] https://orgmode.org".len()
)
);
assert_eq!(
FnDef::parse("[fn:WORD]").unwrap(),
("WORD", "", "[fn:WORD]".len())
);
assert!(FnDef::parse("[fn:] https://orgmode.org").is_none());
assert!(FnDef::parse("[fn:wor d] https://orgmode.org").is_none());
assert!(FnDef::parse("[fn:WORD https://orgmode.org").is_none());
assert_eq!(parse("[fn:WORD]").unwrap(), ("WORD", "", "[fn:WORD]".len()));
assert!(parse("[fn:] https://orgmode.org").is_none());
assert!(parse("[fn:wor d] https://orgmode.org").is_none());
assert!(parse("[fn:WORD https://orgmode.org").is_none());
}
}

View file

@ -1,7 +1,5 @@
use memchr::{memchr, memchr2};
pub struct Keyword;
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub enum Key<'a> {
@ -24,9 +22,7 @@ pub enum Key<'a> {
Call,
}
impl Keyword {
// return (key, value, offset)
pub fn parse(src: &str) -> Option<(Key<'_>, &str, usize)> {
pub fn parse(src: &str) -> Option<(Key<'_>, &str, usize)> {
debug_assert!(src.starts_with("#+"));
let bytes = src.as_bytes();
@ -51,67 +47,70 @@ impl Keyword {
.unwrap_or_else(|| src.len());
Some((
match &src[2..key_end] {
key if key.eq_ignore_ascii_case("CAPTION") => Key::Caption {
match src[2..key_end].to_uppercase().as_str() {
"AUTHOR" => Key::Author,
"CALL" => Key::Call,
"DATE" => Key::Date,
"HEADER" => Key::Header,
"NAME" => Key::Name,
"PLOT" => Key::Plot,
"TITLE" => Key::Title,
"RESULTS" => Key::Results {
option: if key_end == option {
None
} else {
Some(&src[key_end + 1..option - 1])
},
},
key if key.eq_ignore_ascii_case("HEADER") => Key::Header,
key if key.eq_ignore_ascii_case("NAME") => Key::Name,
key if key.eq_ignore_ascii_case("PLOT") => Key::Plot,
key if key.eq_ignore_ascii_case("RESULTS") => Key::Results {
"CAPTION" => Key::Caption {
option: if key_end == option {
None
} else {
Some(&src[key_end + 1..option - 1])
},
},
key if key.eq_ignore_ascii_case("AUTHOR") => Key::Author,
key if key.eq_ignore_ascii_case("DATE") => Key::Date,
key if key.eq_ignore_ascii_case("TITLE") => Key::Title,
key if key.eq_ignore_ascii_case("CALL") => Key::Call,
key if key.starts_with("ATTR_") => Key::Attr {
backend: &src["#+ATTR_".len()..key_end],
},
key => Key::Custom(key),
_ => Key::Custom(&src[2..key_end]),
},
&src[option + 1..end].trim(),
end,
))
}
}
#[test]
fn parse() {
#[cfg(test)]
mod tests {
#[test]
fn parse() {
use super::*;
assert_eq!(
Keyword::parse("#+KEY:"),
parse("#+KEY:"),
Some((Key::Custom("KEY"), "", "#+KEY:".len()))
);
assert_eq!(
Keyword::parse("#+KEY: VALUE"),
parse("#+KEY: VALUE"),
Some((Key::Custom("KEY"), "VALUE", "#+KEY: VALUE".len()))
);
assert_eq!(
Keyword::parse("#+K_E_Y: VALUE"),
parse("#+K_E_Y: VALUE"),
Some((Key::Custom("K_E_Y"), "VALUE", "#+K_E_Y: VALUE".len()))
);
assert_eq!(
Keyword::parse("#+KEY:VALUE\n"),
parse("#+KEY:VALUE\n"),
Some((Key::Custom("KEY"), "VALUE", "#+KEY:VALUE\n".len()))
);
assert!(Keyword::parse("#+KE Y: VALUE").is_none());
assert!(Keyword::parse("#+ KEY: VALUE").is_none());
assert!(parse("#+KE Y: VALUE").is_none());
assert!(parse("#+ KEY: VALUE").is_none());
assert_eq!(
Keyword::parse("#+RESULTS:"),
parse("#+RESULTS:"),
Some((Key::Results { option: None }, "", "#+RESULTS:".len()))
);
assert_eq!(
Keyword::parse("#+ATTR_LATEX: :width 5cm"),
parse("#+ATTR_LATEX: :width 5cm"),
Some((
Key::Attr { backend: "LATEX" },
":width 5cm",
@ -120,12 +119,12 @@ fn parse() {
);
assert_eq!(
Keyword::parse("#+CALL: double(n=4)"),
parse("#+CALL: double(n=4)"),
Some((Key::Call, "double(n=4)", "#+CALL: double(n=4)".len()))
);
assert_eq!(
Keyword::parse("#+CAPTION[Short caption]: Longer caption."),
parse("#+CAPTION[Short caption]: Longer caption."),
Some((
Key::Caption {
option: Some("Short caption")
@ -134,4 +133,5 @@ fn parse() {
"#+CAPTION[Short caption]: Longer caption.".len()
))
);
}
}

View file

@ -1,10 +1,7 @@
use crate::lines::Lines;
pub struct List;
impl List {
#[inline]
pub fn is_item(src: &str) -> (bool, bool) {
#[inline]
pub fn is_item(src: &str) -> (bool, bool) {
if src.is_empty() {
return (false, false);
}
@ -31,11 +28,12 @@ impl List {
} else {
(false, false)
}
}
}
// returns (bullets, contents begin, contents end, end, has more)
pub fn parse(src: &str, ident: usize) -> (&str, usize, usize, usize, bool) {
debug_assert!(Self::is_item(&src[ident..]).0);
// returns (bullets, contents begin, contents end, end, has more)
#[inline]
pub fn parse(src: &str, ident: usize) -> (&str, usize, usize, usize, bool) {
debug_assert!(is_item(&src[ident..]).0);
debug_assert!(
src[..ident].chars().all(|c| c == ' ' || c == '\t'),
"{:?} doesn't starts with indentation {}",
@ -54,7 +52,7 @@ impl List {
len,
len,
len,
Self::is_item(lines.next().unwrap().2).0,
is_item(lines.next().unwrap().2).0,
);
}
};
@ -79,7 +77,7 @@ impl List {
}
}
let line_ident = Self::ident(line);
let line_ident = self::ident(line);
if line_ident < ident {
return (bullet, beg, pre_cont_end, pre_end, false);
@ -89,7 +87,7 @@ impl List {
beg,
pre_cont_end,
pre_end,
Self::is_item(&line[ident..]).0,
is_item(&line[ident..]).0,
);
}
@ -98,60 +96,64 @@ impl List {
}
(bullet, beg, src.len(), src.len(), false)
}
}
fn ident(src: &str) -> usize {
#[inline]
fn ident(src: &str) -> usize {
src.as_bytes()
.iter()
.position(|&c| c != b' ' && c != b'\t')
.unwrap_or(0)
}
#[cfg(test)]
mod tests {
#[test]
fn is_item() {
use super::is_item;
assert_eq!(is_item("+ item"), (true, false));
assert_eq!(is_item("- item"), (true, false));
assert_eq!(is_item("10. item"), (true, true));
assert_eq!(is_item("10) item"), (true, true));
assert_eq!(is_item("1. item"), (true, true));
assert_eq!(is_item("1) item"), (true, true));
assert_eq!(is_item("10. "), (true, true));
assert_eq!(is_item("10.\n"), (true, true));
assert_eq!(is_item("10."), (false, false));
assert_eq!(is_item("+"), (false, false));
assert_eq!(is_item("-item"), (false, false));
assert_eq!(is_item("+item"), (false, false));
}
}
#[test]
fn is_item() {
assert_eq!(List::is_item("+ item"), (true, false));
assert_eq!(List::is_item("- item"), (true, false));
assert_eq!(List::is_item("10. item"), (true, true));
assert_eq!(List::is_item("10) item"), (true, true));
assert_eq!(List::is_item("1. item"), (true, true));
assert_eq!(List::is_item("1) item"), (true, true));
assert_eq!(List::is_item("10. "), (true, true));
assert_eq!(List::is_item("10.\n"), (true, true));
assert_eq!(List::is_item("10."), (false, false));
assert_eq!(List::is_item("+"), (false, false));
assert_eq!(List::is_item("-item"), (false, false));
assert_eq!(List::is_item("+item"), (false, false));
}
#[test]
fn parse() {
use super::parse;
#[test]
fn parse() {
assert_eq!(parse("+ item1\n+ item2\n+ item3", 0), ("+ ", 2, 7, 8, true));
assert_eq!(
List::parse("+ item1\n+ item2\n+ item3", 0),
("+ ", 2, 7, 8, true)
);
assert_eq!(
List::parse("* item1\n\n* item2\n* item3", 0),
parse("* item1\n\n* item2\n* item3", 0),
("* ", 2, 7, 9, true)
);
assert_eq!(
List::parse("- item1\n\n\n- item2\n- item3", 0),
parse("- item1\n\n\n- item2\n- item3", 0),
("- ", 2, 7, 10, false)
);
assert_eq!(
List::parse("1. item1\n\n\n\n2. item2\n3. item3", 0),
parse("1. item1\n\n\n\n2. item2\n3. item3", 0),
("1. ", 3, 8, 11, false)
);
assert_eq!(
List::parse(" + item1\n + item2\n+ item3", 2),
parse(" + item1\n + item2\n+ item3", 2),
(" + ", 4, 21, 22, false)
);
assert_eq!(
List::parse(" + item1\n + item2\n + item3", 2),
parse(" + item1\n + item2\n + item3", 2),
(" + ", 4, 9, 10, true)
);
assert_eq!(List::parse("+\n", 0), ("+", 1, 1, 1, false));
assert_eq!(List::parse("+\n+ item2\n+ item3", 0), ("+", 1, 1, 1, true));
assert_eq!(List::parse("1) item1", 0), ("1) ", 3, 8, 8, false));
assert_eq!(List::parse("1) item1\n", 0), ("1) ", 3, 8, 9, false));
assert_eq!(parse("+\n", 0), ("+", 1, 1, 1, false));
assert_eq!(parse("+\n+ item2\n+ item3", 0), ("+", 1, 1, 1, true));
assert_eq!(parse("1) item1", 0), ("1) ", 3, 8, 8, false));
assert_eq!(parse("1) item1\n", 0), ("1) ", 3, 8, 9, false));
}
}

View file

@ -5,12 +5,7 @@ pub mod keyword;
pub mod list;
pub mod rule;
pub use self::block::Block;
pub use self::dyn_block::DynBlock;
pub use self::fn_def::FnDef;
pub use self::keyword::{Key, Keyword};
pub use self::list::List;
pub use self::rule::Rule;
pub use self::keyword::Key;
use memchr::memchr;
use memchr::memchr_iter;
@ -118,9 +113,9 @@ impl<'a> Element<'a> {
};
}
// Unlike other element, footnote definition must starts at column 0
// Unlike other elements, a footnote definition must start at column 0
if bytes[pos..].starts_with(b"[fn:") {
if let Some((label, cont, off)) = FnDef::parse(&src[pos..]) {
if let Some((label, cont, off)) = fn_def::parse(&src[pos..]) {
brk!(Element::FnDef { label, cont }, off + 1);
}
}
@ -138,7 +133,7 @@ impl<'a> Element<'a> {
pos = skip_space!(src, pos);
let (is_item, ordered) = List::is_item(&src[pos..]);
let (is_item, ordered) = list::is_item(&src[pos..]);
if is_item {
let list = Element::List {
ident: pos - line_beg,
@ -163,7 +158,7 @@ impl<'a> Element<'a> {
// Rule
if bytes[pos] == b'-' {
let off = Rule::parse(&src[pos..]);
let off = rule::parse(&src[pos..]);
if off != 0 {
brk!(Element::Rule, off);
}
@ -178,7 +173,7 @@ impl<'a> Element<'a> {
}
if bytes[pos..].starts_with(b"#+") {
if let Some((name, args, cont_beg, cont_end, end)) = Block::parse(&src[pos..]) {
if let Some((name, args, cont_beg, cont_end, end)) = block::parse(&src[pos..]) {
let cont = &src[pos + cont_beg..pos + cont_end];
match name.to_uppercase().as_str() {
"COMMENT" => brk!(Element::CommentBlock { args, cont }, end),
@ -214,7 +209,7 @@ impl<'a> Element<'a> {
};
}
if let Some((name, args, cont_beg, cont_end, end)) = DynBlock::parse(&src[pos..]) {
if let Some((name, args, cont_beg, cont_end, end)) = dyn_block::parse(&src[pos..]) {
brk!(
Element::DynBlock {
name,
@ -226,7 +221,7 @@ impl<'a> Element<'a> {
)
}
if let Some((key, value, off)) = Keyword::parse(&src[pos..]) {
if let Some((key, value, off)) = keyword::parse(&src[pos..]) {
brk!(
if let Key::Call = key {
Element::Call { value }

View file

@ -1,9 +1,5 @@
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Rule;
impl Rule {
pub fn parse(src: &str) -> usize {
#[inline]
pub fn parse(src: &str) -> usize {
let end = memchr::memchr(b'\n', src.as_bytes())
.map(|i| i + 1)
.unwrap_or_else(|| src.len());
@ -13,25 +9,29 @@ impl Rule {
} else {
0
}
}
}
#[test]
fn parse() {
assert_eq!(Rule::parse("-----"), "-----".len());
assert_eq!(Rule::parse("--------"), "--------".len());
assert_eq!(Rule::parse(" -----"), " -----".len());
assert_eq!(Rule::parse("\t\t-----"), "\t\t-----".len());
assert_eq!(Rule::parse("\t\t-----\n"), "\t\t-----\n".len());
assert_eq!(Rule::parse("\t\t----- \n"), "\t\t----- \n".len());
assert_eq!(Rule::parse(""), 0);
assert_eq!(Rule::parse("----"), 0);
assert_eq!(Rule::parse(" ----"), 0);
assert_eq!(Rule::parse(" 0----"), 0);
assert_eq!(Rule::parse("0 ----"), 0);
assert_eq!(Rule::parse("0------"), 0);
assert_eq!(Rule::parse("----0----"), 0);
assert_eq!(Rule::parse("\t\t----"), 0);
assert_eq!(Rule::parse("------0"), 0);
assert_eq!(Rule::parse("----- 0"), 0);
#[cfg(test)]
mod tests {
#[test]
fn parse() {
use super::parse;
assert_eq!(parse("-----"), "-----".len());
assert_eq!(parse("--------"), "--------".len());
assert_eq!(parse(" -----"), " -----".len());
assert_eq!(parse("\t\t-----"), "\t\t-----".len());
assert_eq!(parse("\t\t-----\n"), "\t\t-----\n".len());
assert_eq!(parse("\t\t----- \n"), "\t\t----- \n".len());
assert_eq!(parse(""), 0);
assert_eq!(parse("----"), 0);
assert_eq!(parse(" ----"), 0);
assert_eq!(parse(" 0----"), 0);
assert_eq!(parse("0 ----"), 0);
assert_eq!(parse("0------"), 0);
assert_eq!(parse("----0----"), 0);
assert_eq!(parse("\t\t----"), 0);
assert_eq!(parse("------0"), 0);
assert_eq!(parse("----- 0"), 0);
}
}

View file

@ -3,9 +3,7 @@
use crate::elements::Key;
use crate::export::Handler;
use crate::headline::Headline;
use crate::objects::{
Cookie, FnRef, InlineCall, InlineSrc, Link, Macros, RadioTarget, Snippet, Target,
};
use crate::objects::Cookie;
use std::io::{Result, Write};
pub struct HtmlHandler;
@ -117,36 +115,49 @@ impl<W: Write> Handler<W> for HtmlHandler {
fn handle_cookie(&mut self, w: &mut W, cookie: Cookie) -> Result<()> {
Ok(())
}
fn handle_fn_ref(&mut self, w: &mut W, fn_ref: FnRef) -> Result<()> {
fn handle_fn_ref(&mut self, w: &mut W, label: Option<&str>, def: Option<&str>) -> Result<()> {
Ok(())
}
fn handle_inline_call(&mut self, w: &mut W, inline_call: InlineCall) -> Result<()> {
fn handle_inline_call(
&mut self,
w: &mut W,
name: &str,
args: &str,
inside_header: Option<&str>,
end_header: Option<&str>,
) -> Result<()> {
Ok(())
}
fn handle_inline_src(&mut self, w: &mut W, inline_src: InlineSrc) -> Result<()> {
write!(w, "<code>{}</code>", inline_src.body)
fn handle_inline_src(
&mut self,
w: &mut W,
lang: &str,
option: Option<&str>,
body: &str,
) -> Result<()> {
write!(w, "<code>{}</code>", body)
}
fn handle_link(&mut self, w: &mut W, link: Link) -> Result<()> {
if let Some(desc) = link.desc {
write!(w, r#"<a href="{}">{}</a>"#, link.path, desc)
fn handle_link(&mut self, w: &mut W, path: &str, desc: Option<&str>) -> Result<()> {
if let Some(desc) = desc {
write!(w, r#"<a href="{}">{}</a>"#, path, desc)
} else {
write!(w, r#"<a href="{0}">{0}</a>"#, link.path)
write!(w, r#"<a href="{0}">{0}</a>"#, path)
}
}
fn handle_macros(&mut self, w: &mut W, macros: Macros) -> Result<()> {
fn handle_macros(&mut self, w: &mut W, name: &str, args: Option<&str>) -> Result<()> {
Ok(())
}
fn handle_radio_target(&mut self, w: &mut W, target: RadioTarget) -> Result<()> {
fn handle_radio_target(&mut self, w: &mut W, target: &str) -> Result<()> {
Ok(())
}
fn handle_snippet(&mut self, w: &mut W, snippet: Snippet) -> Result<()> {
if snippet.name.eq_ignore_ascii_case("HTML") {
write!(w, "{}", snippet.value)
fn handle_snippet(&mut self, w: &mut W, name: &str, value: &str) -> Result<()> {
if name.eq_ignore_ascii_case("HTML") {
write!(w, "{}", value)
} else {
Ok(())
}
}
fn handle_target(&mut self, w: &mut W, target: Target) -> Result<()> {
fn handle_target(&mut self, w: &mut W, target: &str) -> Result<()> {
Ok(())
}
fn handle_bold_beg(&mut self, w: &mut W) -> Result<()> {

View file

@ -4,9 +4,7 @@ pub use self::html::HtmlHandler;
use crate::elements::Key;
use crate::headline::Headline;
use crate::objects::{
Cookie, FnRef, InlineCall, InlineSrc, Link, Macros, RadioTarget, Snippet, Target,
};
use crate::objects::Cookie;
use crate::parser::Parser;
use std::io::{Result, Write};
@ -46,14 +44,27 @@ pub trait Handler<W: Write> {
fn handle_keyword(&mut self, w: &mut W, key: Key<'_>, value: &str) -> Result<()>;
fn handle_rule(&mut self, w: &mut W) -> Result<()>;
fn handle_cookie(&mut self, w: &mut W, cookie: Cookie) -> Result<()>;
fn handle_fn_ref(&mut self, w: &mut W, fn_ref: FnRef) -> Result<()>;
fn handle_inline_call(&mut self, w: &mut W, inline_call: InlineCall) -> Result<()>;
fn handle_inline_src(&mut self, w: &mut W, inline_src: InlineSrc) -> Result<()>;
fn handle_link(&mut self, w: &mut W, link: Link) -> Result<()>;
fn handle_macros(&mut self, w: &mut W, macros: Macros) -> Result<()>;
fn handle_radio_target(&mut self, w: &mut W, target: RadioTarget) -> Result<()>;
fn handle_snippet(&mut self, w: &mut W, snippet: Snippet) -> Result<()>;
fn handle_target(&mut self, w: &mut W, target: Target) -> Result<()>;
fn handle_fn_ref(&mut self, w: &mut W, label: Option<&str>, def: Option<&str>) -> Result<()>;
fn handle_inline_call(
&mut self,
w: &mut W,
name: &str,
args: &str,
inside_header: Option<&str>,
end_header: Option<&str>,
) -> Result<()>;
fn handle_inline_src(
&mut self,
w: &mut W,
lang: &str,
option: Option<&str>,
body: &str,
) -> Result<()>;
fn handle_link(&mut self, w: &mut W, path: &str, desc: Option<&str>) -> Result<()>;
fn handle_macros(&mut self, w: &mut W, name: &str, args: Option<&str>) -> Result<()>;
fn handle_radio_target(&mut self, w: &mut W, target: &str) -> Result<()>;
fn handle_snippet(&mut self, w: &mut W, name: &str, value: &str) -> Result<()>;
fn handle_target(&mut self, w: &mut W, target: &str) -> Result<()>;
fn handle_bold_beg(&mut self, w: &mut W) -> Result<()>;
fn handle_bold_end(&mut self, w: &mut W) -> Result<()>;
fn handle_italic_beg(&mut self, w: &mut W) -> Result<()>;
@ -129,14 +140,19 @@ impl<'a, W: Write, H: Handler<W>> Render<'a, W, H> {
Keyword { key, value } => h.handle_keyword(w, key, value)?,
Rule => h.handle_rule(w)?,
Cookie(cookie) => h.handle_cookie(w, cookie)?,
FnRef(fnref) => h.handle_fn_ref(w, fnref)?,
InlineCall(inlinecall) => h.handle_inline_call(w, inlinecall)?,
InlineSrc(inlinesrc) => h.handle_inline_src(w, inlinesrc)?,
Link(link) => h.handle_link(w, link)?,
Macros(macros) => h.handle_macros(w, macros)?,
RadioTarget(radiotarget) => h.handle_radio_target(w, radiotarget)?,
Snippet(snippet) => h.handle_snippet(w, snippet)?,
Target(target) => h.handle_target(w, target)?,
FnRef { label, def } => h.handle_fn_ref(w, label, def)?,
InlineSrc { lang, option, body } => h.handle_inline_src(w, lang, option, body)?,
InlineCall {
name,
args,
inside_header,
end_header,
} => h.handle_inline_call(w, name, args, inside_header, end_header)?,
Link { path, desc } => h.handle_link(w, path, desc)?,
Macros { name, args } => h.handle_macros(w, name, args)?,
RadioTarget { target } => h.handle_radio_target(w, target)?,
Snippet { name, value } => h.handle_snippet(w, name, value)?,
Target { target } => h.handle_target(w, target)?,
BoldBeg => h.handle_bold_beg(w)?,
BoldEnd => h.handle_bold_end(w)?,
ItalicBeg => h.handle_italic_beg(w)?,

View file

@ -1,10 +1,17 @@
//! Headline
/// A parsed headline whose string fields borrow from the source text.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Headline<'a> {
/// Headline level, i.e. the number of leading stars.
pub level: usize,
/// Priority cookie character, e.g. `'A'` for `[#A]`; `None` when absent.
pub priority: Option<char>,
/// Headline tags, including the separating colons (e.g. `":tag:a2%:"`);
/// `None` when the headline has no tags.
pub tags: Option<&'a str>,
/// Headline title.
pub title: &'a str,
/// Headline keyword (e.g. `"DONE"`); `None` when absent.
pub keyword: Option<&'a str>,
}
@ -58,6 +65,20 @@ impl<'a> Headline<'a> {
(None, src.len())
}
/// Parses the input string and returns the parsed headline, the offset
/// where its content begins, and the end offset of the headline container.
///
/// ```rust
/// use orgize::headline::Headline;
///
/// let (hdl, _, _) = Headline::parse("* DONE [#A] COMMENT Title :tag:a2%:");
///
/// assert_eq!(hdl.level, 1);
/// assert_eq!(hdl.priority, Some('A'));
/// assert_eq!(hdl.tags, Some(":tag:a2%:"));
/// assert_eq!(hdl.title, "COMMENT Title");
/// assert_eq!(hdl.keyword, Some("DONE"));
/// ```
pub fn parse(src: &'a str) -> (Headline<'a>, usize, usize) {
let mut level = 0;
loop {
@ -69,7 +90,11 @@ impl<'a> Headline<'a> {
}
let eol = eol!(src);
let end = Headline::find_level(&src[eol..], level) + eol;
let end = if eol == src.len() {
eol
} else {
Headline::find_level(&src[eol..], level) + eol
};
let mut title_start = skip_space!(src, level);
@ -89,88 +114,70 @@ impl<'a> Headline<'a> {
let (tags, title_off) = Headline::parse_tags(&src[title_start..eol]);
// println!("{:?} {:?} {:?}", keyword, priority, tags);
// println!("{:?} {}", title_start, title_off);
(
Headline::new(
Headline {
level,
keyword,
priority,
&src[title_start..title_start + title_off],
title: &src[title_start..title_start + title_off],
tags,
),
},
eol,
end,
)
}
// TODO: optimize
pub fn find_level(src: &str, level: usize) -> usize {
use jetscii::ByteSubstring;
use memchr::memchr2;
let bytes = src.as_bytes();
if bytes[0] == b'*' {
if let Some(stars) = memchr2(b'\n', b' ', bytes) {
if stars > 0 && stars <= level && bytes[0..stars].iter().all(|&c| c == b'*') {
return 0;
}
}
}
let mut pos = 0;
loop {
if pos >= src.len() {
return src.len();
}
if src.as_bytes()[pos] == b'*' && (pos == 0 || src.as_bytes()[pos - 1] == b'\n') {
let pos_ = pos;
loop {
if pos >= src.len() {
return src.len();
}
if src.as_bytes()[pos] == b'*' {
pos += 1;
} else if src.as_bytes()[pos] == b' ' && pos - pos_ <= level {
return pos_;
} else {
break;
while let Some(off) = ByteSubstring::new(b"\n*").find(&bytes[pos..]) {
pos += off + 1;
if let Some(stars) = memchr2(b'\n', b' ', &bytes[pos..]) {
if stars > 0 && stars <= level && bytes[pos..pos + stars].iter().all(|&c| c == b'*')
{
return pos;
}
}
}
pos += 1
}
src.len()
}
/// Returns `true` when the headline title begins with the `COMMENT `
/// marker (the keyword followed by a space).
pub fn is_commented(&self) -> bool {
    let marker = "COMMENT ";
    self.title.starts_with(marker)
}
/// Returns `true` when one of the headline's tags is exactly `ARCHIVE`.
///
/// A headline without tags is never archived.
pub fn is_archived(&self) -> bool {
    match self.tags {
        // Skip the first byte of the tag string, then compare each
        // colon-terminated tag against the marker.
        Some(tags) => tags[1..]
            .split_terminator(':')
            .any(|tag| tag == "ARCHIVE"),
        None => false,
    }
}
pub fn new(
level: usize,
keyword: Option<&'a str>,
priority: Option<char>,
title: &'a str,
tags: Option<&'a str>,
) -> Headline<'a> {
Headline {
level,
keyword,
priority,
title,
tags,
}
}
}
#[test]
fn parse() {
assert_eq!(
Headline::parse("**** TODO [#A] COMMENT Title :tag:a2%:").0,
Headline::new(
4,
Some("TODO"),
Some('A'),
"COMMENT Title",
Some(":tag:a2%:"),
),
Headline {
level: 4,
priority: Some('A'),
keyword: Some("TODO"),
title: "COMMENT Title",
tags: Some(":tag:a2%:"),
},
);
assert_eq!(
Headline::parse("**** ToDO [#A] COMMENT Title :tag:a2%:").0,
@ -262,3 +269,17 @@ fn is_archived() {
assert!(!Headline::parse("* Title :ARCHIVES:").0.is_archived());
assert!(!Headline::parse("* Title :archive:").0.is_archived());
}
#[test]
fn find_level() {
// The input is "\n** Title\n* Title\n** Title". With level 1, the leading
// "** Title" line is too deep to match, so the result is the byte offset
// of the "* Title" line: 1 ("\n") + 8 ("** Title") + 1 ("\n") = 10.
assert_eq!(
Headline::find_level(
r#"
** Title
* Title
** Title"#,
1
),
10
);
}

View file

@ -1,3 +1,58 @@
//! A Rust library for parsing orgmode files.
//!
//! ## Example
//!
//! ```rust
//! use orgize::Parser;
//!
//! fn main() {
//! let parser = Parser::new(
//! r#"* Title 1
//! *Section 1*
//! ** Title 2
//! _Section 2_
//! * Title 3
//! /Section 3/
//! * Title 4
//! =Section 4="#,
//! );
//!
//! for event in parser {
//! // handling the event
//! }
//! }
//! ```
//!
//! Alternatively, you can use the built-in render.
//!
//! ```rust
//! use orgize::export::{HtmlHandler, Render};
//! use std::io::Cursor;
//!
//! fn main() {
//! let contents = r#"* Title 1
//! *Section 1*
//! ** Title 2
//! _Section 2_
//! * Title 3
//! /Section 3/
//! * Title 4
//! =Section 4="#;
//!
//! let cursor = Cursor::new(Vec::new());
//! let mut render = Render::new(HtmlHandler, cursor, &contents);
//!
//! render
//! .render()
//! .expect("something went wrong rendering the file");
//!
//! println!(
//! "{}",
//! String::from_utf8(render.into_wirter().into_inner()).expect("invalid utf-8")
//! );
//! }
//! ```
#[macro_use]
mod utils;
@ -7,3 +62,5 @@ pub mod headline;
mod lines;
pub mod objects;
mod parser;
pub use parser::{Event, Parser};

View file

@ -2,74 +2,50 @@ use memchr::{memchr, memchr2};
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Cookie<'a> {
value: &'a str,
pub enum Cookie<'a> {
Percent(&'a str),
Slash(&'a str, &'a str),
}
impl<'a> Cookie<'a> {
pub fn parse(src: &'a str) -> Option<(Cookie<'a>, usize)> {
#[inline]
pub fn parse(src: &str) -> Option<(Cookie<'_>, usize)> {
debug_assert!(src.starts_with('['));
let num1 = memchr2(b'%', b'/', src.as_bytes())
.filter(|&i| src.as_bytes()[1..i].iter().all(|c| c.is_ascii_digit()))?;
let bytes = src.as_bytes();
let num1 =
memchr2(b'%', b'/', bytes).filter(|&i| bytes[1..i].iter().all(|c| c.is_ascii_digit()))?;
if src.as_bytes()[num1] == b'%' && *src.as_bytes().get(num1 + 1)? == b']' {
Some((
Cookie {
value: &src[0..=num1 + 1],
},
num1 + 2,
))
if bytes[num1] == b'%' && *bytes.get(num1 + 1)? == b']' {
Some((Cookie::Percent(&src[1..num1]), num1 + 2))
} else {
let num2 = memchr(b']', src.as_bytes()).filter(|&i| {
src.as_bytes()[num1 + 1..i]
.iter()
.all(|c| c.is_ascii_digit())
})?;
let num2 = memchr(b']', bytes)
.filter(|&i| bytes[num1 + 1..i].iter().all(|c| c.is_ascii_digit()))?;
Some((
Cookie {
value: &src[0..=num2],
},
num2 + 1,
))
}
Some((Cookie::Slash(&src[1..num1], &src[num1 + 1..num2]), num2 + 1))
}
}
#[test]
fn parse() {
assert_eq!(
Cookie::parse("[1/10]").unwrap(),
(Cookie { value: "[1/10]" }, "[1/10]".len())
);
assert_eq!(
Cookie::parse("[1/1000]").unwrap(),
(Cookie { value: "[1/1000]" }, "[1/1000]".len())
);
assert_eq!(
Cookie::parse("[10%]").unwrap(),
(Cookie { value: "[10%]" }, "[10%]".len())
);
assert_eq!(
Cookie::parse("[%]").unwrap(),
(Cookie { value: "[%]" }, "[%]".len())
);
assert_eq!(
Cookie::parse("[/]").unwrap(),
(Cookie { value: "[/]" }, "[/]".len())
);
assert_eq!(
Cookie::parse("[100/]").unwrap(),
(Cookie { value: "[100/]" }, "[100/]".len())
);
assert_eq!(
Cookie::parse("[/100]").unwrap(),
(Cookie { value: "[/100]" }, "[/100]".len())
);
#[cfg(test)]
mod tests {
#[test]
fn parse() {
use super::parse;
use super::Cookie::*;
assert!(Cookie::parse("[10% ]").is_none(),);
assert!(Cookie::parse("[1//100]").is_none(),);
assert!(Cookie::parse("[1\\100]").is_none(),);
assert!(Cookie::parse("[10%%]").is_none(),);
assert_eq!(parse("[1/10]").unwrap(), (Slash("1", "10"), "[1/10]".len()));
assert_eq!(
parse("[1/1000]").unwrap(),
(Slash("1", "1000"), "[1/1000]".len())
);
assert_eq!(parse("[10%]").unwrap(), (Percent("10"), "[10%]".len()));
assert_eq!(parse("[%]").unwrap(), (Percent(""), "[%]".len()));
assert_eq!(parse("[/]").unwrap(), (Slash("", ""), "[/]".len()));
assert_eq!(parse("[100/]").unwrap(), (Slash("100", ""), "[100/]".len()));
assert_eq!(parse("[/100]").unwrap(), (Slash("", "100"), "[/100]".len()));
assert!(parse("[10% ]").is_none(),);
assert!(parse("[1//100]").is_none(),);
assert!(parse("[1\\100]").is_none(),);
assert!(parse("[10%%]").is_none(),);
}
}

View file

@ -1,44 +1,59 @@
use memchr::memchr;
pub struct Emphasis;
impl Emphasis {
// TODO: return usize instead of Option<usize>
pub fn parse(src: &str, marker: u8) -> Option<usize> {
expect!(src, 1, |c: u8| !c.is_ascii_whitespace())?;
#[inline]
/// returns offset
pub fn parse(src: &str, marker: u8) -> Option<usize> {
debug_assert!(src.len() >= 3);
let bytes = src.as_bytes();
if bytes[1].is_ascii_whitespace() {
return None;
}
let end = memchr(marker, &bytes[1..])
.map(|i| i + 1)
.filter(|&i| bytes[1..i].iter().filter(|&&c| c == b'\n').count() < 2)?;
expect!(src, end - 1, |c: u8| !c.is_ascii_whitespace())?;
if end < src.len() - 1 {
expect!(src, end + 1, |ch| ch == b' '
|| ch == b'-'
|| ch == b'.'
|| ch == b','
|| ch == b':'
|| ch == b'!'
|| ch == b'?'
|| ch == b'\''
|| ch == b'\n'
|| ch == b')'
|| ch == b'}')?;
if bytes[end - 1].is_ascii_whitespace() {
return None;
}
if end < src.len() - 1 {
let post = bytes[end + 1];
if post == b' '
|| post == b'-'
|| post == b'.'
|| post == b','
|| post == b':'
|| post == b'!'
|| post == b'?'
|| post == b'\''
|| post == b'\n'
|| post == b')'
|| post == b'}'
{
Some(end)
} else {
None
}
} else {
Some(end)
}
}
#[test]
fn parse() {
assert_eq!(Emphasis::parse("*bold*", b'*').unwrap(), "*bold".len());
assert_eq!(Emphasis::parse("*bo\nld*", b'*').unwrap(), "*bo\nld".len());
assert!(Emphasis::parse("*bold*a", b'*').is_none());
assert!(Emphasis::parse("*bold*", b'/').is_none());
assert!(Emphasis::parse("*bold *", b'*').is_none());
assert!(Emphasis::parse("* bold*", b'*').is_none());
assert!(Emphasis::parse("*b\nol\nd*", b'*').is_none());
#[cfg(test)]
mod tests {
#[test]
fn parse() {
use super::parse;
assert_eq!(parse("*bold*", b'*').unwrap(), "*bold".len());
assert_eq!(parse("*bo\nld*", b'*').unwrap(), "*bo\nld".len());
assert!(parse("*bold*a", b'*').is_none());
assert!(parse("*bold*", b'/').is_none());
assert!(parse("*bold *", b'*').is_none());
assert!(parse("* bold*", b'*').is_none());
assert!(parse("*b\nol\nd*", b'*').is_none());
}
}

View file

@ -1,14 +1,8 @@
use memchr::{memchr2, memchr2_iter};
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct FnRef<'a> {
label: Option<&'a str>,
definition: Option<&'a str>,
}
impl<'a> FnRef<'a> {
pub fn parse(src: &'a str) -> Option<(FnRef<'a>, usize)> {
/// returns (footnote reference label, footnote reference definition, offset)
#[inline]
pub fn parse(src: &str) -> Option<(Option<&str>, Option<&str>, usize)> {
debug_assert!(src.starts_with("[fn:"));
let bytes = src.as_bytes();
@ -34,73 +28,46 @@ impl<'a> FnRef<'a> {
})?;
Some((
FnRef {
label: if label == 4 {
if label == 4 {
None
} else {
Some(&src[4..label])
},
definition: Some(&src[label + 1..def]),
},
Some(&src[label + 1..def]),
def + 1,
))
} else {
Some((
FnRef {
label: if label == 4 {
if label == 4 {
None
} else {
Some(&src[4..label])
},
definition: None,
},
None,
label + 1,
))
}
}
}
#[test]
fn parse() {
#[cfg(test)]
mod tests {
#[test]
fn parse() {
use super::parse;
assert_eq!(parse("[fn:1]").unwrap(), (Some("1"), None, "[fn:1]".len()));
assert_eq!(
FnRef::parse("[fn:1]").unwrap(),
(
FnRef {
label: Some("1"),
definition: None,
},
"[fn:1]".len()
)
parse("[fn:1:2]").unwrap(),
(Some("1"), Some("2"), "[fn:1:2]".len())
);
assert_eq!(
FnRef::parse("[fn:1:2]").unwrap(),
(
FnRef {
label: Some("1"),
definition: Some("2"),
},
"[fn:1:2]".len()
)
parse("[fn::2]").unwrap(),
(None, Some("2"), "[fn::2]".len())
);
assert_eq!(
FnRef::parse("[fn::2]").unwrap(),
(
FnRef {
label: None,
definition: Some("2"),
},
"[fn::2]".len()
)
parse("[fn::[]]").unwrap(),
(None, Some("[]"), "[fn::[]]".len())
);
assert_eq!(
FnRef::parse("[fn::[]]").unwrap(),
(
FnRef {
label: None,
definition: Some("[]"),
},
"[fn::[]]".len()
)
);
assert!(FnRef::parse("[fn::[]").is_none());
assert!(parse("[fn::[]").is_none());
}
}

View file

@ -1,23 +1,14 @@
use memchr::{memchr, memchr2};
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct InlineCall<'a> {
pub name: &'a str,
pub args: &'a str,
// header args for block
pub inside_header: Option<&'a str>,
// header args for call line
pub end_header: Option<&'a str>,
}
impl<'a> InlineCall<'a> {
pub fn parse(src: &'a str) -> Option<(InlineCall, usize)> {
/// returns (name, args, inside_header, end_header)
#[inline]
pub fn parse(src: &str) -> Option<(&str, &str, Option<&str>, Option<&str>, usize)> {
debug_assert!(src.starts_with("call_"));
// TODO: refactor
let bytes = src.as_bytes();
let mut pos = memchr2(b'[', b'(', bytes)
.filter(|&i| bytes[5..i].iter().all(|c| c.is_ascii_graphic()))?;
let mut pos =
memchr2(b'[', b'(', bytes).filter(|&i| bytes[5..i].iter().all(|c| c.is_ascii_graphic()))?;
let mut pos_;
let name = &src[5..pos];
@ -50,66 +41,48 @@ impl<'a> InlineCall<'a> {
None
};
Some((
InlineCall {
name,
inside_header,
args,
end_header,
},
pos + 1,
))
}
Some((name, args, inside_header, end_header, pos + 1))
}
#[test]
fn parse() {
#[cfg(test)]
mod tests {
#[test]
fn parse() {
use super::parse;
assert_eq!(
InlineCall::parse("call_square(4)").unwrap(),
(
InlineCall {
name: "square",
args: "4",
inside_header: None,
end_header: None,
},
"call_square(4)".len()
)
parse("call_square(4)").unwrap(),
("square", "4", None, None, "call_square(4)".len())
);
assert_eq!(
InlineCall::parse("call_square[:results output](4)").unwrap(),
parse("call_square[:results output](4)").unwrap(),
(
InlineCall {
name: "square",
args: "4",
inside_header: Some(":results output"),
end_header: None,
},
"square",
"4",
Some(":results output"),
None,
"call_square[:results output](4)".len()
)
);
assert_eq!(
InlineCall::parse("call_square(4)[:results html]").unwrap(),
parse("call_square(4)[:results html]").unwrap(),
(
InlineCall {
name: "square",
args: "4",
inside_header: None,
end_header: Some(":results html"),
},
"square",
"4",
None,
Some(":results html"),
"call_square(4)[:results html]".len()
)
);
assert_eq!(
InlineCall::parse("call_square[:results output](4)[:results html]").unwrap(),
parse("call_square[:results output](4)[:results html]").unwrap(),
(
InlineCall {
name: "square",
args: "4",
inside_header: Some(":results output"),
end_header: Some(":results html"),
},
"square",
"4",
Some(":results output"),
Some(":results html"),
"call_square[:results output](4)[:results html]".len()
)
);
}
}

View file

@ -1,15 +1,8 @@
use memchr::{memchr, memchr2};
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct InlineSrc<'a> {
pub lang: &'a str,
pub option: Option<&'a str>,
pub body: &'a str,
}
impl<'a> InlineSrc<'a> {
pub fn parse(src: &'a str) -> Option<(InlineSrc, usize)> {
/// returns (language, option, body, offset)
#[inline]
pub fn parse(src: &str) -> Option<(&str, Option<&str>, &str, usize)> {
debug_assert!(src.starts_with("src_"));
let bytes = src.as_bytes();
@ -17,61 +10,45 @@ impl<'a> InlineSrc<'a> {
.filter(|&i| i != 4 && bytes[4..i].iter().all(|c| !c.is_ascii_whitespace()))?;
if bytes[lang] == b'[' {
let option =
memchr(b']', bytes).filter(|&i| bytes[lang..i].iter().all(|c| *c != b'\n'))?;
let option = memchr(b']', bytes).filter(|&i| bytes[lang..i].iter().all(|c| *c != b'\n'))?;
let body = memchr(b'}', &bytes[option..])
.map(|i| i + option)
.filter(|&i| bytes[option..i].iter().all(|c| *c != b'\n'))?;
Some((
InlineSrc {
lang: &src[4..lang],
option: Some(&src[lang + 1..option]),
body: &src[option + 2..body],
},
&src[4..lang],
Some(&src[lang + 1..option]),
&src[option + 2..body],
body + 1,
))
} else {
let body =
memchr(b'}', bytes).filter(|&i| bytes[lang..i].iter().all(|c| *c != b'\n'))?;
let body = memchr(b'}', bytes).filter(|&i| bytes[lang..i].iter().all(|c| *c != b'\n'))?;
Some((
InlineSrc {
lang: &src[4..lang],
option: None,
body: &src[lang + 1..body],
},
body + 1,
))
}
Some((&src[4..lang], None, &src[lang + 1..body], body + 1))
}
}
#[test]
fn parse() {
#[cfg(test)]
mod tests {
#[test]
fn parse() {
use super::parse;
assert_eq!(
InlineSrc::parse("src_C{int a = 0;}").unwrap(),
(
InlineSrc {
lang: "C",
option: None,
body: "int a = 0;"
},
"src_C{int a = 0;}".len()
)
parse("src_C{int a = 0;}").unwrap(),
("C", None, "int a = 0;", "src_C{int a = 0;}".len())
);
assert_eq!(
InlineSrc::parse("src_xml[:exports code]{<tag>text</tag>}").unwrap(),
parse("src_xml[:exports code]{<tag>text</tag>}").unwrap(),
(
InlineSrc {
lang: "xml",
option: Some(":exports code"),
body: "<tag>text</tag>"
},
"xml",
Some(":exports code"),
"<tag>text</tag>",
"src_xml[:exports code]{<tag>text</tag>}".len()
)
);
assert!(InlineSrc::parse("src_xml[:exports code]{<tag>text</tag>").is_none());
assert!(InlineSrc::parse("src_[:exports code]{<tag>text</tag>}").is_none());
assert!(InlineSrc::parse("src_xml[:exports code]").is_none());
assert!(parse("src_xml[:exports code]{<tag>text</tag>").is_none());
assert!(parse("src_[:exports code]{<tag>text</tag>}").is_none());
assert!(parse("src_xml[:exports code]").is_none());
}
}

View file

@ -1,14 +1,8 @@
use memchr::memchr;
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Link<'a> {
pub path: &'a str,
pub desc: Option<&'a str>,
}
impl<'a> Link<'a> {
pub fn parse(src: &'a str) -> Option<(Link<'a>, usize)> {
/// returns (link path, link description, offset)
#[inline]
pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize)> {
debug_assert!(src.starts_with("[["));
let bytes = src.as_bytes();
@ -19,53 +13,30 @@ impl<'a> Link<'a> {
})?;
if *bytes.get(path + 1)? == b']' {
Some((
Link {
path: &src[2..path],
desc: None,
},
path + 2,
))
Some((&src[2..path], None, path + 2))
} else if bytes[path + 1] == b'[' {
let desc = memchr(b']', &bytes[path + 2..])
.map(|i| i + path + 2)
.filter(|&i| bytes[path + 2..i].iter().all(|&c| c != b'['))?;
expect!(src, desc + 1, b']')?;
Some((
Link {
path: &src[2..path],
desc: Some(&src[path + 2..desc]),
},
desc + 2,
))
Some((&src[2..path], Some(&src[path + 2..desc]), desc + 2))
} else {
None
}
}
}
#[test]
fn parse() {
#[cfg(test)]
mod tests {
#[test]
fn parse() {
use super::parse;
assert_eq!(parse("[[#id]]").unwrap(), ("#id", None, "[[#id]]".len()));
assert_eq!(
Link::parse("[[#id]]").unwrap(),
(
Link {
path: "#id",
desc: None,
},
"[[#id]]".len()
)
parse("[[#id][desc]]").unwrap(),
("#id", Some("desc"), "[[#id][desc]]".len())
);
assert_eq!(
Link::parse("[[#id][desc]]").unwrap(),
(
Link {
path: "#id",
desc: Some("desc"),
},
"[[#id][desc]]".len()
)
);
assert!(Link::parse("[[#id][desc]").is_none());
assert!(parse("[[#id][desc]").is_none());
}
}

View file

@ -1,15 +1,9 @@
use jetscii::Substring;
use memchr::memchr2;
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Macros<'a> {
pub name: &'a str,
pub args: Option<&'a str>,
}
impl<'a> Macros<'a> {
pub fn parse(src: &'a str) -> Option<(Macros<'a>, usize)> {
/// returns (macros name, macros arguments, offset)
#[inline]
pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize)> {
debug_assert!(src.starts_with("{{{"));
expect!(src, 3, |c: u8| c.is_ascii_alphabetic())?;
@ -24,67 +18,45 @@ impl<'a> Macros<'a> {
Some(if bytes[name] == b'}' {
expect!(src, name + 1, b'}')?;
expect!(src, name + 2, b'}')?;
(
Macros {
name: &src[3..name],
args: None,
},
name + 3,
)
(&src[3..name], None, name + 3)
} else {
let end = Substring::new(")}}}")
.find(&src[name..])
.map(|i| i + name)?;
(
Macros {
name: &src[3..name],
args: if name == end {
&src[3..name],
if name == end {
None
} else {
Some(&src[name + 1..end])
},
},
end + 4,
)
})
}
#[cfg(test)]
mod tests {
    /// Table-driven check of `super::parse` for macro invocations.
    #[test]
    fn parse() {
        // (input, expected name and arguments); every successful parse is
        // expected to consume the whole input.
        let cases: [(&str, Option<(&str, Option<&str>)>); 7] = [
            ("{{{poem(red,blue)}}}", Some(("poem", Some("red,blue")))),
            ("{{{poem())}}}", Some(("poem", Some(")")))),
            ("{{{author}}}", Some(("author", None))),
            ("{{{0uthor}}}", None),
            ("{{{author}}", None),
            ("{{{poem(}}}", None),
            ("{{{poem)}}}", None),
        ];

        for &(input, parsed) in cases.iter() {
            let expected = parsed.map(|(name, args)| (name, args, input.len()));
            assert_eq!(super::parse(input), expected);
        }
    }
}
#[test]
fn parse() {
assert_eq!(
Macros::parse("{{{poem(red,blue)}}}"),
Some((
Macros {
name: "poem",
args: Some("red,blue")
},
"{{{poem(red,blue)}}}".len()
))
);
assert_eq!(
Macros::parse("{{{poem())}}}"),
Some((
Macros {
name: "poem",
args: Some(")")
},
"{{{poem())}}}".len()
))
);
assert_eq!(
Macros::parse("{{{author}}}"),
Some((
Macros {
name: "author",
args: None
},
"{{{author}}}".len()
))
);
assert_eq!(Macros::parse("{{{0uthor}}}"), None);
assert_eq!(Macros::parse("{{{author}}"), None);
assert_eq!(Macros::parse("{{{poem(}}}"), None);
assert_eq!(Macros::parse("{{{poem)}}}"), None);
}

View file

@ -5,37 +5,63 @@ mod inline_call;
mod inline_src;
mod link;
mod macros;
mod radio_target;
mod snippet;
mod target;
pub use self::cookie::Cookie;
pub use self::emphasis::Emphasis;
pub use self::fn_ref::FnRef;
pub use self::inline_call::InlineCall;
pub use self::inline_src::InlineSrc;
pub use self::link::Link;
pub use self::macros::Macros;
pub use self::snippet::Snippet;
pub use self::target::{RadioTarget, Target};
use jetscii::bytes;
#[cfg_attr(test, derive(PartialEq, Debug))]
pub enum Object<'a> {
Cookie(Cookie<'a>),
FnRef(FnRef<'a>),
InlineCall(InlineCall<'a>),
InlineSrc(InlineSrc<'a>),
Link(Link<'a>),
Macros(Macros<'a>),
RadioTarget(RadioTarget<'a>),
Snippet(Snippet<'a>),
Target(Target<'a>),
FnRef {
label: Option<&'a str>,
def: Option<&'a str>,
},
InlineCall {
name: &'a str,
args: &'a str,
inside_header: Option<&'a str>,
end_header: Option<&'a str>,
},
InlineSrc {
lang: &'a str,
option: Option<&'a str>,
body: &'a str,
},
Link {
path: &'a str,
desc: Option<&'a str>,
},
Macros {
name: &'a str,
args: Option<&'a str>,
},
RadioTarget {
target: &'a str,
},
Snippet {
name: &'a str,
value: &'a str,
},
Target {
target: &'a str,
},
// `end` indicates the position of the second marker
Bold { end: usize },
Italic { end: usize },
Strike { end: usize },
Underline { end: usize },
Bold {
end: usize,
},
Italic {
end: usize,
},
Strike {
end: usize,
},
Underline {
end: usize,
},
Verbatim(&'a str),
Code(&'a str),
@ -68,40 +94,40 @@ impl<'a> Object<'a> {
match bytes[pos] {
b'@' if bytes[pos + 1] == b'@' => {
if let Some((snippet, off)) = Snippet::parse(&src[pos..]) {
brk!(Object::Snippet(snippet), off, pos);
if let Some((name, value, off)) = snippet::parse(&src[pos..]) {
brk!(Object::Snippet { name, value }, off, pos);
}
}
b'{' if bytes[pos + 1] == b'{' && bytes[pos + 2] == b'{' => {
if let Some((macros, off)) = Macros::parse(&src[pos..]) {
brk!(Object::Macros(macros), off, pos);
if let Some((name, args, off)) = macros::parse(&src[pos..]) {
brk!(Object::Macros { name, args }, off, pos);
}
}
b'<' if bytes[pos + 1] == b'<' => {
if bytes[pos + 2] == b'<' {
if let Some((target, off)) = RadioTarget::parse(&src[pos..]) {
brk!(Object::RadioTarget(target), off, pos);
if let Some((target, off)) = radio_target::parse(&src[pos..]) {
brk!(Object::RadioTarget { target }, off, pos);
}
} else if bytes[pos + 2] != b'\n' {
if let Some((target, off)) = Target::parse(&src[pos..]) {
brk!(Object::Target(target), off, pos);
if let Some((target, off)) = target::parse(&src[pos..]) {
brk!(Object::Target { target }, off, pos);
}
}
}
b'[' => {
if bytes[pos + 1..].starts_with(b"fn:") {
if let Some((fn_ref, off)) = FnRef::parse(&src[pos..]) {
brk!(Object::FnRef(fn_ref), off, pos);
if let Some((label, def, off)) = fn_ref::parse(&src[pos..]) {
brk!(Object::FnRef { label, def }, off, pos);
}
}
if bytes[pos + 1] == b'[' {
if let Some((link, off)) = Link::parse(&src[pos..]) {
brk!(Object::Link(link), off, pos);
if let Some((path, desc, off)) = link::parse(&src[pos..]) {
brk!(Object::Link { path, desc }, off, pos);
}
}
if let Some((cookie, off)) = Cookie::parse(&src[pos..]) {
if let Some((cookie, off)) = cookie::parse(&src[pos..]) {
brk!(Object::Cookie(cookie), off, pos);
}
// TODO: Timestamp
@ -112,43 +138,54 @@ impl<'a> Object<'a> {
match bytes[pre] {
b'*' => {
if let Some(end) = Emphasis::parse(&src[pre..], b'*') {
if let Some(end) = emphasis::parse(&src[pre..], b'*') {
brk!(Object::Bold { end }, 1, pre);
}
}
b'+' => {
if let Some(end) = Emphasis::parse(&src[pre..], b'+') {
if let Some(end) = emphasis::parse(&src[pre..], b'+') {
brk!(Object::Strike { end }, 1, pre);
}
}
b'/' => {
if let Some(end) = Emphasis::parse(&src[pre..], b'/') {
if let Some(end) = emphasis::parse(&src[pre..], b'/') {
brk!(Object::Italic { end }, 1, pre);
}
}
b'_' => {
if let Some(end) = Emphasis::parse(&src[pre..], b'_') {
if let Some(end) = emphasis::parse(&src[pre..], b'_') {
brk!(Object::Underline { end }, 1, pre);
}
}
b'=' => {
if let Some(end) = Emphasis::parse(&src[pre..], b'=') {
if let Some(end) = emphasis::parse(&src[pre..], b'=') {
brk!(Object::Verbatim(&src[pre + 1..pre + end]), end + 1, pre);
}
}
b'~' => {
if let Some(end) = Emphasis::parse(&src[pre..], b'~') {
if let Some(end) = emphasis::parse(&src[pre..], b'~') {
brk!(Object::Code(&src[pre + 1..pre + end]), end + 1, pre);
}
}
b'c' if src[pre..].starts_with("call_") => {
if let Some((call, off)) = InlineCall::parse(&src[pre..]) {
brk!(Object::InlineCall(call), off, pre);
if let Some((name, args, inside_header, end_header, off)) =
inline_call::parse(&src[pre..])
{
brk!(
Object::InlineCall {
name,
args,
inside_header,
end_header,
},
off,
pre
);
}
}
b's' if src[pre..].starts_with("src_") => {
if let Some((src, off)) = InlineSrc::parse(&src[pre..]) {
brk!(Object::InlineSrc(src), off, pre);
if let Some((lang, option, body, off)) = inline_src::parse(&src[pre..]) {
brk!(Object::InlineSrc { lang, option, body }, off, pre);
}
}
_ => (),

View file

@ -0,0 +1,44 @@
use jetscii::Substring;
// TODO: text-markup, entities, latex-fragments, subscript and superscript
/// Parses a radio target, `<<<target>>>`, located at the very start of `src`.
///
/// Returns the text between the delimiters together with the offset just past
/// the closing `>>>`.
///
/// `None` is returned when no `>>>` is found, when the text between the
/// delimiters contains `<`, `>` or a newline, or when it ends with a space.
/// The `expect!` guard on index 3 is what rejects a leading space, as in the
/// `<<< target>>>` test case.
#[inline]
pub fn parse(src: &str) -> Option<(&str, usize)> {
    debug_assert!(src.starts_with("<<<"));

    expect!(src, 3, |c| c != b' ')?;

    let bytes = src.as_bytes();
    // position of the first `>>>` in `src`
    let close = Substring::new(">>>").find(src)?;

    let has_forbidden = bytes[3..close]
        .iter()
        .any(|&c| c == b'<' || c == b'\n' || c == b'>');
    if has_forbidden || bytes[close - 1] == b' ' {
        return None;
    }

    Some((&src[3..close], close + 3))
}
#[cfg(test)]
mod tests {
    /// Checks `super::parse` against well-formed and malformed radio targets.
    #[test]
    fn parse() {
        // Well-formed: the whole input is consumed and the inner text is returned.
        let accepted = [("<<<target>>>", "target"), ("<<<tar get>>>", "tar get")];
        for &(input, target) in accepted.iter() {
            assert_eq!(super::parse(input).unwrap(), (target, input.len()));
        }

        let rejected = [
            // trailing / leading space
            "<<<target >>>",
            "<<< target>>>",
            // forbidden bytes inside the target
            "<<<ta<get>>>",
            "<<<ta>get>>>",
            "<<<ta\nget>>>",
            // closing delimiter too short
            "<<<target>>",
        ];
        for &input in rejected.iter() {
            assert_eq!(super::parse(input), None);
        }
    }
}

View file

@ -1,15 +1,9 @@
use jetscii::Substring;
use memchr::memchr;
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Snippet<'a> {
pub name: &'a str,
pub value: &'a str,
}
impl<'a> Snippet<'a> {
pub fn parse(src: &'a str) -> Option<(Snippet<'a>, usize)> {
/// returns (snippet name, snippet value, offset)
#[inline]
pub fn parse(src: &str) -> Option<(&str, &str, usize)> {
debug_assert!(src.starts_with("@@"));
let name = memchr(b':', src.as_bytes()).filter(|&i| {
@ -23,49 +17,30 @@ impl<'a> Snippet<'a> {
.find(&src[name + 1..])
.map(|i| i + name + 1)?;
Some((
Snippet {
name: &src[2..name],
value: &src[name + 1..end],
},
end + 2,
))
}
Some((&src[2..name], &src[name + 1..end], end + 2))
}
#[test]
fn parse() {
#[cfg(test)]
mod tests {
#[test]
fn parse() {
use super::parse;
assert_eq!(
Snippet::parse("@@html:<b>@@").unwrap(),
(
Snippet {
name: "html",
value: "<b>"
},
"@@html:<b>@@".len()
)
parse("@@html:<b>@@").unwrap(),
("html", "<b>", "@@html:<b>@@".len())
);
assert_eq!(
Snippet::parse("@@latex:any arbitrary LaTeX code@@").unwrap(),
parse("@@latex:any arbitrary LaTeX code@@").unwrap(),
(
Snippet {
name: "latex",
value: "any arbitrary LaTeX code"
},
"latex",
"any arbitrary LaTeX code",
"@@latex:any arbitrary LaTeX code@@".len()
)
);
assert_eq!(
Snippet::parse("@@html:@@").unwrap(),
(
Snippet {
name: "html",
value: ""
},
"@@html:@@".len()
)
);
assert!(Snippet::parse("@@html:<b>@").is_none());
assert!(Snippet::parse("@@html<b>@@").is_none());
assert!(Snippet::parse("@@:<b>@@").is_none());
assert_eq!(parse("@@html:@@").unwrap(), ("html", "", "@@html:@@".len()));
assert!(parse("@@html:<b>@").is_none());
assert!(parse("@@html<b>@@").is_none());
assert!(parse("@@:<b>@@").is_none());
}
}

View file

@ -1,34 +1,7 @@
use jetscii::Substring;
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
// TODO: text-markup, entities, latex-fragments, subscript and superscript
pub struct RadioTarget<'a>(&'a str);
impl<'a> RadioTarget<'a> {
pub fn parse(src: &'a str) -> Option<(RadioTarget<'a>, usize)> {
debug_assert!(src.starts_with("<<<"));
expect!(src, 3, |c| c != b' ')?;
let end = Substring::new(">>>").find(src).filter(|&i| {
src.as_bytes()[3..i]
.iter()
.all(|&c| c != b'<' && c != b'\n' && c != b'>')
})?;
expect!(src, end - 1, |c| c != b' ')?;
Some((RadioTarget(&src[3..end]), end + 3))
}
}
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Target<'a>(&'a str);
impl<'a> Target<'a> {
pub fn parse(src: &'a str) -> Option<(Target<'a>, usize)> {
#[inline]
pub fn parse(src: &str) -> Option<(&str, usize)> {
debug_assert!(src.starts_with("<<"));
expect!(src, 2, |c| c != b' ')?;
@ -39,41 +12,29 @@ impl<'a> Target<'a> {
.all(|&c| c != b'<' && c != b'\n' && c != b'>')
})?;
expect!(src, end - 1, |c| c != b' ')?;
if src.as_bytes()[end - 1] == b' ' {
return None;
}
Some((Target(&src[2..end]), end + 2))
Some((&src[2..end], end + 2))
}
#[cfg(test)]
mod tests {
    /// Checks `super::parse` against well-formed and malformed targets.
    #[test]
    fn parse() {
        // Well-formed: the whole input is consumed and the inner text is returned.
        let accepted = [("<<target>>", "target"), ("<<tar get>>", "tar get")];
        for &(input, target) in accepted.iter() {
            assert_eq!(super::parse(input).unwrap(), (target, input.len()));
        }

        let rejected = [
            // trailing / leading space
            "<<target >>",
            "<< target>>",
            // forbidden bytes inside the target
            "<<ta<get>>",
            "<<ta>get>>",
            "<<ta\nget>>",
            // closing delimiter too short
            "<<target>",
        ];
        for &input in rejected.iter() {
            assert_eq!(super::parse(input), None);
        }
    }
}
#[test]
fn parse() {
assert_eq!(
RadioTarget::parse("<<<target>>>").unwrap(),
(RadioTarget("target"), "<<<target>>>".len())
);
assert_eq!(
RadioTarget::parse("<<<tar get>>>").unwrap(),
(RadioTarget("tar get"), "<<<tar get>>>".len())
);
assert_eq!(RadioTarget::parse("<<<target >>>"), None);
assert_eq!(RadioTarget::parse("<<< target>>>"), None);
assert_eq!(RadioTarget::parse("<<<ta<get>>>"), None);
assert_eq!(RadioTarget::parse("<<<ta>get>>>"), None);
assert_eq!(RadioTarget::parse("<<<ta\nget>>>"), None);
assert_eq!(RadioTarget::parse("<<<target>>"), None);
assert_eq!(
Target::parse("<<target>>").unwrap(),
(Target("target"), "<<target>>".len())
);
assert_eq!(
Target::parse("<<tar get>>").unwrap(),
(Target("tar get"), "<<tar get>>".len())
);
assert_eq!(Target::parse("<<target >>"), None);
assert_eq!(Target::parse("<< target>>"), None);
assert_eq!(Target::parse("<<ta<get>>"), None);
assert_eq!(Target::parse("<<ta>get>>"), None);
assert_eq!(Target::parse("<<ta\nget>>"), None);
assert_eq!(Target::parse("<<target>"), None);
}

View file

@ -1,10 +1,12 @@
//! Parser
use crate::elements::*;
use crate::headline::*;
use crate::objects::*;
#[cfg_attr(test, derive(PartialEq))]
#[derive(Copy, Clone, Debug)]
pub enum Container {
enum Container {
Headline {
beg: usize,
end: usize,
@ -143,14 +145,39 @@ pub enum Event<'a> {
Rule,
Cookie(Cookie<'a>),
FnRef(FnRef<'a>),
InlineCall(InlineCall<'a>),
InlineSrc(InlineSrc<'a>),
Link(Link<'a>),
Macros(Macros<'a>),
RadioTarget(RadioTarget<'a>),
Snippet(Snippet<'a>),
Target(Target<'a>),
FnRef {
label: Option<&'a str>,
def: Option<&'a str>,
},
InlineCall {
name: &'a str,
args: &'a str,
inside_header: Option<&'a str>,
end_header: Option<&'a str>,
},
InlineSrc {
lang: &'a str,
option: Option<&'a str>,
body: &'a str,
},
Link {
path: &'a str,
desc: Option<&'a str>,
},
Macros {
name: &'a str,
args: Option<&'a str>,
},
RadioTarget {
target: &'a str,
},
Snippet {
name: &'a str,
value: &'a str,
},
Target {
target: &'a str,
},
BoldBeg,
BoldEnd,
@ -176,6 +203,7 @@ pub struct Parser<'a> {
}
impl<'a> Parser<'a> {
/// creates a new parser from string
pub fn new(text: &'a str) -> Parser<'a> {
Parser {
text,
@ -187,10 +215,12 @@ impl<'a> Parser<'a> {
}
}
/// returns current offset
pub fn offset(&self) -> usize {
self.off
}
/// returns current stack depth
pub fn stack_depth(&self) -> usize {
self.stack.len()
}
@ -359,16 +389,26 @@ impl<'a> Parser<'a> {
Object::Bold { .. } => Event::BoldBeg,
Object::Code(c) => Event::Code(c),
Object::Cookie(c) => Event::Cookie(c),
Object::FnRef(f) => Event::FnRef(f),
Object::InlineCall(i) => Event::InlineCall(i),
Object::InlineSrc(i) => Event::InlineSrc(i),
Object::FnRef { label, def } => Event::FnRef { label, def },
Object::InlineCall {
name,
args,
inside_header,
end_header,
} => Event::InlineCall {
name,
args,
inside_header,
end_header,
},
Object::InlineSrc { lang, option, body } => Event::InlineSrc { lang, option, body },
Object::Italic { .. } => Event::ItalicBeg,
Object::Link(l) => Event::Link(l),
Object::Macros(m) => Event::Macros(m),
Object::RadioTarget(r) => Event::RadioTarget(r),
Object::Snippet(s) => Event::Snippet(s),
Object::Link { path, desc } => Event::Link { path, desc },
Object::Macros { name, args } => Event::Macros { name, args },
Object::RadioTarget { target } => Event::RadioTarget { target },
Object::Snippet { name, value } => Event::Snippet { name, value },
Object::Strike { .. } => Event::StrikeBeg,
Object::Target(t) => Event::Target(t),
Object::Target { target } => Event::Target { target },
Object::Text(t) => Event::Text(t),
Object::Underline { .. } => Event::UnderlineBeg,
Object::Verbatim(v) => Event::Verbatim(v),
@ -376,7 +416,7 @@ impl<'a> Parser<'a> {
}
fn next_list_item(&mut self, ident: usize, end: usize) -> Event<'a> {
let (bullet, off, cont_end, end, has_more) = List::parse(&self.text[self.off..end], ident);
let (bullet, off, cont_end, end, has_more) = list::parse(&self.text[self.off..end], ident);
self.stack.push(Container::ListItem {
cont_end: self.off + cont_end,
end: self.off + end,
@ -386,6 +426,7 @@ impl<'a> Parser<'a> {
Event::ListItemBeg { bullet }
}
#[inline]
fn end(&mut self) -> Event<'a> {
match self.stack.pop().unwrap() {
Container::Bold { .. } => Event::BoldEnd,
@ -482,7 +523,13 @@ fn parse() {
use self::Event::*;
let expected = vec![
HeadlineBeg(Headline::new(1, None, None, "Title 1", None)),
HeadlineBeg(Headline {
level: 1,
priority: None,
keyword: None,
title: "Title 1",
tags: None,
}),
SectionBeg,
ParagraphBeg,
BoldBeg,
@ -490,7 +537,13 @@ fn parse() {
BoldEnd,
ParagraphEnd,
SectionEnd,
HeadlineBeg(Headline::new(2, None, None, "Title 2", None)),
HeadlineBeg(Headline {
level: 2,
priority: None,
keyword: None,
title: "Title 2",
tags: None,
}),
SectionBeg,
ParagraphBeg,
UnderlineBeg,
@ -500,7 +553,13 @@ fn parse() {
SectionEnd,
HeadlineEnd,
HeadlineEnd,
HeadlineBeg(Headline::new(1, None, None, "Title 3", None)),
HeadlineBeg(Headline {
level: 1,
priority: None,
keyword: None,
title: "Title 3",
tags: None,
}),
SectionBeg,
ParagraphBeg,
ItalicBeg,
@ -509,7 +568,13 @@ fn parse() {
ParagraphEnd,
SectionEnd,
HeadlineEnd,
HeadlineBeg(Headline::new(1, None, None, "Title 4", None)),
HeadlineBeg(Headline {
level: 1,
priority: None,
keyword: None,
title: "Title 4",
tags: None,
}),
SectionBeg,
ParagraphBeg,
Verbatim("Section 4"),