diff --git a/Cargo.toml b/Cargo.toml index 1760925..9f35dbc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "orgize" -version = "0.1.2" +version = "0.1.3" authors = ["PoiScript "] description = "A Rust library for parsing orgmode files." repository = "https://github.com/PoiScript/orgize" diff --git a/examples/convert.rs b/examples/convert.rs new file mode 100644 index 0000000..5e10cac --- /dev/null +++ b/examples/convert.rs @@ -0,0 +1,33 @@ +use std::env; +use std::fs::File; +use std::io::Cursor; +use std::io::Read; + +use orgize::export::{HtmlHandler, Render}; + +fn main() { + let args: Vec = env::args().collect(); + + if args.len() < 2 { + println!("Usage: {} ", args[0]); + return; + } + + let mut file = File::open(&args[1]).expect(&format!("file {} not found", &args[1])); + + let mut contents = String::new(); + file.read_to_string(&mut contents) + .expect("something went wrong reading the file"); + + let cursor = Cursor::new(Vec::new()); + let handler = HtmlHandler; + let mut render = Render::new(handler, cursor, &contents); + + render + .render() + .expect("something went wrong rendering the file"); + println!( + "{}", + String::from_utf8(render.into_wirter().into_inner()).expect("invalid utf-8") + ); +} diff --git a/src/elements/block.rs b/src/elements/block.rs index 1e75cdc..a737518 100644 --- a/src/elements/block.rs +++ b/src/elements/block.rs @@ -1,61 +1,61 @@ use crate::lines::Lines; use memchr::memchr2; -#[cfg_attr(test, derive(PartialEq))] -#[derive(Debug)] -pub struct Block; +// return (name, args, contents-begin, contents-end, end) +#[inline] +pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> { + debug_assert!(src.starts_with("#+")); -impl Block { - // return (name, args, contents-begin, contents-end, end) - pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> { - debug_assert!(src.starts_with("#+")); - - if !src[2..8].eq_ignore_ascii_case("BEGIN_") { - return None; - } - - let name = memchr2(b' ', b'\n', src.as_bytes()) - .filter(|&i| src.as_bytes()[8..i].iter().all(|c| c.is_ascii_alphabetic()))?; - let mut lines = Lines::new(src); - let (pre_cont_end, cont_beg, _) = lines.next()?; - let args = if pre_cont_end == name { - None - } else { - Some(&src[name..pre_cont_end]) - }; - let name = &src[8..name]; - let end_line = format!(r"#+END_{}", name); - let mut pre_end = cont_beg; - - for (_, end, line) in lines { - if line.trim().eq_ignore_ascii_case(&end_line) { - return Some((name, args, cont_beg, pre_end, end)); - } else { - pre_end = end; - } - } - - None + if src[2..8].to_uppercase() != "BEGIN_" { + return None; } + + let name = memchr2(b' ', b'\n', src.as_bytes()) + .filter(|&i| src.as_bytes()[8..i].iter().all(|c| c.is_ascii_alphabetic()))?; + let mut lines = Lines::new(src); + let (pre_cont_end, cont_beg, _) = lines.next()?; + let args = if pre_cont_end == name { + None + } else { + Some(&src[name..pre_cont_end]) + }; + let name = &src[8..name]; + let end_line = format!(r"#+END_{}", name.to_uppercase()); + let mut pre_end = cont_beg; + + for (_, end, line) in lines { + if line.trim() == end_line { + return Some((name, args, cont_beg, pre_end, end)); + } else { + pre_end = end; + } + } + + None } -#[test] -fn parse() { - assert_eq!( - Block::parse("#+BEGIN_SRC\n#+END_SRC"), - Some(("SRC", None, 12, 12, 21)) - ); - assert_eq!( - Block::parse( - r#"#+BEGIN_SRC rust +#[cfg(test)] +mod tests { + #[test] + fn parse() { + use super::parse; + + assert_eq!( + parse("#+BEGIN_SRC\n#+END_SRC"), + Some(("SRC", None, 12, 12, 21)) + ); + assert_eq!( + parse( + r#"#+BEGIN_SRC rust fn main() { // print "Hello World!" to the console println!("Hello World!"); } #+END_SRC "# - ), - Some(("SRC", Some(" rust"), 17, 104, 114)) - ); - // TODO: more testing + ), + Some(("SRC", Some(" rust"), 17, 104, 114)) + ); + // TODO: more testing + } } diff --git a/src/elements/dyn_block.rs b/src/elements/dyn_block.rs index 2dbed0c..78df32f 100644 --- a/src/elements/dyn_block.rs +++ b/src/elements/dyn_block.rs @@ -1,62 +1,62 @@ use crate::lines::Lines; use memchr::memchr2; -#[cfg_attr(test, derive(PartialEq))] -#[derive(Debug)] -pub struct DynBlock; +// return (name, parameters, contents-begin, contents-end, end) +#[inline] +pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> { + debug_assert!(src.starts_with("#+")); -impl DynBlock { - // return (name, parameters, contents-begin, contents-end, end) - pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> { - debug_assert!(src.starts_with("#+")); - - if !src[2..9].eq_ignore_ascii_case("BEGIN: ") { - return None; - } - - let bytes = src.as_bytes(); - let args = eol!(src); - let name = memchr2(b' ', b'\n', &bytes[9..]) - .map(|i| i + 9) - .filter(|&i| { - src.as_bytes()[9..i] - .iter() - .all(|&c| c.is_ascii_alphabetic()) - })?; - let mut lines = Lines::new(src); - let (mut pre_cont_end, _, _) = lines.next()?; - - for (cont_end, end, line) in lines { - if line.trim().eq_ignore_ascii_case("#+END:") { - return Some(( - &src[8..name].trim(), - if name == args { - None - } else { - Some(&src[name..args].trim()) - }, - args, - pre_cont_end, - end, - )); - } - pre_cont_end = cont_end; - } - - None + if !src[2..9].eq_ignore_ascii_case("BEGIN: ") { + return None; } + + let bytes = src.as_bytes(); + let args = eol!(src); + let name = memchr2(b' ', b'\n', &bytes[9..]) + .map(|i| i + 9) + .filter(|&i| { + src.as_bytes()[9..i] + .iter() + .all(|&c| c.is_ascii_alphabetic()) + })?; + let mut lines = Lines::new(src); + let (mut pre_cont_end, _, _) = lines.next()?; + + for (cont_end, end, line) in lines { + if line.trim().eq_ignore_ascii_case("#+END:") { + return Some(( + &src[8..name].trim(), + if name == args { + None + } else { + Some(&src[name..args].trim()) + }, + args, + pre_cont_end, + end, + )); + } + pre_cont_end = cont_end; + } + + None } -#[test] -fn parse() { - // TODO: testing - assert_eq!( - DynBlock::parse( - r"#+BEGIN: clocktable :scope file +#[cfg(test)] +mod tests { + #[test] + fn parse() { + use super::parse; + + // TODO: testing + assert_eq!( + parse( + r"#+BEGIN: clocktable :scope file CONTENTS #+END: " - ), - Some(("clocktable", Some(":scope file"), 31, 40, 48)) - ) + ), + Some(("clocktable", Some(":scope file"), 31, 40, 48)) + ) + } } diff --git a/src/elements/fn_def.rs b/src/elements/fn_def.rs index 6d4b3eb..3026398 100644 --- a/src/elements/fn_def.rs +++ b/src/elements/fn_def.rs @@ -1,57 +1,54 @@ use memchr::memchr; -#[cfg_attr(test, derive(PartialEq))] -#[derive(Debug)] -pub struct FnDef; +#[inline] +pub fn parse(src: &str) -> Option<(&str, &str, usize)> { + debug_assert!(src.starts_with("[fn:")); -impl FnDef { - pub fn parse(src: &str) -> Option<(&str, &str, usize)> { - debug_assert!(src.starts_with("[fn:")); + let label = memchr(b']', src.as_bytes()).filter(|&i| { + i != 4 + && src.as_bytes()[4..i] + .iter() + .all(|&c| c.is_ascii_alphanumeric() || c == b'-' || c == b'_') + })?; - let label = memchr(b']', src.as_bytes()).filter(|&i| { - i != 4 - && src.as_bytes()[4..i] - .iter() - .all(|&c| c.is_ascii_alphanumeric() || c == b'-' || c == b'_') - })?; + let end = eol!(src); - let end = eol!(src); + Some((&src[4..label], &src[label + 1..end], end)) +} - Some((&src[4..label], &src[label + 1..end], end)) +#[cfg(test)] +mod tests { + #[test] + fn parse() { + use super::parse; + + assert_eq!( + parse("[fn:1] https://orgmode.org").unwrap(), + ( + "1", + " https://orgmode.org", + "[fn:1] https://orgmode.org".len() + ) + ); + assert_eq!( + parse("[fn:word_1] https://orgmode.org").unwrap(), + ( + "word_1", + " https://orgmode.org", + "[fn:word_1] https://orgmode.org".len() + ) + ); + assert_eq!( + parse("[fn:WORD-1] https://orgmode.org").unwrap(), + ( + "WORD-1", + " https://orgmode.org", + "[fn:WORD-1] https://orgmode.org".len() + ) + ); + assert_eq!(parse("[fn:WORD]").unwrap(), ("WORD", "", "[fn:WORD]".len())); + assert!(parse("[fn:] https://orgmode.org").is_none()); + assert!(parse("[fn:wor d] https://orgmode.org").is_none()); + assert!(parse("[fn:WORD https://orgmode.org").is_none()); } } - -#[test] -fn parse() { - assert_eq!( - FnDef::parse("[fn:1] https://orgmode.org").unwrap(), - ( - "1", - " https://orgmode.org", - "[fn:1] https://orgmode.org".len() - ) - ); - assert_eq!( - FnDef::parse("[fn:word_1] https://orgmode.org").unwrap(), - ( - "word_1", - " https://orgmode.org", - "[fn:word_1] https://orgmode.org".len() - ) - ); - assert_eq!( - FnDef::parse("[fn:WORD-1] https://orgmode.org").unwrap(), - ( - "WORD-1", - " https://orgmode.org", - "[fn:WORD-1] https://orgmode.org".len() - ) - ); - assert_eq!( - FnDef::parse("[fn:WORD]").unwrap(), - ("WORD", "", "[fn:WORD]".len()) - ); - assert!(FnDef::parse("[fn:] https://orgmode.org").is_none()); - assert!(FnDef::parse("[fn:wor d] https://orgmode.org").is_none()); - assert!(FnDef::parse("[fn:WORD https://orgmode.org").is_none()); -} diff --git a/src/elements/keyword.rs b/src/elements/keyword.rs index 4bf7fd2..dfbe9a6 100644 --- a/src/elements/keyword.rs +++ b/src/elements/keyword.rs @@ -1,7 +1,5 @@ use memchr::{memchr, memchr2}; -pub struct Keyword; - #[cfg_attr(test, derive(PartialEq))] #[derive(Debug)] pub enum Key<'a> { @@ -24,114 +22,116 @@ pub enum Key<'a> { Call, } -impl Keyword { - // return (key, value, offset) - pub fn parse(src: &str) -> Option<(Key<'_>, &str, usize)> { - debug_assert!(src.starts_with("#+")); +pub fn parse(src: &str) -> Option<(Key<'_>, &str, usize)> { + debug_assert!(src.starts_with("#+")); - let bytes = src.as_bytes(); - let key_end = memchr2(b':', b'[', bytes).filter(|&i| { - bytes[2..i] - .iter() - .all(|&c| c.is_ascii_alphabetic() || c == b'_') - })?; + let bytes = src.as_bytes(); + let key_end = memchr2(b':', b'[', bytes).filter(|&i| { + bytes[2..i] + .iter() + .all(|&c| c.is_ascii_alphabetic() || c == b'_') + })?; - let option = if bytes[key_end] == b'[' { - let option = - memchr(b']', bytes).filter(|&i| bytes[key_end..i].iter().all(|&c| c != b'\n'))?; - expect!(src, option + 1, b':')?; - option + 1 - } else { - key_end - }; + let option = if bytes[key_end] == b'[' { + let option = + memchr(b']', bytes).filter(|&i| bytes[key_end..i].iter().all(|&c| c != b'\n'))?; + expect!(src, option + 1, b':')?; + option + 1 + } else { + key_end + }; - // includes the eol character - let end = memchr::memchr(b'\n', src.as_bytes()) - .map(|i| i + 1) - .unwrap_or_else(|| src.len()); + // includes the eol character + let end = memchr::memchr(b'\n', src.as_bytes()) + .map(|i| i + 1) + .unwrap_or_else(|| src.len()); - Some(( - match &src[2..key_end] { - key if key.eq_ignore_ascii_case("CAPTION") => Key::Caption { - option: if key_end == option { - None - } else { - Some(&src[key_end + 1..option - 1]) - }, + Some(( + match src[2..key_end].to_uppercase().as_str() { + "AUTHOR" => Key::Author, + "CALL" => Key::Call, + "DATE" => Key::Date, + "HEADER" => Key::Header, + "NAME" => Key::Name, + "PLOT" => Key::Plot, + "TITLE" => Key::Title, + "RESULTS" => Key::Results { + option: if key_end == option { + None + } else { + Some(&src[key_end + 1..option - 1]) }, - key if key.eq_ignore_ascii_case("HEADER") => Key::Header, - key if key.eq_ignore_ascii_case("NAME") => Key::Name, - key if key.eq_ignore_ascii_case("PLOT") => Key::Plot, - key if key.eq_ignore_ascii_case("RESULTS") => Key::Results { - option: if key_end == option { - None - } else { - Some(&src[key_end + 1..option - 1]) - }, - }, - key if key.eq_ignore_ascii_case("AUTHOR") => Key::Author, - key if key.eq_ignore_ascii_case("DATE") => Key::Date, - key if key.eq_ignore_ascii_case("TITLE") => Key::Title, - key if key.eq_ignore_ascii_case("CALL") => Key::Call, - key if key.starts_with("ATTR_") => Key::Attr { - backend: &src["#+ATTR_".len()..key_end], - }, - key => Key::Custom(key), }, - &src[option + 1..end].trim(), - end, - )) + "CAPTION" => Key::Caption { + option: if key_end == option { + None + } else { + Some(&src[key_end + 1..option - 1]) + }, + }, + key if key.starts_with("ATTR_") => Key::Attr { + backend: &src["#+ATTR_".len()..key_end], + }, + _ => Key::Custom(&src[2..key_end]), + }, + &src[option + 1..end].trim(), + end, + )) +} + +#[cfg(test)] +mod tests { + #[test] + fn parse() { + use super::*; + + assert_eq!( + parse("#+KEY:"), + Some((Key::Custom("KEY"), "", "#+KEY:".len())) + ); + assert_eq!( + parse("#+KEY: VALUE"), + Some((Key::Custom("KEY"), "VALUE", "#+KEY: VALUE".len())) + ); + assert_eq!( + parse("#+K_E_Y: VALUE"), + Some((Key::Custom("K_E_Y"), "VALUE", "#+K_E_Y: VALUE".len())) + ); + assert_eq!( + parse("#+KEY:VALUE\n"), + Some((Key::Custom("KEY"), "VALUE", "#+KEY:VALUE\n".len())) + ); + assert!(parse("#+KE Y: VALUE").is_none()); + assert!(parse("#+ KEY: VALUE").is_none()); + + assert_eq!( + parse("#+RESULTS:"), + Some((Key::Results { option: None }, "", "#+RESULTS:".len())) + ); + + assert_eq!( + parse("#+ATTR_LATEX: :width 5cm"), + Some(( + Key::Attr { backend: "LATEX" }, + ":width 5cm", + "#+ATTR_LATEX: :width 5cm".len() + )) + ); + + assert_eq!( + parse("#+CALL: double(n=4)"), + Some((Key::Call, "double(n=4)", "#+CALL: double(n=4)".len())) + ); + + assert_eq!( + parse("#+CAPTION[Short caption]: Longer caption."), + Some(( + Key::Caption { + option: Some("Short caption") + }, + "Longer caption.", + "#+CAPTION[Short caption]: Longer caption.".len() + )) + ); } } - -#[test] -fn parse() { - assert_eq!( - Keyword::parse("#+KEY:"), - Some((Key::Custom("KEY"), "", "#+KEY:".len())) - ); - assert_eq!( - Keyword::parse("#+KEY: VALUE"), - Some((Key::Custom("KEY"), "VALUE", "#+KEY: VALUE".len())) - ); - assert_eq!( - Keyword::parse("#+K_E_Y: VALUE"), - Some((Key::Custom("K_E_Y"), "VALUE", "#+K_E_Y: VALUE".len())) - ); - assert_eq!( - Keyword::parse("#+KEY:VALUE\n"), - Some((Key::Custom("KEY"), "VALUE", "#+KEY:VALUE\n".len())) - ); - assert!(Keyword::parse("#+KE Y: VALUE").is_none()); - assert!(Keyword::parse("#+ KEY: VALUE").is_none()); - - assert_eq!( - Keyword::parse("#+RESULTS:"), - Some((Key::Results { option: None }, "", "#+RESULTS:".len())) - ); - - assert_eq!( - Keyword::parse("#+ATTR_LATEX: :width 5cm"), - Some(( - Key::Attr { backend: "LATEX" }, - ":width 5cm", - "#+ATTR_LATEX: :width 5cm".len() - )) - ); - - assert_eq!( - Keyword::parse("#+CALL: double(n=4)"), - Some((Key::Call, "double(n=4)", "#+CALL: double(n=4)".len())) - ); - - assert_eq!( - Keyword::parse("#+CAPTION[Short caption]: Longer caption."), - Some(( - Key::Caption { - option: Some("Short caption") - }, - "Longer caption.", - "#+CAPTION[Short caption]: Longer caption.".len() - )) - ); -} diff --git a/src/elements/list.rs b/src/elements/list.rs index 11101fc..8bb9965 100644 --- a/src/elements/list.rs +++ b/src/elements/list.rs @@ -1,157 +1,159 @@ use crate::lines::Lines; -pub struct List; - -impl List { - #[inline] - pub fn is_item(src: &str) -> (bool, bool) { - if src.is_empty() { - return (false, false); - } - let bytes = src.as_bytes(); - let (i, ordered) = match bytes[0] { - b'*' | b'-' | b'+' => (1, false), - b'0'...b'9' => { - let i = bytes - .iter() - .position(|&c| !c.is_ascii_digit()) - .unwrap_or_else(|| src.len() - 1); - let c = bytes[i]; - if !(c == b'.' || c == b')') { - return (false, false); - } - (i + 1, true) - } - _ => return (false, false), - }; - - if i < src.len() { - // bullet is follwed by a space or line ending - (bytes[i] == b' ' || bytes[i] == b'\n', ordered) - } else { - (false, false) - } +#[inline] +pub fn is_item(src: &str) -> (bool, bool) { + if src.is_empty() { + return (false, false); } - - // returns (bullets, contents begin, contents end, end, has more) - pub fn parse(src: &str, ident: usize) -> (&str, usize, usize, usize, bool) { - debug_assert!(Self::is_item(&src[ident..]).0); - debug_assert!( - src[..ident].chars().all(|c| c == ' ' || c == '\t'), - "{:?} doesn't starts with indentation {}", - src, - ident - ); - - let mut lines = Lines::new(src); - let (mut pre_cont_end, mut pre_end, first_line) = lines.next().unwrap(); - let beg = match memchr::memchr(b' ', &first_line.as_bytes()[ident..]) { - Some(i) => i + ident + 1, - None => { - let len = first_line.len(); - return ( - &first_line, - len, - len, - len, - Self::is_item(lines.next().unwrap().2).0, - ); + let bytes = src.as_bytes(); + let (i, ordered) = match bytes[0] { + b'*' | b'-' | b'+' => (1, false), + b'0'...b'9' => { + let i = bytes + .iter() + .position(|&c| !c.is_ascii_digit()) + .unwrap_or_else(|| src.len() - 1); + let c = bytes[i]; + if !(c == b'.' || c == b')') { + return (false, false); } - }; - let bullet = &src[0..beg]; + (i + 1, true) + } + _ => return (false, false), + }; - while let Some((mut cont_end, mut end, mut line)) = lines.next() { - // this line is emtpy - if line.is_empty() { - if let Some((next_cont_end, next_end, next_line)) = lines.next() { - // next line is emtpy, too - if next_line.is_empty() { - return (bullet, beg, pre_cont_end, next_end, false); - } else { - // move to next line - pre_end = end; - cont_end = next_cont_end; - end = next_end; - line = next_line; - } + if i < src.len() { + // bullet is follwed by a space or line ending + (bytes[i] == b' ' || bytes[i] == b'\n', ordered) + } else { + (false, false) + } +} + +// returns (bullets, contents begin, contents end, end, has more) +#[inline] +pub fn parse(src: &str, ident: usize) -> (&str, usize, usize, usize, bool) { + debug_assert!(is_item(&src[ident..]).0); + debug_assert!( + src[..ident].chars().all(|c| c == ' ' || c == '\t'), + "{:?} doesn't starts with indentation {}", + src, + ident + ); + + let mut lines = Lines::new(src); + let (mut pre_cont_end, mut pre_end, first_line) = lines.next().unwrap(); + let beg = match memchr::memchr(b' ', &first_line.as_bytes()[ident..]) { + Some(i) => i + ident + 1, + None => { + let len = first_line.len(); + return ( + &first_line, + len, + len, + len, + is_item(lines.next().unwrap().2).0, + ); + } + }; + let bullet = &src[0..beg]; + + while let Some((mut cont_end, mut end, mut line)) = lines.next() { + // this line is emtpy + if line.is_empty() { + if let Some((next_cont_end, next_end, next_line)) = lines.next() { + // next line is emtpy, too + if next_line.is_empty() { + return (bullet, beg, pre_cont_end, next_end, false); } else { - return (bullet, beg, pre_cont_end, end, false); + // move to next line + pre_end = end; + cont_end = next_cont_end; + end = next_end; + line = next_line; } + } else { + return (bullet, beg, pre_cont_end, end, false); } - - let line_ident = Self::ident(line); - - if line_ident < ident { - return (bullet, beg, pre_cont_end, pre_end, false); - } else if line_ident == ident { - return ( - bullet, - beg, - pre_cont_end, - pre_end, - Self::is_item(&line[ident..]).0, - ); - } - - pre_end = end; - pre_cont_end = cont_end; } - (bullet, beg, src.len(), src.len(), false) + let line_ident = self::ident(line); + + if line_ident < ident { + return (bullet, beg, pre_cont_end, pre_end, false); + } else if line_ident == ident { + return ( + bullet, + beg, + pre_cont_end, + pre_end, + is_item(&line[ident..]).0, + ); + } + + pre_end = end; + pre_cont_end = cont_end; } - fn ident(src: &str) -> usize { - src.as_bytes() - .iter() - .position(|&c| c != b' ' && c != b'\t') - .unwrap_or(0) + (bullet, beg, src.len(), src.len(), false) +} + +#[inline] +fn ident(src: &str) -> usize { + src.as_bytes() + .iter() + .position(|&c| c != b' ' && c != b'\t') + .unwrap_or(0) +} + +#[cfg(test)] +mod tests { + #[test] + fn is_item() { + use super::is_item; + + assert_eq!(is_item("+ item"), (true, false)); + assert_eq!(is_item("- item"), (true, false)); + assert_eq!(is_item("10. item"), (true, true)); + assert_eq!(is_item("10) item"), (true, true)); + assert_eq!(is_item("1. item"), (true, true)); + assert_eq!(is_item("1) item"), (true, true)); + assert_eq!(is_item("10. "), (true, true)); + assert_eq!(is_item("10.\n"), (true, true)); + assert_eq!(is_item("10."), (false, false)); + assert_eq!(is_item("+"), (false, false)); + assert_eq!(is_item("-item"), (false, false)); + assert_eq!(is_item("+item"), (false, false)); + } + + #[test] + fn parse() { + use super::parse; + + assert_eq!(parse("+ item1\n+ item2\n+ item3", 0), ("+ ", 2, 7, 8, true)); + assert_eq!( + parse("* item1\n\n* item2\n* item3", 0), + ("* ", 2, 7, 9, true) + ); + assert_eq!( + parse("- item1\n\n\n- item2\n- item3", 0), + ("- ", 2, 7, 10, false) + ); + assert_eq!( + parse("1. item1\n\n\n\n2. item2\n3. item3", 0), + ("1. ", 3, 8, 11, false) + ); + assert_eq!( + parse(" + item1\n + item2\n+ item3", 2), + (" + ", 4, 21, 22, false) + ); + assert_eq!( + parse(" + item1\n + item2\n + item3", 2), + (" + ", 4, 9, 10, true) + ); + assert_eq!(parse("+\n", 0), ("+", 1, 1, 1, false)); + assert_eq!(parse("+\n+ item2\n+ item3", 0), ("+", 1, 1, 1, true)); + assert_eq!(parse("1) item1", 0), ("1) ", 3, 8, 8, false)); + assert_eq!(parse("1) item1\n", 0), ("1) ", 3, 8, 9, false)); } } - -#[test] -fn is_item() { - assert_eq!(List::is_item("+ item"), (true, false)); - assert_eq!(List::is_item("- item"), (true, false)); - assert_eq!(List::is_item("10. item"), (true, true)); - assert_eq!(List::is_item("10) item"), (true, true)); - assert_eq!(List::is_item("1. item"), (true, true)); - assert_eq!(List::is_item("1) item"), (true, true)); - assert_eq!(List::is_item("10. "), (true, true)); - assert_eq!(List::is_item("10.\n"), (true, true)); - assert_eq!(List::is_item("10."), (false, false)); - assert_eq!(List::is_item("+"), (false, false)); - assert_eq!(List::is_item("-item"), (false, false)); - assert_eq!(List::is_item("+item"), (false, false)); -} - -#[test] -fn parse() { - assert_eq!( - List::parse("+ item1\n+ item2\n+ item3", 0), - ("+ ", 2, 7, 8, true) - ); - assert_eq!( - List::parse("* item1\n\n* item2\n* item3", 0), - ("* ", 2, 7, 9, true) - ); - assert_eq!( - List::parse("- item1\n\n\n- item2\n- item3", 0), - ("- ", 2, 7, 10, false) - ); - assert_eq!( - List::parse("1. item1\n\n\n\n2. item2\n3. item3", 0), - ("1. ", 3, 8, 11, false) - ); - assert_eq!( - List::parse(" + item1\n + item2\n+ item3", 2), - (" + ", 4, 21, 22, false) - ); - assert_eq!( - List::parse(" + item1\n + item2\n + item3", 2), - (" + ", 4, 9, 10, true) - ); - assert_eq!(List::parse("+\n", 0), ("+", 1, 1, 1, false)); - assert_eq!(List::parse("+\n+ item2\n+ item3", 0), ("+", 1, 1, 1, true)); - assert_eq!(List::parse("1) item1", 0), ("1) ", 3, 8, 8, false)); - assert_eq!(List::parse("1) item1\n", 0), ("1) ", 3, 8, 9, false)); -} diff --git a/src/elements/mod.rs b/src/elements/mod.rs index ec3bee4..bb4ef75 100644 --- a/src/elements/mod.rs +++ b/src/elements/mod.rs @@ -5,12 +5,7 @@ pub mod keyword; pub mod list; pub mod rule; -pub use self::block::Block; -pub use self::dyn_block::DynBlock; -pub use self::fn_def::FnDef; -pub use self::keyword::{Key, Keyword}; -pub use self::list::List; -pub use self::rule::Rule; +pub use self::keyword::Key; use memchr::memchr; use memchr::memchr_iter; @@ -118,9 +113,9 @@ impl<'a> Element<'a> { }; } - // Unlike other element, footnote definition must starts at column 0 + // Unlike other element, footnote def must starts at column 0 if bytes[pos..].starts_with(b"[fn:") { - if let Some((label, cont, off)) = FnDef::parse(&src[pos..]) { + if let Some((label, cont, off)) = fn_def::parse(&src[pos..]) { brk!(Element::FnDef { label, cont }, off + 1); } } @@ -138,7 +133,7 @@ impl<'a> Element<'a> { pos = skip_space!(src, pos); - let (is_item, ordered) = List::is_item(&src[pos..]); + let (is_item, ordered) = list::is_item(&src[pos..]); if is_item { let list = Element::List { ident: pos - line_beg, @@ -163,7 +158,7 @@ impl<'a> Element<'a> { // Rule if bytes[pos] == b'-' { - let off = Rule::parse(&src[pos..]); + let off = rule::parse(&src[pos..]); if off != 0 { brk!(Element::Rule, off); } @@ -178,7 +173,7 @@ impl<'a> Element<'a> { } if bytes[pos..].starts_with(b"#+") { - if let Some((name, args, cont_beg, cont_end, end)) = Block::parse(&src[pos..]) { + if let Some((name, args, cont_beg, cont_end, end)) = block::parse(&src[pos..]) { let cont = &src[pos + cont_beg..pos + cont_end]; match name.to_uppercase().as_str() { "COMMENT" => brk!(Element::CommentBlock { args, cont }, end), @@ -214,7 +209,7 @@ impl<'a> Element<'a> { }; } - if let Some((name, args, cont_beg, cont_end, end)) = DynBlock::parse(&src[pos..]) { + if let Some((name, args, cont_beg, cont_end, end)) = dyn_block::parse(&src[pos..]) { brk!( Element::DynBlock { name, @@ -226,7 +221,7 @@ impl<'a> Element<'a> { ) } - if let Some((key, value, off)) = Keyword::parse(&src[pos..]) { + if let Some((key, value, off)) = keyword::parse(&src[pos..]) { brk!( if let Key::Call = key { Element::Call { value } diff --git a/src/elements/rule.rs b/src/elements/rule.rs index f7e0d40..13d93f2 100644 --- a/src/elements/rule.rs +++ b/src/elements/rule.rs @@ -1,37 +1,37 @@ -#[cfg_attr(test, derive(PartialEq))] -#[derive(Debug)] -pub struct Rule; - -impl Rule { - pub fn parse(src: &str) -> usize { - let end = memchr::memchr(b'\n', src.as_bytes()) - .map(|i| i + 1) - .unwrap_or_else(|| src.len()); - let rules = &src[0..end].trim(); - if rules.len() >= 5 && rules.chars().all(|c| c == '-') { - end - } else { - 0 - } +#[inline] +pub fn parse(src: &str) -> usize { + let end = memchr::memchr(b'\n', src.as_bytes()) + .map(|i| i + 1) + .unwrap_or_else(|| src.len()); + let rules = &src[0..end].trim(); + if rules.len() >= 5 && rules.chars().all(|c| c == '-') { + end + } else { + 0 } } -#[test] -fn parse() { - assert_eq!(Rule::parse("-----"), "-----".len()); - assert_eq!(Rule::parse("--------"), "--------".len()); - assert_eq!(Rule::parse(" -----"), " -----".len()); - assert_eq!(Rule::parse("\t\t-----"), "\t\t-----".len()); - assert_eq!(Rule::parse("\t\t-----\n"), "\t\t-----\n".len()); - assert_eq!(Rule::parse("\t\t----- \n"), "\t\t----- \n".len()); - assert_eq!(Rule::parse(""), 0); - assert_eq!(Rule::parse("----"), 0); - assert_eq!(Rule::parse(" ----"), 0); - assert_eq!(Rule::parse(" 0----"), 0); - assert_eq!(Rule::parse("0 ----"), 0); - assert_eq!(Rule::parse("0------"), 0); - assert_eq!(Rule::parse("----0----"), 0); - assert_eq!(Rule::parse("\t\t----"), 0); - assert_eq!(Rule::parse("------0"), 0); - assert_eq!(Rule::parse("----- 0"), 0); +#[cfg(test)] +mod tests { + #[test] + fn parse() { + use super::parse; + + assert_eq!(parse("-----"), "-----".len()); + assert_eq!(parse("--------"), "--------".len()); + assert_eq!(parse(" -----"), " -----".len()); + assert_eq!(parse("\t\t-----"), "\t\t-----".len()); + assert_eq!(parse("\t\t-----\n"), "\t\t-----\n".len()); + assert_eq!(parse("\t\t----- \n"), "\t\t----- \n".len()); + assert_eq!(parse(""), 0); + assert_eq!(parse("----"), 0); + assert_eq!(parse(" ----"), 0); + assert_eq!(parse(" 0----"), 0); + assert_eq!(parse("0 ----"), 0); + assert_eq!(parse("0------"), 0); + assert_eq!(parse("----0----"), 0); + assert_eq!(parse("\t\t----"), 0); + assert_eq!(parse("------0"), 0); + assert_eq!(parse("----- 0"), 0); + } } diff --git a/src/export/html.rs b/src/export/html.rs index 25afc71..7c4c301 100644 --- a/src/export/html.rs +++ b/src/export/html.rs @@ -3,9 +3,7 @@ use crate::elements::Key; use crate::export::Handler; use crate::headline::Headline; -use crate::objects::{ - Cookie, FnRef, InlineCall, InlineSrc, Link, Macros, RadioTarget, Snippet, Target, -}; +use crate::objects::Cookie; use std::io::{Result, Write}; pub struct HtmlHandler; @@ -117,36 +115,49 @@ impl Handler for HtmlHandler { fn handle_cookie(&mut self, w: &mut W, cookie: Cookie) -> Result<()> { Ok(()) } - fn handle_fn_ref(&mut self, w: &mut W, fn_ref: FnRef) -> Result<()> { + fn handle_fn_ref(&mut self, w: &mut W, label: Option<&str>, def: Option<&str>) -> Result<()> { Ok(()) } - fn handle_inline_call(&mut self, w: &mut W, inline_call: InlineCall) -> Result<()> { + fn handle_inline_call( + &mut self, + w: &mut W, + name: &str, + args: &str, + inside_header: Option<&str>, + end_header: Option<&str>, + ) -> Result<()> { Ok(()) } - fn handle_inline_src(&mut self, w: &mut W, inline_src: InlineSrc) -> Result<()> { - write!(w, "{}", inline_src.body) + fn handle_inline_src( + &mut self, + w: &mut W, + lang: &str, + option: Option<&str>, + body: &str, + ) -> Result<()> { + write!(w, "{}", body) } - fn handle_link(&mut self, w: &mut W, link: Link) -> Result<()> { - if let Some(desc) = link.desc { - write!(w, r#"{}"#, link.path, desc) + fn handle_link(&mut self, w: &mut W, path: &str, desc: Option<&str>) -> Result<()> { + if let Some(desc) = desc { + write!(w, r#"{}"#, path, desc) } else { - write!(w, r#"{0}"#, link.path) + write!(w, r#"{0}"#, path) } } - fn handle_macros(&mut self, w: &mut W, macros: Macros) -> Result<()> { + fn handle_macros(&mut self, w: &mut W, name: &str, args: Option<&str>) -> Result<()> { Ok(()) } - fn handle_radio_target(&mut self, w: &mut W, target: RadioTarget) -> Result<()> { + fn handle_radio_target(&mut self, w: &mut W, target: &str) -> Result<()> { Ok(()) } - fn handle_snippet(&mut self, w: &mut W, snippet: Snippet) -> Result<()> { - if snippet.name.eq_ignore_ascii_case("HTML") { - write!(w, "{}", snippet.value) + fn handle_snippet(&mut self, w: &mut W, name: &str, value: &str) -> Result<()> { + if name.eq_ignore_ascii_case("HTML") { + write!(w, "{}", value) } else { Ok(()) } } - fn handle_target(&mut self, w: &mut W, target: Target) -> Result<()> { + fn handle_target(&mut self, w: &mut W, target: &str) -> Result<()> { Ok(()) } fn handle_bold_beg(&mut self, w: &mut W) -> Result<()> { diff --git a/src/export/mod.rs b/src/export/mod.rs index 15dbfc8..fbb53a8 100644 --- a/src/export/mod.rs +++ b/src/export/mod.rs @@ -4,9 +4,7 @@ pub use self::html::HtmlHandler; use crate::elements::Key; use crate::headline::Headline; -use crate::objects::{ - Cookie, FnRef, InlineCall, InlineSrc, Link, Macros, RadioTarget, Snippet, Target, -}; +use crate::objects::Cookie; use crate::parser::Parser; use std::io::{Result, Write}; @@ -46,14 +44,27 @@ pub trait Handler { fn handle_keyword(&mut self, w: &mut W, key: Key<'_>, value: &str) -> Result<()>; fn handle_rule(&mut self, w: &mut W) -> Result<()>; fn handle_cookie(&mut self, w: &mut W, cookie: Cookie) -> Result<()>; - fn handle_fn_ref(&mut self, w: &mut W, fn_ref: FnRef) -> Result<()>; - fn handle_inline_call(&mut self, w: &mut W, inline_call: InlineCall) -> Result<()>; - fn handle_inline_src(&mut self, w: &mut W, inline_src: InlineSrc) -> Result<()>; - fn handle_link(&mut self, w: &mut W, link: Link) -> Result<()>; - fn handle_macros(&mut self, w: &mut W, macros: Macros) -> Result<()>; - fn handle_radio_target(&mut self, w: &mut W, target: RadioTarget) -> Result<()>; - fn handle_snippet(&mut self, w: &mut W, snippet: Snippet) -> Result<()>; - fn handle_target(&mut self, w: &mut W, target: Target) -> Result<()>; + fn handle_fn_ref(&mut self, w: &mut W, label: Option<&str>, def: Option<&str>) -> Result<()>; + fn handle_inline_call( + &mut self, + w: &mut W, + name: &str, + args: &str, + inside_header: Option<&str>, + end_header: Option<&str>, + ) -> Result<()>; + fn handle_inline_src( + &mut self, + w: &mut W, + lang: &str, + option: Option<&str>, + body: &str, + ) -> Result<()>; + fn handle_link(&mut self, w: &mut W, path: &str, desc: Option<&str>) -> Result<()>; + fn handle_macros(&mut self, w: &mut W, name: &str, args: Option<&str>) -> Result<()>; + fn handle_radio_target(&mut self, w: &mut W, target: &str) -> Result<()>; + fn handle_snippet(&mut self, w: &mut W, name: &str, value: &str) -> Result<()>; + fn handle_target(&mut self, w: &mut W, target: &str) -> Result<()>; fn handle_bold_beg(&mut self, w: &mut W) -> Result<()>; fn handle_bold_end(&mut self, w: &mut W) -> Result<()>; fn handle_italic_beg(&mut self, w: &mut W) -> Result<()>; @@ -129,14 +140,19 @@ impl<'a, W: Write, H: Handler> Render<'a, W, H> { Keyword { key, value } => h.handle_keyword(w, key, value)?, Rule => h.handle_rule(w)?, Cookie(cookie) => h.handle_cookie(w, cookie)?, - FnRef(fnref) => h.handle_fn_ref(w, fnref)?, - InlineCall(inlinecall) => h.handle_inline_call(w, inlinecall)?, - InlineSrc(inlinesrc) => h.handle_inline_src(w, inlinesrc)?, - Link(link) => h.handle_link(w, link)?, - Macros(macros) => h.handle_macros(w, macros)?, - RadioTarget(radiotarget) => h.handle_radio_target(w, radiotarget)?, - Snippet(snippet) => h.handle_snippet(w, snippet)?, - Target(target) => h.handle_target(w, target)?, + FnRef { label, def } => h.handle_fn_ref(w, label, def)?, + InlineSrc { lang, option, body } => h.handle_inline_src(w, lang, option, body)?, + InlineCall { + name, + args, + inside_header, + end_header, + } => h.handle_inline_call(w, name, args, inside_header, end_header)?, + Link { path, desc } => h.handle_link(w, path, desc)?, + Macros { name, args } => h.handle_macros(w, name, args)?, + RadioTarget { target } => h.handle_radio_target(w, target)?, + Snippet { name, value } => h.handle_snippet(w, name, value)?, + Target { target } => h.handle_target(w, target)?, BoldBeg => h.handle_bold_beg(w)?, BoldEnd => h.handle_bold_end(w)?, ItalicBeg => h.handle_italic_beg(w)?, diff --git a/src/headline.rs b/src/headline.rs index 54ea134..238c6d9 100644 --- a/src/headline.rs +++ b/src/headline.rs @@ -1,10 +1,17 @@ +//! Headline + #[cfg_attr(test, derive(PartialEq))] #[derive(Debug)] pub struct Headline<'a> { + /// headline level, number of stars pub level: usize, + /// priority cookie pub priority: Option, + /// headline tags, including the sparated colons pub tags: Option<&'a str>, + /// headline title pub title: &'a str, + /// headline keyword pub keyword: Option<&'a str>, } @@ -58,6 +65,20 @@ impl<'a> Headline<'a> { (None, src.len()) } + /// parsing the input string and returning the parsed headline + /// and the content-begin and the end of headline container. + /// + /// ```rust + /// use orgize::headline::Headline; + /// + /// let (hdl, _, _) = Headline::parse("* DONE [#A] COMMENT Title :tag:a2%:"); + /// + /// assert_eq!(hdl.level, 1); + /// assert_eq!(hdl.priority, Some('A')); + /// assert_eq!(hdl.tags, Some(":tag:a2%:")); + /// assert_eq!(hdl.title, "COMMENT Title"); + /// assert_eq!(hdl.keyword, Some("DONE")); + /// ``` pub fn parse(src: &'a str) -> (Headline<'a>, usize, usize) { let mut level = 0; loop { @@ -69,7 +90,11 @@ impl<'a> Headline<'a> { } let eol = eol!(src); - let end = Headline::find_level(&src[eol..], level) + eol; + let end = if eol == src.len() { + eol + } else { + Headline::find_level(&src[eol..], level) + eol + }; let mut title_start = skip_space!(src, level); @@ -89,88 +114,70 @@ impl<'a> Headline<'a> { let (tags, title_off) = Headline::parse_tags(&src[title_start..eol]); - // println!("{:?} {:?} {:?}", keyword, priority, tags); - // println!("{:?} {}", title_start, title_off); - ( - Headline::new( + Headline { level, keyword, priority, - &src[title_start..title_start + title_off], + title: &src[title_start..title_start + title_off], tags, - ), + }, eol, end, ) } - // TODO: optimize pub fn find_level(src: &str, level: usize) -> usize { - let mut pos = 0; - loop { - if pos >= src.len() { - return src.len(); - } + use jetscii::ByteSubstring; + use memchr::memchr2; - if src.as_bytes()[pos] == b'*' && (pos == 0 || src.as_bytes()[pos - 1] == b'\n') { - let pos_ = pos; - loop { - if pos >= src.len() { - return src.len(); - } - if src.as_bytes()[pos] == b'*' { - pos += 1; - } else if src.as_bytes()[pos] == b' ' && pos - pos_ <= level { - return pos_; - } else { - break; - } + let bytes = src.as_bytes(); + if bytes[0] == b'*' { + if let Some(stars) = memchr2(b'\n', b' ', bytes) { + if stars > 0 && stars <= level && bytes[0..stars].iter().all(|&c| c == b'*') { + return 0; } } - - pos += 1 } + + let mut pos = 0; + while let Some(off) = ByteSubstring::new(b"\n*").find(&bytes[pos..]) { + pos += off + 1; + if let Some(stars) = memchr2(b'\n', b' ', &bytes[pos..]) { + if stars > 0 && stars <= level && bytes[pos..pos + stars].iter().all(|&c| c == b'*') + { + return pos; + } + } + } + + src.len() } + /// checks if this headline is "commented" pub fn is_commented(&self) -> bool { self.title.starts_with("COMMENT ") } + /// checks if this headline is "archived" pub fn is_archived(&self) -> bool { self.tags .map(|tags| tags[1..].split_terminator(':').any(|t| t == "ARCHIVE")) .unwrap_or(false) } - - pub fn new( - level: usize, - keyword: Option<&'a str>, - priority: Option, - title: &'a str, - tags: Option<&'a str>, - ) -> Headline<'a> { - Headline { - level, - keyword, - priority, - title, - tags, - } - } } #[test] fn parse() { assert_eq!( Headline::parse("**** TODO [#A] COMMENT Title :tag:a2%:").0, - Headline::new( - 4, - Some("TODO"), - Some('A'), - "COMMENT Title", - Some(":tag:a2%:"), - ), + Headline { + level: 4, + priority: Some('A'), + keyword: Some("TODO"), + title: "COMMENT Title", + tags: Some(":tag:a2%:"), + }, ); assert_eq!( Headline::parse("**** ToDO [#A] COMMENT Title :tag:a2%:").0, @@ -262,3 +269,17 @@ fn is_archived() { assert!(!Headline::parse("* Title :ARCHIVES:").0.is_archived()); assert!(!Headline::parse("* Title :archive:").0.is_archived()); } + +#[test] +fn find_level() { + assert_eq!( + Headline::find_level( + r#" +** Title +* Title +** Title"#, + 1 + ), + 10 + ); +} diff --git a/src/lib.rs b/src/lib.rs index a1e21c5..1b191a3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,58 @@ +//! A Rust library for parsing orgmode files. +//! +//! ## Example +//! +//! ```rust +//! use orgize::Parser; +//! +//! fn main() { +//! let parser = Parser::new( +//! r#"* Title 1 +//! *Section 1* +//! ** Title 2 +//! _Section 2_ +//! * Title 3 +//! /Section 3/ +//! * Title 4 +//! =Section 4="#, +//! ); +//! +//! for event in parser { +//! // handling the event +//! } +//! } +//! ``` +//! +//! Alternatively, you can use the built-in render. +//! +//! ```rust +//! use orgize::export::{HtmlHandler, Render}; +//! use std::io::Cursor; +//! +//! fn main() { +//! let contents = r#"* Title 1 +//! *Section 1* +//! ** Title 2 +//! _Section 2_ +//! * Title 3 +//! /Section 3/ +//! * Title 4 +//! =Section 4="#; +//! +//! let cursor = Cursor::new(Vec::new()); +//! let mut render = Render::new(HtmlHandler, cursor, &contents); +//! +//! render +//! .render() +//! .expect("something went wrong rendering the file"); +//! +//! println!( +//! "{}", +//! String::from_utf8(render.into_wirter().into_inner()).expect("invalid utf-8") +//! ); +//! } +//! ``` + #[macro_use] mod utils; @@ -7,3 +62,5 @@ pub mod headline; mod lines; pub mod objects; mod parser; + +pub use parser::{Event, Parser}; diff --git a/src/objects/cookie.rs b/src/objects/cookie.rs index d255845..53cac3a 100644 --- a/src/objects/cookie.rs +++ b/src/objects/cookie.rs @@ -2,74 +2,50 @@ use memchr::{memchr, memchr2}; #[cfg_attr(test, derive(PartialEq))] #[derive(Debug)] -pub struct Cookie<'a> { - value: &'a str, +pub enum Cookie<'a> { + Percent(&'a str), + Slash(&'a str, &'a str), } -impl<'a> Cookie<'a> { - pub fn parse(src: &'a str) -> Option<(Cookie<'a>, usize)> { - debug_assert!(src.starts_with('[')); +#[inline] +pub fn parse(src: &str) -> Option<(Cookie<'_>, usize)> { + debug_assert!(src.starts_with('[')); - let num1 = memchr2(b'%', b'/', src.as_bytes()) - .filter(|&i| src.as_bytes()[1..i].iter().all(|c| c.is_ascii_digit()))?; + let bytes = src.as_bytes(); + let num1 = + memchr2(b'%', b'/', bytes).filter(|&i| bytes[1..i].iter().all(|c| c.is_ascii_digit()))?; - if src.as_bytes()[num1] == b'%' && *src.as_bytes().get(num1 + 1)? == b']' { - Some(( - Cookie { - value: &src[0..=num1 + 1], - }, - num1 + 2, - )) - } else { - let num2 = memchr(b']', src.as_bytes()).filter(|&i| { - src.as_bytes()[num1 + 1..i] - .iter() - .all(|c| c.is_ascii_digit()) - })?; + if bytes[num1] == b'%' && *bytes.get(num1 + 1)? == b']' { + Some((Cookie::Percent(&src[1..num1]), num1 + 2)) + } else { + let num2 = memchr(b']', bytes) + .filter(|&i| bytes[num1 + 1..i].iter().all(|c| c.is_ascii_digit()))?; - Some(( - Cookie { - value: &src[0..=num2], - }, - num2 + 1, - )) - } + Some((Cookie::Slash(&src[1..num1], &src[num1 + 1..num2]), num2 + 1)) } } -#[test] -fn parse() { - assert_eq!( - Cookie::parse("[1/10]").unwrap(), - (Cookie { value: "[1/10]" }, "[1/10]".len()) - ); - assert_eq!( - Cookie::parse("[1/1000]").unwrap(), - (Cookie { value: "[1/1000]" }, "[1/1000]".len()) - ); - assert_eq!( - Cookie::parse("[10%]").unwrap(), - (Cookie { value: "[10%]" }, "[10%]".len()) - ); - assert_eq!( - Cookie::parse("[%]").unwrap(), - (Cookie { value: "[%]" }, "[%]".len()) - ); - assert_eq!( - Cookie::parse("[/]").unwrap(), - (Cookie { value: "[/]" }, "[/]".len()) - ); - assert_eq!( - Cookie::parse("[100/]").unwrap(), - (Cookie { value: "[100/]" }, "[100/]".len()) - ); - assert_eq!( - Cookie::parse("[/100]").unwrap(), - (Cookie { value: "[/100]" }, "[/100]".len()) - ); +#[cfg(test)] +mod tests { + #[test] + fn parse() { + use super::parse; + use super::Cookie::*; - assert!(Cookie::parse("[10% ]").is_none(),); - assert!(Cookie::parse("[1//100]").is_none(),); - assert!(Cookie::parse("[1\\100]").is_none(),); - assert!(Cookie::parse("[10%%]").is_none(),); + assert_eq!(parse("[1/10]").unwrap(), (Slash("1", "10"), "[1/10]".len())); + assert_eq!( + parse("[1/1000]").unwrap(), + (Slash("1", "1000"), "[1/1000]".len()) + ); + assert_eq!(parse("[10%]").unwrap(), (Percent("10"), "[10%]".len())); + assert_eq!(parse("[%]").unwrap(), (Percent(""), "[%]".len())); + assert_eq!(parse("[/]").unwrap(), (Slash("", ""), "[/]".len())); + assert_eq!(parse("[100/]").unwrap(), (Slash("100", ""), "[100/]".len())); + assert_eq!(parse("[/100]").unwrap(), (Slash("", "100"), "[/100]".len())); + + assert!(parse("[10% ]").is_none(),); + assert!(parse("[1//100]").is_none(),); + assert!(parse("[1\\100]").is_none(),); + assert!(parse("[10%%]").is_none(),); + } } diff --git a/src/objects/emphasis.rs b/src/objects/emphasis.rs index 75b1602..d7f52c8 100644 --- a/src/objects/emphasis.rs +++ b/src/objects/emphasis.rs @@ -1,44 +1,59 @@ use memchr::memchr; -pub struct Emphasis; +#[inline] +/// returns offset +pub fn parse(src: &str, marker: u8) -> Option { + debug_assert!(src.len() >= 3); -impl Emphasis { - // TODO: return usize instead of Option - pub fn parse(src: &str, marker: u8) -> Option { - expect!(src, 1, |c: u8| !c.is_ascii_whitespace())?; + let bytes = src.as_bytes(); - let bytes = src.as_bytes(); - let end = memchr(marker, &bytes[1..]) - .map(|i| i + 1) - .filter(|&i| bytes[1..i].iter().filter(|&&c| c == b'\n').count() < 2)?; + if bytes[1].is_ascii_whitespace() { + return None; + } - expect!(src, end - 1, |c: u8| !c.is_ascii_whitespace())?; + let end = memchr(marker, &bytes[1..]) + .map(|i| i + 1) + .filter(|&i| bytes[1..i].iter().filter(|&&c| c == b'\n').count() < 2)?; - if end < src.len() - 1 { - expect!(src, end + 1, |ch| ch == b' ' - || ch == b'-' - || ch == b'.' - || ch == b',' - || ch == b':' - || ch == b'!' - || ch == b'?' - || ch == b'\'' - || ch == b'\n' - || ch == b')' - || ch == b'}')?; + if bytes[end - 1].is_ascii_whitespace() { + return None; + } + + if end < src.len() - 1 { + let post = bytes[end + 1]; + if post == b' ' + || post == b'-' + || post == b'.' + || post == b',' + || post == b':' + || post == b'!' + || post == b'?' + || post == b'\'' + || post == b'\n' + || post == b')' + || post == b'}' + { + Some(end) + } else { + None } - + } else { Some(end) } } -#[test] -fn parse() { - assert_eq!(Emphasis::parse("*bold*", b'*').unwrap(), "*bold".len()); - assert_eq!(Emphasis::parse("*bo\nld*", b'*').unwrap(), "*bo\nld".len()); - assert!(Emphasis::parse("*bold*a", b'*').is_none()); - assert!(Emphasis::parse("*bold*", b'/').is_none()); - assert!(Emphasis::parse("*bold *", b'*').is_none()); - assert!(Emphasis::parse("* bold*", b'*').is_none()); - assert!(Emphasis::parse("*b\nol\nd*", b'*').is_none()); +#[cfg(test)] +mod tests { + #[test] + fn parse() { + use super::parse; + + assert_eq!(parse("*bold*", b'*').unwrap(), "*bold".len()); + assert_eq!(parse("*bo\nld*", b'*').unwrap(), "*bo\nld".len()); + assert!(parse("*bold*a", b'*').is_none()); + assert!(parse("*bold*", b'/').is_none()); + assert!(parse("*bold *", b'*').is_none()); + assert!(parse("* bold*", b'*').is_none()); + assert!(parse("*b\nol\nd*", b'*').is_none()); + } } diff --git a/src/objects/fn_ref.rs b/src/objects/fn_ref.rs index 1627eea..733801e 100644 --- a/src/objects/fn_ref.rs +++ b/src/objects/fn_ref.rs @@ -1,106 +1,73 @@ use memchr::{memchr2, memchr2_iter}; -#[cfg_attr(test, derive(PartialEq))] -#[derive(Debug)] -pub struct FnRef<'a> { - label: Option<&'a str>, - definition: Option<&'a str>, -} +/// returns (footnote reference label, footnote reference definition, offset) +#[inline] +pub fn parse(src: &str) -> Option<(Option<&str>, Option<&str>, usize)> { + debug_assert!(src.starts_with("[fn:")); -impl<'a> FnRef<'a> { - pub fn parse(src: &'a str) -> Option<(FnRef<'a>, usize)> { - debug_assert!(src.starts_with("[fn:")); + let bytes = src.as_bytes(); + let label = memchr2(b']', b':', &bytes[4..]) + .map(|i| i + 4) + .filter(|&i| { + bytes[4..i] + .iter() + .all(|&c| c.is_ascii_alphanumeric() || c == b'-' || c == b'_') + })?; - let bytes = src.as_bytes(); - let label = memchr2(b']', b':', &bytes[4..]) - .map(|i| i + 4) - .filter(|&i| { - bytes[4..i] - .iter() - .all(|&c| c.is_ascii_alphanumeric() || c == b'-' || c == b'_') + if bytes[label] == b':' { + let mut pairs = 1; + let def = memchr2_iter(b'[', b']', &bytes[label..]) + .map(|i| i + label) + .find(|&i| { + if bytes[i] == b'[' { + pairs += 1; + } else { + pairs -= 1; + } + pairs == 0 })?; - if bytes[label] == b':' { - let mut pairs = 1; - let def = memchr2_iter(b'[', b']', &bytes[label..]) - .map(|i| i + label) - .find(|&i| { - if bytes[i] == b'[' { - pairs += 1; - } else { - pairs -= 1; - } - pairs == 0 - })?; - - Some(( - FnRef { - label: if label == 4 { - None - } else { - Some(&src[4..label]) - }, - definition: Some(&src[label + 1..def]), - }, - def + 1, - )) - } else { - Some(( - FnRef { - label: if label == 4 { - None - } else { - Some(&src[4..label]) - }, - definition: None, - }, - label + 1, - )) - } + Some(( + if label == 4 { + None + } else { + Some(&src[4..label]) + }, + Some(&src[label + 1..def]), + def + 1, + )) + } else { + Some(( + if label == 4 { + None + } else { + Some(&src[4..label]) + }, + None, + label + 1, + )) } } -#[test] -fn parse() { - assert_eq!( - FnRef::parse("[fn:1]").unwrap(), - ( - FnRef { - label: Some("1"), - definition: None, - }, - "[fn:1]".len() - ) - ); - assert_eq!( - FnRef::parse("[fn:1:2]").unwrap(), - ( - FnRef { - label: Some("1"), - definition: Some("2"), - }, - "[fn:1:2]".len() - ) - ); - assert_eq!( - FnRef::parse("[fn::2]").unwrap(), - ( - FnRef { - label: None, - definition: Some("2"), - }, - "[fn::2]".len() - ) - ); - assert_eq!( - FnRef::parse("[fn::[]]").unwrap(), - ( - FnRef { - label: None, - definition: Some("[]"), - }, - "[fn::[]]".len() - ) - ); - assert!(FnRef::parse("[fn::[]").is_none()); +#[cfg(test)] +mod tests { + #[test] + fn parse() { + use super::parse; + + assert_eq!(parse("[fn:1]").unwrap(), (Some("1"), None, "[fn:1]".len())); + assert_eq!( + parse("[fn:1:2]").unwrap(), + (Some("1"), Some("2"), "[fn:1:2]".len()) + ); + assert_eq!( + parse("[fn::2]").unwrap(), + (None, Some("2"), "[fn::2]".len()) + ); + assert_eq!( + parse("[fn::[]]").unwrap(), + (None, Some("[]"), "[fn::[]]".len()) + ); + assert!(parse("[fn::[]").is_none()); + } } diff --git a/src/objects/inline_call.rs b/src/objects/inline_call.rs index fabeba2..cc07a08 100644 --- a/src/objects/inline_call.rs +++ b/src/objects/inline_call.rs @@ -1,115 +1,88 @@ use memchr::{memchr, memchr2}; -#[cfg_attr(test, derive(PartialEq))] -#[derive(Debug)] -pub struct InlineCall<'a> { - pub name: &'a str, - pub args: &'a str, - // header args for block - pub inside_header: Option<&'a str>, - // header args for call line - pub end_header: Option<&'a str>, +/// returns (name, args, inside_header, end_header) +#[inline] +pub fn parse(src: &str) -> Option<(&str, &str, Option<&str>, Option<&str>, usize)> { + debug_assert!(src.starts_with("call_")); + + // TODO: refactor + let bytes = src.as_bytes(); + let mut pos = + memchr2(b'[', b'(', bytes).filter(|&i| bytes[5..i].iter().all(|c| c.is_ascii_graphic()))?; + let mut pos_; + + let name = &src[5..pos]; + + let inside_header = if bytes[pos] == b'[' { + pos_ = pos; + pos = memchr(b']', &bytes[pos..]) + .map(|i| i + pos) + .filter(|&i| bytes[pos..i].iter().all(|&c| c != b'\n'))? + + 1; + expect!(src, pos, b'(')?; + Some(&src[pos_ + 1..pos - 1]) + } else { + None + }; + + pos_ = pos; + pos = memchr(b')', &bytes[pos..]) + .map(|i| i + pos) + .filter(|&i| bytes[pos..i].iter().all(|&c| c != b'\n'))?; + let args = &src[pos_ + 1..pos]; + + let end_header = if src.len() > pos + 1 && src.as_bytes()[pos + 1] == b'[' { + pos_ = pos; + pos = memchr(b']', &bytes[pos_ + 1..]) + .map(|i| i + pos_ + 1) + .filter(|&i| bytes[pos_ + 1..i].iter().all(|&c| c != b'\n' && c != b')'))?; + Some(&src[pos_ + 2..pos]) + } else { + None + }; + + Some((name, args, inside_header, end_header, pos + 1)) } -impl<'a> InlineCall<'a> { - pub fn parse(src: &'a str) -> Option<(InlineCall, usize)> { - debug_assert!(src.starts_with("call_")); +#[cfg(test)] +mod tests { + #[test] + fn parse() { + use super::parse; - let bytes = src.as_bytes(); - let mut pos = memchr2(b'[', b'(', bytes) - .filter(|&i| bytes[5..i].iter().all(|c| c.is_ascii_graphic()))?; - let mut pos_; - - let name = &src[5..pos]; - - let inside_header = if bytes[pos] == b'[' { - pos_ = pos; - pos = memchr(b']', &bytes[pos..]) - .map(|i| i + pos) - .filter(|&i| bytes[pos..i].iter().all(|&c| c != b'\n'))? - + 1; - expect!(src, pos, b'(')?; - Some(&src[pos_ + 1..pos - 1]) - } else { - None - }; - - pos_ = pos; - pos = memchr(b')', &bytes[pos..]) - .map(|i| i + pos) - .filter(|&i| bytes[pos..i].iter().all(|&c| c != b'\n'))?; - let args = &src[pos_ + 1..pos]; - - let end_header = if src.len() > pos + 1 && src.as_bytes()[pos + 1] == b'[' { - pos_ = pos; - pos = memchr(b']', &bytes[pos_ + 1..]) - .map(|i| i + pos_ + 1) - .filter(|&i| bytes[pos_ + 1..i].iter().all(|&c| c != b'\n' && c != b')'))?; - Some(&src[pos_ + 2..pos]) - } else { - None - }; - - Some(( - InlineCall { - name, - inside_header, - args, - end_header, - }, - pos + 1, - )) + assert_eq!( + parse("call_square(4)").unwrap(), + ("square", "4", None, None, "call_square(4)".len()) + ); + assert_eq!( + parse("call_square[:results output](4)").unwrap(), + ( + "square", + "4", + Some(":results output"), + None, + "call_square[:results output](4)".len() + ) + ); + assert_eq!( + parse("call_square(4)[:results html]").unwrap(), + ( + "square", + "4", + None, + Some(":results html"), + "call_square(4)[:results html]".len() + ) + ); + assert_eq!( + parse("call_square[:results output](4)[:results html]").unwrap(), + ( + "square", + "4", + Some(":results output"), + Some(":results html"), + "call_square[:results output](4)[:results html]".len() + ) + ); } } - -#[test] -fn parse() { - assert_eq!( - InlineCall::parse("call_square(4)").unwrap(), - ( - InlineCall { - name: "square", - args: "4", - inside_header: None, - end_header: None, - }, - "call_square(4)".len() - ) - ); - assert_eq!( - InlineCall::parse("call_square[:results output](4)").unwrap(), - ( - InlineCall { - name: "square", - args: "4", - inside_header: Some(":results output"), - end_header: None, - }, - "call_square[:results output](4)".len() - ) - ); - assert_eq!( - InlineCall::parse("call_square(4)[:results html]").unwrap(), - ( - InlineCall { - name: "square", - args: "4", - inside_header: None, - end_header: Some(":results html"), - }, - "call_square(4)[:results html]".len() - ) - ); - assert_eq!( - InlineCall::parse("call_square[:results output](4)[:results html]").unwrap(), - ( - InlineCall { - name: "square", - args: "4", - inside_header: Some(":results output"), - end_header: Some(":results html"), - }, - "call_square[:results output](4)[:results html]".len() - ) - ); -} diff --git a/src/objects/inline_src.rs b/src/objects/inline_src.rs index a481593..38a97ce 100644 --- a/src/objects/inline_src.rs +++ b/src/objects/inline_src.rs @@ -1,77 +1,54 @@ use memchr::{memchr, memchr2}; -#[cfg_attr(test, derive(PartialEq))] -#[derive(Debug)] -pub struct InlineSrc<'a> { - pub lang: &'a str, - pub option: Option<&'a str>, - pub body: &'a str, -} +/// returns (language, option, body, offset) +#[inline] +pub fn parse(src: &str) -> Option<(&str, Option<&str>, &str, usize)> { + debug_assert!(src.starts_with("src_")); -impl<'a> InlineSrc<'a> { - pub fn parse(src: &'a str) -> Option<(InlineSrc, usize)> { - debug_assert!(src.starts_with("src_")); + let bytes = src.as_bytes(); + let lang = memchr2(b'[', b'{', bytes) + .filter(|&i| i != 4 && bytes[4..i].iter().all(|c| !c.is_ascii_whitespace()))?; - let bytes = src.as_bytes(); - let lang = memchr2(b'[', b'{', bytes) - .filter(|&i| i != 4 && bytes[4..i].iter().all(|c| !c.is_ascii_whitespace()))?; + if bytes[lang] == b'[' { + let option = memchr(b']', bytes).filter(|&i| bytes[lang..i].iter().all(|c| *c != b'\n'))?; + let body = memchr(b'}', &bytes[option..]) + .map(|i| i + option) + .filter(|&i| bytes[option..i].iter().all(|c| *c != b'\n'))?; - if bytes[lang] == b'[' { - let option = - memchr(b']', bytes).filter(|&i| bytes[lang..i].iter().all(|c| *c != b'\n'))?; - let body = memchr(b'}', &bytes[option..]) - .map(|i| i + option) - .filter(|&i| bytes[option..i].iter().all(|c| *c != b'\n'))?; + Some(( + &src[4..lang], + Some(&src[lang + 1..option]), + &src[option + 2..body], + body + 1, + )) + } else { + let body = memchr(b'}', bytes).filter(|&i| bytes[lang..i].iter().all(|c| *c != b'\n'))?; - Some(( - InlineSrc { - lang: &src[4..lang], - option: Some(&src[lang + 1..option]), - body: &src[option + 2..body], - }, - body + 1, - )) - } else { - let body = - memchr(b'}', bytes).filter(|&i| bytes[lang..i].iter().all(|c| *c != b'\n'))?; - - Some(( - InlineSrc { - lang: &src[4..lang], - option: None, - body: &src[lang + 1..body], - }, - body + 1, - )) - } + Some((&src[4..lang], None, &src[lang + 1..body], body + 1)) } } -#[test] -fn parse() { - assert_eq!( - InlineSrc::parse("src_C{int a = 0;}").unwrap(), - ( - InlineSrc { - lang: "C", - option: None, - body: "int a = 0;" - }, - "src_C{int a = 0;}".len() - ) - ); - assert_eq!( - InlineSrc::parse("src_xml[:exports code]{text}").unwrap(), - ( - InlineSrc { - lang: "xml", - option: Some(":exports code"), - body: "text" - }, - "src_xml[:exports code]{text}".len() - ) - ); - assert!(InlineSrc::parse("src_xml[:exports code]{text").is_none()); - assert!(InlineSrc::parse("src_[:exports code]{text}").is_none()); - assert!(InlineSrc::parse("src_xml[:exports code]").is_none()); +#[cfg(test)] +mod tests { + #[test] + fn parse() { + use super::parse; + + assert_eq!( + parse("src_C{int a = 0;}").unwrap(), + ("C", None, "int a = 0;", "src_C{int a = 0;}".len()) + ); + assert_eq!( + parse("src_xml[:exports code]{text}").unwrap(), + ( + "xml", + Some(":exports code"), + "text", + "src_xml[:exports code]{text}".len() + ) + ); + assert!(parse("src_xml[:exports code]{text").is_none()); + assert!(parse("src_[:exports code]{text}").is_none()); + assert!(parse("src_xml[:exports code]").is_none()); + } } diff --git a/src/objects/link.rs b/src/objects/link.rs index 70a96b8..ead534d 100644 --- a/src/objects/link.rs +++ b/src/objects/link.rs @@ -1,71 +1,42 @@ use memchr::memchr; -#[cfg_attr(test, derive(PartialEq))] -#[derive(Debug)] -pub struct Link<'a> { - pub path: &'a str, - pub desc: Option<&'a str>, -} +/// returns (link path, link description, offset) +#[inline] +pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize)> { + debug_assert!(src.starts_with("[[")); -impl<'a> Link<'a> { - pub fn parse(src: &'a str) -> Option<(Link<'a>, usize)> { - debug_assert!(src.starts_with("[[")); + let bytes = src.as_bytes(); + let path = memchr(b']', bytes).filter(|&i| { + bytes[2..i] + .iter() + .all(|&c| c != b'<' && c != b'>' && c != b'\n') + })?; - let bytes = src.as_bytes(); - let path = memchr(b']', bytes).filter(|&i| { - bytes[2..i] - .iter() - .all(|&c| c != b'<' && c != b'>' && c != b'\n') - })?; + if *bytes.get(path + 1)? == b']' { + Some((&src[2..path], None, path + 2)) + } else if bytes[path + 1] == b'[' { + let desc = memchr(b']', &bytes[path + 2..]) + .map(|i| i + path + 2) + .filter(|&i| bytes[path + 2..i].iter().all(|&c| c != b'['))?; + expect!(src, desc + 1, b']')?; - if *bytes.get(path + 1)? == b']' { - Some(( - Link { - path: &src[2..path], - desc: None, - }, - path + 2, - )) - } else if bytes[path + 1] == b'[' { - let desc = memchr(b']', &bytes[path + 2..]) - .map(|i| i + path + 2) - .filter(|&i| bytes[path + 2..i].iter().all(|&c| c != b'['))?; - expect!(src, desc + 1, b']')?; - - Some(( - Link { - path: &src[2..path], - desc: Some(&src[path + 2..desc]), - }, - desc + 2, - )) - } else { - None - } + Some((&src[2..path], Some(&src[path + 2..desc]), desc + 2)) + } else { + None } } -#[test] -fn parse() { - assert_eq!( - Link::parse("[[#id]]").unwrap(), - ( - Link { - path: "#id", - desc: None, - }, - "[[#id]]".len() - ) - ); - assert_eq!( - Link::parse("[[#id][desc]]").unwrap(), - ( - Link { - path: "#id", - desc: Some("desc"), - }, - "[[#id][desc]]".len() - ) - ); - assert!(Link::parse("[[#id][desc]").is_none()); +#[cfg(test)] +mod tests { + #[test] + fn parse() { + use super::parse; + + assert_eq!(parse("[[#id]]").unwrap(), ("#id", None, "[[#id]]".len())); + assert_eq!( + parse("[[#id][desc]]").unwrap(), + ("#id", Some("desc"), "[[#id][desc]]".len()) + ); + assert!(parse("[[#id][desc]").is_none()); + } } diff --git a/src/objects/macros.rs b/src/objects/macros.rs index 88befa8..0cbfd0a 100644 --- a/src/objects/macros.rs +++ b/src/objects/macros.rs @@ -1,90 +1,62 @@ use jetscii::Substring; use memchr::memchr2; -#[cfg_attr(test, derive(PartialEq))] -#[derive(Debug)] -pub struct Macros<'a> { - pub name: &'a str, - pub args: Option<&'a str>, +/// returns (macros name, macros arguments, offset) +#[inline] +pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize)> { + debug_assert!(src.starts_with("{{{")); + + expect!(src, 3, |c: u8| c.is_ascii_alphabetic())?; + + let bytes = src.as_bytes(); + let name = memchr2(b'}', b'(', bytes).filter(|&i| { + bytes[3..i] + .iter() + .all(|&c| c.is_ascii_alphanumeric() || c == b'-' || c == b'_') + })?; + + Some(if bytes[name] == b'}' { + expect!(src, name + 1, b'}')?; + expect!(src, name + 2, b'}')?; + (&src[3..name], None, name + 3) + } else { + let end = Substring::new(")}}}") + .find(&src[name..]) + .map(|i| i + name)?; + ( + &src[3..name], + if name == end { + None + } else { + Some(&src[name + 1..end]) + }, + end + 4, + ) + }) } -impl<'a> Macros<'a> { - pub fn parse(src: &'a str) -> Option<(Macros<'a>, usize)> { - debug_assert!(src.starts_with("{{{")); +#[cfg(test)] +mod tests { + #[test] + fn parse() { + use super::parse; - expect!(src, 3, |c: u8| c.is_ascii_alphabetic())?; + assert_eq!( + parse("{{{poem(red,blue)}}}"), + Some(("poem", Some("red,blue"), "{{{poem(red,blue)}}}".len())) + ); + assert_eq!( + parse("{{{poem())}}}"), + Some(("poem", Some(")"), "{{{poem())}}}".len())) + ); + assert_eq!( + parse("{{{author}}}"), + Some(("author", None, "{{{author}}}".len())) + ); - let bytes = src.as_bytes(); - let name = memchr2(b'}', b'(', bytes).filter(|&i| { - bytes[3..i] - .iter() - .all(|&c| c.is_ascii_alphanumeric() || c == b'-' || c == b'_') - })?; - - Some(if bytes[name] == b'}' { - expect!(src, name + 1, b'}')?; - expect!(src, name + 2, b'}')?; - ( - Macros { - name: &src[3..name], - args: None, - }, - name + 3, - ) - } else { - let end = Substring::new(")}}}") - .find(&src[name..]) - .map(|i| i + name)?; - ( - Macros { - name: &src[3..name], - args: if name == end { - None - } else { - Some(&src[name + 1..end]) - }, - }, - end + 4, - ) - }) + assert_eq!(parse("{{{0uthor}}}"), None); + assert_eq!(parse("{{{author}}"), None); + assert_eq!(parse("{{{poem(}}}"), None); + assert_eq!(parse("{{{poem)}}}"), None); } } - -#[test] -fn parse() { - assert_eq!( - Macros::parse("{{{poem(red,blue)}}}"), - Some(( - Macros { - name: "poem", - args: Some("red,blue") - }, - "{{{poem(red,blue)}}}".len() - )) - ); - assert_eq!( - Macros::parse("{{{poem())}}}"), - Some(( - Macros { - name: "poem", - args: Some(")") - }, - "{{{poem())}}}".len() - )) - ); - assert_eq!( - Macros::parse("{{{author}}}"), - Some(( - Macros { - name: "author", - args: None - }, - "{{{author}}}".len() - )) - ); - - assert_eq!(Macros::parse("{{{0uthor}}}"), None); - assert_eq!(Macros::parse("{{{author}}"), None); - assert_eq!(Macros::parse("{{{poem(}}}"), None); - assert_eq!(Macros::parse("{{{poem)}}}"), None); -} diff --git a/src/objects/mod.rs b/src/objects/mod.rs index f96f252..21607e6 100644 --- a/src/objects/mod.rs +++ b/src/objects/mod.rs @@ -5,37 +5,63 @@ mod inline_call; mod inline_src; mod link; mod macros; +mod radio_target; mod snippet; mod target; pub use self::cookie::Cookie; -pub use self::emphasis::Emphasis; -pub use self::fn_ref::FnRef; -pub use self::inline_call::InlineCall; -pub use self::inline_src::InlineSrc; -pub use self::link::Link; -pub use self::macros::Macros; -pub use self::snippet::Snippet; -pub use self::target::{RadioTarget, Target}; use jetscii::bytes; #[cfg_attr(test, derive(PartialEq, Debug))] pub enum Object<'a> { Cookie(Cookie<'a>), - FnRef(FnRef<'a>), - InlineCall(InlineCall<'a>), - InlineSrc(InlineSrc<'a>), - Link(Link<'a>), - Macros(Macros<'a>), - RadioTarget(RadioTarget<'a>), - Snippet(Snippet<'a>), - Target(Target<'a>), + FnRef { + label: Option<&'a str>, + def: Option<&'a str>, + }, + InlineCall { + name: &'a str, + args: &'a str, + inside_header: Option<&'a str>, + end_header: Option<&'a str>, + }, + InlineSrc { + lang: &'a str, + option: Option<&'a str>, + body: &'a str, + }, + Link { + path: &'a str, + desc: Option<&'a str>, + }, + Macros { + name: &'a str, + args: Option<&'a str>, + }, + RadioTarget { + target: &'a str, + }, + Snippet { + name: &'a str, + value: &'a str, + }, + Target { + target: &'a str, + }, // `end` indicates the position of the second marker - Bold { end: usize }, - Italic { end: usize }, - Strike { end: usize }, - Underline { end: usize }, + Bold { + end: usize, + }, + Italic { + end: usize, + }, + Strike { + end: usize, + }, + Underline { + end: usize, + }, Verbatim(&'a str), Code(&'a str), @@ -68,40 +94,40 @@ impl<'a> Object<'a> { match bytes[pos] { b'@' if bytes[pos + 1] == b'@' => { - if let Some((snippet, off)) = Snippet::parse(&src[pos..]) { - brk!(Object::Snippet(snippet), off, pos); + if let Some((name, value, off)) = snippet::parse(&src[pos..]) { + brk!(Object::Snippet { name, value }, off, pos); } } b'{' if bytes[pos + 1] == b'{' && bytes[pos + 2] == b'{' => { - if let Some((macros, off)) = Macros::parse(&src[pos..]) { - brk!(Object::Macros(macros), off, pos); + if let Some((name, args, off)) = macros::parse(&src[pos..]) { + brk!(Object::Macros { name, args }, off, pos); } } b'<' if bytes[pos + 1] == b'<' => { if bytes[pos + 2] == b'<' { - if let Some((target, off)) = RadioTarget::parse(&src[pos..]) { - brk!(Object::RadioTarget(target), off, pos); + if let Some((target, off)) = radio_target::parse(&src[pos..]) { + brk!(Object::RadioTarget { target }, off, pos); } } else if bytes[pos + 2] != b'\n' { - if let Some((target, off)) = Target::parse(&src[pos..]) { - brk!(Object::Target(target), off, pos); + if let Some((target, off)) = target::parse(&src[pos..]) { + brk!(Object::Target { target }, off, pos); } } } b'[' => { if bytes[pos + 1..].starts_with(b"fn:") { - if let Some((fn_ref, off)) = FnRef::parse(&src[pos..]) { - brk!(Object::FnRef(fn_ref), off, pos); + if let Some((label, def, off)) = fn_ref::parse(&src[pos..]) { + brk!(Object::FnRef { label, def }, off, pos); } } if bytes[pos + 1] == b'[' { - if let Some((link, off)) = Link::parse(&src[pos..]) { - brk!(Object::Link(link), off, pos); + if let Some((path, desc, off)) = link::parse(&src[pos..]) { + brk!(Object::Link { path, desc }, off, pos); } } - if let Some((cookie, off)) = Cookie::parse(&src[pos..]) { + if let Some((cookie, off)) = cookie::parse(&src[pos..]) { brk!(Object::Cookie(cookie), off, pos); } // TODO: Timestamp @@ -112,43 +138,54 @@ impl<'a> Object<'a> { match bytes[pre] { b'*' => { - if let Some(end) = Emphasis::parse(&src[pre..], b'*') { + if let Some(end) = emphasis::parse(&src[pre..], b'*') { brk!(Object::Bold { end }, 1, pre); } } b'+' => { - if let Some(end) = Emphasis::parse(&src[pre..], b'+') { + if let Some(end) = emphasis::parse(&src[pre..], b'+') { brk!(Object::Strike { end }, 1, pre); } } b'/' => { - if let Some(end) = Emphasis::parse(&src[pre..], b'/') { + if let Some(end) = emphasis::parse(&src[pre..], b'/') { brk!(Object::Italic { end }, 1, pre); } } b'_' => { - if let Some(end) = Emphasis::parse(&src[pre..], b'_') { + if let Some(end) = emphasis::parse(&src[pre..], b'_') { brk!(Object::Underline { end }, 1, pre); } } b'=' => { - if let Some(end) = Emphasis::parse(&src[pre..], b'=') { + if let Some(end) = emphasis::parse(&src[pre..], b'=') { brk!(Object::Verbatim(&src[pre + 1..pre + end]), end + 1, pre); } } b'~' => { - if let Some(end) = Emphasis::parse(&src[pre..], b'~') { + if let Some(end) = emphasis::parse(&src[pre..], b'~') { brk!(Object::Code(&src[pre + 1..pre + end]), end + 1, pre); } } b'c' if src[pre..].starts_with("call_") => { - if let Some((call, off)) = InlineCall::parse(&src[pre..]) { - brk!(Object::InlineCall(call), off, pre); + if let Some((name, args, inside_header, end_header, off)) = + inline_call::parse(&src[pre..]) + { + brk!( + Object::InlineCall { + name, + args, + inside_header, + end_header, + }, + off, + pre + ); } } b's' if src[pre..].starts_with("src_") => { - if let Some((src, off)) = InlineSrc::parse(&src[pre..]) { - brk!(Object::InlineSrc(src), off, pre); + if let Some((lang, option, body, off)) = inline_src::parse(&src[pre..]) { + brk!(Object::InlineSrc { lang, option, body }, off, pre); } } _ => (), diff --git a/src/objects/radio_target.rs b/src/objects/radio_target.rs new file mode 100644 index 0000000..4a78eb4 --- /dev/null +++ b/src/objects/radio_target.rs @@ -0,0 +1,44 @@ +use jetscii::Substring; + +// TODO: text-markup, entities, latex-fragments, subscript and superscript +#[inline] +pub fn parse(src: &str) -> Option<(&str, usize)> { + debug_assert!(src.starts_with("<<<")); + + expect!(src, 3, |c| c != b' ')?; + + let end = Substring::new(">>>").find(src).filter(|&i| { + src.as_bytes()[3..i] + .iter() + .all(|&c| c != b'<' && c != b'\n' && c != b'>') + })?; + + if src.as_bytes()[end - 1] == b' ' { + return None; + } + + Some((&src[3..end], end + 3)) +} + +#[cfg(test)] +mod tests { + #[test] + fn parse() { + use super::parse; + + assert_eq!( + parse("<<>>").unwrap(), + ("target", "<<>>".len()) + ); + assert_eq!( + parse("<<>>").unwrap(), + ("tar get", "<<>>".len()) + ); + assert_eq!(parse("<<>>"), None); + assert_eq!(parse("<<< target>>>"), None); + assert_eq!(parse("<<>>"), None); + assert_eq!(parse("<<get>>>"), None); + assert_eq!(parse("<<>>"), None); + assert_eq!(parse("<<>"), None); + } +} diff --git a/src/objects/snippet.rs b/src/objects/snippet.rs index c782133..7228be4 100644 --- a/src/objects/snippet.rs +++ b/src/objects/snippet.rs @@ -1,71 +1,46 @@ use jetscii::Substring; use memchr::memchr; -#[cfg_attr(test, derive(PartialEq))] -#[derive(Debug)] -pub struct Snippet<'a> { - pub name: &'a str, - pub value: &'a str, +/// returns (snippet name, snippet value, offset) +#[inline] +pub fn parse(src: &str) -> Option<(&str, &str, usize)> { + debug_assert!(src.starts_with("@@")); + + let name = memchr(b':', src.as_bytes()).filter(|&i| { + i != 2 + && src.as_bytes()[2..i] + .iter() + .all(|&c| c.is_ascii_alphanumeric() || c == b'-') + })?; + + let end = Substring::new("@@") + .find(&src[name + 1..]) + .map(|i| i + name + 1)?; + + Some((&src[2..name], &src[name + 1..end], end + 2)) } -impl<'a> Snippet<'a> { - pub fn parse(src: &'a str) -> Option<(Snippet<'a>, usize)> { - debug_assert!(src.starts_with("@@")); +#[cfg(test)] +mod tests { + #[test] + fn parse() { + use super::parse; - let name = memchr(b':', src.as_bytes()).filter(|&i| { - i != 2 - && src.as_bytes()[2..i] - .iter() - .all(|&c| c.is_ascii_alphanumeric() || c == b'-') - })?; - - let end = Substring::new("@@") - .find(&src[name + 1..]) - .map(|i| i + name + 1)?; - - Some(( - Snippet { - name: &src[2..name], - value: &src[name + 1..end], - }, - end + 2, - )) + assert_eq!( + parse("@@html:@@").unwrap(), + ("html", "", "@@html:@@".len()) + ); + assert_eq!( + parse("@@latex:any arbitrary LaTeX code@@").unwrap(), + ( + "latex", + "any arbitrary LaTeX code", + "@@latex:any arbitrary LaTeX code@@".len() + ) + ); + assert_eq!(parse("@@html:@@").unwrap(), ("html", "", "@@html:@@".len())); + assert!(parse("@@html:@").is_none()); + assert!(parse("@@html@@").is_none()); + assert!(parse("@@:@@").is_none()); } } - -#[test] -fn parse() { - assert_eq!( - Snippet::parse("@@html:@@").unwrap(), - ( - Snippet { - name: "html", - value: "" - }, - "@@html:@@".len() - ) - ); - assert_eq!( - Snippet::parse("@@latex:any arbitrary LaTeX code@@").unwrap(), - ( - Snippet { - name: "latex", - value: "any arbitrary LaTeX code" - }, - "@@latex:any arbitrary LaTeX code@@".len() - ) - ); - assert_eq!( - Snippet::parse("@@html:@@").unwrap(), - ( - Snippet { - name: "html", - value: "" - }, - "@@html:@@".len() - ) - ); - assert!(Snippet::parse("@@html:@").is_none()); - assert!(Snippet::parse("@@html@@").is_none()); - assert!(Snippet::parse("@@:@@").is_none()); -} diff --git a/src/objects/target.rs b/src/objects/target.rs index 24a8bca..50a6d23 100644 --- a/src/objects/target.rs +++ b/src/objects/target.rs @@ -1,79 +1,40 @@ use jetscii::Substring; -#[cfg_attr(test, derive(PartialEq))] -#[derive(Debug)] -// TODO: text-markup, entities, latex-fragments, subscript and superscript -pub struct RadioTarget<'a>(&'a str); +#[inline] +pub fn parse(src: &str) -> Option<(&str, usize)> { + debug_assert!(src.starts_with("<<")); -impl<'a> RadioTarget<'a> { - pub fn parse(src: &'a str) -> Option<(RadioTarget<'a>, usize)> { - debug_assert!(src.starts_with("<<<")); + expect!(src, 2, |c| c != b' ')?; - expect!(src, 3, |c| c != b' ')?; + let end = Substring::new(">>").find(src).filter(|&i| { + src.as_bytes()[2..i] + .iter() + .all(|&c| c != b'<' && c != b'\n' && c != b'>') + })?; - let end = Substring::new(">>>").find(src).filter(|&i| { - src.as_bytes()[3..i] - .iter() - .all(|&c| c != b'<' && c != b'\n' && c != b'>') - })?; + if src.as_bytes()[end - 1] == b' ' { + return None; + } - expect!(src, end - 1, |c| c != b' ')?; + Some((&src[2..end], end + 2)) +} - Some((RadioTarget(&src[3..end]), end + 3)) +#[cfg(test)] +mod tests { + #[test] + fn parse() { + use super::parse; + + assert_eq!(parse("<>").unwrap(), ("target", "<>".len())); + assert_eq!( + parse("<>").unwrap(), + ("tar get", "<>".len()) + ); + assert_eq!(parse("<>"), None); + assert_eq!(parse("<< target>>"), None); + assert_eq!(parse("<>"), None); + assert_eq!(parse("<get>>"), None); + assert_eq!(parse("<>"), None); + assert_eq!(parse("<"), None); } } - -#[cfg_attr(test, derive(PartialEq))] -#[derive(Debug)] -pub struct Target<'a>(&'a str); - -impl<'a> Target<'a> { - pub fn parse(src: &'a str) -> Option<(Target<'a>, usize)> { - debug_assert!(src.starts_with("<<")); - - expect!(src, 2, |c| c != b' ')?; - - let end = Substring::new(">>").find(src).filter(|&i| { - src.as_bytes()[2..i] - .iter() - .all(|&c| c != b'<' && c != b'\n' && c != b'>') - })?; - - expect!(src, end - 1, |c| c != b' ')?; - - Some((Target(&src[2..end]), end + 2)) - } -} - -#[test] -fn parse() { - assert_eq!( - RadioTarget::parse("<<>>").unwrap(), - (RadioTarget("target"), "<<>>".len()) - ); - assert_eq!( - RadioTarget::parse("<<>>").unwrap(), - (RadioTarget("tar get"), "<<>>".len()) - ); - assert_eq!(RadioTarget::parse("<<>>"), None); - assert_eq!(RadioTarget::parse("<<< target>>>"), None); - assert_eq!(RadioTarget::parse("<<>>"), None); - assert_eq!(RadioTarget::parse("<<get>>>"), None); - assert_eq!(RadioTarget::parse("<<>>"), None); - assert_eq!(RadioTarget::parse("<<>"), None); - - assert_eq!( - Target::parse("<>").unwrap(), - (Target("target"), "<>".len()) - ); - assert_eq!( - Target::parse("<>").unwrap(), - (Target("tar get"), "<>".len()) - ); - assert_eq!(Target::parse("<>"), None); - assert_eq!(Target::parse("<< target>>"), None); - assert_eq!(Target::parse("<>"), None); - assert_eq!(Target::parse("<get>>"), None); - assert_eq!(Target::parse("<>"), None); - assert_eq!(Target::parse("<"), None); -} diff --git a/src/parser.rs b/src/parser.rs index 196ec78..bb52e85 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,10 +1,12 @@ +//! Parser + use crate::elements::*; use crate::headline::*; use crate::objects::*; #[cfg_attr(test, derive(PartialEq))] #[derive(Copy, Clone, Debug)] -pub enum Container { +enum Container { Headline { beg: usize, end: usize, @@ -143,14 +145,39 @@ pub enum Event<'a> { Rule, Cookie(Cookie<'a>), - FnRef(FnRef<'a>), - InlineCall(InlineCall<'a>), - InlineSrc(InlineSrc<'a>), - Link(Link<'a>), - Macros(Macros<'a>), - RadioTarget(RadioTarget<'a>), - Snippet(Snippet<'a>), - Target(Target<'a>), + FnRef { + label: Option<&'a str>, + def: Option<&'a str>, + }, + InlineCall { + name: &'a str, + args: &'a str, + inside_header: Option<&'a str>, + end_header: Option<&'a str>, + }, + InlineSrc { + lang: &'a str, + option: Option<&'a str>, + body: &'a str, + }, + Link { + path: &'a str, + desc: Option<&'a str>, + }, + Macros { + name: &'a str, + args: Option<&'a str>, + }, + RadioTarget { + target: &'a str, + }, + Snippet { + name: &'a str, + value: &'a str, + }, + Target { + target: &'a str, + }, BoldBeg, BoldEnd, @@ -176,6 +203,7 @@ pub struct Parser<'a> { } impl<'a> Parser<'a> { + /// creates a new parser from string pub fn new(text: &'a str) -> Parser<'a> { Parser { text, @@ -187,10 +215,12 @@ impl<'a> Parser<'a> { } } + /// returns current offset pub fn offset(&self) -> usize { self.off } + /// returns current stack depth pub fn stack_depth(&self) -> usize { self.stack.len() } @@ -359,16 +389,26 @@ impl<'a> Parser<'a> { Object::Bold { .. } => Event::BoldBeg, Object::Code(c) => Event::Code(c), Object::Cookie(c) => Event::Cookie(c), - Object::FnRef(f) => Event::FnRef(f), - Object::InlineCall(i) => Event::InlineCall(i), - Object::InlineSrc(i) => Event::InlineSrc(i), + Object::FnRef { label, def } => Event::FnRef { label, def }, + Object::InlineCall { + name, + args, + inside_header, + end_header, + } => Event::InlineCall { + name, + args, + inside_header, + end_header, + }, + Object::InlineSrc { lang, option, body } => Event::InlineSrc { lang, option, body }, Object::Italic { .. } => Event::ItalicBeg, - Object::Link(l) => Event::Link(l), - Object::Macros(m) => Event::Macros(m), - Object::RadioTarget(r) => Event::RadioTarget(r), - Object::Snippet(s) => Event::Snippet(s), + Object::Link { path, desc } => Event::Link { path, desc }, + Object::Macros { name, args } => Event::Macros { name, args }, + Object::RadioTarget { target } => Event::RadioTarget { target }, + Object::Snippet { name, value } => Event::Snippet { name, value }, Object::Strike { .. } => Event::StrikeBeg, - Object::Target(t) => Event::Target(t), + Object::Target { target } => Event::Target { target }, Object::Text(t) => Event::Text(t), Object::Underline { .. } => Event::UnderlineBeg, Object::Verbatim(v) => Event::Verbatim(v), @@ -376,7 +416,7 @@ impl<'a> Parser<'a> { } fn next_list_item(&mut self, ident: usize, end: usize) -> Event<'a> { - let (bullet, off, cont_end, end, has_more) = List::parse(&self.text[self.off..end], ident); + let (bullet, off, cont_end, end, has_more) = list::parse(&self.text[self.off..end], ident); self.stack.push(Container::ListItem { cont_end: self.off + cont_end, end: self.off + end, @@ -386,6 +426,7 @@ impl<'a> Parser<'a> { Event::ListItemBeg { bullet } } + #[inline] fn end(&mut self) -> Event<'a> { match self.stack.pop().unwrap() { Container::Bold { .. } => Event::BoldEnd, @@ -482,7 +523,13 @@ fn parse() { use self::Event::*; let expected = vec![ - HeadlineBeg(Headline::new(1, None, None, "Title 1", None)), + HeadlineBeg(Headline { + level: 1, + priority: None, + keyword: None, + title: "Title 1", + tags: None, + }), SectionBeg, ParagraphBeg, BoldBeg, @@ -490,7 +537,13 @@ fn parse() { BoldEnd, ParagraphEnd, SectionEnd, - HeadlineBeg(Headline::new(2, None, None, "Title 2", None)), + HeadlineBeg(Headline { + level: 2, + priority: None, + keyword: None, + title: "Title 2", + tags: None, + }), SectionBeg, ParagraphBeg, UnderlineBeg, @@ -500,7 +553,13 @@ fn parse() { SectionEnd, HeadlineEnd, HeadlineEnd, - HeadlineBeg(Headline::new(1, None, None, "Title 3", None)), + HeadlineBeg(Headline { + level: 1, + priority: None, + keyword: None, + title: "Title 3", + tags: None, + }), SectionBeg, ParagraphBeg, ItalicBeg, @@ -509,7 +568,13 @@ fn parse() { ParagraphEnd, SectionEnd, HeadlineEnd, - HeadlineBeg(Headline::new(1, None, None, "Title 4", None)), + HeadlineBeg(Headline { + level: 1, + priority: None, + keyword: None, + title: "Title 4", + tags: None, + }), SectionBeg, ParagraphBeg, Verbatim("Section 4"),