diff --git a/src/elements/block.rs b/src/elements/block.rs
new file mode 100644
index 0000000..80a64ed
--- /dev/null
+++ b/src/elements/block.rs
@@ -0,0 +1,14 @@
+/// Opening line of a block; for now it records only which kind of block it is.
+///
+/// This is a struct rather than an enum because a bare named field
+/// (`name: BlockName`) is not valid inside an `enum` body.
+pub struct BlockStart {
+    name: BlockName,
+}
+
+/// The kinds of block a [`BlockStart`] can name.
+// NOTE(review): `Verbose` may have been intended as `Verse` — confirm against the org syntax spec.
+pub enum BlockName {
+    Center,
+    Comment,
+    Example,
+    Export,
+    Quote,
+    Src,
+    Verbose,
+    Special
+}
diff --git a/src/elements/fn_def.rs b/src/elements/fn_def.rs
new file mode 100644
index 0000000..4be6a59
--- /dev/null
+++ b/src/elements/fn_def.rs
@@ -0,0 +1,79 @@
+/// A footnote definition line of the form `[fn:LABEL]CONTENTS`.
+#[cfg_attr(test, derive(PartialEq, Debug))]
+pub struct FnDef<'a> {
+    /// Text between `[fn:` and the closing `]`.
+    pub label: &'a str,
+    /// Everything after the closing `]` up to the end of the line (leading space included).
+    pub contents: &'a str,
+}
+
+/// Returns `true` for bytes allowed in a footnote label: ASCII alphanumerics, `-` and `_`.
+#[inline]
+fn valid_label(ch: u8) -> bool {
+    ch.is_ascii_alphanumeric() || ch == b'-' || ch == b'_'
+}
+
+impl<'a> FnDef<'a> {
+    /// Parses a footnote definition at the start of `src`.
+    ///
+    /// Returns the definition together with the offset produced by `eol!(src)`
+    /// (for the single-line inputs in the tests below this is `src.len()`).
+    /// Returns `None` when `src` does not start with `[fn:` or the label is
+    /// empty. Label scanning is delegated to `until_while!`, which is defined
+    /// elsewhere; the tests show that a label containing a space, or one that
+    /// is never closed by `]`, is rejected.
+    pub fn parse(src: &'a str) -> Option<(FnDef<'a>, usize)> {
+        starts_with!(src, "[fn:");
+
+        let label = until_while!(src, 4, b']', valid_label);
+
+        // `label == 4` means `]` directly follows `[fn:`, i.e. the label is empty.
+        if label == 4 {
+            return None;
+        }
+
+        let end = eol!(src);
+
+        Some((
+            FnDef {
+                label: &src[4..label],
+                contents: &src[label + 1..end],
+            },
+            end,
+        ))
+    }
+}
+
+#[test]
+fn parse() {
+    assert_eq!(
+        FnDef::parse("[fn:1] https://orgmode.org").unwrap(),
+        (
+            FnDef {
+                label: "1",
+                contents: " https://orgmode.org",
+            },
+            "[fn:1] https://orgmode.org".len()
+        )
+    );
+    assert_eq!(
+        FnDef::parse("[fn:word_1] https://orgmode.org").unwrap(),
+        (
+            FnDef {
+                label: "word_1",
+                contents: " https://orgmode.org",
+            },
+            "[fn:word_1] https://orgmode.org".len()
+        )
+    );
+    assert_eq!(
+        FnDef::parse("[fn:WORD-1] https://orgmode.org").unwrap(),
+        (
+            FnDef {
+                label: "WORD-1",
+                contents: " https://orgmode.org",
+            },
+            "[fn:WORD-1] https://orgmode.org".len()
+        )
+    );
+    assert_eq!(
+        FnDef::parse("[fn:WORD]").unwrap(),
+        (
+            FnDef {
+                label: "WORD",
+                contents: "",
+            },
+            "[fn:WORD]".len()
+        )
+    );
+    assert!(FnDef::parse("[fn:] https://orgmode.org").is_none());
+    assert!(FnDef::parse("[fn:wor d] https://orgmode.org").is_none());
+    assert!(FnDef::parse("[fn:WORD https://orgmode.org").is_none());
+}
diff --git 
a/src/elements/keyword.rs b/src/elements/keyword.rs new file mode 100644 index 0000000..3df61f1 --- /dev/null +++ b/src/elements/keyword.rs @@ -0,0 +1,175 @@ +#[cfg_attr(test, derive(PartialEq, Debug))] +pub struct Keyword<'a> { + pub key: &'a str, + pub value: &'a str, +} + +impl<'a> Keyword<'a> { + pub fn parse(src: &'a str) -> Option<(Keyword<'a>, usize)> { + starts_with!(src, "#+"); + + let key = until_while!(src, 2, b':', |c: u8| c.is_ascii_uppercase() || c == b'_'); + + let end = eol!(src); + + if end == key + 1 { + Some(( + Keyword { + key: &src[2..key], + value: "", + }, + end, + )) + } else { + let space = position!(src, key + 1, |c| !c.is_ascii_whitespace()); + + Some(( + Keyword { + key: &src[2..key], + value: &src[space..end], + }, + end, + )) + } + } +} + +#[cfg_attr(test, derive(PartialEq, Debug))] +pub struct AffKeyword<'a> { + pub key: AffKeywordKey<'a>, + pub option: Option<&'a str>, + pub value: &'a str, +} + +#[cfg_attr(test, derive(PartialEq, Debug))] +pub enum AffKeywordKey<'a> { + Caption, + Header, + Name, + Plot, + Results, + AttrBackend(&'a str), +} + +// impl<'a> AffKeyword<'a> { +// pub fn parse(src: &'a str) -> Option> { +// if src.len() < 3 && !src.starts_with("#+") { +// return None; +// } + +// let end = src.nextline(); +// let colon = src[2..end].until(b':'); +// let key_index = src[2..end] +// .as_bytes() +// .iter() +// .position(|&c| !(c.is_ascii_alphanumeric() || c == b'-' || c == b'_')); +// // .unwrap_or(2); + +// // let key = match parse_key(&src[2..key_index]) { + +// // } + +// // if key.is_none() { +// // return None; +// // } + +// if let Some(key_index) = key { +// // if src.as_bytes()[key_index] = b':' +// parse_key(&src[2..key_index]) +// .filter(|_| src.as_bytes()[colon + 1] == b' ') +// .map(|key| { +// if src.as_bytes()[key_index + 1] == b'[' && src.as_bytes()[colon - 1] == b']' { +// AffKeyword { +// key, +// value: &s[colon + 2..end], +// option: Some(&s[key_index + 2..colon - 1]), +// } +// } else { +// AffKeyword 
{ +// key, +// value: &s[colon + 2..end], +// option: None, +// } +// } +// }) +// } else { +// None +// } +// } +// } + +fn parse_key<'a>(key: &'a str) -> Option> { + match key { + "CAPTION" => Some(AffKeywordKey::Caption), + "HEADER" => Some(AffKeywordKey::Header), + "NAME" => Some(AffKeywordKey::Name), + "PLOT" => Some(AffKeywordKey::Plot), + "RESULTS" => Some(AffKeywordKey::Results), + k => { + if k.starts_with("ATTR_") + && k[5..] + .as_bytes() + .iter() + .all(|&c| c.is_ascii_alphanumeric() || c == b'-' || c == b'_') + { + Some(AffKeywordKey::AttrBackend(&k[5..])) + } else { + None + } + } + } +} + +#[test] +fn parse() { + assert_eq!( + Keyword::parse("#+KEY:").unwrap(), + ( + Keyword { + key: "KEY", + value: "", + }, + "#+KEY:".len() + ) + ); + assert_eq!( + Keyword::parse("#+KEY: VALUE").unwrap(), + ( + Keyword { + key: "KEY", + value: "VALUE", + }, + "#+KEY: VALUE".len() + ) + ); + assert_eq!( + Keyword::parse("#+K_E_Y: VALUE").unwrap(), + ( + Keyword { + key: "K_E_Y", + value: "VALUE", + }, + "#+K_E_Y: VALUE".len() + ) + ); + assert_eq!( + Keyword::parse("#+KEY:VALUE").unwrap(), + ( + Keyword { + key: "KEY", + value: "VALUE", + }, + "#+KEY:VALUE".len() + ) + ); + assert!(Keyword::parse("#+KE Y: VALUE").is_none()); + assert!(Keyword::parse("#+ KEY: VALUE").is_none()); + assert!(Keyword::parse("# +KEY: VALUE").is_none()); + assert!(Keyword::parse(" #+KEY: VALUE").is_none()); +} + +// #[test] +// fn parse_affiliated_keyword() { +// assert_eq!(AffKeyword::parse("#+KEY: VALUE"), None); +// assert_eq!(AffKeyword::parse("#+CAPTION: VALUE"), None); +// } diff --git a/src/elements/mod.rs b/src/elements/mod.rs new file mode 100644 index 0000000..437e372 --- /dev/null +++ b/src/elements/mod.rs @@ -0,0 +1,18 @@ +pub mod fn_def; +pub mod keyword; +pub mod rule; + +pub use self::fn_def::FnDef; +pub use self::keyword::Keyword; +pub use self::rule::Rule; + +pub enum Element<'a> { + Paragraph(&'a str), +} + +impl<'a> Element<'a> { + pub fn find_elem(src: &'a str) -> 
(Element<'a>, usize) { + // TODO + (Element::Paragraph(src), src.len()) + } +} diff --git a/src/elements/rule.rs b/src/elements/rule.rs new file mode 100644 index 0000000..4623941 --- /dev/null +++ b/src/elements/rule.rs @@ -0,0 +1,32 @@ +pub struct Rule; + +impl Rule { + pub fn parse(src: &str) -> Option { + let end = eol!(src); + let leading = until_while!(src, 0, b'-', |c| c == b' ' || c == b'\t'); + if src[leading..end].chars().all(|c| c == '-') && end - leading > 4 { + Some(end) + } else { + None + } + } +} + +#[test] +fn parse() { + assert!(Rule::parse("-----").is_some()); + assert!(Rule::parse("--------").is_some()); + assert!(Rule::parse(" -----").is_some()); + assert!(Rule::parse("\t\t-----").is_some()); + + assert!(Rule::parse("").is_none()); + assert!(Rule::parse("----").is_none()); + assert!(Rule::parse(" ----").is_none()); + assert!(Rule::parse(" 0----").is_none()); + assert!(Rule::parse("0 ----").is_none()); + assert!(Rule::parse("0------").is_none()); + assert!(Rule::parse("----0----").is_none()); + assert!(Rule::parse("\t\t----").is_none()); + assert!(Rule::parse("------0").is_none()); + assert!(Rule::parse("----- 0").is_none()); +} diff --git a/src/headline.rs b/src/headline.rs new file mode 100644 index 0000000..755e27d --- /dev/null +++ b/src/headline.rs @@ -0,0 +1,274 @@ +#[derive(PartialEq, Debug)] +pub struct Headline<'a> { + pub level: usize, + pub priority: Option, + pub tags: Option<&'a str>, + pub title: &'a str, + pub keyword: Option<&'a str>, +} + +impl<'a> Headline<'a> { + #[inline] + fn parse_priority(src: &str) -> Option { + let bytes = src.as_bytes(); + if bytes.len() > 4 + && bytes[0] == b'[' + && bytes[1] == b'#' + && bytes[2].is_ascii_uppercase() + && bytes[3] == b']' + && bytes[4] == b' ' + { + Some(bytes[2] as char) + } else { + None + } + } + + #[inline] + fn parse_keyword(src: &'a str) -> Option<(&'a str, usize)> { + let mut pos = 0; + while pos < src.len() { + if src.as_bytes()[pos] == b' ' { + break; + } else if 
src.as_bytes()[pos].is_ascii_uppercase() {
+                pos += 1;
+            } else {
+                return None;
+            }
+        }
+        if pos == src.len() || src[0..pos] == *"COMMENT" {
+            None
+        } else {
+            Some((&src[0..pos], pos))
+        }
+    }
+
+    /// Splits a trailing tag group (`:tag1:tag2:`) off the headline text.
+    ///
+    /// The last whitespace-separated word of `src` counts as a tag group when
+    /// it both starts and ends with `:`. Returns that word (or `None`) and the
+    /// length of the title part of `src`. When tags are present the title ends
+    /// one byte before the tag group, dropping the separator in front of it.
+    #[inline]
+    fn parse_tags(src: &'a str) -> (Option<&'a str>, usize) {
+        match src.split_whitespace().last() {
+            Some(last) if last.starts_with(':') && last.ends_with(':') => {
+                let colon = src
+                    .rfind(':')
+                    .expect("`last` ends with ':', so `src` contains one");
+                // `colon - last.len()` underflows when the tag group starts at
+                // offset 0 (a headline consisting only of tags); clamp to an
+                // empty title instead.
+                (Some(last), colon.saturating_sub(last.len()))
+            }
+            _ => (None, src.len()),
+        }
+    }
+
+    /// Parses the headline at the start of `src`.
+    ///
+    /// Returns `(headline, eol, end)`: `eol` is the offset produced by
+    /// `eol!(src)` and `end` is `eol` plus whatever [`Headline::find_level`]
+    /// reports for the text after the first line at this headline's level.
+    /// Keyword, priority and tags are peeled off the first line in that order;
+    /// what is left becomes the title.
+    pub fn parse(src: &'a str) -> (Headline<'a>, usize, usize) {
+        // Count the leading stars without indexing, so an empty or all-star
+        // input cannot run past the end of the slice.
+        // NOTE(review): `skip_whitespace!` is defined elsewhere — confirm it
+        // accepts a start offset equal to `src.len()`.
+        let level = src.bytes().take_while(|&b| b == b'*').count();
+
+        let eol = eol!(src);
+        let end = Headline::find_level(&src[eol..], level) + eol;
+
+        let mut title_start = skip_whitespace!(src, level);
+
+        let keyword = match Headline::parse_keyword(&src[title_start..eol]) {
+            Some((k, l)) => {
+                title_start += l;
+                Some(k)
+            }
+            None => None,
+        };
+
+        title_start = skip_whitespace!(src, title_start);
+
+        let priority = match Headline::parse_priority(&src[title_start..eol]) {
+            Some(p) => {
+                // Skip the four bytes of the `[#X]` cookie.
+                title_start += 4;
+                Some(p)
+            }
+            None => None,
+        };
+
+        title_start = skip_whitespace!(src, title_start);
+
+        let (tags, title_off) = Headline::parse_tags(&src[title_start..eol]);
+
+        // println!("{:?} {:?} {:?}", keyword, priority, tags);
+        // println!("{:?} {}", title_start, title_off);
+
+        (
+            Headline::new(
+                level,
+                keyword,
+                priority,
+                &src[title_start..title_start + title_off],
+                tags,
+            ),
+            eol,
+            end,
+        )
+    }
+
+    /// Returns the byte offset in `src` of the first line that begins with at
+    /// most `level` `*` characters followed by a space, or `src.len()` when no
+    /// such line exists.
+    // TODO: optimize
+    pub fn find_level(src: &str, level: usize) -> usize {
+        let mut pos = 0;
+        let end;
+        'outer: loop {
+            if pos >= src.len() {
+                end = src.len();
+                break;
+            }
+
+            if src.as_bytes()[pos] == b'*' && (pos == 0 || src.as_bytes()[pos - 1] == b'\n') {
+                let pos_ = pos;
+                'inner: loop {
+                    if pos >= src.len() {
+                        end = src.len();
+                        break 'outer;
+                    }
+                    if src.as_bytes()[pos] == b'*' {
+                        pos += 1;
+                    } else if src.as_bytes()[pos] == b' ' && 
pos - pos_ <= level { + end = pos_; + break 'outer; + } else { + break 'inner; + } + } + } + + pos += 1; + } + + end + } + + pub fn is_commented(&self) -> bool { + self.title.starts_with("COMMENT ") + } + + pub fn is_archived(&self) -> bool { + self.tags + .map(|tags| tags[1..].split_terminator(':').any(|t| t == "ARCHIVE")) + .unwrap_or(false) + } + + pub fn new( + level: usize, + keyword: Option<&'a str>, + priority: Option, + title: &'a str, + tags: Option<&'a str>, + ) -> Headline<'a> { + Headline { + level, + keyword, + priority, + title, + tags, + } + } +} + +#[test] +fn parse() { + assert_eq!( + Headline::parse("**** TODO [#A] COMMENT Title :tag:a2%:").0, + Headline::new( + 4, + Some("TODO"), + Some('A'), + "COMMENT Title", + Some(":tag:a2%:"), + ), + ); + assert_eq!( + Headline::parse("**** ToDO [#A] COMMENT Title :tag:a2%:").0, + Headline { + level: 4, + priority: None, + tags: Some(":tag:a2%:"), + title: "ToDO [#A] COMMENT Title", + keyword: None, + }, + ); + assert_eq!( + Headline::parse("**** T0DO [#A] COMMENT Title :tag:a2%:").0, + Headline { + level: 4, + priority: None, + tags: Some(":tag:a2%:"), + title: "T0DO [#A] COMMENT Title", + keyword: None, + }, + ); + assert_eq!( + Headline::parse("**** TODO [#1] COMMENT Title :tag:a2%:").0, + Headline { + level: 4, + priority: None, + tags: Some(":tag:a2%:"), + title: "[#1] COMMENT Title", + keyword: Some("TODO") + }, + ); + assert_eq!( + Headline::parse("**** TODO [#a] COMMENT Title :tag:a2%:").0, + Headline { + level: 4, + priority: None, + tags: Some(":tag:a2%:"), + title: "[#a] COMMENT Title", + keyword: Some("TODO") + }, + ); + assert_eq!( + Headline::parse("**** TODO [#A] COMMENT Title :tag:a2%").0, + Headline { + level: 4, + priority: Some('A'), + tags: None, + title: "COMMENT Title :tag:a2%", + keyword: Some("TODO"), + }, + ); + assert_eq!( + Headline::parse("**** TODO [#A] COMMENT Title tag:a2%:").0, + Headline { + level: 4, + priority: Some('A'), + tags: None, + title: "COMMENT Title tag:a2%:", + 
keyword: Some("TODO"),
+        },
+    );
+    assert_eq!(
+        Headline::parse("**** COMMENT Title tag:a2%:").0,
+        Headline {
+            level: 4,
+            priority: None,
+            tags: None,
+            title: "COMMENT Title tag:a2%:",
+            keyword: None,
+        },
+    );
+}
+
+#[test]
+fn is_commented() {
+    assert!(Headline::parse("* COMMENT Title").0.is_commented());
+    assert!(!Headline::parse("* Title").0.is_commented());
+    assert!(!Headline::parse("* C0MMENT Title").0.is_commented());
+    assert!(!Headline::parse("* comment Title").0.is_commented());
+}
+
+#[test]
+fn is_archived() {
+    assert!(Headline::parse("* Title :ARCHIVE:").0.is_archived());
+    assert!(Headline::parse("* Title :tag:ARCHIVE:").0.is_archived());
+    assert!(Headline::parse("* Title :ARCHIVE:tag:").0.is_archived());
+    // The untagged case must exercise `is_archived`, like every other line in this test.
+    assert!(!Headline::parse("* Title").0.is_archived());
+    assert!(!Headline::parse("* Title :ARCHIVED:").0.is_archived());
+    assert!(!Headline::parse("* Title :ARCHIVES:").0.is_archived());
+    assert!(!Headline::parse("* Title :archive:").0.is_archived());
+}
diff --git a/src/lib.rs b/src/lib.rs
index 31e1bb2..79e2992 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,7 +1,7 @@
-#[cfg(test)]
-mod tests {
-    #[test]
-    fn it_works() {
-        assert_eq!(2 + 2, 4);
-    }
-}
+#[macro_use]
+mod utils;
+
+mod elements;
+mod headline;
+mod objects;
+mod parser;
diff --git a/src/objects/cookie.rs b/src/objects/cookie.rs
new file mode 100644
index 0000000..2311405
--- /dev/null
+++ b/src/objects/cookie.rs
@@ -0,0 +1,67 @@
+/// A statistics cookie such as `[1/10]`, `[10%]`, `[/]` or `[%]`.
+#[cfg_attr(test, derive(PartialEq, Debug))]
+pub struct Cookie<'a> {
+    /// The whole cookie text, brackets included.
+    value: &'a str,
+}
+
+impl<'a> Cookie<'a> {
+    /// Parses a cookie at the start of `src`.
+    ///
+    /// Returns the cookie and the number of bytes it occupies, or `None` when
+    /// `src` does not start with `[` or one of the scanning macros gives up.
+    /// `num1` is the offset of the `%` or `/` separator that follows the
+    /// optional first run of digits.
+    pub fn parse(src: &'a str) -> Option<(Cookie<'a>, usize)> {
+        starts_with!(src, '[');
+
+        let num1 = until_while!(src, 1, |c| c == b'%' || c == b'/', |c: u8| c
+            .is_ascii_digit());
+
+        // Checked lookup: the previous guard (`src.len() > num1`) did not cover
+        // the index `num1 + 1`, so input ending right after the separator
+        // (e.g. "[1/") indexed one past the end.
+        // NOTE(review): the else branch now receives `num1 + 1 == src.len()` for
+        // such input — confirm `until_while!` rejects that start offset.
+        if src.as_bytes().get(num1 + 1) == Some(&b']') {
+            Some((
+                Cookie {
+                    value: &src[0..num1 + 2],
+                },
+                num1 + 2,
+            ))
+        } else {
+            let num2 = until_while!(src, num1 + 1, b']', |c: u8| c.is_ascii_digit());
+            Some((
+                Cookie {
+                    value: 
&src[0..=num2], + }, + num2 + 1, + )) + } + } +} + +#[test] +fn parse() { + assert_eq!( + Cookie::parse("[1/10]").unwrap(), + (Cookie { value: "[1/10]" }, "[1/10]".len()) + ); + assert_eq!( + Cookie::parse("[1/1000]").unwrap(), + (Cookie { value: "[1/1000]" }, "[1/1000]".len()) + ); + assert_eq!( + Cookie::parse("[10%]").unwrap(), + (Cookie { value: "[10%]" }, "[10%]".len()) + ); + assert_eq!( + Cookie::parse("[%]").unwrap(), + (Cookie { value: "[%]" }, "[%]".len()) + ); + assert_eq!( + Cookie::parse("[/]").unwrap(), + (Cookie { value: "[/]" }, "[/]".len()) + ); + assert_eq!( + Cookie::parse("[100/]").unwrap(), + (Cookie { value: "[100/]" }, "[100/]".len()) + ); + assert_eq!( + Cookie::parse("[/100]").unwrap(), + (Cookie { value: "[/100]" }, "[/100]".len()) + ); + + assert!(Cookie::parse("[10% ]").is_none(),); + assert!(Cookie::parse("[1//100]").is_none(),); + assert!(Cookie::parse("[1\\100]").is_none(),); + assert!(Cookie::parse("[10%%]").is_none(),); +} diff --git a/src/objects/emphasis.rs b/src/objects/emphasis.rs new file mode 100644 index 0000000..4aae3e0 --- /dev/null +++ b/src/objects/emphasis.rs @@ -0,0 +1,49 @@ +pub struct Emphasis; + +impl Emphasis { + pub fn parse(src: &str, marker: u8) -> Option<(&'_ str, usize)> { + expect!(src, 1, |c: u8| !c.is_ascii_whitespace()); + + let mut lines = 0; + let end = until_while!(src, 1, marker, |c| { + if c == b'\n' { + lines += 1; + } + lines < 2 + }); + + expect!(src, end - 1, |c: u8| !c.is_ascii_whitespace()); + + if end < src.len() - 1 { + expect!(src, end + 1, |ch| ch == b' ' + || ch == b'-' + || ch == b'.' + || ch == b',' + || ch == b':' + || ch == b'!' + || ch == b'?' 
+ || ch == b'\'' + || ch == b')' + || ch == b'}'); + } + + Some((&src[1..end], end + 1)) + } +} + +#[test] +fn parse() { + assert_eq!( + Emphasis::parse("*bold*", b'*').unwrap(), + ("bold", "*bold*".len()) + ); + assert_eq!( + Emphasis::parse("*bo\nld*", b'*').unwrap(), + ("bo\nld", "*bo\nld*".len()) + ); + assert!(Emphasis::parse("*bold*a", b'*').is_none()); + assert!(Emphasis::parse("*bold*", b'/').is_none()); + assert!(Emphasis::parse("*bold *", b'*').is_none()); + assert!(Emphasis::parse("* bold*", b'*').is_none()); + assert!(Emphasis::parse("*b\nol\nd*", b'*').is_none()); +} diff --git a/src/objects/entity.rs b/src/objects/entity.rs new file mode 100644 index 0000000..4c92ef9 --- /dev/null +++ b/src/objects/entity.rs @@ -0,0 +1,38 @@ +pub struct Entity<'a> { + pub name: &'a str, + pub contents: Option<&'a str>, +} + +impl<'a> Entity<'a> { + pub fn parse(src: &'a str) -> Option<(Entity<'a>, usize)> { + expect!(src, 0, b'\\'); + + let name = position!(src, 1, |c| !c.is_ascii_alphabetic()); + + if src.as_bytes()[name] == b'[' { + Some(( + Entity { + name: &src[1..name], + contents: None, + }, + name, + )) + } else if src.as_bytes()[name] == b'{' { + Some(( + Entity { + name: &src[1..name], + contents: None, + }, + name, + )) + } else { + Some(( + Entity { + name: &src[1..name], + contents: None, + }, + name, + )) + } + } +} diff --git a/src/objects/fn_ref.rs b/src/objects/fn_ref.rs new file mode 100644 index 0000000..2b1f777 --- /dev/null +++ b/src/objects/fn_ref.rs @@ -0,0 +1,98 @@ +#[cfg_attr(test, derive(PartialEq, Debug))] +pub struct FnRef<'a> { + label: Option<&'a str>, + definition: Option<&'a str>, +} + +fn valid_label(ch: u8) -> bool { + ch.is_ascii_alphanumeric() || ch == b'-' || ch == b'_' +} + +impl<'a> FnRef<'a> { + pub fn parse(src: &'a str) -> Option<(FnRef<'a>, usize)> { + starts_with!(src, "[fn:"); + + let label = until_while!(src, 4, |c| c == b']' || c == b':', valid_label); + + if src.as_bytes()[label] == b':' { + let mut pairs = 1; + let def = 
until!(src[label..], |c| { + if c == b'[' { + pairs += 1; + } else if c == b']' { + pairs -= 1; + } + c == b']' && pairs == 0 + })? + label; + + Some(( + FnRef { + label: if label == 4 { + None + } else { + Some(&src[4..label]) + }, + definition: Some(&src[label + 1..def]), + }, + def + 1, + )) + } else { + Some(( + FnRef { + label: if label == 4 { + None + } else { + Some(&src[4..label]) + }, + definition: None, + }, + label + 1, + )) + } + } +} + +#[test] +fn parse() { + assert_eq!( + FnRef::parse("[fn:1]").unwrap(), + ( + FnRef { + label: Some("1"), + definition: None, + }, + "[fn:1]".len() + ) + ); + assert_eq!( + FnRef::parse("[fn:1:2]").unwrap(), + ( + FnRef { + label: Some("1"), + definition: Some("2"), + }, + "[fn:1:2]".len() + ) + ); + assert_eq!( + FnRef::parse("[fn::2]").unwrap(), + ( + FnRef { + label: None, + definition: Some("2"), + }, + "[fn::2]".len() + ) + ); + assert_eq!( + FnRef::parse("[fn::[]]").unwrap(), + ( + FnRef { + label: None, + definition: Some("[]"), + }, + "[fn::[]]".len() + ) + ); + assert!(FnRef::parse("[fn::[]").is_none()); +} diff --git a/src/objects/fragment.rs b/src/objects/fragment.rs new file mode 100644 index 0000000..a5fa8d0 --- /dev/null +++ b/src/objects/fragment.rs @@ -0,0 +1,10 @@ +#[cfg_attr(test, derive(PartialEq, Debug))] +pub struct Fragment<'a> { + value: &'a str, +} + +impl<'a> Fragment<'a> { + pub fn parse(src: &'a str) -> Option<(Fragment<'a>, usize)> { + None + } +} diff --git a/src/objects/inline_call.rs b/src/objects/inline_call.rs new file mode 100644 index 0000000..ecbbdde --- /dev/null +++ b/src/objects/inline_call.rs @@ -0,0 +1,105 @@ +#[cfg_attr(test, derive(PartialEq, Debug))] +pub struct InlineCall<'a> { + pub name: &'a str, + pub args: &'a str, + // header args for block + pub inside_header: Option<&'a str>, + // header args for call line + pub end_header: Option<&'a str>, +} + +impl<'a> InlineCall<'a> { + pub fn parse(src: &'a str) -> Option<(InlineCall, usize)> { + starts_with!(src, "call_"); + + let 
mut pos = until_while!(src, 5, |c| c == b'[' || c == b'(', |c: u8| c + .is_ascii_graphic()); + let mut pos_; + + let name = &src[5..pos]; + + let inside_header = if src.as_bytes()[pos] == b'[' { + pos_ = pos; + pos = until_while!(src, pos, b']', |c: u8| c != b'\n') + 1; + expect!(src, pos, b'('); + Some(&src[pos_ + 1..pos - 1]) + } else { + None + }; + + pos_ = pos; + pos = until_while!(src, pos, b')', |c| c != b'\n'); + let args = &src[pos_ + 1..pos]; + + let end_header = if src.len() > pos + 1 && src.as_bytes()[pos + 1] == b'[' { + pos_ = pos; + pos = until_while!(src, pos_ + 1, |c| c == b']', |c: u8| c != b'\n' + && c != b')'); + Some(&src[pos_ + 2..pos]) + } else { + None + }; + + Some(( + InlineCall { + name, + inside_header, + args, + end_header, + }, + pos + 1, + )) + } +} + +#[test] +fn parse() { + assert_eq!( + InlineCall::parse("call_square(4)").unwrap(), + ( + InlineCall { + name: "square", + args: "4", + inside_header: None, + end_header: None, + }, + "call_square(4)".len() + ) + ); + assert_eq!( + InlineCall::parse("call_square[:results output](4)").unwrap(), + ( + InlineCall { + name: "square", + args: "4", + inside_header: Some(":results output"), + end_header: None, + }, + "call_square[:results output](4)".len() + ) + ); + assert_eq!( + InlineCall::parse("call_square(4)[:results html]").unwrap(), + ( + InlineCall { + name: "square", + args: "4", + inside_header: None, + end_header: Some(":results html"), + }, + "call_square(4)[:results html]".len() + ) + ); + assert_eq!( + InlineCall::parse("call_square[:results output](4)[:results html]").unwrap(), + ( + InlineCall { + name: "square", + args: "4", + inside_header: Some(":results output"), + end_header: Some(":results html"), + }, + "call_square[:results output](4)[:results html]".len() + ) + ); +} diff --git a/src/objects/inline_src.rs b/src/objects/inline_src.rs new file mode 100644 index 0000000..83d3942 --- /dev/null +++ b/src/objects/inline_src.rs @@ -0,0 +1,73 @@ +#[cfg_attr(test, 
derive(PartialEq, Debug))] +pub struct InlineSrc<'a> { + pub lang: &'a str, + pub option: Option<&'a str>, + pub body: &'a str, +} + +impl<'a> InlineSrc<'a> { + pub fn parse(src: &'a str) -> Option<(InlineSrc, usize)> { + starts_with!(src, "src_"); + + let lang = until_while!(src, 4, |c| c == b'[' || c == b'{', |c: u8| !c + .is_ascii_whitespace()); + + if lang == 4 { + return None; + } + + if src.as_bytes()[lang] == b'[' { + let option = until_while!(src, lang, b']', |c| c != b'\n'); + let body = until_while!(src, option, b'}', |c| c != b'\n'); + + Some(( + InlineSrc { + lang: &src[4..lang], + option: Some(&src[lang + 1..option]), + body: &src[option + 2..body], + }, + body + 1, + )) + } else { + let body = until_while!(src, lang, b'}', |c| c != b'\n'); + + Some(( + InlineSrc { + lang: &src[4..lang], + option: None, + body: &src[lang + 1..body], + }, + body + 1, + )) + } + } +} + +#[test] +fn parse() { + assert_eq!( + InlineSrc::parse("src_C{int a = 0;}").unwrap(), + ( + InlineSrc { + lang: "C", + option: None, + body: "int a = 0;" + }, + "src_C{int a = 0;}".len() + ) + ); + assert_eq!( + InlineSrc::parse("src_xml[:exports code]{text}").unwrap(), + ( + InlineSrc { + lang: "xml", + option: Some(":exports code"), + body: "text" + }, + "src_xml[:exports code]{text}".len() + ) + ); + assert!(InlineSrc::parse("src_xml[:exports code]{text").is_none()); + assert!(InlineSrc::parse("src_[:exports code]{text}").is_none()); + assert!(InlineSrc::parse("src_xml[:exports code]").is_none()); +} diff --git a/src/objects/link.rs b/src/objects/link.rs new file mode 100644 index 0000000..11796b6 --- /dev/null +++ b/src/objects/link.rs @@ -0,0 +1,68 @@ +#[cfg_attr(test, derive(PartialEq, Debug))] +pub struct Link<'a> { + pub path: &'a str, + pub desc: Option<&'a str>, +} + +impl<'a> Link<'a> { + pub fn parse(src: &'a str) -> Option<(Link<'a>, usize)> { + starts_with!(src, "[["); + + let path = until_while!(src, 2, b']', |c| c != b']' + && c != b'<' + && c != b'>' + && c != b'\n'); + + 
if cond_eq!(src, path + 1, b']') {
+            Some((
+                Link {
+                    path: &src[2..path],
+                    desc: None,
+                },
+                path + 2,
+            ))
+        // Checked lookup: plain indexing at `path + 1` panics when the input
+        // ends right after the first `]` (e.g. "[[a]").
+        } else if src.as_bytes().get(path + 1) == Some(&b'[') {
+            let desc = until_while!(src, path + 2, b']', |c| c != b']'
+                && c != b'['
+                && c != b'\n');
+
+            expect!(src, desc + 1, b']');
+
+            Some((
+                Link {
+                    path: &src[2..path],
+                    desc: Some(&src[path + 2..desc]),
+                },
+                desc + 2,
+            ))
+        } else {
+            None
+        }
+    }
+}
+
+#[test]
+fn parse() {
+    assert_eq!(
+        Link::parse("[[#id]]").unwrap(),
+        (
+            Link {
+                path: "#id",
+                desc: None,
+            },
+            "[[#id]]".len()
+        )
+    );
+    assert_eq!(
+        Link::parse("[[#id][desc]]").unwrap(),
+        (
+            Link {
+                path: "#id",
+                desc: Some("desc"),
+            },
+            "[[#id][desc]]".len()
+        )
+    );
+    assert!(Link::parse("[[#id][desc]").is_none());
+    assert!(Link::parse("[#id][desc]]").is_none());
+}
diff --git a/src/objects/macros.rs b/src/objects/macros.rs
new file mode 100644
index 0000000..4e35539
--- /dev/null
+++ b/src/objects/macros.rs
@@ -0,0 +1,75 @@
+/// A macro call of the form `{{{name}}}` or `{{{name(args)}}}`.
+#[cfg_attr(test, derive(PartialEq, Debug))]
+pub struct Macros<'a> {
+    /// Macro name, i.e. the text after `{{{` up to `}` or `(`.
+    pub name: &'a str,
+    /// Raw text between the parentheses; `None` when the call has none.
+    pub args: Option<&'a str>,
+}
+
+/// Returns `true` for bytes allowed in a macro name: ASCII alphanumerics, `-` and `_`.
+///
+/// The two punctuation checks must be joined with `||`. Written as
+/// `ch == b'-' && ch == b'_'` the clause can never hold, so only
+/// alphanumerics were accepted.
+fn valid_name(ch: u8) -> bool {
+    ch.is_ascii_alphanumeric() || ch == b'-' || ch == b'_'
+}
+
+impl<'a> Macros<'a> {
+    /// Parses a macro call at the start of `src`.
+    ///
+    /// Requires the `{{{` prefix and an ASCII letter as the first name byte.
+    /// Returns the macro and the number of bytes consumed, or `None` when the
+    /// text is not a well-formed call.
+    pub fn parse(src: &'a str) -> Option<(Macros<'a>, usize)> {
+        starts_with!(src, "{{{");
+
+        expect!(src, 3, |c: u8| c.is_ascii_alphabetic());
+
+        let name = until_while!(src, 3, |c| c == b'}' || c == b'(', valid_name);
+
+        if src.as_bytes()[name] == b'}' {
+            // No argument list: the name must be followed by exactly `}}}`.
+            expect!(src, name + 1, b'}');
+            expect!(src, name + 2, b'}');
+            Some((
+                Macros {
+                    name: &src[3..name],
+                    args: None,
+                },
+                name + 3,
+            ))
+        } else {
+            // Argument list: everything up to the `)` that precedes `}}}`.
+            let end = find!(src, name, "}}}");
+            expect!(src, end - 1, b')');
+            Some((
+                Macros {
+                    name: &src[3..name],
+                    args: if name == end {
+                        None
+                    } else {
+                        Some(&src[name + 1..end - 1])
+                    },
+                },
+                end + 3,
+            ))
+        }
+    }
+}
+
+#[test]
+fn parse() {
+    assert_eq!(
+        Macros::parse("{{{poem(red,blue)}}}").unwrap(),
+        (
+            Macros {
+                name: "poem",
+                args: Some("red,blue") + 
}, + "{{{poem(red,blue)}}}".len() + ) + ); + assert_eq!( + Macros::parse("{{{author}}}").unwrap(), + ( + Macros { + name: "author", + args: None, + }, + "{{{author}}}".len() + ) + ); + assert!(Macros::parse("{{author}}}").is_none()); + assert!(Macros::parse("{{{0uthor}}}").is_none()); + assert!(Macros::parse("{{{author}}").is_none()); + assert!(Macros::parse("{{{poem(}}}").is_none()); + assert!(Macros::parse("{{{poem)}}}").is_none()); + // FIXME: assert_eq!(Macros::parse("{{{poem())}}}"), None); +} diff --git a/src/objects/mod.rs b/src/objects/mod.rs new file mode 100644 index 0000000..c3afcfb --- /dev/null +++ b/src/objects/mod.rs @@ -0,0 +1,91 @@ +mod cookie; +mod emphasis; +mod entity; +mod fn_ref; +mod fragment; +mod inline_call; +mod inline_src; +mod link; +mod macros; +mod snippet; +mod target; + +pub use self::cookie::Cookie; +pub use self::emphasis::Emphasis; +pub use self::fn_ref::FnRef; +pub use self::inline_call::InlineCall; +pub use self::inline_src::InlineSrc; +pub use self::link::Link; +pub use self::macros::Macros; +pub use self::snippet::Snippet; +pub use self::target::{RadioTarget, Target}; + +const ACTIVE_TAB: [u8; 6] = [b' ', b'"', b'(', b'{', b'\'', b'\n']; + +#[cfg_attr(test, derive(PartialEq, Debug))] +pub struct Objects<'a> { + text: &'a str, + off: usize, +} + +impl<'a> Objects<'a> { + pub fn new(text: &'a str) -> Objects<'a> { + Objects { text, off: 0 } + } +} + +pub enum Object<'a> { + Cookie(Cookie<'a>), + FnRef(FnRef<'a>), + InlineCall(InlineCall<'a>), + InlineSrc(InlineSrc<'a>), + Link(Link<'a>), + Macros(Macros<'a>), + RadioTarget(RadioTarget<'a>), + Snippet(Snippet<'a>), + Target(Target<'a>), + + Bold(&'a str), + Verbatim(&'a str), + Italic(&'a str), + Strike(&'a str), + Underline(&'a str), + Code(&'a str), + + Text(&'a str), +} + +impl<'a> Object<'a> { + pub fn parse(src: &'a str) -> (Object<'a>, usize) { + macro_rules! parse { + ($ty:ident) => { + $ty::parse(src).map(|(s, l)| (Object::$ty(s), l)) + }; + } + + macro_rules! 
parse_emphasis {
+            ($mk:tt, $ty:ident) => {
+                Emphasis::parse(src, $mk).map(|(s, l)| (Object::$ty(s), l))
+            };
+        }
+
+        // Dispatch on the first byte; each arm tries the parsers that can start
+        // with that byte. Indexing `[0]` means `src` must not be empty.
+        (match src.as_bytes()[0] {
+            b'@' => parse!(Snippet),
+            b'[' => parse!(FnRef)
+                .or_else(|| parse!(Link))
+                .or_else(|| parse!(Cookie)),
+            b's' => parse!(InlineSrc),
+            b'c' => parse!(InlineCall),
+            b'{' => parse!(Macros),
+            b'<' => parse!(RadioTarget).or_else(|| parse!(Target)),
+            b'*' => parse_emphasis!(b'*', Bold),
+            b'=' => parse_emphasis!(b'=', Verbatim),
+            b'/' => parse_emphasis!(b'/', Italic),
+            b'+' => parse_emphasis!(b'+', Strike),
+            b'_' => parse_emphasis!(b'_', Underline),
+            b'~' => parse_emphasis!(b'~', Code),
+            _ => None,
+        })
+        .unwrap_or_else(|| {
+            // Nothing matched: emit one whole character as plain text. Slicing
+            // `&src[0..1]` panics when that character is longer than one byte,
+            // so take its real UTF-8 width.
+            let len = src.chars().next().map_or(1, char::len_utf8);
+            (Object::Text(&src[..len]), len)
+        })
+    }
+}
diff --git a/src/objects/snippet.rs b/src/objects/snippet.rs
new file mode 100644
index 0000000..aa6e68f
--- /dev/null
+++ b/src/objects/snippet.rs
@@ -0,0 +1,65 @@
+/// An export snippet of the form `@@name:value@@`.
+#[cfg_attr(test, derive(PartialEq, Debug))]
+pub struct Snippet<'a> {
+    /// Text between the opening `@@` and the first `:`.
+    pub name: &'a str,
+    /// Text between that `:` and the closing `@@`; may be empty.
+    pub value: &'a str,
+}
+
+impl<'a> Snippet<'a> {
+    /// Parses an export snippet at the start of `src`.
+    ///
+    /// Returns the snippet and the number of bytes consumed (closing `@@`
+    /// included). Returns `None` when the `@@` prefix is missing or the name is
+    /// empty. The tests below also show `None` when the name is not terminated
+    /// by `:` or the closing `@@` is missing.
+    pub fn parse(src: &'a str) -> Option<(Snippet<'a>, usize)> {
+        starts_with!(src, "@@");
+
+        let name = until_while!(src, 2, b':', |c: u8| c.is_ascii_alphanumeric() || c == b'-');
+
+        // `name == 2` means `:` directly follows `@@`, i.e. the name is empty.
+        if name == 2 {
+            return None;
+        }
+
+        let end = find!(src, name + 1, "@@");
+
+        Some((
+            Snippet {
+                name: &src[2..name],
+                value: &src[name + 1..end],
+            },
+            end + 2,
+        ))
+    }
+}
+
+#[test]
+fn parse() {
+    assert_eq!(
+        Snippet::parse("@@html:@@").unwrap(),
+        (
+            Snippet {
+                name: "html",
+                value: ""
+            },
+            "@@html:@@".len()
+        )
+    );
+    assert_eq!(
+        Snippet::parse("@@latex:any arbitrary LaTeX code@@").unwrap(),
+        (
+            Snippet {
+                name: "latex",
+                value: "any arbitrary LaTeX code"
+            },
+            "@@latex:any arbitrary LaTeX code@@".len()
+        )
+    );
+    assert_eq!(
+        Snippet::parse("@@html:@@").unwrap(),
+        (
+            Snippet {
+                name: "html",
+                value: ""
+            },
+            "@@html:@@".len()
+        )
+    );
+    assert!(Snippet::parse("@@html:@").is_none());
+    assert!(Snippet::parse("@html:@@").is_none()); + 
assert!(Snippet::parse("@@html@@").is_none()); + assert!(Snippet::parse("@@:@@").is_none()); +} diff --git a/src/objects/target.rs b/src/objects/target.rs new file mode 100644 index 0000000..2131e96 --- /dev/null +++ b/src/objects/target.rs @@ -0,0 +1,72 @@ +use objects::Objects; + +#[cfg_attr(test, derive(PartialEq, Debug))] +// TODO: text-markup, entities, latex-fragments, subscript and superscript +pub struct RadioTarget<'a>(Objects<'a>); + +impl<'a> RadioTarget<'a> { + pub fn parse(src: &'a str) -> Option<(RadioTarget<'a>, usize)> { + starts_with!(src, "<<<"); + expect!(src, 3, |c| c != b' '); + + let end = until_while!(src, 3, b'>', |c| c != b'<' && c != b'\n'); + + expect!(src, end - 1, |c| c != b' '); + expect!(src, end + 1, b'>'); + expect!(src, end + 2, b'>'); + + Some((RadioTarget(Objects::new(&src[3..end])), end + 3)) + } +} + +#[cfg_attr(test, derive(PartialEq, Debug))] +pub struct Target<'a>(&'a str); + +impl<'a> Target<'a> { + pub fn parse(src: &'a str) -> Option<(Target<'a>, usize)> { + starts_with!(src, "<<"); + expect!(src, 2, |c| c != b' '); + + let end = until_while!(src, 2, b'>', |c| c != b'<' && c != b'\n'); + + expect!(src, end - 1, |c| c != b' '); + expect!(src, end + 1, b'>'); + + Some((Target(&src[2..end]), end + 2)) + } +} + +#[test] +fn parse() { + assert_eq!( + RadioTarget::parse("<<>>").unwrap(), + (RadioTarget(Objects::new("target")), "<<>>".len()) + ); + assert_eq!( + RadioTarget::parse("<<>>").unwrap(), + (RadioTarget(Objects::new("tar get")), "<<>>".len()) + ); + assert!(RadioTarget::parse("<<>>").is_none()); + assert!(RadioTarget::parse("<<< target>>>").is_none()); + assert!(RadioTarget::parse("<<>>").is_none()); + assert!(RadioTarget::parse("<<get>>>").is_none()); + assert!(RadioTarget::parse("<<>>").is_none()); + assert!(RadioTarget::parse("<>>").is_none()); + assert!(RadioTarget::parse("<<>").is_none()); + + assert_eq!( + Target::parse("<>").unwrap(), + (Target("target"), "<>".len()) + ); + assert_eq!( + 
Target::parse("<>").unwrap(), + (Target("tar get"), "<>".len()) + ); + assert!(Target::parse("<>").is_none()); + assert!(Target::parse("<< target>>").is_none()); + assert!(Target::parse("<>").is_none()); + assert!(Target::parse("<get>>").is_none()); + assert!(Target::parse("<>").is_none()); + assert!(Target::parse(">").is_none()); + assert!(Target::parse("<").is_none()); +} diff --git a/src/objects/timestamp.rs b/src/objects/timestamp.rs new file mode 100644 index 0000000..db20dbc --- /dev/null +++ b/src/objects/timestamp.rs @@ -0,0 +1,7 @@ +pub struct Time<'a> { + pub date: &'a str, +} + +pub enum Timestamp<'a> { + ActiveRange, +} diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..c0b66a5 --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,199 @@ +use elements::*; +use headline::*; +use objects::*; + +#[cfg_attr(test, derive(PartialEq))] +#[derive(Copy, Clone, Debug)] +pub enum Container { + Block, + Bold, + Drawer, + Headline { beg: usize, end: usize }, + Italic, + LatexEnv, + List, + Paragraph, + Section { end: usize }, + StrikeThrough, + Table, + Underline, +} + +#[cfg_attr(test, derive(PartialEq, Debug))] +pub enum Event<'a> { + StartHeadline(Headline<'a>), + EndHeadline, + + StartSection, + EndSection, + + Paragraph, + BlockStart, + BlockEnd, + DynBlockStart, + DynBlockEnd, + ListStart, + ListEnd, + ParagraphStart, + ParagraphEnd, + + AffKeywords, + + Call, + + Clock, + + Comment, + + TableStart, + TableEnd, + TableCell, + + LatexEnv, + StrikeThrough, + FnDef(FnDef<'a>), + Keyword(Keyword<'a>), + Rule, + Cookie(Cookie<'a>), + FnRef(FnRef<'a>), + InlineCall(InlineCall<'a>), + InlineSrc(InlineSrc<'a>), + Link(Link<'a>), + Macros(Macros<'a>), + RadioTarget(RadioTarget<'a>), + Snippet(Snippet<'a>), + Target(Target<'a>), + Bold(&'a str), + Verbatim(&'a str), + Italic(&'a str), + Strike(&'a str), + Underline(&'a str), + Code(&'a str), + + Text(&'a str), +} + +pub struct Parser<'a> { + text: &'a str, + stack: Vec, + off: usize, +} + +impl<'a> 
Parser<'a> { + pub fn new(text: &'a str) -> Parser<'a> { + Parser { + text, + stack: Vec::new(), + off: 0, + } + } + + fn start_section_or_headline(&mut self, tail: &'a str) -> Event<'a> { + let end = Headline::find_level(tail, std::usize::MAX); + if end != 0 { + self.stack.push(Container::Section { + end: self.off + end, + }); + Event::StartSection + } else { + self.start_headline(tail) + } + } + + fn end_section(&mut self) -> Event<'a> { + self.stack.pop(); + Event::EndSection + } + + fn start_headline(&mut self, tail: &'a str) -> Event<'a> { + let (hdl, off, end) = Headline::parse(tail); + self.stack.push(Container::Headline { + beg: self.off + off, + end: self.off + end, + }); + self.off += off; + Event::StartHeadline(hdl) + } + + fn end_headline(&mut self) -> Event<'a> { + self.stack.pop(); + Event::EndHeadline + } +} + +impl<'a> Iterator for Parser<'a> { + type Item = Event<'a>; + + fn next(&mut self) -> Option> { + let tail = &self.text[self.off..]; + + if self.stack.is_empty() { + if self.off >= self.text.len() { + None + } else { + Some(self.start_section_or_headline(tail)) + } + } else { + let last = *self.stack.last_mut()?; + + Some(match last { + Container::Headline { beg, end } => { + if self.off >= end { + self.end_headline() + } else if self.off == beg { + self.start_section_or_headline(tail) + } else { + self.start_headline(tail) + } + } + Container::Section { end } => { + if self.off >= end { + self.end_section() + } else { + match Element::find_elem(&self.text[self.off..end]) { + (Element::Paragraph(_), off) => { + self.off += off; + Event::Paragraph + } + } + } + } + _ => unimplemented!(), + }) + } + } +} + +#[test] +fn parse() { + use self::Event::*; + + let expected = vec![ + StartHeadline(Headline::new(1, None, None, "Title 1", None)), + StartSection, + Paragraph, + EndSection, + StartHeadline(Headline::new(2, None, None, "Title 2", None)), + StartSection, + Paragraph, + EndSection, + EndHeadline, + EndHeadline, + 
// src/utils.rs
//
// Byte-scanning helper macros used by the element and object parsers.
//
// Most of them expand to an early `return None`, so they can only be used
// inside functions whose return type is `Option<_>`.

/// Returns `None` from the enclosing function unless the byte at `$index` of
/// `$src` exists and matches `$expect`.
///
/// `$expect` is either a single token compared with `==` (e.g. `b'>'`) or a
/// predicate taking the byte (e.g. `|c| c != b' '`).
#[macro_export]
macro_rules! expect {
    ($src:ident, $index:expr, $expect:tt) => {
        match $src.as_bytes().get($index) {
            Some(&byte) if byte == $expect => {}
            _ => return None,
        }
    };
    ($src:ident, $index:expr, $expect:expr) => {
        match $src.as_bytes().get($index) {
            Some(&byte) if $expect(byte) => {}
            _ => return None,
        }
    };
}

/// Evaluates to the index of the first `\n` in `$src`, or to `$src.len()` when
/// there is none.
#[macro_export]
macro_rules! eol {
    ($src:expr) => {{
        let bytes = $src.as_bytes();
        bytes
            .iter()
            .position(|&byte| byte == b'\n')
            .unwrap_or(bytes.len())
    }};
}

/// Evaluates to `Some(index)` of the first byte of `$src` matching `$until`
/// (a single token compared with `==`, or a predicate), or to `None` when no
/// byte matches. Never returns from the enclosing function.
#[macro_export]
macro_rules! until {
    ($src:expr, $until:tt) => {{
        let bytes = $src.as_bytes();
        bytes.iter().position(|&byte| $until == byte)
    }};
    ($src:expr, $until:expr) => {{
        let bytes = $src.as_bytes();
        bytes.iter().position(|&byte| $until(byte))
    }};
}

/// Scans `$src` from byte index `$start` and evaluates to the index of the
/// first byte matching `$until`.
///
/// Returns `None` from the enclosing function when a byte that fails `$while`
/// is met first, or when the input runs out before `$until` matches (this
/// includes a `$start` at or beyond the end of `$src`).
#[macro_export]
macro_rules! until_while {
    ($src:expr, $start:expr, $until:tt, $while:expr) => {{
        let bytes = $src.as_bytes();
        let mut pos = $start;
        loop {
            match bytes.get(pos) {
                Some(&byte) if $until == byte => break pos,
                Some(&byte) if $while(byte) => pos += 1,
                // Either a disallowed byte or the end of the input.
                _ => return None,
            }
        }
    }};
    ($src:expr, $start:expr, $until:expr, $while:expr) => {{
        let bytes = $src.as_bytes();
        let mut pos = $start;
        loop {
            match bytes.get(pos) {
                Some(&byte) if $until(byte) => break pos,
                Some(&byte) if $while(byte) => pos += 1,
                // Either a disallowed byte or the end of the input.
                _ => return None,
            }
        }
    }};
}

/// Evaluates to whether the byte at index `$i` of `$s` equals `$p`; returns
/// `None` from the enclosing function when `$i` is out of bounds.
#[macro_export]
macro_rules! cond_eq {
    ($s:ident, $i:expr, $p:expr) => {
        // `get` also covers `$i == $s.len()`, which plain indexing would panic on.
        match $s.as_bytes().get($i) {
            Some(&byte) => byte == $p,
            None => return None,
        }
    };
}

/// Evaluates to the byte offset in `$s` of the first `char` at or after byte
/// offset `$i` that satisfies the predicate `$p`; returns `None` from the
/// enclosing function when no character does.
#[macro_export]
macro_rules! position {
    ($s:ident, $i:expr, $p:expr) => {{
        let start = $i;
        let rest = &$s[start..];
        match rest.chars().position($p) {
            // `position` counts characters; convert that count into bytes so
            // the result can be used to slice `$s`.
            Some(skipped) => {
                start + rest.chars().take(skipped).map(char::len_utf8).sum::<usize>()
            }
            None => return None,
        }
    }};
}

/// Evaluates to the byte offset in `$s` of the first match of pattern `$p` at
/// or after byte offset `$i`; returns `None` from the enclosing function when
/// there is no match.
#[macro_export]
macro_rules! find {
    ($s:ident, $i:expr, $p:expr) => {{
        let start = $i;
        match $s[start..].find($p) {
            Some(found) => start + found,
            None => return None,
        }
    }};
}

/// Returns `None` from the enclosing function unless `$s` starts with `$p`.
#[macro_export]
macro_rules! starts_with {
    ($s:ident, $p:expr) => {
        if !$s.starts_with($p) {
            return None;
        }
    };
}

// NOTE(review): the transcriber below is not valid Rust (it refers to `self`
// and `ch`, and its two statements are not separated), so any invocation
// fails to compile. The intended behaviour is not clear from this file --
// TODO: fix or remove.
#[macro_export]
macro_rules! next_line {
    ($s:ident, $p:expr) => {
        self.chars().position(|c| c == ch).unwrap_or(self.len())
        if !$s.starts_with($p) {
            return None;
        }
    };
}

/// Evaluates to the index of the first byte of `$src` at or after `$from`
/// that is not a space (`b' '`).
// NOTE(review): when everything from `$from` onwards is spaces this evaluates
// to `$from` itself rather than `$src.len()` -- confirm callers expect that.
#[macro_export]
macro_rules! skip_whitespace {
    ($src:ident, $from:ident) => {
        until!($src[$from..], |c: u8| c != b' ').unwrap_or(0) + $from
    };
}