diff --git a/src/elements/dyn_block.rs b/src/elements/dyn_block.rs index 78df32f..452dbe7 100644 --- a/src/elements/dyn_block.rs +++ b/src/elements/dyn_block.rs @@ -1,7 +1,7 @@ use crate::lines::Lines; use memchr::memchr2; -// return (name, parameters, contents-begin, contents-end, end) +/// return (name, parameters, contents-begin, contents-end, end) #[inline] pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> { debug_assert!(src.starts_with("#+")); @@ -11,7 +11,10 @@ pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> { } let bytes = src.as_bytes(); - let args = eol!(src); + + let args = memchr::memchr(b'\n', src.as_bytes()) + .map(|i| i + 1) + .unwrap_or_else(|| src.len()); let name = memchr2(b' ', b'\n', &bytes[9..]) .map(|i| i + 9) .filter(|&i| { @@ -56,7 +59,7 @@ CONTENTS #+END: " ), - Some(("clocktable", Some(":scope file"), 31, 40, 48)) - ) + Some(("clocktable", Some(":scope file"), 32, 40, 48)) + ); } } diff --git a/src/elements/fn_def.rs b/src/elements/fn_def.rs index 3026398..c490503 100644 --- a/src/elements/fn_def.rs +++ b/src/elements/fn_def.rs @@ -11,7 +11,7 @@ pub fn parse(src: &str) -> Option<(&str, &str, usize)> { .all(|&c| c.is_ascii_alphanumeric() || c == b'-' || c == b'_') })?; - let end = eol!(src); + let end = memchr::memchr(b'\n', src.as_bytes()).unwrap_or_else(|| src.len()); Some((&src[4..label], &src[label + 1..end], end)) } diff --git a/src/elements/mod.rs b/src/elements/mod.rs index bb4ef75..1146648 100644 --- a/src/elements/mod.rs +++ b/src/elements/mod.rs @@ -7,8 +7,7 @@ pub mod rule; pub use self::keyword::Key; -use memchr::memchr; -use memchr::memchr_iter; +use memchr::{memchr, memchr_iter}; #[cfg_attr(test, derive(PartialEq))] #[derive(Debug)] @@ -79,68 +78,26 @@ pub enum Element<'a> { }, } -impl<'a> Element<'a> { - // return (element, off, next element, next offset) - // the end of first element is relative to the offset - // next offset is relative to the end of the first element - pub fn next_2(src: &'a str) -> (Option>, usize, Option<(Element<'a>, usize)>) { - // skip empty lines - let mut pos = match src.chars().position(|c| c != '\n') { - Some(pos) => pos, - None => return (None, src.len(), None), - }; - let start = pos; - let bytes = src.as_bytes(); - let mut line_ends = memchr_iter(b'\n', &bytes[start..]).map(|i| i + start); +// return (element, off, next element, next offset) +// the end of first element is relative to the offset +// next offset is relative to the end of the first element +pub fn parse<'a>(src: &'a str) -> (Option>, usize, Option<(Element<'a>, usize)>) { + // skip empty lines + let mut pos = match src.chars().position(|c| c != '\n') { + Some(pos) => pos, + None => return (None, src.len(), None), + }; + let start = pos; + let bytes = src.as_bytes(); + let mut line_ends = memchr_iter(b'\n', &bytes[start..]).map(|i| i + start); - loop { - let line_beg = pos; + loop { + let line_beg = pos; - macro_rules! brk { - ($ele:expr, $off:expr) => { - break if line_beg == 0 || pos == start { - (Some($ele), start + $off, None) - } else { - ( - Some(Element::Paragraph { - cont_end: line_beg - start - 1, - end: line_beg - start, - }), - start, - Some(($ele, $off)), - ) - }; - }; - } - - // Unlike other element, footnote def must starts at column 0 - if bytes[pos..].starts_with(b"[fn:") { - if let Some((label, cont, off)) = fn_def::parse(&src[pos..]) { - brk!(Element::FnDef { label, cont }, off + 1); - } - } - - if bytes[pos] == b'\n' { - break ( - Some(Element::Paragraph { - cont_end: pos - start - 1, - end: pos - start + 1, - }), - start, - None, - ); - } - - pos = skip_space!(src, pos); - - let (is_item, ordered) = list::is_item(&src[pos..]); - if is_item { - let list = Element::List { - ident: pos - line_beg, - ordered, - }; - break if line_beg == start { - (Some(list), start, None) + macro_rules! brk { + ($ele:expr, $off:expr) => { + break if line_beg == 0 || pos == start { + (Some($ele), start + $off, None) } else { ( Some(Element::Paragraph { @@ -148,245 +105,289 @@ impl<'a> Element<'a> { end: line_beg - start, }), start, - Some((list, 0)), + Some(($ele, $off)), ) }; + }; + } + + // Unlike other element, footnote def must starts at column 0 + if bytes[pos..].starts_with(b"[fn:") { + if let Some((label, cont, off)) = fn_def::parse(&src[pos..]) { + brk!(Element::FnDef { label, cont }, off + 1); + } + } + + if bytes[pos] == b'\n' { + break ( + Some(Element::Paragraph { + cont_end: pos - start - 1, + end: pos - start + 1, + }), + start, + None, + ); + } + + pos = skip_space!(src, pos); + + let (is_item, ordered) = list::is_item(&src[pos..]); + if is_item { + let list = Element::List { + ident: pos - line_beg, + ordered, + }; + break if line_beg == start { + (Some(list), start, None) + } else { + ( + Some(Element::Paragraph { + cont_end: line_beg - start - 1, + end: line_beg - start, + }), + start, + Some((list, 0)), + ) + }; + } + + // TODO: LaTeX environment + if bytes[pos..].starts_with(b"\\begin{") {} + + // Rule + if bytes[pos] == b'-' { + let off = rule::parse(&src[pos..]); + if off != 0 { + brk!(Element::Rule, off); + } + } + + // TODO: multiple lines fixed width area + if bytes[pos..].starts_with(b": ") || bytes[pos..].starts_with(b":\n") { + let eol = memchr(b'\n', &bytes[pos..]) + .map(|i| i + 1) + .unwrap_or_else(|| src.len() - pos); + brk!(Element::FixedWidth(&src[pos + 1..pos + eol].trim()), eol); + } + + if bytes[pos..].starts_with(b"#+") { + if let Some((name, args, cont_beg, cont_end, end)) = block::parse(&src[pos..]) { + let cont = &src[pos + cont_beg..pos + cont_end]; + match name.to_uppercase().as_str() { + "COMMENT" => brk!(Element::CommentBlock { args, cont }, end), + "EXAMPLE" => brk!(Element::ExampleBlock { args, cont }, end), + "EXPORT" => brk!(Element::ExportBlock { args, cont }, end), + "SRC" => brk!(Element::SrcBlock { args, cont }, end), + "VERSE" => brk!(Element::VerseBlock { args, cont }, end), + "CENTER" => brk!( + Element::CtrBlock { + args, + cont_end: cont_end - cont_beg, + end: end - cont_beg, + }, + cont_beg + ), + "QUOTE" => brk!( + Element::QteBlock { + args, + cont_end: cont_end - cont_beg, + end: end - cont_beg, + }, + cont_beg + ), + _ => brk!( + Element::SplBlock { + name, + args, + cont_end: cont_end - cont_beg, + end: end - cont_beg, + }, + cont_beg + ), + }; } - // TODO: LaTeX environment - if bytes[pos..].starts_with(b"\\begin{") {} - - // Rule - if bytes[pos] == b'-' { - let off = rule::parse(&src[pos..]); - if off != 0 { - brk!(Element::Rule, off); - } + if let Some((name, args, cont_beg, cont_end, end)) = dyn_block::parse(&src[pos..]) { + brk!( + Element::DynBlock { + name, + args, + cont_end: cont_end - cont_beg, + end: end - cont_beg, + }, + cont_beg + ) } - // TODO: multiple lines fixed width area - if bytes[pos..].starts_with(b": ") || bytes[pos..].starts_with(b":\n") { - let eol = memchr(b'\n', &bytes[pos..]) - .map(|i| i + 1) - .unwrap_or_else(|| src.len() - pos); - brk!(Element::FixedWidth(&src[pos + 1..pos + eol].trim()), eol); + if let Some((key, value, off)) = keyword::parse(&src[pos..]) { + brk!( + if let Key::Call = key { + Element::Call { value } + } else { + Element::Keyword { key, value } + }, + off + ) } + } - if bytes[pos..].starts_with(b"#+") { - if let Some((name, args, cont_beg, cont_end, end)) = block::parse(&src[pos..]) { - let cont = &src[pos + cont_beg..pos + cont_end]; - match name.to_uppercase().as_str() { - "COMMENT" => brk!(Element::CommentBlock { args, cont }, end), - "EXAMPLE" => brk!(Element::ExampleBlock { args, cont }, end), - "EXPORT" => brk!(Element::ExportBlock { args, cont }, end), - "SRC" => brk!(Element::SrcBlock { args, cont }, end), - "VERSE" => brk!(Element::VerseBlock { args, cont }, end), - "CENTER" => brk!( - Element::CtrBlock { - args, - cont_end: cont_end - cont_beg, - end: end - cont_beg, - }, - cont_beg - ), - "QUOTE" => brk!( - Element::QteBlock { - args, - cont_end: cont_end - cont_beg, - end: end - cont_beg, - }, - cont_beg - ), - _ => brk!( - Element::SplBlock { - name, - args, - cont_end: cont_end - cont_beg, - end: end - cont_beg, - }, - cont_beg - ), - }; - } + // Comment + // TODO: multiple lines comment + if bytes[pos..].starts_with(b"# ") || bytes[pos..].starts_with(b"#\n") { + let eol = memchr(b'\n', &bytes[pos..]) + .map(|i| i + 1) + .unwrap_or_else(|| src.len() - pos); + brk!(Element::Comment(&src[pos + 1..pos + eol].trim()), eol); + } - if let Some((name, args, cont_beg, cont_end, end)) = dyn_block::parse(&src[pos..]) { - brk!( - Element::DynBlock { - name, - args, - cont_end, - end, - }, - cont_beg - ) - } + // move to the beginning of the next line + if let Some(off) = line_ends.next() { + pos = off + 1; - if let Some((key, value, off)) = keyword::parse(&src[pos..]) { - brk!( - if let Key::Call = key { - Element::Call { value } - } else { - Element::Keyword { key, value } - }, - off - ) - } - } - - // Comment - // TODO: multiple lines comment - if bytes[pos..].starts_with(b"# ") || bytes[pos..].starts_with(b"#\n") { - let eol = memchr(b'\n', &bytes[pos..]) - .map(|i| i + 1) - .unwrap_or_else(|| src.len() - pos); - brk!(Element::Comment(&src[pos + 1..pos + eol].trim()), eol); - } - - // move to the beginning of the next line - if let Some(off) = line_ends.next() { - pos = off + 1; - - // the last character - if pos >= src.len() { - break ( - Some(Element::Paragraph { - cont_end: src.len() - start - 1, - end: src.len() - start, - }), - start, - None, - ); - } - } else { + // the last character + if pos >= src.len() { break ( Some(Element::Paragraph { - cont_end: src.len() - start, + cont_end: src.len() - start - 1, end: src.len() - start, }), start, None, ); } + } else { + break ( + Some(Element::Paragraph { + cont_end: src.len() - start, + end: src.len() - start, + }), + start, + None, + ); } } } -#[test] -fn next_2() { - use self::Element::*; +#[cfg(test)] +mod tests { + #[test] + fn parse() { + use super::parse; + use super::Element::*; - assert_eq!(Element::next_2("\n\n\n"), (None, 3, None)); + assert_eq!(parse("\n\n\n"), (None, 3, None)); - let len = "Lorem ipsum dolor sit amet.".len(); - assert_eq!( - Element::next_2("\nLorem ipsum dolor sit amet.\n\n\n"), - ( - Some(Paragraph { - cont_end: len, - end: len + 2, - }), - 1, - None - ) - ); - assert_eq!( - Element::next_2("\n\nLorem ipsum dolor sit amet.\n\n"), - ( - Some(Paragraph { - cont_end: len, - end: len + 2, - }), - 2, - None - ) - ); - assert_eq!( - Element::next_2("\nLorem ipsum dolor sit amet.\n"), - ( - Some(Paragraph { - cont_end: len, - end: len + 1, - }), - 1, - None - ) - ); - assert_eq!( - Element::next_2("\n\n\nLorem ipsum dolor sit amet."), - ( - Some(Paragraph { - cont_end: len, - end: len, - }), - 3, - None - ) - ); + let len = "Lorem ipsum dolor sit amet.".len(); + assert_eq!( + parse("\nLorem ipsum dolor sit amet.\n\n\n"), + ( + Some(Paragraph { + cont_end: len, + end: len + 2, + }), + 1, + None + ) + ); + assert_eq!( + parse("\n\nLorem ipsum dolor sit amet.\n\n"), + ( + Some(Paragraph { + cont_end: len, + end: len + 2, + }), + 2, + None + ) + ); + assert_eq!( + parse("\nLorem ipsum dolor sit amet.\n"), + ( + Some(Paragraph { + cont_end: len, + end: len + 1, + }), + 1, + None + ) + ); + assert_eq!( + parse("\n\n\nLorem ipsum dolor sit amet."), + ( + Some(Paragraph { + cont_end: len, + end: len, + }), + 3, + None + ) + ); - assert_eq!( - Element::next_2("\n\n\n: Lorem ipsum dolor sit amet.\n"), - ( - Some(FixedWidth("Lorem ipsum dolor sit amet.")), - "\n\n\n: Lorem ipsum dolor sit amet.\n".len(), - None - ) - ); - assert_eq!( - Element::next_2("\n\n\n: Lorem ipsum dolor sit amet."), - ( - Some(FixedWidth("Lorem ipsum dolor sit amet.")), - "\n\n\n: Lorem ipsum dolor sit amet.".len(), - None - ) - ); + assert_eq!( + parse("\n\n\n: Lorem ipsum dolor sit amet.\n"), + ( + Some(FixedWidth("Lorem ipsum dolor sit amet.")), + "\n\n\n: Lorem ipsum dolor sit amet.\n".len(), + None + ) + ); + assert_eq!( + parse("\n\n\n: Lorem ipsum dolor sit amet."), + ( + Some(FixedWidth("Lorem ipsum dolor sit amet.")), + "\n\n\n: Lorem ipsum dolor sit amet.".len(), + None + ) + ); - assert_eq!( - Element::next_2("\n\nLorem ipsum dolor sit amet.\n: Lorem ipsum dolor sit amet.\n"), - ( - Some(Paragraph { - cont_end: len, - end: len + 1, - }), - 2, - Some((FixedWidth("Lorem ipsum dolor sit amet."), 30)) - ) - ); + assert_eq!( + parse("\n\nLorem ipsum dolor sit amet.\n: Lorem ipsum dolor sit amet.\n"), + ( + Some(Paragraph { + cont_end: len, + end: len + 1, + }), + 2, + Some((FixedWidth("Lorem ipsum dolor sit amet."), 30)) + ) + ); - assert_eq!( - Element::next_2("\n\nLorem ipsum dolor sit amet.\n+ Lorem ipsum dolor sit amet.\n"), - ( - Some(Paragraph { - cont_end: len, - end: len + 1, - }), - 2, - Some(( - List { - ident: 0, - ordered: false, - }, - 0 - )) - ) - ); + assert_eq!( + parse("\n\nLorem ipsum dolor sit amet.\n+ Lorem ipsum dolor sit amet.\n"), + ( + Some(Paragraph { + cont_end: len, + end: len + 1, + }), + 2, + Some(( + List { + ident: 0, + ordered: false, + }, + 0 + )) + ) + ); - assert_eq!( - Element::next_2("\n\nLorem ipsum dolor sit amet.\n#+BEGIN_QUOTE\nLorem ipsum dolor sit amet.\n#+END_QUOTE\n"), - ( - Some(Paragraph { - cont_end: len, - end: len + 1, - }), - 2, - Some(( - QteBlock { - args: None, - cont_end: len + 1, - end: len + 1 + "#+END_QUOTE\n".len() - }, - "#+BEGIN_QUOTE\n".len() - )) - ) - ); - // TODO: more tests + assert_eq!( + parse("\n\nLorem ipsum dolor sit amet.\n#+BEGIN_QUOTE\nLorem ipsum dolor sit amet.\n#+END_QUOTE\n"), + ( + Some(Paragraph { + cont_end: len, + end: len + 1, + }), + 2, + Some(( + QteBlock { + args: None, + cont_end: len + 1, + end: len + 1 + "#+END_QUOTE\n".len() + }, + "#+BEGIN_QUOTE\n".len() + )) + ) + ); + // TODO: more tests + } } diff --git a/src/headline.rs b/src/headline.rs index 238c6d9..8c5e1c0 100644 --- a/src/headline.rs +++ b/src/headline.rs @@ -1,5 +1,7 @@ //! Headline +use memchr::memchr2; + #[cfg_attr(test, derive(PartialEq))] #[derive(Debug)] pub struct Headline<'a> { @@ -34,20 +36,12 @@ impl<'a> Headline<'a> { #[inline] fn parse_keyword(src: &'a str) -> Option<(&'a str, usize)> { - let mut pos = 0; - while pos < src.len() { - if src.as_bytes()[pos] == b' ' { - break; - } else if src.as_bytes()[pos].is_ascii_uppercase() { - pos += 1; - } else { - return None; - } - } - if pos == src.len() || src[0..pos] == *"COMMENT" { - None + let pos = memchr2(b' ', b'\n', src.as_bytes()).unwrap_or_else(|| src.len()); + let word = &src[0..pos]; + if word.as_bytes().iter().all(|&c| c.is_ascii_uppercase()) && word != "COMMENT" { + Some((word, pos)) } else { - Some((&src[0..pos], pos)) + None } } @@ -80,21 +74,13 @@ impl<'a> Headline<'a> { /// assert_eq!(hdl.keyword, Some("DONE")); /// ``` pub fn parse(src: &'a str) -> (Headline<'a>, usize, usize) { - let mut level = 0; - loop { - if src.as_bytes()[level] == b'*' { - level += 1; - } else { - break; - } - } + let level = memchr2(b'\n', b' ', src.as_bytes()).unwrap_or_else(|| src.len()); - let eol = eol!(src); - let end = if eol == src.len() { - eol - } else { - Headline::find_level(&src[eol..], level) + eol - }; + debug_assert!(src.as_bytes()[0..level].iter().all(|&c| c == b'*')); + + let (eol, end) = memchr::memchr(b'\n', src.as_bytes()) + .map(|i| (i, Headline::find_level(&src[i..], level) + i)) + .unwrap_or_else(|| (src.len(), src.len())); let mut title_start = skip_space!(src, level); @@ -129,12 +115,11 @@ impl<'a> Headline<'a> { pub fn find_level(src: &str, level: usize) -> usize { use jetscii::ByteSubstring; - use memchr::memchr2; let bytes = src.as_bytes(); if bytes[0] == b'*' { if let Some(stars) = memchr2(b'\n', b' ', bytes) { - if stars > 0 && stars <= level && bytes[0..stars].iter().all(|&c| c == b'*') { + if stars <= level && bytes[0..stars].iter().all(|&c| c == b'*') { return 0; } } diff --git a/src/objects/mod.rs b/src/objects/mod.rs index 21607e6..72d878d 100644 --- a/src/objects/mod.rs +++ b/src/objects/mod.rs @@ -68,152 +68,133 @@ pub enum Object<'a> { Text(&'a str), } -impl<'a> Object<'a> { - pub fn next_2(src: &'a str) -> (Object<'a>, usize, Option<(Object<'a>, usize)>) { - let bytes = src.as_bytes(); +pub fn parse<'a>(src: &'a str) -> (Object<'a>, usize, Option<(Object<'a>, usize)>) { + let bytes = src.as_bytes(); - if src.len() <= 2 { - return (Object::Text(src), src.len(), None); + if src.len() <= 2 { + return (Object::Text(src), src.len(), None); + } + + let bs = bytes!(b'@', b' ', b'"', b'(', b'\n', b'{', b'<', b'['); + + let mut pos = 0; + loop { + macro_rules! brk { + ($obj:expr, $off:expr, $pos:expr) => { + break if $pos == 0 { + ($obj, $off, None) + } else { + (Object::Text(&src[0..$pos]), $pos, Some(($obj, $off))) + }; + }; } - let bs = bytes!(b'@', b' ', b'"', b'(', b'\n', b'{', b'<', b'['); - - let mut pos = 0; - loop { - macro_rules! brk { - ($obj:expr, $off:expr, $pos:expr) => { - break if $pos == 0 { - ($obj, $off, None) - } else { - (Object::Text(&src[0..$pos]), $pos, Some(($obj, $off))) - }; - }; + match bytes[pos] { + b'@' if bytes[pos + 1] == b'@' => { + if let Some((name, value, off)) = snippet::parse(&src[pos..]) { + brk!(Object::Snippet { name, value }, off, pos); + } } - - let mut pre = pos; - - match bytes[pos] { - b'@' if bytes[pos + 1] == b'@' => { - if let Some((name, value, off)) = snippet::parse(&src[pos..]) { - brk!(Object::Snippet { name, value }, off, pos); - } + b'{' if bytes[pos + 1] == b'{' && bytes[pos + 2] == b'{' => { + if let Some((name, args, off)) = macros::parse(&src[pos..]) { + brk!(Object::Macros { name, args }, off, pos); } - b'{' if bytes[pos + 1] == b'{' && bytes[pos + 2] == b'{' => { - if let Some((name, args, off)) = macros::parse(&src[pos..]) { - brk!(Object::Macros { name, args }, off, pos); - } - } - b'<' if bytes[pos + 1] == b'<' => { - if bytes[pos + 2] == b'<' { - if let Some((target, off)) = radio_target::parse(&src[pos..]) { - brk!(Object::RadioTarget { target }, off, pos); - } - } else if bytes[pos + 2] != b'\n' { - if let Some((target, off)) = target::parse(&src[pos..]) { - brk!(Object::Target { target }, off, pos); - } - } - } - b'[' => { - if bytes[pos + 1..].starts_with(b"fn:") { - if let Some((label, def, off)) = fn_ref::parse(&src[pos..]) { - brk!(Object::FnRef { label, def }, off, pos); - } - } - - if bytes[pos + 1] == b'[' { - if let Some((path, desc, off)) = link::parse(&src[pos..]) { - brk!(Object::Link { path, desc }, off, pos); - } - } - - if let Some((cookie, off)) = cookie::parse(&src[pos..]) { - brk!(Object::Cookie(cookie), off, pos); - } - // TODO: Timestamp - } - b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => pre += 1, - _ => (), } - - match bytes[pre] { - b'*' => { - if let Some(end) = emphasis::parse(&src[pre..], b'*') { - brk!(Object::Bold { end }, 1, pre); + b'<' if bytes[pos + 1] == b'<' => { + if bytes[pos + 2] == b'<' { + if let Some((target, off)) = radio_target::parse(&src[pos..]) { + brk!(Object::RadioTarget { target }, off, pos); + } + } else if bytes[pos + 2] != b'\n' { + if let Some((target, off)) = target::parse(&src[pos..]) { + brk!(Object::Target { target }, off, pos); } } - b'+' => { - if let Some(end) = emphasis::parse(&src[pre..], b'+') { - brk!(Object::Strike { end }, 1, pre); - } - } - b'/' => { - if let Some(end) = emphasis::parse(&src[pre..], b'/') { - brk!(Object::Italic { end }, 1, pre); - } - } - b'_' => { - if let Some(end) = emphasis::parse(&src[pre..], b'_') { - brk!(Object::Underline { end }, 1, pre); - } - } - b'=' => { - if let Some(end) = emphasis::parse(&src[pre..], b'=') { - brk!(Object::Verbatim(&src[pre + 1..pre + end]), end + 1, pre); - } - } - b'~' => { - if let Some(end) = emphasis::parse(&src[pre..], b'~') { - brk!(Object::Code(&src[pre + 1..pre + end]), end + 1, pre); - } - } - b'c' if src[pre..].starts_with("call_") => { - if let Some((name, args, inside_header, end_header, off)) = - inline_call::parse(&src[pre..]) - { - brk!( - Object::InlineCall { - name, - args, - inside_header, - end_header, - }, - off, - pre - ); - } - } - b's' if src[pre..].starts_with("src_") => { - if let Some((lang, option, body, off)) = inline_src::parse(&src[pre..]) { - brk!(Object::InlineSrc { lang, option, body }, off, pre); - } - } - _ => (), } + b'[' => { + if bytes[pos + 1..].starts_with(b"fn:") { + if let Some((label, def, off)) = fn_ref::parse(&src[pos..]) { + brk!(Object::FnRef { label, def }, off, pos); + } + } - if let Some(off) = bs - .find(&bytes[pos + 1..]) - .map(|i| i + pos + 1) - .filter(|&i| i < src.len() - 2) - { - pos = off; - } else { - break (Object::Text(src), src.len(), None); + if bytes[pos + 1] == b'[' { + if let Some((path, desc, off)) = link::parse(&src[pos..]) { + brk!(Object::Link { path, desc }, off, pos); + } + } + + if let Some((cookie, off)) = cookie::parse(&src[pos..]) { + brk!(Object::Cookie(cookie), off, pos); + } + // TODO: Timestamp } + b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => { + if let Some((obj, off)) = parse_text_markup(&src[pos + 1..]) { + brk!(obj, off, pos + 1); + } + } + _ => { + if let Some((obj, off)) = parse_text_markup(&src[pos..]) { + brk!(obj, off, pos); + } + } + } + + if let Some(off) = bs + .find(&bytes[pos + 1..]) + .map(|i| i + pos + 1) + .filter(|&i| i < src.len() - 2) + { + pos = off; + } else { + break (Object::Text(src), src.len(), None); } } } -#[test] -fn next_2() { - // TODO: more tests - assert_eq!(Object::next_2("*bold*"), (Object::Bold { end: 5 }, 1, None)); - assert_eq!( - Object::next_2("Normal =verbatim="), - ( - Object::Text("Normal "), - "Normal ".len(), - Some((Object::Verbatim("verbatim"), "=verbatim=".len())) - ) - ); +fn parse_text_markup<'a>(src: &'a str) -> Option<(Object<'a>, usize)> { + match src.as_bytes()[0] { + b'*' => emphasis::parse(src, b'*').map(|end| (Object::Bold { end }, 1)), + b'+' => emphasis::parse(src, b'+').map(|end| (Object::Strike { end }, 1)), + b'/' => emphasis::parse(src, b'/').map(|end| (Object::Italic { end }, 1)), + b'_' => emphasis::parse(src, b'_').map(|end| (Object::Underline { end }, 1)), + b'=' => emphasis::parse(src, b'=').map(|end| (Object::Verbatim(&src[1..end]), end + 1)), + b'~' => emphasis::parse(src, b'~').map(|end| (Object::Code(&src[1..end]), end + 1)), + b's' if src.starts_with("src_") => inline_src::parse(src) + .map(|(lang, option, body, off)| (Object::InlineSrc { lang, option, body }, off)), + b'c' if src.starts_with("call_") => { + inline_call::parse(src).map(|(name, args, inside_header, end_header, off)| { + ( + Object::InlineCall { + name, + args, + inside_header, + end_header, + }, + off, + ) + }) + } + _ => None, + } +} + +#[cfg(test)] +mod tests { + #[test] + fn parse() { + use super::*; + + assert_eq!(parse("*bold*"), (Object::Bold { end: 5 }, 1, None)); + assert_eq!( + parse("Normal =verbatim="), + ( + Object::Text("Normal "), + "Normal ".len(), + Some((Object::Verbatim("verbatim"), "=verbatim=".len())) + ) + ); + // TODO: more tests + } } diff --git a/src/parser.rs b/src/parser.rs index bb52e85..7ea55bd 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,8 +1,8 @@ //! Parser -use crate::elements::*; +use crate::elements::{self, *}; use crate::headline::*; -use crate::objects::*; +use crate::objects::{self, *}; #[cfg_attr(test, derive(PartialEq))] #[derive(Copy, Clone, Debug)] @@ -227,7 +227,7 @@ impl<'a> Parser<'a> { fn next_sec_or_hdl(&mut self) -> Event<'a> { let end = Headline::find_level(&self.text[self.off..], std::usize::MAX); - debug_assert!(end <= self.text.len()); + debug_assert!(end <= self.text[self.off..].len()); if end != 0 { self.stack.push(Container::Section { end: self.off + end, @@ -241,7 +241,7 @@ impl<'a> Parser<'a> { fn next_hdl(&mut self) -> Event<'a> { let tail = &self.text[self.off..]; let (hdl, off, end) = Headline::parse(tail); - debug_assert!(end <= self.text.len()); + debug_assert!(end <= self.text[self.off..].len()); self.stack.push(Container::Headline { beg: self.off + off, end: self.off + end, @@ -257,7 +257,7 @@ impl<'a> Parser<'a> { .take() .map(|(ele, off)| (Some(ele), off)) .unwrap_or_else(|| { - let (ele, off, next_ele) = Element::next_2(text); + let (ele, off, next_ele) = elements::parse(text); self.ele_buf = next_ele; (ele, off) }); @@ -344,49 +344,48 @@ impl<'a> Parser<'a> { fn next_obj(&mut self, end: usize) -> Event<'a> { let text = &self.text[self.off..end]; let (obj, off) = self.obj_buf.take().unwrap_or_else(|| { - let (obj, off, next_obj) = Object::next_2(text); + let (obj, off, next_obj) = objects::parse(text); self.obj_buf = next_obj; (obj, off) }); debug_assert!(off <= text.len()); + self.off += off; + match obj { Object::Underline { end } => { debug_assert!(end <= text.len()); self.stack.push(Container::Underline { - cont_end: self.off + end, - end: self.off + end + 1, + cont_end: self.off + end - 1, + end: self.off + end, }); + Event::UnderlineBeg } Object::Strike { end } => { debug_assert!(end <= text.len()); self.stack.push(Container::Strike { - cont_end: self.off + end, - end: self.off + end + 1, + cont_end: self.off + end - 1, + end: self.off + end, }); + Event::StrikeBeg } Object::Italic { end } => { debug_assert!(end <= text.len()); self.stack.push(Container::Italic { - cont_end: self.off + end, - end: self.off + end + 1, + cont_end: self.off + end - 1, + end: self.off + end, }); + Event::ItalicBeg } Object::Bold { end } => { debug_assert!(end <= text.len()); self.stack.push(Container::Bold { - cont_end: self.off + end, - end: self.off + end + 1, + cont_end: self.off + end - 1, + end: self.off + end, }); + Event::BoldBeg } - _ => (), - } - - self.off += off; - - match obj { - Object::Bold { .. } => Event::BoldBeg, Object::Code(c) => Event::Code(c), Object::Cookie(c) => Event::Cookie(c), Object::FnRef { label, def } => Event::FnRef { label, def }, @@ -402,15 +401,12 @@ impl<'a> Parser<'a> { end_header, }, Object::InlineSrc { lang, option, body } => Event::InlineSrc { lang, option, body }, - Object::Italic { .. } => Event::ItalicBeg, Object::Link { path, desc } => Event::Link { path, desc }, Object::Macros { name, args } => Event::Macros { name, args }, Object::RadioTarget { target } => Event::RadioTarget { target }, Object::Snippet { name, value } => Event::Snippet { name, value }, - Object::Strike { .. } => Event::StrikeBeg, Object::Target { target } => Event::Target { target }, Object::Text(t) => Event::Text(t), - Object::Underline { .. } => Event::UnderlineBeg, Object::Verbatim(v) => Event::Verbatim(v), } } diff --git a/src/utils.rs b/src/utils.rs index a052273..4b21a45 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -10,18 +10,6 @@ macro_rules! expect { }; } -#[macro_export] -macro_rules! eol { - ($src:expr) => { - memchr::memchr(b'\n', $src.as_bytes()).unwrap_or_else(|| $src.len()) - }; - ($src:expr, $from:expr) => { - memchr::memchr(b'\n', $src.as_bytes()[$from..]) - .map(|i| i + $from) - .unwrap_or_else(|| $src.len()) - }; -} - #[macro_export] macro_rules! skip_space { ($src:ident) => { @@ -39,17 +27,3 @@ macro_rules! skip_space { .unwrap_or(0) }; } - -#[macro_export] -macro_rules! skip_empty_line { - ($src:ident, $from:expr) => {{ - let mut pos = $from; - loop { - if pos >= $src.len() || $src.as_bytes()[pos] != b'\n' { - break pos; - } else { - pos += 1; - } - } - }}; -}