refactor: cleanup

This commit is contained in:
PoiScript 2019-02-09 20:32:31 +08:00
parent c5b14256f0
commit 4d56633c43
7 changed files with 432 additions and 492 deletions

View file

@ -1,7 +1,7 @@
use crate::lines::Lines; use crate::lines::Lines;
use memchr::memchr2; use memchr::memchr2;
// return (name, parameters, contents-begin, contents-end, end) /// return (name, parameters, contents-begin, contents-end, end)
#[inline] #[inline]
pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> { pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> {
debug_assert!(src.starts_with("#+")); debug_assert!(src.starts_with("#+"));
@ -11,7 +11,10 @@ pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> {
} }
let bytes = src.as_bytes(); let bytes = src.as_bytes();
let args = eol!(src);
let args = memchr::memchr(b'\n', src.as_bytes())
.map(|i| i + 1)
.unwrap_or_else(|| src.len());
let name = memchr2(b' ', b'\n', &bytes[9..]) let name = memchr2(b' ', b'\n', &bytes[9..])
.map(|i| i + 9) .map(|i| i + 9)
.filter(|&i| { .filter(|&i| {
@ -56,7 +59,7 @@ CONTENTS
#+END: #+END:
" "
), ),
Some(("clocktable", Some(":scope file"), 31, 40, 48)) Some(("clocktable", Some(":scope file"), 32, 40, 48))
) );
} }
} }

View file

@ -11,7 +11,7 @@ pub fn parse(src: &str) -> Option<(&str, &str, usize)> {
.all(|&c| c.is_ascii_alphanumeric() || c == b'-' || c == b'_') .all(|&c| c.is_ascii_alphanumeric() || c == b'-' || c == b'_')
})?; })?;
let end = eol!(src); let end = memchr::memchr(b'\n', src.as_bytes()).unwrap_or_else(|| src.len());
Some((&src[4..label], &src[label + 1..end], end)) Some((&src[4..label], &src[label + 1..end], end))
} }

View file

@ -7,8 +7,7 @@ pub mod rule;
pub use self::keyword::Key; pub use self::keyword::Key;
use memchr::memchr; use memchr::{memchr, memchr_iter};
use memchr::memchr_iter;
#[cfg_attr(test, derive(PartialEq))] #[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)] #[derive(Debug)]
@ -79,11 +78,10 @@ pub enum Element<'a> {
}, },
} }
impl<'a> Element<'a> {
// return (element, off, next element, next offset) // return (element, off, next element, next offset)
// the end of first element is relative to the offset // the end of first element is relative to the offset
// next offset is relative to the end of the first element // next offset is relative to the end of the first element
pub fn next_2(src: &'a str) -> (Option<Element<'a>>, usize, Option<(Element<'a>, usize)>) { pub fn parse<'a>(src: &'a str) -> (Option<Element<'a>>, usize, Option<(Element<'a>, usize)>) {
// skip empty lines // skip empty lines
let mut pos = match src.chars().position(|c| c != '\n') { let mut pos = match src.chars().position(|c| c != '\n') {
Some(pos) => pos, Some(pos) => pos,
@ -214,8 +212,8 @@ impl<'a> Element<'a> {
Element::DynBlock { Element::DynBlock {
name, name,
args, args,
cont_end, cont_end: cont_end - cont_beg,
end, end: end - cont_beg,
}, },
cont_beg cont_beg
) )
@ -269,17 +267,19 @@ impl<'a> Element<'a> {
} }
} }
} }
}
#[cfg(test)]
mod tests {
#[test] #[test]
fn next_2() { fn parse() {
use self::Element::*; use super::parse;
use super::Element::*;
assert_eq!(Element::next_2("\n\n\n"), (None, 3, None)); assert_eq!(parse("\n\n\n"), (None, 3, None));
let len = "Lorem ipsum dolor sit amet.".len(); let len = "Lorem ipsum dolor sit amet.".len();
assert_eq!( assert_eq!(
Element::next_2("\nLorem ipsum dolor sit amet.\n\n\n"), parse("\nLorem ipsum dolor sit amet.\n\n\n"),
( (
Some(Paragraph { Some(Paragraph {
cont_end: len, cont_end: len,
@ -290,7 +290,7 @@ fn next_2() {
) )
); );
assert_eq!( assert_eq!(
Element::next_2("\n\nLorem ipsum dolor sit amet.\n\n"), parse("\n\nLorem ipsum dolor sit amet.\n\n"),
( (
Some(Paragraph { Some(Paragraph {
cont_end: len, cont_end: len,
@ -301,7 +301,7 @@ fn next_2() {
) )
); );
assert_eq!( assert_eq!(
Element::next_2("\nLorem ipsum dolor sit amet.\n"), parse("\nLorem ipsum dolor sit amet.\n"),
( (
Some(Paragraph { Some(Paragraph {
cont_end: len, cont_end: len,
@ -312,7 +312,7 @@ fn next_2() {
) )
); );
assert_eq!( assert_eq!(
Element::next_2("\n\n\nLorem ipsum dolor sit amet."), parse("\n\n\nLorem ipsum dolor sit amet."),
( (
Some(Paragraph { Some(Paragraph {
cont_end: len, cont_end: len,
@ -324,7 +324,7 @@ fn next_2() {
); );
assert_eq!( assert_eq!(
Element::next_2("\n\n\n: Lorem ipsum dolor sit amet.\n"), parse("\n\n\n: Lorem ipsum dolor sit amet.\n"),
( (
Some(FixedWidth("Lorem ipsum dolor sit amet.")), Some(FixedWidth("Lorem ipsum dolor sit amet.")),
"\n\n\n: Lorem ipsum dolor sit amet.\n".len(), "\n\n\n: Lorem ipsum dolor sit amet.\n".len(),
@ -332,7 +332,7 @@ fn next_2() {
) )
); );
assert_eq!( assert_eq!(
Element::next_2("\n\n\n: Lorem ipsum dolor sit amet."), parse("\n\n\n: Lorem ipsum dolor sit amet."),
( (
Some(FixedWidth("Lorem ipsum dolor sit amet.")), Some(FixedWidth("Lorem ipsum dolor sit amet.")),
"\n\n\n: Lorem ipsum dolor sit amet.".len(), "\n\n\n: Lorem ipsum dolor sit amet.".len(),
@ -341,7 +341,7 @@ fn next_2() {
); );
assert_eq!( assert_eq!(
Element::next_2("\n\nLorem ipsum dolor sit amet.\n: Lorem ipsum dolor sit amet.\n"), parse("\n\nLorem ipsum dolor sit amet.\n: Lorem ipsum dolor sit amet.\n"),
( (
Some(Paragraph { Some(Paragraph {
cont_end: len, cont_end: len,
@ -353,7 +353,7 @@ fn next_2() {
); );
assert_eq!( assert_eq!(
Element::next_2("\n\nLorem ipsum dolor sit amet.\n+ Lorem ipsum dolor sit amet.\n"), parse("\n\nLorem ipsum dolor sit amet.\n+ Lorem ipsum dolor sit amet.\n"),
( (
Some(Paragraph { Some(Paragraph {
cont_end: len, cont_end: len,
@ -371,7 +371,7 @@ fn next_2() {
); );
assert_eq!( assert_eq!(
Element::next_2("\n\nLorem ipsum dolor sit amet.\n#+BEGIN_QUOTE\nLorem ipsum dolor sit amet.\n#+END_QUOTE\n"), parse("\n\nLorem ipsum dolor sit amet.\n#+BEGIN_QUOTE\nLorem ipsum dolor sit amet.\n#+END_QUOTE\n"),
( (
Some(Paragraph { Some(Paragraph {
cont_end: len, cont_end: len,
@ -390,3 +390,4 @@ fn next_2() {
); );
// TODO: more tests // TODO: more tests
} }
}

View file

@ -1,5 +1,7 @@
//! Headline //! Headline
use memchr::memchr2;
#[cfg_attr(test, derive(PartialEq))] #[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)] #[derive(Debug)]
pub struct Headline<'a> { pub struct Headline<'a> {
@ -34,20 +36,12 @@ impl<'a> Headline<'a> {
#[inline] #[inline]
fn parse_keyword(src: &'a str) -> Option<(&'a str, usize)> { fn parse_keyword(src: &'a str) -> Option<(&'a str, usize)> {
let mut pos = 0; let pos = memchr2(b' ', b'\n', src.as_bytes()).unwrap_or_else(|| src.len());
while pos < src.len() { let word = &src[0..pos];
if src.as_bytes()[pos] == b' ' { if word.as_bytes().iter().all(|&c| c.is_ascii_uppercase()) && word != "COMMENT" {
break; Some((word, pos))
} else if src.as_bytes()[pos].is_ascii_uppercase() {
pos += 1;
} else { } else {
return None;
}
}
if pos == src.len() || src[0..pos] == *"COMMENT" {
None None
} else {
Some((&src[0..pos], pos))
} }
} }
@ -80,21 +74,13 @@ impl<'a> Headline<'a> {
/// assert_eq!(hdl.keyword, Some("DONE")); /// assert_eq!(hdl.keyword, Some("DONE"));
/// ``` /// ```
pub fn parse(src: &'a str) -> (Headline<'a>, usize, usize) { pub fn parse(src: &'a str) -> (Headline<'a>, usize, usize) {
let mut level = 0; let level = memchr2(b'\n', b' ', src.as_bytes()).unwrap_or_else(|| src.len());
loop {
if src.as_bytes()[level] == b'*' {
level += 1;
} else {
break;
}
}
let eol = eol!(src); debug_assert!(src.as_bytes()[0..level].iter().all(|&c| c == b'*'));
let end = if eol == src.len() {
eol let (eol, end) = memchr::memchr(b'\n', src.as_bytes())
} else { .map(|i| (i, Headline::find_level(&src[i..], level) + i))
Headline::find_level(&src[eol..], level) + eol .unwrap_or_else(|| (src.len(), src.len()));
};
let mut title_start = skip_space!(src, level); let mut title_start = skip_space!(src, level);
@ -129,12 +115,11 @@ impl<'a> Headline<'a> {
pub fn find_level(src: &str, level: usize) -> usize { pub fn find_level(src: &str, level: usize) -> usize {
use jetscii::ByteSubstring; use jetscii::ByteSubstring;
use memchr::memchr2;
let bytes = src.as_bytes(); let bytes = src.as_bytes();
if bytes[0] == b'*' { if bytes[0] == b'*' {
if let Some(stars) = memchr2(b'\n', b' ', bytes) { if let Some(stars) = memchr2(b'\n', b' ', bytes) {
if stars > 0 && stars <= level && bytes[0..stars].iter().all(|&c| c == b'*') { if stars <= level && bytes[0..stars].iter().all(|&c| c == b'*') {
return 0; return 0;
} }
} }

View file

@ -68,8 +68,7 @@ pub enum Object<'a> {
Text(&'a str), Text(&'a str),
} }
impl<'a> Object<'a> { pub fn parse<'a>(src: &'a str) -> (Object<'a>, usize, Option<(Object<'a>, usize)>) {
pub fn next_2(src: &'a str) -> (Object<'a>, usize, Option<(Object<'a>, usize)>) {
let bytes = src.as_bytes(); let bytes = src.as_bytes();
if src.len() <= 2 { if src.len() <= 2 {
@ -90,8 +89,6 @@ impl<'a> Object<'a> {
}; };
} }
let mut pre = pos;
match bytes[pos] { match bytes[pos] {
b'@' if bytes[pos + 1] == b'@' => { b'@' if bytes[pos + 1] == b'@' => {
if let Some((name, value, off)) = snippet::parse(&src[pos..]) { if let Some((name, value, off)) = snippet::parse(&src[pos..]) {
@ -132,63 +129,16 @@ impl<'a> Object<'a> {
} }
// TODO: Timestamp // TODO: Timestamp
} }
b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => pre += 1, b'{' | b' ' | b'"' | b',' | b'(' | b'\n' => {
_ => (), if let Some((obj, off)) = parse_text_markup(&src[pos + 1..]) {
} brk!(obj, off, pos + 1);
match bytes[pre] {
b'*' => {
if let Some(end) = emphasis::parse(&src[pre..], b'*') {
brk!(Object::Bold { end }, 1, pre);
} }
} }
b'+' => { _ => {
if let Some(end) = emphasis::parse(&src[pre..], b'+') { if let Some((obj, off)) = parse_text_markup(&src[pos..]) {
brk!(Object::Strike { end }, 1, pre); brk!(obj, off, pos);
} }
} }
b'/' => {
if let Some(end) = emphasis::parse(&src[pre..], b'/') {
brk!(Object::Italic { end }, 1, pre);
}
}
b'_' => {
if let Some(end) = emphasis::parse(&src[pre..], b'_') {
brk!(Object::Underline { end }, 1, pre);
}
}
b'=' => {
if let Some(end) = emphasis::parse(&src[pre..], b'=') {
brk!(Object::Verbatim(&src[pre + 1..pre + end]), end + 1, pre);
}
}
b'~' => {
if let Some(end) = emphasis::parse(&src[pre..], b'~') {
brk!(Object::Code(&src[pre + 1..pre + end]), end + 1, pre);
}
}
b'c' if src[pre..].starts_with("call_") => {
if let Some((name, args, inside_header, end_header, off)) =
inline_call::parse(&src[pre..])
{
brk!(
Object::InlineCall {
name,
args,
inside_header,
end_header,
},
off,
pre
);
}
}
b's' if src[pre..].starts_with("src_") => {
if let Some((lang, option, body, off)) = inline_src::parse(&src[pre..]) {
brk!(Object::InlineSrc { lang, option, body }, off, pre);
}
}
_ => (),
} }
if let Some(off) = bs if let Some(off) = bs
@ -202,18 +152,49 @@ impl<'a> Object<'a> {
} }
} }
} }
fn parse_text_markup<'a>(src: &'a str) -> Option<(Object<'a>, usize)> {
match src.as_bytes()[0] {
b'*' => emphasis::parse(src, b'*').map(|end| (Object::Bold { end }, 1)),
b'+' => emphasis::parse(src, b'+').map(|end| (Object::Strike { end }, 1)),
b'/' => emphasis::parse(src, b'/').map(|end| (Object::Italic { end }, 1)),
b'_' => emphasis::parse(src, b'_').map(|end| (Object::Underline { end }, 1)),
b'=' => emphasis::parse(src, b'=').map(|end| (Object::Verbatim(&src[1..end]), end + 1)),
b'~' => emphasis::parse(src, b'~').map(|end| (Object::Code(&src[1..end]), end + 1)),
b's' if src.starts_with("src_") => inline_src::parse(src)
.map(|(lang, option, body, off)| (Object::InlineSrc { lang, option, body }, off)),
b'c' if src.starts_with("call_") => {
inline_call::parse(src).map(|(name, args, inside_header, end_header, off)| {
(
Object::InlineCall {
name,
args,
inside_header,
end_header,
},
off,
)
})
}
_ => None,
}
} }
#[cfg(test)]
mod tests {
#[test] #[test]
fn next_2() { fn parse() {
// TODO: more tests use super::*;
assert_eq!(Object::next_2("*bold*"), (Object::Bold { end: 5 }, 1, None));
assert_eq!(parse("*bold*"), (Object::Bold { end: 5 }, 1, None));
assert_eq!( assert_eq!(
Object::next_2("Normal =verbatim="), parse("Normal =verbatim="),
( (
Object::Text("Normal "), Object::Text("Normal "),
"Normal ".len(), "Normal ".len(),
Some((Object::Verbatim("verbatim"), "=verbatim=".len())) Some((Object::Verbatim("verbatim"), "=verbatim=".len()))
) )
); );
// TODO: more tests
}
} }

View file

@ -1,8 +1,8 @@
//! Parser //! Parser
use crate::elements::*; use crate::elements::{self, *};
use crate::headline::*; use crate::headline::*;
use crate::objects::*; use crate::objects::{self, *};
#[cfg_attr(test, derive(PartialEq))] #[cfg_attr(test, derive(PartialEq))]
#[derive(Copy, Clone, Debug)] #[derive(Copy, Clone, Debug)]
@ -227,7 +227,7 @@ impl<'a> Parser<'a> {
fn next_sec_or_hdl(&mut self) -> Event<'a> { fn next_sec_or_hdl(&mut self) -> Event<'a> {
let end = Headline::find_level(&self.text[self.off..], std::usize::MAX); let end = Headline::find_level(&self.text[self.off..], std::usize::MAX);
debug_assert!(end <= self.text.len()); debug_assert!(end <= self.text[self.off..].len());
if end != 0 { if end != 0 {
self.stack.push(Container::Section { self.stack.push(Container::Section {
end: self.off + end, end: self.off + end,
@ -241,7 +241,7 @@ impl<'a> Parser<'a> {
fn next_hdl(&mut self) -> Event<'a> { fn next_hdl(&mut self) -> Event<'a> {
let tail = &self.text[self.off..]; let tail = &self.text[self.off..];
let (hdl, off, end) = Headline::parse(tail); let (hdl, off, end) = Headline::parse(tail);
debug_assert!(end <= self.text.len()); debug_assert!(end <= self.text[self.off..].len());
self.stack.push(Container::Headline { self.stack.push(Container::Headline {
beg: self.off + off, beg: self.off + off,
end: self.off + end, end: self.off + end,
@ -257,7 +257,7 @@ impl<'a> Parser<'a> {
.take() .take()
.map(|(ele, off)| (Some(ele), off)) .map(|(ele, off)| (Some(ele), off))
.unwrap_or_else(|| { .unwrap_or_else(|| {
let (ele, off, next_ele) = Element::next_2(text); let (ele, off, next_ele) = elements::parse(text);
self.ele_buf = next_ele; self.ele_buf = next_ele;
(ele, off) (ele, off)
}); });
@ -344,49 +344,48 @@ impl<'a> Parser<'a> {
fn next_obj(&mut self, end: usize) -> Event<'a> { fn next_obj(&mut self, end: usize) -> Event<'a> {
let text = &self.text[self.off..end]; let text = &self.text[self.off..end];
let (obj, off) = self.obj_buf.take().unwrap_or_else(|| { let (obj, off) = self.obj_buf.take().unwrap_or_else(|| {
let (obj, off, next_obj) = Object::next_2(text); let (obj, off, next_obj) = objects::parse(text);
self.obj_buf = next_obj; self.obj_buf = next_obj;
(obj, off) (obj, off)
}); });
debug_assert!(off <= text.len()); debug_assert!(off <= text.len());
self.off += off;
match obj { match obj {
Object::Underline { end } => { Object::Underline { end } => {
debug_assert!(end <= text.len()); debug_assert!(end <= text.len());
self.stack.push(Container::Underline { self.stack.push(Container::Underline {
cont_end: self.off + end, cont_end: self.off + end - 1,
end: self.off + end + 1, end: self.off + end,
}); });
Event::UnderlineBeg
} }
Object::Strike { end } => { Object::Strike { end } => {
debug_assert!(end <= text.len()); debug_assert!(end <= text.len());
self.stack.push(Container::Strike { self.stack.push(Container::Strike {
cont_end: self.off + end, cont_end: self.off + end - 1,
end: self.off + end + 1, end: self.off + end,
}); });
Event::StrikeBeg
} }
Object::Italic { end } => { Object::Italic { end } => {
debug_assert!(end <= text.len()); debug_assert!(end <= text.len());
self.stack.push(Container::Italic { self.stack.push(Container::Italic {
cont_end: self.off + end, cont_end: self.off + end - 1,
end: self.off + end + 1, end: self.off + end,
}); });
Event::ItalicBeg
} }
Object::Bold { end } => { Object::Bold { end } => {
debug_assert!(end <= text.len()); debug_assert!(end <= text.len());
self.stack.push(Container::Bold { self.stack.push(Container::Bold {
cont_end: self.off + end, cont_end: self.off + end - 1,
end: self.off + end + 1, end: self.off + end,
}); });
Event::BoldBeg
} }
_ => (),
}
self.off += off;
match obj {
Object::Bold { .. } => Event::BoldBeg,
Object::Code(c) => Event::Code(c), Object::Code(c) => Event::Code(c),
Object::Cookie(c) => Event::Cookie(c), Object::Cookie(c) => Event::Cookie(c),
Object::FnRef { label, def } => Event::FnRef { label, def }, Object::FnRef { label, def } => Event::FnRef { label, def },
@ -402,15 +401,12 @@ impl<'a> Parser<'a> {
end_header, end_header,
}, },
Object::InlineSrc { lang, option, body } => Event::InlineSrc { lang, option, body }, Object::InlineSrc { lang, option, body } => Event::InlineSrc { lang, option, body },
Object::Italic { .. } => Event::ItalicBeg,
Object::Link { path, desc } => Event::Link { path, desc }, Object::Link { path, desc } => Event::Link { path, desc },
Object::Macros { name, args } => Event::Macros { name, args }, Object::Macros { name, args } => Event::Macros { name, args },
Object::RadioTarget { target } => Event::RadioTarget { target }, Object::RadioTarget { target } => Event::RadioTarget { target },
Object::Snippet { name, value } => Event::Snippet { name, value }, Object::Snippet { name, value } => Event::Snippet { name, value },
Object::Strike { .. } => Event::StrikeBeg,
Object::Target { target } => Event::Target { target }, Object::Target { target } => Event::Target { target },
Object::Text(t) => Event::Text(t), Object::Text(t) => Event::Text(t),
Object::Underline { .. } => Event::UnderlineBeg,
Object::Verbatim(v) => Event::Verbatim(v), Object::Verbatim(v) => Event::Verbatim(v),
} }
} }

View file

@ -10,18 +10,6 @@ macro_rules! expect {
}; };
} }
#[macro_export]
macro_rules! eol {
($src:expr) => {
memchr::memchr(b'\n', $src.as_bytes()).unwrap_or_else(|| $src.len())
};
($src:expr, $from:expr) => {
memchr::memchr(b'\n', $src.as_bytes()[$from..])
.map(|i| i + $from)
.unwrap_or_else(|| $src.len())
};
}
#[macro_export] #[macro_export]
macro_rules! skip_space { macro_rules! skip_space {
($src:ident) => { ($src:ident) => {
@ -39,17 +27,3 @@ macro_rules! skip_space {
.unwrap_or(0) .unwrap_or(0)
}; };
} }
#[macro_export]
macro_rules! skip_empty_line {
($src:ident, $from:expr) => {{
let mut pos = $from;
loop {
if pos >= $src.len() || $src.as_bytes()[pos] != b'\n' {
break pos;
} else {
pos += 1;
}
}
}};
}