feat: use bytecount for bytes counting

This commit is contained in:
PoiScript 2019-02-13 15:59:18 +08:00
parent c5a6d82aa8
commit 1c3300ca61
18 changed files with 135 additions and 137 deletions

View file

@ -46,6 +46,10 @@ Check out https://orgmode.org/worg/dev/org-syntax.html for more information.
- [x] Inline Babel Calls and Source Blocks - [x] Inline Babel Calls and Source Blocks
- [ ] Line Breaks - [ ] Line Breaks
- [x] Links - [x] Links
- [x] Regular link
- [ ] Plain link
- [ ] Angle link
- [ ] Radio link
- [x] Macros - [x] Macros
- [x] Targets and Radio Targets - [x] Targets and Radio Targets
- [x] Statistics Cookies - [x] Statistics Cookies

View file

@ -1,5 +1,5 @@
use crate::lines::Lines; use crate::lines::Lines;
use memchr::memchr2; use memchr::{memchr, memchr2};
/// return (name, parameters, contents-begin, contents-end, end) /// return (name, parameters, contents-begin, contents-end, end)
#[inline] #[inline]
@ -10,34 +10,32 @@ pub fn parse(src: &str) -> Option<(&str, Option<&str>, usize, usize, usize)> {
return None; return None;
} }
let bytes = src.as_bytes();
let args = memchr::memchr(b'\n', src.as_bytes())
.map(|i| i + 1)
.unwrap_or_else(|| src.len());
let name = memchr2(b' ', b'\n', &bytes[9..])
.map(|i| i + 9)
.filter(|&i| {
src.as_bytes()[9..i]
.iter()
.all(|&c| c.is_ascii_alphabetic())
})?;
let mut lines = Lines::new(src); let mut lines = Lines::new(src);
let (mut pre_cont_end, _, _) = lines.next()?; let (mut pre_cont_end, _, _) = lines.next()?;
for (cont_end, end, line) in lines { for (cont_end, end, line) in lines {
if line.trim().eq_ignore_ascii_case("#+END:") { if line.trim().eq_ignore_ascii_case("#+END:") {
return Some(( let bytes = src.as_bytes();
&src[8..name].trim(),
if name == args { let i = memchr2(b' ', b'\n', &bytes[9..])
None .map(|i| i + 9)
.filter(|&i| bytes[9..i].iter().all(|&c| c.is_ascii_alphabetic()))?;
let name = &src[8..i].trim();
return Some(if bytes[i] == b'\n' {
(name, None, i, pre_cont_end, end)
} else { } else {
Some(&src[name..args].trim()) let cont_beg = memchr(b'\n', bytes)
}, .map(|i| i + 1)
args, .unwrap_or_else(|| src.len());
(
name,
Some(&src[i..cont_beg].trim()),
cont_beg,
pre_cont_end, pre_cont_end,
end, end,
)); )
});
} }
pre_cont_end = cont_end; pre_cont_end = cont_end;
} }

View file

@ -23,32 +23,32 @@ mod tests {
use super::parse; use super::parse;
assert_eq!( assert_eq!(
parse("[fn:1] https://orgmode.org").unwrap(), parse("[fn:1] https://orgmode.org"),
( Some((
"1", "1",
" https://orgmode.org", " https://orgmode.org",
"[fn:1] https://orgmode.org".len() "[fn:1] https://orgmode.org".len()
) ))
); );
assert_eq!( assert_eq!(
parse("[fn:word_1] https://orgmode.org").unwrap(), parse("[fn:word_1] https://orgmode.org"),
( Some((
"word_1", "word_1",
" https://orgmode.org", " https://orgmode.org",
"[fn:word_1] https://orgmode.org".len() "[fn:word_1] https://orgmode.org".len()
) ))
); );
assert_eq!( assert_eq!(
parse("[fn:WORD-1] https://orgmode.org").unwrap(), parse("[fn:WORD-1] https://orgmode.org"),
( Some((
"WORD-1", "WORD-1",
" https://orgmode.org", " https://orgmode.org",
"[fn:WORD-1] https://orgmode.org".len() "[fn:WORD-1] https://orgmode.org".len()
) ))
); );
assert_eq!(parse("[fn:WORD]").unwrap(), ("WORD", "", "[fn:WORD]".len())); assert_eq!(parse("[fn:WORD]"), Some(("WORD", "", "[fn:WORD]".len())));
assert!(parse("[fn:] https://orgmode.org").is_none()); assert_eq!(parse("[fn:] https://orgmode.org"), None);
assert!(parse("[fn:wor d] https://orgmode.org").is_none()); assert_eq!(parse("[fn:wor d] https://orgmode.org"), None);
assert!(parse("[fn:WORD https://orgmode.org").is_none()); assert_eq!(parse("[fn:WORD https://orgmode.org"), None);
} }
} }

View file

@ -101,8 +101,8 @@ mod tests {
parse("#+KEY:VALUE\n"), parse("#+KEY:VALUE\n"),
Some((Key::Custom("KEY"), "VALUE", "#+KEY:VALUE\n".len())) Some((Key::Custom("KEY"), "VALUE", "#+KEY:VALUE\n".len()))
); );
assert!(parse("#+KE Y: VALUE").is_none()); assert_eq!(parse("#+KE Y: VALUE"), None);
assert!(parse("#+ KEY: VALUE").is_none()); assert_eq!(parse("#+ KEY: VALUE"), None);
assert_eq!( assert_eq!(
parse("#+RESULTS:"), parse("#+RESULTS:"),

View file

@ -77,7 +77,7 @@ pub fn parse(src: &str, ident: usize) -> (&str, usize, usize, usize, bool) {
} }
} }
let line_ident = self::ident(line); let line_ident = count_ident(line);
if line_ident < ident { if line_ident < ident {
return (bullet, beg, pre_cont_end, pre_end, false); return (bullet, beg, pre_cont_end, pre_end, false);
@ -99,7 +99,7 @@ pub fn parse(src: &str, ident: usize) -> (&str, usize, usize, usize, bool) {
} }
#[inline] #[inline]
fn ident(src: &str) -> usize { fn count_ident(src: &str) -> usize {
src.as_bytes() src.as_bytes()
.iter() .iter()
.position(|&c| c != b' ' && c != b'\t') .position(|&c| c != b' ' && c != b'\t')

View file

@ -81,7 +81,7 @@ pub enum Element<'a> {
// return (element, off, next element, next offset) // return (element, off, next element, next offset)
// the end of first element is relative to the offset // the end of first element is relative to the offset
// next offset is relative to the end of the first element // next offset is relative to the end of the first element
pub fn parse<'a>(src: &'a str) -> (Option<Element<'a>>, usize, Option<(Element<'a>, usize)>) { pub fn parse(src: &str) -> (Option<Element<'_>>, usize, Option<(Element<'_>, usize)>) {
// skip empty lines // skip empty lines
let mut pos = match src.chars().position(|c| c != '\n') { let mut pos = match src.chars().position(|c| c != '\n') {
Some(pos) => pos, Some(pos) => pos,

View file

@ -76,6 +76,7 @@ impl<'a> Headline<'a> {
pub fn parse(src: &'a str) -> (Headline<'a>, usize, usize) { pub fn parse(src: &'a str) -> (Headline<'a>, usize, usize) {
let level = memchr2(b'\n', b' ', src.as_bytes()).unwrap_or_else(|| src.len()); let level = memchr2(b'\n', b' ', src.as_bytes()).unwrap_or_else(|| src.len());
debug_assert!(level > 0);
debug_assert!(src.as_bytes()[0..level].iter().all(|&c| c == b'*')); debug_assert!(src.as_bytes()[0..level].iter().all(|&c| c == b'*'));
let (eol, end) = memchr::memchr(b'\n', src.as_bytes()) let (eol, end) = memchr::memchr(b'\n', src.as_bytes())

View file

@ -32,20 +32,20 @@ mod tests {
use super::parse; use super::parse;
use super::Cookie::*; use super::Cookie::*;
assert_eq!(parse("[1/10]").unwrap(), (Slash("1", "10"), "[1/10]".len())); assert_eq!(parse("[1/10]"), Some((Slash("1", "10"), "[1/10]".len())));
assert_eq!( assert_eq!(
parse("[1/1000]").unwrap(), parse("[1/1000]"),
(Slash("1", "1000"), "[1/1000]".len()) Some((Slash("1", "1000"), "[1/1000]".len()))
); );
assert_eq!(parse("[10%]").unwrap(), (Percent("10"), "[10%]".len())); assert_eq!(parse("[10%]"), Some((Percent("10"), "[10%]".len())));
assert_eq!(parse("[%]").unwrap(), (Percent(""), "[%]".len())); assert_eq!(parse("[%]"), Some((Percent(""), "[%]".len())));
assert_eq!(parse("[/]").unwrap(), (Slash("", ""), "[/]".len())); assert_eq!(parse("[/]"), Some((Slash("", ""), "[/]".len())));
assert_eq!(parse("[100/]").unwrap(), (Slash("100", ""), "[100/]".len())); assert_eq!(parse("[100/]"), Some((Slash("100", ""), "[100/]".len())));
assert_eq!(parse("[/100]").unwrap(), (Slash("", "100"), "[/100]".len())); assert_eq!(parse("[/100]"), Some((Slash("", "100"), "[/100]".len())));
assert!(parse("[10% ]").is_none(),); assert_eq!(parse("[10% ]"), None);
assert!(parse("[1//100]").is_none(),); assert_eq!(parse("[1//100]"), None);
assert!(parse("[1\\100]").is_none(),); assert_eq!(parse("[1\\100]"), None);
assert!(parse("[10%%]").is_none(),); assert_eq!(parse("[10%%]"), None);
} }
} }

View file

@ -1,3 +1,4 @@
use bytecount::count;
use memchr::memchr; use memchr::memchr;
#[inline] #[inline]
@ -13,14 +14,13 @@ pub fn parse(src: &str, marker: u8) -> Option<usize> {
let end = memchr(marker, &bytes[1..]) let end = memchr(marker, &bytes[1..])
.map(|i| i + 1) .map(|i| i + 1)
.filter(|&i| bytes[1..i].iter().filter(|&&c| c == b'\n').count() < 2)?; .filter(|&i| count(&bytes[1..i], b'\n') < 2)?;
if bytes[end - 1].is_ascii_whitespace() { if bytes[end - 1].is_ascii_whitespace() {
return None; return None;
} }
if end < src.len() - 1 { if let Some(&post) = bytes.get(end + 1) {
let post = bytes[end + 1];
if post == b' ' if post == b' '
|| post == b'-' || post == b'-'
|| post == b'.' || post == b'.'
@ -48,12 +48,12 @@ mod tests {
fn parse() { fn parse() {
use super::parse; use super::parse;
assert_eq!(parse("*bold*", b'*').unwrap(), "*bold".len()); assert_eq!(parse("*bold*", b'*'), Some("*bold".len()));
assert_eq!(parse("*bo\nld*", b'*').unwrap(), "*bo\nld".len()); assert_eq!(parse("*bo\nld*", b'*'), Some("*bo\nld".len()));
assert!(parse("*bold*a", b'*').is_none()); assert_eq!(parse("*bold*a", b'*'), None);
assert!(parse("*bold*", b'/').is_none()); assert_eq!(parse("*bold*", b'/'), None);
assert!(parse("*bold *", b'*').is_none()); assert_eq!(parse("*bold *", b'*'), None);
assert!(parse("* bold*", b'*').is_none()); assert_eq!(parse("* bold*", b'*'), None);
assert!(parse("*b\nol\nd*", b'*').is_none()); assert_eq!(parse("*b\nol\nd*", b'*'), None);
} }
} }

View file

@ -55,19 +55,16 @@ mod tests {
fn parse() { fn parse() {
use super::parse; use super::parse;
assert_eq!(parse("[fn:1]").unwrap(), (Some("1"), None, "[fn:1]".len())); assert_eq!(parse("[fn:1]"), Some((Some("1"), None, "[fn:1]".len())));
assert_eq!( assert_eq!(
parse("[fn:1:2]").unwrap(), parse("[fn:1:2]"),
(Some("1"), Some("2"), "[fn:1:2]".len()) Some((Some("1"), Some("2"), "[fn:1:2]".len()))
); );
assert_eq!(parse("[fn::2]"), Some((None, Some("2"), "[fn::2]".len())));
assert_eq!( assert_eq!(
parse("[fn::2]").unwrap(), parse("[fn::[]]"),
(None, Some("2"), "[fn::2]".len()) Some((None, Some("[]"), "[fn::[]]".len()))
); );
assert_eq!( assert_eq!(parse("[fn::[]"), None);
parse("[fn::[]]").unwrap(),
(None, Some("[]"), "[fn::[]]".len())
);
assert!(parse("[fn::[]").is_none());
} }
} }

View file

@ -51,38 +51,38 @@ mod tests {
use super::parse; use super::parse;
assert_eq!( assert_eq!(
parse("call_square(4)").unwrap(), parse("call_square(4)"),
("square", "4", None, None, "call_square(4)".len()) Some(("square", "4", None, None, "call_square(4)".len()))
); );
assert_eq!( assert_eq!(
parse("call_square[:results output](4)").unwrap(), parse("call_square[:results output](4)"),
( Some((
"square", "square",
"4", "4",
Some(":results output"), Some(":results output"),
None, None,
"call_square[:results output](4)".len() "call_square[:results output](4)".len()
) ))
); );
assert_eq!( assert_eq!(
parse("call_square(4)[:results html]").unwrap(), parse("call_square(4)[:results html]"),
( Some((
"square", "square",
"4", "4",
None, None,
Some(":results html"), Some(":results html"),
"call_square(4)[:results html]".len() "call_square(4)[:results html]".len()
) ))
); );
assert_eq!( assert_eq!(
parse("call_square[:results output](4)[:results html]").unwrap(), parse("call_square[:results output](4)[:results html]"),
( Some((
"square", "square",
"4", "4",
Some(":results output"), Some(":results output"),
Some(":results html"), Some(":results html"),
"call_square[:results output](4)[:results html]".len() "call_square[:results output](4)[:results html]".len()
) ))
); );
} }
} }

View file

@ -35,20 +35,20 @@ mod tests {
use super::parse; use super::parse;
assert_eq!( assert_eq!(
parse("src_C{int a = 0;}").unwrap(), parse("src_C{int a = 0;}"),
("C", None, "int a = 0;", "src_C{int a = 0;}".len()) Some(("C", None, "int a = 0;", "src_C{int a = 0;}".len()))
); );
assert_eq!( assert_eq!(
parse("src_xml[:exports code]{<tag>text</tag>}").unwrap(), parse("src_xml[:exports code]{<tag>text</tag>}"),
( Some((
"xml", "xml",
Some(":exports code"), Some(":exports code"),
"<tag>text</tag>", "<tag>text</tag>",
"src_xml[:exports code]{<tag>text</tag>}".len() "src_xml[:exports code]{<tag>text</tag>}".len()
) ))
); );
assert!(parse("src_xml[:exports code]{<tag>text</tag>").is_none()); assert_eq!(parse("src_xml[:exports code]{<tag>text</tag>"), None);
assert!(parse("src_[:exports code]{<tag>text</tag>}").is_none()); assert_eq!(parse("src_[:exports code]{<tag>text</tag>}"), None);
assert!(parse("src_xml[:exports code]").is_none()); assert_eq!(parse("src_xml[:exports code]"), None);
} }
} }

View file

@ -32,11 +32,11 @@ mod tests {
fn parse() { fn parse() {
use super::parse; use super::parse;
assert_eq!(parse("[[#id]]").unwrap(), ("#id", None, "[[#id]]".len())); assert_eq!(parse("[[#id]]"), Some(("#id", None, "[[#id]]".len())));
assert_eq!( assert_eq!(
parse("[[#id][desc]]").unwrap(), parse("[[#id][desc]]"),
("#id", Some("desc"), "[[#id][desc]]".len()) Some(("#id", Some("desc"), "[[#id][desc]]".len()))
); );
assert!(parse("[[#id][desc]").is_none()); assert_eq!(parse("[[#id][desc]"), None);
} }
} }

View file

@ -68,20 +68,25 @@ pub enum Object<'a> {
Text(&'a str), Text(&'a str),
} }
pub fn parse<'a>(src: &'a str) -> (Object<'a>, usize, Option<(Object<'a>, usize)>) { pub fn parse(src: &str) -> (Object<'_>, usize, Option<(Object<'_>, usize)>) {
let bytes = src.as_bytes(); let bytes = src.as_bytes();
if src.len() <= 2 {
return (Object::Text(src), src.len(), None);
}
let bs = bytes!(b'@', b' ', b'"', b'(', b'\n', b'{', b'<', b'['); let bs = bytes!(b'@', b' ', b'"', b'(', b'\n', b'{', b'<', b'[');
let mut pos = 0; let mut pos = 0;
loop { while let Some(off) = if pos == 0 {
Some(0)
} else {
bs.find(&bytes[pos..])
} {
pos += off;
if src.len() - pos < 3 {
return (Object::Text(src), src.len(), None);
}
macro_rules! brk { macro_rules! brk {
($obj:expr, $off:expr, $pos:expr) => { ($obj:expr, $off:expr, $pos:expr) => {
break if $pos == 0 { return if $pos == 0 {
($obj, $off, None) ($obj, $off, None)
} else { } else {
(Object::Text(&src[0..$pos]), $pos, Some(($obj, $off))) (Object::Text(&src[0..$pos]), $pos, Some(($obj, $off)))
@ -141,19 +146,13 @@ pub fn parse<'a>(src: &'a str) -> (Object<'a>, usize, Option<(Object<'a>, usize)
} }
} }
if let Some(off) = bs pos += 1;
.find(&bytes[pos + 1..])
.map(|i| i + pos + 1)
.filter(|&i| i < src.len() - 3)
{
pos = off;
} else {
break (Object::Text(src), src.len(), None);
}
}
} }
fn parse_text_markup<'a>(src: &'a str) -> Option<(Object<'a>, usize)> { (Object::Text(src), src.len(), None)
}
fn parse_text_markup(src: &str) -> Option<(Object<'_>, usize)> {
match src.as_bytes()[0] { match src.as_bytes()[0] {
b'*' => emphasis::parse(src, b'*').map(|end| (Object::Bold { end }, 1)), b'*' => emphasis::parse(src, b'*').map(|end| (Object::Bold { end }, 1)),
b'+' => emphasis::parse(src, b'+').map(|end| (Object::Strike { end }, 1)), b'+' => emphasis::parse(src, b'+').map(|end| (Object::Strike { end }, 1)),

View file

@ -7,13 +7,14 @@ pub fn parse(src: &str) -> Option<(&str, usize)> {
expect!(src, 3, |c| c != b' ')?; expect!(src, 3, |c| c != b' ')?;
let bytes = src.as_bytes();
let end = Substring::new(">>>").find(src).filter(|&i| { let end = Substring::new(">>>").find(src).filter(|&i| {
src.as_bytes()[3..i] bytes[3..i]
.iter() .iter()
.all(|&c| c != b'<' && c != b'\n' && c != b'>') .all(|&c| c != b'<' && c != b'\n' && c != b'>')
})?; })?;
if src.as_bytes()[end - 1] == b' ' { if bytes[end - 1] == b' ' {
return None; return None;
} }
@ -27,12 +28,12 @@ mod tests {
use super::parse; use super::parse;
assert_eq!( assert_eq!(
parse("<<<target>>>").unwrap(), parse("<<<target>>>"),
("target", "<<<target>>>".len()) Some(("target", "<<<target>>>".len()))
); );
assert_eq!( assert_eq!(
parse("<<<tar get>>>").unwrap(), parse("<<<tar get>>>"),
("tar get", "<<<tar get>>>".len()) Some(("tar get", "<<<tar get>>>".len()))
); );
assert_eq!(parse("<<<target >>>"), None); assert_eq!(parse("<<<target >>>"), None);
assert_eq!(parse("<<< target>>>"), None); assert_eq!(parse("<<< target>>>"), None);

View file

@ -27,20 +27,20 @@ mod tests {
use super::parse; use super::parse;
assert_eq!( assert_eq!(
parse("@@html:<b>@@").unwrap(), parse("@@html:<b>@@"),
("html", "<b>", "@@html:<b>@@".len()) Some(("html", "<b>", "@@html:<b>@@".len()))
); );
assert_eq!( assert_eq!(
parse("@@latex:any arbitrary LaTeX code@@").unwrap(), parse("@@latex:any arbitrary LaTeX code@@"),
( Some((
"latex", "latex",
"any arbitrary LaTeX code", "any arbitrary LaTeX code",
"@@latex:any arbitrary LaTeX code@@".len() "@@latex:any arbitrary LaTeX code@@".len()
) ))
); );
assert_eq!(parse("@@html:@@").unwrap(), ("html", "", "@@html:@@".len())); assert_eq!(parse("@@html:@@"), Some(("html", "", "@@html:@@".len())));
assert!(parse("@@html:<b>@").is_none()); assert_eq!(parse("@@html:<b>@"), None);
assert!(parse("@@html<b>@@").is_none()); assert_eq!(parse("@@html<b>@@"), None);
assert!(parse("@@:<b>@@").is_none()); assert_eq!(parse("@@:<b>@@"), None);
} }
} }

View file

@ -25,11 +25,8 @@ mod tests {
fn parse() { fn parse() {
use super::parse; use super::parse;
assert_eq!(parse("<<target>>").unwrap(), ("target", "<<target>>".len())); assert_eq!(parse("<<target>>"), Some(("target", "<<target>>".len())));
assert_eq!( assert_eq!(parse("<<tar get>>"), Some(("tar get", "<<tar get>>".len())));
parse("<<tar get>>").unwrap(),
("tar get", "<<tar get>>".len())
);
assert_eq!(parse("<<target >>"), None); assert_eq!(parse("<<target >>"), None);
assert_eq!(parse("<< target>>"), None); assert_eq!(parse("<< target>>"), None);
assert_eq!(parse("<<ta<get>>"), None); assert_eq!(parse("<<ta<get>>"), None);

View file

@ -239,8 +239,7 @@ impl<'a> Parser<'a> {
} }
fn next_hdl(&mut self) -> Event<'a> { fn next_hdl(&mut self) -> Event<'a> {
let tail = &self.text[self.off..]; let (hdl, off, end) = Headline::parse(&self.text[self.off..]);
let (hdl, off, end) = Headline::parse(tail);
debug_assert!(end <= self.text[self.off..].len()); debug_assert!(end <= self.text[self.off..].len());
self.stack.push(Container::Headline { self.stack.push(Container::Headline {
beg: self.off + off, beg: self.off + off,
@ -467,6 +466,7 @@ impl<'a> Iterator for Parser<'a> {
| Container::SplBlock { cont_end, end, .. } | Container::SplBlock { cont_end, end, .. }
| Container::ListItem { cont_end, end } => { | Container::ListItem { cont_end, end } => {
debug_assert!(self.off <= cont_end); debug_assert!(self.off <= cont_end);
debug_assert!(self.off <= end);
if self.off >= cont_end { if self.off >= cont_end {
self.off = end; self.off = end;
self.end() self.end()
@ -495,6 +495,7 @@ impl<'a> Iterator for Parser<'a> {
| Container::Underline { cont_end, end } | Container::Underline { cont_end, end }
| Container::Italic { cont_end, end } | Container::Italic { cont_end, end }
| Container::Strike { cont_end, end } => { | Container::Strike { cont_end, end } => {
debug_assert!(self.off <= cont_end);
debug_assert!(self.off <= end); debug_assert!(self.off <= end);
if self.off >= cont_end { if self.off >= cont_end {
self.off = end; self.off = end;