perf: replace all str.find(..)s with memchr(..)

memchr(..) is much faster than str.find(..), at least in theory.
This commit is contained in:
PoiScript 2019-01-22 11:08:10 +08:00
parent 19f7bacf55
commit da04d3d25d
7 changed files with 45 additions and 50 deletions

View file

@ -12,7 +12,9 @@ impl Keyword {
let key = until_while!(src, 2, b':', |c: u8| c.is_ascii_alphabetic() || c == b'_')?;
// includes the eol character
let end = src.find('\n').map(|i| i + 1).unwrap_or_else(|| src.len());
let end = memchr::memchr(b'\n', src.as_bytes())
.map(|i| i + 1)
.unwrap_or_else(|| src.len());
Some((&src[2..key], &src[key + 1..end].trim(), end))
}

View file

@ -51,7 +51,9 @@ impl List {
// returns (contents_begin, contents_end)
pub fn parse_item(src: &str, ident: usize) -> (usize, usize) {
let beg = src[ident..].find(' ').map(|i| ident + i + 1).unwrap();
let beg = memchr::memchr(b' ', &src.as_bytes()[ident..])
.map(|i| i + ident + 1)
.unwrap();
let mut lines = lines!(src);
// skip first line
let mut pos = lines.next().unwrap();

View file

@ -179,12 +179,12 @@ impl<'a> Element<'a> {
}
}
// TODO: multiple lines fixed width area
if bytes[pos] == b':' && bytes.get(pos + 1).map(|&b| b == b' ').unwrap_or(false) {
let eol = src[pos..]
.find('\n')
.map(|i| i + pos + 1)
.unwrap_or_else(|| src.len());
ret!(Element::FixedWidth(&src[pos + 1..eol]), eol);
let eol = memchr::memchr(b'\n', &src.as_bytes()[pos..])
.map(|i| i + 1)
.unwrap_or_else(|| src.len() - pos);
ret!(Element::FixedWidth(&src[pos + 1..pos + eol]), eol);
}
if bytes[pos] == b'#' && bytes.get(pos + 1).filter(|&&b| b == b'+').is_some() {
@ -246,12 +246,12 @@ impl<'a> Element<'a> {
}
// Comment
// TODO: multiple lines comment
if bytes[pos] == b'#' && bytes.get(pos + 1).map(|&b| b == b' ').unwrap_or(false) {
let eol = src[pos..]
.find('\n')
.map(|i| i + pos + 1)
.unwrap_or_else(|| src.len());
ret!(Element::Comment(&src[pos + 1..eol]), eol);
let eol = memchr::memchr(b'\n', &src.as_bytes()[pos..])
.map(|i| i + 1)
.unwrap_or_else(|| src.len() - pos);
ret!(Element::Comment(&src[pos + 1..pos + eol]), eol);
}
}

View file

@ -1,12 +1,10 @@
use memchr::memchr;
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct Rule;
impl Rule {
pub fn parse(src: &str) -> usize {
let end = memchr(b'\n', src.as_bytes())
let end = memchr::memchr(b'\n', src.as_bytes())
.map(|i| i + 1)
.unwrap_or_else(|| src.len());
let rules = &src[0..end].trim();

View file

@ -48,13 +48,14 @@ impl<'a> Headline<'a> {
fn parse_tags(src: &'a str) -> (Option<&'a str>, usize) {
if let Some(last) = src.split_whitespace().last() {
if last.len() > 2 && last.starts_with(':') && last.ends_with(':') {
(Some(last), src.rfind(':').unwrap() - last.len())
} else {
(None, src.len())
return (
Some(last),
memchr::memrchr(b':', src.as_bytes()).unwrap() - last.len(),
);
}
} else {
(None, src.len())
}
(None, src.len())
}
pub fn parse(src: &'a str) -> (Headline<'a>, usize, usize) {
@ -72,23 +73,17 @@ impl<'a> Headline<'a> {
let mut title_start = skip_space!(src, level);
let keyword = match Headline::parse_keyword(&src[title_start..eol]) {
Some((k, l)) => {
title_start += l;
Some(k)
}
None => None,
};
let keyword = Headline::parse_keyword(&src[title_start..eol]).map(|(k, l)| {
title_start += l;
k
});
title_start = skip_space!(src, title_start);
let priority = match Headline::parse_priority(&src[title_start..eol]) {
Some(p) => {
title_start += 4;
Some(p)
}
None => None,
};
let priority = Headline::parse_priority(&src[title_start..eol]).map(|p| {
title_start += 4;
p
});
title_start = skip_space!(src, title_start);

View file

@ -12,7 +12,6 @@ pub enum Container {
Section {
end: usize,
},
Paragraph {
cont_end: usize,
end: usize,
@ -33,7 +32,6 @@ pub enum Container {
cont_end: usize,
end: usize,
},
List {
ident: usize,
ordered: bool,
@ -43,7 +41,6 @@ pub enum Container {
ListItem {
end: usize,
},
Italic {
end: usize,
},
@ -182,20 +179,20 @@ impl<'a> Parser<'a> {
}
}
fn start_sec_or_hdl(&mut self, tail: &'a str) -> Event<'a> {
let end = Headline::find_level(tail, std::usize::MAX);
fn next_sec_or_hdl(&mut self) -> Event<'a> {
let end = Headline::find_level(&self.text[self.off..], std::usize::MAX);
if end != 0 {
self.stack.push(Container::Section {
end: self.off + end,
});
Event::SectionBeg
} else {
self.start_hdl(tail)
self.next_hdl()
}
}
fn start_hdl(&mut self, tail: &'a str) -> Event<'a> {
let (hdl, off, end) = Headline::parse(tail);
fn next_hdl(&mut self) -> Event<'a> {
let (hdl, off, end) = Headline::parse(&self.text[self.off..]);
self.stack.push(Container::Headline {
beg: self.off + off,
end: self.off + end,
@ -215,6 +212,8 @@ impl<'a> Parser<'a> {
(ele, off)
});
debug_assert!(self.off + off <= end);
if let Some(ele) = ele {
match ele {
Element::Paragraph { cont_end, end } => self.stack.push(Container::Paragraph {
@ -284,6 +283,8 @@ impl<'a> Parser<'a> {
(obj, off)
});
debug_assert!(self.off + off <= end);
match obj {
Object::Underline { end } => self.stack.push(Container::Underline {
end: self.off + end,
@ -390,21 +391,19 @@ impl<'a> Iterator for Parser<'a> {
if self.off >= self.text.len() {
None
} else {
let tail = &self.text[self.off..];
Some(self.start_sec_or_hdl(tail))
Some(self.next_sec_or_hdl())
}
} else {
let last = *self.stack.last_mut().unwrap();
Some(match last {
Container::Headline { beg, end } => {
let tail = &self.text[self.off..];
if self.off >= end {
self.end()
} else if self.off == beg {
self.start_sec_or_hdl(tail)
self.next_sec_or_hdl()
} else {
self.start_hdl(tail)
self.next_hdl()
}
}
Container::DynBlock { cont_end, end, .. }

View file

@ -13,11 +13,10 @@ macro_rules! expect {
#[macro_export]
macro_rules! eol {
($src:expr) => {
$src.find('\n').unwrap_or_else(|| $src.len())
memchr::memchr(b'\n', $src.as_bytes()).unwrap_or_else(|| $src.len())
};
($src:expr, $from:expr) => {
$src[$from..]
.find('\n')
memchr::memchr(b'\n', $src.as_bytes()[$from..])
.map(|i| i + $from)
.unwrap_or_else(|| $src.len())
};