refactor: simplify list parsing

This commit is contained in:
PoiScript 2019-02-02 23:42:31 +08:00
parent 763ec00434
commit d20d4c2880
9 changed files with 562 additions and 570 deletions

View file

@ -9,7 +9,9 @@ fn valid_label(ch: u8) -> bool {
impl FnDef { impl FnDef {
pub fn parse(src: &str) -> Option<(&str, &str, usize)> { pub fn parse(src: &str) -> Option<(&str, &str, usize)> {
if cfg!(test) {
starts_with!(src, "[fn:"); starts_with!(src, "[fn:");
}
let label = until_while!(src, 4, b']', valid_label)?; let label = until_while!(src, 4, b']', valid_label)?;

View file

@ -1,289 +1,157 @@
pub struct List; use lines::Lines;
macro_rules! ident { pub struct List;
($src:expr) => {
$src.as_bytes()
.iter()
.position(|&c| c != b' ' && c != b'\t')
.unwrap_or(0)
};
}
impl List { impl List {
#[inline] #[inline]
fn is_item(src: &str) -> bool { pub fn is_item(src: &str) -> (bool, bool) {
if src.len() < 2 { if src.is_empty() {
return false; return (false, false);
} }
let bytes = src.as_bytes(); let bytes = src.as_bytes();
let i = match bytes[0] { let (i, ordered) = match bytes[0] {
b'*' | b'-' | b'+' => 1, b'*' | b'-' | b'+' => (1, false),
b'0'...b'9' => { b'0'...b'9' => {
let i = bytes let i = bytes
.iter() .iter()
.position(|&c| !c.is_ascii_digit()) .position(|&c| !c.is_ascii_digit())
.unwrap_or_else(|| src.len()); .unwrap_or_else(|| src.len());
if i >= src.len() - 1 {
return false;
}
let c = bytes[i]; let c = bytes[i];
if !(c == b'.' || c == b')') { if !(c == b'.' || c == b')') {
return false; return (false, false);
} }
i + 1 (i + 1, true)
} }
_ => return false, _ => return (false, false),
}; };
if i < src.len() {
// bullet is follwed by a space or line ending // bullet is follwed by a space or line ending
bytes[i] == b' ' || bytes[i] == b'\n' (bytes[i] == b' ' || bytes[i] == b'\n', ordered)
} } else {
(false, false)
#[inline]
pub fn is_ordered(byte: u8) -> bool {
match byte {
b'*' | b'-' | b'+' => false,
b'0'...b'9' => true,
_ => unreachable!(),
} }
} }
// returns (contents_begin, contents_end) // returns (bullets, contents begin, contents end, end, has more)
pub fn parse_item(src: &str, ident: usize) -> (usize, usize) { pub fn parse(src: &str, ident: usize) -> (&str, usize, usize, usize, bool) {
let beg = memchr::memchr(b' ', &src.as_bytes()[ident..]) debug_assert!(Self::is_item(&src[ident..]).0);
.map(|i| i + ident + 1) debug_assert!(
.unwrap(); src[..ident].chars().all(|c| c == ' ' || c == '\t'),
let mut lines = lines!(src); "{:?} doesn't starts with indentation {}",
// skip first line src,
let mut pos = lines.next().unwrap(); ident
for line_end in lines { );
let line = &src[pos..line_end];
if !line.trim().is_empty() && ident!(line) == ident {
break;
}
pos = line_end;
}
(beg, pos)
}
// return (ident, is_ordered, contents_end, end) let mut lines = Lines::new(src);
pub fn parse(src: &str) -> Option<(usize, bool, usize, usize)> { let (mut pre_cont_end, mut pre_end, first_line) = lines.next().unwrap();
let bytes = src.as_bytes(); let beg = match memchr::memchr(b' ', &first_line.as_bytes()[ident..]) {
let starting_ident = ident!(src); Some(i) => i + ident + 1,
None => {
if !Self::is_item(&src[starting_ident..]) { let len = first_line.len();
return None; return (
&first_line,
len,
len,
len,
Self::is_item(lines.next().unwrap().2).0,
);
} }
let mut lines = lines!(src);
// skip the starting line
let mut pos = lines.next().unwrap();
let is_ordered = Self::is_ordered(bytes[starting_ident]);
Some(loop {
let mut curr_line = match lines.next() {
Some(i) => i,
None => break (starting_ident, is_ordered, pos, pos),
};
// current line is empty
if src[pos..curr_line].trim().is_empty() {
let next_line = match lines.next() {
Some(i) => i,
None => break (starting_ident, is_ordered, pos, pos),
}; };
let bullet = &src[0..beg];
while let Some((mut cont_end, mut end, mut line)) = lines.next() {
// this line is emtpy
if line.is_empty() {
if let Some((next_cont_end, next_end, next_line)) = lines.next() {
// next line is emtpy, too // next line is emtpy, too
if src[curr_line..next_line].trim().is_empty() { if next_line.is_empty() {
break (starting_ident, is_ordered, pos, next_line); return (bullet, beg, pre_cont_end, next_end, false);
} else { } else {
// move to next line // move to next line
pos = curr_line; pre_end = end;
curr_line = next_line; cont_end = next_cont_end;
end = next_end;
line = next_line;
} }
}
let ident = ident!(src[pos..curr_line]);
// less indented than the starting line
if ident < starting_ident {
break (starting_ident, is_ordered, pos, pos);
}
if ident > starting_ident || Self::is_item(&src[pos + ident..]) {
pos = curr_line;
} else { } else {
break (starting_ident, is_ordered, pos, pos); return (bullet, beg, pre_cont_end, end, false);
} }
})
} }
let line_ident = Self::ident(line);
if line_ident < ident {
return (bullet, beg, pre_cont_end, pre_end, false);
} else if line_ident == ident {
return (
bullet,
beg,
pre_cont_end,
pre_end,
Self::is_item(&line[ident..]).0,
);
}
pre_end = end;
pre_cont_end = cont_end;
}
(bullet, beg, src.len(), src.len(), false)
}
fn ident(src: &str) -> usize {
src.as_bytes()
.iter()
.position(|&c| c != b' ' && c != b'\t')
.unwrap_or(0)
}
}
#[test]
fn is_item() {
assert_eq!(List::is_item("+ item"), (true, false));
assert_eq!(List::is_item("- item"), (true, false));
assert_eq!(List::is_item("10. item"), (true, true));
assert_eq!(List::is_item("10) item"), (true, true));
assert_eq!(List::is_item("1. item"), (true, true));
assert_eq!(List::is_item("1) item"), (true, true));
assert_eq!(List::is_item("10. "), (true, true));
assert_eq!(List::is_item("10.\n"), (true, true));
assert_eq!(List::is_item("10."), (false, false));
assert_eq!(List::is_item("+"), (false, false));
assert_eq!(List::is_item("-item"), (false, false));
assert_eq!(List::is_item("+item"), (false, false));
} }
#[test] #[test]
fn parse() { fn parse() {
assert_eq!( assert_eq!(
List::parse( List::parse("+ item1\n+ item2\n+ item3", 0),
r"+ item1 ("+ ", 2, 7, 8, true)
+ item2
+ item3"
),
Some((0, false, 23, 23))
); );
assert_eq!( assert_eq!(
List::parse( List::parse("* item1\n\n* item2\n* item3", 0),
r"* item1 ("* ", 2, 7, 9, true)
* item2
* item3"
),
Some((0, false, 24, 24))
); );
assert_eq!( assert_eq!(
List::parse( List::parse("- item1\n\n\n- item2\n- item3", 0),
r"- item1 ("- ", 2, 7, 10, false)
- item2
- item1"
),
Some((0, false, 16, 18))
); );
assert_eq!( assert_eq!(
List::parse( List::parse("1. item1\n\n\n\n2. item2\n3. item3", 0),
r"1. item1 ("1. ", 3, 8, 11, false)
2. item1
3. item2"
),
Some((0, true, 28, 28))
); );
assert_eq!( assert_eq!(
List::parse( List::parse(" + item1\n + item2\n+ item3", 2),
r" 1) item1 (" + ", 4, 21, 22, false)
2) item1
3) item2"
),
Some((2, true, 11, 11))
); );
assert_eq!( assert_eq!(
List::parse( List::parse(" + item1\n + item2\n + item3", 2),
r" + item1 (" + ", 4, 9, 10, true)
1) item1
+ item2"
),
Some((2, false, 32, 32))
);
assert_eq!(
List::parse(
r" item1
+ item1
+ item2"
),
None
);
assert_eq!(
List::parse(
r#"- Lorem ipsum dolor sit amet, consectetur adipiscing elit.
- Nulla et dolor vitae elit placerat sagittis. Aliquam a lobortis massa,
aliquam efficitur arcu.
- Lorem ipsum dolor sit amet, consectetur adipiscing elit.
- Phasellus auctor lacus a orci imperdiet, ut facilisis neque lobortis.
- Proin condimentum id orci vitae lobortis. Nunc sollicitudin risus neque,
dapibus malesuada sem faucibus vitae.
- Sed vitae dolor augue. Phasellus at rhoncus arcu. Suspendisse potenti.
- Nulla faucibus, metus ut porta hendrerit, urna lorem porta metus, in tempus
nibh orci sed sapien.
- Morbi tortor mi, dapibus vel faucibus a, iaculis sed turpis."#
),
Some((0, false, 666, 666))
);
}
#[test]
fn is_item() {
assert!(List::is_item("+ item"));
assert!(List::is_item("- item"));
assert!(List::is_item("10. item"));
assert!(List::is_item("10) item"));
assert!(List::is_item("1. item"));
assert!(List::is_item("1) item"));
assert!(List::is_item("10. "));
assert!(List::is_item("10.\n"));
assert!(!List::is_item("10."));
assert!(!List::is_item("-item"));
assert!(!List::is_item("+item"));
}
#[test]
fn parse_item() {
assert_eq!(List::parse_item("+ Item1\n+ Item2", 0), (2, 8));
assert_eq!(List::parse_item("+ Item1\n\n+ Item2", 0), (2, 9));
assert_eq!(
List::parse_item(
r"+ item1
+ item1
+ item2",
0
),
(2, 25)
);
assert_eq!(
List::parse_item(
r" 1. item1
+ item2",
2
),
(5, 11)
);
assert_eq!(
List::parse_item(
r"+ It
em1
+ Item2",
0
),
(2, 11)
);
assert_eq!(
List::parse_item(
r#"1) Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec sit amet
ullamcorper ante, nec pellentesque nisi.
2) Sed pulvinar ut arcu id aliquam.Curabitur quis justo eu magna maximus sodales.
Curabitur nisl nisi, ornare in enim id, sagittis facilisis magna.
3) Curabitur venenatis molestie eros sit amet congue. Nunc at molestie leo, vitae
malesuada nisi."#,
0
),
(3, 119)
);
assert_eq!(
List::parse_item(
r#"- Lorem ipsum dolor sit amet, consectetur adipiscing elit.
- Nulla et dolor vitae elit placerat sagittis. Aliquam a lobortis massa,
aliquam efficitur arcu.
- Lorem ipsum dolor sit amet, consectetur adipiscing elit.
- Phasellus auctor lacus a orci imperdiet, ut facilisis neque lobortis.
- Proin condimentum id orci vitae lobortis. Nunc sollicitudin risus neque,
dapibus malesuada sem faucibus vitae.
- Sed vitae dolor augue. Phasellus at rhoncus arcu. Suspendisse potenti.
- Nulla faucibus, metus ut porta hendrerit, urna lorem porta metus, in tempus
nibh orci sed sapien.
- Morbi tortor mi, dapibus vel faucibus a, iaculis sed turpis."#,
0
),
(2, 421)
); );
assert_eq!(List::parse("+\n", 0), ("+", 1, 1, 1, false));
assert_eq!(List::parse("+\n+ item2\n+ item3", 0), ("+", 1, 1, 1, true));
assert_eq!(List::parse("1) item1", 0), ("1) ", 3, 8, 8, false));
assert_eq!(List::parse("1) item1\n", 0), ("1) ", 3, 8, 9, false));
} }

View file

@ -12,7 +12,11 @@ pub use self::keyword::{Key, Keyword};
pub use self::list::List; pub use self::list::List;
pub use self::rule::Rule; pub use self::rule::Rule;
#[cfg_attr(test, derive(PartialEq, Debug))] use memchr::memchr;
use memchr::memchr_iter;
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub enum Element<'a> { pub enum Element<'a> {
Paragraph { Paragraph {
cont_end: usize, cont_end: usize,
@ -77,102 +81,86 @@ pub enum Element<'a> {
List { List {
ident: usize, ident: usize,
ordered: bool, ordered: bool,
cont_end: usize,
end: usize,
}, },
} }
impl<'a> Element<'a> { impl<'a> Element<'a> {
pub fn next_2(src: &'a str) -> (usize, Option<Element<'a>>, Option<(Element<'a>, usize)>) { // return (element, off, next element, next offset)
let bytes = src.as_bytes(); // the end of first element is relative to the offset
// next offset is relative to the end of the first element
let mut pos = skip_empty_line!(src, 0); pub fn next_2(src: &'a str) -> (Option<Element<'a>>, usize, Option<(Element<'a>, usize)>) {
// skip empty lines
let mut pos = match src.chars().position(|c| c != '\n') {
Some(pos) => pos,
None => return (None, src.len(), None),
};
let start = pos; let start = pos;
let bytes = src.as_bytes();
if start == src.len() { let mut line_ends = memchr_iter(b'\n', &bytes[start..]).map(|i| i + start);
return (start, None, None);
}
loop { loop {
// Unlike other element, footnote definition must starts at column 0 let line_beg = pos;
if bytes[pos] == b'[' {
if let Some((label, cont, off)) = FnDef::parse(&src[pos..]) {
break if pos == start {
(off + 1, Some(Element::FnDef { label, cont }), None)
} else {
(
start,
Some(Element::Paragraph {
cont_end: pos - 1,
end: pos,
}),
Some((Element::FnDef { label, cont }, off + 1)),
)
};
}
}
let end = pos;
pos = skip_space!(src, pos);
if pos <= src.len() {
macro_rules! brk { macro_rules! brk {
($ele:expr, $off:expr) => { ($ele:expr, $off:expr) => {
break if pos == start { break if line_beg == 0 || pos == start {
($off, Some($ele), None) (Some($ele), start + $off, None)
} else { } else {
( (
start,
Some(Element::Paragraph { Some(Element::Paragraph {
cont_end: end, cont_end: line_beg - start - 1,
end: pos - 1, end: line_beg - start,
}), }),
start,
Some(($ele, $off)), Some(($ele, $off)),
) )
}; };
}; };
} }
if bytes[pos] == b'+' // Unlike other element, footnote definition must starts at column 0
|| bytes[pos] == b'-' if bytes[pos..].starts_with(b"[fn:") {
|| bytes[pos] == b'*' if let Some((label, cont, off)) = FnDef::parse(&src[pos..]) {
|| (bytes[pos] >= b'0' && bytes[pos] <= b'9') brk!(Element::FnDef { label, cont }, off + 1);
{
if let Some((ident, ordered, cont_end, list_end)) = List::parse(&src[end..]) {
let list = Element::List {
ident,
ordered,
cont_end,
end: list_end,
};
break if pos == start {
(1, Some(list), None)
} else {
(
start,
Some(Element::Paragraph {
cont_end: end,
end: end,
}),
Some((list, 1)),
)
};
} }
} }
// FIXME:
if bytes[pos] == b'\n' { if bytes[pos] == b'\n' {
break ( break (
start,
Some(Element::Paragraph { Some(Element::Paragraph {
cont_end: end, cont_end: pos - start - 1,
end: pos, end: pos - start + 1,
}), }),
start,
None, None,
); );
} }
pos = skip_space!(src, pos);
let (is_item, ordered) = List::is_item(&src[pos..]);
if is_item {
let list = Element::List {
ident: pos - line_beg,
ordered,
};
break if line_beg == start {
(Some(list), start, None)
} else {
(
Some(Element::Paragraph {
cont_end: line_beg - start - 1,
end: line_beg - start,
}),
start,
Some((list, 1)),
)
};
}
// TODO: LaTeX environment // TODO: LaTeX environment
if bytes[pos] == b'\\' {} if bytes[pos..].starts_with(b"\\begin{") {}
// Rule // Rule
if bytes[pos] == b'-' { if bytes[pos] == b'-' {
@ -183,19 +171,14 @@ impl<'a> Element<'a> {
} }
// TODO: multiple lines fixed width area // TODO: multiple lines fixed width area
if bytes[pos] == b':' if bytes[pos..].starts_with(b": ") || bytes[pos..].starts_with(b":\n") {
&& bytes let eol = memchr(b'\n', &bytes[pos..])
.get(pos + 1)
.map(|&b| b == b' ' || b == b'\n')
.unwrap_or(false)
{
let eol = memchr::memchr(b'\n', &src.as_bytes()[pos..])
.map(|i| i + 1) .map(|i| i + 1)
.unwrap_or_else(|| src.len() - pos); .unwrap_or_else(|| src.len() - pos);
brk!(Element::FixedWidth(&src[pos + 1..pos + eol]), eol); brk!(Element::FixedWidth(&src[pos + 1..pos + eol].trim()), eol);
} }
if bytes[pos] == b'#' && bytes.get(pos + 1).map(|&b| b == b'+').unwrap_or(false) { if bytes[pos..].starts_with(b"#+") {
if let Some((name, args, cont_beg, cont_end, end)) = Block::parse(&src[pos..]) { if let Some((name, args, cont_beg, cont_end, end)) = Block::parse(&src[pos..]) {
let cont = &src[pos + cont_beg + 1..pos + cont_end - 1]; let cont = &src[pos + cont_beg + 1..pos + cont_end - 1];
match name.to_uppercase().as_str() { match name.to_uppercase().as_str() {
@ -232,9 +215,7 @@ impl<'a> Element<'a> {
}; };
} }
if let Some((name, args, cont_beg, cont_end, end)) = if let Some((name, args, cont_beg, cont_end, end)) = DynBlock::parse(&src[pos..]) {
DynBlock::parse(&src[pos..])
{
brk!( brk!(
Element::DynBlock { Element::DynBlock {
name, name,
@ -260,34 +241,35 @@ impl<'a> Element<'a> {
// Comment // Comment
// TODO: multiple lines comment // TODO: multiple lines comment
if bytes[pos] == b'#' && bytes.get(pos + 1).map(|&b| b == b' ').unwrap_or(false) { if bytes[pos..].starts_with(b"# ") || bytes[pos..].starts_with(b"#\n") {
let eol = memchr::memchr(b'\n', &src.as_bytes()[pos..]) let eol = memchr(b'\n', &bytes[pos..])
.map(|i| i + 1) .map(|i| i + 1)
.unwrap_or_else(|| src.len() - pos); .unwrap_or_else(|| src.len() - pos);
brk!(Element::Comment(&src[pos + 1..pos + eol]), eol); brk!(Element::Comment(&src[pos + 1..pos + eol].trim()), eol);
}
} }
if let Some(off) = memchr::memchr(b'\n', &src.as_bytes()[pos..]) { // move to the beginning of the next line
pos += off + 1; if let Some(off) = line_ends.next() {
// last char pos = off + 1;
if pos == src.len() {
// the last character
if pos >= src.len() {
break ( break (
start,
Some(Element::Paragraph { Some(Element::Paragraph {
cont_end: pos - 1, cont_end: src.len() - start - 1,
end: pos, end: src.len() - start,
}), }),
start,
None, None,
); );
} }
} else { } else {
break ( break (
start,
Some(Element::Paragraph { Some(Element::Paragraph {
cont_end: src.len(), cont_end: src.len() - start,
end: src.len(), end: src.len() - start,
}), }),
start,
None, None,
); );
} }
@ -297,6 +279,102 @@ impl<'a> Element<'a> {
#[test] #[test]
fn next_2() { fn next_2() {
use self::Element::*;
assert_eq!(Element::next_2("\n\n\n"), (None, 3, None));
let len = "Lorem ipsum dolor sit amet.".len();
assert_eq!(
Element::next_2("\nLorem ipsum dolor sit amet.\n\n\n"),
(
Some(Paragraph {
cont_end: len,
end: len + 2,
}),
1,
None
)
);
assert_eq!(
Element::next_2("\n\nLorem ipsum dolor sit amet.\n\n"),
(
Some(Paragraph {
cont_end: len,
end: len + 2,
}),
2,
None
)
);
assert_eq!(
Element::next_2("\nLorem ipsum dolor sit amet.\n"),
(
Some(Paragraph {
cont_end: len,
end: len + 1,
}),
1,
None
)
);
assert_eq!(
Element::next_2("\n\n\nLorem ipsum dolor sit amet."),
(
Some(Paragraph {
cont_end: len,
end: len,
}),
3,
None
)
);
assert_eq!(
Element::next_2("\n\n\n: Lorem ipsum dolor sit amet.\n"),
(
Some(FixedWidth("Lorem ipsum dolor sit amet.")),
"\n\n\n: Lorem ipsum dolor sit amet.\n".len(),
None
)
);
assert_eq!(
Element::next_2("\n\n\n: Lorem ipsum dolor sit amet."),
(
Some(FixedWidth("Lorem ipsum dolor sit amet.")),
"\n\n\n: Lorem ipsum dolor sit amet.".len(),
None
)
);
assert_eq!(
Element::next_2("\n\nLorem ipsum dolor sit amet.\n: Lorem ipsum dolor sit amet.\n"),
(
Some(Paragraph {
cont_end: len,
end: len + 1,
}),
2,
Some((FixedWidth("Lorem ipsum dolor sit amet."), 30))
)
);
assert_eq!(
Element::next_2("\n\nLorem ipsum dolor sit amet.\n+ Lorem ipsum dolor sit amet.\n"),
(
Some(Paragraph {
cont_end: len,
end: len + 1,
}),
2,
Some((
List {
ident: 0,
ordered: false,
},
1
))
)
);
// TODO: more tests // TODO: more tests
assert_eq!(Element::next_2("\n\n\n\n"), (4, None, None));
} }

View file

@ -73,7 +73,7 @@ impl<W: Write> Handler<W> for HtmlHandler {
fn handle_list_end(&mut self, w: &mut W, ordered: bool) -> Result<()> { fn handle_list_end(&mut self, w: &mut W, ordered: bool) -> Result<()> {
write!(w, "{}", if ordered { "</ol>" } else { "</ul>" }) write!(w, "{}", if ordered { "</ol>" } else { "</ul>" })
} }
fn handle_list_beg_item(&mut self, w: &mut W) -> Result<()> { fn handle_list_beg_item(&mut self, w: &mut W, bullet: &str) -> Result<()> {
write!(w, "<li>") write!(w, "<li>")
} }
fn handle_list_end_item(&mut self, w: &mut W) -> Result<()> { fn handle_list_end_item(&mut self, w: &mut W) -> Result<()> {

View file

@ -30,7 +30,7 @@ pub trait Handler<W: Write> {
fn handle_dyn_block_end(&mut self, w: &mut W) -> Result<()>; fn handle_dyn_block_end(&mut self, w: &mut W) -> Result<()>;
fn handle_list_beg(&mut self, w: &mut W, ordered: bool) -> Result<()>; fn handle_list_beg(&mut self, w: &mut W, ordered: bool) -> Result<()>;
fn handle_list_end(&mut self, w: &mut W, ordered: bool) -> Result<()>; fn handle_list_end(&mut self, w: &mut W, ordered: bool) -> Result<()>;
fn handle_list_beg_item(&mut self, w: &mut W) -> Result<()>; fn handle_list_beg_item(&mut self, w: &mut W, bullet: &str) -> Result<()>;
fn handle_list_end_item(&mut self, w: &mut W) -> Result<()>; fn handle_list_end_item(&mut self, w: &mut W) -> Result<()>;
fn handle_call(&mut self, w: &mut W, value: &str) -> Result<()>; fn handle_call(&mut self, w: &mut W, value: &str) -> Result<()>;
fn handle_clock(&mut self, w: &mut W) -> Result<()>; fn handle_clock(&mut self, w: &mut W) -> Result<()>;
@ -113,7 +113,7 @@ impl<'a, W: Write, H: Handler<W>> Render<'a, W, H> {
DynBlockEnd => h.handle_dyn_block_end(w)?, DynBlockEnd => h.handle_dyn_block_end(w)?,
ListBeg { ordered } => h.handle_list_beg(w, ordered)?, ListBeg { ordered } => h.handle_list_beg(w, ordered)?,
ListEnd { ordered } => h.handle_list_end(w, ordered)?, ListEnd { ordered } => h.handle_list_end(w, ordered)?,
ListItemBeg => h.handle_list_beg_item(w)?, ListItemBeg { bullet } => h.handle_list_beg_item(w, bullet)?,
ListItemEnd => h.handle_list_end_item(w)?, ListItemEnd => h.handle_list_end_item(w)?,
Call { value } => h.handle_call(w, value)?, Call { value } => h.handle_call(w, value)?,
Clock => h.handle_clock(w)?, Clock => h.handle_clock(w)?,

View file

@ -8,6 +8,7 @@ mod utils;
mod elements; mod elements;
mod export; mod export;
mod headline; mod headline;
mod lines;
mod objects; mod objects;
mod parser; mod parser;

54
src/lines.rs Normal file
View file

@ -0,0 +1,54 @@
use memchr::{memchr_iter, Memchr};
use std::iter::{once, Chain, Once};
/// Line iterator over a string slice that reports byte offsets alongside
/// each line's text.
///
/// Each item is `(content_end, next_start, line)`; see the `Iterator`
/// implementation for the exact meaning of the offsets.
pub struct Lines<'a> {
// The text being split into lines.
src: &'a str,
// Offsets of every `\n` in `src`, followed by one final `src.len()` so that
// the text after the last newline is yielded as a line too.
iter: Chain<Memchr<'a>, Once<usize>>,
// Byte offset in `src` at which the next line to be yielded begins.
start: usize,
// NOTE(review): initialised to 0 and never read in the code visible here;
// looks like a leftover -- confirm before removing.
pre_cont_end: usize,
}
impl<'a> Lines<'a> {
    /// Creates a line iterator positioned at the start of `src`.
    ///
    /// The internal offset iterator yields the position of every `\n` in
    /// `src` and then `src.len()` once, so the text following the last
    /// newline (possibly empty) is produced as a final line.
    pub fn new(src: &'a str) -> Lines<'a> {
        let newlines = memchr_iter(b'\n', src.as_bytes());
        let end_of_input = once(src.len());

        Lines {
            src,
            iter: newlines.chain(end_of_input),
            start: 0,
            pre_cont_end: 0,
        }
    }
}
impl<'a> Iterator for Lines<'a> {
    /// `(content_end, next_start, line)` where
    ///
    /// * `content_end` is the byte offset in the source just past the line's
    ///   text, excluding the `\n` (and a `\r` directly before it, if any);
    /// * `next_start` is the byte offset at which the following line begins
    ///   (equal to the source length once the input is exhausted);
    /// * `line` is the line's text without its line terminator.
    type Item = (usize, usize, &'a str);

    /// Yields the next line together with its offsets, or `None` once the
    /// final line (the text after the last `\n`, possibly empty) has been
    /// returned.
    #[inline]
    fn next(&mut self) -> Option<(usize, usize, &'a str)> {
        let src = self.src;
        let i = self.iter.next()?;

        // `i == src.len()` is the end-of-input marker chained on by
        // `Lines::new`; any other value is the offset of a `\n`.
        let at_newline = i != src.len();

        // Treat `\r\n` as a single terminator. The look-behind must stay
        // inside the current line: without the `i > self.start` guard a
        // source whose very first byte is `\n` (`i == 0`) would compute
        // `i - 1` and underflow / index out of bounds.
        let has_cr = at_newline && i > self.start && src.as_bytes()[i - 1] == b'\r';

        let cont_end = if has_cr { i - 1 } else { i };
        let line = &src[self.start..cont_end];

        // Step over the `\n`; at end of input there is nothing to skip.
        self.start = if at_newline { i + 1 } else { i };

        Some((cont_end, self.start, line))
    }

    /// Delegates to the underlying offset iterator, which produces exactly
    /// one offset per line.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iter.size_hint()
    }
}
#[test]
fn lines() {
    // Expected `(content_end, next_start, line)` triples, in order. The
    // input mixes a `\r\n` terminator, an empty line and a trailing `\n`
    // (which produces a final empty line).
    let expected = [
        (3, 5, "foo"),
        (8, 9, "bar"),
        (9, 10, ""),
        (13, 14, "baz"),
        (14, 14, ""),
    ];

    let mut lines = Lines::new("foo\r\nbar\n\nbaz\n");
    for item in expected.iter() {
        assert_eq!(Some(*item), lines.next());
    }
    // The iterator is exhausted after the trailing empty line.
    assert_eq!(None, lines.next());
}

View file

@ -51,13 +51,13 @@ impl<'a> Object<'a> {
return (Object::Text(src), src.len(), None); return (Object::Text(src), src.len(), None);
} }
let chars = ascii_chars!('@', ' ', '"', '(', '\n', '{', '<', '['); let bs = bytes!(b'@', b' ', b'"', b'(', b'\n', b'{', b'<', b'[');
let mut pos = 0; let mut pos = 0;
loop { loop {
macro_rules! brk { macro_rules! brk {
($obj:expr, $off:expr, $pos:expr) => { ($obj:expr, $off:expr, $pos:expr) => {
break if pos == 0 { break if $pos == 0 {
($obj, $off, None) ($obj, $off, None)
} else { } else {
(Object::Text(&src[0..$pos]), $pos, Some(($obj, $off))) (Object::Text(&src[0..$pos]), $pos, Some(($obj, $off)))
@ -159,8 +159,8 @@ impl<'a> Object<'a> {
_ => (), _ => (),
} }
if let Some(off) = chars if let Some(off) = bs
.find(&src[pos + 1..]) .find(&bytes[pos + 1..])
.map(|i| i + pos + 1) .map(|i| i + pos + 1)
.filter(|&i| i < src.len() - 2) .filter(|&i| i < src.len() - 2)
{ {

View file

@ -5,54 +5,19 @@ use objects::*;
#[cfg_attr(test, derive(PartialEq))] #[cfg_attr(test, derive(PartialEq))]
#[derive(Copy, Clone, Debug)] #[derive(Copy, Clone, Debug)]
pub enum Container { pub enum Container {
Headline { Headline { beg: usize, end: usize },
beg: usize, Section { end: usize },
end: usize, Paragraph { cont_end: usize, end: usize },
}, CtrBlock { cont_end: usize, end: usize },
Section { QteBlock { cont_end: usize, end: usize },
end: usize, SplBlock { cont_end: usize, end: usize },
}, DynBlock { cont_end: usize, end: usize },
Paragraph { List { ident: usize, ordered: bool },
cont_end: usize, ListItem { cont_end: usize, end: usize },
end: usize, Italic { end: usize },
}, Strike { end: usize },
CtrBlock { Bold { end: usize },
cont_end: usize, Underline { end: usize },
end: usize,
},
QteBlock {
cont_end: usize,
end: usize,
},
SplBlock {
cont_end: usize,
end: usize,
},
DynBlock {
cont_end: usize,
end: usize,
},
List {
ident: usize,
ordered: bool,
cont_end: usize,
end: usize,
},
ListItem {
end: usize,
},
Italic {
end: usize,
},
Strike {
end: usize,
},
Bold {
end: usize,
},
Underline {
end: usize,
},
} }
#[cfg_attr(test, derive(PartialEq))] #[cfg_attr(test, derive(PartialEq))]
@ -109,7 +74,9 @@ pub enum Event<'a> {
ListEnd { ListEnd {
ordered: bool, ordered: bool,
}, },
ListItemBeg, ListItemBeg {
bullet: &'a str,
},
ListItemEnd, ListItemEnd,
Call { Call {
@ -166,6 +133,7 @@ pub struct Parser<'a> {
off: usize, off: usize,
ele_buf: Option<(Element<'a>, usize)>, ele_buf: Option<(Element<'a>, usize)>,
obj_buf: Option<(Object<'a>, usize)>, obj_buf: Option<(Object<'a>, usize)>,
has_more_item: bool,
} }
impl<'a> Parser<'a> { impl<'a> Parser<'a> {
@ -176,11 +144,13 @@ impl<'a> Parser<'a> {
off: 0, off: 0,
ele_buf: None, ele_buf: None,
obj_buf: None, obj_buf: None,
has_more_item: false,
} }
} }
fn next_sec_or_hdl(&mut self) -> Event<'a> { fn next_sec_or_hdl(&mut self) -> Event<'a> {
let end = Headline::find_level(&self.text[self.off..], std::usize::MAX); let end = Headline::find_level(&self.text[self.off..], std::usize::MAX);
debug_assert!(end <= self.text.len());
if end != 0 { if end != 0 {
self.stack.push(Container::Section { self.stack.push(Container::Section {
end: self.off + end, end: self.off + end,
@ -192,7 +162,9 @@ impl<'a> Parser<'a> {
} }
fn next_hdl(&mut self) -> Event<'a> { fn next_hdl(&mut self) -> Event<'a> {
let (hdl, off, end) = Headline::parse(&self.text[self.off..]); let tail = &self.text[self.off..];
let (hdl, off, end) = Headline::parse(tail);
debug_assert!(end <= self.text.len());
self.stack.push(Container::Headline { self.stack.push(Container::Headline {
beg: self.off + off, beg: self.off + off,
end: self.off + end, end: self.off + end,
@ -201,104 +173,126 @@ impl<'a> Parser<'a> {
Event::HeadlineBeg(hdl) Event::HeadlineBeg(hdl)
} }
fn next_ele(&mut self, end: usize) -> Event<'a> { fn next_ele(&mut self, text: &'a str) -> Event<'a> {
let (ele, off) = self let (ele, off) = self
.ele_buf .ele_buf
.take() .take()
.map(|(ele, off)| (Some(ele), off)) .map(|(ele, off)| (Some(ele), off))
.unwrap_or_else(|| { .unwrap_or_else(|| {
let (off, ele, next_2) = Element::next_2(&self.text[self.off..end]); let (ele, off, next_ele) = Element::next_2(text);
self.ele_buf = next_2; self.ele_buf = next_ele;
(ele, off) (ele, off)
}); });
debug_assert!(self.off + off <= end); debug_assert!(off <= text.len());
self.off += off;
if let Some(ele) = ele {
match ele { match ele {
Element::Paragraph { cont_end, end } => self.stack.push(Container::Paragraph { Some(Element::Paragraph { cont_end, end }) => {
debug_assert!(cont_end <= text.len() && end <= text.len());
self.stack.push(Container::Paragraph {
cont_end: cont_end + self.off, cont_end: cont_end + self.off,
end: end + self.off, end: end + self.off,
}), });
Element::QteBlock { end, cont_end, .. } => self.stack.push(Container::QteBlock { Event::ParagraphBeg
}
Some(Element::QteBlock { end, cont_end, .. }) => {
debug_assert!(cont_end <= text.len() && end <= text.len());
self.stack.push(Container::QteBlock {
cont_end: cont_end + self.off, cont_end: cont_end + self.off,
end: end + self.off, end: end + self.off,
}), });
Element::CtrBlock { end, cont_end, .. } => self.stack.push(Container::CtrBlock { Event::QteBlockBeg
}
Some(Element::CtrBlock { end, cont_end, .. }) => {
debug_assert!(cont_end <= text.len() && end <= text.len());
self.stack.push(Container::CtrBlock {
cont_end: cont_end + self.off, cont_end: cont_end + self.off,
end: end + self.off, end: end + self.off,
}), });
Element::SplBlock { end, cont_end, .. } => self.stack.push(Container::SplBlock { Event::CtrBlockBeg
}
Some(Element::SplBlock {
name,
args,
end,
cont_end,
}) => {
debug_assert!(cont_end <= text.len() && end <= text.len());
self.stack.push(Container::SplBlock {
cont_end: cont_end + self.off, cont_end: cont_end + self.off,
end: end + self.off, end: end + self.off,
}), });
Element::DynBlock { end, cont_end, .. } => self.stack.push(Container::DynBlock { Event::SplBlockBeg { name, args }
cont_end: cont_end + self.off, }
end: end + self.off, Some(Element::DynBlock {
}), name,
Element::List { args,
ident,
ordered,
cont_end, cont_end,
end, end,
} => self.stack.push(Container::List { }) => {
ident, debug_assert!(cont_end <= text.len() && end <= text.len());
ordered, self.stack.push(Container::DynBlock {
cont_end: cont_end + self.off, cont_end: cont_end + self.off,
end: end + self.off, end: end + self.off,
}), });
_ => (), Event::DynBlockBeg { name, args }
} }
Some(Element::List { ident, ordered }) => {
self.off += off; self.stack.push(Container::List { ident, ordered });
self.has_more_item = true;
match ele { Event::ListBeg { ordered }
Element::Call { value } => Event::Call { value },
Element::Comment(c) => Event::Comment(c),
Element::CommentBlock { args, cont } => Event::CommentBlock { args, cont },
Element::CtrBlock { .. } => Event::CtrBlockBeg,
Element::DynBlock { name, args, .. } => Event::DynBlockBeg { name, args },
Element::ExampleBlock { args, cont } => Event::ExampleBlock { args, cont },
Element::ExportBlock { args, cont } => Event::ExportBlock { args, cont },
Element::FixedWidth(f) => Event::FixedWidth(f),
Element::FnDef { label, cont } => Event::FnDef { label, cont },
Element::Keyword { key, value } => Event::Keyword { key, value },
Element::List { ordered, .. } => Event::ListBeg { ordered },
Element::Paragraph { .. } => Event::ParagraphBeg,
Element::QteBlock { .. } => Event::QteBlockBeg,
Element::Rule => Event::Rule,
Element::SplBlock { name, args, .. } => Event::SplBlockBeg { name, args },
Element::SrcBlock { args, cont } => Event::SrcBlock { args, cont },
Element::VerseBlock { args, cont } => Event::VerseBlock { args, cont },
} }
} else { Some(Element::Call { value }) => Event::Call { value },
self.off += off; Some(Element::Comment(c)) => Event::Comment(c),
self.end() Some(Element::CommentBlock { args, cont }) => Event::CommentBlock { args, cont },
Some(Element::ExampleBlock { args, cont }) => Event::ExampleBlock { args, cont },
Some(Element::ExportBlock { args, cont }) => Event::ExportBlock { args, cont },
Some(Element::FixedWidth(f)) => Event::FixedWidth(f),
Some(Element::FnDef { label, cont }) => Event::FnDef { label, cont },
Some(Element::Keyword { key, value }) => Event::Keyword { key, value },
Some(Element::Rule) => Event::Rule,
Some(Element::SrcBlock { args, cont }) => Event::SrcBlock { args, cont },
Some(Element::VerseBlock { args, cont }) => Event::VerseBlock { args, cont },
None => self.end(),
} }
} }
fn next_obj(&mut self, end: usize) -> Event<'a> { fn next_obj(&mut self, text: &'a str) -> Event<'a> {
let (obj, off) = self.obj_buf.take().unwrap_or_else(|| { let (obj, off) = self.obj_buf.take().unwrap_or_else(|| {
let (obj, off, next_2) = Object::next_2(&self.text[self.off..end]); let (obj, off, next_obj) = Object::next_2(text);
self.obj_buf = next_2; self.obj_buf = next_obj;
(obj, off) (obj, off)
}); });
debug_assert!(self.off + off <= end); debug_assert!(off <= text.len());
match obj { match obj {
Object::Underline { end } => self.stack.push(Container::Underline { Object::Underline { end } => {
debug_assert!(end <= text.len());
self.stack.push(Container::Underline {
end: self.off + end, end: self.off + end,
}), });
Object::Strike { end } => self.stack.push(Container::Strike { }
Object::Strike { end } => {
debug_assert!(end <= text.len());
self.stack.push(Container::Strike {
end: self.off + end, end: self.off + end,
}), });
Object::Italic { end } => self.stack.push(Container::Italic { }
Object::Italic { end } => {
debug_assert!(end <= text.len());
self.stack.push(Container::Italic {
end: self.off + end, end: self.off + end,
}), });
Object::Bold { end } => self.stack.push(Container::Bold { }
Object::Bold { end } => {
debug_assert!(end <= text.len());
self.stack.push(Container::Bold {
end: self.off + end, end: self.off + end,
}), });
}
_ => (), _ => (),
} }
@ -324,13 +318,16 @@ impl<'a> Parser<'a> {
} }
} }
fn next_list_item(&mut self, end: usize, ident: usize) -> Event<'a> { fn next_list_item(&mut self, ident: usize) -> Event<'a> {
let (beg, end) = List::parse_item(&self.text[self.off..end], ident); let (bullet, cont_beg, cont_end, end, has_more) =
List::parse(&self.text[self.off..], ident);
self.stack.push(Container::ListItem { self.stack.push(Container::ListItem {
cont_end: self.off + cont_end,
end: self.off + end, end: self.off + end,
}); });
self.off += beg; self.off += cont_beg;
Event::ListItemBeg self.has_more_item = has_more;
Event::ListItemBeg { bullet }
} }
fn end(&mut self) -> Event<'a> { fn end(&mut self) -> Event<'a> {
@ -378,58 +375,50 @@ impl<'a> Iterator for Parser<'a> {
Container::DynBlock { cont_end, end, .. } Container::DynBlock { cont_end, end, .. }
| Container::CtrBlock { cont_end, end, .. } | Container::CtrBlock { cont_end, end, .. }
| Container::QteBlock { cont_end, end, .. } | Container::QteBlock { cont_end, end, .. }
| Container::SplBlock { cont_end, end, .. } => { | Container::SplBlock { cont_end, end, .. }
| Container::ListItem { cont_end, end } => {
let text = &self.text[self.off..cont_end];
if self.off >= cont_end { if self.off >= cont_end {
self.off = end; self.off = end;
self.end() self.end()
} else { } else {
self.next_ele(cont_end) self.next_ele(text)
} }
} }
Container::List { Container::List { ident, .. } => {
cont_end, if self.has_more_item {
end, self.next_list_item(ident)
ident,
..
} => {
if self.off >= cont_end {
self.off = end;
self.end()
} else { } else {
self.next_list_item(cont_end, ident)
}
}
Container::ListItem { end } => {
if self.off >= end {
self.end() self.end()
} else {
self.next_ele(end)
} }
} }
Container::Section { end } => { Container::Section { end } => {
let text = &self.text[self.off..end];
if self.off >= end { if self.off >= end {
self.end() self.end()
} else { } else {
self.next_ele(end) self.next_ele(text)
} }
} }
Container::Paragraph { cont_end, end } => { Container::Paragraph { cont_end, end } => {
let text = &self.text[self.off..cont_end];
if self.off >= cont_end { if self.off >= cont_end {
self.off = end; self.off = end;
self.end() self.end()
} else { } else {
self.next_obj(cont_end) self.next_obj(text)
} }
} }
Container::Bold { end } Container::Bold { end }
| Container::Underline { end } | Container::Underline { end }
| Container::Italic { end } | Container::Italic { end }
| Container::Strike { end } => { | Container::Strike { end } => {
let text = &self.text[self.off..end];
if self.off >= end { if self.off >= end {
self.off += 1; self.off += 1;
self.end() self.end()
} else { } else {
self.next_obj(end) self.next_obj(text)
} }
} }
}) })