refactor: simplify list parsing

This commit is contained in:
PoiScript 2019-02-02 23:42:31 +08:00
parent 763ec00434
commit d20d4c2880
9 changed files with 562 additions and 570 deletions

View file

@ -9,7 +9,9 @@ fn valid_label(ch: u8) -> bool {
impl FnDef {
pub fn parse(src: &str) -> Option<(&str, &str, usize)> {
if cfg!(test) {
starts_with!(src, "[fn:");
}
let label = until_while!(src, 4, b']', valid_label)?;

View file

@ -1,289 +1,157 @@
pub struct List;
use lines::Lines;
/// Expands to the byte index of the first character of `$src` that is neither
/// a space nor a tab, i.e. the width of its leading indentation.
///
/// Evaluates to `0` when `$src` is empty or consists only of spaces and tabs.
macro_rules! ident {
    ($src:expr) => {
        $src.bytes()
            .position(|byte| !(byte == b' ' || byte == b'\t'))
            .unwrap_or(0)
    };
}
pub struct List;
impl List {
#[inline]
fn is_item(src: &str) -> bool {
if src.len() < 2 {
return false;
pub fn is_item(src: &str) -> (bool, bool) {
if src.is_empty() {
return (false, false);
}
let bytes = src.as_bytes();
let i = match bytes[0] {
b'*' | b'-' | b'+' => 1,
let (i, ordered) = match bytes[0] {
b'*' | b'-' | b'+' => (1, false),
b'0'...b'9' => {
let i = bytes
.iter()
.position(|&c| !c.is_ascii_digit())
.unwrap_or_else(|| src.len());
if i >= src.len() - 1 {
return false;
}
let c = bytes[i];
if !(c == b'.' || c == b')') {
return false;
return (false, false);
}
i + 1
(i + 1, true)
}
_ => return false,
_ => return (false, false),
};
if i < src.len() {
// bullet is followed by a space or line ending
bytes[i] == b' ' || bytes[i] == b'\n'
}
#[inline]
pub fn is_ordered(byte: u8) -> bool {
match byte {
b'*' | b'-' | b'+' => false,
b'0'...b'9' => true,
_ => unreachable!(),
(bytes[i] == b' ' || bytes[i] == b'\n', ordered)
} else {
(false, false)
}
}
/// Locates the contents of the list item that starts on the first line of `src`.
///
/// `ident` is compared against the leading space/tab width of every following
/// line, and is also the byte offset from which the bullet's trailing space is
/// searched.
///
/// Returns `(contents_begin, contents_end)` as byte offsets into `src`.
///
/// Panics if there is no space at or after byte `ident` (the `unwrap` on the
/// `memchr` result), or if `ident` is greater than `src.len()`.
pub fn parse_item(src: &str, ident: usize) -> (usize, usize) {
// contents begin right after the first space found at or after `ident`
let beg = memchr::memchr(b' ', &src.as_bytes()[ident..])
.map(|i| i + ident + 1)
.unwrap();
// NOTE(review): `lines!` is defined outside this view; its items are used
// below as byte offsets into `src` marking where each line ends - confirm
let mut lines = lines!(src);
// skip first line
let mut pos = lines.next().unwrap();
for line_end in lines {
let line = &src[pos..line_end];
// a non-blank line indented exactly like this item ends the item; blank
// lines and lines with any other indentation are absorbed into it
if !line.trim().is_empty() && ident!(line) == ident {
break;
}
pos = line_end;
}
(beg, pos)
}
// returns (bullets, contents begin, contents end, end, has more)
pub fn parse(src: &str, ident: usize) -> (&str, usize, usize, usize, bool) {
debug_assert!(Self::is_item(&src[ident..]).0);
debug_assert!(
src[..ident].chars().all(|c| c == ' ' || c == '\t'),
"{:?} doesn't starts with indentation {}",
src,
ident
);
// return (ident, is_ordered, contents_end, end)
pub fn parse(src: &str) -> Option<(usize, bool, usize, usize)> {
let bytes = src.as_bytes();
let starting_ident = ident!(src);
if !Self::is_item(&src[starting_ident..]) {
return None;
let mut lines = Lines::new(src);
let (mut pre_cont_end, mut pre_end, first_line) = lines.next().unwrap();
let beg = match memchr::memchr(b' ', &first_line.as_bytes()[ident..]) {
Some(i) => i + ident + 1,
None => {
let len = first_line.len();
return (
&first_line,
len,
len,
len,
Self::is_item(lines.next().unwrap().2).0,
);
}
let mut lines = lines!(src);
// skip the starting line
let mut pos = lines.next().unwrap();
let is_ordered = Self::is_ordered(bytes[starting_ident]);
Some(loop {
let mut curr_line = match lines.next() {
Some(i) => i,
None => break (starting_ident, is_ordered, pos, pos),
};
// current line is empty
if src[pos..curr_line].trim().is_empty() {
let next_line = match lines.next() {
Some(i) => i,
None => break (starting_ident, is_ordered, pos, pos),
};
let bullet = &src[0..beg];
while let Some((mut cont_end, mut end, mut line)) = lines.next() {
// this line is empty
if line.is_empty() {
if let Some((next_cont_end, next_end, next_line)) = lines.next() {
// next line is empty, too
if src[curr_line..next_line].trim().is_empty() {
break (starting_ident, is_ordered, pos, next_line);
if next_line.is_empty() {
return (bullet, beg, pre_cont_end, next_end, false);
} else {
// move to next line
pos = curr_line;
curr_line = next_line;
pre_end = end;
cont_end = next_cont_end;
end = next_end;
line = next_line;
}
}
let ident = ident!(src[pos..curr_line]);
// less indented than the starting line
if ident < starting_ident {
break (starting_ident, is_ordered, pos, pos);
}
if ident > starting_ident || Self::is_item(&src[pos + ident..]) {
pos = curr_line;
} else {
break (starting_ident, is_ordered, pos, pos);
return (bullet, beg, pre_cont_end, end, false);
}
})
}
let line_ident = Self::ident(line);
if line_ident < ident {
return (bullet, beg, pre_cont_end, pre_end, false);
} else if line_ident == ident {
return (
bullet,
beg,
pre_cont_end,
pre_end,
Self::is_item(&line[ident..]).0,
);
}
pre_end = end;
pre_cont_end = cont_end;
}
(bullet, beg, src.len(), src.len(), false)
}
/// Returns the width, in bytes, of the leading run of spaces and tabs in `src`.
///
/// Yields `0` when `src` is empty or contains nothing but spaces and tabs.
fn ident(src: &str) -> usize {
    for (idx, byte) in src.bytes().enumerate() {
        if byte != b' ' && byte != b'\t' {
            // first byte that is not indentation
            return idx;
        }
    }
    0
}
}
#[test]
fn is_item() {
    // Each case pairs an input with the expected `(is_item, ordered)` result.
    let cases = [
        ("+ item", (true, false)),
        ("- item", (true, false)),
        ("10. item", (true, true)),
        ("10) item", (true, true)),
        ("1. item", (true, true)),
        ("1) item", (true, true)),
        ("10. ", (true, true)),
        ("10.\n", (true, true)),
        // a bullet must be followed by a space or a line ending
        ("10.", (false, false)),
        ("+", (false, false)),
        ("-item", (false, false)),
        ("+item", (false, false)),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(List::is_item(input), expected);
    }
}
#[test]
fn parse() {
assert_eq!(
List::parse(
r"+ item1
+ item2
+ item3"
),
Some((0, false, 23, 23))
List::parse("+ item1\n+ item2\n+ item3", 0),
("+ ", 2, 7, 8, true)
);
assert_eq!(
List::parse(
r"* item1
* item2
* item3"
),
Some((0, false, 24, 24))
List::parse("* item1\n\n* item2\n* item3", 0),
("* ", 2, 7, 9, true)
);
assert_eq!(
List::parse(
r"- item1
- item2
- item1"
),
Some((0, false, 16, 18))
List::parse("- item1\n\n\n- item2\n- item3", 0),
("- ", 2, 7, 10, false)
);
assert_eq!(
List::parse(
r"1. item1
2. item1
3. item2"
),
Some((0, true, 28, 28))
List::parse("1. item1\n\n\n\n2. item2\n3. item3", 0),
("1. ", 3, 8, 11, false)
);
assert_eq!(
List::parse(
r" 1) item1
2) item1
3) item2"
),
Some((2, true, 11, 11))
List::parse(" + item1\n + item2\n+ item3", 2),
(" + ", 4, 21, 22, false)
);
assert_eq!(
List::parse(
r" + item1
1) item1
+ item2"
),
Some((2, false, 32, 32))
);
assert_eq!(
List::parse(
r" item1
+ item1
+ item2"
),
None
);
assert_eq!(
List::parse(
r#"- Lorem ipsum dolor sit amet, consectetur adipiscing elit.
- Nulla et dolor vitae elit placerat sagittis. Aliquam a lobortis massa,
aliquam efficitur arcu.
- Lorem ipsum dolor sit amet, consectetur adipiscing elit.
- Phasellus auctor lacus a orci imperdiet, ut facilisis neque lobortis.
- Proin condimentum id orci vitae lobortis. Nunc sollicitudin risus neque,
dapibus malesuada sem faucibus vitae.
- Sed vitae dolor augue. Phasellus at rhoncus arcu. Suspendisse potenti.
- Nulla faucibus, metus ut porta hendrerit, urna lorem porta metus, in tempus
nibh orci sed sapien.
- Morbi tortor mi, dapibus vel faucibus a, iaculis sed turpis."#
),
Some((0, false, 666, 666))
);
}
#[test]
fn is_item() {
    // Inputs that must be recognised as list items.
    let accepted = [
        "+ item", "- item", "10. item", "10) item", "1. item", "1) item", "10. ", "10.\n",
    ];
    // Inputs whose bullet is not followed by a space or a line ending.
    let rejected = ["10.", "-item", "+item"];

    for src in accepted.iter() {
        assert!(List::is_item(src));
    }
    for src in rejected.iter() {
        assert!(!List::is_item(src));
    }
}
#[test]
fn parse_item() {
assert_eq!(List::parse_item("+ Item1\n+ Item2", 0), (2, 8));
assert_eq!(List::parse_item("+ Item1\n\n+ Item2", 0), (2, 9));
assert_eq!(
List::parse_item(
r"+ item1
+ item1
+ item2",
0
),
(2, 25)
);
assert_eq!(
List::parse_item(
r" 1. item1
+ item2",
2
),
(5, 11)
);
assert_eq!(
List::parse_item(
r"+ It
em1
+ Item2",
0
),
(2, 11)
);
assert_eq!(
List::parse_item(
r#"1) Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec sit amet
ullamcorper ante, nec pellentesque nisi.
2) Sed pulvinar ut arcu id aliquam.Curabitur quis justo eu magna maximus sodales.
Curabitur nisl nisi, ornare in enim id, sagittis facilisis magna.
3) Curabitur venenatis molestie eros sit amet congue. Nunc at molestie leo, vitae
malesuada nisi."#,
0
),
(3, 119)
);
assert_eq!(
List::parse_item(
r#"- Lorem ipsum dolor sit amet, consectetur adipiscing elit.
- Nulla et dolor vitae elit placerat sagittis. Aliquam a lobortis massa,
aliquam efficitur arcu.
- Lorem ipsum dolor sit amet, consectetur adipiscing elit.
- Phasellus auctor lacus a orci imperdiet, ut facilisis neque lobortis.
- Proin condimentum id orci vitae lobortis. Nunc sollicitudin risus neque,
dapibus malesuada sem faucibus vitae.
- Sed vitae dolor augue. Phasellus at rhoncus arcu. Suspendisse potenti.
- Nulla faucibus, metus ut porta hendrerit, urna lorem porta metus, in tempus
nibh orci sed sapien.
- Morbi tortor mi, dapibus vel faucibus a, iaculis sed turpis."#,
0
),
(2, 421)
List::parse(" + item1\n + item2\n + item3", 2),
(" + ", 4, 9, 10, true)
);
assert_eq!(List::parse("+\n", 0), ("+", 1, 1, 1, false));
assert_eq!(List::parse("+\n+ item2\n+ item3", 0), ("+", 1, 1, 1, true));
assert_eq!(List::parse("1) item1", 0), ("1) ", 3, 8, 8, false));
assert_eq!(List::parse("1) item1\n", 0), ("1) ", 3, 8, 9, false));
}

View file

@ -12,7 +12,11 @@ pub use self::keyword::{Key, Keyword};
pub use self::list::List;
pub use self::rule::Rule;
#[cfg_attr(test, derive(PartialEq, Debug))]
use memchr::memchr;
use memchr::memchr_iter;
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub enum Element<'a> {
Paragraph {
cont_end: usize,
@ -77,102 +81,86 @@ pub enum Element<'a> {
List {
ident: usize,
ordered: bool,
cont_end: usize,
end: usize,
},
}
impl<'a> Element<'a> {
pub fn next_2(src: &'a str) -> (usize, Option<Element<'a>>, Option<(Element<'a>, usize)>) {
let bytes = src.as_bytes();
let mut pos = skip_empty_line!(src, 0);
// return (element, off, next element, next offset)
// the end of first element is relative to the offset
// next offset is relative to the end of the first element
pub fn next_2(src: &'a str) -> (Option<Element<'a>>, usize, Option<(Element<'a>, usize)>) {
// skip empty lines
let mut pos = match src.chars().position(|c| c != '\n') {
Some(pos) => pos,
None => return (None, src.len(), None),
};
let start = pos;
if start == src.len() {
return (start, None, None);
}
let bytes = src.as_bytes();
let mut line_ends = memchr_iter(b'\n', &bytes[start..]).map(|i| i + start);
loop {
// Unlike other elements, a footnote definition must start at column 0
if bytes[pos] == b'[' {
if let Some((label, cont, off)) = FnDef::parse(&src[pos..]) {
break if pos == start {
(off + 1, Some(Element::FnDef { label, cont }), None)
} else {
(
start,
Some(Element::Paragraph {
cont_end: pos - 1,
end: pos,
}),
Some((Element::FnDef { label, cont }, off + 1)),
)
};
}
}
let line_beg = pos;
let end = pos;
pos = skip_space!(src, pos);
if pos <= src.len() {
macro_rules! brk {
($ele:expr, $off:expr) => {
break if pos == start {
($off, Some($ele), None)
break if line_beg == 0 || pos == start {
(Some($ele), start + $off, None)
} else {
(
start,
Some(Element::Paragraph {
cont_end: end,
end: pos - 1,
cont_end: line_beg - start - 1,
end: line_beg - start,
}),
start,
Some(($ele, $off)),
)
};
};
}
if bytes[pos] == b'+'
|| bytes[pos] == b'-'
|| bytes[pos] == b'*'
|| (bytes[pos] >= b'0' && bytes[pos] <= b'9')
{
if let Some((ident, ordered, cont_end, list_end)) = List::parse(&src[end..]) {
let list = Element::List {
ident,
ordered,
cont_end,
end: list_end,
};
break if pos == start {
(1, Some(list), None)
} else {
(
start,
Some(Element::Paragraph {
cont_end: end,
end: end,
}),
Some((list, 1)),
)
};
// Unlike other elements, a footnote definition must start at column 0
if bytes[pos..].starts_with(b"[fn:") {
if let Some((label, cont, off)) = FnDef::parse(&src[pos..]) {
brk!(Element::FnDef { label, cont }, off + 1);
}
}
// FIXME:
if bytes[pos] == b'\n' {
break (
start,
Some(Element::Paragraph {
cont_end: end,
end: pos,
cont_end: pos - start - 1,
end: pos - start + 1,
}),
start,
None,
);
}
pos = skip_space!(src, pos);
let (is_item, ordered) = List::is_item(&src[pos..]);
if is_item {
let list = Element::List {
ident: pos - line_beg,
ordered,
};
break if line_beg == start {
(Some(list), start, None)
} else {
(
Some(Element::Paragraph {
cont_end: line_beg - start - 1,
end: line_beg - start,
}),
start,
Some((list, 1)),
)
};
}
// TODO: LaTeX environment
if bytes[pos] == b'\\' {}
if bytes[pos..].starts_with(b"\\begin{") {}
// Rule
if bytes[pos] == b'-' {
@ -183,19 +171,14 @@ impl<'a> Element<'a> {
}
// TODO: multiple lines fixed width area
if bytes[pos] == b':'
&& bytes
.get(pos + 1)
.map(|&b| b == b' ' || b == b'\n')
.unwrap_or(false)
{
let eol = memchr::memchr(b'\n', &src.as_bytes()[pos..])
if bytes[pos..].starts_with(b": ") || bytes[pos..].starts_with(b":\n") {
let eol = memchr(b'\n', &bytes[pos..])
.map(|i| i + 1)
.unwrap_or_else(|| src.len() - pos);
brk!(Element::FixedWidth(&src[pos + 1..pos + eol]), eol);
brk!(Element::FixedWidth(&src[pos + 1..pos + eol].trim()), eol);
}
if bytes[pos] == b'#' && bytes.get(pos + 1).map(|&b| b == b'+').unwrap_or(false) {
if bytes[pos..].starts_with(b"#+") {
if let Some((name, args, cont_beg, cont_end, end)) = Block::parse(&src[pos..]) {
let cont = &src[pos + cont_beg + 1..pos + cont_end - 1];
match name.to_uppercase().as_str() {
@ -232,9 +215,7 @@ impl<'a> Element<'a> {
};
}
if let Some((name, args, cont_beg, cont_end, end)) =
DynBlock::parse(&src[pos..])
{
if let Some((name, args, cont_beg, cont_end, end)) = DynBlock::parse(&src[pos..]) {
brk!(
Element::DynBlock {
name,
@ -260,34 +241,35 @@ impl<'a> Element<'a> {
// Comment
// TODO: multiple lines comment
if bytes[pos] == b'#' && bytes.get(pos + 1).map(|&b| b == b' ').unwrap_or(false) {
let eol = memchr::memchr(b'\n', &src.as_bytes()[pos..])
if bytes[pos..].starts_with(b"# ") || bytes[pos..].starts_with(b"#\n") {
let eol = memchr(b'\n', &bytes[pos..])
.map(|i| i + 1)
.unwrap_or_else(|| src.len() - pos);
brk!(Element::Comment(&src[pos + 1..pos + eol]), eol);
}
brk!(Element::Comment(&src[pos + 1..pos + eol].trim()), eol);
}
if let Some(off) = memchr::memchr(b'\n', &src.as_bytes()[pos..]) {
pos += off + 1;
// last char
if pos == src.len() {
// move to the beginning of the next line
if let Some(off) = line_ends.next() {
pos = off + 1;
// the last character
if pos >= src.len() {
break (
start,
Some(Element::Paragraph {
cont_end: pos - 1,
end: pos,
cont_end: src.len() - start - 1,
end: src.len() - start,
}),
start,
None,
);
}
} else {
break (
start,
Some(Element::Paragraph {
cont_end: src.len(),
end: src.len(),
cont_end: src.len() - start,
end: src.len() - start,
}),
start,
None,
);
}
@ -297,6 +279,102 @@ impl<'a> Element<'a> {
#[test]
fn next_2() {
use self::Element::*;
assert_eq!(Element::next_2("\n\n\n"), (None, 3, None));
let len = "Lorem ipsum dolor sit amet.".len();
assert_eq!(
Element::next_2("\nLorem ipsum dolor sit amet.\n\n\n"),
(
Some(Paragraph {
cont_end: len,
end: len + 2,
}),
1,
None
)
);
assert_eq!(
Element::next_2("\n\nLorem ipsum dolor sit amet.\n\n"),
(
Some(Paragraph {
cont_end: len,
end: len + 2,
}),
2,
None
)
);
assert_eq!(
Element::next_2("\nLorem ipsum dolor sit amet.\n"),
(
Some(Paragraph {
cont_end: len,
end: len + 1,
}),
1,
None
)
);
assert_eq!(
Element::next_2("\n\n\nLorem ipsum dolor sit amet."),
(
Some(Paragraph {
cont_end: len,
end: len,
}),
3,
None
)
);
assert_eq!(
Element::next_2("\n\n\n: Lorem ipsum dolor sit amet.\n"),
(
Some(FixedWidth("Lorem ipsum dolor sit amet.")),
"\n\n\n: Lorem ipsum dolor sit amet.\n".len(),
None
)
);
assert_eq!(
Element::next_2("\n\n\n: Lorem ipsum dolor sit amet."),
(
Some(FixedWidth("Lorem ipsum dolor sit amet.")),
"\n\n\n: Lorem ipsum dolor sit amet.".len(),
None
)
);
assert_eq!(
Element::next_2("\n\nLorem ipsum dolor sit amet.\n: Lorem ipsum dolor sit amet.\n"),
(
Some(Paragraph {
cont_end: len,
end: len + 1,
}),
2,
Some((FixedWidth("Lorem ipsum dolor sit amet."), 30))
)
);
assert_eq!(
Element::next_2("\n\nLorem ipsum dolor sit amet.\n+ Lorem ipsum dolor sit amet.\n"),
(
Some(Paragraph {
cont_end: len,
end: len + 1,
}),
2,
Some((
List {
ident: 0,
ordered: false,
},
1
))
)
);
// TODO: more tests
assert_eq!(Element::next_2("\n\n\n\n"), (4, None, None));
}

View file

@ -73,7 +73,7 @@ impl<W: Write> Handler<W> for HtmlHandler {
/// Writes the closing tag of a list: `</ol>` when `ordered`, `</ul>` otherwise.
fn handle_list_end(&mut self, w: &mut W, ordered: bool) -> Result<()> {
    let closing_tag = if ordered { "</ol>" } else { "</ul>" };
    write!(w, "{}", closing_tag)
}
fn handle_list_beg_item(&mut self, w: &mut W) -> Result<()> {
fn handle_list_beg_item(&mut self, w: &mut W, bullet: &str) -> Result<()> {
write!(w, "<li>")
}
fn handle_list_end_item(&mut self, w: &mut W) -> Result<()> {

View file

@ -30,7 +30,7 @@ pub trait Handler<W: Write> {
fn handle_dyn_block_end(&mut self, w: &mut W) -> Result<()>;
fn handle_list_beg(&mut self, w: &mut W, ordered: bool) -> Result<()>;
fn handle_list_end(&mut self, w: &mut W, ordered: bool) -> Result<()>;
fn handle_list_beg_item(&mut self, w: &mut W) -> Result<()>;
fn handle_list_beg_item(&mut self, w: &mut W, bullet: &str) -> Result<()>;
fn handle_list_end_item(&mut self, w: &mut W) -> Result<()>;
fn handle_call(&mut self, w: &mut W, value: &str) -> Result<()>;
fn handle_clock(&mut self, w: &mut W) -> Result<()>;
@ -113,7 +113,7 @@ impl<'a, W: Write, H: Handler<W>> Render<'a, W, H> {
DynBlockEnd => h.handle_dyn_block_end(w)?,
ListBeg { ordered } => h.handle_list_beg(w, ordered)?,
ListEnd { ordered } => h.handle_list_end(w, ordered)?,
ListItemBeg => h.handle_list_beg_item(w)?,
ListItemBeg { bullet } => h.handle_list_beg_item(w, bullet)?,
ListItemEnd => h.handle_list_end_item(w)?,
Call { value } => h.handle_call(w, value)?,
Clock => h.handle_clock(w)?,

View file

@ -8,6 +8,7 @@ mod utils;
mod elements;
mod export;
mod headline;
mod lines;
mod objects;
mod parser;

54
src/lines.rs Normal file
View file

@ -0,0 +1,54 @@
use memchr::{memchr_iter, Memchr};
use std::iter::{once, Chain, Once};
/// Iterator over the lines of a string slice, split on `\n`.
///
/// Each item is `(cont_end, end, line)`; see the `Iterator` implementation for
/// how the three values are computed.
pub struct Lines<'a> {
// the text being split
src: &'a str,
// offsets of every `\n` in `src`, followed once by `src.len()`
iter: Chain<Memchr<'a>, Once<usize>>,
// byte offset at which the next line begins
start: usize,
// set to 0 by `new`; not read by any method visible in this file
pre_cont_end: usize,
}
impl<'a> Lines<'a> {
    /// Creates a line iterator positioned at the start of `src`.
    ///
    /// The underlying offset stream is every `\n` position in `src` followed by
    /// `src.len()`, so the text after the last `\n` is yielded as a final line.
    pub fn new(src: &'a str) -> Lines<'a> {
        let newline_offsets = memchr_iter(b'\n', src.as_bytes());
        let iter = newline_offsets.chain(once(src.len()));
        Lines {
            src,
            iter,
            start: 0,
            pre_cont_end: 0,
        }
    }
}
impl<'a> Iterator for Lines<'a> {
    /// `(cont_end, end, line)`:
    /// * `cont_end` - offset just past the line's text, before its `\n` or `\r\n`;
    /// * `end` - offset at which the following line starts (`src.len()` for the
    ///   final line);
    /// * `line` - the line's text without its terminator.
    type Item = (usize, usize, &'a str);

    /// Yields the next line, or `None` once the final line has been produced.
    ///
    /// A `\r` immediately before a `\n` is treated as part of the terminator.
    /// The text after the last `\n` is always yielded as a final line, even
    /// when it is empty.
    #[inline]
    fn next(&mut self) -> Option<(usize, usize, &'a str)> {
        let src = self.src;
        // `boundary` is the offset of a `\n`, or `src.len()` for the final line.
        let boundary = self.iter.next()?;
        let at_newline = boundary != src.len();

        // Only look behind for `\r` when the line is non-empty. For an empty
        // line the preceding byte is the previous `\n`, or does not exist at
        // all when the source starts with `\n`; the unguarded `boundary - 1`
        // used to underflow and panic in that case.
        let has_cr =
            at_newline && boundary > self.start && src.as_bytes()[boundary - 1] == b'\r';
        let cont_end = if has_cr { boundary - 1 } else { boundary };

        let line = &src[self.start..cont_end];
        // Step over the `\n`; the final line has no terminator to skip.
        self.start = if at_newline { boundary + 1 } else { boundary };
        Some((cont_end, self.start, line))
    }

    /// Forwards the hint of the underlying offset iterator, which yields
    /// exactly one offset per line.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iter.size_hint()
    }
}
#[test]
fn lines() {
    // Covers a CRLF terminator, an LF terminator, an empty line, and the empty
    // final line produced by a trailing newline.
    let actual: Vec<_> = Lines::new("foo\r\nbar\n\nbaz\n").collect();
    let expected = vec![
        (3, 5, "foo"),
        (8, 9, "bar"),
        (9, 10, ""),
        (13, 14, "baz"),
        (14, 14, ""),
    ];
    assert_eq!(expected, actual);
}

View file

@ -51,13 +51,13 @@ impl<'a> Object<'a> {
return (Object::Text(src), src.len(), None);
}
let chars = ascii_chars!('@', ' ', '"', '(', '\n', '{', '<', '[');
let bs = bytes!(b'@', b' ', b'"', b'(', b'\n', b'{', b'<', b'[');
let mut pos = 0;
loop {
macro_rules! brk {
($obj:expr, $off:expr, $pos:expr) => {
break if pos == 0 {
break if $pos == 0 {
($obj, $off, None)
} else {
(Object::Text(&src[0..$pos]), $pos, Some(($obj, $off)))
@ -159,8 +159,8 @@ impl<'a> Object<'a> {
_ => (),
}
if let Some(off) = chars
.find(&src[pos + 1..])
if let Some(off) = bs
.find(&bytes[pos + 1..])
.map(|i| i + pos + 1)
.filter(|&i| i < src.len() - 2)
{

View file

@ -5,54 +5,19 @@ use objects::*;
#[cfg_attr(test, derive(PartialEq))]
#[derive(Copy, Clone, Debug)]
pub enum Container {
Headline {
beg: usize,
end: usize,
},
Section {
end: usize,
},
Paragraph {
cont_end: usize,
end: usize,
},
CtrBlock {
cont_end: usize,
end: usize,
},
QteBlock {
cont_end: usize,
end: usize,
},
SplBlock {
cont_end: usize,
end: usize,
},
DynBlock {
cont_end: usize,
end: usize,
},
List {
ident: usize,
ordered: bool,
cont_end: usize,
end: usize,
},
ListItem {
end: usize,
},
Italic {
end: usize,
},
Strike {
end: usize,
},
Bold {
end: usize,
},
Underline {
end: usize,
},
Headline { beg: usize, end: usize },
Section { end: usize },
Paragraph { cont_end: usize, end: usize },
CtrBlock { cont_end: usize, end: usize },
QteBlock { cont_end: usize, end: usize },
SplBlock { cont_end: usize, end: usize },
DynBlock { cont_end: usize, end: usize },
List { ident: usize, ordered: bool },
ListItem { cont_end: usize, end: usize },
Italic { end: usize },
Strike { end: usize },
Bold { end: usize },
Underline { end: usize },
}
#[cfg_attr(test, derive(PartialEq))]
@ -109,7 +74,9 @@ pub enum Event<'a> {
ListEnd {
ordered: bool,
},
ListItemBeg,
ListItemBeg {
bullet: &'a str,
},
ListItemEnd,
Call {
@ -166,6 +133,7 @@ pub struct Parser<'a> {
off: usize,
ele_buf: Option<(Element<'a>, usize)>,
obj_buf: Option<(Object<'a>, usize)>,
has_more_item: bool,
}
impl<'a> Parser<'a> {
@ -176,11 +144,13 @@ impl<'a> Parser<'a> {
off: 0,
ele_buf: None,
obj_buf: None,
has_more_item: false,
}
}
fn next_sec_or_hdl(&mut self) -> Event<'a> {
let end = Headline::find_level(&self.text[self.off..], std::usize::MAX);
debug_assert!(end <= self.text.len());
if end != 0 {
self.stack.push(Container::Section {
end: self.off + end,
@ -192,7 +162,9 @@ impl<'a> Parser<'a> {
}
fn next_hdl(&mut self) -> Event<'a> {
let (hdl, off, end) = Headline::parse(&self.text[self.off..]);
let tail = &self.text[self.off..];
let (hdl, off, end) = Headline::parse(tail);
debug_assert!(end <= self.text.len());
self.stack.push(Container::Headline {
beg: self.off + off,
end: self.off + end,
@ -201,104 +173,126 @@ impl<'a> Parser<'a> {
Event::HeadlineBeg(hdl)
}
fn next_ele(&mut self, end: usize) -> Event<'a> {
fn next_ele(&mut self, text: &'a str) -> Event<'a> {
let (ele, off) = self
.ele_buf
.take()
.map(|(ele, off)| (Some(ele), off))
.unwrap_or_else(|| {
let (off, ele, next_2) = Element::next_2(&self.text[self.off..end]);
self.ele_buf = next_2;
let (ele, off, next_ele) = Element::next_2(text);
self.ele_buf = next_ele;
(ele, off)
});
debug_assert!(self.off + off <= end);
debug_assert!(off <= text.len());
self.off += off;
if let Some(ele) = ele {
match ele {
Element::Paragraph { cont_end, end } => self.stack.push(Container::Paragraph {
Some(Element::Paragraph { cont_end, end }) => {
debug_assert!(cont_end <= text.len() && end <= text.len());
self.stack.push(Container::Paragraph {
cont_end: cont_end + self.off,
end: end + self.off,
}),
Element::QteBlock { end, cont_end, .. } => self.stack.push(Container::QteBlock {
});
Event::ParagraphBeg
}
Some(Element::QteBlock { end, cont_end, .. }) => {
debug_assert!(cont_end <= text.len() && end <= text.len());
self.stack.push(Container::QteBlock {
cont_end: cont_end + self.off,
end: end + self.off,
}),
Element::CtrBlock { end, cont_end, .. } => self.stack.push(Container::CtrBlock {
});
Event::QteBlockBeg
}
Some(Element::CtrBlock { end, cont_end, .. }) => {
debug_assert!(cont_end <= text.len() && end <= text.len());
self.stack.push(Container::CtrBlock {
cont_end: cont_end + self.off,
end: end + self.off,
}),
Element::SplBlock { end, cont_end, .. } => self.stack.push(Container::SplBlock {
});
Event::CtrBlockBeg
}
Some(Element::SplBlock {
name,
args,
end,
cont_end,
}) => {
debug_assert!(cont_end <= text.len() && end <= text.len());
self.stack.push(Container::SplBlock {
cont_end: cont_end + self.off,
end: end + self.off,
}),
Element::DynBlock { end, cont_end, .. } => self.stack.push(Container::DynBlock {
cont_end: cont_end + self.off,
end: end + self.off,
}),
Element::List {
ident,
ordered,
});
Event::SplBlockBeg { name, args }
}
Some(Element::DynBlock {
name,
args,
cont_end,
end,
} => self.stack.push(Container::List {
ident,
ordered,
}) => {
debug_assert!(cont_end <= text.len() && end <= text.len());
self.stack.push(Container::DynBlock {
cont_end: cont_end + self.off,
end: end + self.off,
}),
_ => (),
});
Event::DynBlockBeg { name, args }
}
self.off += off;
match ele {
Element::Call { value } => Event::Call { value },
Element::Comment(c) => Event::Comment(c),
Element::CommentBlock { args, cont } => Event::CommentBlock { args, cont },
Element::CtrBlock { .. } => Event::CtrBlockBeg,
Element::DynBlock { name, args, .. } => Event::DynBlockBeg { name, args },
Element::ExampleBlock { args, cont } => Event::ExampleBlock { args, cont },
Element::ExportBlock { args, cont } => Event::ExportBlock { args, cont },
Element::FixedWidth(f) => Event::FixedWidth(f),
Element::FnDef { label, cont } => Event::FnDef { label, cont },
Element::Keyword { key, value } => Event::Keyword { key, value },
Element::List { ordered, .. } => Event::ListBeg { ordered },
Element::Paragraph { .. } => Event::ParagraphBeg,
Element::QteBlock { .. } => Event::QteBlockBeg,
Element::Rule => Event::Rule,
Element::SplBlock { name, args, .. } => Event::SplBlockBeg { name, args },
Element::SrcBlock { args, cont } => Event::SrcBlock { args, cont },
Element::VerseBlock { args, cont } => Event::VerseBlock { args, cont },
Some(Element::List { ident, ordered }) => {
self.stack.push(Container::List { ident, ordered });
self.has_more_item = true;
Event::ListBeg { ordered }
}
} else {
self.off += off;
self.end()
Some(Element::Call { value }) => Event::Call { value },
Some(Element::Comment(c)) => Event::Comment(c),
Some(Element::CommentBlock { args, cont }) => Event::CommentBlock { args, cont },
Some(Element::ExampleBlock { args, cont }) => Event::ExampleBlock { args, cont },
Some(Element::ExportBlock { args, cont }) => Event::ExportBlock { args, cont },
Some(Element::FixedWidth(f)) => Event::FixedWidth(f),
Some(Element::FnDef { label, cont }) => Event::FnDef { label, cont },
Some(Element::Keyword { key, value }) => Event::Keyword { key, value },
Some(Element::Rule) => Event::Rule,
Some(Element::SrcBlock { args, cont }) => Event::SrcBlock { args, cont },
Some(Element::VerseBlock { args, cont }) => Event::VerseBlock { args, cont },
None => self.end(),
}
}
fn next_obj(&mut self, end: usize) -> Event<'a> {
fn next_obj(&mut self, text: &'a str) -> Event<'a> {
let (obj, off) = self.obj_buf.take().unwrap_or_else(|| {
let (obj, off, next_2) = Object::next_2(&self.text[self.off..end]);
self.obj_buf = next_2;
let (obj, off, next_obj) = Object::next_2(text);
self.obj_buf = next_obj;
(obj, off)
});
debug_assert!(self.off + off <= end);
debug_assert!(off <= text.len());
match obj {
Object::Underline { end } => self.stack.push(Container::Underline {
Object::Underline { end } => {
debug_assert!(end <= text.len());
self.stack.push(Container::Underline {
end: self.off + end,
}),
Object::Strike { end } => self.stack.push(Container::Strike {
});
}
Object::Strike { end } => {
debug_assert!(end <= text.len());
self.stack.push(Container::Strike {
end: self.off + end,
}),
Object::Italic { end } => self.stack.push(Container::Italic {
});
}
Object::Italic { end } => {
debug_assert!(end <= text.len());
self.stack.push(Container::Italic {
end: self.off + end,
}),
Object::Bold { end } => self.stack.push(Container::Bold {
});
}
Object::Bold { end } => {
debug_assert!(end <= text.len());
self.stack.push(Container::Bold {
end: self.off + end,
}),
});
}
_ => (),
}
@ -324,13 +318,16 @@ impl<'a> Parser<'a> {
}
}
fn next_list_item(&mut self, end: usize, ident: usize) -> Event<'a> {
let (beg, end) = List::parse_item(&self.text[self.off..end], ident);
fn next_list_item(&mut self, ident: usize) -> Event<'a> {
let (bullet, cont_beg, cont_end, end, has_more) =
List::parse(&self.text[self.off..], ident);
self.stack.push(Container::ListItem {
cont_end: self.off + cont_end,
end: self.off + end,
});
self.off += beg;
Event::ListItemBeg
self.off += cont_beg;
self.has_more_item = has_more;
Event::ListItemBeg { bullet }
}
fn end(&mut self) -> Event<'a> {
@ -378,58 +375,50 @@ impl<'a> Iterator for Parser<'a> {
Container::DynBlock { cont_end, end, .. }
| Container::CtrBlock { cont_end, end, .. }
| Container::QteBlock { cont_end, end, .. }
| Container::SplBlock { cont_end, end, .. } => {
| Container::SplBlock { cont_end, end, .. }
| Container::ListItem { cont_end, end } => {
let text = &self.text[self.off..cont_end];
if self.off >= cont_end {
self.off = end;
self.end()
} else {
self.next_ele(cont_end)
self.next_ele(text)
}
}
Container::List {
cont_end,
end,
ident,
..
} => {
if self.off >= cont_end {
self.off = end;
self.end()
Container::List { ident, .. } => {
if self.has_more_item {
self.next_list_item(ident)
} else {
self.next_list_item(cont_end, ident)
}
}
Container::ListItem { end } => {
if self.off >= end {
self.end()
} else {
self.next_ele(end)
}
}
Container::Section { end } => {
let text = &self.text[self.off..end];
if self.off >= end {
self.end()
} else {
self.next_ele(end)
self.next_ele(text)
}
}
Container::Paragraph { cont_end, end } => {
let text = &self.text[self.off..cont_end];
if self.off >= cont_end {
self.off = end;
self.end()
} else {
self.next_obj(cont_end)
self.next_obj(text)
}
}
Container::Bold { end }
| Container::Underline { end }
| Container::Italic { end }
| Container::Strike { end } => {
let text = &self.text[self.off..end];
if self.off >= end {
self.off += 1;
self.end()
} else {
self.next_obj(end)
self.next_obj(text)
}
}
})