refactor: simplify list parsing

This commit is contained in:
PoiScript 2019-02-02 23:42:31 +08:00
parent 763ec00434
commit d20d4c2880
9 changed files with 562 additions and 570 deletions

View file

@ -9,7 +9,9 @@ fn valid_label(ch: u8) -> bool {
impl FnDef { impl FnDef {
pub fn parse(src: &str) -> Option<(&str, &str, usize)> { pub fn parse(src: &str) -> Option<(&str, &str, usize)> {
starts_with!(src, "[fn:"); if cfg!(test) {
starts_with!(src, "[fn:");
}
let label = until_while!(src, 4, b']', valid_label)?; let label = until_while!(src, 4, b']', valid_label)?;

View file

@ -1,289 +1,157 @@
pub struct List; use lines::Lines;
macro_rules! ident { pub struct List;
($src:expr) => {
$src.as_bytes()
.iter()
.position(|&c| c != b' ' && c != b'\t')
.unwrap_or(0)
};
}
impl List { impl List {
#[inline] #[inline]
fn is_item(src: &str) -> bool { pub fn is_item(src: &str) -> (bool, bool) {
if src.len() < 2 { if src.is_empty() {
return false; return (false, false);
} }
let bytes = src.as_bytes(); let bytes = src.as_bytes();
let i = match bytes[0] { let (i, ordered) = match bytes[0] {
b'*' | b'-' | b'+' => 1, b'*' | b'-' | b'+' => (1, false),
b'0'...b'9' => { b'0'...b'9' => {
let i = bytes let i = bytes
.iter() .iter()
.position(|&c| !c.is_ascii_digit()) .position(|&c| !c.is_ascii_digit())
.unwrap_or_else(|| src.len()); .unwrap_or_else(|| src.len());
if i >= src.len() - 1 {
return false;
}
let c = bytes[i]; let c = bytes[i];
if !(c == b'.' || c == b')') { if !(c == b'.' || c == b')') {
return false; return (false, false);
} }
i + 1 (i + 1, true)
} }
_ => return false, _ => return (false, false),
}; };
// bullet is follwed by a space or line ending if i < src.len() {
bytes[i] == b' ' || bytes[i] == b'\n' // bullet is follwed by a space or line ending
} (bytes[i] == b' ' || bytes[i] == b'\n', ordered)
} else {
#[inline] (false, false)
pub fn is_ordered(byte: u8) -> bool {
match byte {
b'*' | b'-' | b'+' => false,
b'0'...b'9' => true,
_ => unreachable!(),
} }
} }
// returns (contents_begin, contents_end) // returns (bullets, contents begin, contents end, end, has more)
pub fn parse_item(src: &str, ident: usize) -> (usize, usize) { pub fn parse(src: &str, ident: usize) -> (&str, usize, usize, usize, bool) {
let beg = memchr::memchr(b' ', &src.as_bytes()[ident..]) debug_assert!(Self::is_item(&src[ident..]).0);
.map(|i| i + ident + 1) debug_assert!(
.unwrap(); src[..ident].chars().all(|c| c == ' ' || c == '\t'),
let mut lines = lines!(src); "{:?} doesn't starts with indentation {}",
// skip first line src,
let mut pos = lines.next().unwrap(); ident
for line_end in lines { );
let line = &src[pos..line_end];
if !line.trim().is_empty() && ident!(line) == ident { let mut lines = Lines::new(src);
break; let (mut pre_cont_end, mut pre_end, first_line) = lines.next().unwrap();
let beg = match memchr::memchr(b' ', &first_line.as_bytes()[ident..]) {
Some(i) => i + ident + 1,
None => {
let len = first_line.len();
return (
&first_line,
len,
len,
len,
Self::is_item(lines.next().unwrap().2).0,
);
} }
pos = line_end; };
} let bullet = &src[0..beg];
(beg, pos)
}
// return (ident, is_ordered, contents_end, end) while let Some((mut cont_end, mut end, mut line)) = lines.next() {
pub fn parse(src: &str) -> Option<(usize, bool, usize, usize)> { // this line is emtpy
let bytes = src.as_bytes(); if line.is_empty() {
let starting_ident = ident!(src); if let Some((next_cont_end, next_end, next_line)) = lines.next() {
// next line is empty, too
if !Self::is_item(&src[starting_ident..]) { if next_line.is_empty() {
return None; return (bullet, beg, pre_cont_end, next_end, false);
} } else {
// move to next line
let mut lines = lines!(src); pre_end = end;
// skip the starting line cont_end = next_cont_end;
let mut pos = lines.next().unwrap(); end = next_end;
let is_ordered = Self::is_ordered(bytes[starting_ident]); line = next_line;
}
Some(loop {
let mut curr_line = match lines.next() {
Some(i) => i,
None => break (starting_ident, is_ordered, pos, pos),
};
// current line is empty
if src[pos..curr_line].trim().is_empty() {
let next_line = match lines.next() {
Some(i) => i,
None => break (starting_ident, is_ordered, pos, pos),
};
// next line is empty, too
if src[curr_line..next_line].trim().is_empty() {
break (starting_ident, is_ordered, pos, next_line);
} else { } else {
// move to next line return (bullet, beg, pre_cont_end, end, false);
pos = curr_line;
curr_line = next_line;
} }
} }
let ident = ident!(src[pos..curr_line]); let line_ident = Self::ident(line);
// less indented than the starting line if line_ident < ident {
if ident < starting_ident { return (bullet, beg, pre_cont_end, pre_end, false);
break (starting_ident, is_ordered, pos, pos); } else if line_ident == ident {
return (
bullet,
beg,
pre_cont_end,
pre_end,
Self::is_item(&line[ident..]).0,
);
} }
if ident > starting_ident || Self::is_item(&src[pos + ident..]) { pre_end = end;
pos = curr_line; pre_cont_end = cont_end;
} else { }
break (starting_ident, is_ordered, pos, pos);
} (bullet, beg, src.len(), src.len(), false)
})
} }
/// Returns the byte index of the first character of `src` that is neither a
/// space nor a tab, i.e. the width of its leading indentation.
///
/// Yields `0` when `src` is empty or consists solely of spaces and tabs.
fn ident(src: &str) -> usize {
    for (idx, &byte) in src.as_bytes().iter().enumerate() {
        if byte != b' ' && byte != b'\t' {
            return idx;
        }
    }
    // nothing but blanks (or nothing at all): reported as unindented
    0
}
}
#[test]
fn is_item() {
assert_eq!(List::is_item("+ item"), (true, false));
assert_eq!(List::is_item("- item"), (true, false));
assert_eq!(List::is_item("10. item"), (true, true));
assert_eq!(List::is_item("10) item"), (true, true));
assert_eq!(List::is_item("1. item"), (true, true));
assert_eq!(List::is_item("1) item"), (true, true));
assert_eq!(List::is_item("10. "), (true, true));
assert_eq!(List::is_item("10.\n"), (true, true));
assert_eq!(List::is_item("10."), (false, false));
assert_eq!(List::is_item("+"), (false, false));
assert_eq!(List::is_item("-item"), (false, false));
assert_eq!(List::is_item("+item"), (false, false));
} }
#[test] #[test]
fn parse() { fn parse() {
assert_eq!( assert_eq!(
List::parse( List::parse("+ item1\n+ item2\n+ item3", 0),
r"+ item1 ("+ ", 2, 7, 8, true)
+ item2
+ item3"
),
Some((0, false, 23, 23))
); );
assert_eq!( assert_eq!(
List::parse( List::parse("* item1\n\n* item2\n* item3", 0),
r"* item1 ("* ", 2, 7, 9, true)
* item2
* item3"
),
Some((0, false, 24, 24))
); );
assert_eq!( assert_eq!(
List::parse( List::parse("- item1\n\n\n- item2\n- item3", 0),
r"- item1 ("- ", 2, 7, 10, false)
- item2
- item1"
),
Some((0, false, 16, 18))
); );
assert_eq!( assert_eq!(
List::parse( List::parse("1. item1\n\n\n\n2. item2\n3. item3", 0),
r"1. item1 ("1. ", 3, 8, 11, false)
2. item1
3. item2"
),
Some((0, true, 28, 28))
); );
assert_eq!( assert_eq!(
List::parse( List::parse(" + item1\n + item2\n+ item3", 2),
r" 1) item1 (" + ", 4, 21, 22, false)
2) item1
3) item2"
),
Some((2, true, 11, 11))
); );
assert_eq!( assert_eq!(
List::parse( List::parse(" + item1\n + item2\n + item3", 2),
r" + item1 (" + ", 4, 9, 10, true)
1) item1
+ item2"
),
Some((2, false, 32, 32))
);
assert_eq!(
List::parse(
r" item1
+ item1
+ item2"
),
None
);
assert_eq!(
List::parse(
r#"- Lorem ipsum dolor sit amet, consectetur adipiscing elit.
- Nulla et dolor vitae elit placerat sagittis. Aliquam a lobortis massa,
aliquam efficitur arcu.
- Lorem ipsum dolor sit amet, consectetur adipiscing elit.
- Phasellus auctor lacus a orci imperdiet, ut facilisis neque lobortis.
- Proin condimentum id orci vitae lobortis. Nunc sollicitudin risus neque,
dapibus malesuada sem faucibus vitae.
- Sed vitae dolor augue. Phasellus at rhoncus arcu. Suspendisse potenti.
- Nulla faucibus, metus ut porta hendrerit, urna lorem porta metus, in tempus
nibh orci sed sapien.
- Morbi tortor mi, dapibus vel faucibus a, iaculis sed turpis."#
),
Some((0, false, 666, 666))
);
}
#[test]
fn is_item() {
assert!(List::is_item("+ item"));
assert!(List::is_item("- item"));
assert!(List::is_item("10. item"));
assert!(List::is_item("10) item"));
assert!(List::is_item("1. item"));
assert!(List::is_item("1) item"));
assert!(List::is_item("10. "));
assert!(List::is_item("10.\n"));
assert!(!List::is_item("10."));
assert!(!List::is_item("-item"));
assert!(!List::is_item("+item"));
}
#[test]
fn parse_item() {
assert_eq!(List::parse_item("+ Item1\n+ Item2", 0), (2, 8));
assert_eq!(List::parse_item("+ Item1\n\n+ Item2", 0), (2, 9));
assert_eq!(
List::parse_item(
r"+ item1
+ item1
+ item2",
0
),
(2, 25)
);
assert_eq!(
List::parse_item(
r" 1. item1
+ item2",
2
),
(5, 11)
);
assert_eq!(
List::parse_item(
r"+ It
em1
+ Item2",
0
),
(2, 11)
);
assert_eq!(
List::parse_item(
r#"1) Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec sit amet
ullamcorper ante, nec pellentesque nisi.
2) Sed pulvinar ut arcu id aliquam.Curabitur quis justo eu magna maximus sodales.
Curabitur nisl nisi, ornare in enim id, sagittis facilisis magna.
3) Curabitur venenatis molestie eros sit amet congue. Nunc at molestie leo, vitae
malesuada nisi."#,
0
),
(3, 119)
);
assert_eq!(
List::parse_item(
r#"- Lorem ipsum dolor sit amet, consectetur adipiscing elit.
- Nulla et dolor vitae elit placerat sagittis. Aliquam a lobortis massa,
aliquam efficitur arcu.
- Lorem ipsum dolor sit amet, consectetur adipiscing elit.
- Phasellus auctor lacus a orci imperdiet, ut facilisis neque lobortis.
- Proin condimentum id orci vitae lobortis. Nunc sollicitudin risus neque,
dapibus malesuada sem faucibus vitae.
- Sed vitae dolor augue. Phasellus at rhoncus arcu. Suspendisse potenti.
- Nulla faucibus, metus ut porta hendrerit, urna lorem porta metus, in tempus
nibh orci sed sapien.
- Morbi tortor mi, dapibus vel faucibus a, iaculis sed turpis."#,
0
),
(2, 421)
); );
assert_eq!(List::parse("+\n", 0), ("+", 1, 1, 1, false));
assert_eq!(List::parse("+\n+ item2\n+ item3", 0), ("+", 1, 1, 1, true));
assert_eq!(List::parse("1) item1", 0), ("1) ", 3, 8, 8, false));
assert_eq!(List::parse("1) item1\n", 0), ("1) ", 3, 8, 9, false));
} }

View file

@ -12,7 +12,11 @@ pub use self::keyword::{Key, Keyword};
pub use self::list::List; pub use self::list::List;
pub use self::rule::Rule; pub use self::rule::Rule;
#[cfg_attr(test, derive(PartialEq, Debug))] use memchr::memchr;
use memchr::memchr_iter;
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub enum Element<'a> { pub enum Element<'a> {
Paragraph { Paragraph {
cont_end: usize, cont_end: usize,
@ -77,217 +81,195 @@ pub enum Element<'a> {
List { List {
ident: usize, ident: usize,
ordered: bool, ordered: bool,
cont_end: usize,
end: usize,
}, },
} }
impl<'a> Element<'a> { impl<'a> Element<'a> {
pub fn next_2(src: &'a str) -> (usize, Option<Element<'a>>, Option<(Element<'a>, usize)>) { // return (element, off, next element, next offset)
let bytes = src.as_bytes(); // the end of first element is relative to the offset
// next offset is relative to the end of the first element
let mut pos = skip_empty_line!(src, 0); pub fn next_2(src: &'a str) -> (Option<Element<'a>>, usize, Option<(Element<'a>, usize)>) {
// skip empty lines
let mut pos = match src.chars().position(|c| c != '\n') {
Some(pos) => pos,
None => return (None, src.len(), None),
};
let start = pos; let start = pos;
let bytes = src.as_bytes();
if start == src.len() { let mut line_ends = memchr_iter(b'\n', &bytes[start..]).map(|i| i + start);
return (start, None, None);
}
loop { loop {
// Unlike other element, footnote definition must starts at column 0 let line_beg = pos;
if bytes[pos] == b'[' {
if let Some((label, cont, off)) = FnDef::parse(&src[pos..]) { macro_rules! brk {
break if pos == start { ($ele:expr, $off:expr) => {
(off + 1, Some(Element::FnDef { label, cont }), None) break if line_beg == 0 || pos == start {
(Some($ele), start + $off, None)
} else { } else {
( (
start,
Some(Element::Paragraph { Some(Element::Paragraph {
cont_end: pos - 1, cont_end: line_beg - start - 1,
end: pos, end: line_beg - start,
}), }),
Some((Element::FnDef { label, cont }, off + 1)), start,
Some(($ele, $off)),
) )
}; };
};
}
// Unlike other elements, a footnote definition must start at column 0
if bytes[pos..].starts_with(b"[fn:") {
if let Some((label, cont, off)) = FnDef::parse(&src[pos..]) {
brk!(Element::FnDef { label, cont }, off + 1);
} }
} }
let end = pos; // FIXME:
if bytes[pos] == b'\n' {
break (
Some(Element::Paragraph {
cont_end: pos - start - 1,
end: pos - start + 1,
}),
start,
None,
);
}
pos = skip_space!(src, pos); pos = skip_space!(src, pos);
if pos <= src.len() { let (is_item, ordered) = List::is_item(&src[pos..]);
macro_rules! brk { if is_item {
($ele:expr, $off:expr) => { let list = Element::List {
break if pos == start { ident: pos - line_beg,
($off, Some($ele), None) ordered,
} else { };
( break if line_beg == start {
start, (Some(list), start, None)
Some(Element::Paragraph { } else {
cont_end: end, (
end: pos - 1,
}),
Some(($ele, $off)),
)
};
};
}
if bytes[pos] == b'+'
|| bytes[pos] == b'-'
|| bytes[pos] == b'*'
|| (bytes[pos] >= b'0' && bytes[pos] <= b'9')
{
if let Some((ident, ordered, cont_end, list_end)) = List::parse(&src[end..]) {
let list = Element::List {
ident,
ordered,
cont_end,
end: list_end,
};
break if pos == start {
(1, Some(list), None)
} else {
(
start,
Some(Element::Paragraph {
cont_end: end,
end: end,
}),
Some((list, 1)),
)
};
}
}
if bytes[pos] == b'\n' {
break (
start,
Some(Element::Paragraph { Some(Element::Paragraph {
cont_end: end, cont_end: line_beg - start - 1,
end: pos, end: line_beg - start,
}), }),
None, start,
); Some((list, 1)),
)
};
}
// TODO: LaTeX environment
if bytes[pos..].starts_with(b"\\begin{") {}
// Rule
if bytes[pos] == b'-' {
let off = Rule::parse(&src[pos..]);
if off != 0 {
brk!(Element::Rule, off);
} }
}
// TODO: LaTeX environment // TODO: multiple lines fixed width area
if bytes[pos] == b'\\' {} if bytes[pos..].starts_with(b": ") || bytes[pos..].starts_with(b":\n") {
let eol = memchr(b'\n', &bytes[pos..])
.map(|i| i + 1)
.unwrap_or_else(|| src.len() - pos);
brk!(Element::FixedWidth(&src[pos + 1..pos + eol].trim()), eol);
}
// Rule if bytes[pos..].starts_with(b"#+") {
if bytes[pos] == b'-' { if let Some((name, args, cont_beg, cont_end, end)) = Block::parse(&src[pos..]) {
let off = Rule::parse(&src[pos..]); let cont = &src[pos + cont_beg + 1..pos + cont_end - 1];
if off != 0 { match name.to_uppercase().as_str() {
brk!(Element::Rule, off); "COMMENT" => brk!(Element::CommentBlock { args, cont }, end),
} "EXAMPLE" => brk!(Element::ExampleBlock { args, cont }, end),
} "EXPORT" => brk!(Element::ExportBlock { args, cont }, end),
"SRC" => brk!(Element::SrcBlock { args, cont }, end),
// TODO: multiple lines fixed width area "VERSE" => brk!(Element::VerseBlock { args, cont }, end),
if bytes[pos] == b':' "CENTER" => brk!(
&& bytes Element::CtrBlock {
.get(pos + 1)
.map(|&b| b == b' ' || b == b'\n')
.unwrap_or(false)
{
let eol = memchr::memchr(b'\n', &src.as_bytes()[pos..])
.map(|i| i + 1)
.unwrap_or_else(|| src.len() - pos);
brk!(Element::FixedWidth(&src[pos + 1..pos + eol]), eol);
}
if bytes[pos] == b'#' && bytes.get(pos + 1).map(|&b| b == b'+').unwrap_or(false) {
if let Some((name, args, cont_beg, cont_end, end)) = Block::parse(&src[pos..]) {
let cont = &src[pos + cont_beg + 1..pos + cont_end - 1];
match name.to_uppercase().as_str() {
"COMMENT" => brk!(Element::CommentBlock { args, cont }, end),
"EXAMPLE" => brk!(Element::ExampleBlock { args, cont }, end),
"EXPORT" => brk!(Element::ExportBlock { args, cont }, end),
"SRC" => brk!(Element::SrcBlock { args, cont }, end),
"VERSE" => brk!(Element::VerseBlock { args, cont }, end),
"CENTER" => brk!(
Element::CtrBlock {
args,
cont_end,
end,
},
cont_beg
),
"QUOTE" => brk!(
Element::QteBlock {
args,
cont_end,
end,
},
cont_beg
),
_ => brk!(
Element::SplBlock {
name,
args,
cont_end,
end
},
cont_beg
),
};
}
if let Some((name, args, cont_beg, cont_end, end)) =
DynBlock::parse(&src[pos..])
{
brk!(
Element::DynBlock {
name,
args, args,
cont_end, cont_end,
end, end,
}, },
cont_beg cont_beg
) ),
} "QUOTE" => brk!(
Element::QteBlock {
if let Some((key, value, off)) = Keyword::parse(&src[pos..]) { args,
brk!( cont_end,
if let Key::Call = key { end,
Element::Call { value }
} else {
Element::Keyword { key, value }
}, },
off cont_beg
) ),
} _ => brk!(
Element::SplBlock {
name,
args,
cont_end,
end
},
cont_beg
),
};
} }
// Comment if let Some((name, args, cont_beg, cont_end, end)) = DynBlock::parse(&src[pos..]) {
// TODO: multiple lines comment brk!(
if bytes[pos] == b'#' && bytes.get(pos + 1).map(|&b| b == b' ').unwrap_or(false) { Element::DynBlock {
let eol = memchr::memchr(b'\n', &src.as_bytes()[pos..]) name,
.map(|i| i + 1) args,
.unwrap_or_else(|| src.len() - pos); cont_end,
brk!(Element::Comment(&src[pos + 1..pos + eol]), eol); end,
},
cont_beg
)
}
if let Some((key, value, off)) = Keyword::parse(&src[pos..]) {
brk!(
if let Key::Call = key {
Element::Call { value }
} else {
Element::Keyword { key, value }
},
off
)
} }
} }
if let Some(off) = memchr::memchr(b'\n', &src.as_bytes()[pos..]) { // Comment
pos += off + 1; // TODO: multiple lines comment
// last char if bytes[pos..].starts_with(b"# ") || bytes[pos..].starts_with(b"#\n") {
if pos == src.len() { let eol = memchr(b'\n', &bytes[pos..])
.map(|i| i + 1)
.unwrap_or_else(|| src.len() - pos);
brk!(Element::Comment(&src[pos + 1..pos + eol].trim()), eol);
}
// move to the beginning of the next line
if let Some(off) = line_ends.next() {
pos = off + 1;
// the last character
if pos >= src.len() {
break ( break (
start,
Some(Element::Paragraph { Some(Element::Paragraph {
cont_end: pos - 1, cont_end: src.len() - start - 1,
end: pos, end: src.len() - start,
}), }),
start,
None, None,
); );
} }
} else { } else {
break ( break (
start,
Some(Element::Paragraph { Some(Element::Paragraph {
cont_end: src.len(), cont_end: src.len() - start,
end: src.len(), end: src.len() - start,
}), }),
start,
None, None,
); );
} }
@ -297,6 +279,102 @@ impl<'a> Element<'a> {
#[test] #[test]
fn next_2() { fn next_2() {
use self::Element::*;
assert_eq!(Element::next_2("\n\n\n"), (None, 3, None));
let len = "Lorem ipsum dolor sit amet.".len();
assert_eq!(
Element::next_2("\nLorem ipsum dolor sit amet.\n\n\n"),
(
Some(Paragraph {
cont_end: len,
end: len + 2,
}),
1,
None
)
);
assert_eq!(
Element::next_2("\n\nLorem ipsum dolor sit amet.\n\n"),
(
Some(Paragraph {
cont_end: len,
end: len + 2,
}),
2,
None
)
);
assert_eq!(
Element::next_2("\nLorem ipsum dolor sit amet.\n"),
(
Some(Paragraph {
cont_end: len,
end: len + 1,
}),
1,
None
)
);
assert_eq!(
Element::next_2("\n\n\nLorem ipsum dolor sit amet."),
(
Some(Paragraph {
cont_end: len,
end: len,
}),
3,
None
)
);
assert_eq!(
Element::next_2("\n\n\n: Lorem ipsum dolor sit amet.\n"),
(
Some(FixedWidth("Lorem ipsum dolor sit amet.")),
"\n\n\n: Lorem ipsum dolor sit amet.\n".len(),
None
)
);
assert_eq!(
Element::next_2("\n\n\n: Lorem ipsum dolor sit amet."),
(
Some(FixedWidth("Lorem ipsum dolor sit amet.")),
"\n\n\n: Lorem ipsum dolor sit amet.".len(),
None
)
);
assert_eq!(
Element::next_2("\n\nLorem ipsum dolor sit amet.\n: Lorem ipsum dolor sit amet.\n"),
(
Some(Paragraph {
cont_end: len,
end: len + 1,
}),
2,
Some((FixedWidth("Lorem ipsum dolor sit amet."), 30))
)
);
assert_eq!(
Element::next_2("\n\nLorem ipsum dolor sit amet.\n+ Lorem ipsum dolor sit amet.\n"),
(
Some(Paragraph {
cont_end: len,
end: len + 1,
}),
2,
Some((
List {
ident: 0,
ordered: false,
},
1
))
)
);
// TODO: more tests // TODO: more tests
assert_eq!(Element::next_2("\n\n\n\n"), (4, None, None));
} }

View file

@ -73,7 +73,7 @@ impl<W: Write> Handler<W> for HtmlHandler {
fn handle_list_end(&mut self, w: &mut W, ordered: bool) -> Result<()> { fn handle_list_end(&mut self, w: &mut W, ordered: bool) -> Result<()> {
write!(w, "{}", if ordered { "</ol>" } else { "</ul>" }) write!(w, "{}", if ordered { "</ol>" } else { "</ul>" })
} }
fn handle_list_beg_item(&mut self, w: &mut W) -> Result<()> { fn handle_list_beg_item(&mut self, w: &mut W, bullet: &str) -> Result<()> {
write!(w, "<li>") write!(w, "<li>")
} }
fn handle_list_end_item(&mut self, w: &mut W) -> Result<()> { fn handle_list_end_item(&mut self, w: &mut W) -> Result<()> {

View file

@ -30,7 +30,7 @@ pub trait Handler<W: Write> {
fn handle_dyn_block_end(&mut self, w: &mut W) -> Result<()>; fn handle_dyn_block_end(&mut self, w: &mut W) -> Result<()>;
fn handle_list_beg(&mut self, w: &mut W, ordered: bool) -> Result<()>; fn handle_list_beg(&mut self, w: &mut W, ordered: bool) -> Result<()>;
fn handle_list_end(&mut self, w: &mut W, ordered: bool) -> Result<()>; fn handle_list_end(&mut self, w: &mut W, ordered: bool) -> Result<()>;
fn handle_list_beg_item(&mut self, w: &mut W) -> Result<()>; fn handle_list_beg_item(&mut self, w: &mut W, bullet: &str) -> Result<()>;
fn handle_list_end_item(&mut self, w: &mut W) -> Result<()>; fn handle_list_end_item(&mut self, w: &mut W) -> Result<()>;
fn handle_call(&mut self, w: &mut W, value: &str) -> Result<()>; fn handle_call(&mut self, w: &mut W, value: &str) -> Result<()>;
fn handle_clock(&mut self, w: &mut W) -> Result<()>; fn handle_clock(&mut self, w: &mut W) -> Result<()>;
@ -113,7 +113,7 @@ impl<'a, W: Write, H: Handler<W>> Render<'a, W, H> {
DynBlockEnd => h.handle_dyn_block_end(w)?, DynBlockEnd => h.handle_dyn_block_end(w)?,
ListBeg { ordered } => h.handle_list_beg(w, ordered)?, ListBeg { ordered } => h.handle_list_beg(w, ordered)?,
ListEnd { ordered } => h.handle_list_end(w, ordered)?, ListEnd { ordered } => h.handle_list_end(w, ordered)?,
ListItemBeg => h.handle_list_beg_item(w)?, ListItemBeg { bullet } => h.handle_list_beg_item(w, bullet)?,
ListItemEnd => h.handle_list_end_item(w)?, ListItemEnd => h.handle_list_end_item(w)?,
Call { value } => h.handle_call(w, value)?, Call { value } => h.handle_call(w, value)?,
Clock => h.handle_clock(w)?, Clock => h.handle_clock(w)?,

View file

@ -8,6 +8,7 @@ mod utils;
mod elements; mod elements;
mod export; mod export;
mod headline; mod headline;
mod lines;
mod objects; mod objects;
mod parser; mod parser;

54
src/lines.rs Normal file
View file

@ -0,0 +1,54 @@
use memchr::{memchr_iter, Memchr};
use std::iter::{once, Chain, Once};
/// Line-by-line scanner over a string slice that keeps track of byte offsets.
///
/// The scan state lives here; the items themselves are produced by the
/// `Iterator` implementation for this type.
pub struct Lines<'a> {
    /// Text being split into lines.
    src: &'a str,
    /// Offset of every `\n` in `src`, followed by `src.len()` so that whatever
    /// comes after the last line feed is still visited.
    iter: Chain<Memchr<'a>, Once<usize>>,
    /// Byte offset at which the next line begins.
    start: usize,
}

impl<'a> Lines<'a> {
    /// Creates a scanner positioned at the beginning of `src`.
    pub fn new(src: &'a str) -> Lines<'a> {
        Lines {
            src,
            iter: memchr_iter(b'\n', src.as_bytes()).chain(once(src.len())),
            start: 0,
        }
    }
}
impl<'a> Iterator for Lines<'a> {
    /// `(content end, next line start, line)`:
    ///
    /// * content end — offset just past the last byte of the line's text,
    ///   excluding the line terminator;
    /// * next line start — offset of the first byte after the terminator
    ///   (equal to `src.len()` for the final item);
    /// * line — the text of the line without its terminator.
    type Item = (usize, usize, &'a str);

    /// Yields the next line together with its offsets.
    ///
    /// A `\r` directly preceding the line feed is treated as part of the
    /// terminator and left out of both the content end and the line text.
    #[inline]
    fn next(&mut self) -> Option<(usize, usize, &'a str)> {
        let i = self.iter.next()?;
        // `src.len()` is only ever produced by the trailing `once`, so it
        // marks the end of input rather than a real line feed.
        let at_end = i == self.src.len();

        // `i > self.start` ensures there is a byte belonging to this line
        // before the line feed; without it a line feed at offset 0 would
        // evaluate `i - 1` with `i == 0` and panic.
        let has_cr = !at_end && i > self.start && self.src.as_bytes()[i - 1] == b'\r';
        let cont_end = if has_cr { i - 1 } else { i };
        let line = &self.src[self.start..cont_end];

        // step over the line feed, unless the input is exhausted
        self.start = if at_end { i } else { i + 1 };

        Some((cont_end, self.start, line))
    }

    /// Forwards the size hint of the underlying offset iterator, which yields
    /// exactly one offset per line.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iter.size_hint()
    }
}
/// Checks offsets and text for CRLF, LF, empty and trailing lines in one pass.
#[test]
fn lines() {
    let collected: Vec<(usize, usize, &str)> = Lines::new("foo\r\nbar\n\nbaz\n").collect();

    assert_eq!(
        collected,
        vec![
            // "\r\n" terminator: the carriage return is not part of the content
            (3, 5, "foo"),
            (8, 9, "bar"),
            // blank line between "bar" and "baz"
            (9, 10, ""),
            (13, 14, "baz"),
            // empty remainder after the final line feed
            (14, 14, ""),
        ]
    );
}

View file

@ -51,13 +51,13 @@ impl<'a> Object<'a> {
return (Object::Text(src), src.len(), None); return (Object::Text(src), src.len(), None);
} }
let chars = ascii_chars!('@', ' ', '"', '(', '\n', '{', '<', '['); let bs = bytes!(b'@', b' ', b'"', b'(', b'\n', b'{', b'<', b'[');
let mut pos = 0; let mut pos = 0;
loop { loop {
macro_rules! brk { macro_rules! brk {
($obj:expr, $off:expr, $pos:expr) => { ($obj:expr, $off:expr, $pos:expr) => {
break if pos == 0 { break if $pos == 0 {
($obj, $off, None) ($obj, $off, None)
} else { } else {
(Object::Text(&src[0..$pos]), $pos, Some(($obj, $off))) (Object::Text(&src[0..$pos]), $pos, Some(($obj, $off)))
@ -159,8 +159,8 @@ impl<'a> Object<'a> {
_ => (), _ => (),
} }
if let Some(off) = chars if let Some(off) = bs
.find(&src[pos + 1..]) .find(&bytes[pos + 1..])
.map(|i| i + pos + 1) .map(|i| i + pos + 1)
.filter(|&i| i < src.len() - 2) .filter(|&i| i < src.len() - 2)
{ {

View file

@ -5,54 +5,19 @@ use objects::*;
#[cfg_attr(test, derive(PartialEq))] #[cfg_attr(test, derive(PartialEq))]
#[derive(Copy, Clone, Debug)] #[derive(Copy, Clone, Debug)]
pub enum Container { pub enum Container {
Headline { Headline { beg: usize, end: usize },
beg: usize, Section { end: usize },
end: usize, Paragraph { cont_end: usize, end: usize },
}, CtrBlock { cont_end: usize, end: usize },
Section { QteBlock { cont_end: usize, end: usize },
end: usize, SplBlock { cont_end: usize, end: usize },
}, DynBlock { cont_end: usize, end: usize },
Paragraph { List { ident: usize, ordered: bool },
cont_end: usize, ListItem { cont_end: usize, end: usize },
end: usize, Italic { end: usize },
}, Strike { end: usize },
CtrBlock { Bold { end: usize },
cont_end: usize, Underline { end: usize },
end: usize,
},
QteBlock {
cont_end: usize,
end: usize,
},
SplBlock {
cont_end: usize,
end: usize,
},
DynBlock {
cont_end: usize,
end: usize,
},
List {
ident: usize,
ordered: bool,
cont_end: usize,
end: usize,
},
ListItem {
end: usize,
},
Italic {
end: usize,
},
Strike {
end: usize,
},
Bold {
end: usize,
},
Underline {
end: usize,
},
} }
#[cfg_attr(test, derive(PartialEq))] #[cfg_attr(test, derive(PartialEq))]
@ -109,7 +74,9 @@ pub enum Event<'a> {
ListEnd { ListEnd {
ordered: bool, ordered: bool,
}, },
ListItemBeg, ListItemBeg {
bullet: &'a str,
},
ListItemEnd, ListItemEnd,
Call { Call {
@ -166,6 +133,7 @@ pub struct Parser<'a> {
off: usize, off: usize,
ele_buf: Option<(Element<'a>, usize)>, ele_buf: Option<(Element<'a>, usize)>,
obj_buf: Option<(Object<'a>, usize)>, obj_buf: Option<(Object<'a>, usize)>,
has_more_item: bool,
} }
impl<'a> Parser<'a> { impl<'a> Parser<'a> {
@ -176,11 +144,13 @@ impl<'a> Parser<'a> {
off: 0, off: 0,
ele_buf: None, ele_buf: None,
obj_buf: None, obj_buf: None,
has_more_item: false,
} }
} }
fn next_sec_or_hdl(&mut self) -> Event<'a> { fn next_sec_or_hdl(&mut self) -> Event<'a> {
let end = Headline::find_level(&self.text[self.off..], std::usize::MAX); let end = Headline::find_level(&self.text[self.off..], std::usize::MAX);
debug_assert!(end <= self.text.len());
if end != 0 { if end != 0 {
self.stack.push(Container::Section { self.stack.push(Container::Section {
end: self.off + end, end: self.off + end,
@ -192,7 +162,9 @@ impl<'a> Parser<'a> {
} }
fn next_hdl(&mut self) -> Event<'a> { fn next_hdl(&mut self) -> Event<'a> {
let (hdl, off, end) = Headline::parse(&self.text[self.off..]); let tail = &self.text[self.off..];
let (hdl, off, end) = Headline::parse(tail);
debug_assert!(end <= self.text.len());
self.stack.push(Container::Headline { self.stack.push(Container::Headline {
beg: self.off + off, beg: self.off + off,
end: self.off + end, end: self.off + end,
@ -201,104 +173,126 @@ impl<'a> Parser<'a> {
Event::HeadlineBeg(hdl) Event::HeadlineBeg(hdl)
} }
fn next_ele(&mut self, end: usize) -> Event<'a> { fn next_ele(&mut self, text: &'a str) -> Event<'a> {
let (ele, off) = self let (ele, off) = self
.ele_buf .ele_buf
.take() .take()
.map(|(ele, off)| (Some(ele), off)) .map(|(ele, off)| (Some(ele), off))
.unwrap_or_else(|| { .unwrap_or_else(|| {
let (off, ele, next_2) = Element::next_2(&self.text[self.off..end]); let (ele, off, next_ele) = Element::next_2(text);
self.ele_buf = next_2; self.ele_buf = next_ele;
(ele, off) (ele, off)
}); });
debug_assert!(self.off + off <= end); debug_assert!(off <= text.len());
if let Some(ele) = ele { self.off += off;
match ele {
Element::Paragraph { cont_end, end } => self.stack.push(Container::Paragraph { match ele {
Some(Element::Paragraph { cont_end, end }) => {
debug_assert!(cont_end <= text.len() && end <= text.len());
self.stack.push(Container::Paragraph {
cont_end: cont_end + self.off, cont_end: cont_end + self.off,
end: end + self.off, end: end + self.off,
}), });
Element::QteBlock { end, cont_end, .. } => self.stack.push(Container::QteBlock { Event::ParagraphBeg
cont_end: cont_end + self.off,
end: end + self.off,
}),
Element::CtrBlock { end, cont_end, .. } => self.stack.push(Container::CtrBlock {
cont_end: cont_end + self.off,
end: end + self.off,
}),
Element::SplBlock { end, cont_end, .. } => self.stack.push(Container::SplBlock {
cont_end: cont_end + self.off,
end: end + self.off,
}),
Element::DynBlock { end, cont_end, .. } => self.stack.push(Container::DynBlock {
cont_end: cont_end + self.off,
end: end + self.off,
}),
Element::List {
ident,
ordered,
cont_end,
end,
} => self.stack.push(Container::List {
ident,
ordered,
cont_end: cont_end + self.off,
end: end + self.off,
}),
_ => (),
} }
Some(Element::QteBlock { end, cont_end, .. }) => {
self.off += off; debug_assert!(cont_end <= text.len() && end <= text.len());
self.stack.push(Container::QteBlock {
match ele { cont_end: cont_end + self.off,
Element::Call { value } => Event::Call { value }, end: end + self.off,
Element::Comment(c) => Event::Comment(c), });
Element::CommentBlock { args, cont } => Event::CommentBlock { args, cont }, Event::QteBlockBeg
Element::CtrBlock { .. } => Event::CtrBlockBeg,
Element::DynBlock { name, args, .. } => Event::DynBlockBeg { name, args },
Element::ExampleBlock { args, cont } => Event::ExampleBlock { args, cont },
Element::ExportBlock { args, cont } => Event::ExportBlock { args, cont },
Element::FixedWidth(f) => Event::FixedWidth(f),
Element::FnDef { label, cont } => Event::FnDef { label, cont },
Element::Keyword { key, value } => Event::Keyword { key, value },
Element::List { ordered, .. } => Event::ListBeg { ordered },
Element::Paragraph { .. } => Event::ParagraphBeg,
Element::QteBlock { .. } => Event::QteBlockBeg,
Element::Rule => Event::Rule,
Element::SplBlock { name, args, .. } => Event::SplBlockBeg { name, args },
Element::SrcBlock { args, cont } => Event::SrcBlock { args, cont },
Element::VerseBlock { args, cont } => Event::VerseBlock { args, cont },
} }
} else { Some(Element::CtrBlock { end, cont_end, .. }) => {
self.off += off; debug_assert!(cont_end <= text.len() && end <= text.len());
self.end() self.stack.push(Container::CtrBlock {
cont_end: cont_end + self.off,
end: end + self.off,
});
Event::CtrBlockBeg
}
Some(Element::SplBlock {
name,
args,
end,
cont_end,
}) => {
debug_assert!(cont_end <= text.len() && end <= text.len());
self.stack.push(Container::SplBlock {
cont_end: cont_end + self.off,
end: end + self.off,
});
Event::SplBlockBeg { name, args }
}
Some(Element::DynBlock {
name,
args,
cont_end,
end,
}) => {
debug_assert!(cont_end <= text.len() && end <= text.len());
self.stack.push(Container::DynBlock {
cont_end: cont_end + self.off,
end: end + self.off,
});
Event::DynBlockBeg { name, args }
}
Some(Element::List { ident, ordered }) => {
self.stack.push(Container::List { ident, ordered });
self.has_more_item = true;
Event::ListBeg { ordered }
}
Some(Element::Call { value }) => Event::Call { value },
Some(Element::Comment(c)) => Event::Comment(c),
Some(Element::CommentBlock { args, cont }) => Event::CommentBlock { args, cont },
Some(Element::ExampleBlock { args, cont }) => Event::ExampleBlock { args, cont },
Some(Element::ExportBlock { args, cont }) => Event::ExportBlock { args, cont },
Some(Element::FixedWidth(f)) => Event::FixedWidth(f),
Some(Element::FnDef { label, cont }) => Event::FnDef { label, cont },
Some(Element::Keyword { key, value }) => Event::Keyword { key, value },
Some(Element::Rule) => Event::Rule,
Some(Element::SrcBlock { args, cont }) => Event::SrcBlock { args, cont },
Some(Element::VerseBlock { args, cont }) => Event::VerseBlock { args, cont },
None => self.end(),
} }
} }
fn next_obj(&mut self, end: usize) -> Event<'a> { fn next_obj(&mut self, text: &'a str) -> Event<'a> {
let (obj, off) = self.obj_buf.take().unwrap_or_else(|| { let (obj, off) = self.obj_buf.take().unwrap_or_else(|| {
let (obj, off, next_2) = Object::next_2(&self.text[self.off..end]); let (obj, off, next_obj) = Object::next_2(text);
self.obj_buf = next_2; self.obj_buf = next_obj;
(obj, off) (obj, off)
}); });
debug_assert!(self.off + off <= end); debug_assert!(off <= text.len());
match obj { match obj {
Object::Underline { end } => self.stack.push(Container::Underline { Object::Underline { end } => {
end: self.off + end, debug_assert!(end <= text.len());
}), self.stack.push(Container::Underline {
Object::Strike { end } => self.stack.push(Container::Strike { end: self.off + end,
end: self.off + end, });
}), }
Object::Italic { end } => self.stack.push(Container::Italic { Object::Strike { end } => {
end: self.off + end, debug_assert!(end <= text.len());
}), self.stack.push(Container::Strike {
Object::Bold { end } => self.stack.push(Container::Bold { end: self.off + end,
end: self.off + end, });
}), }
Object::Italic { end } => {
debug_assert!(end <= text.len());
self.stack.push(Container::Italic {
end: self.off + end,
});
}
Object::Bold { end } => {
debug_assert!(end <= text.len());
self.stack.push(Container::Bold {
end: self.off + end,
});
}
_ => (), _ => (),
} }
@ -324,13 +318,16 @@ impl<'a> Parser<'a> {
} }
} }
fn next_list_item(&mut self, end: usize, ident: usize) -> Event<'a> { fn next_list_item(&mut self, ident: usize) -> Event<'a> {
let (beg, end) = List::parse_item(&self.text[self.off..end], ident); let (bullet, cont_beg, cont_end, end, has_more) =
List::parse(&self.text[self.off..], ident);
self.stack.push(Container::ListItem { self.stack.push(Container::ListItem {
cont_end: self.off + cont_end,
end: self.off + end, end: self.off + end,
}); });
self.off += beg; self.off += cont_beg;
Event::ListItemBeg self.has_more_item = has_more;
Event::ListItemBeg { bullet }
} }
fn end(&mut self) -> Event<'a> { fn end(&mut self) -> Event<'a> {
@ -378,58 +375,50 @@ impl<'a> Iterator for Parser<'a> {
Container::DynBlock { cont_end, end, .. } Container::DynBlock { cont_end, end, .. }
| Container::CtrBlock { cont_end, end, .. } | Container::CtrBlock { cont_end, end, .. }
| Container::QteBlock { cont_end, end, .. } | Container::QteBlock { cont_end, end, .. }
| Container::SplBlock { cont_end, end, .. } => { | Container::SplBlock { cont_end, end, .. }
| Container::ListItem { cont_end, end } => {
let text = &self.text[self.off..cont_end];
if self.off >= cont_end { if self.off >= cont_end {
self.off = end; self.off = end;
self.end() self.end()
} else { } else {
self.next_ele(cont_end) self.next_ele(text)
} }
} }
Container::List { Container::List { ident, .. } => {
cont_end, if self.has_more_item {
end, self.next_list_item(ident)
ident,
..
} => {
if self.off >= cont_end {
self.off = end;
self.end()
} else { } else {
self.next_list_item(cont_end, ident)
}
}
Container::ListItem { end } => {
if self.off >= end {
self.end() self.end()
} else {
self.next_ele(end)
} }
} }
Container::Section { end } => { Container::Section { end } => {
let text = &self.text[self.off..end];
if self.off >= end { if self.off >= end {
self.end() self.end()
} else { } else {
self.next_ele(end) self.next_ele(text)
} }
} }
Container::Paragraph { cont_end, end } => { Container::Paragraph { cont_end, end } => {
let text = &self.text[self.off..cont_end];
if self.off >= cont_end { if self.off >= cont_end {
self.off = end; self.off = end;
self.end() self.end()
} else { } else {
self.next_obj(cont_end) self.next_obj(text)
} }
} }
Container::Bold { end } Container::Bold { end }
| Container::Underline { end } | Container::Underline { end }
| Container::Italic { end } | Container::Italic { end }
| Container::Strike { end } => { | Container::Strike { end } => {
let text = &self.text[self.off..end];
if self.off >= end { if self.off >= end {
self.off += 1; self.off += 1;
self.end() self.end()
} else { } else {
self.next_obj(end) self.next_obj(text)
} }
} }
}) })