From ed762a8dd4f76192089341bf9df70027bf14764e Mon Sep 17 00:00:00 2001 From: PoiScript Date: Sun, 20 Jan 2019 19:04:12 +0800 Subject: [PATCH] feat: lines macros --- Cargo.toml | 1 + src/elements/block.rs | 8 +-- src/elements/dyn_block.rs | 8 +-- src/elements/list.rs | 117 ++++++++++++++++++++++---------------- src/elements/mod.rs | 8 ++- src/elements/rule.rs | 6 +- src/lib.rs | 1 + src/parser.rs | 18 ++++-- src/utils.rs | 13 ++++- 9 files changed, 105 insertions(+), 75 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 149608e..29a2fe8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,3 +5,4 @@ authors = ["PoiScript "] [dependencies] jetscii = "0.4.3" +memchr = "2" diff --git a/src/elements/block.rs b/src/elements/block.rs index 48cb961..e33c421 100644 --- a/src/elements/block.rs +++ b/src/elements/block.rs @@ -15,13 +15,7 @@ impl Block { let mut pos = 0; let end = format!(r"#+END_{}", &src[8..name]); - while let Some(line_end) = src[pos..].find('\n').map(|i| i + pos + 1).or_else(|| { - if pos < src.len() { - Some(src.len()) - } else { - None - } - }) { + for line_end in lines!(src) { if src[pos..line_end].trim().eq_ignore_ascii_case(&end) { return Some(( &src[8..name], diff --git a/src/elements/dyn_block.rs b/src/elements/dyn_block.rs index 763f242..804be27 100644 --- a/src/elements/dyn_block.rs +++ b/src/elements/dyn_block.rs @@ -14,13 +14,7 @@ impl DynBlock { .is_ascii_alphabetic())?; let mut pos = 0; - while let Some(line_end) = src[pos..].find('\n').map(|i| i + pos + 1).or_else(|| { - if pos < src.len() { - Some(src.len()) - } else { - None - } - }) { + for line_end in lines!(src) { if src[pos..line_end].trim().eq_ignore_ascii_case("#+END:") { return Some(( &src[8..name].trim(), diff --git a/src/elements/list.rs b/src/elements/list.rs index f6f9f70..dad3808 100644 --- a/src/elements/list.rs +++ b/src/elements/list.rs @@ -1,3 +1,6 @@ +use memchr::memchr_iter; +use std::iter::once; + pub struct List; macro_rules! ident { @@ -73,8 +76,8 @@ impl List { (beg, pos) } - // return (ident, is_ordered, end) - pub fn parse(src: &str) -> Option<(usize, bool, usize)> { + // return (ident, is_ordered, contents_end, end) + pub fn parse(src: &str) -> Option<(usize, bool, usize, usize)> { let bytes = src.as_bytes(); let starting_ident = ident!(src); @@ -82,55 +85,46 @@ impl List { return None; } + let mut lines = lines!(src); + // skip the starting line + let mut pos = lines.next().unwrap(); let is_ordered = Self::is_ordered(bytes[starting_ident]); - let mut pos = starting_ident; - while let Some(i) = src[pos..] - .find('\n') - .map(|i| i + pos + 1) - .filter(|&i| i != src.len()) - { - let ident = ident!(src[i..]); - // less indented than its starting line + Some(loop { + let mut curr_line = match lines.next() { + Some(i) => i, + None => break (starting_ident, is_ordered, pos, pos), + }; + // current line is empty + if src[pos..curr_line].trim().is_empty() { + let next_line = match lines.next() { + Some(i) => i, + None => break (starting_ident, is_ordered, pos, pos), + }; + + // next line is emtpy, too + if src[curr_line..next_line].trim().is_empty() { + break (starting_ident, is_ordered, pos, next_line); + } else { + // move to next line + pos = curr_line; + curr_line = next_line; + } + } + + let ident = ident!(src[pos..curr_line]); + + // less indented than the starting line if ident < starting_ident { - return Some((starting_ident, is_ordered, i - 1)); + break (starting_ident, is_ordered, pos, pos); } - if ident > starting_ident { - pos = i; - continue; - } - - if bytes[ident + i] == b'\n' && pos < src.len() { - let nextline_ident = ident!(src[ident + i + 1..]); - - // check if it's two consecutive empty lines - if nextline_ident < starting_ident - || (ident + i + 1 + nextline_ident < src.len() - && bytes[ident + i + 1 + nextline_ident] == b'\n') - { - return Some((starting_ident, is_ordered, ident + i + 1 + nextline_ident)); - } - - if nextline_ident == starting_ident { - if Self::is_item(&src[i + nextline_ident + 1..]) { - pos = i + nextline_ident + 1; - continue; - } else { - return Some((starting_ident, is_ordered, ident + i + 1 + nextline_ident)); - } - } - } - - if Self::is_item(&src[i + ident..]) { - pos = i; - continue; + if ident > starting_ident || Self::is_item(&src[pos + ident..]) { + pos = curr_line; } else { - return Some((starting_ident, is_ordered, i - 1)); + break (starting_ident, is_ordered, pos, pos); } - } - - Some((starting_ident, is_ordered, src.len())) + }) } } @@ -142,7 +136,7 @@ fn parse() { + item2 + item3" ), - Some((0, false, 23)) + Some((0, false, 23, 23)) ); assert_eq!( List::parse( @@ -151,7 +145,7 @@ fn parse() { * item3" ), - Some((0, false, 24)) + Some((0, false, 24, 24)) ); assert_eq!( List::parse( @@ -161,7 +155,7 @@ fn parse() { - item1" ), - Some((0, false, 17)) + Some((0, false, 16, 18)) ); assert_eq!( List::parse( @@ -169,7 +163,7 @@ fn parse() { 2. item1 3. item2" ), - Some((0, true, 28)) + Some((0, true, 28, 28)) ); assert_eq!( List::parse( @@ -177,7 +171,7 @@ fn parse() { 2) item1 3) item2" ), - Some((2, true, 10)) + Some((2, true, 11, 11)) ); assert_eq!( List::parse( @@ -185,7 +179,7 @@ fn parse() { 1) item1 + item2" ), - Some((2, false, 32)) + Some((2, false, 32, 32)) ); assert_eq!( List::parse( @@ -195,6 +189,29 @@ fn parse() { ), None ); + assert_eq!( + List::parse( + r#" - Lorem ipsum dolor sit amet, consectetur adipiscing elit. + + - Nulla et dolor vitae elit placerat sagittis. Aliquam a lobortis massa, + aliquam efficitur arcu. + + - Lorem ipsum dolor sit amet, consectetur adipiscing elit. + + - Phasellus auctor lacus a orci imperdiet, ut facilisis neque lobortis. + + - Proin condimentum id orci vitae lobortis. Nunc sollicitudin risus neque, + dapibus malesuada sem faucibus vitae. + + - Sed vitae dolor augue. Phasellus at rhoncus arcu. Suspendisse potenti. + + - Nulla faucibus, metus ut porta hendrerit, urna lorem porta metus, in tempus + nibh orci sed sapien. + + - Morbi tortor mi, dapibus vel faucibus a, iaculis sed turpis."# + ), + Some((1, false, 677, 677)) + ); } #[test] diff --git a/src/elements/mod.rs b/src/elements/mod.rs index 440ae6d..3d1d4ed 100644 --- a/src/elements/mod.rs +++ b/src/elements/mod.rs @@ -75,6 +75,7 @@ pub enum Element<'a> { List { ident: usize, is_ordered: bool, + contents_end: usize, end: usize, }, } @@ -135,14 +136,15 @@ impl<'a> Element<'a> { || bytes[pos] == b'*' || (bytes[pos] >= b'0' && bytes[pos] <= b'9') { - if let Some((ident, is_ordered, list_end)) = List::parse(&src[end..]) { + if let Some((ident, is_ordered, contents_end, end)) = List::parse(&src[end..]) { ret!( Element::List { ident, is_ordered, - end: list_end + contents_end, + end }, - end + 0 ); } } diff --git a/src/elements/rule.rs b/src/elements/rule.rs index bb6aabf..ad240f1 100644 --- a/src/elements/rule.rs +++ b/src/elements/rule.rs @@ -1,10 +1,14 @@ +use memchr::memchr; + #[cfg_attr(test, derive(PartialEq))] #[derive(Debug)] pub struct Rule; impl Rule { pub fn parse(src: &str) -> usize { - let end = src.find('\n').map(|i| i + 1).unwrap_or_else(|| src.len()); + let end = memchr(b'\n', src.as_bytes()) + .map(|i| i + 1) + .unwrap_or_else(|| src.len()); let rules = &src[0..end].trim(); if rules.len() >= 5 && rules.chars().all(|c| c == '-') { end diff --git a/src/lib.rs b/src/lib.rs index 4440456..d168696 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,6 @@ #[macro_use] extern crate jetscii; +extern crate memchr; #[macro_use] mod utils; diff --git a/src/parser.rs b/src/parser.rs index f2fcd96..2f1e1a4 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -37,6 +37,7 @@ pub enum Container { List { ident: usize, is_ordered: bool, + contents_end: usize, end: usize, }, ListItem { @@ -246,10 +247,12 @@ impl<'a> Parser<'a> { Element::List { ident, is_ordered, + contents_end, end, } => self.stack.push(Container::List { ident, is_ordered, + contents_end: contents_end + self.off, end: end + self.off, }), _ => (), @@ -333,7 +336,7 @@ impl<'a> Parser<'a> { assert!(self.off <= end); } Paragraph { end, trailing } => { - assert!(self.off <= trailing); + // assert!(self.off <= trailing); assert!(self.off <= end); } CenterBlock { contents_end, end } @@ -352,8 +355,7 @@ impl<'a> Iterator for Parser<'a> { type Item = Event<'a>; fn next(&mut self) -> Option> { - // - self.check_off(); + // self.check_off(); if self.stack.is_empty() { if self.off >= self.text.len() { @@ -395,8 +397,14 @@ impl<'a> Iterator for Parser<'a> { self.next_ele(contents_end) } } - Container::List { end, ident, .. } => { - if self.off >= end { + Container::List { + contents_end, + end, + ident, + .. + } => { + if self.off >= contents_end { + self.off = end; self.end() } else { self.next_list_item(end, ident) diff --git a/src/utils.rs b/src/utils.rs index c6fc278..501f3bd 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -112,10 +112,10 @@ macro_rules! starts_with { #[macro_export] macro_rules! skip_space { ($src:ident) => { - until!($src, |c| c != b' ').unwrap_or(0) + until!($src, |c| c != b' ' && c != b'\t').unwrap_or(0) }; ($src:ident, $from:expr) => { - until!($src[$from..], |c| c != b' ').unwrap_or(0) + $from + until!($src[$from..], |c| c != b' ' && c != b'\t').unwrap_or(0) + $from }; } @@ -154,3 +154,12 @@ macro_rules! parse_succ { ); }; } + +#[macro_export] +macro_rules! lines { + ($src:ident) => { + memchr::memchr_iter(b'\n', $src.as_bytes()) + .map(|i| i + 1) + .chain(std::iter::once($src.len())) + }; +}