refactor: objects parse

This commit is contained in:
PoiScript 2019-01-23 00:11:09 +08:00
parent 74781e6e7e
commit 763ec00434
4 changed files with 133 additions and 144 deletions

View file

@ -97,7 +97,7 @@ impl<'a> Element<'a> {
// Unlike other elements, a footnote definition must start at column 0 // Unlike other elements, a footnote definition must start at column 0
if bytes[pos] == b'[' { if bytes[pos] == b'[' {
if let Some((label, cont, off)) = FnDef::parse(&src[pos..]) { if let Some((label, cont, off)) = FnDef::parse(&src[pos..]) {
return if pos == start { break if pos == start {
(off + 1, Some(Element::FnDef { label, cont }), None) (off + 1, Some(Element::FnDef { label, cont }), None)
} else { } else {
( (
@ -116,9 +116,9 @@ impl<'a> Element<'a> {
pos = skip_space!(src, pos); pos = skip_space!(src, pos);
if pos <= src.len() { if pos <= src.len() {
macro_rules! ret { macro_rules! brk {
($ele:expr, $off:expr) => { ($ele:expr, $off:expr) => {
return if pos == start { break if pos == start {
($off, Some($ele), None) ($off, Some($ele), None)
} else { } else {
( (
@ -145,7 +145,7 @@ impl<'a> Element<'a> {
cont_end, cont_end,
end: list_end, end: list_end,
}; };
return if pos == start { break if pos == start {
(1, Some(list), None) (1, Some(list), None)
} else { } else {
( (
@ -161,7 +161,7 @@ impl<'a> Element<'a> {
} }
if bytes[pos] == b'\n' { if bytes[pos] == b'\n' {
return ( break (
start, start,
Some(Element::Paragraph { Some(Element::Paragraph {
cont_end: end, cont_end: end,
@ -178,73 +178,76 @@ impl<'a> Element<'a> {
if bytes[pos] == b'-' { if bytes[pos] == b'-' {
let off = Rule::parse(&src[pos..]); let off = Rule::parse(&src[pos..]);
if off != 0 { if off != 0 {
ret!(Element::Rule, off); brk!(Element::Rule, off);
} }
} }
// TODO: support multi-line fixed-width areas // TODO: support multi-line fixed-width areas
if bytes[pos] == b':' && bytes.get(pos + 1).map(|&b| b == b' ').unwrap_or(false) { if bytes[pos] == b':'
&& bytes
.get(pos + 1)
.map(|&b| b == b' ' || b == b'\n')
.unwrap_or(false)
{
let eol = memchr::memchr(b'\n', &src.as_bytes()[pos..]) let eol = memchr::memchr(b'\n', &src.as_bytes()[pos..])
.map(|i| i + 1) .map(|i| i + 1)
.unwrap_or_else(|| src.len() - pos); .unwrap_or_else(|| src.len() - pos);
ret!(Element::FixedWidth(&src[pos + 1..pos + eol]), eol); brk!(Element::FixedWidth(&src[pos + 1..pos + eol]), eol);
} }
if bytes[pos] == b'#' && bytes.get(pos + 1).map(|&b| b == b'+').unwrap_or(false) { if bytes[pos] == b'#' && bytes.get(pos + 1).map(|&b| b == b'+').unwrap_or(false) {
if let Some((name, args, contents_beg, cont_end, end)) = if let Some((name, args, cont_beg, cont_end, end)) = Block::parse(&src[pos..]) {
Block::parse(&src[pos..]) let cont = &src[pos + cont_beg + 1..pos + cont_end - 1];
{
let cont = &src[pos + contents_beg + 1..pos + cont_end - 1];
match name.to_uppercase().as_str() { match name.to_uppercase().as_str() {
"COMMENT" => ret!(Element::CommentBlock { args, cont }, pos + end), "COMMENT" => brk!(Element::CommentBlock { args, cont }, end),
"EXAMPLE" => ret!(Element::ExampleBlock { args, cont }, pos + end), "EXAMPLE" => brk!(Element::ExampleBlock { args, cont }, end),
"EXPORT" => ret!(Element::ExportBlock { args, cont }, pos + end), "EXPORT" => brk!(Element::ExportBlock { args, cont }, end),
"SRC" => ret!(Element::SrcBlock { args, cont }, pos + end), "SRC" => brk!(Element::SrcBlock { args, cont }, end),
"VERSE" => ret!(Element::VerseBlock { args, cont }, pos + end), "VERSE" => brk!(Element::VerseBlock { args, cont }, end),
"CENTER" => ret!( "CENTER" => brk!(
Element::CtrBlock { Element::CtrBlock {
args, args,
cont_end, cont_end,
end, end,
}, },
pos + contents_beg cont_beg
), ),
"QUOTE" => ret!( "QUOTE" => brk!(
Element::QteBlock { Element::QteBlock {
args, args,
cont_end, cont_end,
end, end,
}, },
pos + contents_beg cont_beg
), ),
_ => ret!( _ => brk!(
Element::SplBlock { Element::SplBlock {
name, name,
args, args,
cont_end, cont_end,
end, end
}, },
pos + contents_beg cont_beg
), ),
}; };
} }
if let Some((name, args, contents_beg, cont_end, end)) = if let Some((name, args, cont_beg, cont_end, end)) =
DynBlock::parse(&src[pos..]) DynBlock::parse(&src[pos..])
{ {
ret!( brk!(
Element::DynBlock { Element::DynBlock {
name, name,
args, args,
cont_end, cont_end,
end, end,
}, },
pos + contents_beg cont_beg
) )
} }
if let Some((key, value, off)) = Keyword::parse(&src[pos..]) { if let Some((key, value, off)) = Keyword::parse(&src[pos..]) {
ret!( brk!(
if let Key::Call = key { if let Key::Call = key {
Element::Call { value } Element::Call { value }
} else { } else {
@ -261,15 +264,15 @@ impl<'a> Element<'a> {
let eol = memchr::memchr(b'\n', &src.as_bytes()[pos..]) let eol = memchr::memchr(b'\n', &src.as_bytes()[pos..])
.map(|i| i + 1) .map(|i| i + 1)
.unwrap_or_else(|| src.len() - pos); .unwrap_or_else(|| src.len() - pos);
ret!(Element::Comment(&src[pos + 1..pos + eol]), eol); brk!(Element::Comment(&src[pos + 1..pos + eol]), eol);
} }
} }
if let Some(off) = &src[pos..].find('\n') { if let Some(off) = memchr::memchr(b'\n', &src.as_bytes()[pos..]) {
pos += off + 1; pos += off + 1;
// last char // last char
if pos == src.len() { if pos == src.len() {
return ( break (
start, start,
Some(Element::Paragraph { Some(Element::Paragraph {
cont_end: pos - 1, cont_end: pos - 1,
@ -279,7 +282,7 @@ impl<'a> Element<'a> {
); );
} }
} else { } else {
return ( break (
start, start,
Some(Element::Paragraph { Some(Element::Paragraph {
cont_end: src.len(), cont_end: src.len(),

View file

@ -1,3 +1,4 @@
#[macro_use]
extern crate jetscii; extern crate jetscii;
extern crate memchr; extern crate memchr;

View file

@ -47,111 +47,128 @@ impl<'a> Object<'a> {
pub fn next_2(src: &'a str) -> (Object<'a>, usize, Option<(Object<'a>, usize)>) { pub fn next_2(src: &'a str) -> (Object<'a>, usize, Option<(Object<'a>, usize)>) {
let bytes = src.as_bytes(); let bytes = src.as_bytes();
if src.len() < 2 { if src.len() <= 2 {
return (Object::Text(src), src.len(), None); return (Object::Text(src), src.len(), None);
} }
// TODO: refactor with src[..].find(..) let chars = ascii_chars!('@', ' ', '"', '(', '\n', '{', '<', '[');
for pos in 0..src.len() - 2 {
macro_rules! ret { let mut pos = 0;
($obj:expr, $off:expr) => { loop {
return if pos == 0 { macro_rules! brk {
($obj:expr, $off:expr, $pos:expr) => {
break if pos == 0 {
($obj, $off, None) ($obj, $off, None)
} else { } else {
(Object::Text(&src[0..pos]), pos, Some(($obj, $off))) (Object::Text(&src[0..$pos]), $pos, Some(($obj, $off)))
}; };
}; };
} }
let first = bytes[pos]; let mut pre = pos;
let second = bytes[pos + 1];
let third = bytes[pos + 2];
if first == b'@' && second == b'@' { match (bytes[pos], bytes[pos + 1], bytes[pos + 2]) {
if let Some((snippet, off)) = Snippet::parse(&src[pos..]) { (b'@', b'@', _) => {
ret!(Object::Snippet(snippet), off); if let Some((snippet, off)) = Snippet::parse(&src[pos..]) {
} brk!(Object::Snippet(snippet), off, pos);
}
if first == b'[' {
if second == b'f' && third == b'n' {
if let Some((fn_ref, off)) = FnRef::parse(&src[pos..]) {
ret!(Object::FnRef(fn_ref), off);
} }
} else if second == b'[' {
if let Some((link, off)) = Link::parse(&src[pos..]) {
ret!(Object::Link(link), off);
}
} else {
if let Some((cookie, off)) = Cookie::parse(&src[pos..]) {
ret!(Object::Cookie(cookie), off);
}
// TODO: Timestamp
} }
} (b'{', b'{', b'{') => {
if let Some((macros, off)) = Macros::parse(&src[pos..]) {
if first == b'{' && second == b'{' && third == b'{' { brk!(Object::Macros(macros), off, pos);
if let Some((macros, off)) = Macros::parse(&src[pos..]) { }
ret!(Object::Macros(macros), off);
} }
} (b'<', b'<', b'<') => {
if first == b'<' && second == b'<' {
if third == b'<' {
if let Some((target, off)) = RadioTarget::parse(&src[pos..]) { if let Some((target, off)) = RadioTarget::parse(&src[pos..]) {
ret!(Object::RadioTarget(target), off); brk!(Object::RadioTarget(target), off, pos);
}
} else if third != b'<' && third != b'\n' {
if let Some((target, off)) = Target::parse(&src[pos..]) {
ret!(Object::Target(target), off);
} }
} }
} (b'<', b'<', third) => {
if third != b'\n' {
if pos == 0 if let Some((target, off)) = Target::parse(&src[pos..]) {
|| bytes[pos - 1] == b' ' brk!(Object::Target(target), off, pos);
|| bytes[pos - 1] == b'"'
|| bytes[pos - 1] == b'('
|| bytes[pos - 1] == b','
|| bytes[pos - 1] == b'\n'
|| bytes[pos - 1] == b'{'
{
if (first == b'*'
|| first == b'+'
|| first == b'/'
|| first == b'='
|| first == b'_'
|| first == b'~')
&& !second.is_ascii_whitespace()
{
if let Some(end) = Emphasis::parse(&src[pos..], first) {
match first {
b'*' => ret!(Object::Bold { end }, 1),
b'+' => ret!(Object::Strike { end }, 1),
b'/' => ret!(Object::Italic { end }, 1),
b'_' => ret!(Object::Underline { end }, 1),
b'~' => ret!(Object::Code(&src[pos + 1..pos + end]), end + 1),
b'=' => ret!(Object::Verbatim(&src[pos + 1..pos + end]), end + 1),
_ => unreachable!(),
} }
} }
} }
(b'[', b'f', b'n') => {
if first == b'c' && second == b'a' && third == b'l' { if let Some((fn_ref, off)) = FnRef::parse(&src[pos..]) {
if let Some((call, off)) = InlineCall::parse(&src[pos..]) { brk!(Object::FnRef(fn_ref), off, pos);
ret!(Object::InlineCall(call), off);
} }
} }
(b'[', b'[', _) => {
if first == b's' && second == b'r' && third == b'c' { if let Some((link, off)) = Link::parse(&src[pos..]) {
if let Some((src, off)) = InlineSrc::parse(&src[pos..]) { brk!(Object::Link(link), off, pos);
ret!(Object::InlineSrc(src), off);
} }
} }
(b'[', _, _) => {
if let Some((cookie, off)) = Cookie::parse(&src[pos..]) {
brk!(Object::Cookie(cookie), off, pos);
}
// TODO: Timestamp
}
(b'{', _, _)
| (b' ', _, _)
| (b'"', _, _)
| (b',', _, _)
| (b'(', _, _)
| (b'\n', _, _) => pre += 1,
_ => (),
}
match bytes[pre] {
b'*' => {
if let Some(end) = Emphasis::parse(&src[pre..], b'*') {
brk!(Object::Bold { end }, 1, pre);
}
}
b'+' => {
if let Some(end) = Emphasis::parse(&src[pre..], b'+') {
brk!(Object::Strike { end }, 1, pre);
}
}
b'/' => {
if let Some(end) = Emphasis::parse(&src[pre..], b'/') {
brk!(Object::Italic { end }, 1, pre);
}
}
b'_' => {
if let Some(end) = Emphasis::parse(&src[pre..], b'_') {
brk!(Object::Underline { end }, 1, pre);
}
}
b'=' => {
if let Some(end) = Emphasis::parse(&src[pre..], b'=') {
brk!(Object::Verbatim(&src[pre + 1..pre + end]), end + 1, pre);
}
}
b'~' => {
if let Some(end) = Emphasis::parse(&src[pre..], b'~') {
brk!(Object::Code(&src[pre + 1..pre + end]), end + 1, pre);
}
}
b'c' => {
if let Some((call, off)) = InlineCall::parse(&src[pre..]) {
brk!(Object::InlineCall(call), off, pre);
}
}
b's' => {
if let Some((src, off)) = InlineSrc::parse(&src[pre..]) {
brk!(Object::InlineSrc(src), off, pre);
}
}
_ => (),
}
if let Some(off) = chars
.find(&src[pos + 1..])
.map(|i| i + pos + 1)
.filter(|&i| i < src.len() - 2)
{
pos = off;
} else {
break (Object::Text(src), src.len(), None);
} }
} }
(Object::Text(src), src.len(), None)
} }
} }

View file

@ -350,44 +350,12 @@ impl<'a> Parser<'a> {
Container::Underline { .. } => Event::UnderlineEnd, Container::Underline { .. } => Event::UnderlineEnd,
} }
} }
fn check_off(&self) {
use self::Container::*;
if let Some(container) = self.stack.last() {
match *container {
Headline { end, .. }
| Section { end }
| List { end, .. }
| ListItem { end }
| Italic { end }
| Strike { end }
| Bold { end }
| Underline { end } => {
debug_assert!(self.off <= end);
}
Paragraph { cont_end, end } => {
debug_assert!(self.off <= end);
debug_assert!(self.off <= cont_end);
}
CtrBlock { cont_end, end }
| QteBlock { cont_end, end }
| SplBlock { cont_end, end }
| DynBlock { cont_end, end } => {
debug_assert!(self.off <= cont_end);
debug_assert!(self.off <= end);
}
}
}
}
} }
impl<'a> Iterator for Parser<'a> { impl<'a> Iterator for Parser<'a> {
type Item = Event<'a>; type Item = Event<'a>;
fn next(&mut self) -> Option<Event<'a>> { fn next(&mut self) -> Option<Event<'a>> {
// self.check_off();
if self.stack.is_empty() { if self.stack.is_empty() {
if self.off >= self.text.len() { if self.off >= self.text.len() {
None None