orgize/src/parser.rs

553 lines
16 KiB
Rust
Raw Normal View History

2019-01-04 08:53:20 +00:00
use elements::*;
use headline::*;
use objects::*;
/// A syntactic container that is currently open on the parser's stack.
///
/// Every offset stored in a variant is an absolute byte offset into the text
/// given to `Parser::new`. The parser compares its cursor against these
/// offsets to decide when a container has to be closed.
#[cfg_attr(test, derive(PartialEq))]
#[derive(Copy, Clone, Debug)]
pub enum Container {
    /// An open headline.
    Headline {
        /// Cursor position right after the part consumed when the headline was
        /// opened; while the cursor still equals `beg` the parser first tries
        /// to open a section.
        beg: usize,
        /// Offset at which the headline is closed.
        end: usize,
    },
    /// An open section.
    Section {
        /// Offset at which the section is closed.
        end: usize,
    },
    /// An open paragraph.
    Paragraph {
        /// Offset up to which inline objects are parsed.
        end: usize,
        /// Offset the cursor is moved to when the paragraph is closed.
        trailing: usize,
    },
    /// An open center block.
    CenterBlock {
        /// Offset up to which nested elements are parsed.
        contents_end: usize,
        /// Offset the cursor is moved to when the block is closed.
        end: usize,
    },
    /// An open quote block.
    QuoteBlock {
        /// Offset up to which nested elements are parsed.
        contents_end: usize,
        /// Offset the cursor is moved to when the block is closed.
        end: usize,
    },
    /// An open special block.
    SpecialBlock {
        /// Offset up to which nested elements are parsed.
        contents_end: usize,
        /// Offset the cursor is moved to when the block is closed.
        end: usize,
    },
    /// An open dynamic block.
    DynBlock {
        /// Offset up to which nested elements are parsed.
        contents_end: usize,
        /// Offset the cursor is moved to when the block is closed.
        end: usize,
    },
    /// An open list.
    List {
        /// Value handed to `List::parse_item` for every item of this list.
        ident: usize,
        /// Echoed back in `Event::EndList` when the list is closed.
        is_ordered: bool,
        /// Offset at which the parser stops looking for further items.
        contents_end: usize,
        /// Offset the cursor is moved to when the list is closed.
        end: usize,
    },
    /// An open list item.
    ListItem {
        /// Offset at which the item is closed.
        end: usize,
    },
    /// Open italic markup.
    Italic {
        /// Offset at which the markup is closed; the cursor then advances by
        /// one more byte.
        end: usize,
    },
    /// Open strike-through markup.
    Strike {
        /// Offset at which the markup is closed; the cursor then advances by
        /// one more byte.
        end: usize,
    },
    /// Open bold markup.
    Bold {
        /// Offset at which the markup is closed; the cursor then advances by
        /// one more byte.
        end: usize,
    },
    /// Open underline markup.
    Underline {
        /// Offset at which the markup is closed; the cursor then advances by
        /// one more byte.
        end: usize,
    },
}
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
2019-01-04 08:53:20 +00:00
pub enum Event<'a> {
StartHeadline(Headline<'a>),
EndHeadline,
StartSection,
EndSection,
2019-01-10 12:58:13 +00:00
StartParagraph,
EndParagraph,
2019-01-10 16:57:08 +00:00
StartCenterBlock,
EndCenterBlock,
StartQuoteBlock,
EndQuoteBlock,
StartSpecialBlock {
name: &'a str,
args: Option<&'a str>,
},
EndSpecialBlock,
StartDynBlock {
name: &'a str,
args: Option<&'a str>,
},
EndDynBlock,
2019-01-10 16:57:08 +00:00
CommentBlock {
args: Option<&'a str>,
contents: &'a str,
2019-01-10 16:57:08 +00:00
},
ExampleBlock {
args: Option<&'a str>,
contents: &'a str,
2019-01-10 16:57:08 +00:00
},
ExportBlock {
args: Option<&'a str>,
contents: &'a str,
2019-01-10 16:57:08 +00:00
},
SrcBlock {
args: Option<&'a str>,
contents: &'a str,
2019-01-10 16:57:08 +00:00
},
VerseBlock {
args: Option<&'a str>,
contents: &'a str,
2019-01-10 16:57:08 +00:00
},
2019-01-17 03:47:14 +00:00
StartList {
is_ordered: bool,
},
EndList {
is_ordered: bool,
},
StartListItem,
EndListItem,
2019-01-04 08:53:20 +00:00
AffKeywords,
Call,
Clock,
2019-01-10 12:58:13 +00:00
Comment(&'a str),
2019-01-04 08:53:20 +00:00
TableStart,
TableEnd,
TableCell,
LatexEnv,
FnDef {
label: &'a str,
contents: &'a str,
},
Keyword {
key: &'a str,
value: &'a str,
},
2019-01-04 08:53:20 +00:00
Rule,
2019-01-10 12:58:13 +00:00
2019-01-04 08:53:20 +00:00
Cookie(Cookie<'a>),
FnRef(FnRef<'a>),
InlineCall(InlineCall<'a>),
InlineSrc(InlineSrc<'a>),
Link(Link<'a>),
Macros(Macros<'a>),
RadioTarget(RadioTarget<'a>),
Snippet(Snippet<'a>),
Target(Target<'a>),
2019-01-10 12:58:13 +00:00
StartBold,
EndBold,
StartItalic,
EndItalic,
StartStrike,
EndStrike,
StartUnderline,
EndUnderline,
2019-01-04 08:53:20 +00:00
Verbatim(&'a str),
Code(&'a str),
Text(&'a str),
}
pub struct Parser<'a> {
text: &'a str,
stack: Vec<Container>,
off: usize,
2019-01-10 12:58:13 +00:00
ele_buf: Option<(Element<'a>, usize)>,
obj_buf: Option<(Object<'a>, usize)>,
2019-01-04 08:53:20 +00:00
}
impl<'a> Parser<'a> {
pub fn new(text: &'a str) -> Parser<'a> {
Parser {
text,
stack: Vec::new(),
off: 0,
2019-01-10 12:58:13 +00:00
ele_buf: None,
obj_buf: None,
2019-01-04 08:53:20 +00:00
}
}
fn start_section_or_headline(&mut self, tail: &'a str) -> Event<'a> {
let end = Headline::find_level(tail, std::usize::MAX);
if end != 0 {
self.stack.push(Container::Section {
end: self.off + end,
});
Event::StartSection
} else {
self.start_headline(tail)
}
}
fn start_headline(&mut self, tail: &'a str) -> Event<'a> {
let (hdl, off, end) = Headline::parse(tail);
self.stack.push(Container::Headline {
beg: self.off + off,
end: self.off + end,
});
self.off += off;
Event::StartHeadline(hdl)
}
2019-01-10 12:58:13 +00:00
fn next_ele(&mut self, end: usize) -> Event<'a> {
2019-01-11 14:35:06 +00:00
let (ele, off) = self
.ele_buf
.take()
.map(|(ele, off)| (Some(ele), off))
.unwrap_or_else(|| {
let (off, ele, next_2) = Element::next_2(&self.text[self.off..end]);
self.ele_buf = next_2;
(ele, off)
});
2019-01-10 12:58:13 +00:00
if let Some(ele) = ele {
2019-01-10 16:57:08 +00:00
match ele {
Element::Paragraph { end, trailing } => self.stack.push(Container::Paragraph {
2019-01-11 14:35:06 +00:00
end: end + self.off,
trailing: trailing + self.off,
2019-01-10 16:57:08 +00:00
}),
Element::QuoteBlock {
end, contents_end, ..
2019-01-10 16:57:08 +00:00
} => self.stack.push(Container::QuoteBlock {
contents_end: contents_end + self.off,
2019-01-11 14:35:06 +00:00
end: end + self.off,
2019-01-10 16:57:08 +00:00
}),
Element::CenterBlock {
end, contents_end, ..
2019-01-10 16:57:08 +00:00
} => self.stack.push(Container::CenterBlock {
contents_end: contents_end + self.off,
2019-01-11 14:35:06 +00:00
end: end + self.off,
2019-01-10 16:57:08 +00:00
}),
Element::SpecialBlock {
end, contents_end, ..
2019-01-10 16:57:08 +00:00
} => self.stack.push(Container::SpecialBlock {
contents_end: contents_end + self.off,
end: end + self.off,
}),
Element::DynBlock {
end, contents_end, ..
} => self.stack.push(Container::DynBlock {
contents_end: contents_end + self.off,
2019-01-11 14:35:06 +00:00
end: end + self.off,
2019-01-10 16:57:08 +00:00
}),
2019-01-17 03:47:14 +00:00
Element::List {
ident,
is_ordered,
2019-01-20 11:04:12 +00:00
contents_end,
2019-01-17 03:47:14 +00:00
end,
} => self.stack.push(Container::List {
ident,
is_ordered,
2019-01-20 11:04:12 +00:00
contents_end: contents_end + self.off,
2019-01-17 03:47:14 +00:00
end: end + self.off,
}),
2019-01-10 16:57:08 +00:00
_ => (),
2019-01-10 12:58:13 +00:00
}
2019-01-11 14:35:06 +00:00
self.off += off;
2019-01-10 12:58:13 +00:00
ele.into()
} else {
2019-01-11 14:35:06 +00:00
self.off += off;
2019-01-10 12:58:13 +00:00
self.end()
}
}
fn next_obj(&mut self, end: usize) -> Event<'a> {
2019-01-11 14:35:06 +00:00
let (obj, off) = self.obj_buf.take().unwrap_or_else(|| {
2019-01-10 12:58:13 +00:00
let (obj, off, next_2) = Object::next_2(&self.text[self.off..end]);
self.obj_buf = next_2;
(obj, off)
2019-01-11 14:35:06 +00:00
});
2019-01-10 12:58:13 +00:00
match obj {
Object::Underline { end } => self.stack.push(Container::Underline {
end: self.off + end,
}),
Object::Strike { end } => self.stack.push(Container::Strike {
end: self.off + end,
}),
Object::Italic { end } => self.stack.push(Container::Italic {
end: self.off + end,
}),
Object::Bold { end } => self.stack.push(Container::Bold {
end: self.off + end,
}),
_ => (),
}
2019-01-11 14:35:06 +00:00
self.off += off;
2019-01-10 12:58:13 +00:00
obj.into()
}
2019-01-17 03:47:14 +00:00
fn next_list_item(&mut self, end: usize, ident: usize) -> Event<'a> {
let (beg, end) = List::parse_item(&self.text[self.off..end], ident);
self.stack.push(Container::ListItem {
end: self.off + end,
});
self.off += beg;
Event::StartListItem
}
2019-01-10 12:58:13 +00:00
fn end(&mut self) -> Event<'a> {
match self.stack.pop().unwrap() {
Container::Paragraph { .. } => Event::EndParagraph,
Container::Underline { .. } => Event::EndUnderline,
Container::Section { .. } => Event::EndSection,
Container::Strike { .. } => Event::EndStrike,
Container::Headline { .. } => Event::EndHeadline,
Container::Italic { .. } => Event::EndItalic,
Container::Bold { .. } => Event::EndBold,
2019-01-10 16:57:08 +00:00
Container::CenterBlock { .. } => Event::EndCenterBlock,
Container::QuoteBlock { .. } => Event::EndQuoteBlock,
Container::SpecialBlock { .. } => Event::EndSpecialBlock,
Container::DynBlock { .. } => Event::EndDynBlock,
2019-01-17 03:47:14 +00:00
Container::List { is_ordered, .. } => Event::EndList { is_ordered },
Container::ListItem { .. } => Event::EndListItem,
}
}
fn check_off(&self) {
use self::Container::*;
if let Some(container) = self.stack.last() {
match *container {
Headline { end, .. }
| Section { end }
| List { end, .. }
| ListItem { end }
| Italic { end }
| Strike { end }
| Bold { end }
| Underline { end } => {
assert!(self.off <= end);
}
Paragraph { end, trailing } => {
2019-01-20 11:04:12 +00:00
// assert!(self.off <= trailing);
2019-01-17 03:47:14 +00:00
assert!(self.off <= end);
}
CenterBlock { contents_end, end }
| QuoteBlock { contents_end, end }
| SpecialBlock { contents_end, end }
| DynBlock { contents_end, end } => {
assert!(self.off <= contents_end);
assert!(self.off <= end);
}
}
2019-01-10 12:58:13 +00:00
}
2019-01-04 08:53:20 +00:00
}
}
impl<'a> Iterator for Parser<'a> {
type Item = Event<'a>;
fn next(&mut self) -> Option<Event<'a>> {
2019-01-20 11:04:12 +00:00
// self.check_off();
2019-01-17 03:47:14 +00:00
2019-01-04 08:53:20 +00:00
if self.stack.is_empty() {
if self.off >= self.text.len() {
None
} else {
let tail = &self.text[self.off..];
2019-01-04 08:53:20 +00:00
Some(self.start_section_or_headline(tail))
}
} else {
2019-01-10 16:57:08 +00:00
let last = *self.stack.last_mut().unwrap();
2019-01-04 08:53:20 +00:00
Some(match last {
Container::Headline { beg, end } => {
let tail = &self.text[self.off..];
2019-01-04 08:53:20 +00:00
if self.off >= end {
2019-01-10 12:58:13 +00:00
self.end()
2019-01-04 08:53:20 +00:00
} else if self.off == beg {
self.start_section_or_headline(tail)
} else {
self.start_headline(tail)
}
}
Container::DynBlock {
contents_end, end, ..
}
| Container::CenterBlock {
contents_end, end, ..
2019-01-10 16:57:08 +00:00
}
| Container::QuoteBlock {
contents_end, end, ..
2019-01-10 16:57:08 +00:00
}
| Container::SpecialBlock {
contents_end, end, ..
2019-01-10 16:57:08 +00:00
} => {
if self.off >= contents_end {
2019-01-10 16:57:08 +00:00
self.off = end;
self.end()
} else {
self.next_ele(contents_end)
2019-01-10 16:57:08 +00:00
}
}
2019-01-20 11:04:12 +00:00
Container::List {
contents_end,
end,
ident,
..
} => {
if self.off >= contents_end {
self.off = end;
2019-01-17 03:47:14 +00:00
self.end()
} else {
self.next_list_item(end, ident)
}
}
Container::ListItem { end } => {
if self.off >= end {
self.end()
} else {
// TODO: handle nested list
self.next_obj(end)
}
}
2019-01-04 08:53:20 +00:00
Container::Section { end } => {
if self.off >= end {
2019-01-10 12:58:13 +00:00
self.end()
} else {
self.next_ele(end)
}
}
Container::Paragraph { end, trailing } => {
if self.off >= end {
self.off = trailing;
self.end()
2019-01-04 08:53:20 +00:00
} else {
2019-01-10 12:58:13 +00:00
self.next_obj(end)
}
}
Container::Bold { end }
| Container::Underline { end }
| Container::Italic { end }
| Container::Strike { end } => {
if self.off >= end {
self.off += 1;
self.end()
} else {
self.next_obj(end)
2019-01-04 08:53:20 +00:00
}
}
})
}
}
}
2019-01-10 12:58:13 +00:00
impl<'a> From<Object<'a>> for Event<'a> {
    /// Maps an inline object to the event announcing it.
    ///
    /// Markup objects become the corresponding `Start…` event; every other
    /// object is forwarded with its payload unchanged.
    fn from(obj: Object<'a>) -> Self {
        match obj {
            // Markup that wraps further objects.
            Object::Bold { .. } => Event::StartBold,
            Object::Italic { .. } => Event::StartItalic,
            Object::Strike { .. } => Event::StartStrike,
            Object::Underline { .. } => Event::StartUnderline,

            // Objects carrying a structured payload.
            Object::Cookie(cookie) => Event::Cookie(cookie),
            Object::FnRef(fn_ref) => Event::FnRef(fn_ref),
            Object::InlineCall(call) => Event::InlineCall(call),
            Object::InlineSrc(src) => Event::InlineSrc(src),
            Object::Link(link) => Event::Link(link),
            Object::Macros(macros) => Event::Macros(macros),
            Object::RadioTarget(radio) => Event::RadioTarget(radio),
            Object::Snippet(snippet) => Event::Snippet(snippet),
            Object::Target(target) => Event::Target(target),

            // Objects carrying a slice of the input.
            Object::Code(text) => Event::Code(text),
            Object::Verbatim(text) => Event::Verbatim(text),
            Object::Text(text) => Event::Text(text),
        }
    }
}
impl<'a> From<Element<'a>> for Event<'a> {
fn from(ele: Element<'a>) -> Self {
match ele {
Element::Comment(c) => Event::Comment(c),
Element::FnDef { label, contents } => Event::FnDef { label, contents },
Element::Keyword { key, value } => Event::Keyword { key, value },
2019-01-10 12:58:13 +00:00
Element::Paragraph { .. } => Event::StartParagraph,
Element::Rule => Event::Rule,
2019-01-10 16:57:08 +00:00
Element::CenterBlock { .. } => Event::StartCenterBlock,
Element::QuoteBlock { .. } => Event::StartQuoteBlock,
Element::DynBlock { name, args, .. } => Event::StartDynBlock { name, args },
2019-01-10 16:57:08 +00:00
Element::SpecialBlock { name, args, .. } => Event::StartSpecialBlock { name, args },
Element::CommentBlock { args, contents } => Event::CommentBlock { args, contents },
Element::ExampleBlock { args, contents } => Event::ExampleBlock { args, contents },
Element::ExportBlock { args, contents } => Event::ExportBlock { args, contents },
Element::SrcBlock { args, contents } => Event::SrcBlock { args, contents },
Element::VerseBlock { args, contents } => Event::VerseBlock { args, contents },
2019-01-17 03:47:14 +00:00
Element::List { is_ordered, .. } => Event::StartList { is_ordered },
2019-01-10 12:58:13 +00:00
}
}
}
2019-01-04 08:53:20 +00:00
// Parses a document with four headlines (one of them nested) and checks the
// complete event stream, including the order in which containers are closed.
#[test]
fn parse() {
    use self::Event::*;

    let expected = vec![
        StartHeadline(Headline::new(1, None, None, "Title 1", None)),
        StartSection,
        StartParagraph,
        StartBold,
        Text("Section 1"),
        EndBold,
        EndParagraph,
        EndSection,
        StartHeadline(Headline::new(2, None, None, "Title 2", None)),
        StartSection,
        StartParagraph,
        StartUnderline,
        Text("Section 2"),
        EndUnderline,
        EndParagraph,
        EndSection,
        // The nested level-2 headline closes first, then its level-1 parent.
        EndHeadline,
        EndHeadline,
        StartHeadline(Headline::new(1, None, None, "Title 3", None)),
        StartSection,
        StartParagraph,
        StartItalic,
        Text("Section 3"),
        EndItalic,
        EndParagraph,
        EndSection,
        EndHeadline,
        StartHeadline(Headline::new(1, None, None, "Title 4", None)),
        StartSection,
        StartParagraph,
        Verbatim("Section 4"),
        EndParagraph,
        EndSection,
        EndHeadline,
    ];

    assert_eq!(
        Parser::new(
            r#"* Title 1
*Section 1*
** Title 2
_Section 2_
* Title 3
/Section 3/
* Title 4
=Section 4="#
        )
        .collect::<Vec<_>>(),
        expected
    );
}