Fix drawer parsing to not take headlines.

Currently, when parsing a drawer for a node of level N, any headline
deeper than N (for example, level N + 1) is absorbed into the drawer.
This commit changes drawer parsing to reject headlines. For example:

```
* Hello
:PROPERTIES:
** World
:END:
```

should be parsed as two nodes. The drawer is ill-formed, and will be
parsed as part of the body instead of as a drawer.
This commit is contained in:
Alex Roper 2020-05-14 02:00:13 -07:00
parent 9cf38d1dc5
commit 849005c107
4 changed files with 87 additions and 16 deletions

View file

@ -7,7 +7,10 @@ use nom::{
IResult, IResult,
}; };
use crate::parse::combinators::{blank_lines_count, eol, lines_till}; use crate::{
parse::combinators::{blank_lines_count, eol, lines_till},
parsers::lines_until_headline_at_level_le,
};
/// Drawer Element /// Drawer Element
#[derive(Debug, Default, Clone)] #[derive(Debug, Default, Clone)]
@ -59,7 +62,18 @@ pub fn parse_drawer_without_blank(input: &str) -> IResult<&str, (Drawer, &str),
tag(":"), tag(":"),
)(input)?; )(input)?;
let (input, _) = eol(input)?; let (input, _) = eol(input)?;
let (input, contents) = lines_till(|line| line.trim().eq_ignore_ascii_case(":END:"))(input)?;
// Restrict the search for the end of the drawer to the current headline.
let (_input_after_headline, (input_until_headline, _level)) =
lines_until_headline_at_level_le(input, std::usize::MAX)?;
// `tail` is the part of `input_until_headline` that is left over
// after the drawer has been consumed.
let (tail, contents) =
lines_till(|line| line.trim().eq_ignore_ascii_case(":END:"))(input_until_headline)?;
// Skip over the amount used by the drawer.
let input = &input[input_until_headline.len() - tail.len()..];
Ok(( Ok((
input, input,
@ -118,4 +132,30 @@ fn parse() {
// https://github.com/PoiScript/orgize/issues/9 // https://github.com/PoiScript/orgize/issues/9
assert!(parse_drawer(":SPAGHETTI:\n").is_err()); assert!(parse_drawer(":SPAGHETTI:\n").is_err());
// https://github.com/PoiScript/orgize/issues/24
// A drawer may not contain a headline.
assert!(parse_drawer(
r#":MYDRAWER:
* Node
:END:"#
)
.is_err(),);
// A drawer may not contain another drawer. An attempt to do so will result
// in the drawer ending at the first end line.
assert_eq!(
parse_drawer(":OUTER:\nOuter Text\n:INNER:\nInner Text\n:END:\n:END:"),
Ok((
":END:",
(
Drawer {
name: "OUTER".into(),
pre_blank: 0,
post_blank: 0
},
"Outer Text\n:INNER:\nInner Text\n"
)
))
);
} }

View file

@ -453,5 +453,5 @@ fn parse_properties_drawer_() {
.into_iter() .into_iter()
.collect::<HashMap<_, _>>() .collect::<HashMap<_, _>>()
)) ))
) );
} }

View file

@ -644,7 +644,7 @@ pub fn blank_lines_count(input: &str) -> (&str, usize) {
// line, including the terminal \n if one is present. Unlike org-mode (but like // line, including the terminal \n if one is present. Unlike org-mode (but like
// org-element), we accept '\n' and EOF to terminate the stars. Returns the // org-element), we accept '\n' and EOF to terminate the stars. Returns the
// number of stars. Must only be called at the start of a line. // number of stars. Must only be called at the start of a line.
fn parse_headline_level_le(input: &str, max_level: usize) -> IResult<&str, usize, ()> { pub(crate) fn parse_headline_level_le(input: &str, max_level: usize) -> IResult<&str, usize, ()> {
let (input, level) = verify( let (input, level) = verify(
map(is_a("*"), |s: &str| s.chars().count()), map(is_a("*"), |s: &str| s.chars().count()),
|level: &usize| *level <= max_level, |level: &usize| *level <= max_level,
@ -667,31 +667,44 @@ fn line_length(input: &str) -> IResult<&str, usize, ()> {
} }
} }
pub fn parse_headline(input: &str) -> Option<(&str, (&str, usize))> { // Returns all text until a headline with level <= max_level is found. Must
// Consume the headline. // start at the start of the line. Can return nothing if immediately at a
let (text, level) = parse_headline_level_le(input, std::usize::MAX).ok()?; // headline.
//
// This is a separate function from lines_while/lines_until because we need to
// treat EOF differently from EOL when the file ends with \r.
pub fn lines_until_headline_at_level_le(
input: &str,
max_level: usize,
) -> IResult<&str, (&str, usize), ()> {
// Collect lines until EOF or a headline. // Collect lines until EOF or a headline.
let mut last = 0; let mut last = 0;
for i in memchr_iter(b'\n', text.as_bytes()) { for i in memchr_iter(b'\n', input.as_bytes()) {
// Check the first byte after the newline to skip parsing unnecessarily. // Check the first byte after the newline to skip parsing unnecessarily.
if text.as_bytes()[last] == b'*' && parse_headline_level_le(&text[last..], level).is_ok() { if input.as_bytes()[last] == b'*'
&& parse_headline_level_le(&input[last..], max_level).is_ok()
{
break; break;
} }
last = i + 1; last = i + 1;
} }
if last < text.len() && parse_headline_level_le(&text[last..], level).is_err() { if last < input.len() && parse_headline_level_le(&input[last..], max_level).is_err() {
Some(("", (input, level))) Ok(("", (input, max_level)))
} else { } else {
Some(( Ok((&input[last..], (&input[..last], max_level)))
&text[last..],
(&input[..(input.len() - text.len()) + last], level),
))
} }
} }
/// Splits the node that starts at the beginning of `input` from whatever
/// follows it.
///
/// Returns `None` if `input` does not start with a headline, i.e. when
/// `parse_headline_level_le` fails. Otherwise returns
/// `Some((rest, (node, level)))` where:
///
/// * `level` is the number of stars reported by `parse_headline_level_le`,
/// * `node` is the prefix of `input` covering the headline line and all
///   text up to the next headline whose level is `<= level` (or to EOF),
/// * `rest` is the remainder of `input` after `node`.
pub fn parse_headline(input: &str) -> Option<(&str, (&str, usize))> {
    // Consume the headline line itself; any level is accepted here.
    let (after_headline, level) = parse_headline_level_le(input, std::usize::MAX).ok()?;

    // Advance to the next headline at the same or a shallower level. Only
    // the remaining input matters; the content slice is recovered below
    // from `input` so that it also includes the headline line.
    let (remaining, _content) = lines_until_headline_at_level_le(after_headline, level).ok()?;

    // `remaining` is a suffix of `input`, so the length difference is the
    // byte offset at which the node ends.
    let (node, rest) = input.split_at(input.len() - remaining.len());
    Some((rest, (node, level)))
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;

18
tests/issue_24.rs Normal file
View file

@ -0,0 +1,18 @@
use orgize::Org;
/// Regression test for https://github.com/PoiScript/orgize/issues/24.
///
/// A drawer may not contain a headline: a headline placed between a
/// drawer's opening line and its `:END:` line must still be counted as a
/// headline of its own.
#[test]
fn headline_in_drawer() {
    const STARS: &str = "****";

    // Both ranges stop before `STARS.len()`, so every combination of
    // levels 1 through 3 is exercised for the outer and inner headline.
    for outer_level in 1..STARS.len() {
        let outer_stars = &STARS[..outer_level];

        for inner_level in 1..STARS.len() {
            let inner_stars = &STARS[..inner_level];

            let text = format!(
                "{} Hello\n:PROPERTIES:\n{} World\n:END:",
                outer_stars, inner_stars
            );
            let org = crate::Org::parse_string(text);

            assert_eq!(org.headlines().count(), 2);
        }
    }
}