Fix drawer parsing to not take headlines.
Currently, when parsing a drawer for a node of level N, any headlines of level N + 1 will be absorbed into the drawer. This changes drawer parsing to reject headlines. For example:

```
* Hello
:PROPERTIES:
** World
:END:
```

should be parsed as two nodes. The drawer is ill-formed, and will be parsed as part of the body instead of as a drawer.
This commit is contained in:
parent
9cf38d1dc5
commit
849005c107
|
@ -7,7 +7,10 @@ use nom::{
|
||||||
IResult,
|
IResult,
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::parse::combinators::{blank_lines_count, eol, lines_till};
|
use crate::{
|
||||||
|
parse::combinators::{blank_lines_count, eol, lines_till},
|
||||||
|
parsers::lines_until_headline_at_level_le,
|
||||||
|
};
|
||||||
|
|
||||||
/// Drawer Element
|
/// Drawer Element
|
||||||
#[derive(Debug, Default, Clone)]
|
#[derive(Debug, Default, Clone)]
|
||||||
|
@ -59,7 +62,18 @@ pub fn parse_drawer_without_blank(input: &str) -> IResult<&str, (Drawer, &str),
|
||||||
tag(":"),
|
tag(":"),
|
||||||
)(input)?;
|
)(input)?;
|
||||||
let (input, _) = eol(input)?;
|
let (input, _) = eol(input)?;
|
||||||
let (input, contents) = lines_till(|line| line.trim().eq_ignore_ascii_case(":END:"))(input)?;
|
|
||||||
|
// Restrict the search for the end of the drawer to the current headline.
|
||||||
|
let (_input_after_headline, (input_until_headline, _level)) =
|
||||||
|
lines_until_headline_at_level_le(input, std::usize::MAX)?;
|
||||||
|
|
||||||
|
// tail is the part of input_until_headline that was not
|
||||||
|
// consumed by the drawer.
|
||||||
|
let (tail, contents) =
|
||||||
|
lines_till(|line| line.trim().eq_ignore_ascii_case(":END:"))(input_until_headline)?;
|
||||||
|
|
||||||
|
// Skip over the amount used by the drawer.
|
||||||
|
let input = &input[input_until_headline.len() - tail.len()..];
|
||||||
|
|
||||||
Ok((
|
Ok((
|
||||||
input,
|
input,
|
||||||
|
@ -118,4 +132,30 @@ fn parse() {
|
||||||
|
|
||||||
// https://github.com/PoiScript/orgize/issues/9
|
// https://github.com/PoiScript/orgize/issues/9
|
||||||
assert!(parse_drawer(":SPAGHETTI:\n").is_err());
|
assert!(parse_drawer(":SPAGHETTI:\n").is_err());
|
||||||
|
|
||||||
|
// https://github.com/PoiScript/orgize/issues/24
|
||||||
|
// A drawer may not contain a headline.
|
||||||
|
assert!(parse_drawer(
|
||||||
|
r#":MYDRAWER:
|
||||||
|
* Node
|
||||||
|
:END:"#
|
||||||
|
)
|
||||||
|
.is_err(),);
|
||||||
|
|
||||||
|
// A drawer may not contain another drawer. An attempt to do so will result
|
||||||
|
// in the drawer ending at the first end line.
|
||||||
|
assert_eq!(
|
||||||
|
parse_drawer(":OUTER:\nOuter Text\n:INNER:\nInner Text\n:END:\n:END:"),
|
||||||
|
Ok((
|
||||||
|
":END:",
|
||||||
|
(
|
||||||
|
Drawer {
|
||||||
|
name: "OUTER".into(),
|
||||||
|
pre_blank: 0,
|
||||||
|
post_blank: 0
|
||||||
|
},
|
||||||
|
"Outer Text\n:INNER:\nInner Text\n"
|
||||||
|
)
|
||||||
|
))
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
|
@ -453,5 +453,5 @@ fn parse_properties_drawer_() {
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.collect::<HashMap<_, _>>()
|
.collect::<HashMap<_, _>>()
|
||||||
))
|
))
|
||||||
)
|
);
|
||||||
}
|
}
|
||||||
|
|
|
@ -644,7 +644,7 @@ pub fn blank_lines_count(input: &str) -> (&str, usize) {
|
||||||
// line, including the terminal \n if one is present. Unlike org-mode (but like
|
// line, including the terminal \n if one is present. Unlike org-mode (but like
|
||||||
// org-element), we accept '\n' and EOF to terminate the stars. Returns the
|
// org-element), we accept '\n' and EOF to terminate the stars. Returns the
|
||||||
// number of stars. Must only be called at the start of a line.
|
// number of stars. Must only be called at the start of a line.
|
||||||
fn parse_headline_level_le(input: &str, max_level: usize) -> IResult<&str, usize, ()> {
|
pub(crate) fn parse_headline_level_le(input: &str, max_level: usize) -> IResult<&str, usize, ()> {
|
||||||
let (input, level) = verify(
|
let (input, level) = verify(
|
||||||
map(is_a("*"), |s: &str| s.chars().count()),
|
map(is_a("*"), |s: &str| s.chars().count()),
|
||||||
|level: &usize| *level <= max_level,
|
|level: &usize| *level <= max_level,
|
||||||
|
@ -667,31 +667,44 @@ fn line_length(input: &str) -> IResult<&str, usize, ()> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn parse_headline(input: &str) -> Option<(&str, (&str, usize))> {
|
// Returns all text until a headline with level <= max_level is found. Must
|
||||||
// Consume the headline.
|
// start at the start of the line. Can return nothing if immediately at a
|
||||||
let (text, level) = parse_headline_level_le(input, std::usize::MAX).ok()?;
|
// headline.
|
||||||
|
//
|
||||||
|
// This is a separate function from lines_while/lines_until because we need to
|
||||||
|
// treat EOF differently from EOL when the file ends with \r.
|
||||||
|
pub fn lines_until_headline_at_level_le(
|
||||||
|
input: &str,
|
||||||
|
max_level: usize,
|
||||||
|
) -> IResult<&str, (&str, usize), ()> {
|
||||||
// Collect lines until EOF or a headline.
|
// Collect lines until EOF or a headline.
|
||||||
let mut last = 0;
|
let mut last = 0;
|
||||||
for i in memchr_iter(b'\n', text.as_bytes()) {
|
for i in memchr_iter(b'\n', input.as_bytes()) {
|
||||||
// Check the first byte after the newline to skip parsing unnecessarily.
|
// Check the first byte after the newline to skip parsing unnecessarily.
|
||||||
if text.as_bytes()[last] == b'*' && parse_headline_level_le(&text[last..], level).is_ok() {
|
if input.as_bytes()[last] == b'*'
|
||||||
|
&& parse_headline_level_le(&input[last..], max_level).is_ok()
|
||||||
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
last = i + 1;
|
last = i + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if last < text.len() && parse_headline_level_le(&text[last..], level).is_err() {
|
if last < input.len() && parse_headline_level_le(&input[last..], max_level).is_err() {
|
||||||
Some(("", (input, level)))
|
Ok(("", (input, max_level)))
|
||||||
} else {
|
} else {
|
||||||
Some((
|
Ok((&input[last..], (&input[..last], max_level)))
|
||||||
&text[last..],
|
|
||||||
(&input[..(input.len() - text.len()) + last], level),
|
|
||||||
))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn parse_headline(input: &str) -> Option<(&str, (&str, usize))> {
|
||||||
|
// Consume the headline.
|
||||||
|
let (text, level) = parse_headline_level_le(input, std::usize::MAX).ok()?;
|
||||||
|
let (text, _content) = lines_until_headline_at_level_le(text, level).ok()?;
|
||||||
|
let split = input.len() - text.len();
|
||||||
|
Some((&input[split..], (&input[..split], level)))
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
18
tests/issue_24.rs
Normal file
18
tests/issue_24.rs
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
use orgize::Org;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn headline_in_drawer() {
|
||||||
|
// https://github.com/PoiScript/orgize/issues/24
|
||||||
|
// A drawer may not contain a headline.
|
||||||
|
const STARS: &str = "****";
|
||||||
|
for h1 in 1..STARS.len() {
|
||||||
|
for h2 in 1..STARS.len() {
|
||||||
|
let org = crate::Org::parse_string(format!(
|
||||||
|
"{} Hello\n:PROPERTIES:\n{} World\n:END:",
|
||||||
|
&STARS[..h1],
|
||||||
|
&STARS[..h2]
|
||||||
|
));
|
||||||
|
assert_eq!(org.headlines().count(), 2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in a new issue