From 020548fad91b8fd18a99c7408c983ddf4f06266a Mon Sep 17 00:00:00 2001
From: PoiScript
Date: Tue, 14 Apr 2020 17:55:19 +0800
Subject: [PATCH] feat(parse): create combinators module

---
Note: the blob ids on the "index" lines predate the restyled file contents
and were not recomputed.

 src/parse/combinators.rs | 172 +++++++++++++++++++++++++++++++++++++++
 src/parse/mod.rs         |   1 +
 2 files changed, 173 insertions(+)
 create mode 100644 src/parse/combinators.rs
 create mode 100644 src/parse/mod.rs

diff --git a/src/parse/combinators.rs b/src/parse/combinators.rs
new file mode 100644
index 0000000..74b48e1
--- /dev/null
+++ b/src/parse/combinators.rs
@@ -0,0 +1,172 @@
+//! Line-oriented parser combinators.
+
+use memchr::memchr;
+use nom::{
+    bytes::complete::take_while1,
+    combinator::verify,
+    error::{ErrorKind, ParseError},
+    Err, IResult,
+};
+
+/// Splits `input` at its first `\n`.
+///
+/// Yields `(rest, line)`: `line` is the text before the first `\n`, minus a `\r` that
+/// directly precedes it, and `rest` is the text after that `\n`. Input without any `\n`
+/// is returned whole as the line, leaving an empty `rest`. This parser never fails.
+pub fn line<'a, E>(input: &'a str) -> IResult<&'a str, &'a str, E>
+where
+    E: ParseError<&'a str>,
+{
+    let bytes = input.as_bytes();
+
+    match memchr(b'\n', bytes) {
+        None => Ok(("", input)),
+        Some(newline) => {
+            // a "\r\n" pair counts as one terminator, so drop the '\r' from the line
+            let end = if newline > 0 && bytes[newline - 1] == b'\r' {
+                newline - 1
+            } else {
+                newline
+            };
+
+            Ok((&input[newline + 1..], &input[..end]))
+        }
+    }
+}
+
+/// Builds a parser that reads lines until one satisfies `predicate`.
+///
+/// On success the output is the text in front of the matching line and the remaining
+/// input begins right after it; the matching line itself is consumed but not returned.
+/// Fails with `ErrorKind::Many0` when the input runs out before any line matches.
+pub fn lines_till<'a, F, E>(predicate: F) -> impl Fn(&'a str) -> IResult<&str, &str, E>
+where
+    F: Fn(&str) -> bool,
+    E: ParseError<&'a str>,
+{
+    move |i| {
+        let mut rest = i;
+
+        while !rest.is_empty() {
+            let (next, current) = line(rest)?;
+
+            debug_assert_ne!(rest, next);
+
+            if predicate(current) {
+                let consumed = i.len() - rest.len();
+                return Ok((next, &i[..consumed]));
+            }
+
+            rest = next;
+        }
+
+        // TODO: better error kind
+        Err(Err::Error(E::from_error_kind(rest, ErrorKind::Many0)))
+    }
+}
+
+/// Builds a parser that takes the leading run of lines satisfying `predicate`.
+///
+/// The output is that run, line endings included, and the remaining input starts at
+/// the first line that fails `predicate`. Unlike `lines_till`, the returned parser does
+/// not fail: if every line passes, the whole input is the output.
+pub fn lines_while<'a, F, E>(predicate: F) -> impl Fn(&'a str) -> IResult<&str, &str, E>
+where
+    F: Fn(&str) -> bool,
+    E: ParseError<&'a str>,
+{
+    move |i| {
+        let mut rest = i;
+
+        while !rest.is_empty() {
+            let (next, current) = line(rest)?;
+
+            debug_assert_ne!(rest, next);
+
+            if !predicate(current) {
+                let consumed = i.len() - rest.len();
+                return Ok((rest, &i[..consumed]));
+            }
+
+            rest = next;
+        }
+
+        Ok(("", i))
+    }
+}
+
+#[test]
+fn test_lines_while() {
+    let foo_lines = lines_while::<_, ()>(|l| l == "foo");
+    let blank_lines = lines_while::<_, ()>(|l| l.trim().is_empty());
+
+    assert_eq!(foo_lines("foo"), Ok(("", "foo")));
+    assert_eq!(foo_lines("bar"), Ok(("bar", "")));
+    assert_eq!(foo_lines("foo\n\n"), Ok(("\n", "foo\n")));
+    assert_eq!(blank_lines("\n\n\n"), Ok(("", "\n\n\n")));
+}
+
+/// Parses one line, succeeding only when it holds nothing but ASCII whitespace.
+///
+/// The line is split off by `line`, so on success the output is the blank line without
+/// its terminator and the remaining input starts on the next line.
+pub fn eol<'a, E>(input: &'a str) -> IResult<&str, &str, E>
+where
+    E: ParseError<&'a str>,
+{
+    verify(line, |s: &str| s.bytes().all(|b| b.is_ascii_whitespace()))(input)
+}
+
+/// Parses a non-empty run of characters that are not ASCII whitespace.
+pub fn one_word<'a, E>(input: &'a str) -> IResult<&str, &str, E>
+where
+    E: ParseError<&'a str>,
+{
+    let is_word_char = |c: char| !c.is_ascii_whitespace();
+
+    take_while1(is_word_char)(input)
+}
+
+/// Counts the blank lines at the start of `input`.
+///
+/// A line is blank when it holds nothing but ASCII whitespace. The remaining input
+/// starts at the first non-blank line, or is empty if every line was blank.
+pub fn blank_lines_count<'a, E>(input: &'a str) -> IResult<&str, usize, E>
+where
+    E: ParseError<&'a str>,
+{
+    let mut blanks = 0;
+    let mut rest = input;
+
+    while !rest.is_empty() {
+        let (next, current) = line(rest)?;
+
+        debug_assert_ne!(rest, next);
+
+        if current.bytes().any(|b| !b.is_ascii_whitespace()) {
+            return Ok((rest, blanks));
+        }
+
+        blanks += 1;
+        rest = next;
+    }
+
+    Ok(("", blanks))
+}
+
+#[test]
+fn test_blank_lines_count() {
+    // each case is (input, (remaining input, blank line count))
+    let cases: [(&str, (&str, usize)); 6] = [
+        ("foo", ("foo", 0)),
+        (" foo", (" foo", 0)),
+        (" \t\nfoo\n", ("foo\n", 1)),
+        ("\n \r\n\nfoo\n", ("foo\n", 3)),
+        ("\r\n \n \r\n foo\n", (" foo\n", 3)),
+        ("\r\n \n \r\n \n", ("", 4)),
+    ];
+
+    for &(input, expected) in cases.iter() {
+        assert_eq!(blank_lines_count::<()>(input), Ok(expected));
+    }
+}
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
new file mode 100644
index 0000000..0c49327
--- /dev/null
+++ b/src/parse/mod.rs
@@ -0,0 +1 @@
+pub mod combinators;