diff --git a/README.md b/README.md index 4e2813e..6603903 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ A Rust library for parsing orgmode files. -Live demo: https://orgize.herokuapp.com/ +[Live demo](https://orgize.herokuapp.com/) ## Parse @@ -27,7 +27,7 @@ Org::parse_with_config( "* TASK Title 1", &ParseConfig { // custom todo keywords - todo_keywords: vec!["TASK".to_string()], + todo_keywords: (vec!["TASK".to_string()], vec![]), ..Default::default() }, ); @@ -157,7 +157,7 @@ And handler will silently ignores all end events from non-container elements. So if you want to change how a non-container element renders, just redefine the `start` function and leave the `end` function unchanged. -# Serde +## Serde `Org` struct have already implemented serde's `Serialize` trait. It means you can serialize it into any format supported by serde, such as json: @@ -195,7 +195,7 @@ println!("{}", to_string(&org).unwrap()); ## Features -By now, orgize provides two features: +By now, orgize provides three features: + `ser`: adds the ability to serialize `Org` and other elements using `serde`, enabled by default. diff --git a/src/config.rs b/src/config.rs index b5b057d..955252d 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,19 +1,14 @@ -//! Parse configuration module - /// Parse configuration #[derive(Clone, Debug)] pub struct ParseConfig { - /// Headline's todo keywords, todo type - pub todo_keywords: Vec, - /// Headline's todo keywords, done type - pub done_keywords: Vec, + /// Headline's todo keywords + pub todo_keywords: (Vec, Vec), } impl Default for ParseConfig { fn default() -> Self { ParseConfig { - todo_keywords: vec![String::from("TODO")], - done_keywords: vec![String::from("DONE")], + todo_keywords: (vec![String::from("TODO")], vec![String::from("DONE")]), } } } diff --git a/src/elements/mod.rs b/src/elements/mod.rs index 56e1510..587a2c5 100644 --- a/src/elements/mod.rs +++ b/src/elements/mod.rs @@ -50,7 +50,7 @@ pub use self::{ use std::borrow::Cow; -/// Orgize Element Enum +/// Element Enum #[derive(Debug)] #[cfg_attr(test, derive(PartialEq))] #[cfg_attr(feature = "ser", derive(serde::Serialize))] diff --git a/src/elements/timestamp.rs b/src/elements/timestamp.rs index 8e0c8bc..d87173f 100644 --- a/src/elements/timestamp.rs +++ b/src/elements/timestamp.rs @@ -9,7 +9,7 @@ use nom::{ IResult, }; -/// Orgize Datetime Struct +/// Datetime Struct #[cfg_attr(test, derive(PartialEq))] #[cfg_attr(feature = "ser", derive(serde::Serialize))] #[derive(Debug, Clone)] diff --git a/src/elements/title.rs b/src/elements/title.rs index 134baf3..9d09fc5 100644 --- a/src/elements/title.rs +++ b/src/elements/title.rs @@ -14,9 +14,11 @@ use nom::{ Err, IResult, }; -use crate::config::ParseConfig; -use crate::elements::{drawer::parse_drawer, Planning, Timestamp}; -use crate::parsers::{line, skip_empty_lines, take_one_word}; +use crate::{ + config::ParseConfig, + elements::{drawer::parse_drawer, Planning, Timestamp}, + parsers::{line, skip_empty_lines, take_one_word}, +}; /// Title Elemenet #[cfg_attr(test, derive(PartialEq))] @@ -28,10 +30,10 @@ pub struct Title<'a> { /// Headline priority cookie #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] pub priority: Option, - /// Headline title tags, including the sparated colons + /// Headline title tags #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Vec::is_empty"))] pub tags: Vec>, - /// Headline title keyword + /// Headline todo keyword #[cfg_attr(feature = "ser", serde(skip_serializing_if = "Option::is_none"))] pub keyword: Option>, /// Raw headline's text, without the stars and the tags @@ -130,8 +132,8 @@ fn parse_title<'a, E: ParseError<&'a str>>( let (input, keyword) = opt(preceded( space1, verify(take_one_word, |s: &str| { - config.todo_keywords.iter().any(|x| x == s) - || config.done_keywords.iter().any(|x| x == s) + config.todo_keywords.0.iter().any(|x| x == s) + || config.todo_keywords.1.iter().any(|x| x == s) }), ))(input)?; @@ -353,7 +355,7 @@ fn parse_title_() { parse_title::>( "**** DONE Title", &ParseConfig { - done_keywords: vec![], + todo_keywords: (vec![], vec![]), ..Default::default() } ), @@ -377,7 +379,7 @@ fn parse_title_() { parse_title::>( "**** TASK [#A] Title", &ParseConfig { - todo_keywords: vec!["TASK".to_string()], + todo_keywords: (vec!["TASK".to_string()], vec![]), ..Default::default() } ), diff --git a/src/export/html.rs b/src/export/html.rs index 5a1e423..65829e1 100644 --- a/src/export/html.rs +++ b/src/export/html.rs @@ -6,19 +6,32 @@ use jetscii::{bytes, BytesConst}; use crate::elements::Element; use crate::export::write_datetime; +/// A wrapper for escaping sensitive characters in html. +/// +/// ```rust +/// use orgize::export::html::Escape; +/// +/// assert_eq!(format!("{}", Escape("< < <")), "< < <"); +/// assert_eq!( +/// format!("{}", Escape("")), +/// "<script>alert('Hello XSS')</script>" +/// ); +/// ``` pub struct Escape>(pub S); impl> fmt::Display for Escape { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let mut pos = 0; - let bytes = self.0.as_ref().as_bytes(); + + let content = self.0.as_ref(); + let bytes = content.as_bytes(); lazy_static::lazy_static! { static ref ESCAPE_BYTES: BytesConst = bytes!(b'<', b'>', b'&', b'\'', b'"'); } while let Some(off) = ESCAPE_BYTES.find(&bytes[pos..]) { - write!(f, "{}", &self.0.as_ref()[pos..pos + off])?; + write!(f, "{}", &content[pos..pos + off])?; pos += off + 1; @@ -26,13 +39,13 @@ impl> fmt::Display for Escape { b'<' => write!(f, "<")?, b'>' => write!(f, ">")?, b'&' => write!(f, "&")?, - b'\'' => write!(f, "'")?, + b'\'' => write!(f, "'")?, b'"' => write!(f, """)?, _ => unreachable!(), } } - write!(f, "{}", &self.0.as_ref()[pos..]) + write!(f, "{}", &content[pos..]) } } diff --git a/src/export/mod.rs b/src/export/mod.rs index de5da3b..f42b111 100644 --- a/src/export/mod.rs +++ b/src/export/mod.rs @@ -3,8 +3,8 @@ pub mod html; pub mod org; -pub use html::*; -pub use org::*; +pub use html::{DefaultHtmlHandler, Escape as HtmlEscape, HtmlHandler}; +pub use org::{DefaultOrgHandler, OrgHandler}; use std::io::{Error, Write}; diff --git a/src/headline.rs b/src/headline.rs index 8ee7333..07fae0a 100644 --- a/src/headline.rs +++ b/src/headline.rs @@ -63,11 +63,10 @@ impl Document { self.0.last_child(org) } - pub fn set_section_content<'a, S: Into>>( - &mut self, - content: S, - org: &mut Org<'a>, - ) { + pub fn set_section_content<'a, S>(&mut self, content: S, org: &mut Org<'a>) + where + S: Into>, + { let sec_n = if let Some(sec_n) = self.0.sec_n { let children: Vec<_> = sec_n.children(&org.arena).collect(); for child in children { @@ -188,6 +187,7 @@ impl Headline { Element::Section => Some(n), _ => None, }); + Headline { lvl, hdl_n, @@ -226,7 +226,10 @@ impl Headline { } } - pub fn set_title_content<'a, S: Into>>(self, content: S, org: &mut Org<'a>) { + pub fn set_title_content<'a, S>(self, content: S, org: &mut Org<'a>) + where + S: Into>, + { let content = content.into(); let children: Vec<_> = self.ttl_n.children(&org.arena).collect(); @@ -258,11 +261,10 @@ impl Headline { org.debug_validate(); } - pub fn set_section_content<'a, S: Into>>( - &mut self, - content: S, - org: &mut Org<'a>, - ) { + pub fn set_section_content<'a, S>(&mut self, content: S, org: &mut Org<'a>) + where + S: Into>, + { let sec_n = if let Some(sec_n) = self.sec_n { let children: Vec<_> = sec_n.children(&org.arena).collect(); for child in children { @@ -471,3 +473,21 @@ impl Headline { } } } + +impl Org<'_> { + /// Return a `Document` + pub fn document(&self) -> Document { + Document::from_org(self) + } + + /// Return an iterator of `Headline` + pub fn headlines(&self) -> impl Iterator + '_ { + self.root + .descendants(&self.arena) + .skip(1) + .filter_map(move |node| match &self.arena[node].get() { + Element::Headline { level } => Some(Headline::from_node(node, *level, self)), + _ => None, + }) + } +} diff --git a/src/lib.rs b/src/lib.rs index d7148b1..8328c62 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,6 @@ //! A Rust library for parsing orgmode files. //! -//! Live demo: https://orgize.herokuapp.com/ +//! [Live demo](https://orgize.herokuapp.com/) //! //! # Parse //! @@ -18,14 +18,14 @@ //! //! [`Org::parse_with_config`]: org/struct.Org.html#method.parse_with_config //! -//! ``` rust +//! ```rust //! use orgize::{Org, ParseConfig}; //! //! Org::parse_with_config( //! "* TASK Title 1", //! &ParseConfig { //! // custom todo keywords -//! todo_keywords: vec!["TASK".to_string()], +//! todo_keywords: (vec!["TASK".to_string()], vec![]), //! ..Default::default() //! }, //! ); @@ -206,7 +206,7 @@ //! //! # Features //! -//! By now, orgize provides two features: +//! By now, orgize provides three features: //! //! + `ser`: adds the ability to serialize `Org` and other elements using `serde`, enabled by default. //! diff --git a/src/org.rs b/src/org.rs index 5cdc33d..0a24de0 100644 --- a/src/org.rs +++ b/src/org.rs @@ -5,7 +5,6 @@ use crate::{ config::{ParseConfig, DEFAULT_CONFIG}, elements::Element, export::{DefaultHtmlHandler, DefaultOrgHandler, HtmlHandler, OrgHandler}, - headline::{Document, Headline}, parsers::{parse_container, Container}, }; @@ -35,13 +34,13 @@ impl<'a> Org<'a> { } /// Create a new Org struct from parsing `text`, using a custom ParseConfig - pub fn parse_with_config(content: &'a str, config: &ParseConfig) -> Org<'a> { + pub fn parse_with_config(text: &'a str, config: &ParseConfig) -> Org<'a> { let mut org = Org::new(); parse_container( &mut org.arena, Container::Document { - content, + content: text, node: org.root, }, config, @@ -52,22 +51,6 @@ impl<'a> Org<'a> { org } - /// Return a `Document` - pub fn document(&self) -> Document { - Document::from_org(self) - } - - /// Return an iterator of `Headline` - pub fn headlines<'b>(&'b self) -> impl Iterator + 'b { - self.root - .descendants(&self.arena) - .skip(1) - .filter_map(move |node| match &self.arena[node].get() { - Element::Headline { level } => Some(Headline::from_node(node, *level, self)), - _ => None, - }) - } - /// Return a refrence to underlay arena pub fn arena(&self) -> &Arena> { &self.arena @@ -86,8 +69,11 @@ impl<'a> Org<'a> { }) } - pub fn html(&self, wrtier: W) -> Result<(), Error> { - self.html_with_handler(wrtier, &mut DefaultHtmlHandler) + pub fn html(&self, writer: W) -> Result<(), Error> + where + W: Write, + { + self.html_with_handler(writer, &mut DefaultHtmlHandler) } pub fn html_with_handler(&self, mut writer: W, handler: &mut H) -> Result<(), E> @@ -106,8 +92,11 @@ impl<'a> Org<'a> { Ok(()) } - pub fn org(&self, wrtier: W) -> Result<(), Error> { - self.org_with_handler(wrtier, &mut DefaultOrgHandler) + pub fn org(&self, writer: W) -> Result<(), Error> + where + W: Write, + { + self.org_with_handler(writer, &mut DefaultOrgHandler) } pub fn org_with_handler(&self, mut writer: W, handler: &mut H) -> Result<(), E> diff --git a/src/parsers.rs b/src/parsers.rs index e87f8c8..244e8ca 100644 --- a/src/parsers.rs +++ b/src/parsers.rs @@ -362,22 +362,22 @@ pub fn match_block<'a, T: ElementArena<'a>>( parent: NodeId, containers: &mut Vec>, name: Cow<'a, str>, - args: Option>, + parameters: Option>, content: &'a str, ) { match &*name.to_uppercase() { "CENTER" => { - let node = arena.append_element(CenterBlock { parameters: args }, parent); + let node = arena.append_element(CenterBlock { parameters }, parent); containers.push(Container::Block { content, node }); } "QUOTE" => { - let node = arena.append_element(QuoteBlock { parameters: args }, parent); + let node = arena.append_element(QuoteBlock { parameters }, parent); containers.push(Container::Block { content, node }); } "COMMENT" => { arena.append_element( CommentBlock { - data: args, + data: parameters, contents: content.into(), }, parent, @@ -386,7 +386,7 @@ pub fn match_block<'a, T: ElementArena<'a>>( "EXAMPLE" => { arena.append_element( ExampleBlock { - data: args, + data: parameters, contents: content.into(), }, parent, @@ -395,14 +395,14 @@ pub fn match_block<'a, T: ElementArena<'a>>( "EXPORT" => { arena.append_element( ExportBlock { - data: args.unwrap_or_default(), + data: parameters.unwrap_or_default(), contents: content.into(), }, parent, ); } "SRC" => { - let (language, arguments) = match &args { + let (language, arguments) = match ¶meters { Some(Cow::Borrowed(args)) => { let (language, arguments) = args.split_at(args.find(' ').unwrap_or_else(|| args.len())); @@ -421,17 +421,11 @@ pub fn match_block<'a, T: ElementArena<'a>>( ); } "VERSE" => { - let node = arena.append_element(VerseBlock { parameters: args }, parent); + let node = arena.append_element(VerseBlock { parameters }, parent); containers.push(Container::Block { content, node }); } _ => { - let node = arena.append_element( - SpecialBlock { - parameters: args, - name, - }, - parent, - ); + let node = arena.append_element(SpecialBlock { parameters, name }, parent); containers.push(Container::Block { content, node }); } } @@ -439,7 +433,7 @@ pub fn match_block<'a, T: ElementArena<'a>>( struct InlinePositions<'a> { bytes: &'a [u8], - position: usize, + pos: usize, next: Option, } @@ -447,7 +441,7 @@ impl InlinePositions<'_> { fn new(bytes: &[u8]) -> InlinePositions { InlinePositions { bytes, - position: 0, + pos: 0, next: Some(0), } } @@ -463,16 +457,16 @@ impl Iterator for InlinePositions<'_> { } self.next.take().or_else(|| { - PRE_BYTES.find(&self.bytes[self.position..]).map(|i| { - self.position += i + 1; + PRE_BYTES.find(&self.bytes[self.pos..]).map(|i| { + self.pos += i + 1; - match self.bytes[self.position - 1] { + match self.bytes[self.pos - 1] { b'{' => { - self.next = Some(self.position); - self.position - 1 + self.next = Some(self.pos); + self.pos - 1 } - b' ' | b'(' | b'\'' | b'"' | b'\n' => self.position, - _ => self.position - 1, + b' ' | b'(' | b'\'' | b'"' | b'\n' => self.pos, + _ => self.pos - 1, } }) }) @@ -683,7 +677,9 @@ pub fn line<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&str, &str, E } pub fn eol<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&str, &str, E> { - verify(line, |s: &str| s.trim().is_empty())(input) + verify(line, |s: &str| { + s.as_bytes().iter().all(|c| c.is_ascii_whitespace()) + })(input) } pub fn take_lines_while(predicate: impl Fn(&str) -> bool) -> impl Fn(&str) -> (&str, &str) { @@ -708,7 +704,7 @@ pub fn take_lines_while(predicate: impl Fn(&str) -> bool) -> impl Fn(&str) -> (& } pub fn skip_empty_lines(input: &str) -> &str { - take_lines_while(|line| line.trim().is_empty())(input).0 + take_lines_while(|line| line.as_bytes().iter().all(|c| c.is_ascii_whitespace()))(input).0 } pub fn parse_headline(input: &str) -> Option<(&str, (&str, usize))> { diff --git a/src/validate.rs b/src/validate.rs index 559a536..44edc80 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -1,7 +1,7 @@ use indextree::NodeId; use std::ops::RangeInclusive; -use crate::elements::{Element, Table, TableRow, Title}; +use crate::elements::{Element, Table, TableRow}; use crate::Org; /// Validation Error @@ -104,8 +104,8 @@ impl Org<'_> { errors.push(ValidationError::ExpectedChildren { at: node_id }); } } - Element::Title(Title { raw, .. }) => { - if !raw.is_empty() && node.first_child().is_none() { + Element::Title(title) => { + if !title.raw.is_empty() && node.first_child().is_none() { errors.push(ValidationError::ExpectedChildren { at: node_id }); } }