From 9c1e6ccc97e53995660e1a18eb7c1d1bf77f9ac9 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 25 Aug 2023 00:48:34 -0400 Subject: [PATCH] Add a detect_element function. This is an optimization. When you have something like plain text which ends when it hits the next element, we only need to parse enough to detect that an element is about to occur. For elements like plain lists, this is as simple as parsing a line starting with optional whitespace and then a bullet, which avoids parsing the entire plain list tree. The benefit is most noticeable in deeply nested plain lists. --- src/parser/element_parser.rs | 28 +++++++++++++++++++++++++++- src/parser/paragraph.rs | 4 ++-- src/parser/plain_list.rs | 23 ++++++++++++++++++++++- 3 files changed, 51 insertions(+), 4 deletions(-) diff --git a/src/parser/element_parser.rs b/src/parser/element_parser.rs index d781a7a..2c79e46 100644 --- a/src/parser/element_parser.rs +++ b/src/parser/element_parser.rs @@ -21,16 +21,19 @@ use super::lesser_block::src_block; use super::lesser_block::verse_block; use super::org_source::OrgSource; use super::paragraph::paragraph; +use super::plain_list::detect_plain_list; use super::plain_list::plain_list; use super::source::SetSource; use super::util::get_consumed; use super::util::maybe_consume_trailing_whitespace_if_not_exiting; use super::Context; +use crate::error::CustomError; +use crate::error::MyError; use crate::error::Res; use crate::parser::parser_with_context::parser_with_context; use crate::parser::table::org_mode_table; -pub fn element( +pub const fn element( can_be_paragraph: bool, ) -> impl for<'r, 's> Fn(Context<'r, 's>, OrgSource<'s>) -> Res, Element<'s>> { move |context: Context, input: OrgSource<'_>| _element(context, input, can_be_paragraph) @@ -108,3 +111,26 @@ fn _element<'r, 's>( Ok((remaining, element)) } + +pub const fn detect_element( + can_be_paragraph: bool, +) -> impl for<'r, 's> Fn(Context<'r, 's>, OrgSource<'s>) -> Res, ()> { + move |context: Context, input: OrgSource<'_>| _detect_element(context, input, can_be_paragraph) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn _detect_element<'r, 's>( + context: Context<'r, 's>, + input: OrgSource<'s>, + can_be_paragraph: bool, +) -> Res, ()> { + if detect_plain_list(context, input).is_ok() { + return Ok((input, ())); + } + if _element(context, input, can_be_paragraph).is_ok() { + return Ok((input, ())); + } + return Err(nom::Err::Error(CustomError::MyError(MyError( + "No element detected.".into(), + )))); +} diff --git a/src/parser/paragraph.rs b/src/parser/paragraph.rs index 15b449d..9cfda4d 100644 --- a/src/parser/paragraph.rs +++ b/src/parser/paragraph.rs @@ -6,13 +6,13 @@ use nom::multi::many1; use nom::multi::many_till; use nom::sequence::tuple; +use super::element_parser::detect_element; use super::lesser_element::Paragraph; use super::org_source::OrgSource; use super::util::blank_line; use super::util::get_consumed; use super::Context; use crate::error::Res; -use crate::parser::element_parser::element; use crate::parser::exiting::ExitClass; use crate::parser::object_parser::standard_set_object; use crate::parser::parser_context::ContextElement; @@ -57,7 +57,7 @@ fn paragraph_end<'r, 's>( context: Context<'r, 's>, input: OrgSource<'s>, ) -> Res, OrgSource<'s>> { - let non_paragraph_element_matcher = parser_with_context!(element(false))(context); + let non_paragraph_element_matcher = parser_with_context!(detect_element(false))(context); alt(( recognize(tuple((start_of_line, many1(blank_line)))), recognize(non_paragraph_element_matcher), diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index ed41f0c..d39b8f7 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -32,6 +32,27 @@ use crate::parser::util::get_consumed; use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting; use crate::parser::util::start_of_line; +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +pub fn detect_plain_list<'r, 's>( + _context: Context<'r, 's>, + input: OrgSource<'s>, +) -> Res, ()> { + // TODO: Add support for plain list items that do not have content on the first line. + if verify( + tuple((start_of_line, space0, bullet, space1)), + |(_start, indent, bull, _after_whitespace)| { + Into::<&str>::into(bull) != "*" || indent.len() > 0 + }, + )(input) + .is_ok() + { + return Ok((input, ())); + } + return Err(nom::Err::Error(CustomError::MyError(MyError( + "No element detected.".into(), + )))); +} + #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] pub fn plain_list<'r, 's>( context: Context<'r, 's>, @@ -228,7 +249,7 @@ const fn line_indented_lte( #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn _line_indented_lte<'r, 's>( - context: Context<'r, 's>, + _context: Context<'r, 's>, input: OrgSource<'s>, indent_level: usize, ) -> Res, OrgSource<'s>> {