Merge branch 'plain_list_perf_investigation'
All checks were successful
rust-test Build rust-test has succeeded
rust-build Build rust-build has succeeded
rustfmt Build rustfmt has succeeded

This commit is contained in:
Tom Alexander 2023-08-25 01:10:04 -04:00
commit fc79507ef3
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
17 changed files with 119 additions and 71 deletions

View File

@ -44,7 +44,7 @@ fn path_angle<'r, 's>(
) -> Res<OrgSource<'s>, OrgSource<'s>> {
let parser_context =
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
class: ExitClass::Gamma,
exit_matcher: &path_angle_end,
}));

View File

@ -90,7 +90,7 @@ fn global_prefix<'r, 's>(
depth: 0,
}))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
class: ExitClass::Gamma,
exit_matcher: &global_prefix_end,
}));
let (remaining, (children, _exit_contents)) = verify(
@ -151,7 +151,7 @@ fn global_suffix<'r, 's>(
depth: 0,
}))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
class: ExitClass::Gamma,
exit_matcher: &global_suffix_end,
}));
let (remaining, (children, _exit_contents)) = verify(

View File

@ -76,7 +76,7 @@ fn key_prefix<'r, 's>(
depth: 0,
}))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
class: ExitClass::Gamma,
exit_matcher: &key_prefix_end,
}));
let (remaining, (children, _exit_contents)) = verify(
@ -101,7 +101,7 @@ fn key_suffix<'r, 's>(
depth: 0,
}))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
class: ExitClass::Gamma,
exit_matcher: &key_suffix_end,
}));
let (remaining, (children, _exit_contents)) = verify(

View File

@ -21,16 +21,19 @@ use super::lesser_block::src_block;
use super::lesser_block::verse_block;
use super::org_source::OrgSource;
use super::paragraph::paragraph;
use super::plain_list::detect_plain_list;
use super::plain_list::plain_list;
use super::source::SetSource;
use super::util::get_consumed;
use super::util::maybe_consume_trailing_whitespace_if_not_exiting;
use super::Context;
use crate::error::CustomError;
use crate::error::MyError;
use crate::error::Res;
use crate::parser::parser_with_context::parser_with_context;
use crate::parser::table::org_mode_table;
pub fn element(
pub const fn element(
can_be_paragraph: bool,
) -> impl for<'r, 's> Fn(Context<'r, 's>, OrgSource<'s>) -> Res<OrgSource<'s>, Element<'s>> {
move |context: Context, input: OrgSource<'_>| _element(context, input, can_be_paragraph)
@ -108,3 +111,26 @@ fn _element<'r, 's>(
Ok((remaining, element))
}
pub const fn detect_element(
can_be_paragraph: bool,
) -> impl for<'r, 's> Fn(Context<'r, 's>, OrgSource<'s>) -> Res<OrgSource<'s>, ()> {
move |context: Context, input: OrgSource<'_>| _detect_element(context, input, can_be_paragraph)
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn _detect_element<'r, 's>(
context: Context<'r, 's>,
input: OrgSource<'s>,
can_be_paragraph: bool,
) -> Res<OrgSource<'s>, ()> {
if detect_plain_list(context, input).is_ok() {
return Ok((input, ()));
}
if _element(context, input, can_be_paragraph).is_ok() {
return Ok((input, ()));
}
return Err(nom::Err::Error(CustomError::MyError(MyError(
"No element detected.".into(),
))));
}

View File

@ -8,6 +8,9 @@ pub enum ExitClass {
/// Elements who cede priority to alpha elements when matching.
Beta = 300,
/// Elements who cede priority to alpha and beta elements when matching.
Gamma = 4000,
}
impl std::fmt::Display for ExitClass {

View File

@ -27,7 +27,7 @@ pub fn export_snippet<'r, 's>(
let (remaining, backend_name) = backend(context, remaining)?;
let parser_context =
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
class: ExitClass::Gamma,
exit_matcher: &export_snippet_end,
}));
let (remaining, backend_contents) = opt(tuple((

View File

@ -49,7 +49,7 @@ fn name<'r, 's>(
) -> Res<OrgSource<'s>, OrgSource<'s>> {
let parser_context =
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
class: ExitClass::Gamma,
exit_matcher: &name_end,
}));
let (remaining, name) = recognize(many_till(
@ -80,7 +80,7 @@ fn header<'r, 's>(
depth: 0,
}))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
class: ExitClass::Gamma,
exit_matcher: &header_end,
}));
@ -131,7 +131,7 @@ fn argument<'r, 's>(
depth: 0,
}))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
class: ExitClass::Gamma,
exit_matcher: &argument_end,
}));

View File

@ -265,13 +265,14 @@ fn _lesser_block_end<'r, 's, 'x>(
Ok((remaining, source))
}
fn lesser_block_begin(
current_name: &str,
/// Parser for the beginning of a lesser block
///
/// current_name MUST be lowercase. We do not do the conversion ourselves because it is not allowed in a const fn.
const fn lesser_block_begin(
current_name: &'static str,
) -> impl for<'r, 's> Fn(Context<'r, 's>, OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
let current_name_lower = current_name.to_lowercase();
move |context: Context, input: OrgSource<'_>| {
_lesser_block_begin(context, input, current_name_lower.as_str())
}
// TODO: Since this is a const fn, is there ANY way to "generate" functions at compile time?
move |context: Context, input: OrgSource<'_>| _lesser_block_begin(context, input, current_name)
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]

View File

@ -1,6 +1,5 @@
use nom::branch::alt;
use nom::combinator::map;
use nom::combinator::not;
use super::org_source::OrgSource;
use super::parser_with_context::parser_with_context;
@ -34,8 +33,6 @@ pub fn standard_set_object<'r, 's>(
context: Context<'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Object<'s>> {
not(|i| context.check_exit_matcher(i))(input)?;
alt((
map(parser_with_context!(timestamp)(context), Object::Timestamp),
map(parser_with_context!(subscript)(context), Object::Subscript),
@ -93,8 +90,6 @@ pub fn minimal_set_object<'r, 's>(
context: Context<'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Object<'s>> {
not(|i| context.check_exit_matcher(i))(input)?;
alt((
map(parser_with_context!(subscript)(context), Object::Subscript),
map(
@ -116,7 +111,6 @@ pub fn any_object_except_plain_text<'r, 's>(
context: Context<'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Object<'s>> {
// Used for exit matchers so this does not check exit matcher condition.
alt((
map(parser_with_context!(timestamp)(context), Object::Timestamp),
map(parser_with_context!(subscript)(context), Object::Subscript),

View File

@ -6,13 +6,13 @@ use nom::multi::many1;
use nom::multi::many_till;
use nom::sequence::tuple;
use super::element_parser::detect_element;
use super::lesser_element::Paragraph;
use super::org_source::OrgSource;
use super::util::blank_line;
use super::util::get_consumed;
use super::Context;
use crate::error::Res;
use crate::parser::element_parser::element;
use crate::parser::exiting::ExitClass;
use crate::parser::object_parser::standard_set_object;
use crate::parser::parser_context::ContextElement;
@ -28,7 +28,7 @@ pub fn paragraph<'r, 's>(
) -> Res<OrgSource<'s>, Paragraph<'s>> {
let parser_context =
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
class: ExitClass::Gamma,
exit_matcher: &paragraph_end,
}));
let standard_set_object_matcher = parser_with_context!(standard_set_object)(&parser_context);
@ -57,7 +57,7 @@ fn paragraph_end<'r, 's>(
context: Context<'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
let non_paragraph_element_matcher = parser_with_context!(element(false))(context);
let non_paragraph_element_matcher = parser_with_context!(detect_element(false))(context);
alt((
recognize(tuple((start_of_line, many1(blank_line)))),
recognize(non_paragraph_element_matcher),

View File

@ -62,7 +62,7 @@ impl<'r, 's> ContextTree<'r, 's> {
// exit_matcher: ChainBehavior::IgnoreParent(Some(&always_fail)),
// }));
let mut current_class_filter = ExitClass::Beta;
let mut current_class_filter = ExitClass::Gamma;
for current_node in self.iter() {
let context_element = current_node.get_data();
match context_element {
@ -112,9 +112,6 @@ pub enum ContextElement<'r, 's> {
ExitMatcherNode(ExitMatcherNode<'r>),
Context(&'r str),
/// Stores the indentation level of the current list item.
ListItem(usize),
/// Stores the name of the greater block.
GreaterBlock(&'s str),

View File

@ -113,7 +113,7 @@ fn path_plain<'r, 's>(
// TODO: "optionally containing parenthesis-wrapped non-whitespace non-bracket substrings up to a depth of two. The string must end with either a non-punctation non-whitespace character, a forwards slash, or a parenthesis-wrapped substring"
let parser_context =
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
class: ExitClass::Gamma,
exit_matcher: &path_plain_end,
}));

View File

@ -32,6 +32,27 @@ use crate::parser::util::get_consumed;
use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting;
use crate::parser::util::start_of_line;
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn detect_plain_list<'r, 's>(
_context: Context<'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, ()> {
// TODO: Add support for plain list items that do not have content on the first line.
if verify(
tuple((start_of_line, space0, bullet, space1)),
|(_start, indent, bull, _after_whitespace)| {
Into::<&str>::into(bull) != "*" || indent.len() > 0
},
)(input)
.is_ok()
{
return Ok((input, ()));
}
return Err(nom::Err::Error(CustomError::MyError(MyError(
"No element detected.".into(),
))));
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn plain_list<'r, 's>(
context: Context<'r, 's>,
@ -90,9 +111,6 @@ pub fn plain_list<'r, 's>(
parser_with_context!(plain_list_item)(&final_item_context)(final_child_start)?;
children.push((final_child_start, reparsed_final_item));
let (remaining, _trailing_ws) =
maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
let source = get_consumed(input, remaining);
Ok((
remaining,
@ -116,6 +134,7 @@ pub fn plain_list_item<'r, 's>(
Into::<&str>::into(bull) != "*" || indent_level > 0
})(remaining)?;
// TODO: This isn't taking into account items that immediately line break and then have contents
let maybe_contentless_item: Res<OrgSource<'_>, OrgSource<'_>> =
alt((eof, line_ending))(remaining);
match maybe_contentless_item {
@ -135,12 +154,12 @@ pub fn plain_list_item<'r, 's>(
};
let (remaining, _ws) = space1(remaining)?;
let exit_matcher = plain_list_item_end(indent_level);
let parser_context = context
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
.with_additional_node(ContextElement::ListItem(indent_level))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
exit_matcher: &plain_list_item_end,
exit_matcher: &exit_matcher,
}));
let (remaining, (children, _exit_contents)) = verify(
@ -194,47 +213,55 @@ fn plain_list_end<'r, 's>(
)))(input)
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn plain_list_item_end<'r, 's>(
const fn plain_list_item_end(
indent_level: usize,
) -> impl for<'r, 's> Fn(Context<'r, 's>, OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
let line_indented_lte_matcher = line_indented_lte(indent_level);
move |context: Context, input: OrgSource<'_>| {
_plain_list_item_end(context, input, &line_indented_lte_matcher)
}
}
#[cfg_attr(
feature = "tracing",
tracing::instrument(ret, level = "debug", skip(line_indented_lte_matcher))
)]
fn _plain_list_item_end<'r, 's>(
context: Context<'r, 's>,
input: OrgSource<'s>,
line_indented_lte_matcher: impl for<'rr, 'ss> Fn(
Context<'rr, 'ss>,
OrgSource<'ss>,
) -> Res<OrgSource<'ss>, OrgSource<'ss>>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
start_of_line(input)?;
recognize(tuple((
opt(blank_line),
parser_with_context!(line_indented_lte)(context),
parser_with_context!(line_indented_lte_matcher)(context),
)))(input)
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn line_indented_lte<'r, 's>(
context: Context<'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
let current_item_indent_level: &usize =
get_context_item_indent(context).ok_or(nom::Err::Error(CustomError::MyError(MyError(
"Not inside a plain list item".into(),
))))?;
const fn line_indented_lte(
indent_level: usize,
) -> impl for<'r, 's> Fn(Context<'r, 's>, OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
move |context: Context, input: OrgSource<'_>| _line_indented_lte(context, input, indent_level)
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn _line_indented_lte<'r, 's>(
_context: Context<'r, 's>,
input: OrgSource<'s>,
indent_level: usize,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
let matched = recognize(verify(
tuple((space0::<OrgSource<'_>, _>, non_whitespace_character)),
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
|(_space0, _anychar)| _space0.len() <= *current_item_indent_level,
|(_space0, _anychar)| _space0.len() <= indent_level,
))(input)?;
Ok(matched)
}
fn get_context_item_indent<'r, 's>(context: Context<'r, 's>) -> Option<&'r usize> {
for thing in context.iter() {
match thing.get_data() {
ContextElement::ListItem(depth) => return Some(depth),
_ => {}
};
}
None
}
#[cfg(test)]
mod tests {
use super::*;

View File

@ -91,7 +91,7 @@ pub fn radio_target<'r, 's>(
let (remaining, _opening) = tag("<<<")(input)?;
let parser_context =
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
class: ExitClass::Gamma,
exit_matcher: &radio_target_end,
}));

View File

@ -162,7 +162,7 @@ fn script_with_braces<'r, 's>(
},
))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
class: ExitClass::Gamma,
exit_matcher: &script_with_braces_end,
}));

View File

@ -179,7 +179,7 @@ fn _text_markup_object<'r, 's, 'x>(
let text_markup_end_specialized = text_markup_end(open.into());
let parser_context =
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
class: ExitClass::Gamma,
exit_matcher: &text_markup_end_specialized,
}));
@ -229,7 +229,7 @@ fn _text_markup_string<'r, 's, 'x>(
let text_markup_end_specialized = text_markup_end(open.into());
let parser_context =
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
class: ExitClass::Gamma,
exit_matcher: &text_markup_end_specialized,
}));
@ -338,7 +338,7 @@ fn _rematch_text_markup_object<'r, 's, 'x>(
let text_markup_end_specialized = text_markup_end(open.into());
let parser_context =
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
class: ExitClass::Gamma,
exit_matcher: &text_markup_end_specialized,
}));

View File

@ -67,7 +67,7 @@ fn sexp<'r, 's>(
) -> Res<OrgSource<'s>, OrgSource<'s>> {
let parser_context =
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
class: ExitClass::Gamma,
exit_matcher: &sexp_end,
}));
@ -99,7 +99,7 @@ fn active_timestamp<'r, 's>(
let (remaining, _date) = date(context, remaining)?;
let time_context =
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
class: ExitClass::Gamma,
exit_matcher: &active_time_rest_end,
}));
let (remaining, _time) =
@ -132,7 +132,7 @@ fn inactive_timestamp<'r, 's>(
let (remaining, _date) = date(context, remaining)?;
let time_context =
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
class: ExitClass::Gamma,
exit_matcher: &inactive_time_rest_end,
}));
let (remaining, _time) =
@ -186,12 +186,12 @@ fn active_time_range_timestamp<'r, 's>(
let (remaining, _date) = date(context, remaining)?;
let time_context =
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
class: ExitClass::Gamma,
exit_matcher: &active_time_rest_end,
}));
let first_time_context =
time_context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
class: ExitClass::Gamma,
exit_matcher: &time_range_rest_end,
}));
let (remaining, _first_time) =
@ -247,12 +247,12 @@ fn inactive_time_range_timestamp<'r, 's>(
let (remaining, _date) = date(context, remaining)?;
let time_context =
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
class: ExitClass::Gamma,
exit_matcher: &inactive_time_rest_end,
}));
let first_time_context =
time_context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
class: ExitClass::Gamma,
exit_matcher: &time_range_rest_end,
}));
let (remaining, _first_time) =
@ -303,7 +303,7 @@ fn dayname<'r, 's>(
) -> Res<OrgSource<'s>, OrgSource<'s>> {
let parser_context =
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
class: ExitClass::Gamma,
exit_matcher: &dayname_end,
}));