Merge branch 'list_perf_improvement'
This commit is contained in:
commit
8db9038c53
@ -3,6 +3,7 @@ use nom::bytes::complete::tag;
|
|||||||
use nom::character::complete::anychar;
|
use nom::character::complete::anychar;
|
||||||
use nom::character::complete::digit1;
|
use nom::character::complete::digit1;
|
||||||
use nom::character::complete::line_ending;
|
use nom::character::complete::line_ending;
|
||||||
|
use nom::character::complete::multispace1;
|
||||||
use nom::character::complete::one_of;
|
use nom::character::complete::one_of;
|
||||||
use nom::character::complete::space0;
|
use nom::character::complete::space0;
|
||||||
use nom::character::complete::space1;
|
use nom::character::complete::space1;
|
||||||
@ -17,6 +18,7 @@ use nom::multi::many0;
|
|||||||
use nom::multi::many1;
|
use nom::multi::many1;
|
||||||
use nom::multi::many_till;
|
use nom::multi::many_till;
|
||||||
use nom::sequence::tuple;
|
use nom::sequence::tuple;
|
||||||
|
use nom::InputTake;
|
||||||
|
|
||||||
use super::affiliated_keyword::parse_affiliated_keywords;
|
use super::affiliated_keyword::parse_affiliated_keywords;
|
||||||
use super::element_parser::element;
|
use super::element_parser::element;
|
||||||
@ -25,6 +27,7 @@ use super::org_source::OrgSource;
|
|||||||
use super::util::include_input;
|
use super::util::include_input;
|
||||||
use super::util::indentation_level;
|
use super::util::indentation_level;
|
||||||
use super::util::non_whitespace_character;
|
use super::util::non_whitespace_character;
|
||||||
|
use crate::context::bind_context;
|
||||||
use crate::context::parser_with_context;
|
use crate::context::parser_with_context;
|
||||||
use crate::context::ContextElement;
|
use crate::context::ContextElement;
|
||||||
use crate::context::ContextMatcher;
|
use crate::context::ContextMatcher;
|
||||||
@ -80,6 +83,46 @@ where
|
|||||||
return Err(nom::Err::Error(CustomError::Static("No element detected.")));
|
return Err(nom::Err::Error(CustomError::Static("No element detected.")));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg_attr(
|
||||||
|
feature = "tracing",
|
||||||
|
tracing::instrument(ret, level = "debug", skip(context))
|
||||||
|
)]
|
||||||
|
pub(crate) fn detect_not_plain_list_item_indent<'b, 'g, 'r, 's>(
|
||||||
|
context: RefContext<'b, 'g, 'r, 's>,
|
||||||
|
input: OrgSource<'s>,
|
||||||
|
) -> Res<OrgSource<'s>, (u16, OrgSource<'s>)> {
|
||||||
|
if let Ok((_remaining, (_, indent, _))) = tuple((
|
||||||
|
start_of_line,
|
||||||
|
parser_with_context!(indentation_level)(context),
|
||||||
|
not(tuple((
|
||||||
|
parser_with_context!(bullet)(context),
|
||||||
|
alt((space1, line_ending, eof)),
|
||||||
|
))),
|
||||||
|
))(input)
|
||||||
|
{
|
||||||
|
return Ok((input, indent));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Headlines are not plain list items.
|
||||||
|
if let Ok((_remaining, (_, indent, _))) = verify(
|
||||||
|
tuple((
|
||||||
|
start_of_line,
|
||||||
|
parser_with_context!(indentation_level)(context),
|
||||||
|
tuple((
|
||||||
|
parser_with_context!(bullet)(context),
|
||||||
|
alt((space1, line_ending, eof)),
|
||||||
|
)),
|
||||||
|
)),
|
||||||
|
|(_, (depth, _), ((_, bullet), _))| {
|
||||||
|
*depth == 0 && Into::<&str>::into(bullet).starts_with('*')
|
||||||
|
},
|
||||||
|
)(input)
|
||||||
|
{
|
||||||
|
return Ok((input, indent));
|
||||||
|
}
|
||||||
|
return Err(nom::Err::Error(CustomError::Static("No element detected.")));
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg_attr(
|
#[cfg_attr(
|
||||||
feature = "tracing",
|
feature = "tracing",
|
||||||
tracing::instrument(ret, level = "debug", skip(context, affiliated_keywords))
|
tracing::instrument(ret, level = "debug", skip(context, affiliated_keywords))
|
||||||
@ -120,7 +163,7 @@ where
|
|||||||
// While #3 is the most slow, it also seems to cleanest and involves the least manual mutation of already-parsed objects so I am going with #3 for now, but we should revisit #1 or #2 when the parser is more developed.
|
// While #3 is the most slow, it also seems to cleanest and involves the least manual mutation of already-parsed objects so I am going with #3 for now, but we should revisit #1 or #2 when the parser is more developed.
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
let list_item = parser_with_context!(plain_list_item)(&parser_context)(remaining);
|
let list_item = plain_list_item(&parser_context, remaining);
|
||||||
match (&first_item_list_type, &list_item) {
|
match (&first_item_list_type, &list_item) {
|
||||||
(None, Ok((_remain, (list_type, _item)))) => {
|
(None, Ok((_remain, (list_type, _item)))) => {
|
||||||
let _ = first_item_list_type.insert(*list_type);
|
let _ = first_item_list_type.insert(*list_type);
|
||||||
@ -140,25 +183,17 @@ where
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let maybe_exit = parser_with_context!(exit_matcher_parser)(&parser_context)(remaining);
|
let maybe_exit = exit_matcher_parser(&parser_context, remaining);
|
||||||
if maybe_exit.is_ok() {
|
if maybe_exit.is_ok() {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let (final_child_start, _final_item_first_parse) = match children.pop() {
|
if children.is_empty() {
|
||||||
Some(final_child) => final_child,
|
|
||||||
None => {
|
|
||||||
return Err(nom::Err::Error(CustomError::Static(
|
return Err(nom::Err::Error(CustomError::Static(
|
||||||
"Plain lists require at least one element.",
|
"Plain lists require at least one element.",
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
};
|
|
||||||
let final_item_context = ContextElement::ConsumeTrailingWhitespace(false);
|
|
||||||
let final_item_context = parser_context.with_additional_node(&final_item_context);
|
|
||||||
let (remaining, (_, reparsed_final_item)) =
|
|
||||||
parser_with_context!(plain_list_item)(&final_item_context)(final_child_start)?;
|
|
||||||
children.push((final_child_start, reparsed_final_item));
|
|
||||||
|
|
||||||
let (remaining, _trailing_ws) =
|
let (remaining, _trailing_ws) =
|
||||||
maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
|
maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
|
||||||
@ -187,10 +222,10 @@ fn plain_list_item<'b, 'g, 'r, 's>(
|
|||||||
) -> Res<OrgSource<'s>, (PlainListType, PlainListItem<'s>)> {
|
) -> Res<OrgSource<'s>, (PlainListType, PlainListItem<'s>)> {
|
||||||
start_of_line(input)?;
|
start_of_line(input)?;
|
||||||
let (remaining, (indent_level, _leading_whitespace)) = indentation_level(context, input)?;
|
let (remaining, (indent_level, _leading_whitespace)) = indentation_level(context, input)?;
|
||||||
let (remaining, (bullet_type, bull)) = verify(
|
let (remaining, (bullet_type, bull)) =
|
||||||
parser_with_context!(bullet)(context),
|
verify(bind_context!(bullet, context), |(_bullet_type, bull)| {
|
||||||
|(_bullet_type, bull)| !Into::<&str>::into(bull).starts_with('*') || indent_level > 0,
|
!Into::<&str>::into(bull).starts_with('*') || indent_level > 0
|
||||||
)(remaining)?;
|
})(remaining)?;
|
||||||
|
|
||||||
let (remaining, maybe_counter_set) =
|
let (remaining, maybe_counter_set) =
|
||||||
opt(tuple((space1, tag("[@"), counter_set_value, tag("]"))))(remaining)?;
|
opt(tuple((space1, tag("[@"), counter_set_value, tag("]"))))(remaining)?;
|
||||||
@ -199,7 +234,7 @@ fn plain_list_item<'b, 'g, 'r, 's>(
|
|||||||
let (remaining, maybe_checkbox) = opt(tuple((space1, item_checkbox)))(remaining)?;
|
let (remaining, maybe_checkbox) = opt(tuple((space1, item_checkbox)))(remaining)?;
|
||||||
|
|
||||||
let (remaining, maybe_tag) = if let BulletType::Unordered = bullet_type {
|
let (remaining, maybe_tag) = if let BulletType::Unordered = bullet_type {
|
||||||
opt(tuple((space1, parser_with_context!(item_tag)(context))))(remaining)?
|
opt(tuple((space1, bind_context!(item_tag, context))))(remaining)?
|
||||||
} else {
|
} else {
|
||||||
(remaining, None)
|
(remaining, None)
|
||||||
};
|
};
|
||||||
@ -211,6 +246,12 @@ fn plain_list_item<'b, 'g, 'r, 's>(
|
|||||||
};
|
};
|
||||||
|
|
||||||
let exit_matcher = plain_list_item_end(indent_level);
|
let exit_matcher = plain_list_item_end(indent_level);
|
||||||
|
let final_item_whitespace_cutoff = final_item_whitespace_cutoff(indent_level);
|
||||||
|
let final_whitespace_context = ContextElement::ExitMatcherNode(ExitMatcherNode {
|
||||||
|
class: ExitClass::Beta,
|
||||||
|
exit_matcher: &final_item_whitespace_cutoff,
|
||||||
|
});
|
||||||
|
let final_whitespace_context = context.with_additional_node(&final_whitespace_context);
|
||||||
let contexts = [
|
let contexts = [
|
||||||
ContextElement::ConsumeTrailingWhitespace(true),
|
ContextElement::ConsumeTrailingWhitespace(true),
|
||||||
ContextElement::ExitMatcherNode(ExitMatcherNode {
|
ContextElement::ExitMatcherNode(ExitMatcherNode {
|
||||||
@ -218,17 +259,21 @@ fn plain_list_item<'b, 'g, 'r, 's>(
|
|||||||
exit_matcher: &exit_matcher,
|
exit_matcher: &exit_matcher,
|
||||||
}),
|
}),
|
||||||
];
|
];
|
||||||
let parser_context = context.with_additional_node(&contexts[0]);
|
let parser_context = final_whitespace_context.with_additional_node(&contexts[0]);
|
||||||
let parser_context = parser_context.with_additional_node(&contexts[1]);
|
let parser_context = parser_context.with_additional_node(&contexts[1]);
|
||||||
|
|
||||||
let maybe_contentless_item: Res<OrgSource<'_>, ()> = peek(parser_with_context!(
|
let maybe_contentless_item: Res<OrgSource<'_>, ()> =
|
||||||
detect_contentless_item_contents
|
detect_contentless_item_contents(&parser_context, remaining);
|
||||||
)(&parser_context))(remaining);
|
|
||||||
if let Ok((_rem, _ws)) = maybe_contentless_item {
|
if let Ok((_rem, _ws)) = maybe_contentless_item {
|
||||||
let (remaining, _trailing_ws) = if context.should_consume_trailing_whitespace() {
|
let (remaining, _trailing_ws) = if tuple((
|
||||||
recognize(alt((recognize(many1(blank_line)), eof)))(remaining)?
|
blank_line,
|
||||||
} else {
|
bind_context!(final_item_whitespace_cutoff, context),
|
||||||
|
))(remaining)
|
||||||
|
.is_ok()
|
||||||
|
{
|
||||||
recognize(alt((blank_line, eof)))(remaining)?
|
recognize(alt((blank_line, eof)))(remaining)?
|
||||||
|
} else {
|
||||||
|
recognize(alt((recognize(many1(blank_line)), eof)))(remaining)?
|
||||||
};
|
};
|
||||||
let source = get_consumed(input, remaining);
|
let source = get_consumed(input, remaining);
|
||||||
return Ok((
|
return Ok((
|
||||||
@ -256,26 +301,14 @@ fn plain_list_item<'b, 'g, 'r, 's>(
|
|||||||
.filter(|b| *b == b'\n')
|
.filter(|b| *b == b'\n')
|
||||||
.count();
|
.count();
|
||||||
|
|
||||||
let (mut remaining, (mut children, _exit_contents)) = many_till(
|
let (remaining, (children, _exit_contents)) = many_till(
|
||||||
include_input(parser_with_context!(element(true))(&parser_context)),
|
include_input(bind_context!(element(true), &parser_context)),
|
||||||
parser_with_context!(exit_matcher_parser)(&parser_context),
|
bind_context!(exit_matcher_parser, &parser_context),
|
||||||
)(remaining)?;
|
)(remaining)?;
|
||||||
|
|
||||||
if !children.is_empty() && !context.should_consume_trailing_whitespace() {
|
// We have to use the parser_context here to include the whitespace cut-off
|
||||||
let final_item_context = ContextElement::ConsumeTrailingWhitespace(false);
|
|
||||||
let final_item_context = parser_context.with_additional_node(&final_item_context);
|
|
||||||
let (final_child_start, _original_final_child) = children
|
|
||||||
.pop()
|
|
||||||
.expect("if-statement already checked that children was non-empty.");
|
|
||||||
let (remain, reparsed_final_element) = include_input(parser_with_context!(element(true))(
|
|
||||||
&final_item_context,
|
|
||||||
))(final_child_start)?;
|
|
||||||
remaining = remain;
|
|
||||||
children.push(reparsed_final_element);
|
|
||||||
}
|
|
||||||
|
|
||||||
let (remaining, _trailing_ws) =
|
let (remaining, _trailing_ws) =
|
||||||
maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
|
maybe_consume_trailing_whitespace_if_not_exiting(&final_whitespace_context, remaining)?;
|
||||||
|
|
||||||
let source = get_consumed(input, remaining);
|
let source = get_consumed(input, remaining);
|
||||||
return Ok((
|
return Ok((
|
||||||
@ -322,7 +355,7 @@ fn bullet<'b, 'g, 'r, 's>(
|
|||||||
map(tag("+"), |bull| (BulletType::Unordered, bull)),
|
map(tag("+"), |bull| (BulletType::Unordered, bull)),
|
||||||
map(
|
map(
|
||||||
recognize(tuple((
|
recognize(tuple((
|
||||||
parser_with_context!(counter)(context),
|
bind_context!(counter, context),
|
||||||
alt((tag("."), tag(")"))),
|
alt((tag("."), tag(")"))),
|
||||||
))),
|
))),
|
||||||
|bull| (BulletType::Ordered, bull),
|
|bull| (BulletType::Ordered, bull),
|
||||||
@ -377,6 +410,52 @@ fn counter_set_value<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, PlainListIt
|
|||||||
))(input)
|
))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const fn final_item_whitespace_cutoff(indent_level: IndentationLevel) -> impl ContextMatcher {
|
||||||
|
move |context, input: OrgSource<'_>| {
|
||||||
|
impl_final_item_whitespace_cutoff(context, input, indent_level)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg_attr(
|
||||||
|
feature = "tracing",
|
||||||
|
tracing::instrument(ret, level = "debug", skip(context))
|
||||||
|
)]
|
||||||
|
fn impl_final_item_whitespace_cutoff<'b, 'g, 'r, 's>(
|
||||||
|
context: RefContext<'b, 'g, 'r, 's>,
|
||||||
|
input: OrgSource<'s>,
|
||||||
|
indent_level: IndentationLevel,
|
||||||
|
) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
||||||
|
start_of_line(input)?;
|
||||||
|
// element!(plain_list_end, context, input);
|
||||||
|
|
||||||
|
if let Ok((_remaining, _)) = verify(
|
||||||
|
tuple((
|
||||||
|
opt(blank_line),
|
||||||
|
bind_context!(indentation_level, context),
|
||||||
|
not(multispace1),
|
||||||
|
)),
|
||||||
|
|(_, (depth, _stars), _not_whitespace)| *depth < indent_level,
|
||||||
|
)(input)
|
||||||
|
{
|
||||||
|
return Ok((input, input.take(0)));
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Ok((_remaining, _)) = tuple((
|
||||||
|
opt(blank_line),
|
||||||
|
verify(
|
||||||
|
bind_context!(detect_not_plain_list_item_indent, context),
|
||||||
|
|(depth, _)| *depth == indent_level,
|
||||||
|
),
|
||||||
|
))(input)
|
||||||
|
{
|
||||||
|
return Ok((input, input.take(0)));
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(nom::Err::Error(CustomError::Static(
|
||||||
|
"No whitespace cut-off.",
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg_attr(
|
#[cfg_attr(
|
||||||
feature = "tracing",
|
feature = "tracing",
|
||||||
tracing::instrument(ret, level = "debug", skip(_context))
|
tracing::instrument(ret, level = "debug", skip(_context))
|
||||||
@ -412,7 +491,7 @@ fn _plain_list_item_end<'b, 'g, 'r, 's>(
|
|||||||
start_of_line(input)?;
|
start_of_line(input)?;
|
||||||
recognize(tuple((
|
recognize(tuple((
|
||||||
opt(blank_line),
|
opt(blank_line),
|
||||||
parser_with_context!(line_indented_lte_matcher)(context),
|
bind_context!(line_indented_lte_matcher, context),
|
||||||
)))(input)
|
)))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -431,7 +510,7 @@ fn _line_indented_lte<'b, 'g, 'r, 's>(
|
|||||||
) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
||||||
let matched = recognize(verify(
|
let matched = recognize(verify(
|
||||||
tuple((
|
tuple((
|
||||||
parser_with_context!(indentation_level)(context),
|
bind_context!(indentation_level, context),
|
||||||
non_whitespace_character,
|
non_whitespace_character,
|
||||||
)),
|
)),
|
||||||
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
|
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
|
||||||
@ -457,8 +536,8 @@ fn item_tag<'b, 'g, 'r, 's>(
|
|||||||
let (remaining, (children, _exit_contents)) = verify(
|
let (remaining, (children, _exit_contents)) = verify(
|
||||||
many_till(
|
many_till(
|
||||||
// TODO: Should this be using a different set like the minimal set?
|
// TODO: Should this be using a different set like the minimal set?
|
||||||
parser_with_context!(standard_set_object)(&parser_context),
|
bind_context!(standard_set_object, &parser_context),
|
||||||
parser_with_context!(exit_matcher_parser)(&parser_context),
|
bind_context!(exit_matcher_parser, &parser_context),
|
||||||
),
|
),
|
||||||
|(children, _exit_contents)| !children.is_empty(),
|
|(children, _exit_contents)| !children.is_empty(),
|
||||||
)(input)?;
|
)(input)?;
|
||||||
@ -508,7 +587,7 @@ fn item_tag_post_gap<'b, 'g, 'r, 's>(
|
|||||||
alt((
|
alt((
|
||||||
peek(recognize(not(blank_line))),
|
peek(recognize(not(blank_line))),
|
||||||
peek(recognize(tuple((many0(blank_line), eof)))),
|
peek(recognize(tuple((many0(blank_line), eof)))),
|
||||||
parser_with_context!(exit_matcher_parser)(context),
|
bind_context!(exit_matcher_parser, context),
|
||||||
)),
|
)),
|
||||||
),
|
),
|
||||||
))),
|
))),
|
||||||
@ -538,7 +617,7 @@ fn detect_contentless_item_contents<'b, 'g, 'r, 's>(
|
|||||||
) -> Res<OrgSource<'s>, ()> {
|
) -> Res<OrgSource<'s>, ()> {
|
||||||
let (remaining, _) = recognize(many_till(
|
let (remaining, _) = recognize(many_till(
|
||||||
blank_line,
|
blank_line,
|
||||||
parser_with_context!(exit_matcher_parser)(context),
|
bind_context!(exit_matcher_parser, context),
|
||||||
))(input)?;
|
))(input)?;
|
||||||
Ok((remaining, ()))
|
Ok((remaining, ()))
|
||||||
}
|
}
|
||||||
|
@ -243,6 +243,10 @@ pub(crate) fn org_line_ending(input: OrgSource<'_>) -> Res<OrgSource<'_>, OrgSou
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Match the whitespace at the beginning of a line and give it an indentation level.
|
/// Match the whitespace at the beginning of a line and give it an indentation level.
|
||||||
|
#[cfg_attr(
|
||||||
|
feature = "tracing",
|
||||||
|
tracing::instrument(ret, level = "debug", skip(context))
|
||||||
|
)]
|
||||||
pub(crate) fn indentation_level<'s>(
|
pub(crate) fn indentation_level<'s>(
|
||||||
context: RefContext<'_, '_, '_, 's>,
|
context: RefContext<'_, '_, '_, 's>,
|
||||||
input: OrgSource<'s>,
|
input: OrgSource<'s>,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user