organic/src/parser/paragraph.rs
Tom Alexander 9c1e6ccc97
All checks were successful
rust-test Build rust-test has succeeded
rust-build Build rust-build has succeeded
Add a detect_element function.
This is an optimization. When you have something like plain text which ends when it hits the next element, we only need to parse enough to detect that an element is about to occur. For elements like plain lists, this is as simple as parsing a line starting with optional whitespace and then a bullet, which avoids parsing the entire plain list tree. The benefit is most noticeable in deeply nested plain lists.
2023-08-25 01:07:53 -04:00

89 lines
3.1 KiB
Rust

use nom::branch::alt;
use nom::combinator::eof;
use nom::combinator::recognize;
use nom::combinator::verify;
use nom::multi::many1;
use nom::multi::many_till;
use nom::sequence::tuple;
use super::element_parser::detect_element;
use super::lesser_element::Paragraph;
use super::org_source::OrgSource;
use super::util::blank_line;
use super::util::get_consumed;
use super::Context;
use crate::error::Res;
use crate::parser::exiting::ExitClass;
use crate::parser::object_parser::standard_set_object;
use crate::parser::parser_context::ContextElement;
use crate::parser::parser_context::ExitMatcherNode;
use crate::parser::parser_with_context::parser_with_context;
use crate::parser::util::exit_matcher_parser;
use crate::parser::util::start_of_line;
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn paragraph<'r, 's>(
context: Context<'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Paragraph<'s>> {
let parser_context =
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Gamma,
exit_matcher: &paragraph_end,
}));
let standard_set_object_matcher = parser_with_context!(standard_set_object)(&parser_context);
let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context);
let (remaining, (children, _exit_contents)) = verify(
many_till(standard_set_object_matcher, exit_matcher),
|(children, _exit_contents)| !children.is_empty(),
)(input)?;
// Not checking parent exit matcher because if there are any children matched then we have a valid paragraph.
let source = get_consumed(input, remaining);
Ok((
remaining,
Paragraph {
source: source.into(),
children,
},
))
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn paragraph_end<'r, 's>(
context: Context<'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
let non_paragraph_element_matcher = parser_with_context!(detect_element(false))(context);
alt((
recognize(tuple((start_of_line, many1(blank_line)))),
recognize(non_paragraph_element_matcher),
eof,
))(input)
}
#[cfg(test)]
mod tests {
use crate::parser::element_parser::element;
use crate::parser::org_source::OrgSource;
use crate::parser::parser_context::ContextTree;
use crate::parser::parser_with_context::parser_with_context;
use crate::parser::source::Source;
#[test]
fn two_paragraphs() {
let input = OrgSource::new("foo bar baz\n\nlorem ipsum");
let initial_context: ContextTree<'_, '_> = ContextTree::new();
let paragraph_matcher = parser_with_context!(element(true))(&initial_context);
let (remaining, first_paragraph) = paragraph_matcher(input).expect("Parse first paragraph");
let (remaining, second_paragraph) =
paragraph_matcher(remaining).expect("Parse second paragraph.");
assert_eq!(Into::<&str>::into(remaining), "");
assert_eq!(first_paragraph.get_source(), "foo bar baz\n\n");
assert_eq!(second_paragraph.get_source(), "lorem ipsum");
}
}