From 13b95cd0a13a6d01b8fed8786284d2d2d2276dc3 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 9 Oct 2023 16:44:59 -0400 Subject: [PATCH] Fix handling of text markup at the start/end of regular link descriptions and radio targets. --- .gitignore | 1 + src/context/context.rs | 5 ----- src/parser/radio_link.rs | 37 +++++++++++++++++-------------------- src/parser/regular_link.rs | 32 +++++++++++++++++--------------- src/parser/text_markup.rs | 17 ++++------------- src/parser/util.rs | 27 +++++++++++++++++++++++++++ 6 files changed, 66 insertions(+), 53 deletions(-) diff --git a/.gitignore b/.gitignore index 96ef6c0b..8d2488e6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /target Cargo.lock +TODO.org diff --git a/src/context/context.rs b/src/context/context.rs index 6787273d..def26bfc 100644 --- a/src/context/context.rs +++ b/src/context/context.rs @@ -33,11 +33,6 @@ pub(crate) enum ContextElement<'r, 's> { /// The value stored is the start of the element after the affiliated keywords. In this way, we can ensure that we do not exit an element immediately after the affiliated keyword had been consumed. HasAffiliatedKeyword(HasAffiliatedKeywordInner<'r, 's>), - /// Indicate the position that we started parsing a text section. - /// - /// This value is stored because "<<<" is not a valid prefix for text markup UNLESS it is starting a radio target. Likewise "[" is not a valid prefix for text markup UNLESS it is the start of a regular link description. - StartTextSection(OrgSource<'s>), - /// This is just here to use the 's lifetime until I'm sure we can eliminate it from ContextElement. #[allow(dead_code)] Placeholder(PhantomData<&'s str>), diff --git a/src/parser/radio_link.rs b/src/parser/radio_link.rs index c25ec3cb..35a80533 100644 --- a/src/parser/radio_link.rs +++ b/src/parser/radio_link.rs @@ -2,15 +2,17 @@ use nom::branch::alt; use nom::bytes::complete::tag; use nom::character::complete::line_ending; use nom::character::complete::space0; +use nom::combinator::all_consuming; use nom::combinator::consumed; -use nom::combinator::map; +use nom::combinator::map_parser; use nom::combinator::verify; -use nom::multi::many_till; +use nom::multi::many1; use super::object_parser::minimal_set_object; use super::org_source::OrgSource; -use super::util::exit_matcher_parser; +use super::util::confine_context; use super::util::maybe_consume_object_trailing_whitespace_if_not_exiting; +use super::util::text_until_exit; use crate::context::parser_with_context; use crate::context::ContextElement; use crate::context::ExitClass; @@ -103,25 +105,20 @@ pub(crate) fn radio_target<'b, 'g, 'r, 's>( input: OrgSource<'s>, ) -> Res, RadioTarget<'s>> { let (remaining, _opening) = tag("<<<")(input)?; - let contexts = [ - ContextElement::ExitMatcherNode(ExitMatcherNode { - class: ExitClass::Gamma, - exit_matcher: &radio_target_end, - }), - ContextElement::StartTextSection(remaining), - ]; - let parser_context = context.with_additional_node(&contexts[0]); - let parser_context = parser_context.with_additional_node(&contexts[1]); + let parser_context = ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Gamma, + exit_matcher: &radio_target_end, + }); + let parser_context = context.with_additional_node(&parser_context); - let (remaining, (raw_value, children)) = consumed(verify( - map( - many_till( - parser_with_context!(minimal_set_object)(&parser_context), - parser_with_context!(exit_matcher_parser)(&parser_context), - ), - |(children, _)| children, + let (remaining, (raw_value, children)) = consumed(map_parser( + verify( + parser_with_context!(text_until_exit)(&parser_context), + |text| text.len() > 0, ), - |children: &Vec<_>| !children.is_empty(), + confine_context(|i| { + all_consuming(many1(parser_with_context!(minimal_set_object)(context)))(i) + }), ))(remaining)?; let (remaining, _closing) = tag(">>>")(remaining)?; diff --git a/src/parser/regular_link.rs b/src/parser/regular_link.rs index ce9ce374..6ce8c2ec 100644 --- a/src/parser/regular_link.rs +++ b/src/parser/regular_link.rs @@ -8,6 +8,7 @@ use nom::bytes::complete::take; use nom::bytes::complete::take_till1; use nom::bytes::complete::take_until; use nom::character::complete::anychar; +use nom::combinator::all_consuming; use nom::combinator::consumed; use nom::combinator::eof; use nom::combinator::flat_map; @@ -18,6 +19,7 @@ use nom::combinator::peek; use nom::combinator::recognize; use nom::combinator::rest; use nom::combinator::verify; +use nom::multi::many1; use nom::multi::many1_count; use nom::multi::many_till; use nom::sequence::tuple; @@ -28,7 +30,7 @@ use super::org_source::BracketDepth; use super::org_source::OrgSource; use super::plain_link::parse_file_and_application; use super::plain_link::protocol; -use super::util::exit_matcher_parser; +use super::util::confine_context; use super::util::get_consumed; use super::util::maybe_consume_object_trailing_whitespace_if_not_exiting; use super::util::text_until_exit; @@ -397,21 +399,21 @@ fn description<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Vec>> { - let contexts = [ - ContextElement::ExitMatcherNode(ExitMatcherNode { - class: ExitClass::Beta, - exit_matcher: &description_end, - }), - ContextElement::StartTextSection(input), - ]; - let parser_context = context.with_additional_node(&contexts[0]); - let parser_context = parser_context.with_additional_node(&contexts[1]); - let (remaining, (children, _exit_contents)) = verify( - many_till( - parser_with_context!(regular_link_description_set_object)(&parser_context), - parser_with_context!(exit_matcher_parser)(&parser_context), + let parser_context = ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Beta, + exit_matcher: &description_end, + }); + let parser_context = context.with_additional_node(&parser_context); + let (remaining, children) = map_parser( + verify( + parser_with_context!(text_until_exit)(&parser_context), + |text| text.len() > 0, ), - |(children, _exit_contents)| !children.is_empty(), + confine_context(|i| { + all_consuming(many1(parser_with_context!( + regular_link_description_set_object + )(context)))(i) + }), )(input)?; Ok((remaining, children)) diff --git a/src/parser/text_markup.rs b/src/parser/text_markup.rs index 7bf7bf12..b114444a 100644 --- a/src/parser/text_markup.rs +++ b/src/parser/text_markup.rs @@ -1,7 +1,6 @@ use nom::branch::alt; use nom::bytes::complete::tag; use nom::character::complete::anychar; -use nom::character::complete::line_ending; use nom::character::complete::multispace1; use nom::character::complete::one_of; use nom::character::complete::space0; @@ -20,6 +19,7 @@ use super::org_source::OrgSource; use super::radio_link::RematchObject; use super::util::in_object_section; use super::util::maybe_consume_object_trailing_whitespace_if_not_exiting; +use super::util::org_line_ending; use super::util::start_of_line; use crate::context::parser_with_context; use crate::context::ContextElement; @@ -283,7 +283,7 @@ fn _text_markup_string<'b, 'g, 'r, 's, 'c>( #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn pre<'b, 'g, 'r, 's>( - context: RefContext<'b, 'g, 'r, 's>, + _context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, ()> { if start_of_line(input).is_ok() { @@ -292,16 +292,6 @@ fn pre<'b, 'g, 'r, 's>( if preceded_by_whitespace(true)(input).is_ok() { return Ok((input, ())); } - let radio_target_start = context - .iter() - .find_map(|c| match c { - ContextElement::StartTextSection(text) => Some(text), - _ => None, - }) - .map(|text| text.get_byte_offset()); - if Some(input.get_byte_offset()) == radio_target_start { - return Ok((input, ())); - } let preceding_character = input.get_preceding_character(); match preceding_character { // If None, we are at the start of the file which is technically the beginning of a line. @@ -321,7 +311,8 @@ fn post<'b, 'g, 'r, 's>( _context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, ()> { - let (remaining, _) = alt((recognize(one_of(" \r\n\t-.,;:!?')}[\"\\")), line_ending))(input)?; + let (remaining, _) = + alt((recognize(one_of(" \r\n\t-.,;:!?')}[\"\\")), org_line_ending))(input)?; Ok((remaining, ())) } diff --git a/src/parser/util.rs b/src/parser/util.rs index 5770de4b..57f03e74 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -1,3 +1,5 @@ +use std::fmt::Debug; + use nom::branch::alt; use nom::character::complete::anychar; use nom::character::complete::line_ending; @@ -301,3 +303,28 @@ pub(crate) fn get_has_affiliated_keyword<'b, 'g, 'r, 's>( } None } + +/// Reset the input OrgSource as if it was starting a fresh document. +/// +/// This is important for making start-of-document, end-of-document, and other context-dependent tests succeed. +pub(crate) fn confine_context<'s, O: Debug, I: Fn(OrgSource<'s>) -> Res, O>>( + inner: I, +) -> impl Fn(OrgSource<'s>) -> Res, O> { + move |input| impl_confine_context(input, &inner) +} + +/// Reset the input OrgSource as if it was starting a fresh document. +/// +/// This is important for making start-of-document, end-of-document, and other context-dependent tests succeed. +#[cfg_attr( + feature = "tracing", + tracing::instrument(ret, level = "debug", skip(inner)) +)] +fn impl_confine_context<'s, O: Debug, I: Fn(OrgSource<'s>) -> Res, O>>( + input: OrgSource<'s>, + inner: I, +) -> Res, O> { + let raw_str = Into::<&str>::into(input); + let back_to_org_source = Into::>::into(raw_str); + inner(back_to_org_source) +}