From 13b95cd0a13a6d01b8fed8786284d2d2d2276dc3 Mon Sep 17 00:00:00 2001
From: Tom Alexander <tom@fizz.buzz>
Date: Mon, 9 Oct 2023 16:44:59 -0400
Subject: [PATCH] Fix handling of text markup at the start/end of regular link
 descriptions and radio targets.

---
 .gitignore                 |  1 +
 src/context/context.rs     |  5 -----
 src/parser/radio_link.rs   | 37 +++++++++++++++++--------------------
 src/parser/regular_link.rs | 32 +++++++++++++++++---------------
 src/parser/text_markup.rs  | 17 ++++-------------
 src/parser/util.rs         | 27 +++++++++++++++++++++++++++
 6 files changed, 66 insertions(+), 53 deletions(-)

diff --git a/.gitignore b/.gitignore
index 96ef6c0..8d2488e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 /target
 Cargo.lock
+TODO.org
diff --git a/src/context/context.rs b/src/context/context.rs
index 6787273..def26bf 100644
--- a/src/context/context.rs
+++ b/src/context/context.rs
@@ -33,11 +33,6 @@ pub(crate) enum ContextElement<'r, 's> {
     /// The value stored is the start of the element after the affiliated keywords. In this way, we can ensure that we do not exit an element immediately after the affiliated keyword had been consumed.
     HasAffiliatedKeyword(HasAffiliatedKeywordInner<'r, 's>),
 
-    /// Indicate the position that we started parsing a text section.
-    ///
-    /// This value is stored because "<<<" is not a valid prefix for text markup UNLESS it is starting a radio target. Likewise "[" is not a valid prefix for text markup UNLESS it is the start of a regular link description.
-    StartTextSection(OrgSource<'s>),
-
     /// This is just here to use the 's lifetime until I'm sure we can eliminate it from ContextElement.
     #[allow(dead_code)]
     Placeholder(PhantomData<&'s str>),
diff --git a/src/parser/radio_link.rs b/src/parser/radio_link.rs
index c25ec3c..35a8053 100644
--- a/src/parser/radio_link.rs
+++ b/src/parser/radio_link.rs
@@ -2,15 +2,17 @@ use nom::branch::alt;
 use nom::bytes::complete::tag;
 use nom::character::complete::line_ending;
 use nom::character::complete::space0;
+use nom::combinator::all_consuming;
 use nom::combinator::consumed;
-use nom::combinator::map;
+use nom::combinator::map_parser;
 use nom::combinator::verify;
-use nom::multi::many_till;
+use nom::multi::many1;
 
 use super::object_parser::minimal_set_object;
 use super::org_source::OrgSource;
-use super::util::exit_matcher_parser;
+use super::util::confine_context;
 use super::util::maybe_consume_object_trailing_whitespace_if_not_exiting;
+use super::util::text_until_exit;
 use crate::context::parser_with_context;
 use crate::context::ContextElement;
 use crate::context::ExitClass;
@@ -103,25 +105,20 @@ pub(crate) fn radio_target<'b, 'g, 'r, 's>(
     input: OrgSource<'s>,
 ) -> Res<OrgSource<'s>, RadioTarget<'s>> {
     let (remaining, _opening) = tag("<<<")(input)?;
-    let contexts = [
-        ContextElement::ExitMatcherNode(ExitMatcherNode {
-            class: ExitClass::Gamma,
-            exit_matcher: &radio_target_end,
-        }),
-        ContextElement::StartTextSection(remaining),
-    ];
-    let parser_context = context.with_additional_node(&contexts[0]);
-    let parser_context = parser_context.with_additional_node(&contexts[1]);
+    let parser_context = ContextElement::ExitMatcherNode(ExitMatcherNode {
+        class: ExitClass::Gamma,
+        exit_matcher: &radio_target_end,
+    });
+    let parser_context = context.with_additional_node(&parser_context);
 
-    let (remaining, (raw_value, children)) = consumed(verify(
-        map(
-            many_till(
-                parser_with_context!(minimal_set_object)(&parser_context),
-                parser_with_context!(exit_matcher_parser)(&parser_context),
-            ),
-            |(children, _)| children,
+    let (remaining, (raw_value, children)) = consumed(map_parser(
+        verify(
+            parser_with_context!(text_until_exit)(&parser_context),
+            |text| text.len() > 0,
         ),
-        |children: &Vec<_>| !children.is_empty(),
+        confine_context(|i| {
+            all_consuming(many1(parser_with_context!(minimal_set_object)(context)))(i)
+        }),
     ))(remaining)?;
 
     let (remaining, _closing) = tag(">>>")(remaining)?;
diff --git a/src/parser/regular_link.rs b/src/parser/regular_link.rs
index ce9ce37..6ce8c2e 100644
--- a/src/parser/regular_link.rs
+++ b/src/parser/regular_link.rs
@@ -8,6 +8,7 @@ use nom::bytes::complete::take;
 use nom::bytes::complete::take_till1;
 use nom::bytes::complete::take_until;
 use nom::character::complete::anychar;
+use nom::combinator::all_consuming;
 use nom::combinator::consumed;
 use nom::combinator::eof;
 use nom::combinator::flat_map;
@@ -18,6 +19,7 @@ use nom::combinator::peek;
 use nom::combinator::recognize;
 use nom::combinator::rest;
 use nom::combinator::verify;
+use nom::multi::many1;
 use nom::multi::many1_count;
 use nom::multi::many_till;
 use nom::sequence::tuple;
@@ -28,7 +30,7 @@ use super::org_source::BracketDepth;
 use super::org_source::OrgSource;
 use super::plain_link::parse_file_and_application;
 use super::plain_link::protocol;
-use super::util::exit_matcher_parser;
+use super::util::confine_context;
 use super::util::get_consumed;
 use super::util::maybe_consume_object_trailing_whitespace_if_not_exiting;
 use super::util::text_until_exit;
@@ -397,21 +399,21 @@ fn description<'b, 'g, 'r, 's>(
     context: RefContext<'b, 'g, 'r, 's>,
     input: OrgSource<'s>,
 ) -> Res<OrgSource<'s>, Vec<Object<'s>>> {
-    let contexts = [
-        ContextElement::ExitMatcherNode(ExitMatcherNode {
-            class: ExitClass::Beta,
-            exit_matcher: &description_end,
-        }),
-        ContextElement::StartTextSection(input),
-    ];
-    let parser_context = context.with_additional_node(&contexts[0]);
-    let parser_context = parser_context.with_additional_node(&contexts[1]);
-    let (remaining, (children, _exit_contents)) = verify(
-        many_till(
-            parser_with_context!(regular_link_description_set_object)(&parser_context),
-            parser_with_context!(exit_matcher_parser)(&parser_context),
+    let parser_context = ContextElement::ExitMatcherNode(ExitMatcherNode {
+        class: ExitClass::Beta,
+        exit_matcher: &description_end,
+    });
+    let parser_context = context.with_additional_node(&parser_context);
+    let (remaining, children) = map_parser(
+        verify(
+            parser_with_context!(text_until_exit)(&parser_context),
+            |text| text.len() > 0,
         ),
-        |(children, _exit_contents)| !children.is_empty(),
+        confine_context(|i| {
+            all_consuming(many1(parser_with_context!(
+                regular_link_description_set_object
+            )(context)))(i)
+        }),
     )(input)?;
 
     Ok((remaining, children))
diff --git a/src/parser/text_markup.rs b/src/parser/text_markup.rs
index 7bf7bf1..b114444 100644
--- a/src/parser/text_markup.rs
+++ b/src/parser/text_markup.rs
@@ -1,7 +1,6 @@
 use nom::branch::alt;
 use nom::bytes::complete::tag;
 use nom::character::complete::anychar;
-use nom::character::complete::line_ending;
 use nom::character::complete::multispace1;
 use nom::character::complete::one_of;
 use nom::character::complete::space0;
@@ -20,6 +19,7 @@ use super::org_source::OrgSource;
 use super::radio_link::RematchObject;
 use super::util::in_object_section;
 use super::util::maybe_consume_object_trailing_whitespace_if_not_exiting;
+use super::util::org_line_ending;
 use super::util::start_of_line;
 use crate::context::parser_with_context;
 use crate::context::ContextElement;
@@ -283,7 +283,7 @@ fn _text_markup_string<'b, 'g, 'r, 's, 'c>(
 
 #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
 fn pre<'b, 'g, 'r, 's>(
-    context: RefContext<'b, 'g, 'r, 's>,
+    _context: RefContext<'b, 'g, 'r, 's>,
     input: OrgSource<'s>,
 ) -> Res<OrgSource<'s>, ()> {
     if start_of_line(input).is_ok() {
@@ -292,16 +292,6 @@ fn pre<'b, 'g, 'r, 's>(
     if preceded_by_whitespace(true)(input).is_ok() {
         return Ok((input, ()));
     }
-    let radio_target_start = context
-        .iter()
-        .find_map(|c| match c {
-            ContextElement::StartTextSection(text) => Some(text),
-            _ => None,
-        })
-        .map(|text| text.get_byte_offset());
-    if Some(input.get_byte_offset()) == radio_target_start {
-        return Ok((input, ()));
-    }
     let preceding_character = input.get_preceding_character();
     match preceding_character {
         // If None, we are at the start of the file which is technically the beginning of a line.
@@ -321,7 +311,8 @@ fn post<'b, 'g, 'r, 's>(
     _context: RefContext<'b, 'g, 'r, 's>,
     input: OrgSource<'s>,
 ) -> Res<OrgSource<'s>, ()> {
-    let (remaining, _) = alt((recognize(one_of(" \r\n\t-.,;:!?')}[\"\\")), line_ending))(input)?;
+    let (remaining, _) =
+        alt((recognize(one_of(" \r\n\t-.,;:!?')}[\"\\")), org_line_ending))(input)?;
     Ok((remaining, ()))
 }
 
diff --git a/src/parser/util.rs b/src/parser/util.rs
index 5770de4..57f03e7 100644
--- a/src/parser/util.rs
+++ b/src/parser/util.rs
@@ -1,3 +1,5 @@
+use std::fmt::Debug;
+
 use nom::branch::alt;
 use nom::character::complete::anychar;
 use nom::character::complete::line_ending;
@@ -301,3 +303,28 @@ pub(crate) fn get_has_affiliated_keyword<'b, 'g, 'r, 's>(
     }
     None
 }
+
+/// Re-create the input OrgSource from its raw text so the wrapped parser sees it as if it were starting a fresh document.
+///
+/// This is important for making start-of-document, end-of-document, and other context-dependent tests succeed.
+pub(crate) fn confine_context<'s, O: Debug, I: Fn(OrgSource<'s>) -> Res<OrgSource<'s>, O>>(
+    inner: I,
+) -> impl Fn(OrgSource<'s>) -> Res<OrgSource<'s>, O> {
+    move |input| impl_confine_context(input, &inner)
+}
+
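+/// Implementation of `confine_context`: convert the input to its raw `&str` and back into an OrgSource, then run `inner` on the result.
+///
+/// Rebuilding the OrgSource this way lets start-of-document, end-of-document, and other context-dependent tests behave as if the text were a fresh document.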
+#[cfg_attr(
+    feature = "tracing",
+    tracing::instrument(ret, level = "debug", skip(inner))
+)]
+fn impl_confine_context<'s, O: Debug, I: Fn(OrgSource<'s>) -> Res<OrgSource<'s>, O>>(
+    input: OrgSource<'s>,
+    inner: I,
+) -> Res<OrgSource<'s>, O> {
+    let raw_str = Into::<&str>::into(input);
+    let back_to_org_source = Into::<OrgSource<'_>>::into(raw_str);
+    inner(back_to_org_source)
+}