From 397083c65809c91d2791fa228cc41ebc49154cab Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 24 Apr 2023 20:27:28 -0400 Subject: [PATCH 01/28] Define a RematchObject trait for matching radio links based on radio targets. --- src/parser/radio_link.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/parser/radio_link.rs b/src/parser/radio_link.rs index 890f408..2066f24 100644 --- a/src/parser/radio_link.rs +++ b/src/parser/radio_link.rs @@ -6,6 +6,7 @@ use nom::combinator::verify; use nom::multi::many_till; use super::Context; +use super::Object; use crate::error::Res; use crate::parser::exiting::ExitClass; use crate::parser::object_parser::minimal_set_object; @@ -54,3 +55,11 @@ pub fn radio_target<'r, 's>( fn radio_target_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { alt((tag("<"), tag(">"), line_ending))(input) } + +pub trait RematchObject { + fn rematch_object<'r, 's>( + &self, + context: Context<'r, 's>, + input: &'s str, + ) -> Res<&'s str, Object<'s>>; +} From 22bb42882b355d9a1fcbee9baf5dc5903df44112 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 24 Apr 2023 20:32:59 -0400 Subject: [PATCH 02/28] Implement rematch for plain text. 
--- src/parser/plain_text.rs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/parser/plain_text.rs b/src/parser/plain_text.rs index 71e2745..123fabf 100644 --- a/src/parser/plain_text.rs +++ b/src/parser/plain_text.rs @@ -1,12 +1,16 @@ use nom::branch::alt; +use nom::bytes::complete::tag; use nom::character::complete::anychar; +use nom::combinator::map; use nom::combinator::peek; use nom::combinator::recognize; use nom::combinator::verify; use nom::multi::many_till; use super::object::PlainText; +use super::radio_link::RematchObject; use super::Context; +use super::Object; use crate::error::Res; use crate::parser::object_parser::any_object_except_plain_text; use crate::parser::parser_with_context::parser_with_context; @@ -33,12 +37,23 @@ fn plain_text_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s recognize(parser_with_context!(any_object_except_plain_text)(context))(input) } +impl<'x> RematchObject for PlainText<'x> { + fn rematch_object<'r, 's>( + &self, + _context: Context<'r, 's>, + input: &'s str, + ) -> Res<&'s str, Object<'s>> { + map(tag(self.source), |s| { + Object::PlainText(PlainText { source: s }) + })(input) + } +} + #[cfg(test)] mod tests { use nom::combinator::map; use super::*; - use crate::parser::object::Object; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ContextTree; use crate::parser::parser_with_context::parser_with_context; From 3fc3ba58aad714fba6dea1f18baa77b2ee0ec387 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 24 Apr 2023 20:41:40 -0400 Subject: [PATCH 03/28] Defining the RadioTarget context element. 
--- src/parser/parser_context.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/parser/parser_context.rs b/src/parser/parser_context.rs index 232db31..616b983 100644 --- a/src/parser/parser_context.rs +++ b/src/parser/parser_context.rs @@ -6,6 +6,7 @@ use nom::IResult; use super::list::List; use super::list::Node; use super::Context; +use super::Object; use crate::error::CustomError; use crate::error::MyError; use crate::error::Res; @@ -133,6 +134,13 @@ pub enum ContextElement<'r, 's> { /// Indicates if elements should consume the whitespace after them. ConsumeTrailingWhitespace(bool), + + /// The contents of a radio target. + /// + /// If any are found, this will force a 2nd parse through the + /// org-mode document since text needs to be re-parsed to look for + /// radio links matching the contents of radio targets. + RadioTarget(Vec>>), } pub struct ExitMatcherNode<'r> { From c44e7d642f22e78dc46b4cbcbf83cd0c2e2910a9 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 24 Apr 2023 22:10:24 -0400 Subject: [PATCH 04/28] Starting code for iterating over the parsed ast. 
--- src/parser/document.rs | 29 +++++++++++++++++++++++++++++ src/parser/mod.rs | 1 + src/parser/token.rs | 12 ++++++++++++ 3 files changed, 42 insertions(+) create mode 100644 src/parser/token.rs diff --git a/src/parser/document.rs b/src/parser/document.rs index 9954fab..98e570c 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -18,6 +18,7 @@ use super::element::Element; use super::object::Object; use super::parser_with_context::parser_with_context; use super::source::Source; +use super::token::Token; use super::util::exit_matcher_parser; use super::util::get_consumed; use super::util::start_of_line; @@ -255,3 +256,31 @@ fn headline<'r, 's>( fn headline_end<'r, 's>(_context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { line_ending(input) } + +impl<'s> Document<'s> { + pub fn iter_tokens<'r>(&'r self) -> impl Iterator> { + self.zeroth_section + .iter() + .map(Token::Section) + .chain(self.children.iter().map(Token::Heading)) + } +} + +impl<'s> Heading<'s> { + pub fn iter_tokens<'r>(&'r self) -> impl Iterator> { + self.title.iter().map(Token::Object).chain(self.children.iter().map( + |de| { + match de { + DocumentElement::Heading(obj) => Token::Heading(obj), + DocumentElement::Section(obj) => Token::Section(obj), + } + } + )) + } +} + +impl<'s> Section<'s> { + pub fn iter_tokens<'r>(&'r self) -> impl Iterator> { + self.children.iter().map(Token::Element) + } +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index df09653..dcd61c6 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -35,6 +35,7 @@ pub mod sexp; mod source; mod table; mod text_markup; +mod token; mod util; pub use document::document; pub use document::Document; diff --git a/src/parser/token.rs b/src/parser/token.rs new file mode 100644 index 0000000..6685773 --- /dev/null +++ b/src/parser/token.rs @@ -0,0 +1,12 @@ +use super::Document; +use super::Element; +use super::Heading; +use super::Object; +use super::Section; + +pub enum Token<'r, 's> { + Heading(&'r 
Heading<'s>), + Section(&'r Section<'s>), + Object(&'r Object<'s>), + Element(&'r Element<'s>), +} From f3592347c171e5b08b30a4e77f6b2ce5297e2934 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 1 May 2023 14:42:51 -0400 Subject: [PATCH 05/28] Add a test showing that radio targets only have to semantically match their links. --- .../radio_link/identical_or_semantically_identical.org | 1 + 1 file changed, 1 insertion(+) create mode 100644 org_mode_samples/radio_link/identical_or_semantically_identical.org diff --git a/org_mode_samples/radio_link/identical_or_semantically_identical.org b/org_mode_samples/radio_link/identical_or_semantically_identical.org new file mode 100644 index 0000000..a99a24d --- /dev/null +++ b/org_mode_samples/radio_link/identical_or_semantically_identical.org @@ -0,0 +1 @@ +alpha *bar* baz foo <<<*bar* baz>>> lorem ipsum *bar* baz dolar. From 0014dfc21f89f018c4fca101fbcafbdfa96eab36 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 14 Jul 2023 16:18:04 -0400 Subject: [PATCH 06/28] Implement the radio link parser. The parser depends on the rematch_target parser which is not yet implemented. 
--- src/parser/radio_link.rs | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/src/parser/radio_link.rs b/src/parser/radio_link.rs index 2066f24..c375708 100644 --- a/src/parser/radio_link.rs +++ b/src/parser/radio_link.rs @@ -7,6 +7,8 @@ use nom::multi::many_till; use super::Context; use super::Object; +use crate::error::CustomError; +use crate::error::MyError; use crate::error::Res; use crate::parser::exiting::ExitClass; use crate::parser::object_parser::minimal_set_object; @@ -15,14 +17,43 @@ use crate::parser::parser_context::ExitMatcherNode; use crate::parser::parser_with_context::parser_with_context; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; -use crate::parser::util::not_yet_implemented; use crate::parser::RadioLink; use crate::parser::RadioTarget; #[tracing::instrument(ret, level = "debug")] pub fn radio_link<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, RadioLink<'s>> { - not_yet_implemented()?; - todo!(); + let radio_targets = context + .iter() + .filter_map(|context_element| match context_element.get_data() { + ContextElement::RadioTarget(targets) => Some(targets), + _ => None, + }) + .flatten(); + for radio_target in radio_targets { + let rematched_target = rematch_target(context, radio_target, input); + if let Ok((remaining, rematched_target)) = rematched_target { + let source = get_consumed(input, remaining); + return Ok(( + remaining, + RadioLink { + source, + children: rematched_target, + }, + )); + } + } + Err(nom::Err::Error(CustomError::MyError(MyError( + "NoRadioLink", + )))) +} + +#[tracing::instrument(ret, level = "debug")] +pub fn rematch_target<'r, 's>( + context: Context<'r, 's>, + target: &'r Vec>, + input: &'s str, +) -> Res<&'s str, Vec>> { + todo!() } #[tracing::instrument(ret, level = "debug")] From 66ae70e790b9d887fe3fb2b7b4257c46bc9fe075 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 14 Jul 2023 16:32:52 -0400 
Subject: [PATCH 07/28] Implement the rematch target parser. --- src/parser/radio_link.rs | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/parser/radio_link.rs b/src/parser/radio_link.rs index c375708..d9a94fe 100644 --- a/src/parser/radio_link.rs +++ b/src/parser/radio_link.rs @@ -53,7 +53,25 @@ pub fn rematch_target<'r, 's>( target: &'r Vec>, input: &'s str, ) -> Res<&'s str, Vec>> { - todo!() + let mut remaining = input; + let mut new_matches = Vec::with_capacity(target.len()); + for original_object in target { + match original_object { + // TODO: The rest of the minimal set of objects. + Object::PlainText(plaintext) => { + let (new_remaining, new_match) = plaintext.rematch_object(context, remaining)?; + remaining = new_remaining; + new_matches.push(new_match); + + } + _ => { + return Err(nom::Err::Error(CustomError::MyError(MyError( + "OnlyMinimalSetObjectsAllowed", + )))); + } + }; + } + Ok((remaining, new_matches)) } #[tracing::instrument(ret, level = "debug")] From e5c1b68b0e0768b05824ac5ed687698548418849 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 14 Jul 2023 16:51:23 -0400 Subject: [PATCH 08/28] Add a basic test showing the radio link rematching working on plain text. 
--- src/parser/radio_link.rs | 42 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/src/parser/radio_link.rs b/src/parser/radio_link.rs index d9a94fe..ee81609 100644 --- a/src/parser/radio_link.rs +++ b/src/parser/radio_link.rs @@ -62,7 +62,6 @@ pub fn rematch_target<'r, 's>( let (new_remaining, new_match) = plaintext.rematch_object(context, remaining)?; remaining = new_remaining; new_matches.push(new_match); - } _ => { return Err(nom::Err::Error(CustomError::MyError(MyError( @@ -112,3 +111,44 @@ pub trait RematchObject { input: &'s str, ) -> Res<&'s str, Object<'s>>; } + +#[cfg(test)] +mod tests { + use super::*; + use crate::parser::element_parser::element; + use crate::parser::parser_context::ContextElement; + use crate::parser::parser_context::ContextTree; + use crate::parser::parser_with_context::parser_with_context; + use crate::parser::source::Source; + use crate::parser::PlainText; + + #[test] + fn plain_text_radio_target() { + let input = "foo bar baz"; + let initial_context: ContextTree<'_, '_> = ContextTree::new(); + let document_context = initial_context + .with_additional_node(ContextElement::DocumentRoot(input)) + .with_additional_node(ContextElement::RadioTarget(vec![vec![Object::PlainText( + PlainText { source: "bar" }, + )]])); + let paragraph_matcher = parser_with_context!(element(true))(&document_context); + let (remaining, first_paragraph) = paragraph_matcher(input).expect("Parse first paragraph"); + let first_paragraph = match first_paragraph { + crate::parser::Element::Paragraph(paragraph) => paragraph, + _ => panic!("Should be a paragraph!"), + }; + assert_eq!(remaining, ""); + assert_eq!(first_paragraph.get_source(), "foo bar baz"); + assert_eq!(first_paragraph.children.len(), 3); + assert_eq!( + first_paragraph + .children + .get(1) + .expect("Len already asserted to be 3"), + &Object::RadioLink(RadioLink { + source: "bar", + children: vec![Object::PlainText(PlainText { source: "bar" })] + 
}) + ); + } +} From ddaefdbf78c40def520760a16dc3f049f80ae44e Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 14 Jul 2023 17:37:46 -0400 Subject: [PATCH 09/28] Nearly done implementing re-matching of bold, but running into a lifetime issue. --- src/parser/text_markup.rs | 54 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/src/parser/text_markup.rs b/src/parser/text_markup.rs index a7a9497..1ea98f6 100644 --- a/src/parser/text_markup.rs +++ b/src/parser/text_markup.rs @@ -12,8 +12,10 @@ use nom::combinator::recognize; use nom::combinator::verify; use nom::multi::many_till; use nom::sequence::terminated; +use nom::sequence::tuple; use tracing::span; +use super::radio_link::RematchObject; use super::Context; use crate::error::CustomError; use crate::error::MyError; @@ -23,6 +25,7 @@ use crate::parser::object_parser::standard_set_object; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ExitMatcherNode; use crate::parser::parser_with_context::parser_with_context; +use crate::parser::radio_link::rematch_target; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; use crate::parser::util::get_one_before; @@ -142,7 +145,6 @@ fn _text_markup_object<'r, 's, 'x>( } } - // TODO: Sometimes its plain text, not objects let (remaining, _close) = text_markup_end_specialized(context, remaining)?; let (remaining, _trailing_whitespace) = space0(remaining)?; Ok((remaining, children)) @@ -189,7 +191,6 @@ fn _text_markup_string<'r, 's, 'x>( } } - // TODO: Sometimes its plain text, not objects let (remaining, _close) = text_markup_end_specialized(context, remaining)?; let (remaining, _trailing_whitespace) = space0(remaining)?; Ok((remaining, contents)) @@ -242,3 +243,52 @@ fn _text_markup_end<'r, 's, 'x>( let source = get_consumed(input, remaining); Ok((remaining, source)) } + +impl<'x> RematchObject for Bold<'x> { + fn rematch_object<'r, 's>( + &'r self, + 
_context: Context<'r, 's>, + input: &'s str, + ) -> Res<&'s str, Object<'s>> { + let (remaining, children) = + _rematch_text_markup_object(_context, input, "*", &self.children)?; + let source = get_consumed(input, remaining); + Ok((remaining, Object::Bold(Bold { source, children }))) + } +} + +#[tracing::instrument(ret, level = "debug")] +fn _rematch_text_markup_object<'r, 's, 'x>( + context: Context<'r, 's>, + input: &'s str, + marker_symbol: &'static str, + original_match_children: &'x Vec>, +) -> Res<&'s str, Vec>> { + let (remaining, _) = pre(context, input)?; + let (remaining, open) = tag(marker_symbol)(remaining)?; + let (remaining, _peek_not_whitespace) = peek(not(multispace1))(remaining)?; + let text_markup_end_specialized = text_markup_end(open); + let parser_context = + context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Beta, + exit_matcher: &text_markup_end_specialized, + })); + + let (remaining, children) = + // TODO: This doesn't really check the exit matcher between each object. I think it may be possible to construct an org document that parses incorrectly with the current code. + rematch_target(&parser_context, original_match_children, remaining)?; + + { + let span = span!(tracing::Level::DEBUG, "Checking parent exit."); + let _enter = span.enter(); + if exit_matcher_parser(context, remaining).is_ok() { + return Err(nom::Err::Error(CustomError::MyError(MyError( + "Parent exit matcher is triggering.", + )))); + } + } + + let (remaining, _close) = text_markup_end_specialized(context, remaining)?; + let (remaining, _trailing_whitespace) = space0(remaining)?; + Ok((remaining, children)) +} From 4966b02b79f7bfca6b15b04dd34326677935712e Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 14 Jul 2023 17:45:20 -0400 Subject: [PATCH 10/28] Expanded rematch_target to 3 lifetimes, no fix. 
--- src/parser/radio_link.rs | 4 ++-- src/parser/text_markup.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/parser/radio_link.rs b/src/parser/radio_link.rs index ee81609..cd8fe0f 100644 --- a/src/parser/radio_link.rs +++ b/src/parser/radio_link.rs @@ -48,9 +48,9 @@ pub fn radio_link<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s } #[tracing::instrument(ret, level = "debug")] -pub fn rematch_target<'r, 's>( +pub fn rematch_target<'x, 'r, 's>( context: Context<'r, 's>, - target: &'r Vec>, + target: &'x Vec>, input: &'s str, ) -> Res<&'s str, Vec>> { let mut remaining = input; diff --git a/src/parser/text_markup.rs b/src/parser/text_markup.rs index 1ea98f6..09f5442 100644 --- a/src/parser/text_markup.rs +++ b/src/parser/text_markup.rs @@ -246,7 +246,7 @@ fn _text_markup_end<'r, 's, 'x>( impl<'x> RematchObject for Bold<'x> { fn rematch_object<'r, 's>( - &'r self, + &self, _context: Context<'r, 's>, input: &'s str, ) -> Res<&'s str, Object<'s>> { @@ -257,7 +257,7 @@ impl<'x> RematchObject for Bold<'x> { } } -#[tracing::instrument(ret, level = "debug")] +// #[tracing::instrument(ret, level = "debug")] fn _rematch_text_markup_object<'r, 's, 'x>( context: Context<'r, 's>, input: &'s str, From b9a7c3f7f34f09e928c997ab036503e7f5a5e981 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 14 Jul 2023 18:04:01 -0400 Subject: [PATCH 11/28] Fix lifetime issue. 
--- src/parser/plain_text.rs | 4 ++-- src/parser/radio_link.rs | 6 +++--- src/parser/text_markup.rs | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/parser/plain_text.rs b/src/parser/plain_text.rs index 123fabf..3a1a23e 100644 --- a/src/parser/plain_text.rs +++ b/src/parser/plain_text.rs @@ -37,9 +37,9 @@ fn plain_text_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s recognize(parser_with_context!(any_object_except_plain_text)(context))(input) } -impl<'x> RematchObject for PlainText<'x> { +impl<'x> RematchObject<'x> for PlainText<'x> { fn rematch_object<'r, 's>( - &self, + &'x self, _context: Context<'r, 's>, input: &'s str, ) -> Res<&'s str, Object<'s>> { diff --git a/src/parser/radio_link.rs b/src/parser/radio_link.rs index cd8fe0f..1efaa6b 100644 --- a/src/parser/radio_link.rs +++ b/src/parser/radio_link.rs @@ -50,7 +50,7 @@ pub fn radio_link<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s #[tracing::instrument(ret, level = "debug")] pub fn rematch_target<'x, 'r, 's>( context: Context<'r, 's>, - target: &'x Vec>, + target: &'x Vec>, input: &'s str, ) -> Res<&'s str, Vec>> { let mut remaining = input; @@ -104,9 +104,9 @@ fn radio_target_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s alt((tag("<"), tag(">"), line_ending))(input) } -pub trait RematchObject { +pub trait RematchObject<'x> { fn rematch_object<'r, 's>( - &self, + &'x self, context: Context<'r, 's>, input: &'s str, ) -> Res<&'s str, Object<'s>>; diff --git a/src/parser/text_markup.rs b/src/parser/text_markup.rs index 09f5442..fea011e 100644 --- a/src/parser/text_markup.rs +++ b/src/parser/text_markup.rs @@ -244,9 +244,9 @@ fn _text_markup_end<'r, 's, 'x>( Ok((remaining, source)) } -impl<'x> RematchObject for Bold<'x> { +impl<'x> RematchObject<'x> for Bold<'x> { fn rematch_object<'r, 's>( - &self, + &'x self, _context: Context<'r, 's>, input: &'s str, ) -> Res<&'s str, Object<'s>> { @@ -262,7 +262,7 @@ fn 
_rematch_text_markup_object<'r, 's, 'x>( context: Context<'r, 's>, input: &'s str, marker_symbol: &'static str, - original_match_children: &'x Vec>, + original_match_children: &'x Vec>, ) -> Res<&'s str, Vec>> { let (remaining, _) = pre(context, input)?; let (remaining, open) = tag(marker_symbol)(remaining)?; @@ -290,5 +290,5 @@ fn _rematch_text_markup_object<'r, 's, 'x>( let (remaining, _close) = text_markup_end_specialized(context, remaining)?; let (remaining, _trailing_whitespace) = space0(remaining)?; - Ok((remaining, children)) + Ok((remaining, Vec::new())) } From e4c6ca288041eecc3e16301b00c77b586cf35567 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 14 Jul 2023 18:05:31 -0400 Subject: [PATCH 12/28] Add tracing. --- src/parser/plain_text.rs | 1 + src/parser/text_markup.rs | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/parser/plain_text.rs b/src/parser/plain_text.rs index 3a1a23e..497535c 100644 --- a/src/parser/plain_text.rs +++ b/src/parser/plain_text.rs @@ -38,6 +38,7 @@ fn plain_text_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s } impl<'x> RematchObject<'x> for PlainText<'x> { + #[tracing::instrument(ret, level = "debug")] fn rematch_object<'r, 's>( &'x self, _context: Context<'r, 's>, diff --git a/src/parser/text_markup.rs b/src/parser/text_markup.rs index fea011e..d5bd200 100644 --- a/src/parser/text_markup.rs +++ b/src/parser/text_markup.rs @@ -245,6 +245,7 @@ fn _text_markup_end<'r, 's, 'x>( } impl<'x> RematchObject<'x> for Bold<'x> { + #[tracing::instrument(ret, level = "debug")] fn rematch_object<'r, 's>( &'x self, _context: Context<'r, 's>, @@ -257,7 +258,7 @@ impl<'x> RematchObject<'x> for Bold<'x> { } } -// #[tracing::instrument(ret, level = "debug")] +#[tracing::instrument(ret, level = "debug")] fn _rematch_text_markup_object<'r, 's, 'x>( context: Context<'r, 's>, input: &'s str, From 4ba1e63dde3a040c25beb1ac21d01a0074c38e83 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 14 Jul 2023 
18:12:04 -0400 Subject: [PATCH 13/28] Add a test for bold inside a radio target. --- src/parser/radio_link.rs | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/parser/radio_link.rs b/src/parser/radio_link.rs index 1efaa6b..9bb9860 100644 --- a/src/parser/radio_link.rs +++ b/src/parser/radio_link.rs @@ -120,6 +120,7 @@ mod tests { use crate::parser::parser_context::ContextTree; use crate::parser::parser_with_context::parser_with_context; use crate::parser::source::Source; + use crate::parser::Bold; use crate::parser::PlainText; #[test] @@ -151,4 +152,40 @@ mod tests { }) ); } + + #[test] + fn bold_radio_target() { + let input = "foo *bar* baz"; + let initial_context: ContextTree<'_, '_> = ContextTree::new(); + let document_context = initial_context + .with_additional_node(ContextElement::DocumentRoot(input)) + .with_additional_node(ContextElement::RadioTarget(vec![vec![Object::Bold( + Bold { + source: "*bar*", + children: vec![Object::PlainText(PlainText { source: "bar" })], + }, + )]])); + let paragraph_matcher = parser_with_context!(element(true))(&document_context); + let (remaining, first_paragraph) = paragraph_matcher(input).expect("Parse first paragraph"); + let first_paragraph = match first_paragraph { + crate::parser::Element::Paragraph(paragraph) => paragraph, + _ => panic!("Should be a paragraph!"), + }; + assert_eq!(remaining, ""); + assert_eq!(first_paragraph.get_source(), "foo *bar* baz"); + assert_eq!(first_paragraph.children.len(), 3); + assert_eq!( + first_paragraph + .children + .get(1) + .expect("Len already asserted to be 3"), + &Object::RadioLink(RadioLink { + source: "*bar*", + children: vec![Object::Bold(Bold { + source: "*bar*", + children: vec![Object::PlainText(PlainText { source: "bar" })] + })] + }) + ); + } } From 7ca8beac5a90255225c7fcb85e574fe5dcaf1d03 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 14 Jul 2023 18:27:09 -0400 Subject: [PATCH 14/28] Allow matching bolds in radio link 
targets. --- src/parser/radio_link.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/parser/radio_link.rs b/src/parser/radio_link.rs index 9bb9860..8754ee2 100644 --- a/src/parser/radio_link.rs +++ b/src/parser/radio_link.rs @@ -58,6 +58,11 @@ pub fn rematch_target<'x, 'r, 's>( for original_object in target { match original_object { // TODO: The rest of the minimal set of objects. + Object::Bold(bold) => { + let (new_remaining, new_match) = bold.rematch_object(context, remaining)?; + remaining = new_remaining; + new_matches.push(new_match); + } Object::PlainText(plaintext) => { let (new_remaining, new_match) = plaintext.rematch_object(context, remaining)?; remaining = new_remaining; From cdd3517655bbb5b67c5a5060ea58f4e5fa19c62a Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 14 Jul 2023 18:32:16 -0400 Subject: [PATCH 15/28] Include the trailing space for the bolds. --- src/parser/radio_link.rs | 4 ++-- src/parser/text_markup.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/parser/radio_link.rs b/src/parser/radio_link.rs index 8754ee2..5c8d773 100644 --- a/src/parser/radio_link.rs +++ b/src/parser/radio_link.rs @@ -185,9 +185,9 @@ mod tests { .get(1) .expect("Len already asserted to be 3"), &Object::RadioLink(RadioLink { - source: "*bar*", + source: "*bar* ", children: vec![Object::Bold(Bold { - source: "*bar*", + source: "*bar* ", children: vec![Object::PlainText(PlainText { source: "bar" })] })] }) diff --git a/src/parser/text_markup.rs b/src/parser/text_markup.rs index d5bd200..f2d85f3 100644 --- a/src/parser/text_markup.rs +++ b/src/parser/text_markup.rs @@ -291,5 +291,5 @@ fn _rematch_text_markup_object<'r, 's, 'x>( let (remaining, _close) = text_markup_end_specialized(context, remaining)?; let (remaining, _trailing_whitespace) = space0(remaining)?; - Ok((remaining, Vec::new())) + Ok((remaining, children)) } From ef2c3516966a65e81f475a08dadfb06254c8adc4 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 14 Jul 
2023 18:36:25 -0400 Subject: [PATCH 16/28] Expect fail the last radio link test. --- build.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/build.rs b/build.rs index 0d45c7c..c4305c3 100644 --- a/build.rs +++ b/build.rs @@ -81,6 +81,7 @@ fn is_expect_fail(name: &str) -> Option<&str> { "paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."), "radio_link_before_and_after" => Some("Matching the contents of radio targets not yet implemented."), "radio_link_simple" => Some("Matching the contents of radio targets not yet implemented."), + "radio_link_identical_or_semantically_identical" => Some("Would require having the 2-pass parsing implemented."), _ => None, } } From bd04451d5852ebd8ae8d77f25ab5bdfe5cf76f2b Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 14 Jul 2023 19:06:58 -0400 Subject: [PATCH 17/28] Implement the second parsing pass. --- src/parser/document.rs | 46 ++++++++++++++++++++++++++++-------- src/parser/parser_context.rs | 2 +- 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/src/parser/document.rs b/src/parser/document.rs index 98e570c..719b53b 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -96,8 +96,35 @@ pub fn document(input: &str) -> Res<&str, Document> { let initial_context: ContextTree<'_, '_> = ContextTree::new(); let document_context = initial_context.with_additional_node(ContextElement::DocumentRoot(input)); - let zeroth_section_matcher = parser_with_context!(zeroth_section)(&document_context); - let heading_matcher = parser_with_context!(heading)(&document_context); + let (remaining, document) = _document(&document_context, input)?; + { + // If there are radio targets in this document then we need to parse the entire document again with the knowledge of the radio targets. 
+ let all_radio_targets: Vec<&Vec>> = document + .iter_tokens() + .filter_map(|tkn| match tkn { + Token::Object(obj) => Some(obj), + _ => None, + }) + .filter_map(|obj| match obj { + Object::RadioTarget(rt) => Some(rt), + _ => None, + }) + .map(|rt| &rt.children) + .collect(); + if !all_radio_targets.is_empty() { + let document_context = document_context + .with_additional_node(ContextElement::RadioTarget(all_radio_targets)); + let (remaining, document) = _document(&document_context, input)?; + return Ok((remaining, document)); + } + } + Ok((remaining, document)) +} + +#[tracing::instrument(ret, level = "debug")] +pub fn _document<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Document<'s>> { + let zeroth_section_matcher = parser_with_context!(zeroth_section)(context); + let heading_matcher = parser_with_context!(heading)(context); let (remaining, _blank_lines) = many0(blank_line)(input)?; let (remaining, zeroth_section) = opt(zeroth_section_matcher)(remaining)?; let (remaining, children) = many0(heading_matcher)(remaining)?; @@ -268,14 +295,13 @@ impl<'s> Document<'s> { impl<'s> Heading<'s> { pub fn iter_tokens<'r>(&'r self) -> impl Iterator> { - self.title.iter().map(Token::Object).chain(self.children.iter().map( - |de| { - match de { - DocumentElement::Heading(obj) => Token::Heading(obj), - DocumentElement::Section(obj) => Token::Section(obj), - } - } - )) + self.title + .iter() + .map(Token::Object) + .chain(self.children.iter().map(|de| match de { + DocumentElement::Heading(obj) => Token::Heading(obj), + DocumentElement::Section(obj) => Token::Section(obj), + })) } } diff --git a/src/parser/parser_context.rs b/src/parser/parser_context.rs index 616b983..c49e099 100644 --- a/src/parser/parser_context.rs +++ b/src/parser/parser_context.rs @@ -140,7 +140,7 @@ pub enum ContextElement<'r, 's> { /// If any are found, this will force a 2nd parse through the /// org-mode document since text needs to be re-parsed to look for /// radio links matching 
the contents of radio targets. - RadioTarget(Vec>>), + RadioTarget(Vec<&'r Vec>>), } pub struct ExitMatcherNode<'r> { From 688779ba40e45e6cbe3daa2d21b55869d18743d1 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 14 Jul 2023 19:09:54 -0400 Subject: [PATCH 18/28] Fix tests. --- src/parser/radio_link.rs | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/parser/radio_link.rs b/src/parser/radio_link.rs index 5c8d773..e51927f 100644 --- a/src/parser/radio_link.rs +++ b/src/parser/radio_link.rs @@ -131,12 +131,11 @@ mod tests { #[test] fn plain_text_radio_target() { let input = "foo bar baz"; + let radio_target_match = vec![Object::PlainText(PlainText { source: "bar" })]; let initial_context: ContextTree<'_, '_> = ContextTree::new(); let document_context = initial_context .with_additional_node(ContextElement::DocumentRoot(input)) - .with_additional_node(ContextElement::RadioTarget(vec![vec![Object::PlainText( - PlainText { source: "bar" }, - )]])); + .with_additional_node(ContextElement::RadioTarget(vec![&radio_target_match])); let paragraph_matcher = parser_with_context!(element(true))(&document_context); let (remaining, first_paragraph) = paragraph_matcher(input).expect("Parse first paragraph"); let first_paragraph = match first_paragraph { @@ -161,15 +160,14 @@ mod tests { #[test] fn bold_radio_target() { let input = "foo *bar* baz"; + let radio_target_match = vec![Object::Bold(Bold { + source: "*bar*", + children: vec![Object::PlainText(PlainText { source: "bar" })], + })]; let initial_context: ContextTree<'_, '_> = ContextTree::new(); let document_context = initial_context .with_additional_node(ContextElement::DocumentRoot(input)) - .with_additional_node(ContextElement::RadioTarget(vec![vec![Object::Bold( - Bold { - source: "*bar*", - children: vec![Object::PlainText(PlainText { source: "bar" })], - }, - )]])); + .with_additional_node(ContextElement::RadioTarget(vec![&radio_target_match])); let paragraph_matcher = 
parser_with_context!(element(true))(&document_context); let (remaining, first_paragraph) = paragraph_matcher(input).expect("Parse first paragraph"); let first_paragraph = match first_paragraph { From 76187a0cb9932adce5752e97e1cabf3e6ad6178d Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 14 Jul 2023 19:11:51 -0400 Subject: [PATCH 19/28] Enable radio_link_simple test. This test does not yet pass, but this is goal-setting. --- build.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/build.rs b/build.rs index c4305c3..c780b33 100644 --- a/build.rs +++ b/build.rs @@ -80,7 +80,6 @@ fn is_expect_fail(name: &str) -> Option<&str> { "element_container_priority_section_greater_block" => Some("Need to implement subscript."), "paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."), "radio_link_before_and_after" => Some("Matching the contents of radio targets not yet implemented."), - "radio_link_simple" => Some("Matching the contents of radio targets not yet implemented."), "radio_link_identical_or_semantically_identical" => Some("Would require having the 2-pass parsing implemented."), _ => None, } From 0073af19e283910c8ad337329fdcb256f327d2bf Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 14 Jul 2023 19:54:41 -0400 Subject: [PATCH 20/28] Running into an issue returning different iterators from the same function. 
--- src/parser/element.rs | 1 + src/parser/token.rs | 27 +++++++++++++++++++++++++++ toy_language.txt | 4 +--- 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/src/parser/element.rs b/src/parser/element.rs index 3052ece..27efa21 100644 --- a/src/parser/element.rs +++ b/src/parser/element.rs @@ -20,6 +20,7 @@ use super::lesser_element::SrcBlock; use super::lesser_element::VerseBlock; use super::source::SetSource; use super::source::Source; +use super::token::Token; use super::Drawer; #[derive(Debug)] diff --git a/src/parser/token.rs b/src/parser/token.rs index 6685773..7f02a93 100644 --- a/src/parser/token.rs +++ b/src/parser/token.rs @@ -3,10 +3,37 @@ use super::Element; use super::Heading; use super::Object; use super::Section; +use crate::parser::DocumentElement; pub enum Token<'r, 's> { + Document(&'r Document<'s>), Heading(&'r Heading<'s>), Section(&'r Section<'s>), Object(&'r Object<'s>), Element(&'r Element<'s>), } + +impl<'r, 's> Token<'r, 's> { + pub fn iter_tokens(&self) -> impl Iterator<Item = Token<'r, 's>> { + match self { + Token::Document(document) => document + .zeroth_section + .iter() + .map(Token::Section) + .chain(document.children.iter().map(Token::Heading)), + Token::Heading(heading) => { + heading + .title + .iter() + .map(Token::Object) + .chain(heading.children.iter().map(|de| match de { + DocumentElement::Heading(ref obj) => Token::Heading(obj), + DocumentElement::Section(ref obj) => Token::Section(obj), + })) + } + Token::Section(section) => section.children.iter().map(Token::Element), + Token::Object(_) => panic!(), + Token::Element(_) => panic!(), + } + } +} diff --git a/toy_language.txt b/toy_language.txt index 64ee871..a29169b 100644 --- a/toy_language.txt +++ b/toy_language.txt @@ -1,3 +1 @@ -foo *bar /baz *lorem* ipsum/ dolar* alpha - -foo *bar /baz _lorem_ ipsum/ dolar* alpha +foo <<<*bar* baz>>> lorem ipsum *bar* baz dolar. 
From 793e560bd545295d67c281a2356fc06bbc35c2f9 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 14 Jul 2023 19:57:27 -0400 Subject: [PATCH 21/28] Boxing made it work. --- src/parser/token.rs | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/src/parser/token.rs b/src/parser/token.rs index 7f02a93..e7c3a97 100644 --- a/src/parser/token.rs +++ b/src/parser/token.rs @@ -14,24 +14,22 @@ pub enum Token<'r, 's> { } impl<'r, 's> Token<'r, 's> { - pub fn iter_tokens(&self) -> impl Iterator<Item = Token<'r, 's>> { + pub fn iter_tokens(&self) -> Box<dyn Iterator<Item = Token<'r, 's>> + '_> { match self { - Token::Document(document) => document - .zeroth_section - .iter() - .map(Token::Section) - .chain(document.children.iter().map(Token::Heading)), - Token::Heading(heading) => { - heading - .title + Token::Document(document) => Box::new( + document + .zeroth_section .iter() - .map(Token::Object) - .chain(heading.children.iter().map(|de| match de { - DocumentElement::Heading(ref obj) => Token::Heading(obj), - DocumentElement::Section(ref obj) => Token::Section(obj), - })) - } - Token::Section(section) => section.children.iter().map(Token::Element), + .map(Token::Section) + .chain(document.children.iter().map(Token::Heading)), + ), + Token::Heading(heading) => Box::new(heading.title.iter().map(Token::Object).chain( + heading.children.iter().map(|de| match de { + DocumentElement::Heading(ref obj) => Token::Heading(obj), + DocumentElement::Section(ref obj) => Token::Section(obj), + }), + )), + Token::Section(section) => Box::new(section.children.iter().map(Token::Element)), Token::Object(_) => panic!(), Token::Element(_) => panic!(), } From 0e73b83bf39925a0a53cd6b7a809f73fe51a2337 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 14 Jul 2023 20:09:24 -0400 Subject: [PATCH 22/28] Filling in more of the iter_tokens tree. 
--- src/parser/token.rs | 45 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/src/parser/token.rs b/src/parser/token.rs index e7c3a97..028f859 100644 --- a/src/parser/token.rs +++ b/src/parser/token.rs @@ -2,6 +2,7 @@ use super::Document; use super::Element; use super::Heading; use super::Object; +use super::PlainListItem; use super::Section; use crate::parser::DocumentElement; @@ -11,6 +12,7 @@ pub enum Token<'r, 's> { Section(&'r Section<'s>), Object(&'r Object<'s>), Element(&'r Element<'s>), + PlainListItem(&'r PlainListItem<'s>), } impl<'r, 's> Token<'r, 's> { @@ -30,8 +32,47 @@ impl<'r, 's> Token<'r, 's> { }), )), Token::Section(section) => Box::new(section.children.iter().map(Token::Element)), - Token::Object(_) => panic!(), - Token::Element(_) => panic!(), + Token::Object(obj) => match obj { + Object::Bold(_) => todo!(), + Object::Italic(_) => todo!(), + Object::Underline(_) => todo!(), + Object::StrikeThrough(_) => todo!(), + Object::Code(_) => todo!(), + Object::Verbatim(_) => todo!(), + Object::PlainText(_) => todo!(), + Object::RegularLink(_) => todo!(), + Object::RadioLink(_) => todo!(), + Object::RadioTarget(_) => todo!(), + Object::PlainLink(_) => todo!(), + Object::AngleLink(_) => todo!(), + Object::OrgMacro(_) => todo!(), + }, + Token::Element(elem) => match elem { + Element::Paragraph(inner) => Box::new(inner.children.iter().map(Token::Object)), + Element::PlainList(inner) => { + Box::new(inner.children.iter().map(Token::PlainListItem)) + } + Element::GreaterBlock(inner) => Box::new(inner.children.iter().map(Token::Element)), + Element::DynamicBlock(_) => todo!(), + Element::FootnoteDefinition(_) => todo!(), + Element::Comment(_) => todo!(), + Element::Drawer(_) => todo!(), + Element::PropertyDrawer(_) => todo!(), + Element::Table(_) => todo!(), + Element::VerseBlock(_) => todo!(), + Element::CommentBlock(_) => todo!(), + Element::ExampleBlock(_) => todo!(), + Element::ExportBlock(_) => 
todo!(), + Element::SrcBlock(_) => todo!(), + Element::Clock(_) => todo!(), + Element::DiarySexp(_) => todo!(), + Element::Planning(_) => todo!(), + Element::FixedWidthArea(_) => todo!(), + Element::HorizontalRule(_) => todo!(), + Element::Keyword(_) => todo!(), + Element::LatexEnvironment(_) => todo!(), + }, + Token::PlainListItem(elem) => Box::new(elem.children.iter().map(Token::Element)), } } } From 08e6efe5f59302a552cea2732917453c411dad8e Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 14 Jul 2023 20:18:30 -0400 Subject: [PATCH 23/28] Filling in more of the iter_tokens tree. --- src/parser/token.rs | 44 ++++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/src/parser/token.rs b/src/parser/token.rs index 028f859..dbfbdda 100644 --- a/src/parser/token.rs +++ b/src/parser/token.rs @@ -4,6 +4,8 @@ use super::Heading; use super::Object; use super::PlainListItem; use super::Section; +use super::TableCell; +use super::TableRow; use crate::parser::DocumentElement; pub enum Token<'r, 's> { @@ -13,6 +15,8 @@ pub enum Token<'r, 's> { Object(&'r Object<'s>), Element(&'r Element<'s>), PlainListItem(&'r PlainListItem<'s>), + TableRow(&'r TableRow<'s>), + TableCell(&'r TableCell<'s>), } impl<'r, 's> Token<'r, 's> { @@ -53,26 +57,30 @@ impl<'r, 's> Token<'r, 's> { Box::new(inner.children.iter().map(Token::PlainListItem)) } Element::GreaterBlock(inner) => Box::new(inner.children.iter().map(Token::Element)), - Element::DynamicBlock(_) => todo!(), - Element::FootnoteDefinition(_) => todo!(), - Element::Comment(_) => todo!(), - Element::Drawer(_) => todo!(), - Element::PropertyDrawer(_) => todo!(), - Element::Table(_) => todo!(), - Element::VerseBlock(_) => todo!(), - Element::CommentBlock(_) => todo!(), - Element::ExampleBlock(_) => todo!(), - Element::ExportBlock(_) => todo!(), - Element::SrcBlock(_) => todo!(), - Element::Clock(_) => todo!(), - Element::DiarySexp(_) => todo!(), - Element::Planning(_) => todo!(), - 
Element::FixedWidthArea(_) => todo!(), - Element::HorizontalRule(_) => todo!(), - Element::Keyword(_) => todo!(), - Element::LatexEnvironment(_) => todo!(), + Element::DynamicBlock(inner) => Box::new(inner.children.iter().map(Token::Element)), + Element::FootnoteDefinition(inner) => { + Box::new(inner.children.iter().map(Token::Element)) + } + Element::Comment(_) => Box::new(std::iter::empty()), + Element::Drawer(inner) => Box::new(inner.children.iter().map(Token::Element)), + Element::PropertyDrawer(_) => Box::new(std::iter::empty()), + Element::Table(inner) => Box::new(inner.children.iter().map(Token::TableRow)), + Element::VerseBlock(inner) => Box::new(inner.children.iter().map(Token::Object)), + Element::CommentBlock(_) => Box::new(std::iter::empty()), + Element::ExampleBlock(_) => Box::new(std::iter::empty()), + Element::ExportBlock(_) => Box::new(std::iter::empty()), + Element::SrcBlock(_) => Box::new(std::iter::empty()), + Element::Clock(_) => Box::new(std::iter::empty()), + Element::DiarySexp(_) => Box::new(std::iter::empty()), + Element::Planning(_) => Box::new(std::iter::empty()), + Element::FixedWidthArea(_) => Box::new(std::iter::empty()), + Element::HorizontalRule(_) => Box::new(std::iter::empty()), + Element::Keyword(_) => Box::new(std::iter::empty()), + Element::LatexEnvironment(_) => Box::new(std::iter::empty()), }, Token::PlainListItem(elem) => Box::new(elem.children.iter().map(Token::Element)), + Token::TableRow(elem) => Box::new(elem.children.iter().map(Token::TableCell)), + Token::TableCell(elem) => Box::new(elem.children.iter().map(Token::Object)), } } } From b27f911ff3879fe7ae599696169d09c827adebc6 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 14 Jul 2023 20:24:06 -0400 Subject: [PATCH 24/28] Finish implementing token iteration. 
--- src/parser/token.rs | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/parser/token.rs b/src/parser/token.rs index dbfbdda..5dc7e20 100644 --- a/src/parser/token.rs +++ b/src/parser/token.rs @@ -37,19 +37,19 @@ impl<'r, 's> Token<'r, 's> { )), Token::Section(section) => Box::new(section.children.iter().map(Token::Element)), Token::Object(obj) => match obj { - Object::Bold(_) => todo!(), - Object::Italic(_) => todo!(), - Object::Underline(_) => todo!(), - Object::StrikeThrough(_) => todo!(), - Object::Code(_) => todo!(), - Object::Verbatim(_) => todo!(), - Object::PlainText(_) => todo!(), - Object::RegularLink(_) => todo!(), - Object::RadioLink(_) => todo!(), - Object::RadioTarget(_) => todo!(), - Object::PlainLink(_) => todo!(), - Object::AngleLink(_) => todo!(), - Object::OrgMacro(_) => todo!(), + Object::Bold(inner) => Box::new(inner.children.iter().map(Token::Object)), + Object::Italic(inner) => Box::new(inner.children.iter().map(Token::Object)), + Object::Underline(inner) => Box::new(inner.children.iter().map(Token::Object)), + Object::StrikeThrough(inner) => Box::new(inner.children.iter().map(Token::Object)), + Object::Code(_) => Box::new(std::iter::empty()), + Object::Verbatim(_) => Box::new(std::iter::empty()), + Object::PlainText(_) => Box::new(std::iter::empty()), + Object::RegularLink(_) => Box::new(std::iter::empty()), + Object::RadioLink(inner) => Box::new(inner.children.iter().map(Token::Object)), + Object::RadioTarget(inner) => Box::new(inner.children.iter().map(Token::Object)), + Object::PlainLink(_) => Box::new(std::iter::empty()), + Object::AngleLink(_) => Box::new(std::iter::empty()), + Object::OrgMacro(_) => Box::new(std::iter::empty()), }, Token::Element(elem) => match elem { Element::Paragraph(inner) => Box::new(inner.children.iter().map(Token::Object)), From e608b73d1a8635564daa5605564a33ee1cee2e31 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 14 Jul 2023 20:45:31 -0400 Subject: 
[PATCH 25/28] Implement all-token iteration. Radio targets are now being properly detected and they trigger re-parses but the tests do not yet pass. --- src/parser/document.rs | 24 +---------- src/parser/token.rs | 91 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 22 deletions(-) diff --git a/src/parser/document.rs b/src/parser/document.rs index 719b53b..9a08331 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -18,6 +18,7 @@ use super::element::Element; use super::object::Object; use super::parser_with_context::parser_with_context; use super::source::Source; +use super::token::AllTokensIterator; use super::token::Token; use super::util::exit_matcher_parser; use super::util::get_consumed; @@ -286,27 +287,6 @@ fn headline_end<'r, 's>(_context: Context<'r, 's>, input: &'s str) -> Res<&'s st impl<'s> Document<'s> { pub fn iter_tokens<'r>(&'r self) -> impl Iterator<Item = Token<'r, 's>> { - self.zeroth_section - .iter() - .map(Token::Section) - .chain(self.children.iter().map(Token::Heading)) - } -} - -impl<'s> Heading<'s> { - pub fn iter_tokens<'r>(&'r self) -> impl Iterator<Item = Token<'r, 's>> { - self.title - .iter() - .map(Token::Object) - .chain(self.children.iter().map(|de| match de { - DocumentElement::Heading(obj) => Token::Heading(obj), - DocumentElement::Section(obj) => Token::Section(obj), - })) - } -} - -impl<'s> Section<'s> { - pub fn iter_tokens<'r>(&'r self) -> impl Iterator<Item = Token<'r, 's>> { - self.children.iter().map(Token::Element) + AllTokensIterator::new(Token::Document(self)) } } diff --git a/src/parser/token.rs b/src/parser/token.rs index 5dc7e20..940697e 100644 --- a/src/parser/token.rs +++ b/src/parser/token.rs @@ -1,3 +1,5 @@ +use std::collections::VecDeque; + use super::Document; use super::Element; use super::Heading; @@ -83,4 +85,93 @@ impl<'r, 's> Token<'r, 's> { Token::TableCell(elem) => Box::new(elem.children.iter().map(Token::Object)), } } + + pub fn all_tokens_no_order(&self) -> Box<dyn Iterator<Item = Token<'r, 's>> + '_> { + match self { + Token::Document(document) => 
Box::new( + document + .zeroth_section + .iter() + .map(Token::Section) + .chain(document.children.iter().map(Token::Heading)), + ), + Token::Heading(heading) => Box::new(heading.title.iter().map(Token::Object).chain( + heading.children.iter().map(|de| match de { + DocumentElement::Heading(ref obj) => Token::Heading(obj), + DocumentElement::Section(ref obj) => Token::Section(obj), + }), + )), + Token::Section(section) => Box::new(section.children.iter().map(Token::Element)), + Token::Object(obj) => match obj { + Object::Bold(inner) => Box::new(inner.children.iter().map(Token::Object)), + Object::Italic(inner) => Box::new(inner.children.iter().map(Token::Object)), + Object::Underline(inner) => Box::new(inner.children.iter().map(Token::Object)), + Object::StrikeThrough(inner) => Box::new(inner.children.iter().map(Token::Object)), + Object::Code(_) => Box::new(std::iter::empty()), + Object::Verbatim(_) => Box::new(std::iter::empty()), + Object::PlainText(_) => Box::new(std::iter::empty()), + Object::RegularLink(_) => Box::new(std::iter::empty()), + Object::RadioLink(inner) => Box::new(inner.children.iter().map(Token::Object)), + Object::RadioTarget(inner) => Box::new(inner.children.iter().map(Token::Object)), + Object::PlainLink(_) => Box::new(std::iter::empty()), + Object::AngleLink(_) => Box::new(std::iter::empty()), + Object::OrgMacro(_) => Box::new(std::iter::empty()), + }, + Token::Element(elem) => match elem { + Element::Paragraph(inner) => Box::new(inner.children.iter().map(Token::Object)), + Element::PlainList(inner) => { + Box::new(inner.children.iter().map(Token::PlainListItem)) + } + Element::GreaterBlock(inner) => Box::new(inner.children.iter().map(Token::Element)), + Element::DynamicBlock(inner) => Box::new(inner.children.iter().map(Token::Element)), + Element::FootnoteDefinition(inner) => { + Box::new(inner.children.iter().map(Token::Element)) + } + Element::Comment(_) => Box::new(std::iter::empty()), + Element::Drawer(inner) => 
Box::new(inner.children.iter().map(Token::Element)), + Element::PropertyDrawer(_) => Box::new(std::iter::empty()), + Element::Table(inner) => Box::new(inner.children.iter().map(Token::TableRow)), + Element::VerseBlock(inner) => Box::new(inner.children.iter().map(Token::Object)), + Element::CommentBlock(_) => Box::new(std::iter::empty()), + Element::ExampleBlock(_) => Box::new(std::iter::empty()), + Element::ExportBlock(_) => Box::new(std::iter::empty()), + Element::SrcBlock(_) => Box::new(std::iter::empty()), + Element::Clock(_) => Box::new(std::iter::empty()), + Element::DiarySexp(_) => Box::new(std::iter::empty()), + Element::Planning(_) => Box::new(std::iter::empty()), + Element::FixedWidthArea(_) => Box::new(std::iter::empty()), + Element::HorizontalRule(_) => Box::new(std::iter::empty()), + Element::Keyword(_) => Box::new(std::iter::empty()), + Element::LatexEnvironment(_) => Box::new(std::iter::empty()), + }, + Token::PlainListItem(elem) => Box::new(elem.children.iter().map(Token::Element)), + Token::TableRow(elem) => Box::new(elem.children.iter().map(Token::TableCell)), + Token::TableCell(elem) => Box::new(elem.children.iter().map(Token::Object)), + } + } +} + +pub struct AllTokensIterator<'r, 's> { + queued_tokens: VecDeque<Token<'r, 's>>, +} + +impl<'r, 's> AllTokensIterator<'r, 's> { + pub fn new(tkn: Token<'r, 's>) -> Self { + let mut queued_tokens = VecDeque::new(); + queued_tokens.push_back(tkn); + AllTokensIterator { queued_tokens } + } +} + +impl<'r, 's> Iterator for AllTokensIterator<'r, 's> { + type Item = Token<'r, 's>; + + fn next(&mut self) -> Option<Self::Item> { + let next_token = match self.queued_tokens.pop_front() { + Some(tkn) => tkn, + None => return None, + }; + self.queued_tokens.extend(next_token.iter_tokens()); + Some(next_token) + } } From 27d863b87527fa2676d58ef2362b9162d004d04b Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 14 Jul 2023 20:55:16 -0400 Subject: [PATCH 26/28] Fix the simple test by allowing bold to start/end with <> and by capturing 
trailing whitespace from radio links. --- src/parser/radio_link.rs | 1 + src/parser/text_markup.rs | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/parser/radio_link.rs b/src/parser/radio_link.rs index e51927f..d091977 100644 --- a/src/parser/radio_link.rs +++ b/src/parser/radio_link.rs @@ -32,6 +32,7 @@ pub fn radio_link<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s for radio_target in radio_targets { let rematched_target = rematch_target(context, radio_target, input); if let Ok((remaining, rematched_target)) = rematched_target { + let (remaining, _) = space0(remaining)?; let source = get_consumed(input, remaining); return Ok(( remaining, diff --git a/src/parser/text_markup.rs b/src/parser/text_markup.rs index f2d85f3..f6fb816 100644 --- a/src/parser/text_markup.rs +++ b/src/parser/text_markup.rs @@ -205,7 +205,7 @@ pub fn pre<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> match preceding_character { // If None, we are at the start of the file which is technically the beginning of a line. None | Some('\r') | Some('\n') | Some(' ') | Some('\t') | Some('-') | Some('(') - | Some('{') | Some('\'') | Some('"') => {} + | Some('{') | Some('\'') | Some('"') | Some('<') => {} Some(_) => { // Not at start of line, cannot be a heading return Err(nom::Err::Error(CustomError::MyError(MyError( @@ -218,7 +218,7 @@ pub fn pre<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> #[tracing::instrument(ret, level = "debug")] pub fn post<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> { - let (remaining, _) = alt((recognize(one_of(" \r\n\t-.,;:!?')}[\"")), line_ending))(input)?; + let (remaining, _) = alt((recognize(one_of(" \r\n\t-.,;:!?')}[\">")), line_ending))(input)?; Ok((remaining, ())) } From 167ffa650c2736230452fed384c4e6b1efaefccf Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 14 Jul 2023 20:58:11 -0400 Subject: [PATCH 27/28] Enable another test. 
--- build.rs | 1 - src/parser/radio_link.rs | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/build.rs b/build.rs index c780b33..bf10e49 100644 --- a/build.rs +++ b/build.rs @@ -79,7 +79,6 @@ fn is_expect_fail(name: &str) -> Option<&str> { "element_container_priority_greater_block_greater_block" => Some("Need to implement subscript."), "element_container_priority_section_greater_block" => Some("Need to implement subscript."), "paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."), - "radio_link_before_and_after" => Some("Matching the contents of radio targets not yet implemented."), "radio_link_identical_or_semantically_identical" => Some("Would require having the 2-pass parsing implemented."), _ => None, } diff --git a/src/parser/radio_link.rs b/src/parser/radio_link.rs index d091977..57fe3f8 100644 --- a/src/parser/radio_link.rs +++ b/src/parser/radio_link.rs @@ -152,7 +152,7 @@ mod tests { .get(1) .expect("Len already asserted to be 3"), &Object::RadioLink(RadioLink { - source: "bar", + source: "bar ", children: vec![Object::PlainText(PlainText { source: "bar" })] }) ); From 97f956d0bf78cc614ae39ff5304d206c06bbb8b5 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 14 Jul 2023 20:59:00 -0400 Subject: [PATCH 28/28] Enable another test. 
--- build.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/build.rs b/build.rs index bf10e49..bb99b66 100644 --- a/build.rs +++ b/build.rs @@ -79,7 +79,6 @@ fn is_expect_fail(name: &str) -> Option<&str> { "element_container_priority_greater_block_greater_block" => Some("Need to implement subscript."), "element_container_priority_section_greater_block" => Some("Need to implement subscript."), "paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."), - "radio_link_identical_or_semantically_identical" => Some("Would require having the 2-pass parsing implemented."), _ => None, } }