From a5627d0ceea6e17e9d5947a6fb3a0e2b92dba9f5 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 31 Oct 2023 18:13:21 -0400 Subject: [PATCH] Do not parse the lesser block contents during parsing, but rather only if the contents are requested. This seemed like an unnecessary allocation during parsing, especially considering we throw away some parses based on whether or not we found radio targets in the source. --- src/compare/diff.rs | 6 +- src/parser/lesser_block.rs | 82 ++----------------------- src/types/lesser_element.rs | 118 +++++++++++++++++++++++++++++++++++- 3 files changed, 124 insertions(+), 82 deletions(-) diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 6054b74..0dbf851 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -1576,7 +1576,7 @@ fn compare_example_block<'b, 's>( [], ( EmacsField::Required(":value"), - |r| Some(&r.contents), + |r| Some(r.get_contents()), compare_property_quoted_string ), ( @@ -1654,7 +1654,7 @@ fn compare_export_block<'b, 's>( ), ( EmacsField::Required(":value"), - |r| Some(&r.contents), + |r| Some(r.get_contents()), compare_property_quoted_string ) ) { @@ -1702,7 +1702,7 @@ fn compare_src_block<'b, 's>( ), ( EmacsField::Required(":value"), - |r| Some(&r.contents), + |r| Some(r.get_contents()), compare_property_quoted_string ), ( diff --git a/src/parser/lesser_block.rs b/src/parser/lesser_block.rs index bc2a9b6..1241cf2 100644 --- a/src/parser/lesser_block.rs +++ b/src/parser/lesser_block.rs @@ -1,5 +1,3 @@ -use std::borrow::Cow; - use nom::branch::alt; use nom::bytes::complete::is_not; use nom::bytes::complete::tag; @@ -204,7 +202,7 @@ where let parser_context = parser_context.with_additional_node(&contexts[1]); let parser_context = parser_context.with_additional_node(&contexts[2]); - let (remaining, contents) = content(&parser_context, remaining)?; + let (remaining, contents) = text_until_exit(&parser_context, remaining)?; let (remaining, _end) = lesser_block_end_specialized(&parser_context, 
remaining)?; let (remaining, _trailing_ws) = @@ -238,7 +236,7 @@ where retain_labels, use_labels, label_format, - contents, + contents: Into::<&str>::into(contents), }, )) } @@ -278,7 +276,7 @@ where let parser_context = parser_context.with_additional_node(&contexts[1]); let parser_context = parser_context.with_additional_node(&contexts[2]); - let (remaining, contents) = content(&parser_context, remaining)?; + let (remaining, contents) = text_until_exit(&parser_context, remaining)?; let (remaining, _end) = lesser_block_end_specialized(&parser_context, remaining)?; let (remaining, _trailing_ws) = @@ -294,7 +292,7 @@ where ), export_type: export_type.map(Into::<&str>::into), data: parameters.map(Into::<&str>::into), - contents, + contents: Into::<&str>::into(contents), }, )) } @@ -333,7 +331,7 @@ where let parser_context = context.with_additional_node(&contexts[0]); let parser_context = parser_context.with_additional_node(&contexts[1]); let parser_context = parser_context.with_additional_node(&contexts[2]); - let (remaining, contents) = content(&parser_context, remaining)?; + let (remaining, contents) = text_until_exit(&parser_context, remaining)?; let (remaining, _end) = lesser_block_end_specialized(&parser_context, remaining)?; let (remaining, _trailing_ws) = @@ -373,7 +371,7 @@ where retain_labels, use_labels, label_format, - contents, + contents: Into::<&str>::into(contents), }, )) } @@ -652,71 +650,3 @@ fn switch_word<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> { is_not(" \t\r\n"), ))(input) } - -enum ContentState { - Normal, - Modified(String), -} - -#[cfg_attr( - feature = "tracing", - tracing::instrument(ret, level = "debug", skip(context)) -)] -pub(crate) fn content<'b, 'g, 'r, 's>( - context: RefContext<'b, 'g, 'r, 's>, - input: OrgSource<'s>, -) -> Res<OrgSource<'s>, Cow<'s, str>> { - let mut state = ContentState::Normal; - let mut remaining = input; - let exit_matcher_parser = parser_with_context!(exit_matcher_parser)(context); - loop { - if 
exit_matcher_parser(remaining).is_ok() { - break; - } - - let (remain, (pre_escape_whitespace, line)) = content_line(remaining)?; - if let Some(val) = pre_escape_whitespace { - if let ContentState::Modified(ref mut ret) = state { - ret.push_str(Into::<&str>::into(val)); - } else { - let mut ret = String::new(); - ret.push_str(Into::<&str>::into(input.get_until(remaining))); - ret.push_str(Into::<&str>::into(val)); - state = ContentState::Modified(ret); - } - } - if let ContentState::Modified(ref mut ret) = state { - ret.push_str(line.into()); - } - remaining = remain; - } - - match state { - ContentState::Normal => Ok(( - remaining, - Cow::Borrowed(Into::<&str>::into(input.get_until(remaining))), - )), - ContentState::Modified(ret) => Ok((remaining, Cow::Owned(ret))), - } -} - -#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn content_line<'s>( - input: OrgSource<'s>, -) -> Res<OrgSource<'s>, (Option<OrgSource<'s>>, OrgSource<'s>)> { - let (remaining, pre_escape_whitespace) = opt(map( - tuple(( - recognize(tuple(( - space0, - many_till( - tag(","), - peek(tuple((tag(","), alt((tag("#+"), tag("*")))))), - ), - ))), - tag(","), - )), - |(pre_comma, _)| pre_comma, - ))(input)?; - let (remaining, line_post_escape) = recognize(many_till(anychar, line_ending))(remaining)?; - Ok((remaining, (pre_escape_whitespace, line_post_escape))) -} diff --git a/src/types/lesser_element.rs b/src/types/lesser_element.rs index 868c175..41097e4 100644 --- a/src/types/lesser_element.rs +++ b/src/types/lesser_element.rs @@ -1,11 +1,25 @@ use std::borrow::Cow; +use nom::branch::alt; +use nom::bytes::complete::tag; +use nom::character::complete::anychar; +use nom::character::complete::line_ending; +use nom::character::complete::space0; +use nom::combinator::map; +use nom::combinator::opt; +use nom::combinator::peek; +use nom::combinator::recognize; +use nom::multi::many_till; +use nom::sequence::tuple; + use super::object::Object; use super::AffiliatedKeywords; use 
super::GetAffiliatedKeywords; use super::PlainText; use super::StandardProperties; use super::Timestamp; +use crate::error::CustomError; +use crate::error::Res; #[derive(Debug)] pub struct Paragraph<'s> { @@ -61,7 +75,7 @@ pub struct ExampleBlock<'s> { pub retain_labels: RetainLabels, pub use_labels: bool, pub label_format: Option<&'s str>, - pub contents: Cow<'s, str>, + pub contents: &'s str, } #[derive(Debug)] @@ -70,7 +84,7 @@ pub struct ExportBlock<'s> { pub affiliated_keywords: AffiliatedKeywords<'s>, pub export_type: Option<&'s str>, pub data: Option<&'s str>, - pub contents: Cow<'s, str>, + pub contents: &'s str, } #[derive(Debug)] @@ -85,7 +99,7 @@ pub struct SrcBlock<'s> { pub retain_labels: RetainLabels, pub use_labels: bool, pub label_format: Option<&'s str>, - pub contents: Cow<'s, str>, + pub contents: &'s str, } #[derive(Debug)] @@ -296,6 +310,13 @@ impl<'s> FixedWidthArea<'s> { } } +impl<'s> ExampleBlock<'s> { + /// Gets the contents of the lesser block, handling the escaping of lines with leading commas. + pub fn get_contents(&self) -> Cow<'s, str> { + lesser_block_content(self.contents).expect("This parser should never fail.") + } +} + impl<'s> ExportBlock<'s> { /// Gets the export type capitalized. /// @@ -303,6 +324,18 @@ impl<'s> ExportBlock<'s> { pub fn get_export_type(&self) -> Option<String> { self.export_type.map(|s| s.to_uppercase()) } + + /// Gets the contents of the lesser block, handling the escaping of lines with leading commas. + pub fn get_contents(&self) -> Cow<'s, str> { + lesser_block_content(self.contents).expect("This parser should never fail.") + } +} + +impl<'s> SrcBlock<'s> { + /// Gets the contents of the lesser block, handling the escaping of lines with leading commas. 
+ pub fn get_contents(&self) -> Cow<'s, str> { + lesser_block_content(self.contents).expect("This parser should never fail.") + } } impl<'s> GetAffiliatedKeywords<'s> for Paragraph<'s> { @@ -376,3 +409,82 @@ impl<'s> GetAffiliatedKeywords<'s> for VerseBlock<'s> { &self.affiliated_keywords } } + +enum ContentState { + Normal, + Modified(String), +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn lesser_block_content<'s>(input: &'s str) -> Result<Cow<'s, str>, CustomError> { + let mut state = ContentState::Normal; + let mut remaining = input; + loop { + if remaining.is_empty() { + break; + } + + let (remain, (pre_escape_whitespace, line)) = + content_line(remaining).map_err(|err| match err { + nom::Err::Incomplete(_) => panic!("This parser does not support streaming."), + nom::Err::Error(e) => e, + nom::Err::Failure(e) => e, + })?; + if let Some(val) = pre_escape_whitespace { + if let ContentState::Modified(ref mut ret) = state { + ret.push_str(val); + } else { + let mut ret = String::new(); + ret.push_str(get_str_until(input, remaining)); + ret.push_str(val); + state = ContentState::Modified(ret); + } + } + if let ContentState::Modified(ref mut ret) = state { + ret.push_str(line); + } + remaining = remain; + } + + match state { + ContentState::Normal => Ok(Cow::Borrowed(get_str_until(input, remaining))), + ContentState::Modified(ret) => Ok(Cow::Owned(ret)), + } +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn content_line<'s>(input: &'s str) -> Res<&'s str, (Option<&'s str>, &'s str)> { + let (remaining, pre_escape_whitespace) = opt(map( + tuple(( + recognize(tuple(( + space0, + many_till( + tag(","), + peek(tuple((tag(","), alt((tag("#+"), tag("*")))))), + ), + ))), + tag(","), + )), + |(pre_comma, _)| pre_comma, + ))(input)?; + let (remaining, line_post_escape) = recognize(many_till(anychar, line_ending))(remaining)?; + Ok((remaining, (pre_escape_whitespace, line_post_escape))) +} + +#[cfg_attr(feature = 
"tracing", tracing::instrument(ret, level = "debug"))] +/// Check if the child string slice is a slice of the parent string slice. +fn is_slice_of(parent: &str, child: &str) -> bool { + let parent_start = parent.as_ptr() as usize; + let parent_end = parent_start + parent.len(); + let child_start = child.as_ptr() as usize; + let child_end = child_start + child.len(); + child_start >= parent_start && child_end <= parent_end +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn get_str_until<'s>(parent: &'s str, child: &'s str) -> &'s str { + debug_assert!(is_slice_of(parent, child)); + let parent_start = parent.as_ptr() as usize; + let child_start = child.as_ptr() as usize; + &parent[..(child_start - parent_start)] +}