Do not unescape the lesser block contents during parsing; do it lazily, only when the contents are requested.
rust-test Build rust-test has failed Details
clippy Build clippy has succeeded Details
rust-foreign-document-test Build rust-foreign-document-test has succeeded Details
rust-build Build rust-build has succeeded Details

This seemed like an unnecessary allocation during parsing, especially considering we throw away some parses based on whether or not we found radio targets in the source.
This commit is contained in:
Tom Alexander 2023-10-31 18:13:21 -04:00
parent 93cfa71df2
commit a5627d0cee
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
3 changed files with 124 additions and 82 deletions

View File

@ -1576,7 +1576,7 @@ fn compare_example_block<'b, 's>(
[],
(
EmacsField::Required(":value"),
|r| Some(&r.contents),
|r| Some(r.get_contents()),
compare_property_quoted_string
),
(
@ -1654,7 +1654,7 @@ fn compare_export_block<'b, 's>(
),
(
EmacsField::Required(":value"),
|r| Some(&r.contents),
|r| Some(r.get_contents()),
compare_property_quoted_string
)
) {
@ -1702,7 +1702,7 @@ fn compare_src_block<'b, 's>(
),
(
EmacsField::Required(":value"),
|r| Some(&r.contents),
|r| Some(r.get_contents()),
compare_property_quoted_string
),
(

View File

@ -1,5 +1,3 @@
use std::borrow::Cow;
use nom::branch::alt;
use nom::bytes::complete::is_not;
use nom::bytes::complete::tag;
@ -204,7 +202,7 @@ where
let parser_context = parser_context.with_additional_node(&contexts[1]);
let parser_context = parser_context.with_additional_node(&contexts[2]);
let (remaining, contents) = content(&parser_context, remaining)?;
let (remaining, contents) = text_until_exit(&parser_context, remaining)?;
let (remaining, _end) = lesser_block_end_specialized(&parser_context, remaining)?;
let (remaining, _trailing_ws) =
@ -238,7 +236,7 @@ where
retain_labels,
use_labels,
label_format,
contents,
contents: Into::<&str>::into(contents),
},
))
}
@ -278,7 +276,7 @@ where
let parser_context = parser_context.with_additional_node(&contexts[1]);
let parser_context = parser_context.with_additional_node(&contexts[2]);
let (remaining, contents) = content(&parser_context, remaining)?;
let (remaining, contents) = text_until_exit(&parser_context, remaining)?;
let (remaining, _end) = lesser_block_end_specialized(&parser_context, remaining)?;
let (remaining, _trailing_ws) =
@ -294,7 +292,7 @@ where
),
export_type: export_type.map(Into::<&str>::into),
data: parameters.map(Into::<&str>::into),
contents,
contents: Into::<&str>::into(contents),
},
))
}
@ -333,7 +331,7 @@ where
let parser_context = context.with_additional_node(&contexts[0]);
let parser_context = parser_context.with_additional_node(&contexts[1]);
let parser_context = parser_context.with_additional_node(&contexts[2]);
let (remaining, contents) = content(&parser_context, remaining)?;
let (remaining, contents) = text_until_exit(&parser_context, remaining)?;
let (remaining, _end) = lesser_block_end_specialized(&parser_context, remaining)?;
let (remaining, _trailing_ws) =
@ -373,7 +371,7 @@ where
retain_labels,
use_labels,
label_format,
contents,
contents: Into::<&str>::into(contents),
},
))
}
@ -652,71 +650,3 @@ fn switch_word<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
is_not(" \t\r\n"),
))(input)
}
/// Tracks whether the block contents gathered so far can still be borrowed
/// straight from the input or had to be copied into an owned buffer.
enum ContentState {
/// No escape prefix has been seen yet; the result is still a plain slice of the input.
Normal,
/// At least one line carried an escape prefix; holds the rebuilt text accumulated so far.
Modified(String),
}
/// Collects the text of a lesser block, line by line, until the exit matcher
/// of `context` succeeds.
///
/// Every line is read with `content_line`. As long as no line carries an
/// escape prefix the result is borrowed from `input`. The first escaped line
/// switches to an owned `String` that is rebuilt from the text consumed so
/// far, the escape prefix and the remainder of each line (the escaping comma
/// itself is consumed by `content_line` and never copied).
///
/// Returns the unconsumed input together with the collected contents, or the
/// error produced by `content_line`.
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(ret, level = "debug", skip(context))
)]
pub(crate) fn content<'b, 'g, 'r, 's>(
    context: RefContext<'b, 'g, 'r, 's>,
    input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Cow<'s, str>> {
    let exit_matcher_parser = parser_with_context!(exit_matcher_parser)(context);
    let mut state = ContentState::Normal;
    let mut remaining = input;

    while exit_matcher_parser(remaining).is_err() {
        let (rest, (escape_prefix, line_body)) = content_line(remaining)?;
        state = match (state, escape_prefix) {
            // Already copying: append the prefix (if any) and the line.
            (ContentState::Modified(mut buffer), prefix) => {
                if let Some(prefix) = prefix {
                    buffer.push_str(Into::<&str>::into(prefix));
                }
                buffer.push_str(line_body.into());
                ContentState::Modified(buffer)
            }
            // First escaped line: seed the buffer with everything consumed before it.
            (ContentState::Normal, Some(prefix)) => {
                let mut buffer = String::new();
                buffer.push_str(Into::<&str>::into(input.get_until(remaining)));
                buffer.push_str(Into::<&str>::into(prefix));
                buffer.push_str(line_body.into());
                ContentState::Modified(buffer)
            }
            // Nothing escaped so far: keep borrowing.
            (ContentState::Normal, None) => ContentState::Normal,
        };
        remaining = rest;
    }

    let contents = match state {
        ContentState::Normal => Cow::Borrowed(Into::<&str>::into(input.get_until(remaining))),
        ContentState::Modified(buffer) => Cow::Owned(buffer),
    };
    Ok((remaining, contents))
}
/// Parses a single line (including its line ending) of lesser block contents.
///
/// Returns `(escape_prefix, rest_of_line)`. `escape_prefix` is `Some` when the
/// line starts with optional horizontal whitespace and zero or more commas
/// followed by a comma that escapes `#+` or `*`; it holds the text before that
/// escaping comma. The escaping comma is consumed and belongs to neither part.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn content_line<'s>(
    input: OrgSource<'s>,
) -> Res<OrgSource<'s>, (Option<OrgSource<'s>>, OrgSource<'s>)> {
    // A comma directly followed by "#+" or "*" marks the escaping comma; only peeked at here.
    let escaped_marker = peek(tuple((tag(","), alt((tag("#+"), tag("*"))))));
    // Whitespace and any commas in front of the escaping comma.
    let leading_text = recognize(tuple((space0, many_till(tag(","), escaped_marker))));

    let (after_escape, pre_escape_whitespace) = opt(map(
        tuple((leading_text, tag(","))),
        |(pre_comma, _)| pre_comma,
    ))(input)?;
    let (after_line, line_post_escape) =
        recognize(many_till(anychar, line_ending))(after_escape)?;

    Ok((after_line, (pre_escape_whitespace, line_post_escape)))
}

View File

@ -1,11 +1,25 @@
use std::borrow::Cow;
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::character::complete::anychar;
use nom::character::complete::line_ending;
use nom::character::complete::space0;
use nom::combinator::map;
use nom::combinator::opt;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::multi::many_till;
use nom::sequence::tuple;
use super::object::Object;
use super::AffiliatedKeywords;
use super::GetAffiliatedKeywords;
use super::PlainText;
use super::StandardProperties;
use super::Timestamp;
use crate::error::CustomError;
use crate::error::Res;
#[derive(Debug)]
pub struct Paragraph<'s> {
@ -61,7 +75,7 @@ pub struct ExampleBlock<'s> {
pub retain_labels: RetainLabels,
pub use_labels: bool,
pub label_format: Option<&'s str>,
pub contents: Cow<'s, str>,
pub contents: &'s str,
}
#[derive(Debug)]
@ -70,7 +84,7 @@ pub struct ExportBlock<'s> {
pub affiliated_keywords: AffiliatedKeywords<'s>,
pub export_type: Option<&'s str>,
pub data: Option<&'s str>,
pub contents: Cow<'s, str>,
pub contents: &'s str,
}
#[derive(Debug)]
@ -85,7 +99,7 @@ pub struct SrcBlock<'s> {
pub retain_labels: RetainLabels,
pub use_labels: bool,
pub label_format: Option<&'s str>,
pub contents: Cow<'s, str>,
pub contents: &'s str,
}
#[derive(Debug)]
@ -296,6 +310,13 @@ impl<'s> FixedWidthArea<'s> {
}
}
impl<'s> ExampleBlock<'s> {
    /// Returns the block contents with comma-escaped lines unescaped.
    ///
    /// The work is done on every call by running `lesser_block_content` over
    /// the stored raw `contents`.
    ///
    /// # Panics
    ///
    /// Panics if `lesser_block_content` returns an error.
    pub fn get_contents(&self) -> Cow<'s, str> {
        let unescaped = lesser_block_content(self.contents);
        unescaped.expect("This parser should never fail.")
    }
}
impl<'s> ExportBlock<'s> {
/// Gets the export type capitalized.
///
@ -303,6 +324,18 @@ impl<'s> ExportBlock<'s> {
pub fn get_export_type(&self) -> Option<String> {
    // Upper-case the export type when one is present.
    self.export_type.map(str::to_uppercase)
}
/// Returns the block contents with comma-escaped lines unescaped.
///
/// The work is done on every call by running `lesser_block_content` over
/// the stored raw `contents`.
///
/// # Panics
///
/// Panics if `lesser_block_content` returns an error.
pub fn get_contents(&self) -> Cow<'s, str> {
    let unescaped = lesser_block_content(self.contents);
    unescaped.expect("This parser should never fail.")
}
}
impl<'s> SrcBlock<'s> {
    /// Returns the block contents with comma-escaped lines unescaped.
    ///
    /// The work is done on every call by running `lesser_block_content` over
    /// the stored raw `contents`.
    ///
    /// # Panics
    ///
    /// Panics if `lesser_block_content` returns an error.
    pub fn get_contents(&self) -> Cow<'s, str> {
        let unescaped = lesser_block_content(self.contents);
        unescaped.expect("This parser should never fail.")
    }
}
impl<'s> GetAffiliatedKeywords<'s> for Paragraph<'s> {
@ -376,3 +409,82 @@ impl<'s> GetAffiliatedKeywords<'s> for VerseBlock<'s> {
&self.affiliated_keywords
}
}
/// Tracks whether the block contents gathered so far can still be borrowed
/// straight from the input or had to be copied into an owned buffer.
enum ContentState {
/// No escape prefix has been seen yet; the result is still a plain slice of the input.
Normal,
/// At least one line carried an escape prefix; holds the rebuilt text accumulated so far.
Modified(String),
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn lesser_block_content<'s>(input: &'s str) -> Result<Cow<'s, str>, CustomError> {
let mut state = ContentState::Normal;
let mut remaining = input;
loop {
if remaining.is_empty() {
break;
}
let (remain, (pre_escape_whitespace, line)) =
content_line(remaining).map_err(|err| match err {
nom::Err::Incomplete(_) => panic!("This parser does not support streaming."),
nom::Err::Error(e) => e,
nom::Err::Failure(e) => e,
})?;
if let Some(val) = pre_escape_whitespace {
if let ContentState::Modified(ref mut ret) = state {
ret.push_str(val);
} else {
let mut ret = String::new();
ret.push_str(get_str_until(input, remaining));
ret.push_str(val);
state = ContentState::Modified(ret);
}
}
if let ContentState::Modified(ref mut ret) = state {
ret.push_str(line);
}
remaining = remain;
}
match state {
ContentState::Normal => Ok(Cow::Borrowed(get_str_until(input, remaining))),
ContentState::Modified(ret) => Ok(Cow::Owned(ret)),
}
}
/// Parses a single line (including its line ending) of lesser block contents.
///
/// Returns `(escape_prefix, rest_of_line)`. `escape_prefix` is `Some` when the
/// line starts with optional horizontal whitespace and zero or more commas
/// followed by a comma that escapes `#+` or `*`; it holds the text before that
/// escaping comma. The escaping comma is consumed and belongs to neither part.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn content_line<'s>(input: &'s str) -> Res<&'s str, (Option<&'s str>, &'s str)> {
    // A comma directly followed by "#+" or "*" marks the escaping comma; only peeked at here.
    let escaped_marker = peek(tuple((tag(","), alt((tag("#+"), tag("*"))))));
    // Whitespace and any commas in front of the escaping comma.
    let leading_text = recognize(tuple((space0, many_till(tag(","), escaped_marker))));

    let (after_escape, pre_escape_whitespace) = opt(map(
        tuple((leading_text, tag(","))),
        |(pre_comma, _)| pre_comma,
    ))(input)?;
    let (after_line, line_post_escape) =
        recognize(many_till(anychar, line_ending))(after_escape)?;

    Ok((after_line, (pre_escape_whitespace, line_post_escape)))
}
/// Check if the child string slice is a slice of the parent string slice.
///
/// Only the memory ranges are compared: the result is `true` when the bytes
/// of `child` lie entirely inside the bytes of `parent`. The text itself is
/// never inspected.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn is_slice_of(parent: &str, child: &str) -> bool {
    let outer = parent.as_bytes().as_ptr_range();
    let inner = child.as_bytes().as_ptr_range();
    outer.start <= inner.start && inner.end <= outer.end
}
/// Returns the part of `parent` that precedes the start of `child`.
///
/// `child` is expected to be a slice of `parent`; this is only verified by a
/// debug assertion.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn get_str_until<'s>(parent: &'s str, child: &'s str) -> &'s str {
    debug_assert!(is_slice_of(parent, child));
    // Byte offset of `child` within `parent`, derived from the two start addresses.
    let prefix_len = child.as_ptr() as usize - parent.as_ptr() as usize;
    &parent[..prefix_len]
}