Merge branch 'lazy_parse_lesser_block_contents'
All checks were successful
clippy Build clippy has succeeded
rustfmt Build rustfmt has succeeded
rust-build Build rust-build has succeeded
rust-foreign-document-test Build rust-foreign-document-test has succeeded
rust-test Build rust-test has succeeded

This commit is contained in:
Tom Alexander 2023-10-31 20:54:01 -04:00
commit 7dfe24ff98
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
3 changed files with 124 additions and 82 deletions

View File

@ -1576,7 +1576,7 @@ fn compare_example_block<'b, 's>(
[],
(
EmacsField::Required(":value"),
|r| Some(&r.contents),
|r| Some(r.get_contents()),
compare_property_quoted_string
),
(
@ -1654,7 +1654,7 @@ fn compare_export_block<'b, 's>(
),
(
EmacsField::Required(":value"),
|r| Some(&r.contents),
|r| Some(r.get_contents()),
compare_property_quoted_string
)
) {
@ -1702,7 +1702,7 @@ fn compare_src_block<'b, 's>(
),
(
EmacsField::Required(":value"),
|r| Some(&r.contents),
|r| Some(r.get_contents()),
compare_property_quoted_string
),
(

View File

@ -1,5 +1,3 @@
use std::borrow::Cow;
use nom::branch::alt;
use nom::bytes::complete::is_not;
use nom::bytes::complete::tag;
@ -204,7 +202,7 @@ where
let parser_context = parser_context.with_additional_node(&contexts[1]);
let parser_context = parser_context.with_additional_node(&contexts[2]);
let (remaining, contents) = content(&parser_context, remaining)?;
let (remaining, contents) = text_until_exit(&parser_context, remaining)?;
let (remaining, _end) = lesser_block_end_specialized(&parser_context, remaining)?;
let (remaining, _trailing_ws) =
@ -238,7 +236,7 @@ where
retain_labels,
use_labels,
label_format,
contents,
contents: Into::<&str>::into(contents),
},
))
}
@ -278,7 +276,7 @@ where
let parser_context = parser_context.with_additional_node(&contexts[1]);
let parser_context = parser_context.with_additional_node(&contexts[2]);
let (remaining, contents) = content(&parser_context, remaining)?;
let (remaining, contents) = text_until_exit(&parser_context, remaining)?;
let (remaining, _end) = lesser_block_end_specialized(&parser_context, remaining)?;
let (remaining, _trailing_ws) =
@ -294,7 +292,7 @@ where
),
export_type: export_type.map(Into::<&str>::into),
data: parameters.map(Into::<&str>::into),
contents,
contents: Into::<&str>::into(contents),
},
))
}
@ -333,7 +331,7 @@ where
let parser_context = context.with_additional_node(&contexts[0]);
let parser_context = parser_context.with_additional_node(&contexts[1]);
let parser_context = parser_context.with_additional_node(&contexts[2]);
let (remaining, contents) = content(&parser_context, remaining)?;
let (remaining, contents) = text_until_exit(&parser_context, remaining)?;
let (remaining, _end) = lesser_block_end_specialized(&parser_context, remaining)?;
let (remaining, _trailing_ws) =
@ -373,7 +371,7 @@ where
retain_labels,
use_labels,
label_format,
contents,
contents: Into::<&str>::into(contents),
},
))
}
@ -652,71 +650,3 @@ fn switch_word<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
is_not(" \t\r\n"),
))(input)
}
enum ContentState {
Normal,
Modified(String),
}
#[cfg_attr(
feature = "tracing",
tracing::instrument(ret, level = "debug", skip(context))
)]
pub(crate) fn content<'b, 'g, 'r, 's>(
context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Cow<'s, str>> {
let mut state = ContentState::Normal;
let mut remaining = input;
let exit_matcher_parser = parser_with_context!(exit_matcher_parser)(context);
loop {
if exit_matcher_parser(remaining).is_ok() {
break;
}
let (remain, (pre_escape_whitespace, line)) = content_line(remaining)?;
if let Some(val) = pre_escape_whitespace {
if let ContentState::Modified(ref mut ret) = state {
ret.push_str(Into::<&str>::into(val));
} else {
let mut ret = String::new();
ret.push_str(Into::<&str>::into(input.get_until(remaining)));
ret.push_str(Into::<&str>::into(val));
state = ContentState::Modified(ret);
}
}
if let ContentState::Modified(ref mut ret) = state {
ret.push_str(line.into());
}
remaining = remain;
}
match state {
ContentState::Normal => Ok((
remaining,
Cow::Borrowed(Into::<&str>::into(input.get_until(remaining))),
)),
ContentState::Modified(ret) => Ok((remaining, Cow::Owned(ret))),
}
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn content_line<'s>(
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, (Option<OrgSource<'s>>, OrgSource<'s>)> {
let (remaining, pre_escape_whitespace) = opt(map(
tuple((
recognize(tuple((
space0,
many_till(
tag(","),
peek(tuple((tag(","), alt((tag("#+"), tag("*")))))),
),
))),
tag(","),
)),
|(pre_comma, _)| pre_comma,
))(input)?;
let (remaining, line_post_escape) = recognize(many_till(anychar, line_ending))(remaining)?;
Ok((remaining, (pre_escape_whitespace, line_post_escape)))
}

View File

@ -1,11 +1,25 @@
use std::borrow::Cow;
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::character::complete::anychar;
use nom::character::complete::line_ending;
use nom::character::complete::space0;
use nom::combinator::map;
use nom::combinator::opt;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::multi::many_till;
use nom::sequence::tuple;
use super::object::Object;
use super::AffiliatedKeywords;
use super::GetAffiliatedKeywords;
use super::PlainText;
use super::StandardProperties;
use super::Timestamp;
use crate::error::CustomError;
use crate::error::Res;
#[derive(Debug)]
pub struct Paragraph<'s> {
@ -61,7 +75,7 @@ pub struct ExampleBlock<'s> {
pub retain_labels: RetainLabels,
pub use_labels: bool,
pub label_format: Option<&'s str>,
pub contents: Cow<'s, str>,
pub contents: &'s str,
}
#[derive(Debug)]
@ -70,7 +84,7 @@ pub struct ExportBlock<'s> {
pub affiliated_keywords: AffiliatedKeywords<'s>,
pub export_type: Option<&'s str>,
pub data: Option<&'s str>,
pub contents: Cow<'s, str>,
pub contents: &'s str,
}
#[derive(Debug)]
@ -85,7 +99,7 @@ pub struct SrcBlock<'s> {
pub retain_labels: RetainLabels,
pub use_labels: bool,
pub label_format: Option<&'s str>,
pub contents: Cow<'s, str>,
pub contents: &'s str,
}
#[derive(Debug)]
@ -296,6 +310,13 @@ impl<'s> FixedWidthArea<'s> {
}
}
impl<'s> ExampleBlock<'s> {
/// Gets the contents of the lesser block, handling the escaping of lines with leading commas.
pub fn get_contents(&self) -> Cow<'s, str> {
lesser_block_content(self.contents).expect("This parser should never fail.")
}
}
impl<'s> ExportBlock<'s> {
/// Gets the export type capitalized.
///
@ -303,6 +324,18 @@ impl<'s> ExportBlock<'s> {
pub fn get_export_type(&self) -> Option<String> {
self.export_type.map(|s| s.to_uppercase())
}
/// Gets the contents of the lesser block, handling the escaping of lines with leading commas.
pub fn get_contents(&self) -> Cow<'s, str> {
lesser_block_content(self.contents).expect("This parser should never fail.")
}
}
impl<'s> SrcBlock<'s> {
/// Gets the contents of the lesser block, handling the escaping of lines with leading commas.
pub fn get_contents(&self) -> Cow<'s, str> {
lesser_block_content(self.contents).expect("This parser should never fail.")
}
}
impl<'s> GetAffiliatedKeywords<'s> for Paragraph<'s> {
@ -376,3 +409,82 @@ impl<'s> GetAffiliatedKeywords<'s> for VerseBlock<'s> {
&self.affiliated_keywords
}
}
enum ContentState {
Normal,
Modified(String),
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn lesser_block_content<'s>(input: &'s str) -> Result<Cow<'s, str>, CustomError> {
let mut state = ContentState::Normal;
let mut remaining = input;
loop {
if remaining.is_empty() {
break;
}
let (remain, (pre_escape_whitespace, line)) =
content_line(remaining).map_err(|err| match err {
nom::Err::Incomplete(_) => panic!("This parser does not support streaming."),
nom::Err::Error(e) => e,
nom::Err::Failure(e) => e,
})?;
if let Some(val) = pre_escape_whitespace {
if let ContentState::Modified(ref mut ret) = state {
ret.push_str(val);
} else {
let mut ret = String::new();
ret.push_str(get_str_until(input, remaining));
ret.push_str(val);
state = ContentState::Modified(ret);
}
}
if let ContentState::Modified(ref mut ret) = state {
ret.push_str(line);
}
remaining = remain;
}
match state {
ContentState::Normal => Ok(Cow::Borrowed(get_str_until(input, remaining))),
ContentState::Modified(ret) => Ok(Cow::Owned(ret)),
}
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn content_line<'s>(input: &'s str) -> Res<&'s str, (Option<&'s str>, &'s str)> {
let (remaining, pre_escape_whitespace) = opt(map(
tuple((
recognize(tuple((
space0,
many_till(
tag(","),
peek(tuple((tag(","), alt((tag("#+"), tag("*")))))),
),
))),
tag(","),
)),
|(pre_comma, _)| pre_comma,
))(input)?;
let (remaining, line_post_escape) = recognize(many_till(anychar, line_ending))(remaining)?;
Ok((remaining, (pre_escape_whitespace, line_post_escape)))
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
/// Check if the child string slice is a slice of the parent string slice.
fn is_slice_of(parent: &str, child: &str) -> bool {
let parent_start = parent.as_ptr() as usize;
let parent_end = parent_start + parent.len();
let child_start = child.as_ptr() as usize;
let child_end = child_start + child.len();
child_start >= parent_start && child_end <= parent_end
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn get_str_until<'s>(parent: &'s str, child: &'s str) -> &'s str {
debug_assert!(is_slice_of(parent, child));
let parent_start = parent.as_ptr() as usize;
let child_start = child.as_ptr() as usize;
&parent[..(child_start - parent_start)]
}