Switch to handling the unescaping during the initial parsing.

This preserves the line-ending characters, unlike Rust's `.lines()` iterator, which strips them.
This commit is contained in:
Tom Alexander 2023-10-04 13:08:24 -04:00
parent afb43ff34f
commit 7ee48ff65c
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
2 changed files with 23 additions and 17 deletions

View File

@ -2,17 +2,22 @@ use nom::branch::alt;
use nom::bytes::complete::is_not; use nom::bytes::complete::is_not;
use nom::bytes::complete::tag; use nom::bytes::complete::tag;
use nom::bytes::complete::tag_no_case; use nom::bytes::complete::tag_no_case;
use nom::character::complete::anychar;
use nom::character::complete::line_ending; use nom::character::complete::line_ending;
use nom::character::complete::space0; use nom::character::complete::space0;
use nom::character::complete::space1; use nom::character::complete::space1;
use nom::combinator::consumed; use nom::combinator::consumed;
use nom::combinator::eof; use nom::combinator::eof;
use nom::combinator::map; use nom::combinator::map;
use nom::combinator::not;
use nom::combinator::opt; use nom::combinator::opt;
use nom::combinator::peek;
use nom::combinator::recognize; use nom::combinator::recognize;
use nom::combinator::verify; use nom::combinator::verify;
use nom::multi::many0;
use nom::multi::many_till; use nom::multi::many_till;
use nom::multi::separated_list1; use nom::multi::separated_list1;
use nom::sequence::preceded;
use nom::sequence::tuple; use nom::sequence::tuple;
use super::org_source::OrgSource; use super::org_source::OrgSource;
@ -151,7 +156,10 @@ pub(crate) fn example_block<'b, 'g, 'r, 's>(
let parser_context = parser_context.with_additional_node(&contexts[2]); let parser_context = parser_context.with_additional_node(&contexts[2]);
let parameters = parameters.map(|(_, parameters)| parameters); let parameters = parameters.map(|(_, parameters)| parameters);
let (remaining, contents) = parser_with_context!(text_until_exit)(&parser_context)(remaining)?; let (remaining, contents) = many0(preceded(
not(parser_with_context!(exit_matcher_parser)(&parser_context)),
map(content_line, Into::<&str>::into),
))(remaining)?;
let (remaining, _end) = lesser_block_end_specialized(&parser_context, remaining)?; let (remaining, _end) = lesser_block_end_specialized(&parser_context, remaining)?;
let source = get_consumed(input, remaining); let source = get_consumed(input, remaining);
@ -178,7 +186,7 @@ pub(crate) fn example_block<'b, 'g, 'r, 's>(
retain_labels, retain_labels,
use_labels, use_labels,
label_format, label_format,
contents: contents.into(), contents,
}, },
)) ))
} }
@ -431,3 +439,10 @@ fn switch_word<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
is_not(" \t\r\n"), is_not(" \t\r\n"),
))(input) ))(input)
} }
/// Parse a single line of block content, unescaping it on the way.
///
/// An escape prefix is optional horizontal whitespace followed by a `,` that
/// is itself immediately followed by `#+` or `*`. When present, the whitespace
/// and the comma are consumed and discarded; the `#+` / `*` is only peeked at,
/// so it stays part of the returned line.
///
/// The returned slice is everything from that point through the end of the
/// line, *including* the line ending. The parser fails if the input runs out
/// before a line ending is found.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn content_line<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
    // Leading whitespace + comma, accepted only when it guards "#+" or "*".
    let escape_prefix = tuple((space0, tag(","), peek(alt((tag("#+"), tag("*"))))));
    let (after_escape, _discarded_prefix) = opt(escape_prefix)(input)?;

    // Take every character up to and including the line ending as one slice.
    recognize(many_till(anychar, line_ending))(after_escape)
}

View File

@ -45,7 +45,7 @@ pub struct ExampleBlock<'s> {
pub retain_labels: bool, pub retain_labels: bool,
pub use_labels: bool, pub use_labels: bool,
pub label_format: Option<&'s str>, pub label_format: Option<&'s str>,
pub contents: &'s str, pub contents: Vec<&'s str>,
} }
#[derive(Debug)] #[derive(Debug)]
@ -241,21 +241,12 @@ impl<'s> Comment<'s> {
impl<'s> ExampleBlock<'s> { impl<'s> ExampleBlock<'s> {
/// Get the inner contents of the ExampleBlock with the escaping commas removed. /// Get the inner contents of the ExampleBlock with the escaping commas removed.
pub fn get_contents(&self) -> String { pub fn get_contents(&self) -> String {
let mut ret = String::with_capacity(self.contents.len()); let final_size = self.contents.iter().map(|line| line.len()).sum();
for line in self.contents.lines() { let mut ret = String::with_capacity(final_size);
let first_comma = line.find(",#+").or_else(|| line.find(",*")); for line in &self.contents {
if let Some(first_comma) = first_comma {
let before_first_comma = &line[..first_comma];
let is_escaping_comma = before_first_comma.chars().all(char::is_whitespace);
if is_escaping_comma {
ret.push_str(&line[(first_comma + 1)..]);
} else {
ret.push_str(line); ret.push_str(line);
} }
} else {
ret.push_str(line);
}
}
ret ret
} }
} }