Switch to handling the unescaping during the initial parsing.

This preserves the line-ending characters, unlike Rust's `.lines()` iterator, which strips them.
This commit is contained in:
Tom Alexander 2023-10-04 13:08:24 -04:00
parent afb43ff34f
commit 7ee48ff65c
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
2 changed files with 23 additions and 17 deletions

View File

@ -2,17 +2,22 @@ use nom::branch::alt;
use nom::bytes::complete::is_not;
use nom::bytes::complete::tag;
use nom::bytes::complete::tag_no_case;
use nom::character::complete::anychar;
use nom::character::complete::line_ending;
use nom::character::complete::space0;
use nom::character::complete::space1;
use nom::combinator::consumed;
use nom::combinator::eof;
use nom::combinator::map;
use nom::combinator::not;
use nom::combinator::opt;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::combinator::verify;
use nom::multi::many0;
use nom::multi::many_till;
use nom::multi::separated_list1;
use nom::sequence::preceded;
use nom::sequence::tuple;
use super::org_source::OrgSource;
@ -151,7 +156,10 @@ pub(crate) fn example_block<'b, 'g, 'r, 's>(
let parser_context = parser_context.with_additional_node(&contexts[2]);
let parameters = parameters.map(|(_, parameters)| parameters);
let (remaining, contents) = parser_with_context!(text_until_exit)(&parser_context)(remaining)?;
let (remaining, contents) = many0(preceded(
not(parser_with_context!(exit_matcher_parser)(&parser_context)),
map(content_line, Into::<&str>::into),
))(remaining)?;
let (remaining, _end) = lesser_block_end_specialized(&parser_context, remaining)?;
let source = get_consumed(input, remaining);
@ -178,7 +186,7 @@ pub(crate) fn example_block<'b, 'g, 'r, 's>(
retain_labels,
use_labels,
label_format,
contents: contents.into(),
contents,
},
))
}
@ -431,3 +439,10 @@ fn switch_word<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
is_not(" \t\r\n"),
))(input)
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn content_line<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
let (remaining, _) = opt(tuple((space0, tag(","), peek(alt((tag("#+"), tag("*")))))))(input)?;
let (remaining, line_post_escape) = recognize(many_till(anychar, line_ending))(remaining)?;
Ok((remaining, line_post_escape))
}

View File

@ -45,7 +45,7 @@ pub struct ExampleBlock<'s> {
pub retain_labels: bool,
pub use_labels: bool,
pub label_format: Option<&'s str>,
pub contents: &'s str,
pub contents: Vec<&'s str>,
}
#[derive(Debug)]
@ -241,21 +241,12 @@ impl<'s> Comment<'s> {
impl<'s> ExampleBlock<'s> {
/// Get the inner contents of the ExampleBlock with the escaping commas removed.
pub fn get_contents(&self) -> String {
let mut ret = String::with_capacity(self.contents.len());
for line in self.contents.lines() {
let first_comma = line.find(",#+").or_else(|| line.find(",*"));
if let Some(first_comma) = first_comma {
let before_first_comma = &line[..first_comma];
let is_escaping_comma = before_first_comma.chars().all(char::is_whitespace);
if is_escaping_comma {
ret.push_str(&line[(first_comma + 1)..]);
} else {
ret.push_str(line);
}
} else {
ret.push_str(line);
}
let final_size = self.contents.iter().map(|line| line.len()).sum();
let mut ret = String::with_capacity(final_size);
for line in &self.contents {
ret.push_str(line);
}
ret
}
}