Regurgitate seems to have made all text a paragraph.

This commit is contained in:
Tom Alexander 2023-03-27 19:12:20 -04:00
parent 3643f91bac
commit 9545990b52
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
2 changed files with 37 additions and 0 deletions

View File

@ -12,6 +12,7 @@ use crate::parser::parser_context::ContextElement;
use crate::parser::parser_context::ExitMatcherNode; use crate::parser::parser_context::ExitMatcherNode;
use crate::parser::util::exit_matcher_parser; use crate::parser::util::exit_matcher_parser;
use crate::parser::util::get_consumed; use crate::parser::util::get_consumed;
use crate::parser::util::regurgitate;
use crate::parser::util::start_of_line; use crate::parser::util::start_of_line;
use nom::branch::alt; use nom::branch::alt;
use nom::bytes::complete::tag; use nom::bytes::complete::tag;
@ -62,6 +63,7 @@ pub fn plain_list_item<'r, 's>(
let (remaining, (bull, _ws)) = tuple((bullet, space0))(remaining)?; let (remaining, (bull, _ws)) = tuple((bullet, space0))(remaining)?;
let (remaining, (contents, _exit_contents)) = let (remaining, (contents, _exit_contents)) =
many_till(element_matcher, exit_matcher)(remaining)?; many_till(element_matcher, exit_matcher)(remaining)?;
let remaining = regurgitate(input, remaining);
let source = get_consumed(input, remaining); let source = get_consumed(input, remaining);
Ok(( Ok((

View File

@ -127,6 +127,31 @@ pub fn always_fail<'r, 's>(_context: Context<'r, 's>, input: &'s str) -> Res<&'s
)))) ))))
} }
/// Hand back trailing blank lines and line endings that were already consumed.
///
/// List items are a special case: unlike all other elements, the blank lines that trail them do not belong to them. Instead of teaching every child parser about that, this helper inspects the consumed part of `input` (everything before `remaining`) from its end and moves the start of the remainder back over trailing newlines, spaces and tabs.
///
/// The returned slice of `input` begins at the earliest `'\n'` in the trailing run of whitespace. Spaces and tabs sitting between the last non-whitespace character and that newline stay consumed. If no newline is found before a non-whitespace character, the original split point is kept. If the consumed part consists solely of spaces, tabs and newlines (or is empty), the whole `input` is returned.
///
/// # Panics
///
/// Panics if `is_slice_of(input, remaining)` is false.
pub fn regurgitate<'s>(input: &'s str, remaining: &'s str) -> &'s str {
    assert!(is_slice_of(input, remaining));
    // Byte length of the part of `input` that precedes `remaining`.
    let consumed_len = remaining.as_ptr() as usize - input.as_ptr() as usize;
    let mut cut = consumed_len;
    for (index, character) in input[..consumed_len].char_indices().rev() {
        match character {
            // Every newline met on the way back becomes the new split point.
            '\n' => cut = index,
            // Horizontal whitespace is skipped without moving the split point.
            ' ' | '\t' => {}
            // Real content: everything from the last recorded newline onwards is handed back.
            _ => return &input[cut..],
        }
    }
    // It was all whitespace, so return the full input string
    input
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@ -140,4 +165,14 @@ mod tests {
assert!(is_slice_of(input, yellow_heart)); assert!(is_slice_of(input, yellow_heart));
assert_eq!(yellow_heart, "💛"); assert_eq!(yellow_heart, "💛");
} }
#[test]
fn regurgitate_unicode() {
    // Multi-byte characters sit on both sides of the blank lines, so byte
    // offsets and character counts differ throughout this input.
    let input = "🧡💛\n\t \t \n\n💚💙💜";
    // Locate the green heart by value instead of by a hard-coded character
    // count, which silently goes stale (and panics on `unwrap`) whenever the
    // whitespace in the literal changes.
    let green_heart_index = input
        .find('💚')
        .expect("the test input contains a green heart");
    let starting_with_green_heart = &input[green_heart_index..];
    let after_yellow = regurgitate(input, starting_with_green_heart);
    // The result must still point into `input` and start at the line ending
    // that directly follows the yellow heart.
    assert!(is_slice_of(input, after_yellow));
    assert_eq!(after_yellow, "\n\t \t \n\n💚💙💜");
}
} }