From 9545990b522f11224ba5c6fa75e19fe80374e9f3 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 27 Mar 2023 19:12:20 -0400 Subject: [PATCH] Regurgitate seems to have made all text a paragraph. --- src/parser/plain_list.rs | 2 ++ src/parser/util.rs | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 9743fd6c..ed7a310c 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -12,6 +12,7 @@ use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ExitMatcherNode; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; +use crate::parser::util::regurgitate; use crate::parser::util::start_of_line; use nom::branch::alt; use nom::bytes::complete::tag; @@ -62,6 +63,7 @@ pub fn plain_list_item<'r, 's>( let (remaining, (bull, _ws)) = tuple((bullet, space0))(remaining)?; let (remaining, (contents, _exit_contents)) = many_till(element_matcher, exit_matcher)(remaining)?; + let remaining = regurgitate(input, remaining); let source = get_consumed(input, remaining); Ok(( diff --git a/src/parser/util.rs b/src/parser/util.rs index a6f1b6ef..9804b13d 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -127,6 +127,31 @@ pub fn always_fail<'r, 's>(_context: Context<'r, 's>, input: &'s str) -> Res<&'s )))) } +/// Walk backwards unconsuming blank lines and line endings. +/// +/// List items are a special case where the trailing blank lines do not belong to it, unlike all other elements. Rather than write that special logic into each child parser, this just walks backwards through the consumed input to unconsume trailing blank lines and line breaks. +pub fn regurgitate<'s>(input: &'s str, remaining: &'s str) -> &'s str { + assert!(is_slice_of(input, remaining)); + let mut offset = remaining.as_ptr() as usize - input.as_ptr() as usize; + let source = &input[..offset]; + let mut char_indices = source.char_indices().rev(); + loop { + match char_indices.next() { + Some((off, chr)) => { + if chr == '\n' { + offset = off; + } else if chr != ' ' && chr != '\t' { + return &input[offset..]; + } + } + None => { + // It was all whitespace, so return the full input string + return input; + } + }; + } +} + #[cfg(test)] mod tests { use super::*; @@ -140,4 +165,14 @@ mod tests { assert!(is_slice_of(input, yellow_heart)); assert_eq!(yellow_heart, "๐Ÿ’›"); } + + #[test] + fn regurgitate_unicode() { + let input = "๐Ÿงก๐Ÿ’›\n\t \t \n\n๐Ÿ’š๐Ÿ’™๐Ÿ’œ"; + let (green_heart_index, _) = input.char_indices().skip(12).next().unwrap(); + let starting_with_green_heart = &input[green_heart_index..]; + let after_yellow = regurgitate(input, starting_with_green_heart); + assert!(is_slice_of(input, after_yellow)); + assert_eq!(after_yellow, "\n\t \t \n\n๐Ÿ’š๐Ÿ’™๐Ÿ’œ"); + } }