use nom::branch::alt;
use nom::character::complete::anychar;
use nom::character::complete::line_ending;
use nom::character::complete::multispace0;
use nom::character::complete::none_of;
use nom::character::complete::space0;
use nom::combinator::eof;
use nom::combinator::not;
use nom::combinator::opt;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::combinator::verify;
use nom::multi::many0;
use nom::multi::many_till;
use nom::sequence::tuple;

use super::parser_context::ContextElement;
use super::Context;
use crate::error::CustomError;
use crate::error::MyError;
use crate::error::Res;
use crate::parser::parser_with_context::parser_with_context;

/// The ASCII letters (both cases) and decimal digits.
pub const WORD_CONSTITUENT_CHARACTERS: &str =
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";

/// Report whether any `ContextElement::Context` entry in the context is named `section_name`,
/// no matter how many other entries come before it.
#[allow(dead_code)]
pub fn in_section<'r, 's, 'x>(context: Context<'r, 's>, section_name: &'x str) -> bool {
    for node in context.iter() {
        if let ContextElement::Context(name) = node.get_data() {
            if *name == section_name {
                return true;
            }
        }
    }
    false
}

/// Report whether the first `ContextElement::Context` entry found while walking the context is
/// named `section_name`.
///
/// Entries of any other kind are skipped; if there is no `Context` entry at all the answer is
/// `false`.
pub fn immediate_in_section<'r, 's, 'x>(context: Context<'r, 's>, section_name: &'x str) -> bool {
    for node in context.iter() {
        if let ContextElement::Context(name) = node.get_data() {
            // The first named section decides the answer either way.
            return *name == section_name;
        }
    }
    false
}

/// Get one character from before the current position.
///
/// Returns `None` when `current_position` starts at the very beginning of `document`, otherwise
/// a slice of `document` holding exactly the one (possibly multi-byte) character that ends where
/// `current_position` begins.
///
/// # Panics
///
/// Panics if `current_position` is not a sub-slice of `document`.
pub fn get_one_before<'s>(document: &'s str, current_position: &'s str) -> Option<&'s str> {
    assert!(is_slice_of(document, current_position));
    let offset = current_position.as_ptr() as usize - document.as_ptr() as usize;
    // Walking the preceding text backwards yields the start of the previous character; an empty
    // prefix (offset 0) yields nothing, which becomes `None`.
    let (previous_character_offset, _) = document[..offset].char_indices().next_back()?;
    Some(&document[previous_character_offset..offset])
}

/// Get the line current_position is on up until current_position
///
/// The returned slice starts right after the closest `\r` or `\n` that precedes
/// `current_position` (or at the start of `document` when there is none) and ends where
/// `current_position` begins. It is empty when `current_position` directly follows a line break.
///
/// Returns `None` when `current_position` starts at the very beginning of `document`.
///
/// # Panics
///
/// Panics if `current_position` is not a sub-slice of `document`.
pub fn get_current_line_before_position<'s>(
    document: &'s str,
    current_position: &'s str,
) -> Option<&'s str> {
    assert!(is_slice_of(document, current_position));
    let offset = current_position.as_ptr() as usize - document.as_ptr() as usize;
    if offset == 0 {
        return None;
    }
    let preceding = &document[..offset];
    // Both line-break characters are a single byte, so the byte after the match is always a
    // character boundary. With no line break at all, the line starts at the document start.
    let line_start = preceding
        .rfind(|character: char| character == '\r' || character == '\n')
        .map_or(0, |index| index + 1);
    Some(&preceding[line_start..])
}

/// Check if the child string slice is a slice of the parent string slice.
///
/// This compares addresses, not contents: the child's byte range must lie entirely inside the
/// parent's byte range.
fn is_slice_of(parent: &str, child: &str) -> bool {
    let parent_start = parent.as_ptr() as usize;
    let parent_end = parent_start + parent.len();
    let child_start = child.as_ptr() as usize;
    let child_end = child_start + child.len();
    child_start >= parent_start && child_end <= parent_end
}

/// Get a slice of the string that was consumed in a parser using the original input to the parser and the remaining input after the parser.
pub fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str { assert!(is_slice_of(input, remaining)); let source = { let offset = remaining.as_ptr() as usize - input.as_ptr() as usize; &input[..offset] }; source } /// A line containing only whitespace and then a line break /// /// It is up to the caller to ensure this is called at the start of a line. #[tracing::instrument(ret, level = "debug")] pub fn blank_line(input: &str) -> Res<&str, &str> { not(eof)(input)?; recognize(tuple((space0, alt((line_ending, eof)))))(input) } #[tracing::instrument(ret, level = "debug")] pub fn element_trailing_whitespace<'r, 's>( context: Context<'r, 's>, input: &'s str, ) -> Res<&'s str, &'s str> { start_of_line(context, input)?; alt((eof, recognize(many0(blank_line))))(input) } #[tracing::instrument(ret, level = "debug")] pub fn maybe_consume_trailing_whitespace_if_not_exiting<'r, 's>( context: Context<'r, 's>, input: &'s str, ) -> Res<&'s str, Option<&'s str>> { if context.should_consume_trailing_whitespace() && exit_matcher_parser(context, input).is_err() { Ok(opt(parser_with_context!(element_trailing_whitespace)( context, ))(input)?) } else { Ok((input, None)) } } #[tracing::instrument(ret, level = "debug")] pub fn maybe_consume_trailing_whitespace<'r, 's>( context: Context<'r, 's>, input: &'s str, ) -> Res<&'s str, Option<&'s str>> { if context.should_consume_trailing_whitespace() { Ok(opt(parser_with_context!(element_trailing_whitespace)( context, ))(input)?) 
} else { Ok((input, None)) } } #[tracing::instrument(ret, level = "debug")] pub fn trailing_whitespace(input: &str) -> Res<&str, &str> { alt((eof, recognize(tuple((line_ending, many0(blank_line))))))(input) } /// Check that we are at the start of a line #[tracing::instrument(ret, level = "debug")] pub fn start_of_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> { let document_root = context.get_document_root().unwrap(); let preceding_character = get_one_before(document_root, input) .map(|slice| slice.chars().next()) .flatten(); match preceding_character { Some('\n') => {} Some(_) => { // Not at start of line, cannot be a heading return Err(nom::Err::Error(CustomError::MyError(MyError( "Not at start of line", )))); } // If None, we are at the start of the file which allows for headings None => {} }; Ok((input, ())) } /// Check that we are at the start of a line #[tracing::instrument(ret, level = "debug")] pub fn preceded_by_whitespace<'r, 's>( context: Context<'r, 's>, input: &'s str, ) -> Res<&'s str, ()> { let document_root = context.get_document_root().unwrap(); let preceding_character = get_one_before(document_root, input) .map(|slice| slice.chars().next()) .flatten(); match preceding_character { Some('\n') | Some('\r') | Some(' ') | Some('\t') => {} // If None, we are at the start of the file which is not allowed None | Some(_) => { return Err(nom::Err::Error(CustomError::MyError(MyError( "Not preceded by whitespace.", )))); } }; Ok((input, ())) } /// Pull one non-whitespace character. /// /// This function only operates on spaces, tabs, carriage returns, and line feeds. It does not handle fancy unicode whitespace. 
#[tracing::instrument(ret, level = "debug")] pub fn non_whitespace_character(input: &str) -> Res<&str, char> { none_of(" \t\r\n")(input) } /// Check that we are at the start of a line #[tracing::instrument(ret, level = "debug")] pub fn exit_matcher_parser<'r, 's>( context: Context<'r, 's>, input: &'s str, ) -> Res<&'s str, &'s str> { peek(|i| context.check_exit_matcher(i))(input) } #[tracing::instrument(ret, level = "debug")] pub fn always_fail<'r, 's>(_context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { Err(nom::Err::Error(CustomError::MyError(MyError( "Always fail", )))) } #[tracing::instrument(ret, level = "debug")] pub fn whitespace_eof(input: &str) -> Res<&str, &str> { recognize(tuple((multispace0, eof)))(input) } #[tracing::instrument(ret, level = "debug")] pub fn text_until_exit<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { recognize(verify( many_till(anychar, parser_with_context!(exit_matcher_parser)(context)), |(children, _exit_contents)| !children.is_empty(), ))(input) } #[allow(dead_code)] pub fn not_yet_implemented() -> Res<&'static str, ()> { return Err(nom::Err::Error(CustomError::MyError(MyError( "Not implemented yet.", )))); } #[cfg(test)] mod tests { use super::*; #[test] fn get_one_before_unicode() { let input = "๐Ÿงก๐Ÿ’›๐Ÿ’š๐Ÿ’™๐Ÿ’œ"; let (green_heart_index, _) = input.char_indices().skip(2).next().unwrap(); let starting_with_green_heart = &input[green_heart_index..]; let yellow_heart = get_one_before(input, starting_with_green_heart).unwrap(); assert!(is_slice_of(input, yellow_heart)); assert_eq!(yellow_heart, "๐Ÿ’›"); } }