// organic/src/parser/util.rs

use nom::branch::alt;
use nom::character::complete::line_ending;
use nom::character::complete::space0;
use nom::combinator::eof;
use nom::combinator::not;
use nom::combinator::recognize;
use nom::multi::many0;
use nom::sequence::tuple;

use super::error::Res;
use super::parser_context::ContextElement;
use super::Context;

/// Report whether any enclosing context, at any depth, is the named section.
///
/// Every element yielded by `context.iter()` is inspected; the function
/// returns `true` as soon as it sees a `ContextElement::Context` whose name
/// equals `section_name`, and `false` if the iteration finishes without one.
pub fn in_section<'r, 's, 'x>(context: Context<'r, 's>, section_name: &'x str) -> bool {
    for node in context.iter() {
        if let ContextElement::Context(name) = node.get_data() {
            if *name == section_name {
                return true;
            }
        }
    }
    false
}

/// Report whether the nearest enclosing section is the named section.
///
/// Elements yielded by `context.iter()` that are not `ContextElement::Context`
/// are skipped. The first `ContextElement::Context` encountered decides the
/// answer: `true` if its name equals `section_name`, `false` otherwise. If no
/// such element is found at all the result is `false`.
pub fn immediate_in_section<'r, 's, 'x>(context: Context<'r, 's>, section_name: &'x str) -> bool {
    for node in context.iter() {
        if let ContextElement::Context(name) = node.get_data() {
            // The first section marker we meet settles the question either way.
            return *name == section_name;
        }
    }
    false
}

/// Get one character from before the current position.
pub fn get_one_before<'s>(document: &'s str, current_position: &'s str) -> Option<&'s str> {
    assert!(is_slice_of(document, current_position));
    if document.as_ptr() as usize == current_position.as_ptr() as usize {
        return None;
    }
    let offset = current_position.as_ptr() as usize - document.as_ptr() as usize;
    let previous_character_offset = document.floor_char_boundary(offset - 1);
    Some(&document[previous_character_offset..offset])
}

/// Report whether `child` lies entirely within the memory spanned by `parent`.
///
/// Only addresses are compared: the result is `true` when the child's first
/// byte is at or after the parent's first byte and the child's one-past-the-end
/// address is at or before the parent's. The string contents are never read.
fn is_slice_of(parent: &str, child: &str) -> bool {
    let outer = parent.as_bytes().as_ptr_range();
    let inner = child.as_bytes().as_ptr_range();
    outer.start <= inner.start && inner.end <= outer.end
}

/// Return the prefix of `input` that a parser consumed.
///
/// `input` is what the parser was originally given and `remaining` is what it
/// left unparsed; the result is the part of `input` that ends where
/// `remaining` begins.
///
/// # Panics
///
/// Panics if `remaining` is not a slice of `input`.
pub fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str {
    assert!(is_slice_of(input, remaining));
    // The distance between the two start addresses is the consumed byte count.
    let consumed_len = remaining.as_ptr() as usize - input.as_ptr() as usize;
    &input[..consumed_len]
}

/// Match a line made up of optional horizontal whitespace followed by a line
/// ending or the end of the input.
///
/// Empty input is rejected up front, so end-of-input only counts as a
/// terminator when at least some input is left to consume.
///
/// The parser does not check that it is positioned at the start of a line;
/// that is the caller's responsibility.
pub fn blank_line(input: &str) -> Res<&str, &str> {
    // Fail straight away on empty input.
    let _ = not(eof)(input)?;
    let line_terminator = alt((line_ending, eof));
    recognize(tuple((space0, line_terminator)))(input)
}

/// Match either the end of the input, or a line ending followed by zero or
/// more blank lines.
///
/// The end-of-input alternative is tried first. In the second alternative the
/// whole matched span (the line ending plus every following blank line) is
/// returned as a single slice.
pub fn trailing_whitespace(input: &str) -> Res<&str, &str> {
    let line_break_then_blanks = recognize(tuple((line_ending, many0(blank_line))));
    alt((eof, line_break_then_blanks))(input)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The character before a position must come back whole even when every
    /// character in the document is several bytes long.
    #[test]
    fn get_one_before_unicode() {
        let input = "🧡💛💚💙💜";
        // Byte offset of the third heart (the green one).
        let green_heart_index = input
            .char_indices()
            .nth(2)
            .map(|(index, _)| index)
            .unwrap();
        let starting_with_green_heart = &input[green_heart_index..];
        let yellow_heart = get_one_before(input, starting_with_green_heart).unwrap();
        assert!(is_slice_of(input, yellow_heart));
        assert_eq!(yellow_heart, "💛");
    }
}