organic/src/parser/util.rs

209 lines
7.0 KiB
Rust

use nom::branch::alt;
use nom::character::complete::anychar;
use nom::character::complete::line_ending;
use nom::character::complete::none_of;
use nom::character::complete::space0;
use nom::combinator::eof;
use nom::combinator::not;
use nom::combinator::opt;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::multi::many0;
use nom::multi::many_till;
use nom::sequence::tuple;
use super::org_source::OrgSource;
use crate::context::parser_with_context;
use crate::context::ContextElement;
use crate::context::RefContext;
use crate::error::CustomError;
use crate::error::MyError;
use crate::error::Res;
pub const WORD_CONSTITUENT_CHARACTERS: &str =
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
/// Check if we are below a section of the given section type regardless of depth
pub fn in_section<'b, 'g, 'r, 's, 'x>(
context: RefContext<'b, 'g, 'r, 's>,
section_name: &'x str,
) -> bool {
for thing in context.iter() {
match thing {
ContextElement::Context(name) if *name == section_name => return true,
_ => {}
}
}
false
}
/// Checks if we are currently an immediate child of the given section type
pub fn immediate_in_section<'b, 'g, 'r, 's, 'x>(
context: RefContext<'b, 'g, 'r, 's>,
section_name: &'x str,
) -> bool {
for thing in context.iter() {
match thing {
ContextElement::Context(name) if *name == section_name => return true,
ContextElement::Context(name) if *name != section_name => return false,
_ => {}
}
}
false
}
/// Check if we are below a section of the given section type regardless of depth
pub fn in_object_section<'b, 'g, 'r, 's, 'x>(
context: RefContext<'b, 'g, 'r, 's>,
section_name: &'x str,
) -> bool {
for thing in context.iter() {
match thing {
ContextElement::ContextObject(name) if *name == section_name => return true,
_ => {}
}
}
false
}
/// Get a slice of the string that was consumed in a parser using the original input to the parser and the remaining input after the parser.
pub fn get_consumed<'s>(input: OrgSource<'s>, remaining: OrgSource<'s>) -> OrgSource<'s> {
input.get_until(remaining)
}
/// A line containing only whitespace and then a line break
///
/// It is up to the caller to ensure this is called at the start of a line.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn blank_line(input: OrgSource<'_>) -> Res<OrgSource<'_>, OrgSource<'_>> {
not(eof)(input)?;
recognize(tuple((space0, alt((line_ending, eof)))))(input)
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn element_trailing_whitespace<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
start_of_line(input)?;
alt((eof, recognize(many0(blank_line))))(input)
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn maybe_consume_object_trailing_whitespace_if_not_exiting<'b, 'g, 'r, 's>(
context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Option<OrgSource<'s>>> {
if exit_matcher_parser(context, input).is_err() {
opt(space0)(input)
} else {
Ok((input, None))
}
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn maybe_consume_trailing_whitespace_if_not_exiting<'b, 'g, 'r, 's>(
context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Option<OrgSource<'s>>> {
if context.should_consume_trailing_whitespace() && exit_matcher_parser(context, input).is_err()
{
Ok(opt(element_trailing_whitespace)(input)?)
} else {
Ok((input, None))
}
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn maybe_consume_trailing_whitespace<'b, 'g, 'r, 's>(
context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Option<OrgSource<'s>>> {
if context.should_consume_trailing_whitespace() {
Ok(opt(element_trailing_whitespace)(input)?)
} else {
Ok((input, None))
}
}
/// Check that we are at the start of a line
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn start_of_line<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, ()> {
if input.is_at_start_of_line() {
Ok((input, ()))
} else {
Err(nom::Err::Error(CustomError::MyError(MyError(
"Not at start of line".into(),
))))
}
}
pub fn preceded_by_whitespace(
allow_start_of_file: bool,
) -> impl for<'s> Fn(OrgSource<'s>) -> Res<OrgSource<'s>, ()> {
move |input| _preceded_by_whitespace(allow_start_of_file, input)
}
/// Check that we are at the start of a line
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn _preceded_by_whitespace<'s>(
allow_start_of_file: bool,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, ()> {
let preceding_character = input.get_preceding_character();
if !preceding_character
.map(|c| c.is_whitespace() || c == '\u{200B}') // 200B = Zero-width space
.unwrap_or(allow_start_of_file)
{
return Err(nom::Err::Error(CustomError::MyError(MyError(
"Must be preceded by a non-whitespace character.".into(),
))));
}
Ok((input, ()))
}
/// Pull one non-whitespace character.
///
/// This function only operates on spaces, tabs, carriage returns, and line feeds. It does not handle fancy unicode whitespace.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn non_whitespace_character(input: OrgSource<'_>) -> Res<OrgSource<'_>, char> {
none_of(" \t\r\n")(input)
}
/// Check that we are at the start of a line
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn exit_matcher_parser<'b, 'g, 'r, 's>(
context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
peek(|i| context.check_exit_matcher(i))(input)
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn text_until_exit<'b, 'g, 'r, 's>(
context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
recognize(many_till(
anychar,
parser_with_context!(exit_matcher_parser)(context),
))(input)
}
#[allow(dead_code)]
pub fn not_yet_implemented() -> Res<OrgSource<'static>, ()> {
return Err(nom::Err::Error(CustomError::MyError(MyError(
"Not implemented yet.".into(),
))));
}
#[allow(dead_code)]
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
/// Text from the current point until the next line break or end of file
///
/// Useful for debugging.
pub fn text_until_eol<'r, 's>(
input: OrgSource<'s>,
) -> Result<&'s str, nom::Err<CustomError<OrgSource<'s>>>> {
let line = recognize(many_till(anychar, alt((line_ending, eof))))(input)
.map(|(_remaining, line)| Into::<&str>::into(line))?;
Ok(line.trim())
}