2023-03-25 11:59:19 -04:00
|
|
|
use nom::branch::alt;
|
2023-04-22 22:06:34 -04:00
|
|
|
use nom::character::complete::anychar;
|
2023-03-25 11:59:19 -04:00
|
|
|
use nom::character::complete::line_ending;
|
2023-04-07 17:14:44 -04:00
|
|
|
use nom::character::complete::multispace0;
|
2023-03-25 14:10:22 -04:00
|
|
|
use nom::character::complete::none_of;
|
2023-03-25 11:59:19 -04:00
|
|
|
use nom::character::complete::space0;
|
|
|
|
use nom::combinator::eof;
|
|
|
|
use nom::combinator::not;
|
2023-04-10 11:50:43 -04:00
|
|
|
use nom::combinator::opt;
|
2023-03-27 12:52:49 -04:00
|
|
|
use nom::combinator::peek;
|
2023-03-25 11:59:19 -04:00
|
|
|
use nom::combinator::recognize;
|
2023-04-22 22:06:34 -04:00
|
|
|
use nom::combinator::verify;
|
2023-03-25 11:59:19 -04:00
|
|
|
use nom::multi::many0;
|
2023-04-22 22:06:34 -04:00
|
|
|
use nom::multi::many_till;
|
2023-03-25 11:59:19 -04:00
|
|
|
use nom::sequence::tuple;
|
|
|
|
|
2023-08-22 22:57:44 -04:00
|
|
|
use super::org_source::OrgSource;
|
2023-04-22 21:45:18 -04:00
|
|
|
use super::parser_context::ContextElement;
|
|
|
|
use super::Context;
|
|
|
|
use crate::error::CustomError;
|
|
|
|
use crate::error::MyError;
|
|
|
|
use crate::error::Res;
|
|
|
|
use crate::parser::parser_with_context::parser_with_context;
|
|
|
|
|
2023-04-07 17:14:44 -04:00
|
|
|
/// The characters treated as word constituents: ASCII lowercase letters,
/// ASCII uppercase letters, and ASCII digits, in that order.
pub const WORD_CONSTITUENT_CHARACTERS: &str = concat!(
    "abcdefghijklmnopqrstuvwxyz",
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
    "0123456789",
);
|
2022-12-18 03:18:43 -05:00
|
|
|
|
2023-03-23 16:40:39 -04:00
|
|
|
/// Check if we are below a section of the given section type regardless of depth
|
2023-04-21 18:22:17 -04:00
|
|
|
#[allow(dead_code)]
|
2022-12-18 03:30:28 -05:00
|
|
|
pub fn in_section<'r, 's, 'x>(context: Context<'r, 's>, section_name: &'x str) -> bool {
|
2022-12-18 03:18:43 -05:00
|
|
|
for thing in context.iter() {
|
|
|
|
match thing.get_data() {
|
|
|
|
ContextElement::Context(name) if *name == section_name => return true,
|
2023-03-23 16:40:39 -04:00
|
|
|
_ => {}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
false
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Checks if we are currently an immediate child of the given section type
|
|
|
|
pub fn immediate_in_section<'r, 's, 'x>(context: Context<'r, 's>, section_name: &'x str) -> bool {
|
|
|
|
for thing in context.iter() {
|
|
|
|
match thing.get_data() {
|
|
|
|
ContextElement::Context(name) if *name == section_name => return true,
|
|
|
|
ContextElement::Context(name) if *name != section_name => return false,
|
|
|
|
_ => {}
|
2022-12-18 03:18:43 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
false
|
|
|
|
}
|
2023-03-25 11:25:10 -04:00
|
|
|
|
|
|
|
/// Check if the child string slice is a slice of the parent string slice.
///
/// This is purely an address-range test: it returns `true` when the bytes of
/// `child` lie entirely within the memory occupied by `parent`. The contents
/// of the two strings are never compared, so an equal string stored elsewhere
/// is *not* considered a slice of `parent`.
fn is_slice_of(parent: &str, child: &str) -> bool {
    // `as_ptr_range` yields the start and one-past-the-end pointers directly,
    // avoiding manual pointer-to-integer casts and additions.
    let parent_range = parent.as_bytes().as_ptr_range();
    let child_range = child.as_bytes().as_ptr_range();
    parent_range.start <= child_range.start && child_range.end <= parent_range.end
}
|
|
|
|
|
|
|
|
/// Get a slice of the string that was consumed in a parser using the original input to the parser and the remaining input after the parser.
|
2023-08-23 00:30:26 -04:00
|
|
|
pub fn get_consumed<'s>(input: OrgSource<'s>, remaining: OrgSource<'s>) -> OrgSource<'s> {
|
|
|
|
// TODO: This should be replaced with new logic now that we are wrapping the input type.
|
|
|
|
let input = Into::<&str>::into(&input);
|
|
|
|
let remaining = Into::<&str>::into(&remaining);
|
2023-03-25 11:25:10 -04:00
|
|
|
assert!(is_slice_of(input, remaining));
|
|
|
|
let source = {
|
|
|
|
let offset = remaining.as_ptr() as usize - input.as_ptr() as usize;
|
|
|
|
&input[..offset]
|
|
|
|
};
|
2023-08-23 00:30:26 -04:00
|
|
|
source.into()
|
2023-03-25 11:25:10 -04:00
|
|
|
}
|
|
|
|
|
2023-03-25 11:59:19 -04:00
|
|
|
/// A line containing only whitespace and then a line break
|
|
|
|
///
|
|
|
|
/// It is up to the caller to ensure this is called at the start of a line.
|
2023-08-10 20:04:59 -04:00
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
2023-08-23 00:30:26 -04:00
|
|
|
pub fn blank_line(input: OrgSource<'_>) -> Res<OrgSource<'_>, OrgSource<'_>> {
|
2023-08-22 22:57:44 -04:00
|
|
|
not(eof)(input)?;
|
|
|
|
recognize(tuple((space0, alt((line_ending, eof)))))(input)
|
2023-03-25 11:59:19 -04:00
|
|
|
}
|
|
|
|
|
2023-08-10 20:04:59 -04:00
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
2023-03-31 11:16:37 -04:00
|
|
|
pub fn element_trailing_whitespace<'r, 's>(
|
|
|
|
context: Context<'r, 's>,
|
2023-08-23 00:30:26 -04:00
|
|
|
input: OrgSource<'s>,
|
|
|
|
) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
2023-03-31 11:16:37 -04:00
|
|
|
start_of_line(context, input)?;
|
|
|
|
alt((eof, recognize(many0(blank_line))))(input)
|
|
|
|
}
|
|
|
|
|
2023-08-10 20:04:59 -04:00
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
2023-04-10 13:13:11 -04:00
|
|
|
pub fn maybe_consume_trailing_whitespace_if_not_exiting<'r, 's>(
|
|
|
|
context: Context<'r, 's>,
|
2023-08-23 00:30:26 -04:00
|
|
|
input: OrgSource<'s>,
|
|
|
|
) -> Res<OrgSource<'s>, Option<OrgSource<'s>>> {
|
2023-04-10 13:13:11 -04:00
|
|
|
if context.should_consume_trailing_whitespace() && exit_matcher_parser(context, input).is_err()
|
|
|
|
{
|
|
|
|
Ok(opt(parser_with_context!(element_trailing_whitespace)(
|
|
|
|
context,
|
|
|
|
))(input)?)
|
|
|
|
} else {
|
|
|
|
Ok((input, None))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-08-10 20:04:59 -04:00
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
2023-04-10 11:50:43 -04:00
|
|
|
pub fn maybe_consume_trailing_whitespace<'r, 's>(
|
|
|
|
context: Context<'r, 's>,
|
2023-08-23 00:30:26 -04:00
|
|
|
input: OrgSource<'s>,
|
|
|
|
) -> Res<OrgSource<'s>, Option<OrgSource<'s>>> {
|
2023-04-10 11:50:43 -04:00
|
|
|
if context.should_consume_trailing_whitespace() {
|
|
|
|
Ok(opt(parser_with_context!(element_trailing_whitespace)(
|
|
|
|
context,
|
|
|
|
))(input)?)
|
|
|
|
} else {
|
|
|
|
Ok((input, None))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-08-10 20:04:59 -04:00
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
2023-08-23 00:30:26 -04:00
|
|
|
pub fn trailing_whitespace(input: OrgSource<'_>) -> Res<OrgSource<'_>, OrgSource<'_>> {
|
2023-03-25 11:59:19 -04:00
|
|
|
alt((eof, recognize(tuple((line_ending, many0(blank_line))))))(input)
|
|
|
|
}
|
|
|
|
|
2023-03-25 14:10:22 -04:00
|
|
|
/// Check that we are at the start of a line
|
2023-08-10 20:04:59 -04:00
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
2023-08-23 00:30:26 -04:00
|
|
|
pub fn start_of_line<'r, 's>(
|
|
|
|
context: Context<'r, 's>,
|
|
|
|
input: OrgSource<'s>,
|
|
|
|
) -> Res<OrgSource<'s>, ()> {
|
2023-08-24 16:55:56 -04:00
|
|
|
if input.is_at_start_of_line() {
|
|
|
|
Ok((input, ()))
|
|
|
|
} else {
|
|
|
|
Err(nom::Err::Error(CustomError::MyError(MyError(
|
|
|
|
"Not at start of line".into(),
|
|
|
|
))))
|
|
|
|
}
|
2023-03-25 14:10:22 -04:00
|
|
|
}
|
|
|
|
|
2023-04-22 22:34:37 -04:00
|
|
|
/// Check that we are at the start of a line
|
2023-08-10 20:04:59 -04:00
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
2023-04-22 22:34:37 -04:00
|
|
|
pub fn preceded_by_whitespace<'r, 's>(
|
|
|
|
context: Context<'r, 's>,
|
2023-08-23 00:30:26 -04:00
|
|
|
input: OrgSource<'s>,
|
|
|
|
) -> Res<OrgSource<'s>, ()> {
|
2023-08-24 16:55:56 -04:00
|
|
|
let preceding_character = input.get_preceding_character();
|
2023-04-22 22:34:37 -04:00
|
|
|
match preceding_character {
|
|
|
|
Some('\n') | Some('\r') | Some(' ') | Some('\t') => {}
|
|
|
|
// If None, we are at the start of the file which is not allowed
|
|
|
|
None | Some(_) => {
|
|
|
|
return Err(nom::Err::Error(CustomError::MyError(MyError(
|
2023-08-23 00:30:26 -04:00
|
|
|
"Not preceded by whitespace.".into(),
|
2023-04-22 22:34:37 -04:00
|
|
|
))));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
Ok((input, ()))
|
|
|
|
}
|
|
|
|
|
2023-03-25 14:10:22 -04:00
|
|
|
/// Pull one non-whitespace character.
|
|
|
|
///
|
|
|
|
/// This function only operates on spaces, tabs, carriage returns, and line feeds. It does not handle fancy unicode whitespace.
|
2023-08-10 20:04:59 -04:00
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
2023-08-23 00:30:26 -04:00
|
|
|
pub fn non_whitespace_character(input: OrgSource<'_>) -> Res<OrgSource<'_>, char> {
|
2023-03-25 14:10:22 -04:00
|
|
|
none_of(" \t\r\n")(input)
|
|
|
|
}
|
|
|
|
|
2023-03-27 12:52:49 -04:00
|
|
|
/// Check that we are at the start of a line
|
2023-08-10 20:04:59 -04:00
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
2023-03-27 12:52:49 -04:00
|
|
|
pub fn exit_matcher_parser<'r, 's>(
|
|
|
|
context: Context<'r, 's>,
|
2023-08-23 00:30:26 -04:00
|
|
|
input: OrgSource<'s>,
|
|
|
|
) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
2023-03-27 12:52:49 -04:00
|
|
|
peek(|i| context.check_exit_matcher(i))(input)
|
|
|
|
}
|
|
|
|
|
2023-08-10 20:04:59 -04:00
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
2023-08-23 00:30:26 -04:00
|
|
|
pub fn always_fail<'r, 's>(
|
|
|
|
_context: Context<'r, 's>,
|
|
|
|
input: OrgSource<'s>,
|
|
|
|
) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
2023-03-27 18:08:17 -04:00
|
|
|
Err(nom::Err::Error(CustomError::MyError(MyError(
|
2023-08-23 00:30:26 -04:00
|
|
|
"Always fail".into(),
|
2023-03-27 18:08:17 -04:00
|
|
|
))))
|
|
|
|
}
|
|
|
|
|
2023-08-10 20:04:59 -04:00
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
2023-08-23 00:30:26 -04:00
|
|
|
pub fn whitespace_eof(input: OrgSource<'_>) -> Res<OrgSource<'_>, OrgSource<'_>> {
|
2023-04-07 17:14:44 -04:00
|
|
|
recognize(tuple((multispace0, eof)))(input)
|
|
|
|
}
|
|
|
|
|
2023-08-10 20:04:59 -04:00
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
2023-08-23 00:30:26 -04:00
|
|
|
pub fn text_until_exit<'r, 's>(
|
|
|
|
context: Context<'r, 's>,
|
|
|
|
input: OrgSource<'s>,
|
|
|
|
) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
2023-04-22 22:06:34 -04:00
|
|
|
recognize(verify(
|
|
|
|
many_till(anychar, parser_with_context!(exit_matcher_parser)(context)),
|
|
|
|
|(children, _exit_contents)| !children.is_empty(),
|
|
|
|
))(input)
|
|
|
|
}
|
|
|
|
|
2023-04-24 20:08:12 -04:00
|
|
|
#[allow(dead_code)]
|
2023-08-23 00:30:26 -04:00
|
|
|
pub fn not_yet_implemented() -> Res<OrgSource<'static>, ()> {
|
2023-04-24 20:08:12 -04:00
|
|
|
return Err(nom::Err::Error(CustomError::MyError(MyError(
|
2023-08-23 00:30:26 -04:00
|
|
|
"Not implemented yet.".into(),
|
2023-04-24 20:08:12 -04:00
|
|
|
))));
|
|
|
|
}
|