organic/src/parser/plain_text.rs
2023-09-23 19:13:01 -04:00

168 lines
5.2 KiB
Rust

use nom::branch::alt;
use nom::bytes::complete::is_not;
use nom::bytes::complete::tag_no_case;
use nom::character::complete::anychar;
use nom::character::complete::line_ending;
use nom::character::complete::one_of;
use nom::combinator::eof;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::combinator::verify;
use nom::multi::many1;
use nom::multi::many_till;
use nom::sequence::tuple;
use super::org_source::OrgSource;
use super::radio_link::RematchObject;
use super::util::exit_matcher_parser;
use super::util::get_consumed;
use super::util::org_space_or_line_ending;
use crate::context::parser_with_context;
use crate::context::RefContext;
use crate::error::CustomError;
use crate::error::MyError;
use crate::error::Res;
use crate::types::Object;
use crate::types::PlainText;
/// Build a plain-text parser that is parameterised by `end_condition`.
///
/// The returned closure takes a parser context and an input and forwards both,
/// together with a reference to the captured `end_condition`, to `_plain_text`.
///
/// `end_condition` is a context-aware parser producing `()`. `_plain_text` only
/// peeks at it: a position where it succeeds is treated as a place to stop
/// consuming plain text.
pub(crate) fn plain_text<F>(
end_condition: F,
) -> impl for<'b, 'g, 'r, 's> Fn(
RefContext<'b, 'g, 'r, 's>,
OrgSource<'s>,
) -> Res<OrgSource<'s>, PlainText<'s>>
where
F: for<'bb, 'gg, 'rr, 'ss> Fn(
RefContext<'bb, 'gg, 'rr, 'ss>,
OrgSource<'ss>,
) -> Res<OrgSource<'ss>, ()>,
{
// `move` hands ownership of `end_condition` to the returned closure, which
// then lends it to `_plain_text` by reference on every invocation.
move |context, input| _plain_text(&end_condition, context, input)
}
/// Consume a non-empty run of characters up to the next stopping point.
///
/// A stopping point is a position where either the context's exit matcher
/// (`exit_matcher_parser`) or the supplied `end_condition` succeeds. Both are
/// only peeked, so whatever they would match stays in the remaining input.
///
/// Returns an error when a stopping point is hit before any character has been
/// consumed, or when the input runs out before either matcher succeeds.
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(ret, level = "debug", skip(end_condition))
)]
fn _plain_text<'b, 'g, 'r, 's, F>(
    end_condition: F,
    context: RefContext<'b, 'g, 'r, 's>,
    input: OrgSource<'s>,
) -> Res<OrgSource<'s>, PlainText<'s>>
where
    F: for<'bb, 'gg, 'rr, 'ss> Fn(
        RefContext<'bb, 'gg, 'rr, 'ss>,
        OrgSource<'ss>,
    ) -> Res<OrgSource<'ss>, ()>,
{
    // Look-ahead only: succeeds where plain text has to end, consuming nothing.
    let stop_here = peek(alt((
        parser_with_context!(exit_matcher_parser)(context),
        recognize(parser_with_context!(end_condition)(context)),
    )));

    // Collect characters one at a time until `stop_here` matches, and reject
    // the result if no character was collected.
    let non_empty_run = verify(many_till(anychar, stop_here), |(chars, _stop)| {
        !chars.is_empty()
    });

    // Only the consumed span of the input is of interest, not the characters.
    let (rest, matched) = recognize(non_empty_run)(input)?;

    let plain_text = PlainText {
        source: matched.into(),
    };
    Ok((rest, plain_text))
}
impl<'x> RematchObject<'x> for PlainText<'x> {
    /// Try to match the text of `self.source` again at the start of `input`.
    ///
    /// The stored text is walked as alternating runs of non-whitespace and
    /// whitespace:
    ///
    /// * a non-whitespace run must appear in `input` (compared with
    ///   `tag_no_case`) and must be followed by a non-alphanumeric character or
    ///   the end of input;
    /// * a whitespace run must correspond to one or more
    ///   `org_space_or_line_ending` matches in `input`.
    ///
    /// On success the consumed span of `input` is returned as an
    /// `Object::PlainText`. Any mismatch is returned as an error.
    #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
    fn rematch_object<'b, 'g, 'r, 's>(
        &'x self,
        _context: RefContext<'b, 'g, 'r, 's>,
        input: OrgSource<'s>,
    ) -> Res<OrgSource<'s>, Object<'s>> {
        // `cursor` walks the input; `pending` is what is left of the stored text.
        let mut cursor = input;
        let mut pending = self.source;

        while !pending.is_empty() {
            // Case 1: the stored text continues with a run of non-whitespace.
            if let Ok((pending_rest, word)) =
                is_not::<&str, &str, CustomError<_>>(" \t\r\n")(pending)
            {
                let (cursor_rest, _) = tuple((
                    tag_no_case(word),
                    // TODO: Find out what the REAL condition is. Requiring a
                    // non-alphanumeric character (or end of input) after the word
                    // works for now, but the real criteria might be something like
                    // the plain text exit matcher.
                    peek(alt((
                        recognize(verify(anychar, |c| !c.is_alphanumeric())),
                        eof,
                    ))),
                ))(cursor)?;
                cursor = cursor_rest;
                pending = pending_rest;
                continue;
            }

            // Case 2: the stored text continues with a run of whitespace.
            if let Ok((pending_rest, _)) = recognize(many1(alt((
                recognize(one_of::<&str, &str, CustomError<_>>(" \t")),
                line_ending,
            ))))(pending)
            {
                let (cursor_rest, _) = many1(org_space_or_line_ending)(cursor)?;
                cursor = cursor_rest;
                pending = pending_rest;
                continue;
            }

            // Neither case applied to the stored text, so give up.
            return Err(nom::Err::Error(CustomError::MyError(MyError(
                "Target does not match.".into(),
            ))));
        }

        let consumed = get_consumed(input, cursor);
        let rematched = PlainText {
            source: Into::<&str>::into(consumed),
        };
        Ok((cursor, Object::PlainText(rematched)))
    }
}
#[cfg(test)]
mod tests {
    use nom::combinator::map;

    use super::*;
    use crate::context::Context;
    use crate::context::ContextElement;
    use crate::context::GlobalSettings;
    use crate::context::List;
    use crate::parser::object_parser::detect_standard_set_object_sans_plain_text;
    use crate::types::GetStandardProperties;

    /// Parsing `foobarbaz` should leave nothing behind and yield an object whose
    /// source is the whole input.
    #[test]
    fn plain_text_simple() {
        let source = OrgSource::new("foobarbaz");

        // Default settings with a document-level context.
        let settings = GlobalSettings::default();
        let document_context = ContextElement::document_context();
        let context = Context::new(&settings, List::new(&document_context));

        let matcher = parser_with_context!(plain_text(
            detect_standard_set_object_sans_plain_text
        ))(&context);
        let (rest, object) = map(matcher, Object::PlainText)(source).unwrap();

        assert_eq!(Into::<&str>::into(rest), "");
        assert_eq!(
            object.get_standard_properties().get_source(),
            Into::<&str>::into(source)
        );
    }
}