Building the plain list item context.

This commit is contained in:
Tom Alexander
2023-03-25 14:10:22 -04:00
parent 4a863e92ff
commit e6752b9d83
7 changed files with 110 additions and 28 deletions

View File

@@ -14,8 +14,6 @@ use nom::multi::many1_count;
use nom::sequence::tuple;
use crate::parser::element::element;
use crate::parser::error::CustomError;
use crate::parser::error::MyError;
use crate::parser::object::standard_set_object;
use crate::parser::parser_context::ChainBehavior;
use crate::parser::parser_context::ContextElement;
@@ -28,7 +26,7 @@ use super::object::Object;
use super::parser_with_context::parser_with_context;
use super::source::Source;
use super::util::get_consumed;
use super::util::get_one_before;
use super::util::start_of_line;
use super::util::trailing_whitespace;
use super::Context;
@@ -117,7 +115,6 @@ fn heading<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Hea
not(|i| context.check_exit_matcher(i))(input)?;
let (remaining, (star_count, _ws, title, _ws2)) = headline(context, input)?;
let section_matcher = parser_with_context!(section)(context);
// TODO: This needs to only match headings below the current level
let heading_matcher = parser_with_context!(heading)(context);
let (remaining, children) = many0(alt((
map(
@@ -159,26 +156,6 @@ fn headline<'r, 's>(
Ok((remaining, (star_count, ws, title, ws2)))
}
fn headline_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
fn headline_end<'r, 's>(_context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
alt((line_ending, eof))(input)
}
/// Check that we are at the start of a line
fn start_of_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> {
let document_root = context.get_document_root().unwrap();
let preceding_character = get_one_before(document_root, input)
.map(|slice| slice.chars().next())
.flatten();
match preceding_character {
Some('\n') => {}
Some(_) => {
// Not at start of line, cannot be a heading
return Err(nom::Err::Error(CustomError::MyError(MyError(
"Not at start of line",
))));
}
// If None, we are at the start of the file which allows for headings
None => {}
};
Ok((input, ()))
}

View File

@@ -1,125 +0,0 @@
use super::parser_context::ContextElement;
use super::parser_context::PreviousElementNode;
use super::token::Token;
use super::Context;
use nom::error::ErrorKind;
use nom::error::ParseError;
use nom::IResult;
use nom::InputLength;
pub fn context_many1<'r, 's, I, O, E, M>(
context: Context<'r, 's>,
mut many_matcher: M,
) -> impl FnMut(I) -> IResult<I, Vec<Token<'s>>, E> + 'r
where
I: Clone + InputLength,
E: ParseError<I>,
M: for<'x> Fn(Context<'x, 's>, I) -> IResult<I, O, E> + 'r,
O: Into<Token<'s>>,
{
move |mut i: I| {
let mut err = None;
// TODO: Can I eliminate the clone? I think this is incrementing the reference count
let mut current_context = context.clone();
// Despite the clone, the Rc should still point to the same value.
assert!(current_context.ptr_eq(context));
loop {
match many_matcher(&current_context, i.clone()) {
Ok((remaining, many_elem)) => {
current_context = current_context.with_additional_node(
ContextElement::PreviousElementNode(PreviousElementNode {
element: many_elem.into(),
}),
);
i = remaining;
}
the_error @ Err(_) => {
err = Some(the_error);
break;
}
}
}
let mut elements: Vec<Token<'s>> = current_context
.into_iter_until(context)
.filter_map(|context_element| match context_element {
ContextElement::PreviousElementNode(elem) => Some(elem.element),
_ => None,
})
.collect();
if elements.is_empty() {
if let Some(err) = err {
err?;
}
}
elements.reverse();
Ok((i, elements))
}
}
pub fn context_many_till<'r, 's, I, O, E, F, M, T>(
context: Context<'r, 's>,
mut many_matcher: M,
mut till_matcher: T,
) -> impl FnMut(I) -> IResult<I, (Vec<Token<'s>>, F), E> + 'r
where
I: Clone + InputLength,
E: ParseError<I>,
M: for<'x> Fn(Context<'x, 's>, I) -> IResult<I, O, E> + 'r,
T: for<'x> Fn(Context<'x, 's>, I) -> IResult<I, F, E> + 'r,
O: Into<Token<'s>>,
{
move |mut i: I| {
// TODO: Can I eliminate the clone? I think this is incrementing the reference count
let mut current_context = context.clone();
// Despite the clone, the Rc should still point to the same value, otherwise we'll get stuck in an endless loop.
assert!(current_context.ptr_eq(context));
loop {
let len = i.input_len();
match till_matcher(&current_context, i.clone()) {
Ok((remaining, finish)) => {
let mut ret = Vec::new();
while !current_context.ptr_eq(context) {
let (context_element, next_context) = current_context.pop_front();
let context_element = context_element.expect("We only pop off context elements created in this function, so they are all Some()");
current_context = next_context;
match context_element {
ContextElement::PreviousElementNode(PreviousElementNode {
element: token,
}) => {
ret.push(token);
}
_ => {}
};
}
ret.reverse();
return Ok((remaining, (ret, finish)));
}
Err(nom::Err::Error(_)) => {
match many_matcher(&current_context, i.clone()) {
Err(nom::Err::Error(err)) => {
return Err(nom::Err::Error(E::append(i, ErrorKind::ManyTill, err)))
}
Err(e) => return Err(e),
Ok((remaining, many_elem)) => {
// infinite loop check: the parser must always consume
if remaining.input_len() == len {
return Err(nom::Err::Error(E::from_error_kind(
remaining,
ErrorKind::ManyTill,
)));
}
current_context = current_context.with_additional_node(
ContextElement::PreviousElementNode(PreviousElementNode {
element: many_elem.into(),
}),
);
i = remaining;
}
}
}
Err(e) => return Err(e),
};
}
}
}

View File

@@ -1,29 +0,0 @@
//! A single element of text.
use super::combinator::context_many1;
use super::error::Res;
use super::paragraph::paragraph;
use super::parser_context::ContextElement;
use super::parser_context::ContextTree;
use super::token::Paragraph;
use super::token::Token;
use super::Context;
use nom::IResult;
type UnboundMatcher<'r, 's, I, O, E> = dyn Fn(Context<'r, 's>, I) -> IResult<I, O, E>;
// TODO: Implement FromStr for Document
pub fn document(input: &str) -> Res<&str, Vec<Paragraph>> {
let initial_context: ContextTree<'_, '_> = ContextTree::new();
let document_context =
initial_context.with_additional_node(ContextElement::DocumentRoot(input));
let (remaining, tokens) = context_many1(&document_context, paragraph)(input)?;
let paragraphs = tokens
.into_iter()
.map(|token| match token {
Token::TextElement(_) => unreachable!(),
Token::Paragraph(paragraph) => paragraph,
})
.collect();
Ok((remaining, paragraphs))
}

View File

@@ -106,9 +106,14 @@ impl<'r, 's> ContextTree<'r, 's> {
#[derive(Debug)]
pub enum ContextElement<'r, 's> {
/// Stores a reference to the entire org-mode document being parsed.
///
/// This is used for look-behind.
DocumentRoot(&'s str),
ExitMatcherNode(ExitMatcherNode<'r>),
Context(&'r str),
/// Stores the indentation level of the current list item
ListItem(usize),
}

View File

@@ -1,5 +1,22 @@
use nom::branch::alt;
use nom::character::complete::space0;
use nom::combinator::eof;
use nom::combinator::not;
use nom::combinator::recognize;
use nom::combinator::verify;
use nom::sequence::tuple;
use crate::parser::parser_context::ChainBehavior;
use crate::parser::parser_context::ContextElement;
use crate::parser::parser_context::ExitMatcherNode;
use crate::parser::util::start_of_line;
use super::error::CustomError;
use super::error::MyError;
use super::error::Res;
use super::lesser_element::Paragraph;
use super::parser_with_context::parser_with_context;
use super::util::non_whitespace_character;
use super::Context;
#[allow(dead_code)]
@@ -7,5 +24,52 @@ pub fn plain_list_item<'r, 's>(
context: Context<'r, 's>,
input: &'s str,
) -> Res<&'s str, Paragraph<'s>> {
not(|i| context.check_exit_matcher(i))(input)?;
start_of_line(context, input)?;
let (remaining, leading_whitespace) = space0(input)?;
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
let indent_level = leading_whitespace.len();
let list_item_context = context
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
exit_matcher: ChainBehavior::AndParent(Some(&plain_list_item_end)),
}))
.with_additional_node(ContextElement::ListItem(indent_level));
todo!()
}
fn plain_list_item_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
let plain_list_item_matcher = parser_with_context!(plain_list_item)(context);
let line_indented_lte_matcher = parser_with_context!(line_indented_lte)(context);
alt((
recognize(plain_list_item_matcher),
line_indented_lte_matcher,
eof,
))(input)
}
fn line_indented_lte<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
let current_item_indent_level: &usize =
get_context_item_indent(context).ok_or(nom::Err::Error(CustomError::MyError(MyError(
"Not inside a plain list item",
))))?;
start_of_line(context, input)?;
let matched = recognize(verify(
tuple((space0::<&str, _>, non_whitespace_character)),
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
|(_space0, _anychar)| _space0.len() <= *current_item_indent_level,
))(input)?;
Ok(matched)
}
fn get_context_item_indent<'r, 's>(context: Context<'r, 's>) -> Option<&'r usize> {
for thing in context.iter() {
match thing.get_data() {
ContextElement::ListItem(depth) => return Some(depth),
_ => {}
};
}
None
}

View File

@@ -1,5 +1,6 @@
use nom::branch::alt;
use nom::character::complete::line_ending;
use nom::character::complete::none_of;
use nom::character::complete::space0;
use nom::combinator::eof;
use nom::combinator::not;
@@ -7,6 +8,8 @@ use nom::combinator::recognize;
use nom::multi::many0;
use nom::sequence::tuple;
use super::error::CustomError;
use super::error::MyError;
use super::error::Res;
use super::parser_context::ContextElement;
use super::Context;
@@ -76,6 +79,33 @@ pub fn trailing_whitespace(input: &str) -> Res<&str, &str> {
alt((eof, recognize(tuple((line_ending, many0(blank_line))))))(input)
}
/// Check that we are at the start of a line
pub fn start_of_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> {
let document_root = context.get_document_root().unwrap();
let preceding_character = get_one_before(document_root, input)
.map(|slice| slice.chars().next())
.flatten();
match preceding_character {
Some('\n') => {}
Some(_) => {
// Not at start of line, cannot be a heading
return Err(nom::Err::Error(CustomError::MyError(MyError(
"Not at start of line",
))));
}
// If None, we are at the start of the file which allows for headings
None => {}
};
Ok((input, ()))
}
/// Pull one non-whitespace character.
///
/// This function only operates on spaces, tabs, carriage returns, and line feeds. It does not handle fancy unicode whitespace.
pub fn non_whitespace_character(input: &str) -> Res<&str, char> {
none_of(" \t\r\n")(input)
}
#[cfg(test)]
mod tests {
use super::*;