Building the plain list item context.
This commit is contained in:
@@ -14,8 +14,6 @@ use nom::multi::many1_count;
|
||||
use nom::sequence::tuple;
|
||||
|
||||
use crate::parser::element::element;
|
||||
use crate::parser::error::CustomError;
|
||||
use crate::parser::error::MyError;
|
||||
use crate::parser::object::standard_set_object;
|
||||
use crate::parser::parser_context::ChainBehavior;
|
||||
use crate::parser::parser_context::ContextElement;
|
||||
@@ -28,7 +26,7 @@ use super::object::Object;
|
||||
use super::parser_with_context::parser_with_context;
|
||||
use super::source::Source;
|
||||
use super::util::get_consumed;
|
||||
use super::util::get_one_before;
|
||||
use super::util::start_of_line;
|
||||
use super::util::trailing_whitespace;
|
||||
use super::Context;
|
||||
|
||||
@@ -117,7 +115,6 @@ fn heading<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Hea
|
||||
not(|i| context.check_exit_matcher(i))(input)?;
|
||||
let (remaining, (star_count, _ws, title, _ws2)) = headline(context, input)?;
|
||||
let section_matcher = parser_with_context!(section)(context);
|
||||
// TODO: This needs to only match headings below the current level
|
||||
let heading_matcher = parser_with_context!(heading)(context);
|
||||
let (remaining, children) = many0(alt((
|
||||
map(
|
||||
@@ -159,26 +156,6 @@ fn headline<'r, 's>(
|
||||
Ok((remaining, (star_count, ws, title, ws2)))
|
||||
}
|
||||
|
||||
fn headline_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
|
||||
fn headline_end<'r, 's>(_context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
|
||||
alt((line_ending, eof))(input)
|
||||
}
|
||||
|
||||
/// Check that we are at the start of a line
|
||||
fn start_of_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> {
|
||||
let document_root = context.get_document_root().unwrap();
|
||||
let preceding_character = get_one_before(document_root, input)
|
||||
.map(|slice| slice.chars().next())
|
||||
.flatten();
|
||||
match preceding_character {
|
||||
Some('\n') => {}
|
||||
Some(_) => {
|
||||
// Not at start of line, cannot be a heading
|
||||
return Err(nom::Err::Error(CustomError::MyError(MyError(
|
||||
"Not at start of line",
|
||||
))));
|
||||
}
|
||||
// If None, we are at the start of the file which allows for headings
|
||||
None => {}
|
||||
};
|
||||
Ok((input, ()))
|
||||
}
|
||||
|
||||
@@ -1,125 +0,0 @@
|
||||
use super::parser_context::ContextElement;
|
||||
use super::parser_context::PreviousElementNode;
|
||||
use super::token::Token;
|
||||
use super::Context;
|
||||
use nom::error::ErrorKind;
|
||||
use nom::error::ParseError;
|
||||
use nom::IResult;
|
||||
use nom::InputLength;
|
||||
|
||||
pub fn context_many1<'r, 's, I, O, E, M>(
|
||||
context: Context<'r, 's>,
|
||||
mut many_matcher: M,
|
||||
) -> impl FnMut(I) -> IResult<I, Vec<Token<'s>>, E> + 'r
|
||||
where
|
||||
I: Clone + InputLength,
|
||||
E: ParseError<I>,
|
||||
M: for<'x> Fn(Context<'x, 's>, I) -> IResult<I, O, E> + 'r,
|
||||
O: Into<Token<'s>>,
|
||||
{
|
||||
move |mut i: I| {
|
||||
let mut err = None;
|
||||
// TODO: Can I eliminate the clone? I think this is incrementing the reference count
|
||||
let mut current_context = context.clone();
|
||||
// Despite the clone, the Rc should still point to the same value.
|
||||
assert!(current_context.ptr_eq(context));
|
||||
loop {
|
||||
match many_matcher(¤t_context, i.clone()) {
|
||||
Ok((remaining, many_elem)) => {
|
||||
current_context = current_context.with_additional_node(
|
||||
ContextElement::PreviousElementNode(PreviousElementNode {
|
||||
element: many_elem.into(),
|
||||
}),
|
||||
);
|
||||
i = remaining;
|
||||
}
|
||||
the_error @ Err(_) => {
|
||||
err = Some(the_error);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
let mut elements: Vec<Token<'s>> = current_context
|
||||
.into_iter_until(context)
|
||||
.filter_map(|context_element| match context_element {
|
||||
ContextElement::PreviousElementNode(elem) => Some(elem.element),
|
||||
_ => None,
|
||||
})
|
||||
.collect();
|
||||
if elements.is_empty() {
|
||||
if let Some(err) = err {
|
||||
err?;
|
||||
}
|
||||
}
|
||||
elements.reverse();
|
||||
Ok((i, elements))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn context_many_till<'r, 's, I, O, E, F, M, T>(
|
||||
context: Context<'r, 's>,
|
||||
mut many_matcher: M,
|
||||
mut till_matcher: T,
|
||||
) -> impl FnMut(I) -> IResult<I, (Vec<Token<'s>>, F), E> + 'r
|
||||
where
|
||||
I: Clone + InputLength,
|
||||
E: ParseError<I>,
|
||||
M: for<'x> Fn(Context<'x, 's>, I) -> IResult<I, O, E> + 'r,
|
||||
T: for<'x> Fn(Context<'x, 's>, I) -> IResult<I, F, E> + 'r,
|
||||
O: Into<Token<'s>>,
|
||||
{
|
||||
move |mut i: I| {
|
||||
// TODO: Can I eliminate the clone? I think this is incrementing the reference count
|
||||
let mut current_context = context.clone();
|
||||
// Despite the clone, the Rc should still point to the same value, otherwise we'll get stuck in an endless loop.
|
||||
assert!(current_context.ptr_eq(context));
|
||||
loop {
|
||||
let len = i.input_len();
|
||||
match till_matcher(¤t_context, i.clone()) {
|
||||
Ok((remaining, finish)) => {
|
||||
let mut ret = Vec::new();
|
||||
while !current_context.ptr_eq(context) {
|
||||
let (context_element, next_context) = current_context.pop_front();
|
||||
let context_element = context_element.expect("We only pop off context elements created in this function, so they are all Some()");
|
||||
current_context = next_context;
|
||||
match context_element {
|
||||
ContextElement::PreviousElementNode(PreviousElementNode {
|
||||
element: token,
|
||||
}) => {
|
||||
ret.push(token);
|
||||
}
|
||||
_ => {}
|
||||
};
|
||||
}
|
||||
ret.reverse();
|
||||
return Ok((remaining, (ret, finish)));
|
||||
}
|
||||
Err(nom::Err::Error(_)) => {
|
||||
match many_matcher(¤t_context, i.clone()) {
|
||||
Err(nom::Err::Error(err)) => {
|
||||
return Err(nom::Err::Error(E::append(i, ErrorKind::ManyTill, err)))
|
||||
}
|
||||
Err(e) => return Err(e),
|
||||
Ok((remaining, many_elem)) => {
|
||||
// infinite loop check: the parser must always consume
|
||||
if remaining.input_len() == len {
|
||||
return Err(nom::Err::Error(E::from_error_kind(
|
||||
remaining,
|
||||
ErrorKind::ManyTill,
|
||||
)));
|
||||
}
|
||||
|
||||
current_context = current_context.with_additional_node(
|
||||
ContextElement::PreviousElementNode(PreviousElementNode {
|
||||
element: many_elem.into(),
|
||||
}),
|
||||
);
|
||||
i = remaining;
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => return Err(e),
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,29 +0,0 @@
|
||||
//! A single element of text.
|
||||
use super::combinator::context_many1;
|
||||
use super::error::Res;
|
||||
use super::paragraph::paragraph;
|
||||
use super::parser_context::ContextElement;
|
||||
use super::parser_context::ContextTree;
|
||||
use super::token::Paragraph;
|
||||
use super::token::Token;
|
||||
use super::Context;
|
||||
use nom::IResult;
|
||||
|
||||
type UnboundMatcher<'r, 's, I, O, E> = dyn Fn(Context<'r, 's>, I) -> IResult<I, O, E>;
|
||||
|
||||
// TODO: Implement FromStr for Document
|
||||
|
||||
pub fn document(input: &str) -> Res<&str, Vec<Paragraph>> {
|
||||
let initial_context: ContextTree<'_, '_> = ContextTree::new();
|
||||
let document_context =
|
||||
initial_context.with_additional_node(ContextElement::DocumentRoot(input));
|
||||
let (remaining, tokens) = context_many1(&document_context, paragraph)(input)?;
|
||||
let paragraphs = tokens
|
||||
.into_iter()
|
||||
.map(|token| match token {
|
||||
Token::TextElement(_) => unreachable!(),
|
||||
Token::Paragraph(paragraph) => paragraph,
|
||||
})
|
||||
.collect();
|
||||
Ok((remaining, paragraphs))
|
||||
}
|
||||
@@ -106,9 +106,14 @@ impl<'r, 's> ContextTree<'r, 's> {
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum ContextElement<'r, 's> {
|
||||
/// Stores a reference to the entire org-mode document being parsed.
|
||||
///
|
||||
/// This is used for look-behind.
|
||||
DocumentRoot(&'s str),
|
||||
ExitMatcherNode(ExitMatcherNode<'r>),
|
||||
Context(&'r str),
|
||||
|
||||
/// Stores the indentation level of the current list item
|
||||
ListItem(usize),
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,22 @@
|
||||
use nom::branch::alt;
|
||||
use nom::character::complete::space0;
|
||||
use nom::combinator::eof;
|
||||
use nom::combinator::not;
|
||||
use nom::combinator::recognize;
|
||||
use nom::combinator::verify;
|
||||
use nom::sequence::tuple;
|
||||
|
||||
use crate::parser::parser_context::ChainBehavior;
|
||||
use crate::parser::parser_context::ContextElement;
|
||||
use crate::parser::parser_context::ExitMatcherNode;
|
||||
use crate::parser::util::start_of_line;
|
||||
|
||||
use super::error::CustomError;
|
||||
use super::error::MyError;
|
||||
use super::error::Res;
|
||||
use super::lesser_element::Paragraph;
|
||||
use super::parser_with_context::parser_with_context;
|
||||
use super::util::non_whitespace_character;
|
||||
use super::Context;
|
||||
|
||||
#[allow(dead_code)]
|
||||
@@ -7,5 +24,52 @@ pub fn plain_list_item<'r, 's>(
|
||||
context: Context<'r, 's>,
|
||||
input: &'s str,
|
||||
) -> Res<&'s str, Paragraph<'s>> {
|
||||
not(|i| context.check_exit_matcher(i))(input)?;
|
||||
start_of_line(context, input)?;
|
||||
let (remaining, leading_whitespace) = space0(input)?;
|
||||
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
|
||||
let indent_level = leading_whitespace.len();
|
||||
let list_item_context = context
|
||||
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
|
||||
exit_matcher: ChainBehavior::AndParent(Some(&plain_list_item_end)),
|
||||
}))
|
||||
.with_additional_node(ContextElement::ListItem(indent_level));
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn plain_list_item_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
|
||||
let plain_list_item_matcher = parser_with_context!(plain_list_item)(context);
|
||||
let line_indented_lte_matcher = parser_with_context!(line_indented_lte)(context);
|
||||
alt((
|
||||
recognize(plain_list_item_matcher),
|
||||
line_indented_lte_matcher,
|
||||
eof,
|
||||
))(input)
|
||||
}
|
||||
|
||||
fn line_indented_lte<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
|
||||
let current_item_indent_level: &usize =
|
||||
get_context_item_indent(context).ok_or(nom::Err::Error(CustomError::MyError(MyError(
|
||||
"Not inside a plain list item",
|
||||
))))?;
|
||||
|
||||
start_of_line(context, input)?;
|
||||
|
||||
let matched = recognize(verify(
|
||||
tuple((space0::<&str, _>, non_whitespace_character)),
|
||||
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
|
||||
|(_space0, _anychar)| _space0.len() <= *current_item_indent_level,
|
||||
))(input)?;
|
||||
|
||||
Ok(matched)
|
||||
}
|
||||
|
||||
fn get_context_item_indent<'r, 's>(context: Context<'r, 's>) -> Option<&'r usize> {
|
||||
for thing in context.iter() {
|
||||
match thing.get_data() {
|
||||
ContextElement::ListItem(depth) => return Some(depth),
|
||||
_ => {}
|
||||
};
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use nom::branch::alt;
|
||||
use nom::character::complete::line_ending;
|
||||
use nom::character::complete::none_of;
|
||||
use nom::character::complete::space0;
|
||||
use nom::combinator::eof;
|
||||
use nom::combinator::not;
|
||||
@@ -7,6 +8,8 @@ use nom::combinator::recognize;
|
||||
use nom::multi::many0;
|
||||
use nom::sequence::tuple;
|
||||
|
||||
use super::error::CustomError;
|
||||
use super::error::MyError;
|
||||
use super::error::Res;
|
||||
use super::parser_context::ContextElement;
|
||||
use super::Context;
|
||||
@@ -76,6 +79,33 @@ pub fn trailing_whitespace(input: &str) -> Res<&str, &str> {
|
||||
alt((eof, recognize(tuple((line_ending, many0(blank_line))))))(input)
|
||||
}
|
||||
|
||||
/// Check that we are at the start of a line
|
||||
pub fn start_of_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> {
|
||||
let document_root = context.get_document_root().unwrap();
|
||||
let preceding_character = get_one_before(document_root, input)
|
||||
.map(|slice| slice.chars().next())
|
||||
.flatten();
|
||||
match preceding_character {
|
||||
Some('\n') => {}
|
||||
Some(_) => {
|
||||
// Not at start of line, cannot be a heading
|
||||
return Err(nom::Err::Error(CustomError::MyError(MyError(
|
||||
"Not at start of line",
|
||||
))));
|
||||
}
|
||||
// If None, we are at the start of the file which allows for headings
|
||||
None => {}
|
||||
};
|
||||
Ok((input, ()))
|
||||
}
|
||||
|
||||
/// Pull one non-whitespace character.
|
||||
///
|
||||
/// This function only operates on spaces, tabs, carriage returns, and line feeds. It does not handle fancy unicode whitespace.
|
||||
pub fn non_whitespace_character(input: &str) -> Res<&str, char> {
|
||||
none_of(" \t\r\n")(input)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
Reference in New Issue
Block a user