Building the plain list item context.

2023-03-25 14:10:22 -04:00
parent 4a863e92ff
commit e6752b9d83
7 changed files with 110 additions and 28 deletions
--- a/src/parser/document.rs
+++ b/src/parser/document.rs
@@ -14,8 +14,6 @@ use nom::multi::many1_count;
 use nom::sequence::tuple;

 use crate::parser::element::element;
-use crate::parser::error::CustomError;
-use crate::parser::error::MyError;
 use crate::parser::object::standard_set_object;
 use crate::parser::parser_context::ChainBehavior;
 use crate::parser::parser_context::ContextElement;
@@ -28,7 +26,7 @@ use super::object::Object;
 use super::parser_with_context::parser_with_context;
 use super::source::Source;
 use super::util::get_consumed;
-use super::util::get_one_before;
+use super::util::start_of_line;
 use super::util::trailing_whitespace;
 use super::Context;

@@ -117,7 +115,6 @@ fn heading<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Hea
    not(|i| context.check_exit_matcher(i))(input)?;
    let (remaining, (star_count, _ws, title, _ws2)) = headline(context, input)?;
    let section_matcher = parser_with_context!(section)(context);
-    // TODO: This needs to only match headings below the current level
    let heading_matcher = parser_with_context!(heading)(context);
    let (remaining, children) = many0(alt((
        map(
@@ -159,26 +156,6 @@ fn headline<'r, 's>(
    Ok((remaining, (star_count, ws, title, ws2)))
 }

-fn headline_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
+fn headline_end<'r, 's>(_context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
    alt((line_ending, eof))(input)
 }
-
-/// Check that we are at the start of a line
-fn start_of_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> {
-    let document_root = context.get_document_root().unwrap();
-    let preceding_character = get_one_before(document_root, input)
-        .map(|slice| slice.chars().next())
-        .flatten();
-    match preceding_character {
-        Some('\n') => {}
-        Some(_) => {
-            // Not at start of line, cannot be a heading
-            return Err(nom::Err::Error(CustomError::MyError(MyError(
-                "Not at start of line",
-            ))));
-        }
-        // If None, we are at the start of the file which allows for headings
-        None => {}
-    };
-    Ok((input, ()))
-}
--- a/src/parser/old_combinator.rs
+++ b/src/parser/old_combinator.rs
@@ -1,125 +0,0 @@
-use super::parser_context::ContextElement;
-use super::parser_context::PreviousElementNode;
-use super::token::Token;
-use super::Context;
-use nom::error::ErrorKind;
-use nom::error::ParseError;
-use nom::IResult;
-use nom::InputLength;
-
-pub fn context_many1<'r, 's, I, O, E, M>(
-    context: Context<'r, 's>,
-    mut many_matcher: M,
-) -> impl FnMut(I) -> IResult<I, Vec<Token<'s>>, E> + 'r
-where
-    I: Clone + InputLength,
-    E: ParseError<I>,
-    M: for<'x> Fn(Context<'x, 's>, I) -> IResult<I, O, E> + 'r,
-    O: Into<Token<'s>>,
-{
-    move |mut i: I| {
-        let mut err = None;
-        // TODO: Can I eliminate the clone? I think this is incrementing the reference count
-        let mut current_context = context.clone();
-        // Despite the clone, the Rc should still point to the same value.
-        assert!(current_context.ptr_eq(context));
-        loop {
-            match many_matcher(&current_context, i.clone()) {
-                Ok((remaining, many_elem)) => {
-                    current_context = current_context.with_additional_node(
-                        ContextElement::PreviousElementNode(PreviousElementNode {
-                            element: many_elem.into(),
-                        }),
-                    );
-                    i = remaining;
-                }
-                the_error @ Err(_) => {
-                    err = Some(the_error);
-                    break;
-                }
-            }
-        }
-        let mut elements: Vec<Token<'s>> = current_context
-            .into_iter_until(context)
-            .filter_map(|context_element| match context_element {
-                ContextElement::PreviousElementNode(elem) => Some(elem.element),
-                _ => None,
-            })
-            .collect();
-        if elements.is_empty() {
-            if let Some(err) = err {
-                err?;
-            }
-        }
-        elements.reverse();
-        Ok((i, elements))
-    }
-}
-
-pub fn context_many_till<'r, 's, I, O, E, F, M, T>(
-    context: Context<'r, 's>,
-    mut many_matcher: M,
-    mut till_matcher: T,
-) -> impl FnMut(I) -> IResult<I, (Vec<Token<'s>>, F), E> + 'r
-where
-    I: Clone + InputLength,
-    E: ParseError<I>,
-    M: for<'x> Fn(Context<'x, 's>, I) -> IResult<I, O, E> + 'r,
-    T: for<'x> Fn(Context<'x, 's>, I) -> IResult<I, F, E> + 'r,
-    O: Into<Token<'s>>,
-{
-    move |mut i: I| {
-        // TODO: Can I eliminate the clone? I think this is incrementing the reference count
-        let mut current_context = context.clone();
-        // Despite the clone, the Rc should still point to the same value, otherwise we'll get stuck in an endless loop.
-        assert!(current_context.ptr_eq(context));
-        loop {
-            let len = i.input_len();
-            match till_matcher(&current_context, i.clone()) {
-                Ok((remaining, finish)) => {
-                    let mut ret = Vec::new();
-                    while !current_context.ptr_eq(context) {
-                        let (context_element, next_context) = current_context.pop_front();
-                        let context_element = context_element.expect("We only pop off context elements created in this function, so they are all Some()");
-                        current_context = next_context;
-                        match context_element {
-                            ContextElement::PreviousElementNode(PreviousElementNode {
-                                element: token,
-                            }) => {
-                                ret.push(token);
-                            }
-                            _ => {}
-                        };
-                    }
-                    ret.reverse();
-                    return Ok((remaining, (ret, finish)));
-                }
-                Err(nom::Err::Error(_)) => {
-                    match many_matcher(&current_context, i.clone()) {
-                        Err(nom::Err::Error(err)) => {
-                            return Err(nom::Err::Error(E::append(i, ErrorKind::ManyTill, err)))
-                        }
-                        Err(e) => return Err(e),
-                        Ok((remaining, many_elem)) => {
-                            // infinite loop check: the parser must always consume
-                            if remaining.input_len() == len {
-                                return Err(nom::Err::Error(E::from_error_kind(
-                                    remaining,
-                                    ErrorKind::ManyTill,
-                                )));
-                            }
-
-                            current_context = current_context.with_additional_node(
-                                ContextElement::PreviousElementNode(PreviousElementNode {
-                                    element: many_elem.into(),
-                                }),
-                            );
-                            i = remaining;
-                        }
-                    }
-                }
-                Err(e) => return Err(e),
-            };
-        }
-    }
-}
--- a/src/parser/old_document.rs
+++ b/src/parser/old_document.rs
@@ -1,29 +0,0 @@
-//! A single element of text.
-use super::combinator::context_many1;
-use super::error::Res;
-use super::paragraph::paragraph;
-use super::parser_context::ContextElement;
-use super::parser_context::ContextTree;
-use super::token::Paragraph;
-use super::token::Token;
-use super::Context;
-use nom::IResult;
-
-type UnboundMatcher<'r, 's, I, O, E> = dyn Fn(Context<'r, 's>, I) -> IResult<I, O, E>;
-
-// TODO: Implement FromStr for Document
-
-pub fn document(input: &str) -> Res<&str, Vec<Paragraph>> {
-    let initial_context: ContextTree<'_, '_> = ContextTree::new();
-    let document_context =
-        initial_context.with_additional_node(ContextElement::DocumentRoot(input));
-    let (remaining, tokens) = context_many1(&document_context, paragraph)(input)?;
-    let paragraphs = tokens
-        .into_iter()
-        .map(|token| match token {
-            Token::TextElement(_) => unreachable!(),
-            Token::Paragraph(paragraph) => paragraph,
-        })
-        .collect();
-    Ok((remaining, paragraphs))
-}
--- a/src/parser/parser_context.rs
+++ b/src/parser/parser_context.rs
@@ -106,9 +106,14 @@ impl<'r, 's> ContextTree<'r, 's> {

 #[derive(Debug)]
 pub enum ContextElement<'r, 's> {
+    /// Stores a reference to the entire org-mode document being parsed.
+    ///
+    /// This is used for look-behind.
    DocumentRoot(&'s str),
    ExitMatcherNode(ExitMatcherNode<'r>),
    Context(&'r str),
+
+    /// Stores the indentation level of the current list item
    ListItem(usize),
 }

--- a/src/parser/plain_list.rs
+++ b/src/parser/plain_list.rs
@@ -1,5 +1,22 @@
+use nom::branch::alt;
+use nom::character::complete::space0;
+use nom::combinator::eof;
+use nom::combinator::not;
+use nom::combinator::recognize;
+use nom::combinator::verify;
+use nom::sequence::tuple;
+
+use crate::parser::parser_context::ChainBehavior;
+use crate::parser::parser_context::ContextElement;
+use crate::parser::parser_context::ExitMatcherNode;
+use crate::parser::util::start_of_line;
+
+use super::error::CustomError;
+use super::error::MyError;
 use super::error::Res;
 use super::lesser_element::Paragraph;
+use super::parser_with_context::parser_with_context;
+use super::util::non_whitespace_character;
 use super::Context;

 #[allow(dead_code)]
@@ -7,5 +24,52 @@ pub fn plain_list_item<'r, 's>(
    context: Context<'r, 's>,
    input: &'s str,
 ) -> Res<&'s str, Paragraph<'s>> {
+    not(|i| context.check_exit_matcher(i))(input)?;
+    start_of_line(context, input)?;
+    let (remaining, leading_whitespace) = space0(input)?;
+    // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
+    let indent_level = leading_whitespace.len();
+    let list_item_context = context
+        .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
+            exit_matcher: ChainBehavior::AndParent(Some(&plain_list_item_end)),
+        }))
+        .with_additional_node(ContextElement::ListItem(indent_level));
    todo!()
 }
+
+fn plain_list_item_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
+    let plain_list_item_matcher = parser_with_context!(plain_list_item)(context);
+    let line_indented_lte_matcher = parser_with_context!(line_indented_lte)(context);
+    alt((
+        recognize(plain_list_item_matcher),
+        line_indented_lte_matcher,
+        eof,
+    ))(input)
+}
+
+fn line_indented_lte<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
+    let current_item_indent_level: &usize =
+        get_context_item_indent(context).ok_or(nom::Err::Error(CustomError::MyError(MyError(
+            "Not inside a plain list item",
+        ))))?;
+
+    start_of_line(context, input)?;
+
+    let matched = recognize(verify(
+        tuple((space0::<&str, _>, non_whitespace_character)),
+        // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
+        |(_space0, _anychar)| _space0.len() <= *current_item_indent_level,
+    ))(input)?;
+
+    Ok(matched)
+}
+
+fn get_context_item_indent<'r, 's>(context: Context<'r, 's>) -> Option<&'r usize> {
+    for thing in context.iter() {
+        match thing.get_data() {
+            ContextElement::ListItem(depth) => return Some(depth),
+            _ => {}
+        };
+    }
+    None
+}
--- a/src/parser/util.rs
+++ b/src/parser/util.rs
@@ -1,5 +1,6 @@
 use nom::branch::alt;
 use nom::character::complete::line_ending;
+use nom::character::complete::none_of;
 use nom::character::complete::space0;
 use nom::combinator::eof;
 use nom::combinator::not;
@@ -7,6 +8,8 @@ use nom::combinator::recognize;
 use nom::multi::many0;
 use nom::sequence::tuple;

+use super::error::CustomError;
+use super::error::MyError;
 use super::error::Res;
 use super::parser_context::ContextElement;
 use super::Context;
@@ -76,6 +79,33 @@ pub fn trailing_whitespace(input: &str) -> Res<&str, &str> {
    alt((eof, recognize(tuple((line_ending, many0(blank_line))))))(input)
 }

+/// Check that we are at the start of a line
+pub fn start_of_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> {
+    let document_root = context.get_document_root().unwrap();
+    let preceding_character = get_one_before(document_root, input)
+        .map(|slice| slice.chars().next())
+        .flatten();
+    match preceding_character {
+        Some('\n') => {}
+        Some(_) => {
+            // Not at start of line, so nothing that must begin a line can match here
+            return Err(nom::Err::Error(CustomError::MyError(MyError(
+                "Not at start of line",
+            ))));
+        }
+        // If None, we are at the start of the file, which also counts as the start of a line
+        None => {}
+    };
+    Ok((input, ()))
+}
+
+/// Pull one non-whitespace character.
+///
+/// Only spaces, tabs, carriage returns, and line feeds are treated as whitespace (and rejected). Any other character, including fancy unicode whitespace, is accepted.
+pub fn non_whitespace_character(input: &str) -> Res<&str, char> {
+    none_of(" \t\r\n")(input)
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;