Merge branch 'plain_list'
This commit is contained in:
commit
0e070f2d4c
@ -12,7 +12,10 @@ path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
nom = "7.1.1"
|
||||
opentelemetry = "0.17.0"
|
||||
opentelemetry-jaeger = "0.16.0"
|
||||
tracing = "0.1.37"
|
||||
tracing-subscriber = "0.3.16"
|
||||
tracing-opentelemetry = "0.17.2"
|
||||
tracing-subscriber = {version="0.3.16", features=["env-filter"]}
|
||||
|
||||
[features]
|
||||
|
26
Makefile
26
Makefile
@ -11,15 +11,33 @@ endif
|
||||
.RECIPEPREFIX = >
|
||||
|
||||
.PHONY: build
|
||||
build: target/debug/toy
|
||||
build:
|
||||
> cargo build
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
> cargo clean
|
||||
|
||||
target/debug/toy:
|
||||
> cargo build
|
||||
.PHONY: test
|
||||
test:
|
||||
> cargo test
|
||||
|
||||
.PHONY: run
|
||||
run:
|
||||
> cargo run
|
||||
|
||||
.PHONY: debug
|
||||
debug:
|
||||
> RUST_LOG=debug cargo run
|
||||
|
||||
.PHONY: jaeger
|
||||
jaeger:
|
||||
> docker run -d --rm -p 6831:6831/udp -p 6832:6832/udp -p 16686:16686 -p 14268:14268 jaegertracing/all-in-one:latest
|
||||
> docker run -d --rm --name toylanguagedocker -p 6831:6831/udp -p 6832:6832/udp -p 16686:16686 -p 14268:14268 jaegertracing/all-in-one:latest
|
||||
|
||||
.PHONY: jaegerweb
|
||||
jaegerweb:
|
||||
> xdg-open 'http://localhost:16686'
|
||||
|
||||
.PHONY: jaegerstop
|
||||
jaegerstop:
|
||||
> docker stop toylanguagedocker
|
||||
|
35
notes/exit_matcher_loop_notes.txt
Normal file
35
notes/exit_matcher_loop_notes.txt
Normal file
@ -0,0 +1,35 @@
|
||||
Headings add exit matcher for heading
|
||||
|
||||
Paragraphs add an exit matcher for elements (but it should match every element except paragraphs)
|
||||
|
||||
|
||||
|
||||
|
||||
* foo
|
||||
* bar
|
||||
* baz
|
||||
|
||||
context tree -> ()
|
||||
|
||||
match * foo
|
||||
|
||||
context tree -> exit(heading matcher)
|
||||
|
||||
check exit
|
||||
invoke heading matcher
|
||||
check exit
|
||||
invoke heading matcher
|
||||
check exit
|
||||
invoke heading matcher
|
||||
adds second heading matcher exit
|
||||
|
||||
|
||||
Ways around this:
|
||||
- Always parse SOMETHING before checking for exit
|
||||
- Doesn't always seem possible
|
||||
- Disable exit matchers during exit check
|
||||
- Seems like it would break syntax
|
||||
- Have separate parsers for the beginning of the exit condition (for example, checking for just the headline instead of the full heading parser)
|
||||
- Won't be possible with paragraphs ending at any other element
|
||||
- Check exit matchers in parent parser
|
||||
- Will this work? It seems like it would just create larger loops
|
@ -0,0 +1,2 @@
|
||||
foo bar baz
|
||||
1. lorem
|
3
org_mode_samples/plain_lists/empty_list_item.org
Normal file
3
org_mode_samples/plain_lists/empty_list_item.org
Normal file
@ -0,0 +1,3 @@
|
||||
1.
|
||||
2.
|
||||
3.
|
@ -0,0 +1,2 @@
|
||||
1. foo
|
||||
1. bar
|
@ -0,0 +1 @@
|
||||
Seems like the only element that can exist on the same line as the opening of an item is a paragraph. Perhaps all other elements should have a start of line matcher at the beginning of their parser to force this?
|
@ -0,0 +1,4 @@
|
||||
1. regular
|
||||
1. nested list
|
||||
2. 1. Sameline
|
||||
3. | table|
|
44
src/main.rs
44
src/main.rs
@ -1,26 +1,42 @@
|
||||
#![feature(round_char_boundary)]
|
||||
use crate::parser::document;
|
||||
use tracing::Level;
|
||||
use tracing_subscriber::fmt::format::FmtSpan;
|
||||
|
||||
use tracing_subscriber::EnvFilter;
|
||||
mod parser;
|
||||
use tracing_subscriber::fmt;
|
||||
use tracing_subscriber::layer::SubscriberExt;
|
||||
use tracing_subscriber::util::SubscriberInitExt;
|
||||
|
||||
const TEST_DOC: &'static str = include_str!("../toy_language.txt");
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let format = tracing_subscriber::fmt::format()
|
||||
.with_file(true)
|
||||
.with_line_number(true)
|
||||
.with_thread_ids(false)
|
||||
.with_target(false);
|
||||
let subscriber = tracing_subscriber::fmt()
|
||||
.event_format(format)
|
||||
.with_max_level(Level::TRACE)
|
||||
.with_span_events(FmtSpan::ENTER | FmtSpan::EXIT)
|
||||
.finish();
|
||||
tracing::subscriber::set_global_default(subscriber)?;
|
||||
init_telemetry()?;
|
||||
let parsed = document(TEST_DOC);
|
||||
println!("{}\n\n\n", TEST_DOC);
|
||||
println!("{:#?}", parsed);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn init_telemetry() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let env_filter = EnvFilter::try_from_default_env().unwrap_or(EnvFilter::new("WARN"));
|
||||
|
||||
let stdout = fmt::Layer::new()
|
||||
.pretty()
|
||||
.with_file(true)
|
||||
.with_line_number(true)
|
||||
.with_thread_ids(false)
|
||||
.with_target(false);
|
||||
|
||||
opentelemetry::global::set_text_map_propagator(opentelemetry_jaeger::Propagator::new());
|
||||
let tracer = opentelemetry_jaeger::new_pipeline()
|
||||
.with_service_name("toy_language")
|
||||
.install_simple()?;
|
||||
|
||||
let opentelemetry = tracing_opentelemetry::layer().with_tracer(tracer);
|
||||
|
||||
tracing_subscriber::registry()
|
||||
.with(env_filter)
|
||||
.with(opentelemetry)
|
||||
.with(stdout)
|
||||
.try_init()?;
|
||||
Ok(())
|
||||
}
|
||||
|
@ -1,38 +0,0 @@
|
||||
use nom::error::ParseError;
|
||||
use nom::IResult;
|
||||
use nom::InputLength;
|
||||
|
||||
use super::Context;
|
||||
|
||||
pub fn context_many1<'r: 's, 's, I, O, E, M>(
|
||||
context: Context<'r, 's>,
|
||||
mut many_matcher: M,
|
||||
) -> impl FnMut(I) -> IResult<I, Vec<O>, E> + 'r
|
||||
where
|
||||
I: Clone + InputLength,
|
||||
E: ParseError<I>,
|
||||
M: for<'x> Fn(Context<'x, 's>, I) -> IResult<I, O, E> + 'r,
|
||||
{
|
||||
move |mut i: I| {
|
||||
let mut err = None;
|
||||
let mut elements: Vec<O> = Vec::new();
|
||||
loop {
|
||||
match many_matcher(&context, i.clone()) {
|
||||
Ok((remaining, many_elem)) => {
|
||||
i = remaining;
|
||||
elements.push(many_elem);
|
||||
}
|
||||
the_error @ Err(_) => {
|
||||
err = Some(the_error);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if elements.is_empty() {
|
||||
if let Some(err) = err {
|
||||
err?;
|
||||
}
|
||||
}
|
||||
Ok((i, elements))
|
||||
}
|
||||
}
|
@ -11,24 +11,25 @@ use nom::combinator::verify;
|
||||
use nom::multi::many0;
|
||||
use nom::multi::many1;
|
||||
use nom::multi::many1_count;
|
||||
use nom::multi::many_till;
|
||||
use nom::sequence::tuple;
|
||||
|
||||
use crate::parser::element::element;
|
||||
use crate::parser::error::CustomError;
|
||||
use crate::parser::error::MyError;
|
||||
use crate::parser::object::standard_set_object;
|
||||
use crate::parser::parser_context::ChainBehavior;
|
||||
use crate::parser::parser_context::ContextElement;
|
||||
use crate::parser::parser_context::ContextTree;
|
||||
use crate::parser::parser_context::ExitMatcherNode;
|
||||
use crate::parser::util::element_trailing_whitespace;
|
||||
|
||||
use super::element::Element;
|
||||
use super::error::Res;
|
||||
use super::object::Object;
|
||||
use super::parser_with_context::parser_with_context;
|
||||
use super::source::Source;
|
||||
use super::util::exit_matcher_parser;
|
||||
use super::util::get_consumed;
|
||||
use super::util::get_one_before;
|
||||
use super::util::start_of_line;
|
||||
use super::util::trailing_whitespace;
|
||||
use super::Context;
|
||||
|
||||
@ -74,6 +75,7 @@ impl<'s> Source<'s> for DocumentElement<'s> {
|
||||
}
|
||||
}
|
||||
|
||||
#[tracing::instrument(ret, level = "debug")]
|
||||
#[allow(dead_code)]
|
||||
pub fn document(input: &str) -> Res<&str, Document> {
|
||||
let initial_context: ContextTree<'_, '_> = ContextTree::new();
|
||||
@ -94,6 +96,7 @@ pub fn document(input: &str) -> Res<&str, Document> {
|
||||
))
|
||||
}
|
||||
|
||||
#[tracing::instrument(ret, level = "debug")]
|
||||
fn section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Section<'s>> {
|
||||
// TODO: The zeroth section is specialized so it probably needs its own parser
|
||||
let parser_context = context
|
||||
@ -101,23 +104,51 @@ fn section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Sec
|
||||
exit_matcher: ChainBehavior::AndParent(Some(&section_end)),
|
||||
}))
|
||||
.with_additional_node(ContextElement::Context("section"));
|
||||
not(|i| parser_context.check_exit_matcher(i))(input)?;
|
||||
let element_matcher = parser_with_context!(element)(&parser_context);
|
||||
let (remaining, children) = many1(element_matcher)(input)?;
|
||||
let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context);
|
||||
let trailing_matcher = parser_with_context!(element_trailing_whitespace)(&parser_context);
|
||||
let (remaining, (children, _exit_contents)) = verify(
|
||||
many_till(
|
||||
tuple((
|
||||
element_matcher,
|
||||
opt(map(trailing_matcher, Element::TrailingWhitespace)),
|
||||
)),
|
||||
exit_matcher,
|
||||
),
|
||||
|(children, _exit_contents)| !children.is_empty(),
|
||||
)(input)?;
|
||||
let flattened_children: Vec<Element> = children
|
||||
.into_iter()
|
||||
.flat_map(|tpl| {
|
||||
let mut flattened_children = Vec::with_capacity(2);
|
||||
flattened_children.push(tpl.0);
|
||||
if let Some(bar) = tpl.1 {
|
||||
flattened_children.push(bar);
|
||||
}
|
||||
flattened_children.into_iter()
|
||||
})
|
||||
.collect();
|
||||
let source = get_consumed(input, remaining);
|
||||
Ok((remaining, Section { source, children }))
|
||||
Ok((
|
||||
remaining,
|
||||
Section {
|
||||
source,
|
||||
children: flattened_children,
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
#[tracing::instrument(ret, level = "debug")]
|
||||
fn section_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
|
||||
let headline_matcher = parser_with_context!(headline)(context);
|
||||
alt((recognize(headline_matcher), eof))(input)
|
||||
}
|
||||
|
||||
#[tracing::instrument(ret, level = "debug")]
|
||||
fn heading<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Heading<'s>> {
|
||||
not(|i| context.check_exit_matcher(i))(input)?;
|
||||
let (remaining, (star_count, _ws, title, _ws2)) = headline(context, input)?;
|
||||
let section_matcher = parser_with_context!(section)(context);
|
||||
// TODO: This needs to only match headings below the current level
|
||||
let heading_matcher = parser_with_context!(heading)(context);
|
||||
let (remaining, children) = many0(alt((
|
||||
map(
|
||||
@ -138,6 +169,7 @@ fn heading<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Hea
|
||||
))
|
||||
}
|
||||
|
||||
#[tracing::instrument(ret, level = "debug")]
|
||||
fn headline<'r, 's>(
|
||||
context: Context<'r, 's>,
|
||||
input: &'s str,
|
||||
@ -159,26 +191,7 @@ fn headline<'r, 's>(
|
||||
Ok((remaining, (star_count, ws, title, ws2)))
|
||||
}
|
||||
|
||||
fn headline_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
|
||||
#[tracing::instrument(ret, level = "debug")]
|
||||
fn headline_end<'r, 's>(_context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
|
||||
alt((line_ending, eof))(input)
|
||||
}
|
||||
|
||||
/// Check that we are at the start of a line
|
||||
fn start_of_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> {
|
||||
let document_root = context.get_document_root().unwrap();
|
||||
let preceding_character = get_one_before(document_root, input)
|
||||
.map(|slice| slice.chars().next())
|
||||
.flatten();
|
||||
match preceding_character {
|
||||
Some('\n') => {}
|
||||
Some(_) => {
|
||||
// Not at start of line, cannot be a heading
|
||||
return Err(nom::Err::Error(CustomError::MyError(MyError(
|
||||
"Not at start of line",
|
||||
))));
|
||||
}
|
||||
// If None, we are at the start of the file which allows for headings
|
||||
None => {}
|
||||
};
|
||||
Ok((input, ()))
|
||||
}
|
||||
|
@ -1,18 +1,22 @@
|
||||
use crate::parser::parser_with_context::parser_with_context;
|
||||
use nom::combinator::map;
|
||||
use nom::combinator::not;
|
||||
|
||||
use super::error::Res;
|
||||
use super::greater_element::PlainList;
|
||||
use super::lesser_element::Paragraph;
|
||||
use super::paragraph::paragraph;
|
||||
use super::plain_list::plain_list;
|
||||
use super::source::Source;
|
||||
use super::Context;
|
||||
use crate::parser::parser_with_context::parser_with_context;
|
||||
use nom::branch::alt;
|
||||
use nom::combinator::map;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Element<'s> {
|
||||
Paragraph(Paragraph<'s>),
|
||||
PlainList(PlainList<'s>),
|
||||
/// The whitespace that follows an element.
|
||||
///
|
||||
/// This isn't a real org-mode element. Except for items in plain lists, trailing blank lines belong to the preceding element. It is a separate `Element` in this enum to make parsing easier.
|
||||
TrailingWhitespace(&'s str),
|
||||
}
|
||||
|
||||
impl<'s> Source<'s> for Element<'s> {
|
||||
@ -20,14 +24,26 @@ impl<'s> Source<'s> for Element<'s> {
|
||||
match self {
|
||||
Element::Paragraph(obj) => obj.source,
|
||||
Element::PlainList(obj) => obj.source,
|
||||
Element::TrailingWhitespace(src) => src,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tracing::instrument(ret, level = "debug")]
|
||||
pub fn element<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Element<'s>> {
|
||||
not(|i| context.check_exit_matcher(i))(input)?;
|
||||
|
||||
let non_paragraph_matcher = parser_with_context!(non_paragraph_element)(context);
|
||||
let paragraph_matcher = parser_with_context!(paragraph)(context);
|
||||
|
||||
map(paragraph_matcher, Element::Paragraph)(input)
|
||||
alt((
|
||||
non_paragraph_matcher,
|
||||
map(paragraph_matcher, Element::Paragraph),
|
||||
))(input)
|
||||
}
|
||||
|
||||
pub fn non_paragraph_element<'r, 's>(
|
||||
context: Context<'r, 's>,
|
||||
input: &'s str,
|
||||
) -> Res<&'s str, Element<'s>> {
|
||||
let plain_list_matcher = parser_with_context!(plain_list)(context);
|
||||
map(plain_list_matcher, Element::PlainList)(input)
|
||||
}
|
||||
|
@ -18,7 +18,7 @@ impl<I> ParseError<I> for CustomError<I> {
|
||||
CustomError::Nom(input, kind)
|
||||
}
|
||||
|
||||
fn append(input: I, kind: ErrorKind, mut other: Self) -> Self {
|
||||
fn append(_input: I, _kind: ErrorKind, mut other: Self) -> Self {
|
||||
// Doesn't do append like VerboseError
|
||||
other
|
||||
}
|
||||
|
@ -1,4 +1,15 @@
|
||||
use super::element::Element;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PlainList<'s> {
|
||||
pub source: &'s str,
|
||||
pub children: Vec<PlainListItem<'s>>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PlainListItem<'s> {
|
||||
pub source: &'s str,
|
||||
pub indentation: usize,
|
||||
pub bullet: &'s str,
|
||||
pub contents: Vec<Element<'s>>,
|
||||
}
|
||||
|
@ -1,4 +1,3 @@
|
||||
mod combinator;
|
||||
mod document;
|
||||
mod element;
|
||||
mod error;
|
||||
@ -9,6 +8,7 @@ mod object;
|
||||
mod paragraph;
|
||||
mod parser_context;
|
||||
mod parser_with_context;
|
||||
mod plain_list;
|
||||
mod plain_text;
|
||||
mod source;
|
||||
mod util;
|
||||
|
@ -9,8 +9,12 @@ use super::Context;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Object<'s> {
|
||||
#[allow(dead_code)]
|
||||
TextMarkup(TextMarkup<'s>),
|
||||
|
||||
PlainText(PlainText<'s>),
|
||||
|
||||
#[allow(dead_code)]
|
||||
RegularLink(RegularLink<'s>),
|
||||
}
|
||||
|
||||
@ -39,6 +43,7 @@ impl<'s> Source<'s> for Object<'s> {
|
||||
}
|
||||
}
|
||||
|
||||
#[tracing::instrument(ret, level = "debug")]
|
||||
pub fn standard_set_object<'r, 's>(
|
||||
context: Context<'r, 's>,
|
||||
input: &'s str,
|
||||
|
@ -1,8 +1,11 @@
|
||||
use nom::branch::alt;
|
||||
use nom::character::complete::line_ending;
|
||||
use nom::combinator::eof;
|
||||
use nom::combinator::peek;
|
||||
use nom::combinator::recognize;
|
||||
use nom::combinator::verify;
|
||||
use nom::multi::many1;
|
||||
use nom::multi::many_till;
|
||||
use nom::sequence::tuple;
|
||||
|
||||
use crate::parser::object::standard_set_object;
|
||||
@ -10,31 +13,44 @@ use crate::parser::parser_context::ChainBehavior;
|
||||
use crate::parser::parser_context::ContextElement;
|
||||
use crate::parser::parser_context::ExitMatcherNode;
|
||||
use crate::parser::parser_with_context::parser_with_context;
|
||||
use crate::parser::util::exit_matcher_parser;
|
||||
|
||||
use super::element::non_paragraph_element;
|
||||
use super::error::Res;
|
||||
use super::lesser_element::Paragraph;
|
||||
use super::util::blank_line;
|
||||
use super::util::get_consumed;
|
||||
use super::util::trailing_whitespace;
|
||||
use super::Context;
|
||||
|
||||
#[tracing::instrument(ret, level = "debug")]
|
||||
pub fn paragraph<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Paragraph<'s>> {
|
||||
let parser_context =
|
||||
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
|
||||
exit_matcher: ChainBehavior::AndParent(Some(¶graph_end)),
|
||||
}));
|
||||
let standard_set_object_matcher = parser_with_context!(standard_set_object)(&parser_context);
|
||||
let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context);
|
||||
|
||||
let (remaining, children) = many1(standard_set_object_matcher)(input)?;
|
||||
|
||||
let (remaining, _trailing_whitespace) = trailing_whitespace(remaining)?;
|
||||
let (remaining, (children, _exit_contents)) = verify(
|
||||
many_till(
|
||||
standard_set_object_matcher,
|
||||
peek(alt((eof, recognize(tuple((line_ending, exit_matcher)))))),
|
||||
),
|
||||
|(children, _exit_contents)| !children.is_empty(),
|
||||
)(input)?;
|
||||
|
||||
let (remaining, _linebreak) = alt((eof, line_ending))(remaining)?;
|
||||
let source = get_consumed(input, remaining);
|
||||
|
||||
Ok((remaining, Paragraph { source, children }))
|
||||
}
|
||||
|
||||
#[tracing::instrument(ret, level = "debug")]
|
||||
fn paragraph_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
|
||||
// TODO: Other elements should also end paragraphs
|
||||
alt((recognize(tuple((line_ending, many1(blank_line)))), eof))(input)
|
||||
let non_paragraph_element_matcher = parser_with_context!(non_paragraph_element)(context);
|
||||
alt((
|
||||
recognize(many1(blank_line)),
|
||||
recognize(non_paragraph_element_matcher),
|
||||
eof,
|
||||
))(input)
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
use std::rc::Rc;
|
||||
|
||||
use nom::combinator::eof;
|
||||
use nom::IResult;
|
||||
|
||||
use super::error::CustomError;
|
||||
@ -53,10 +54,22 @@ impl<'r, 's> ContextTree<'r, 's> {
|
||||
self.tree.into_iter_until(&other.tree)
|
||||
}
|
||||
|
||||
#[tracing::instrument(ret, level = "debug")]
|
||||
pub fn check_exit_matcher(
|
||||
&'r self,
|
||||
i: &'s str,
|
||||
) -> IResult<&'s str, &'s str, CustomError<&'s str>> {
|
||||
// Special check for EOF. We don't just make this a document-level exit matcher since the IgnoreParent ChainBehavior could cause early exit matchers to not run.
|
||||
let at_end_of_file = eof(i);
|
||||
if at_end_of_file.is_ok() {
|
||||
return at_end_of_file;
|
||||
}
|
||||
|
||||
// let blocked_context =
|
||||
// self.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
|
||||
// exit_matcher: ChainBehavior::IgnoreParent(Some(&always_fail)),
|
||||
// }));
|
||||
|
||||
for current_node in self.iter() {
|
||||
let context_element = current_node.get_data();
|
||||
match context_element {
|
||||
@ -106,9 +119,14 @@ impl<'r, 's> ContextTree<'r, 's> {
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum ContextElement<'r, 's> {
|
||||
/// Stores a reference to the entire org-mode document being parsed.
|
||||
///
|
||||
/// This is used for look-behind.
|
||||
DocumentRoot(&'s str),
|
||||
ExitMatcherNode(ExitMatcherNode<'r>),
|
||||
Context(&'r str),
|
||||
|
||||
/// Stores the indentation level of the current list item
|
||||
ListItem(usize),
|
||||
}
|
||||
|
||||
@ -120,7 +138,8 @@ pub struct ExitMatcherNode<'r> {
|
||||
#[derive(Clone)]
|
||||
pub enum ChainBehavior<'r> {
|
||||
AndParent(Option<&'r Matcher>),
|
||||
#[allow(dead_code)]
|
||||
|
||||
#[allow(dead_code)] // Will be used when inside code/quote blocks
|
||||
IgnoreParent(Option<&'r Matcher>),
|
||||
}
|
||||
|
||||
|
191
src/parser/plain_list.rs
Normal file
191
src/parser/plain_list.rs
Normal file
@ -0,0 +1,191 @@
|
||||
use super::error::CustomError;
|
||||
use super::error::MyError;
|
||||
use super::error::Res;
|
||||
use super::greater_element::PlainList;
|
||||
use super::greater_element::PlainListItem;
|
||||
use super::parser_with_context::parser_with_context;
|
||||
use super::util::non_whitespace_character;
|
||||
use super::Context;
|
||||
use crate::parser::element::element;
|
||||
use crate::parser::parser_context::ChainBehavior;
|
||||
use crate::parser::parser_context::ContextElement;
|
||||
use crate::parser::parser_context::ExitMatcherNode;
|
||||
use crate::parser::util::exit_matcher_parser;
|
||||
use crate::parser::util::get_consumed;
|
||||
use crate::parser::util::start_of_line;
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::tag;
|
||||
use nom::character::complete::digit1;
|
||||
use nom::character::complete::one_of;
|
||||
use nom::character::complete::space0;
|
||||
use nom::combinator::eof;
|
||||
use nom::combinator::recognize;
|
||||
use nom::combinator::verify;
|
||||
use nom::multi::many_till;
|
||||
use nom::sequence::tuple;
|
||||
|
||||
#[tracing::instrument(ret, level = "debug")]
|
||||
pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, PlainList<'s>> {
|
||||
let (remaining, first_item) = plain_list_item(context, input)?;
|
||||
let plain_list_item_matcher = parser_with_context!(plain_list_item)(context);
|
||||
let exit_matcher = parser_with_context!(exit_matcher_parser)(context);
|
||||
let (remaining, (mut children, _exit_contents)) = many_till(
|
||||
verify(plain_list_item_matcher, |pli| {
|
||||
pli.indentation == first_item.indentation
|
||||
}),
|
||||
exit_matcher,
|
||||
)(remaining)?;
|
||||
let source = get_consumed(input, remaining);
|
||||
children.insert(0, first_item);
|
||||
Ok((remaining, PlainList { source, children }))
|
||||
}
|
||||
|
||||
#[tracing::instrument(ret, level = "debug")]
|
||||
pub fn plain_list_item<'r, 's>(
|
||||
context: Context<'r, 's>,
|
||||
input: &'s str,
|
||||
) -> Res<&'s str, PlainListItem<'s>> {
|
||||
start_of_line(context, input)?;
|
||||
let (remaining, leading_whitespace) = space0(input)?;
|
||||
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
|
||||
let indent_level = leading_whitespace.len();
|
||||
let parser_context = context
|
||||
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
|
||||
exit_matcher: ChainBehavior::AndParent(Some(&plain_list_item_end)),
|
||||
}))
|
||||
.with_additional_node(ContextElement::ListItem(indent_level));
|
||||
|
||||
let element_matcher = parser_with_context!(element)(&parser_context);
|
||||
let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context);
|
||||
let (remaining, (bull, _ws)) = tuple((bullet, space0))(remaining)?;
|
||||
let (remaining, (contents, _exit_contents)) =
|
||||
many_till(element_matcher, exit_matcher)(remaining)?;
|
||||
let source = get_consumed(input, remaining);
|
||||
|
||||
Ok((
|
||||
remaining,
|
||||
PlainListItem {
|
||||
source,
|
||||
indentation: indent_level,
|
||||
bullet: bull,
|
||||
contents,
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
#[tracing::instrument(ret, level = "debug")]
|
||||
fn bullet<'s>(i: &'s str) -> Res<&'s str, &'s str> {
|
||||
alt((
|
||||
tag("*"),
|
||||
tag("-"),
|
||||
tag("+"),
|
||||
recognize(tuple((counter, alt((tag("."), tag(")")))))),
|
||||
))(i)
|
||||
}
|
||||
|
||||
#[tracing::instrument(ret, level = "debug")]
|
||||
fn counter<'s>(i: &'s str) -> Res<&'s str, &'s str> {
|
||||
alt((recognize(one_of("abcdefghijklmnopqrstuvwxyz")), digit1))(i)
|
||||
}
|
||||
|
||||
#[tracing::instrument(ret, level = "debug")]
|
||||
fn plain_list_item_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
|
||||
let current_item_indent_level: &usize =
|
||||
get_context_item_indent(context).ok_or(nom::Err::Error(CustomError::MyError(MyError(
|
||||
"Not inside a plain list item",
|
||||
))))?;
|
||||
let plain_list_item_matcher = parser_with_context!(plain_list_item)(context);
|
||||
let line_indented_lte_matcher = parser_with_context!(line_indented_lte)(context);
|
||||
alt((
|
||||
recognize(verify(plain_list_item_matcher, |pli| {
|
||||
pli.indentation <= *current_item_indent_level
|
||||
})),
|
||||
recognize(line_indented_lte_matcher),
|
||||
eof,
|
||||
))(input)
|
||||
}
|
||||
|
||||
#[tracing::instrument(ret, level = "debug")]
|
||||
fn line_indented_lte<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
|
||||
let current_item_indent_level: &usize =
|
||||
get_context_item_indent(context).ok_or(nom::Err::Error(CustomError::MyError(MyError(
|
||||
"Not inside a plain list item",
|
||||
))))?;
|
||||
|
||||
start_of_line(context, input)?;
|
||||
|
||||
let matched = recognize(verify(
|
||||
tuple((space0::<&str, _>, non_whitespace_character)),
|
||||
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
|
||||
|(_space0, _anychar)| _space0.len() <= *current_item_indent_level,
|
||||
))(input)?;
|
||||
|
||||
Ok(matched)
|
||||
}
|
||||
|
||||
fn get_context_item_indent<'r, 's>(context: Context<'r, 's>) -> Option<&'r usize> {
|
||||
for thing in context.iter() {
|
||||
match thing.get_data() {
|
||||
ContextElement::ListItem(depth) => return Some(depth),
|
||||
_ => {}
|
||||
};
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::parser::parser_context::ContextElement;
    use crate::parser::parser_context::ContextTree;
    use crate::parser::parser_with_context::parser_with_context;

    use super::*;

    /// A bare bullet with no contents parses as a complete list item.
    #[test]
    fn plain_list_item_empty() {
        let text = "1.";
        let root: ContextTree<'_, '_> = ContextTree::new();
        let with_document = root.with_additional_node(ContextElement::DocumentRoot(text));
        let parse_item = parser_with_context!(plain_list_item)(&with_document);
        let (rest, item) = parse_item(text).unwrap();
        assert_eq!(rest, "");
        assert_eq!(item.source, "1.");
    }

    /// A bullet followed by text parses as a complete list item.
    #[test]
    fn plain_list_item_simple() {
        let text = "1. foo";
        let root: ContextTree<'_, '_> = ContextTree::new();
        let with_document = root.with_additional_node(ContextElement::DocumentRoot(text));
        let parse_item = parser_with_context!(plain_list_item)(&with_document);
        let (rest, item) = parse_item(text).unwrap();
        assert_eq!(rest, "");
        assert_eq!(item.source, "1. foo");
    }

    /// A single bare bullet parses as a complete list.
    #[test]
    fn plain_list_empty() {
        let text = "1.";
        let root: ContextTree<'_, '_> = ContextTree::new();
        let with_document = root.with_additional_node(ContextElement::DocumentRoot(text));
        let parse_list = parser_with_context!(plain_list)(&with_document);
        let (rest, list) = parse_list(text).unwrap();
        assert_eq!(rest, "");
        assert_eq!(list.source, "1.");
    }

    /// A single item with text parses as a complete list.
    #[test]
    fn plain_list_simple() {
        let text = "1. foo";
        let root: ContextTree<'_, '_> = ContextTree::new();
        let with_document = root.with_additional_node(ContextElement::DocumentRoot(text));
        let parse_list = parser_with_context!(plain_list)(&with_document);
        let (rest, list) = parse_list(text).unwrap();
        assert_eq!(rest, "");
        assert_eq!(list.source, "1. foo");
    }
}
|
@ -7,13 +7,13 @@ use super::error::Res;
|
||||
use super::object::PlainText;
|
||||
use super::Context;
|
||||
|
||||
#[tracing::instrument(ret, level = "debug")]
|
||||
pub fn plain_text<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, PlainText<'s>> {
|
||||
if input.len() == 0 {
|
||||
return Err(nom::Err::Error(CustomError::MyError(MyError(
|
||||
"Zero input length to plain_text.",
|
||||
))));
|
||||
}
|
||||
// not(|i| context.check_exit_matcher(i))(input)?;
|
||||
let mut current_input = input.char_indices();
|
||||
loop {
|
||||
match current_input.next() {
|
||||
|
@ -1,12 +1,16 @@
|
||||
use nom::branch::alt;
|
||||
use nom::character::complete::line_ending;
|
||||
use nom::character::complete::none_of;
|
||||
use nom::character::complete::space0;
|
||||
use nom::combinator::eof;
|
||||
use nom::combinator::not;
|
||||
use nom::combinator::peek;
|
||||
use nom::combinator::recognize;
|
||||
use nom::multi::many0;
|
||||
use nom::sequence::tuple;
|
||||
|
||||
use super::error::CustomError;
|
||||
use super::error::MyError;
|
||||
use super::error::Res;
|
||||
use super::parser_context::ContextElement;
|
||||
use super::Context;
|
||||
@ -67,15 +71,97 @@ pub fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str {
|
||||
/// A line containing only whitespace and then a line break
|
||||
///
|
||||
/// It is up to the caller to ensure this is called at the start of a line.
|
||||
#[tracing::instrument(ret, level = "debug")]
|
||||
pub fn blank_line(input: &str) -> Res<&str, &str> {
|
||||
not(eof)(input)?;
|
||||
recognize(tuple((space0, alt((line_ending, eof)))))(input)
|
||||
}
|
||||
|
||||
#[tracing::instrument(ret, level = "debug")]
|
||||
pub fn element_trailing_whitespace<'r, 's>(
|
||||
context: Context<'r, 's>,
|
||||
input: &'s str,
|
||||
) -> Res<&'s str, &'s str> {
|
||||
start_of_line(context, input)?;
|
||||
alt((eof, recognize(many0(blank_line))))(input)
|
||||
}
|
||||
|
||||
#[tracing::instrument(ret, level = "debug")]
|
||||
pub fn trailing_whitespace(input: &str) -> Res<&str, &str> {
|
||||
alt((eof, recognize(tuple((line_ending, many0(blank_line))))))(input)
|
||||
}
|
||||
|
||||
/// Check that we are at the start of a line
|
||||
#[tracing::instrument(ret, level = "debug")]
|
||||
pub fn start_of_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> {
|
||||
let document_root = context.get_document_root().unwrap();
|
||||
let preceding_character = get_one_before(document_root, input)
|
||||
.map(|slice| slice.chars().next())
|
||||
.flatten();
|
||||
match preceding_character {
|
||||
Some('\n') => {}
|
||||
Some(_) => {
|
||||
// Not at start of line, cannot be a heading
|
||||
return Err(nom::Err::Error(CustomError::MyError(MyError(
|
||||
"Not at start of line",
|
||||
))));
|
||||
}
|
||||
// If None, we are at the start of the file which allows for headings
|
||||
None => {}
|
||||
};
|
||||
Ok((input, ()))
|
||||
}
|
||||
|
||||
/// Pull one non-whitespace character.
|
||||
///
|
||||
/// This function only operates on spaces, tabs, carriage returns, and line feeds. It does not handle fancy unicode whitespace.
|
||||
#[tracing::instrument(ret, level = "debug")]
|
||||
pub fn non_whitespace_character(input: &str) -> Res<&str, char> {
|
||||
none_of(" \t\r\n")(input)
|
||||
}
|
||||
|
||||
/// Check that we are at the start of a line
|
||||
#[tracing::instrument(ret, level = "debug")]
|
||||
pub fn exit_matcher_parser<'r, 's>(
|
||||
context: Context<'r, 's>,
|
||||
input: &'s str,
|
||||
) -> Res<&'s str, &'s str> {
|
||||
peek(|i| context.check_exit_matcher(i))(input)
|
||||
}
|
||||
|
||||
#[tracing::instrument(ret, level = "debug")]
|
||||
pub fn always_fail<'r, 's>(_context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
|
||||
Err(nom::Err::Error(CustomError::MyError(MyError(
|
||||
"Always fail",
|
||||
))))
|
||||
}
|
||||
|
||||
/// Walk backwards unconsuming blank lines and line endings.
|
||||
///
|
||||
/// List items are a special case where the trailing blank lines do not belong to it, unlike all other elements. Rather than write that special logic into each child parser, this just walks backwards through the consumed input to unconsume trailing blank lines and line breaks.
|
||||
#[tracing::instrument(ret, level = "debug")]
|
||||
pub fn regurgitate<'s>(input: &'s str, remaining: &'s str) -> &'s str {
|
||||
assert!(is_slice_of(input, remaining));
|
||||
let mut offset = remaining.as_ptr() as usize - input.as_ptr() as usize;
|
||||
let source = &input[..offset];
|
||||
let mut char_indices = source.char_indices().rev();
|
||||
loop {
|
||||
match char_indices.next() {
|
||||
Some((off, chr)) => {
|
||||
if chr == '\n' {
|
||||
offset = off;
|
||||
} else if chr != ' ' && chr != '\t' {
|
||||
return &input[offset..];
|
||||
}
|
||||
}
|
||||
None => {
|
||||
// It was all whitespace, so return the full input string
|
||||
return input;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@ -89,4 +175,14 @@ mod tests {
|
||||
assert!(is_slice_of(input, yellow_heart));
|
||||
assert_eq!(yellow_heart, "💛");
|
||||
}
|
||||
|
||||
/// `regurgitate` must hand back the blank lines before the green heart and
/// must only slice on character boundaries of multi-byte input.
#[test]
fn regurgitate_unicode() {
    let input = "🧡💛\n\t \t \n\n💚💙💜";
    // The green heart is the 13th character (index 12).
    let (green_heart_index, _) = input.char_indices().nth(12).unwrap();
    let starting_with_green_heart = &input[green_heart_index..];
    let after_yellow = regurgitate(input, starting_with_green_heart);
    assert!(is_slice_of(input, after_yellow));
    assert_eq!(after_yellow, "\n\t \t \n\n💚💙💜");
}
|
||||
}
|
||||
|
@ -1,22 +1,5 @@
|
||||
prologue *goes here* I guess *bold
|
||||
text*
|
||||
|
||||
bold*wont* start *or stop*when there is text outside it
|
||||
|
||||
I guess *regular
|
||||
|
||||
text*
|
||||
|
||||
[foo *bar] baz* car
|
||||
|
||||
|
||||
*nesting *bold entrances* and* exits
|
||||
|
||||
* Heading
|
||||
|
||||
body of heading
|
||||
|
||||
** Child heading
|
||||
** Immediate second child heading
|
||||
|
||||
* Second top-level heading
|
||||
foo bar
|
||||
1. This is a list immediately after a paragraph
|
||||
2. This is a second item in the list
|
||||
1. This is a child of the second item
|
||||
|
Loading…
x
Reference in New Issue
Block a user