From 76b23254865f746b6c11450b9282a78b91ff3f54 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sun, 18 Dec 2022 03:29:01 -0500 Subject: [PATCH 01/18] Start the plain list module. --- src/parser/mod.rs | 1 + src/parser/plain_list.rs | 8 ++++++++ src/parser/token.rs | 11 +++++++++++ 3 files changed, 20 insertions(+) create mode 100644 src/parser/plain_list.rs diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 7e0fd1d..2f5130b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -7,6 +7,7 @@ mod list; mod paragraph; mod parser_context; mod parser_with_context; +mod plain_list; mod text; mod token; mod util; diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs new file mode 100644 index 0000000..86f2585 --- /dev/null +++ b/src/parser/plain_list.rs @@ -0,0 +1,8 @@ +use super::error::Res; +use super::token::PlainList; +use super::Context; + +pub fn plain_list<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, PlainList<'s>> { + // todo + todo!() +} diff --git a/src/parser/token.rs b/src/parser/token.rs index 65d4bc0..e695b23 100644 --- a/src/parser/token.rs +++ b/src/parser/token.rs @@ -95,3 +95,14 @@ impl<'a> Source<'a> for Paragraph<'a> { self.source } } + +#[derive(Debug)] +pub struct PlainList<'a> { + pub source: &'a str, +} + +impl<'a> Source<'a> for PlainList<'a> { + fn get_source(&'a self) -> &'a str { + self.source + } +} From b4e28f3d4d46db34dc363558a87752d50eae5777 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sun, 18 Dec 2022 05:02:32 -0500 Subject: [PATCH 02/18] Implement a parser for a bullet in a list item. --- src/parser/plain_list.rs | 26 ++++++++++++++++++++++++++ src/parser/token.rs | 22 ++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 86f2585..5fa0fb0 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -1,4 +1,12 @@ +use nom::branch::alt; +use nom::bytes::complete::tag; +use nom::character::complete::digit1; +use nom::character::complete::one_of; +use nom::combinator::recognize; +use nom::sequence::tuple; + use super::error::Res; +use super::token::ListItem; use super::token::PlainList; use super::Context; @@ -6,3 +14,21 @@ pub fn plain_list<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, // todo todo!() } + +fn item<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, ListItem<'s>> { + // todo + todo!() +} + +fn counter<'s>(i: &'s str) -> Res<&'s str, &'s str> { + alt((recognize(one_of("abcdefghijklmnopqrstuvwxyz")), digit1))(i) +} + +fn bullet<'s>(i: &'s str) -> Res<&'s str, &'s str> { + alt(( + tag("*"), + tag("-"), + tag("+"), + recognize(tuple((counter, alt((tag("."), tag(")")))))), + ))(i) +} diff --git a/src/parser/token.rs b/src/parser/token.rs index e695b23..9d4c7a8 100644 --- a/src/parser/token.rs +++ b/src/parser/token.rs @@ -106,3 +106,25 @@ impl<'a> Source<'a> for PlainList<'a> { self.source } } + +#[derive(Debug)] +pub struct ListItem<'a> { + pub source: &'a str, +} + +impl<'a> Source<'a> for ListItem<'a> { + fn get_source(&'a self) -> &'a str { + self.source + } +} + +#[derive(Debug)] +pub struct ListCounter<'a> { + pub source: &'a str, +} + +impl<'a> Source<'a> for ListCounter<'a> { + fn get_source(&'a self) -> &'a str { + self.source + } +} From 62e5499150af15fdd9b9bc930c0c07ad5db3c50f Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sun, 18 Dec 2022 06:31:40 -0500 Subject: [PATCH 03/18] Add parsers for most of the rest of item. --- src/parser/plain_list.rs | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 5fa0fb0..4d026e5 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -1,8 +1,12 @@ use nom::branch::alt; use nom::bytes::complete::tag; +use nom::character::complete::anychar; use nom::character::complete::digit1; +use nom::character::complete::line_ending; use nom::character::complete::one_of; +use nom::combinator::not; use nom::combinator::recognize; +use nom::multi::many1; use nom::sequence::tuple; use super::error::Res; @@ -32,3 +36,28 @@ fn bullet<'s>(i: &'s str) -> Res<&'s str, &'s str> { recognize(tuple((counter, alt((tag("."), tag(")")))))), ))(i) } + +fn counter_set<'s>(i: &'s str) -> Res<&'s str, &'s str> { + recognize(tuple((tag("[@"), counter, tag("]"))))(i) +} + +fn check_box<'s>(i: &'s str) -> Res<&'s str, &'s str> { + recognize(alt((tag("[ ]"), tag("[X]"), tag("[-]"))))(i) +} + +fn item_tag<'s>(i: &'s str) -> Res<&'s str, &'s str> { + recognize(tuple((tag_text, tag_separator)))(i) +} + +fn tag_text<'s>(i: &'s str) -> Res<&'s str, &'s str> { + recognize(many1(tag_text_character))(i) +} + +fn tag_text_character<'s>(i: &'s str) -> Res<&'s str, &'s str> { + not(alt((tag_separator, line_ending)))(i)?; + recognize(anychar)(i) +} + +fn tag_separator<'s>(i: &'s str) -> Res<&'s str, &'s str> { + tag(" :: ")(i) +} From 5e7c891681cea38e186b7f282ae9fbef10c318ab Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sun, 18 Dec 2022 07:18:42 -0500 Subject: [PATCH 04/18] Start of list item implementation. --- src/parser/plain_list.rs | 18 ++++++++++++++++++ src/parser/text.rs | 2 +- src/parser/token.rs | 5 +++++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 4d026e5..c2cf0ec 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -4,12 +4,17 @@ use nom::character::complete::anychar; use nom::character::complete::digit1; use nom::character::complete::line_ending; use nom::character::complete::one_of; +use nom::combinator::consumed; use nom::combinator::not; +use nom::combinator::opt; use nom::combinator::recognize; use nom::multi::many1; use nom::sequence::tuple; +use super::combinator::context_many_till; use super::error::Res; +use super::text::space; +use super::text::text_element; use super::token::ListItem; use super::token::PlainList; use super::Context; @@ -20,6 +25,14 @@ pub fn plain_list<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, } fn item<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, ListItem<'s>> { + let _ = consumed(tuple(( + bullet, + opt(tuple((space, counter_set))), + opt(tuple((space, check_box))), + opt(tuple((space, item_tag))), + space, + context_many_till(context, text_element, item_end), + )))(i)?; // todo todo!() } @@ -61,3 +74,8 @@ fn tag_text_character<'s>(i: &'s str) -> Res<&'s str, &'s str> { fn tag_separator<'s>(i: &'s str) -> Res<&'s str, &'s str> { tag(" :: ")(i) } + +pub fn item_end<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, &'s str> { + // todo + todo!() +} diff --git a/src/parser/text.rs b/src/parser/text.rs index 4aaf632..a82a8df 100644 --- a/src/parser/text.rs +++ b/src/parser/text.rs @@ -24,7 +24,7 @@ pub fn line_break(input: &str) -> Res<&str, LineBreak> { map(line_ending, |s: &str| LineBreak { source: s })(input) } -fn space(input: &str) -> Res<&str, Space> { +pub fn space(input: &str) -> Res<&str, Space> { map(space1, |s: &str| Space { source: s })(input) } diff --git a/src/parser/token.rs b/src/parser/token.rs index 9d4c7a8..1153a54 100644 --- a/src/parser/token.rs +++ b/src/parser/token.rs @@ -110,6 +110,11 @@ impl<'a> Source<'a> for PlainList<'a> { #[derive(Debug)] pub struct ListItem<'a> { pub source: &'a str, + pub bullet: &'a str, + pub counter_set: &'a str, + pub check_box: &'a str, + pub item_tag: &'a str, + pub contents: Vec>, } impl<'a> Source<'a> for ListItem<'a> { From 37070689c6934e9cb736715480d457221ede1c78 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sun, 18 Dec 2022 07:40:52 -0500 Subject: [PATCH 05/18] Continued work on item parser. Still needs to: 1. Do context things 2. Fix the double-space after the tag separator issue 3. Add support for indentation 4. Write item_end 5. Write plain_list --- src/parser/plain_list.rs | 24 +++++++++++++++++++++--- src/parser/token.rs | 6 +++--- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index c2cf0ec..4bb435c 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -17,6 +17,7 @@ use super::text::space; use super::text::text_element; use super::token::ListItem; use super::token::PlainList; +use super::token::Token; use super::Context; pub fn plain_list<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, PlainList<'s>> { @@ -25,16 +26,33 @@ pub fn plain_list<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, } fn item<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, ListItem<'s>> { - let _ = consumed(tuple(( + let (remaining, (source, (bul, count, check, tg, sp, (contents, end)))) = consumed(tuple(( bullet, opt(tuple((space, counter_set))), opt(tuple((space, check_box))), opt(tuple((space, item_tag))), space, + // TODO: This context should probably be something involving the item context_many_till(context, text_element, item_end), )))(i)?; - // todo - todo!() + + let elements = contents + .into_iter() + .filter_map(|token| match token { + Token::TextElement(text_element) => Some(text_element), + Token::Paragraph(_) => panic!("There should only be text elements in items."), + }) + .collect(); + + let ret = ListItem { + source, + bullet: bul, + counter_set: count.map(|(_spc, count)| count), + check_box: check.map(|(_spc, check)| check), + item_tag: tg.map(|(_spc, tg)| tg), + contents: elements, + }; + Ok((remaining, ret)) } fn counter<'s>(i: &'s str) -> Res<&'s str, &'s str> { diff --git a/src/parser/token.rs b/src/parser/token.rs index 1153a54..3935377 100644 --- a/src/parser/token.rs +++ b/src/parser/token.rs @@ -111,9 +111,9 @@ impl<'a> Source<'a> for PlainList<'a> { pub struct ListItem<'a> { pub source: &'a str, pub bullet: &'a str, - pub counter_set: &'a str, - pub check_box: &'a str, - pub item_tag: &'a str, + pub counter_set: Option<&'a str>, + pub check_box: Option<&'a str>, + pub item_tag: Option<&'a str>, pub contents: Vec>, } From 32897270a5c3b94b22aff4405649f866071a7d89 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 17 Mar 2023 16:37:47 -0400 Subject: [PATCH 06/18] Support leading whitespace for list items. --- src/main.rs | 8 ++++++-- src/parser/mod.rs | 2 ++ src/parser/paragraph.rs | 2 +- src/parser/plain_list.rs | 22 ++++++++++++++++------ src/parser/token.rs | 1 + 5 files changed, 26 insertions(+), 9 deletions(-) diff --git a/src/main.rs b/src/main.rs index a50a86c..70abb81 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,6 @@ use crate::parser::document; +use crate::parser::item; +use crate::parser::ContextTree; use tracing::Level; use tracing_subscriber::fmt::format::FmtSpan; @@ -18,8 +20,10 @@ fn main() -> Result<(), Box> { .with_span_events(FmtSpan::ENTER | FmtSpan::EXIT) .finish(); tracing::subscriber::set_global_default(subscriber)?; - let parsed = document(TEST_DOC); - println!("{}\n\n\n", TEST_DOC); + // let parsed = document(TEST_DOC); + // println!("{}\n\n\n", TEST_DOC); + let initial_context: ContextTree<'_, '_> = ContextTree::new(); + let parsed = item(&initial_context, " 1. foo\n"); println!("{:#?}", parsed); Ok(()) } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 2f5130b..545ef9f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -13,3 +13,5 @@ mod token; mod util; pub use document::document; type Context<'r, 's> = &'r parser_context::ContextTree<'r, 's>; +pub use parser_context::ContextTree; +pub use plain_list::item; diff --git a/src/parser/paragraph.rs b/src/parser/paragraph.rs index 6097ec8..549c2d4 100644 --- a/src/parser/paragraph.rs +++ b/src/parser/paragraph.rs @@ -56,7 +56,7 @@ fn context_paragraph_end<'r, 's>( paragraph_end(input) } -fn paragraph_end(input: &str) -> Res<&str, &str> { +pub fn paragraph_end(input: &str) -> Res<&str, &str> { alt(( recognize(tuple(( map(line_break, TextElement::LineBreak), diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 4bb435c..59370a8 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -4,15 +4,18 @@ use nom::character::complete::anychar; use nom::character::complete::digit1; use nom::character::complete::line_ending; use nom::character::complete::one_of; +use nom::character::complete::space0; use nom::combinator::consumed; use nom::combinator::not; use nom::combinator::opt; use nom::combinator::recognize; +use nom::multi::many0_count; use nom::multi::many1; use nom::sequence::tuple; use super::combinator::context_many_till; use super::error::Res; +use super::paragraph::paragraph_end; use super::text::space; use super::text::text_element; use super::token::ListItem; @@ -25,8 +28,10 @@ pub fn plain_list<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, todo!() } -fn item<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, ListItem<'s>> { - let (remaining, (source, (bul, count, check, tg, sp, (contents, end)))) = consumed(tuple(( +pub fn item<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, ListItem<'s>> { + let (remaining, leading_whitespace) = space0(i)?; + let indent_level = leading_whitespace.len(); + let (remaining, (bul, countset, check, tg, sp, (contents, end))) = tuple(( bullet, opt(tuple((space, counter_set))), opt(tuple((space, check_box))), @@ -34,7 +39,7 @@ fn item<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, ListItem<' space, // TODO: This context should probably be something involving the item context_many_till(context, text_element, item_end), - )))(i)?; + ))(remaining)?; let elements = contents .into_iter() @@ -44,10 +49,16 @@ fn item<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, ListItem<' }) .collect(); + let source = { + let offset = remaining.as_ptr() as usize - i.as_ptr() as usize; + &i[..offset] + }; + let ret = ListItem { source, + leading_whitespace, bullet: bul, - counter_set: count.map(|(_spc, count)| count), + counter_set: countset.map(|(_spc, count)| count), check_box: check.map(|(_spc, check)| check), item_tag: tg.map(|(_spc, tg)| tg), contents: elements, @@ -94,6 +105,5 @@ fn tag_separator<'s>(i: &'s str) -> Res<&'s str, &'s str> { } pub fn item_end<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, &'s str> { - // todo - todo!() + paragraph_end(i) } diff --git a/src/parser/token.rs b/src/parser/token.rs index 3935377..a7eb0b4 100644 --- a/src/parser/token.rs +++ b/src/parser/token.rs @@ -110,6 +110,7 @@ impl<'a> Source<'a> for PlainList<'a> { #[derive(Debug)] pub struct ListItem<'a> { pub source: &'a str, + pub leading_whitespace: &'a str, pub bullet: &'a str, pub counter_set: Option<&'a str>, pub check_box: Option<&'a str>, From 88c974f8e460a0152bfb049555a1da73d9877729 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 17 Mar 2023 16:49:09 -0400 Subject: [PATCH 07/18] Record the list item indent depth in the context tree. --- src/parser/bold.rs | 1 + src/parser/combinator.rs | 2 ++ src/parser/parser_context.rs | 2 ++ src/parser/plain_list.rs | 4 +++- src/parser/util.rs | 1 + 5 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/parser/bold.rs b/src/parser/bold.rs index 5c64cf7..a8dc00a 100644 --- a/src/parser/bold.rs +++ b/src/parser/bold.rs @@ -95,6 +95,7 @@ fn _preceded_by_whitespace<'r, 's>(context: Context<'r, 's>) -> bool { return true; } ContextElement::Context(_) => {} + ContextElement::ListItem(_) => {} } } else { break; diff --git a/src/parser/combinator.rs b/src/parser/combinator.rs index fc0ec87..4b99f84 100644 --- a/src/parser/combinator.rs +++ b/src/parser/combinator.rs @@ -46,6 +46,7 @@ where ContextElement::ExitMatcherNode(_) => None, ContextElement::Context(_) => None, ContextElement::StartOfParagraph => None, + ContextElement::ListItem(_) => None, }) .collect(); if elements.is_empty() { @@ -93,6 +94,7 @@ where }) => { ret.push(token); } + ContextElement::ListItem(_) => {} }; } ret.reverse(); diff --git a/src/parser/parser_context.rs b/src/parser/parser_context.rs index 32bc039..07cd186 100644 --- a/src/parser/parser_context.rs +++ b/src/parser/parser_context.rs @@ -87,6 +87,7 @@ impl<'r, 's> ContextTree<'r, 's> { ContextElement::PreviousElementNode(_) => {} ContextElement::StartOfParagraph => {} ContextElement::Context(_) => {} + ContextElement::ListItem(_) => {} }; } // TODO: Make this a specific error instead of just a generic MyError @@ -99,6 +100,7 @@ pub enum ContextElement<'r, 's> { ExitMatcherNode(ExitMatcherNode<'r>), PreviousElementNode(PreviousElementNode<'s>), Context(&'r str), + ListItem(usize), StartOfParagraph, } diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 59370a8..c226c87 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -16,6 +16,7 @@ use nom::sequence::tuple; use super::combinator::context_many_till; use super::error::Res; use super::paragraph::paragraph_end; +use super::parser_context::ContextElement; use super::text::space; use super::text::text_element; use super::token::ListItem; @@ -31,6 +32,7 @@ pub fn plain_list<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, pub fn item<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, ListItem<'s>> { let (remaining, leading_whitespace) = space0(i)?; let indent_level = leading_whitespace.len(); + let list_item_context = context.with_additional_node(ContextElement::ListItem(indent_level)); let (remaining, (bul, countset, check, tg, sp, (contents, end))) = tuple(( bullet, opt(tuple((space, counter_set))), @@ -38,7 +40,7 @@ pub fn item<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, ListIt opt(tuple((space, item_tag))), space, // TODO: This context should probably be something involving the item - context_many_till(context, text_element, item_end), + context_many_till(&list_item_context, text_element, item_end), ))(remaining)?; let elements = contents diff --git a/src/parser/util.rs b/src/parser/util.rs index 44886ce..3600c13 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -9,6 +9,7 @@ pub fn in_section<'r, 's, 'x>(context: Context<'r, 's>, section_name: &'x str) - ContextElement::Context(name) if *name == section_name => return true, ContextElement::Context(_) => {} ContextElement::StartOfParagraph => {} // TODO: If we specialize this to bold then this would be a good spot to stop scanning + ContextElement::ListItem(_) => {} } } false From 29904f2bb5974db6b3645a9d5d0c8da46cc14ea6 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 18 Mar 2023 14:36:33 -0400 Subject: [PATCH 08/18] Detect next list item as ender for current list item. --- src/main.rs | 2 +- src/parser/plain_list.rs | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/main.rs b/src/main.rs index 70abb81..2ac52e5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -23,7 +23,7 @@ fn main() -> Result<(), Box> { // let parsed = document(TEST_DOC); // println!("{}\n\n\n", TEST_DOC); let initial_context: ContextTree<'_, '_> = ContextTree::new(); - let parsed = item(&initial_context, " 1. foo\n"); + let parsed = item(&initial_context, " 1. foo\n2. bar"); println!("{:#?}", parsed); Ok(()) } diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index c226c87..5175322 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -8,6 +8,7 @@ use nom::character::complete::space0; use nom::combinator::consumed; use nom::combinator::not; use nom::combinator::opt; +use nom::combinator::peek; use nom::combinator::recognize; use nom::multi::many0_count; use nom::multi::many1; @@ -17,6 +18,7 @@ use super::combinator::context_many_till; use super::error::Res; use super::paragraph::paragraph_end; use super::parser_context::ContextElement; +use super::parser_with_context::parser_with_context; use super::text::space; use super::text::text_element; use super::token::ListItem; @@ -107,5 +109,9 @@ fn tag_separator<'s>(i: &'s str) -> Res<&'s str, &'s str> { } pub fn item_end<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, &'s str> { - paragraph_end(i) + let item_matcher = parser_with_context!(item)(&context); + alt(( + paragraph_end, + recognize(tuple((line_ending, peek(item_matcher)))), + ))(i) } From 6d19eeb0f4dd3e50d380972b5a373de9b9a4940d Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 18 Mar 2023 17:32:07 -0400 Subject: [PATCH 09/18] Add parsers for lines that are indented less than or equal to the current line item's indent. --- src/parser/plain_list.rs | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 5175322..07fd7d1 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -10,11 +10,14 @@ use nom::combinator::not; use nom::combinator::opt; use nom::combinator::peek; use nom::combinator::recognize; +use nom::combinator::verify; use nom::multi::many0_count; use nom::multi::many1; use nom::sequence::tuple; use super::combinator::context_many_till; +use super::error::CustomError; +use super::error::MyError; use super::error::Res; use super::paragraph::paragraph_end; use super::parser_context::ContextElement; @@ -115,3 +118,26 @@ pub fn item_end<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, &' recognize(tuple((line_ending, peek(item_matcher)))), ))(i) } + +fn line_indented_lte<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, &'s str> { + let current_item_indent_level: &usize = get_context_item_indent(context).ok_or( + nom::Err::Error(CustomError::MyError(MyError("NotInPlainListItem"))), + )?; + + let matched = recognize(verify( + tuple((line_ending::<&str, _>, space0, anychar)), + |(_newline, _space0, _anychar)| _space0.len() <= *current_item_indent_level, + ))(i)?; + + Ok(matched) +} + +fn get_context_item_indent<'r, 's>(context: Context<'r, 's>) -> Option<&'r usize> { + for thing in context.iter() { + match thing.get_data() { + ContextElement::ListItem(depth) => return Some(depth), + _ => {} + }; + } + None +} From bf3464c65c95771803a4edcdd3932c230198a7b6 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sun, 19 Mar 2023 13:05:37 -0400 Subject: [PATCH 10/18] End list items when the following line is indented less than or equal to the current item. --- src/main.rs | 8 ++------ src/parser/plain_list.rs | 8 +++++--- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/main.rs b/src/main.rs index 2ac52e5..a50a86c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,4 @@ use crate::parser::document; -use crate::parser::item; -use crate::parser::ContextTree; use tracing::Level; use tracing_subscriber::fmt::format::FmtSpan; @@ -20,10 +18,8 @@ fn main() -> Result<(), Box> { .with_span_events(FmtSpan::ENTER | FmtSpan::EXIT) .finish(); tracing::subscriber::set_global_default(subscriber)?; - // let parsed = document(TEST_DOC); - // println!("{}\n\n\n", TEST_DOC); - let initial_context: ContextTree<'_, '_> = ContextTree::new(); - let parsed = item(&initial_context, " 1. foo\n2. bar"); + let parsed = document(TEST_DOC); + println!("{}\n\n\n", TEST_DOC); println!("{:#?}", parsed); Ok(()) } diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 07fd7d1..c579f5b 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -44,7 +44,6 @@ pub fn item<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, ListIt opt(tuple((space, check_box))), opt(tuple((space, item_tag))), space, - // TODO: This context should probably be something involving the item context_many_till(&list_item_context, text_element, item_end), ))(remaining)?; @@ -113,8 +112,11 @@ fn tag_separator<'s>(i: &'s str) -> Res<&'s str, &'s str> { pub fn item_end<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, &'s str> { let item_matcher = parser_with_context!(item)(&context); + let line_indented_matcher = parser_with_context!(line_indented_lte)(&context); alt(( paragraph_end, + recognize(tuple((line_ending, peek(line_indented_matcher)))), + // TODO: Do we still need the item_matcher entry here? If we remove it, then child items should become part of the body of the parent item which would match the description on https://orgmode.org/worg/org-syntax.html recognize(tuple((line_ending, peek(item_matcher)))), ))(i) } @@ -125,8 +127,8 @@ fn line_indented_lte<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s st )?; let matched = recognize(verify( - tuple((line_ending::<&str, _>, space0, anychar)), - |(_newline, _space0, _anychar)| _space0.len() <= *current_item_indent_level, + tuple((space0::<&str, _>, anychar)), + |(_space0, _anychar)| _space0.len() <= *current_item_indent_level, ))(i)?; Ok(matched) From 4e7d7d3bcf3711de116cbc4e3bd1672cedf01f13 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sun, 19 Mar 2023 14:07:23 -0400 Subject: [PATCH 11/18] Add scripts for dumping the org-mode ast to investigate how emacs parses various inputs. --- org_mode_samples/.gitignore | 1 + org_mode_samples/common.el | 8 ++++++++ org_mode_samples/dump_org_ast.bash | 17 ++++++++++++++++ org_mode_samples/plain_lists/Makefile | 22 +++++++++++++++++++++ org_mode_samples/plain_lists/paragraphs.org | 3 +++ 5 files changed, 51 insertions(+) create mode 100644 org_mode_samples/.gitignore create mode 100644 org_mode_samples/common.el create mode 100755 org_mode_samples/dump_org_ast.bash create mode 100644 org_mode_samples/plain_lists/Makefile create mode 100644 org_mode_samples/plain_lists/paragraphs.org diff --git a/org_mode_samples/.gitignore b/org_mode_samples/.gitignore new file mode 100644 index 0000000..fcf91aa --- /dev/null +++ b/org_mode_samples/.gitignore @@ -0,0 +1 @@ +*.tree.txt diff --git a/org_mode_samples/common.el b/org_mode_samples/common.el new file mode 100644 index 0000000..482bbb4 --- /dev/null +++ b/org_mode_samples/common.el @@ -0,0 +1,8 @@ +(defun org-dump-ast (outpath) + (let + ((parsed-tree (format "%s" (org-element-parse-buffer)))) + (with-temp-file outpath + (insert parsed-tree) + ) + ) + ) diff --git a/org_mode_samples/dump_org_ast.bash b/org_mode_samples/dump_org_ast.bash new file mode 100755 index 0000000..8ccd247 --- /dev/null +++ b/org_mode_samples/dump_org_ast.bash @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +# +set -euo pipefail +IFS=$'\n\t' +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +INPUT_FILE="$1" +OUTPUT_FILE="$2" + +INIT_SCRIPT=$(cat < + +.PHONY: all +all: paragraphs.tree.txt + +.PHONY: clean +clean: +> rm -rf *.tree.txt + +%.tree.txt: %.org +> ../dump_org_ast.bash $< $@ diff --git a/org_mode_samples/plain_lists/paragraphs.org b/org_mode_samples/plain_lists/paragraphs.org new file mode 100644 index 0000000..71c0587 --- /dev/null +++ b/org_mode_samples/plain_lists/paragraphs.org @@ -0,0 +1,3 @@ +1. foo +2. bar +(message "%s" (org-element-parse-buffer)) From 33a69fbe0e6562e33d61c938b81624903b7ea271 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sun, 19 Mar 2023 14:17:11 -0400 Subject: [PATCH 12/18] fixup --- org_mode_samples/plain_lists/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/org_mode_samples/plain_lists/Makefile b/org_mode_samples/plain_lists/Makefile index f4fe91c..ba131d4 100644 --- a/org_mode_samples/plain_lists/Makefile +++ b/org_mode_samples/plain_lists/Makefile @@ -18,5 +18,5 @@ all: paragraphs.tree.txt clean: > rm -rf *.tree.txt -%.tree.txt: %.org +%.tree.txt: %.org ../common.el ../dump_org_ast.bash > ../dump_org_ast.bash $< $@ From 6a0dc2fc62f7f297c4f825c00fb4c9330ae2df3b Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sun, 19 Mar 2023 14:21:02 -0400 Subject: [PATCH 13/18] Format the ast. --- org_mode_samples/common.el | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/org_mode_samples/common.el b/org_mode_samples/common.el index 482bbb4..8f32896 100644 --- a/org_mode_samples/common.el +++ b/org_mode_samples/common.el @@ -1,6 +1,9 @@ (defun org-dump-ast (outpath) (let - ((parsed-tree (format "%s" (org-element-parse-buffer)))) + ( + ;; (parsed-tree (format "%s" (org-element-parse-buffer))) + (parsed-tree (pp-to-string (org-element-parse-buffer))) + ) (with-temp-file outpath (insert parsed-tree) ) From a5a03126d202f9938861597284d9e2e5406127c6 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sun, 19 Mar 2023 14:52:29 -0400 Subject: [PATCH 14/18] Add test for nested list end via 2 blank lines. --- org_mode_samples/plain_lists/Makefile | 2 +- org_mode_samples/plain_lists/nested_paragraphs.org | 6 ++++++ org_mode_samples/plain_lists/paragraphs.org | 6 +++++- 3 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 org_mode_samples/plain_lists/nested_paragraphs.org diff --git a/org_mode_samples/plain_lists/Makefile b/org_mode_samples/plain_lists/Makefile index ba131d4..61a8d82 100644 --- a/org_mode_samples/plain_lists/Makefile +++ b/org_mode_samples/plain_lists/Makefile @@ -12,7 +12,7 @@ endif .RECIPEPREFIX = > .PHONY: all -all: paragraphs.tree.txt +all: paragraphs.tree.txt nested_paragraphs.tree.txt .PHONY: clean clean: diff --git a/org_mode_samples/plain_lists/nested_paragraphs.org b/org_mode_samples/plain_lists/nested_paragraphs.org new file mode 100644 index 0000000..22b1a61 --- /dev/null +++ b/org_mode_samples/plain_lists/nested_paragraphs.org @@ -0,0 +1,6 @@ +lorem +1. foo + 1. bar + + +baz diff --git a/org_mode_samples/plain_lists/paragraphs.org b/org_mode_samples/plain_lists/paragraphs.org index 71c0587..4ed7933 100644 --- a/org_mode_samples/plain_lists/paragraphs.org +++ b/org_mode_samples/plain_lists/paragraphs.org @@ -1,3 +1,7 @@ 1. foo 2. bar -(message "%s" (org-element-parse-buffer)) + baz +3. lorem + + + ipsum From e3f0aaefdf3bb5a9e10825301fa7b2677d5d25d7 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sun, 19 Mar 2023 18:28:59 -0400 Subject: [PATCH 15/18] Add TODO. --- src/parser/plain_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index c579f5b..cfe05d8 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -114,6 +114,7 @@ pub fn item_end<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, &' let item_matcher = parser_with_context!(item)(&context); let line_indented_matcher = parser_with_context!(line_indented_lte)(&context); alt(( + // TODO: This should be two blank lines and it ends ALL of the items paragraph_end, recognize(tuple((line_ending, peek(line_indented_matcher)))), // TODO: Do we still need the item_matcher entry here? If we remove it, then child items should become part of the body of the parent item which would match the description on https://orgmode.org/worg/org-syntax.html From 2fcb445fe9a40a4a58d54a501a320e8511d01a84 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 21 Mar 2023 13:36:52 -0400 Subject: [PATCH 16/18] Switch to ending plain lists with 2 blank lines instead of just 1. --- src/parser/plain_list.rs | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index cfe05d8..2d61713 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -6,6 +6,8 @@ use nom::character::complete::line_ending; use nom::character::complete::one_of; use nom::character::complete::space0; use nom::combinator::consumed; +use nom::combinator::eof; +use nom::combinator::map; use nom::combinator::not; use nom::combinator::opt; use nom::combinator::peek; @@ -22,10 +24,13 @@ use super::error::Res; use super::paragraph::paragraph_end; use super::parser_context::ContextElement; use super::parser_with_context::parser_with_context; +use super::text::blank_line; +use super::text::line_break; use super::text::space; use super::text::text_element; use super::token::ListItem; use super::token::PlainList; +use super::token::TextElement; use super::token::Token; use super::Context; @@ -114,8 +119,8 @@ pub fn item_end<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, &' let item_matcher = parser_with_context!(item)(&context); let line_indented_matcher = parser_with_context!(line_indented_lte)(&context); alt(( - // TODO: This should be two blank lines and it ends ALL of the items - paragraph_end, + // TODO: This should ends the highest plain list + plain_list_end, recognize(tuple((line_ending, peek(line_indented_matcher)))), // TODO: Do we still need the item_matcher entry here? If we remove it, then child items should become part of the body of the parent item which would match the description on https://orgmode.org/worg/org-syntax.html recognize(tuple((line_ending, peek(item_matcher)))), @@ -144,3 +149,14 @@ fn get_context_item_indent<'r, 's>(context: Context<'r, 's>) -> Option<&'r usize } None } + +pub fn plain_list_end(input: &str) -> Res<&str, &str> { + alt(( + recognize(tuple(( + map(line_break, TextElement::LineBreak), + blank_line, + many1(blank_line), + ))), + eof, + ))(input) +} From 77d8c5e029f5f4b66ae1f01ac93c008c22622dda Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 21 Mar 2023 13:38:23 -0400 Subject: [PATCH 17/18] Mark the plain list functions as allow dead code. --- src/parser/plain_list.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 2d61713..a00c29f 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -5,7 +5,6 @@ use nom::character::complete::digit1; use nom::character::complete::line_ending; use nom::character::complete::one_of; use nom::character::complete::space0; -use nom::combinator::consumed; use nom::combinator::eof; use nom::combinator::map; use nom::combinator::not; @@ -13,7 +12,6 @@ use nom::combinator::opt; use nom::combinator::peek; use nom::combinator::recognize; use nom::combinator::verify; -use nom::multi::many0_count; use nom::multi::many1; use nom::sequence::tuple; @@ -21,7 +19,6 @@ use super::combinator::context_many_till; use super::error::CustomError; use super::error::MyError; use super::error::Res; -use super::paragraph::paragraph_end; use super::parser_context::ContextElement; use super::parser_with_context::parser_with_context; use super::text::blank_line; @@ -34,11 +31,13 @@ use super::token::TextElement; use super::token::Token; use super::Context; +#[allow(dead_code)] pub fn plain_list<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, PlainList<'s>> { // todo todo!() } +#[allow(dead_code)] pub fn item<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, ListItem<'s>> { let (remaining, leading_whitespace) = space0(i)?; let indent_level = leading_whitespace.len(); From cee12b751233ab72b1b93256a9ee42093982d531 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 21 Mar 2023 13:44:22 -0400 Subject: [PATCH 18/18] Allow IgnoreParent to be dead code. --- src/parser/parser_context.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/parser/parser_context.rs b/src/parser/parser_context.rs index 07cd186..ae3ae60 100644 --- a/src/parser/parser_context.rs +++ b/src/parser/parser_context.rs @@ -117,6 +117,7 @@ pub struct PreviousElementNode<'r> { #[derive(Clone)] pub enum ChainBehavior<'r> { AndParent(Option<&'r Matcher>), + #[allow(dead_code)] IgnoreParent(Option<&'r Matcher>), }