From 80724302650ad9c1ef0a2cd8b65c33d23caa796c Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 17:31:21 -0400 Subject: [PATCH 01/31] Add an example of text markup opening and closing. --- .../text_markup/opening_and_closing.org | 13 ++++++ toy_language.txt | 41 +++++++++++++++---- 2 files changed, 47 insertions(+), 7 deletions(-) create mode 100644 org_mode_samples/text_markup/opening_and_closing.org diff --git a/org_mode_samples/text_markup/opening_and_closing.org b/org_mode_samples/text_markup/opening_and_closing.org new file mode 100644 index 0000000..fc07a3b --- /dev/null +++ b/org_mode_samples/text_markup/opening_and_closing.org @@ -0,0 +1,13 @@ +prologue *goes here* I guess *bold +text* + +bold*wont* start *or stop*when there is text outside it + +I guess *regular + +text* + +[[foo][foo *bar]] baz* car + + +*nesting *bold entrances* and* exits diff --git a/toy_language.txt b/toy_language.txt index 2323b99..5d18333 100644 --- a/toy_language.txt +++ b/toy_language.txt @@ -1,8 +1,35 @@ -#+name: foo -#+caption: bar -#+caption: baz -[[file:lorem/ipsum.png]] +prologue *goes here* I guess *bold +text* -#+name: cat -#+caption: dog -[[file:lorem/ipsum.png]] +bold*wont* start *or stop*when there is text outside it + +I guess *regular + +text* + +[foo *bar] baz* car + + +*nesting *bold entrances* and* exits + +* Heading + +body of heading + +** Child heading +** Immediate second child heading + +* Second top-level heading +foo bar +1. This is a list immediately after a paragraph +2. This is a second item in the list + 1. This is a child of the second item +#+begin_center +1. foo +2. bar +#+end_center +[fn:1] A footnote. + +[fn:2] A multi- + +line footnote. From b3e182d7fefe1ba0cc6dfb3e385c0d1125e34969 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 17:58:16 -0400 Subject: [PATCH 02/31] Plain text compare not yet working because the text is quoted from the sexp. 
--- src/compare/diff.rs | 140 +++++++++++++++++++++++++++++++++----------- src/parser/mod.rs | 4 ++ src/parser/sexp.rs | 11 +++- 3 files changed, 120 insertions(+), 35 deletions(-) diff --git a/src/compare/diff.rs b/src/compare/diff.rs index b68ec67..2343942 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -19,16 +19,20 @@ use crate::parser::Heading; use crate::parser::HorizontalRule; use crate::parser::Keyword; use crate::parser::LatexEnvironment; +use crate::parser::Object; use crate::parser::Paragraph; use crate::parser::PlainList; use crate::parser::PlainListItem; +use crate::parser::PlainText; use crate::parser::Planning; use crate::parser::PropertyDrawer; +use crate::parser::RegularLink; use crate::parser::Section; use crate::parser::SrcBlock; use crate::parser::Table; use crate::parser::TableCell; use crate::parser::TableRow; +use crate::parser::TextMarkup; use crate::parser::VerseBlock; #[derive(Debug)] @@ -64,7 +68,13 @@ impl DiffResult { DiffStatus::Bad => "BAD", } }; - println!("{}{} {}", " ".repeat(indentation), status_text, self.name); + println!( + "{}{} {} {}", + " ".repeat(indentation), + status_text, + self.name, + self.message.as_ref().map(|m| m.as_str()).unwrap_or("") + ); for child in self.children.iter() { child.print_indented(indentation + 1)?; } @@ -85,6 +95,48 @@ impl DiffResult { } } +fn compare_element<'s>( + source: &'s str, + emacs: &'s Token<'s>, + rust: &'s Element<'s>, +) -> Result> { + match rust { + Element::Paragraph(obj) => compare_paragraph(source, emacs, obj), + Element::PlainList(obj) => compare_plain_list(source, emacs, obj), + Element::GreaterBlock(obj) => compare_greater_block(source, emacs, obj), + Element::DynamicBlock(obj) => compare_dynamic_block(source, emacs, obj), + Element::FootnoteDefinition(obj) => compare_footnote_definition(source, emacs, obj), + Element::Comment(obj) => compare_comment(source, emacs, obj), + Element::Drawer(obj) => compare_drawer(source, emacs, obj), + Element::PropertyDrawer(obj) 
=> compare_property_drawer(source, emacs, obj), + Element::Table(obj) => compare_table(source, emacs, obj), + Element::VerseBlock(obj) => compare_verse_block(source, emacs, obj), + Element::CommentBlock(obj) => compare_comment_block(source, emacs, obj), + Element::ExampleBlock(obj) => compare_example_block(source, emacs, obj), + Element::ExportBlock(obj) => compare_export_block(source, emacs, obj), + Element::SrcBlock(obj) => compare_src_block(source, emacs, obj), + Element::Clock(obj) => compare_clock(source, emacs, obj), + Element::DiarySexp(obj) => compare_diary_sexp(source, emacs, obj), + Element::Planning(obj) => compare_planning(source, emacs, obj), + Element::FixedWidthArea(obj) => compare_fixed_width_area(source, emacs, obj), + Element::HorizontalRule(obj) => compare_horizontal_rule(source, emacs, obj), + Element::Keyword(obj) => compare_keyword(source, emacs, obj), + Element::LatexEnvironment(obj) => compare_latex_environment(source, emacs, obj), + } +} + +fn compare_object<'s>( + source: &'s str, + emacs: &'s Token<'s>, + rust: &'s Object<'s>, +) -> Result> { + match rust { + Object::TextMarkup(obj) => compare_text_markup(source, emacs, obj), + Object::PlainText(obj) => compare_plain_text(source, emacs, obj), + Object::RegularLink(obj) => compare_regular_link(source, emacs, obj), + } +} + pub fn compare_document<'s>( emacs: &'s Token<'s>, rust: &'s Document<'s>, @@ -200,43 +252,13 @@ fn compare_heading<'s>( }) } -fn compare_element<'s>( - source: &'s str, - emacs: &'s Token<'s>, - rust: &'s Element<'s>, -) -> Result> { - match rust { - Element::Paragraph(obj) => compare_paragraph(source, emacs, obj), - Element::PlainList(obj) => compare_plain_list(source, emacs, obj), - Element::GreaterBlock(obj) => compare_greater_block(source, emacs, obj), - Element::DynamicBlock(obj) => compare_dynamic_block(source, emacs, obj), - Element::FootnoteDefinition(obj) => compare_footnote_definition(source, emacs, obj), - Element::Comment(obj) => compare_comment(source, 
emacs, obj), - Element::Drawer(obj) => compare_drawer(source, emacs, obj), - Element::PropertyDrawer(obj) => compare_property_drawer(source, emacs, obj), - Element::Table(obj) => compare_table(source, emacs, obj), - Element::VerseBlock(obj) => compare_verse_block(source, emacs, obj), - Element::CommentBlock(obj) => compare_comment_block(source, emacs, obj), - Element::ExampleBlock(obj) => compare_example_block(source, emacs, obj), - Element::ExportBlock(obj) => compare_export_block(source, emacs, obj), - Element::SrcBlock(obj) => compare_src_block(source, emacs, obj), - Element::Clock(obj) => compare_clock(source, emacs, obj), - Element::DiarySexp(obj) => compare_diary_sexp(source, emacs, obj), - Element::Planning(obj) => compare_planning(source, emacs, obj), - Element::FixedWidthArea(obj) => compare_fixed_width_area(source, emacs, obj), - Element::HorizontalRule(obj) => compare_horizontal_rule(source, emacs, obj), - Element::Keyword(obj) => compare_keyword(source, emacs, obj), - Element::LatexEnvironment(obj) => compare_latex_environment(source, emacs, obj), - } -} - fn compare_paragraph<'s>( source: &'s str, emacs: &'s Token<'s>, rust: &'s Paragraph<'s>, ) -> Result> { let children = emacs.as_list()?; - let child_status = Vec::new(); + let mut child_status = Vec::new(); let mut this_status = DiffStatus::Good; let emacs_name = "paragraph"; if assert_name(emacs, emacs_name).is_err() { @@ -247,7 +269,9 @@ fn compare_paragraph<'s>( this_status = DiffStatus::Bad; } - for (_emacs_child, _rust_child) in children.iter().skip(2).zip(rust.children.iter()) {} + for (emacs_child, rust_child) in children.iter().skip(2).zip(rust.children.iter()) { + child_status.push(compare_object(source, emacs_child, rust_child)?); + } Ok(DiffResult { status: this_status, @@ -856,3 +880,53 @@ fn compare_latex_environment<'s>( children: child_status, }) } + +fn compare_plain_text<'s>( + _source: &'s str, + emacs: &'s Token<'s>, + rust: &'s PlainText<'s>, +) -> Result> { + let mut this_status 
= DiffStatus::Good; + let mut message = None; + let text = emacs.as_text()?; + if text.text != rust.source { + this_status = DiffStatus::Bad; + message = Some(format!( + "(emacs != rust) {:?} != {:?}", + text.text, rust.source + )); + } + + Ok(DiffResult { + status: this_status, + name: "plain-text".to_owned(), + message, + children: Vec::new(), + }) +} + +fn compare_text_markup<'s>( + _source: &'s str, + emacs: &'s Token<'s>, + rust: &'s TextMarkup<'s>, +) -> Result> { + Ok(DiffResult { + status: DiffStatus::Good, + name: "text-markup".to_owned(), + message: None, + children: Vec::new(), + }) +} + +fn compare_regular_link<'s>( + _source: &'s str, + emacs: &'s Token<'s>, + rust: &'s RegularLink<'s>, +) -> Result> { + Ok(DiffResult { + status: DiffStatus::Good, + name: "regular-link".to_owned(), + message: None, + children: Vec::new(), + }) +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 48c4cb6..3bd8761 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -60,5 +60,9 @@ pub use lesser_element::Planning; pub use lesser_element::SrcBlock; pub use lesser_element::TableCell; pub use lesser_element::VerseBlock; +pub use object::Object; +pub use object::PlainText; +pub use object::RegularLink; +pub use object::TextMarkup; pub use source::Source; type Context<'r, 's> = &'r parser_context::ContextTree<'r, 's>; diff --git a/src/parser/sexp.rs b/src/parser/sexp.rs index ea4a5a6..8543548 100644 --- a/src/parser/sexp.rs +++ b/src/parser/sexp.rs @@ -27,9 +27,9 @@ pub enum Token<'s> { #[derive(Debug)] pub struct TextWithProperties<'s> { #[allow(dead_code)] - text: &'s str, + pub text: &'s str, #[allow(dead_code)] - properties: Vec>, + pub properties: Vec>, } impl<'s> Token<'s> { @@ -47,6 +47,13 @@ impl<'s> Token<'s> { }?) } + pub fn as_text<'p>(&'p self) -> Result<&'p TextWithProperties<'s>, Box> { + Ok(match self { + Token::TextWithProperties(body) => Ok(body), + _ => Err(format!("wrong token type {:?}", self)), + }?) 
+ } + pub fn as_map<'p>( &'p self, ) -> Result>, Box> { From 2ac04496304e23aa315ef13b29a801e4a16f83df Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 18:18:13 -0400 Subject: [PATCH 03/31] Unquote the text. --- src/compare/diff.rs | 5 +++-- src/parser/sexp.rs | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 2343942..f57eefc 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -889,11 +889,12 @@ fn compare_plain_text<'s>( let mut this_status = DiffStatus::Good; let mut message = None; let text = emacs.as_text()?; - if text.text != rust.source { + let unquoted_text = text.unquote()?; + if unquoted_text != rust.source { this_status = DiffStatus::Bad; message = Some(format!( "(emacs != rust) {:?} != {:?}", - text.text, rust.source + unquoted_text, rust.source )); } diff --git a/src/parser/sexp.rs b/src/parser/sexp.rs index 8543548..da5f2b7 100644 --- a/src/parser/sexp.rs +++ b/src/parser/sexp.rs @@ -32,6 +32,45 @@ pub struct TextWithProperties<'s> { pub properties: Vec>, } +impl<'s> TextWithProperties<'s> { + pub fn unquote(&self) -> Result> { + let mut out = String::with_capacity(self.text.len()); + if !self.text.starts_with(r#"""#) { + return Err("Quoted text does not start with quote.".into()); + } + if !self.text.ends_with(r#"""#) { + return Err("Quoted text does not end with quote.".into()); + } + let interior_text = &self.text[1..(self.text.len() - 1)]; + let mut state = ParseState::Normal; + for current_char in interior_text.chars().into_iter() { + state = match (state, current_char) { + (ParseState::Normal, '\\') => ParseState::Escape, + (ParseState::Normal, _) => { + out.push(current_char); + ParseState::Normal + } + (ParseState::Escape, 'n') => { + out.push('\n'); + ParseState::Normal + } + (ParseState::Escape, '\\') => { + out.push('\\'); + ParseState::Normal + } + _ => todo!(), + }; + } + + Ok(out) + } +} + +enum ParseState 
{ + Normal, + Escape, +} + impl<'s> Token<'s> { pub fn as_list<'p>(&'p self) -> Result<&'p Vec>, Box> { Ok(match self { From 99645ea14ce954580f7b28f3459517636385a8d8 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 18:31:40 -0400 Subject: [PATCH 04/31] Mark tests that we expect to fail. --- build.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/build.rs b/build.rs index 615d8a7..3932a65 100644 --- a/build.rs +++ b/build.rs @@ -72,6 +72,16 @@ fn is_expect_fail(name: &str) -> Option<&str> { match name { "drawer_drawer_with_headline_inside" => Some("Apparently lines with :end: become their own paragraph. This odd behavior needs to be investigated more."), "element_container_priority_footnote_definition_dynamic_block" => Some("Apparently broken begin lines become their own paragraph."), + "element_container_priority_drawer_greater_block" => Some("Need to implement subscript."), + "element_container_priority_dynamic_block_greater_block" => Some("Need to implement subscript."), + "element_container_priority_footnote_definition_greater_block" => Some("Need to implement subscript."), + "element_container_priority_greater_block_greater_block" => Some("Need to implement subscript."), + "element_container_priority_section_greater_block" => Some("Need to implement subscript."), + "exit_matcher_investigation_bold_with_asterisk_inside" => Some("Need to implement bold."), + "exit_matcher_investigation_table_list" => Some("Need to implement bold."), + "keyword_affiliated_keyword" => Some("Need to implement link."), + "paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."), + "text_markup_opening_and_closing" => Some("Need to implement bold and link."), _ => None, } } From 538031c688abe2b2a5c3002168e55e8517169f22 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 18:54:19 -0400 Subject: 
[PATCH 05/31] Call the text markup parser. --- src/parser/mod.rs | 1 + src/parser/object_parser.rs | 22 ++++++++++++++++------ src/parser/text_markup.rs | 11 +++++++++++ 3 files changed, 28 insertions(+), 6 deletions(-) create mode 100644 src/parser/text_markup.rs diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 3bd8761..1f5cfec 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -29,6 +29,7 @@ mod property_drawer; pub mod sexp; mod source; mod table; +mod text_markup; mod util; pub use document::document; pub use document::Document; diff --git a/src/parser/object_parser.rs b/src/parser/object_parser.rs index a6ce342..7b9920e 100644 --- a/src/parser/object_parser.rs +++ b/src/parser/object_parser.rs @@ -1,4 +1,6 @@ use crate::error::Res; +use crate::parser::text_markup::text_markup; +use nom::branch::alt; use nom::combinator::map; use nom::combinator::not; @@ -16,9 +18,13 @@ pub fn standard_set_object<'r, 's>( // TODO: add entities, LaTeX fragments, export snippets, footnote references, citations (NOT citation references), inline babel calls, inline source blocks, line breaks, links, macros, targets and radio targets, statistics cookies, subscript and superscript, timestamps, and text markup. 
not(|i| context.check_exit_matcher(i))(input)?; - let plain_text_matcher = parser_with_context!(plain_text)(context); - - map(plain_text_matcher, Object::PlainText)(input) + alt(( + map( + parser_with_context!(text_markup)(context), + Object::TextMarkup, + ), + map(parser_with_context!(plain_text)(context), Object::PlainText), + ))(input) } #[tracing::instrument(ret, level = "debug")] @@ -29,7 +35,11 @@ pub fn minimal_set_object<'r, 's>( // TODO: add text markup, entities, LaTeX fragments, superscripts and subscripts not(|i| context.check_exit_matcher(i))(input)?; - let plain_text_matcher = parser_with_context!(plain_text)(context); - - map(plain_text_matcher, Object::PlainText)(input) + alt(( + map( + parser_with_context!(text_markup)(context), + Object::TextMarkup, + ), + map(parser_with_context!(plain_text)(context), Object::PlainText), + ))(input) } diff --git a/src/parser/text_markup.rs b/src/parser/text_markup.rs new file mode 100644 index 0000000..8b50aef --- /dev/null +++ b/src/parser/text_markup.rs @@ -0,0 +1,11 @@ +use super::Context; +use crate::error::Res; +use crate::parser::TextMarkup; + +#[tracing::instrument(ret, level = "debug")] +pub fn text_markup<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, TextMarkup<'s>> { + todo!() +} From 80f43d54daaf99f98aaaca5376e72cd2b4430444 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 18:55:24 -0400 Subject: [PATCH 06/31] Add a simple example of text markup. 
--- org_mode_samples/text_markup/simple.org | 1 + 1 file changed, 1 insertion(+) create mode 100644 org_mode_samples/text_markup/simple.org diff --git a/org_mode_samples/text_markup/simple.org b/org_mode_samples/text_markup/simple.org new file mode 100644 index 0000000..6c2c984 --- /dev/null +++ b/org_mode_samples/text_markup/simple.org @@ -0,0 +1 @@ +foo *bar* baz From c0809cce1070af6e3c483d0c4f56c75dcd52aa99 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 19:06:48 -0400 Subject: [PATCH 07/31] Starting to parse text markup. --- src/parser/text_markup.rs | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/parser/text_markup.rs b/src/parser/text_markup.rs index 8b50aef..80f134d 100644 --- a/src/parser/text_markup.rs +++ b/src/parser/text_markup.rs @@ -1,5 +1,11 @@ +use nom::branch::alt; +use nom::bytes::complete::tag; + use super::Context; +use crate::error::CustomError; +use crate::error::MyError; use crate::error::Res; +use crate::parser::util::get_one_before; use crate::parser::TextMarkup; #[tracing::instrument(ret, level = "debug")] @@ -7,5 +13,36 @@ pub fn text_markup<'r, 's>( context: Context<'r, 's>, input: &'s str, ) -> Res<&'s str, TextMarkup<'s>> { + let (remaining, _) = pre(context, input)?; + let (remaining, open) = marker(remaining)?; + return Err(nom::Err::Error(CustomError::MyError(MyError( + "text markup not implemented yet.", + )))); + todo!() } + +#[tracing::instrument(ret, level = "debug")] +pub fn pre<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> { + let document_root = context.get_document_root().unwrap(); + let preceding_character = get_one_before(document_root, input) + .map(|slice| slice.chars().next()) + .flatten(); + match preceding_character { + // If None, we are at the start of the file which is technically the beginning of a line. 
+ None | Some('\r') | Some('\n') | Some(' ') | Some('\t') | Some('-') | Some('(') + | Some('{') | Some('\'') | Some('"') => {} + Some(_) => { + // Not at start of line, cannot be a heading + return Err(nom::Err::Error(CustomError::MyError(MyError( + "Not a valid pre character for text markup.", + )))); + } + }; + Ok((input, ())) +} + +#[tracing::instrument(ret, level = "debug")] +pub fn marker(input: &str) -> Res<&str, &str> { + alt((tag("*"), tag("/"), tag("_"), tag("="), tag("~"), tag("+")))(input) +} From f70babdcf4b4522de0342b440bbc4c7cab7522da Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 19:12:48 -0400 Subject: [PATCH 08/31] Get tracing back into the latex environment end parser. --- src/parser/latex_environment.rs | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/src/parser/latex_environment.rs b/src/parser/latex_environment.rs index 78d0209..a1353f7 100644 --- a/src/parser/latex_environment.rs +++ b/src/parser/latex_environment.rs @@ -60,16 +60,25 @@ fn latex_environment_end( ) -> impl for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, &'s str> { let current_name_lower = current_name.to_lowercase(); move |context: Context, input: &str| { - start_of_line(context, input)?; - let (remaining, _leading_whitespace) = space0(input)?; - let (remaining, (_begin, _name, _close_brace, _ws, _line_ending)) = tuple(( - tag_no_case(r#"\end{"#), - tag_no_case(current_name_lower.as_str()), - tag("}"), - space0, - alt((eof, line_ending)), - ))(remaining)?; - let source = get_consumed(input, remaining); - Ok((remaining, source)) + _latex_environment_end(context, input, current_name_lower.as_str()) } } + +#[tracing::instrument(ret, level = "debug")] +fn _latex_environment_end<'r, 's, 'x>( + context: Context<'r, 's>, + input: &'s str, + current_name_lower: &'x str, +) -> Res<&'s str, &'s str> { + start_of_line(context, input)?; + let (remaining, _leading_whitespace) = space0(input)?; + let (remaining, 
(_begin, _name, _close_brace, _ws, _line_ending)) = tuple(( + tag_no_case(r#"\end{"#), + tag_no_case(current_name_lower), + tag("}"), + space0, + alt((eof, line_ending)), + ))(remaining)?; + let source = get_consumed(input, remaining); + Ok((remaining, source)) +} From 0b3f414ecff654271b6810dccfc81d8f19ef2168 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 19:15:29 -0400 Subject: [PATCH 09/31] Get tracing back into the element parser. --- src/parser/element_parser.rs | 141 ++++++++++++++++++----------------- 1 file changed, 74 insertions(+), 67 deletions(-) diff --git a/src/parser/element_parser.rs b/src/parser/element_parser.rs index 626a117..176b592 100644 --- a/src/parser/element_parser.rs +++ b/src/parser/element_parser.rs @@ -33,71 +33,78 @@ use nom::multi::many0; pub fn element( can_be_paragraph: bool, ) -> impl for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, Element<'s>> { - move |context: Context, input: &str| { - let plain_list_matcher = parser_with_context!(plain_list)(context); - let greater_block_matcher = parser_with_context!(greater_block)(context); - let dynamic_block_matcher = parser_with_context!(dynamic_block)(context); - let footnote_definition_matcher = parser_with_context!(footnote_definition)(context); - let comment_matcher = parser_with_context!(comment)(context); - let drawer_matcher = parser_with_context!(drawer)(context); - let table_matcher = parser_with_context!(org_mode_table)(context); - let verse_block_matcher = parser_with_context!(verse_block)(context); - let comment_block_matcher = parser_with_context!(comment_block)(context); - let example_block_matcher = parser_with_context!(example_block)(context); - let export_block_matcher = parser_with_context!(export_block)(context); - let src_block_matcher = parser_with_context!(src_block)(context); - let clock_matcher = parser_with_context!(clock)(context); - let diary_sexp_matcher = parser_with_context!(diary_sexp)(context); - let fixed_width_area_matcher = 
parser_with_context!(fixed_width_area)(context); - let horizontal_rule_matcher = parser_with_context!(horizontal_rule)(context); - let keyword_matcher = parser_with_context!(keyword)(context); - let paragraph_matcher = parser_with_context!(paragraph)(context); - let latex_environment_matcher = parser_with_context!(latex_environment)(context); - - let (remaining, mut affiliated_keywords) = many0(keyword_matcher)(input)?; - let (remaining, mut element) = match alt(( - map(plain_list_matcher, Element::PlainList), - map(greater_block_matcher, Element::GreaterBlock), - map(dynamic_block_matcher, Element::DynamicBlock), - map(footnote_definition_matcher, Element::FootnoteDefinition), - map(comment_matcher, Element::Comment), - map(drawer_matcher, Element::Drawer), - map(table_matcher, Element::Table), - map(verse_block_matcher, Element::VerseBlock), - map(comment_block_matcher, Element::CommentBlock), - map(example_block_matcher, Element::ExampleBlock), - map(export_block_matcher, Element::ExportBlock), - map(src_block_matcher, Element::SrcBlock), - map(clock_matcher, Element::Clock), - map(diary_sexp_matcher, Element::DiarySexp), - map(fixed_width_area_matcher, Element::FixedWidthArea), - map(horizontal_rule_matcher, Element::HorizontalRule), - map(latex_environment_matcher, Element::LatexEnvironment), - ))(remaining) - { - the_ok @ Ok(_) => the_ok, - Err(_) => { - if can_be_paragraph { - match map(paragraph_matcher, Element::Paragraph)(remaining) { - the_ok @ Ok(_) => the_ok, - Err(_) => { - affiliated_keywords.clear(); - map(keyword_matcher, Element::Keyword)(input) - } - } - } else { - affiliated_keywords.clear(); - map(keyword_matcher, Element::Keyword)(input) - } - } - }?; - - let (remaining, _trailing_ws) = - maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; - - let source = get_consumed(input, remaining); - element.set_source(source); - - Ok((remaining, element)) - } + move |context: Context, input: &str| _element(context, input, 
can_be_paragraph) +} + +#[tracing::instrument(ret, level = "debug")] +fn _element<'r, 's>( + context: Context<'r, 's>, + input: &'s str, + can_be_paragraph: bool, +) -> Res<&'s str, Element<'s>> { + let plain_list_matcher = parser_with_context!(plain_list)(context); + let greater_block_matcher = parser_with_context!(greater_block)(context); + let dynamic_block_matcher = parser_with_context!(dynamic_block)(context); + let footnote_definition_matcher = parser_with_context!(footnote_definition)(context); + let comment_matcher = parser_with_context!(comment)(context); + let drawer_matcher = parser_with_context!(drawer)(context); + let table_matcher = parser_with_context!(org_mode_table)(context); + let verse_block_matcher = parser_with_context!(verse_block)(context); + let comment_block_matcher = parser_with_context!(comment_block)(context); + let example_block_matcher = parser_with_context!(example_block)(context); + let export_block_matcher = parser_with_context!(export_block)(context); + let src_block_matcher = parser_with_context!(src_block)(context); + let clock_matcher = parser_with_context!(clock)(context); + let diary_sexp_matcher = parser_with_context!(diary_sexp)(context); + let fixed_width_area_matcher = parser_with_context!(fixed_width_area)(context); + let horizontal_rule_matcher = parser_with_context!(horizontal_rule)(context); + let keyword_matcher = parser_with_context!(keyword)(context); + let paragraph_matcher = parser_with_context!(paragraph)(context); + let latex_environment_matcher = parser_with_context!(latex_environment)(context); + + let (remaining, mut affiliated_keywords) = many0(keyword_matcher)(input)?; + let (remaining, mut element) = match alt(( + map(plain_list_matcher, Element::PlainList), + map(greater_block_matcher, Element::GreaterBlock), + map(dynamic_block_matcher, Element::DynamicBlock), + map(footnote_definition_matcher, Element::FootnoteDefinition), + map(comment_matcher, Element::Comment), + map(drawer_matcher, Element::Drawer), 
+ map(table_matcher, Element::Table), + map(verse_block_matcher, Element::VerseBlock), + map(comment_block_matcher, Element::CommentBlock), + map(example_block_matcher, Element::ExampleBlock), + map(export_block_matcher, Element::ExportBlock), + map(src_block_matcher, Element::SrcBlock), + map(clock_matcher, Element::Clock), + map(diary_sexp_matcher, Element::DiarySexp), + map(fixed_width_area_matcher, Element::FixedWidthArea), + map(horizontal_rule_matcher, Element::HorizontalRule), + map(latex_environment_matcher, Element::LatexEnvironment), + ))(remaining) + { + the_ok @ Ok(_) => the_ok, + Err(_) => { + if can_be_paragraph { + match map(paragraph_matcher, Element::Paragraph)(remaining) { + the_ok @ Ok(_) => the_ok, + Err(_) => { + affiliated_keywords.clear(); + map(keyword_matcher, Element::Keyword)(input) + } + } + } else { + affiliated_keywords.clear(); + map(keyword_matcher, Element::Keyword)(input) + } + } + }?; + + let (remaining, _trailing_ws) = + maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; + + let source = get_consumed(input, remaining); + element.set_source(source); + + Ok((remaining, element)) } From 9a3bde0d80968582a3b7f98183f358b59efde542 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 19:34:13 -0400 Subject: [PATCH 10/31] First attempt at text markup. 
--- src/parser/lesser_element.rs | 4 +- src/parser/object.rs | 1 + src/parser/text_markup.rs | 76 ++++++++++++++++++++++++++++++++++-- toy_language.txt | 36 +---------------- 4 files changed, 76 insertions(+), 41 deletions(-) diff --git a/src/parser/lesser_element.rs b/src/parser/lesser_element.rs index 9c9a67f..cac3adf 100644 --- a/src/parser/lesser_element.rs +++ b/src/parser/lesser_element.rs @@ -1,6 +1,6 @@ use super::object::Object; -use super::object::TextMarkup; use super::source::Source; +use super::PlainText; #[derive(Debug)] pub struct Paragraph<'s> { @@ -97,7 +97,7 @@ pub struct LatexEnvironment<'s> { impl<'s> Paragraph<'s> { pub fn of_text(input: &'s str) -> Self { let mut objects = Vec::with_capacity(1); - objects.push(Object::TextMarkup(TextMarkup { source: input })); + objects.push(Object::PlainText(PlainText { source: input })); Paragraph { source: input, children: objects, diff --git a/src/parser/object.rs b/src/parser/object.rs index db0534e..9347e6c 100644 --- a/src/parser/object.rs +++ b/src/parser/object.rs @@ -14,6 +14,7 @@ pub enum Object<'s> { #[derive(Debug)] pub struct TextMarkup<'s> { pub source: &'s str, + pub children: Vec>, } #[derive(Debug)] diff --git a/src/parser/text_markup.rs b/src/parser/text_markup.rs index 80f134d..40671f0 100644 --- a/src/parser/text_markup.rs +++ b/src/parser/text_markup.rs @@ -1,10 +1,21 @@ use nom::branch::alt; use nom::bytes::complete::tag; +use nom::combinator::peek; +use nom::combinator::verify; +use nom::multi::many_till; +use nom::sequence::terminated; use super::Context; use crate::error::CustomError; use crate::error::MyError; use crate::error::Res; +use crate::parser::exiting::ExitClass; +use crate::parser::object_parser::standard_set_object; +use crate::parser::parser_context::ContextElement; +use crate::parser::parser_context::ExitMatcherNode; +use crate::parser::parser_with_context::parser_with_context; +use crate::parser::util::exit_matcher_parser; +use crate::parser::util::get_consumed; use 
crate::parser::util::get_one_before; use crate::parser::TextMarkup; @@ -15,11 +26,27 @@ pub fn text_markup<'r, 's>( ) -> Res<&'s str, TextMarkup<'s>> { let (remaining, _) = pre(context, input)?; let (remaining, open) = marker(remaining)?; - return Err(nom::Err::Error(CustomError::MyError(MyError( - "text markup not implemented yet.", - )))); + let text_markup_end_specialized = text_markup_end(open); + let parser_context = + context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Beta, + exit_matcher: &text_markup_end_specialized, + })); - todo!() + let (remaining, (children, _exit_contents)) = verify( + many_till( + parser_with_context!(standard_set_object)(&parser_context), + parser_with_context!(exit_matcher_parser)(&parser_context), + ), + |(children, _exit_contents)| !children.is_empty(), + )(remaining)?; + + // TODO: Sometimes its plain text, not objects + let (remaining, close) = text_markup_end_specialized(context, remaining)?; + + let source = get_consumed(input, remaining); + + Ok((remaining, TextMarkup { source, children })) } #[tracing::instrument(ret, level = "debug")] @@ -42,7 +69,48 @@ pub fn pre<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> Ok((input, ())) } +#[tracing::instrument(ret, level = "debug")] +pub fn post<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> { + let document_root = context.get_document_root().unwrap(); + let preceding_character = get_one_before(document_root, input) + .map(|slice| slice.chars().next()) + .flatten(); + match preceding_character { + // If None, we are at the start of the file which is technically the beginning of a line. 
+ None | Some('\r') | Some('\n') | Some(' ') | Some('\t') | Some('-') | Some('(') + | Some('{') | Some('\'') | Some('"') => {} + Some(_) => { + // Not at start of line, cannot be a heading + return Err(nom::Err::Error(CustomError::MyError(MyError( + "Not a valid pre character for text markup.", + )))); + } + }; + Ok((input, ())) +} + #[tracing::instrument(ret, level = "debug")] pub fn marker(input: &str) -> Res<&str, &str> { alt((tag("*"), tag("/"), tag("_"), tag("="), tag("~"), tag("+")))(input) } + +fn text_markup_end( + marker_symbol: &str, +) -> impl for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, &'s str> { + let marker_symbol = marker_symbol.to_owned(); + move |context: Context, input: &str| _text_markup_end(context, input, marker_symbol.as_str()) +} + +#[tracing::instrument(ret, level = "debug")] +fn _text_markup_end<'r, 's, 'x>( + context: Context<'r, 's>, + input: &'s str, + marker_symbol: &'x str, +) -> Res<&'s str, &'s str> { + let (remaining, _marker) = terminated( + tag(marker_symbol), + peek(parser_with_context!(post)(context)), + )(input)?; + let source = get_consumed(input, remaining); + Ok((remaining, source)) +} diff --git a/toy_language.txt b/toy_language.txt index 5d18333..6c2c984 100644 --- a/toy_language.txt +++ b/toy_language.txt @@ -1,35 +1 @@ -prologue *goes here* I guess *bold -text* - -bold*wont* start *or stop*when there is text outside it - -I guess *regular - -text* - -[foo *bar] baz* car - - -*nesting *bold entrances* and* exits - -* Heading - -body of heading - -** Child heading -** Immediate second child heading - -* Second top-level heading -foo bar -1. This is a list immediately after a paragraph -2. This is a second item in the list - 1. This is a child of the second item -#+begin_center -1. foo -2. bar -#+end_center -[fn:1] A footnote. - -[fn:2] A multi- - -line footnote. 
+foo *bar* baz From a4cce121c0a6e47a31a656608bf178d00c048615 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 19:46:27 -0400 Subject: [PATCH 11/31] Add an exit matcher to plain text. --- rustfmt.toml | 1 + src/parser/object_parser.rs | 14 ++++++++++++++ src/parser/plain_text.rs | 24 ++++++++++++++++++++---- 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/rustfmt.toml b/rustfmt.toml index da3d1e7..5795ff1 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -1,4 +1,5 @@ imports_granularity = "Item" +group_imports = "StdExternalCrate" # In rustfmt 2.0 I will want to adjust these settings. # diff --git a/src/parser/object_parser.rs b/src/parser/object_parser.rs index 7b9920e..0fd833b 100644 --- a/src/parser/object_parser.rs +++ b/src/parser/object_parser.rs @@ -43,3 +43,17 @@ pub fn minimal_set_object<'r, 's>( map(parser_with_context!(plain_text)(context), Object::PlainText), ))(input) } + +#[tracing::instrument(ret, level = "debug")] +pub fn any_object_except_plain_text<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, Object<'s>> { + // TODO: add entities, LaTeX fragments, export snippets, footnote references, citations (NOT citation references), inline babel calls, inline source blocks, line breaks, links, macros, targets and radio targets, statistics cookies, subscript and superscript, timestamps, and text markup. 
+ not(|i| context.check_exit_matcher(i))(input)?; + + alt((map( + parser_with_context!(text_markup)(context), + Object::TextMarkup, + ),))(input) +} diff --git a/src/parser/plain_text.rs b/src/parser/plain_text.rs index 6b515aa..e65faaa 100644 --- a/src/parser/plain_text.rs +++ b/src/parser/plain_text.rs @@ -1,9 +1,16 @@ +use nom::combinator::not; +use nom::combinator::recognize; + use super::object::PlainText; use super::Context; use crate::error::CustomError; use crate::error::MyError; use crate::error::Res; -use nom::combinator::not; +use crate::parser::exiting::ExitClass; +use crate::parser::object_parser::any_object_except_plain_text; +use crate::parser::parser_context::ContextElement; +use crate::parser::parser_context::ExitMatcherNode; +use crate::parser::parser_with_context::parser_with_context; #[tracing::instrument(ret, level = "debug")] pub fn plain_text<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, PlainText<'s>> { @@ -12,12 +19,17 @@ pub fn plain_text<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s "Zero input length to plain_text.", )))); } + let parser_context = + context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Beta, + exit_matcher: &plain_text_end, + })); let mut current_input = input.char_indices(); loop { match current_input.next() { Some((offset, _char)) => { let remaining = &input[offset..]; - let exit_matcher_status = not(|i| context.check_exit_matcher(i))(remaining); + let exit_matcher_status = not(|i| parser_context.check_exit_matcher(i))(remaining); if exit_matcher_status.is_err() { if offset == 0 { // If we're at the start of the input, then nothing is plain text, so fire an error for zero-length match. 
@@ -40,18 +52,22 @@ pub fn plain_text<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s } } +#[tracing::instrument(ret, level = "debug")] +fn plain_text_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + recognize(parser_with_context!(any_object_except_plain_text)(context))(input) +} + #[cfg(test)] mod tests { use nom::combinator::map; + use super::*; use crate::parser::object::Object; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ContextTree; use crate::parser::parser_with_context::parser_with_context; use crate::parser::source::Source; - use super::*; - #[test] fn plain_text_simple() { let input = "foobarbaz"; From b6233811c3401b322e26ae530e9ccab56b770e7b Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 20:02:51 -0400 Subject: [PATCH 12/31] Fix the implementation of post. --- src/parser/object_parser.rs | 11 ++++------- src/parser/text_markup.rs | 23 +++++++---------------- 2 files changed, 11 insertions(+), 23 deletions(-) diff --git a/src/parser/object_parser.rs b/src/parser/object_parser.rs index 0fd833b..dfbd866 100644 --- a/src/parser/object_parser.rs +++ b/src/parser/object_parser.rs @@ -1,14 +1,13 @@ -use crate::error::Res; -use crate::parser::text_markup::text_markup; use nom::branch::alt; use nom::combinator::map; use nom::combinator::not; -use crate::parser::object::Object; - use super::parser_with_context::parser_with_context; use super::plain_text::plain_text; use super::Context; +use crate::error::Res; +use crate::parser::object::Object; +use crate::parser::text_markup::text_markup; #[tracing::instrument(ret, level = "debug")] pub fn standard_set_object<'r, 's>( @@ -49,9 +48,7 @@ pub fn any_object_except_plain_text<'r, 's>( context: Context<'r, 's>, input: &'s str, ) -> Res<&'s str, Object<'s>> { - // TODO: add entities, LaTeX fragments, export snippets, footnote references, citations (NOT citation references), inline babel calls, inline source 
blocks, line breaks, links, macros, targets and radio targets, statistics cookies, subscript and superscript, timestamps, and text markup. - not(|i| context.check_exit_matcher(i))(input)?; - + // Used for exit matchers so this does not check exit matcher condition. alt((map( parser_with_context!(text_markup)(context), Object::TextMarkup, diff --git a/src/parser/text_markup.rs b/src/parser/text_markup.rs index 40671f0..f3159da 100644 --- a/src/parser/text_markup.rs +++ b/src/parser/text_markup.rs @@ -1,6 +1,10 @@ use nom::branch::alt; use nom::bytes::complete::tag; +use nom::character::complete::line_ending; +use nom::character::complete::one_of; +use nom::character::complete::space0; use nom::combinator::peek; +use nom::combinator::recognize; use nom::combinator::verify; use nom::multi::many_till; use nom::sequence::terminated; @@ -43,6 +47,7 @@ pub fn text_markup<'r, 's>( // TODO: Sometimes its plain text, not objects let (remaining, close) = text_markup_end_specialized(context, remaining)?; + let (remaining, _trailing_whitespace) = space0(remaining)?; let source = get_consumed(input, remaining); @@ -71,22 +76,8 @@ pub fn pre<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> #[tracing::instrument(ret, level = "debug")] pub fn post<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> { - let document_root = context.get_document_root().unwrap(); - let preceding_character = get_one_before(document_root, input) - .map(|slice| slice.chars().next()) - .flatten(); - match preceding_character { - // If None, we are at the start of the file which is technically the beginning of a line. 
- None | Some('\r') | Some('\n') | Some(' ') | Some('\t') | Some('-') | Some('(') - | Some('{') | Some('\'') | Some('"') => {} - Some(_) => { - // Not at start of line, cannot be a heading - return Err(nom::Err::Error(CustomError::MyError(MyError( - "Not a valid pre character for text markup.", - )))); - } - }; - Ok((input, ())) + let (remaining, _) = alt((recognize(one_of(" \r\n\t-.,;:!?')}[\"")), line_ending))(input)?; + Ok((remaining, ())) } #[tracing::instrument(ret, level = "debug")] From fab4ce05448669a181d86347c58e1ce4b275a807 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 20:04:35 -0400 Subject: [PATCH 13/31] Fix a warning. --- src/parser/text_markup.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/text_markup.rs b/src/parser/text_markup.rs index f3159da..cac1918 100644 --- a/src/parser/text_markup.rs +++ b/src/parser/text_markup.rs @@ -46,7 +46,7 @@ pub fn text_markup<'r, 's>( )(remaining)?; // TODO: Sometimes its plain text, not objects - let (remaining, close) = text_markup_end_specialized(context, remaining)?; + let (remaining, _close) = text_markup_end_specialized(context, remaining)?; let (remaining, _trailing_whitespace) = space0(remaining)?; let source = get_consumed(input, remaining); From 401fb339d0d2a23d4cf0e503019b7713f26076c6 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 20:05:58 -0400 Subject: [PATCH 14/31] Add a text markup text with all variants. 
--- org_mode_samples/text_markup/all_variants.org | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 org_mode_samples/text_markup/all_variants.org diff --git a/org_mode_samples/text_markup/all_variants.org b/org_mode_samples/text_markup/all_variants.org new file mode 100644 index 0000000..caa226b --- /dev/null +++ b/org_mode_samples/text_markup/all_variants.org @@ -0,0 +1,6 @@ +*bold* +/italic/ +_underline_ +=verbatim= +~code~ ++strike-through+ From d2fc8a513f025f16e6a203a9e5ac1fd74e6b1fbe Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 20:22:07 -0400 Subject: [PATCH 15/31] Separate out the text markup types into their own types. --- src/compare/diff.rs | 85 ++++++++++++++++++++++++++++++++++++--- src/parser/mod.rs | 7 +++- src/parser/object.rs | 47 ++++++++++++++++++++-- src/parser/text_markup.rs | 5 +-- 4 files changed, 131 insertions(+), 13 deletions(-) diff --git a/src/compare/diff.rs b/src/compare/diff.rs index f57eefc..377d827 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -1,7 +1,9 @@ use super::util::assert_bounds; use super::util::assert_name; use crate::parser::sexp::Token; +use crate::parser::Bold; use crate::parser::Clock; +use crate::parser::Code; use crate::parser::Comment; use crate::parser::CommentBlock; use crate::parser::DiarySexp; @@ -17,6 +19,7 @@ use crate::parser::FootnoteDefinition; use crate::parser::GreaterBlock; use crate::parser::Heading; use crate::parser::HorizontalRule; +use crate::parser::Italic; use crate::parser::Keyword; use crate::parser::LatexEnvironment; use crate::parser::Object; @@ -29,10 +32,12 @@ use crate::parser::PropertyDrawer; use crate::parser::RegularLink; use crate::parser::Section; use crate::parser::SrcBlock; +use crate::parser::StrikeThrough; use crate::parser::Table; use crate::parser::TableCell; use crate::parser::TableRow; -use crate::parser::TextMarkup; +use crate::parser::Underline; +use crate::parser::Verbatim; use crate::parser::VerseBlock; #[derive(Debug)] @@ 
-131,7 +136,12 @@ fn compare_object<'s>( rust: &'s Object<'s>, ) -> Result> { match rust { - Object::TextMarkup(obj) => compare_text_markup(source, emacs, obj), + Object::Bold(obj) => compare_bold(source, emacs, obj), + Object::Italic(obj) => compare_italic(source, emacs, obj), + Object::Underline(obj) => compare_underline(source, emacs, obj), + Object::Verbatim(obj) => compare_verbatim(source, emacs, obj), + Object::Code(obj) => compare_code(source, emacs, obj), + Object::StrikeThrough(obj) => compare_strike_through(source, emacs, obj), Object::PlainText(obj) => compare_plain_text(source, emacs, obj), Object::RegularLink(obj) => compare_regular_link(source, emacs, obj), } @@ -906,14 +916,79 @@ fn compare_plain_text<'s>( }) } -fn compare_text_markup<'s>( +fn compare_bold<'s>( _source: &'s str, emacs: &'s Token<'s>, - rust: &'s TextMarkup<'s>, + rust: &'s Bold<'s>, ) -> Result> { Ok(DiffResult { status: DiffStatus::Good, - name: "text-markup".to_owned(), + name: "bold".to_owned(), + message: None, + children: Vec::new(), + }) +} + +fn compare_italic<'s>( + _source: &'s str, + emacs: &'s Token<'s>, + rust: &'s Italic<'s>, +) -> Result> { + Ok(DiffResult { + status: DiffStatus::Good, + name: "italic".to_owned(), + message: None, + children: Vec::new(), + }) +} + +fn compare_underline<'s>( + _source: &'s str, + emacs: &'s Token<'s>, + rust: &'s Underline<'s>, +) -> Result> { + Ok(DiffResult { + status: DiffStatus::Good, + name: "underline".to_owned(), + message: None, + children: Vec::new(), + }) +} + +fn compare_verbatim<'s>( + _source: &'s str, + emacs: &'s Token<'s>, + rust: &'s Verbatim<'s>, +) -> Result> { + Ok(DiffResult { + status: DiffStatus::Good, + name: "verbatim".to_owned(), + message: None, + children: Vec::new(), + }) +} + +fn compare_code<'s>( + _source: &'s str, + emacs: &'s Token<'s>, + rust: &'s Code<'s>, +) -> Result> { + Ok(DiffResult { + status: DiffStatus::Good, + name: "code".to_owned(), + message: None, + children: Vec::new(), + }) +} + +fn 
compare_strike_through<'s>( + _source: &'s str, + emacs: &'s Token<'s>, + rust: &'s StrikeThrough<'s>, +) -> Result> { + Ok(DiffResult { + status: DiffStatus::Good, + name: "strike-through".to_owned(), message: None, children: Vec::new(), }) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 1f5cfec..0d5e2a6 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -61,9 +61,14 @@ pub use lesser_element::Planning; pub use lesser_element::SrcBlock; pub use lesser_element::TableCell; pub use lesser_element::VerseBlock; +pub use object::Bold; +pub use object::Code; +pub use object::Italic; pub use object::Object; pub use object::PlainText; pub use object::RegularLink; -pub use object::TextMarkup; +pub use object::StrikeThrough; +pub use object::Underline; +pub use object::Verbatim; pub use source::Source; type Context<'r, 's> = &'r parser_context::ContextTree<'r, 's>; diff --git a/src/parser/object.rs b/src/parser/object.rs index 9347e6c..e07b109 100644 --- a/src/parser/object.rs +++ b/src/parser/object.rs @@ -2,8 +2,12 @@ use super::source::Source; #[derive(Debug)] pub enum Object<'s> { - #[allow(dead_code)] - TextMarkup(TextMarkup<'s>), + Bold(Bold<'s>), + Italic(Italic<'s>), + Underline(Underline<'s>), + StrikeThrough(StrikeThrough<'s>), + Code(Code<'s>), + Verbatim(Verbatim<'s>), PlainText(PlainText<'s>), @@ -12,11 +16,41 @@ pub enum Object<'s> { } #[derive(Debug)] -pub struct TextMarkup<'s> { +pub struct Bold<'s> { pub source: &'s str, pub children: Vec>, } +#[derive(Debug)] +pub struct Italic<'s> { + pub source: &'s str, + pub children: Vec>, +} + +#[derive(Debug)] +pub struct Underline<'s> { + pub source: &'s str, + pub children: Vec>, +} + +#[derive(Debug)] +pub struct StrikeThrough<'s> { + pub source: &'s str, + pub children: Vec>, +} + +#[derive(Debug)] +pub struct Code<'s> { + pub source: &'s str, + pub contents: &'s str, +} + +#[derive(Debug)] +pub struct Verbatim<'s> { + pub source: &'s str, + pub contents: &'s str, +} + #[derive(Debug)] pub struct 
PlainText<'s> { pub source: &'s str, @@ -30,7 +64,12 @@ pub struct RegularLink<'s> { impl<'s> Source<'s> for Object<'s> { fn get_source(&'s self) -> &'s str { match self { - Object::TextMarkup(obj) => obj.source, + Object::Bold(obj) => obj.source, + Object::Italic(obj) => obj.source, + Object::Underline(obj) => obj.source, + Object::StrikeThrough(obj) => obj.source, + Object::Code(obj) => obj.source, + Object::Verbatim(obj) => obj.source, Object::PlainText(obj) => obj.source, Object::RegularLink(obj) => obj.source, } diff --git a/src/parser/text_markup.rs b/src/parser/text_markup.rs index cac1918..303f4da 100644 --- a/src/parser/text_markup.rs +++ b/src/parser/text_markup.rs @@ -21,13 +21,12 @@ use crate::parser::parser_with_context::parser_with_context; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; use crate::parser::util::get_one_before; -use crate::parser::TextMarkup; #[tracing::instrument(ret, level = "debug")] pub fn text_markup<'r, 's>( context: Context<'r, 's>, input: &'s str, -) -> Res<&'s str, TextMarkup<'s>> { +) -> Res<&'s str, TextMarkupObject<'s>> { let (remaining, _) = pre(context, input)?; let (remaining, open) = marker(remaining)?; let text_markup_end_specialized = text_markup_end(open); @@ -51,7 +50,7 @@ pub fn text_markup<'r, 's>( let source = get_consumed(input, remaining); - Ok((remaining, TextMarkup { source, children })) + Ok((remaining, TextMarkupObject { source, children })) } #[tracing::instrument(ret, level = "debug")] From 036f4add4a047127ec1e2c76ccab1c56095dea3f Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 20:48:01 -0400 Subject: [PATCH 16/31] The object parsers separated out. 
--- src/compare/diff.rs | 100 ++++++++++++++++++++++++++++-------- src/parser/object.rs | 36 +++++++++++++ src/parser/object_parser.rs | 15 ++---- src/parser/text_markup.rs | 73 +++++++++++++++++++++++--- 4 files changed, 186 insertions(+), 38 deletions(-) diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 377d827..0583b74 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -917,78 +917,138 @@ fn compare_plain_text<'s>( } fn compare_bold<'s>( - _source: &'s str, + source: &'s str, emacs: &'s Token<'s>, rust: &'s Bold<'s>, ) -> Result> { + let mut this_status = DiffStatus::Good; + let emacs_name = "bold"; + if assert_name(emacs, emacs_name).is_err() { + this_status = DiffStatus::Bad; + } + + if assert_bounds(source, emacs, rust).is_err() { + this_status = DiffStatus::Bad; + } + Ok(DiffResult { - status: DiffStatus::Good, - name: "bold".to_owned(), + status: this_status, + name: emacs_name.to_owned(), message: None, children: Vec::new(), }) } fn compare_italic<'s>( - _source: &'s str, + source: &'s str, emacs: &'s Token<'s>, rust: &'s Italic<'s>, ) -> Result> { + let mut this_status = DiffStatus::Good; + let emacs_name = "italic"; + if assert_name(emacs, emacs_name).is_err() { + this_status = DiffStatus::Bad; + } + + if assert_bounds(source, emacs, rust).is_err() { + this_status = DiffStatus::Bad; + } + Ok(DiffResult { - status: DiffStatus::Good, - name: "italic".to_owned(), + status: this_status, + name: emacs_name.to_owned(), message: None, children: Vec::new(), }) } fn compare_underline<'s>( - _source: &'s str, + source: &'s str, emacs: &'s Token<'s>, rust: &'s Underline<'s>, ) -> Result> { + let mut this_status = DiffStatus::Good; + let emacs_name = "underline"; + if assert_name(emacs, emacs_name).is_err() { + this_status = DiffStatus::Bad; + } + + if assert_bounds(source, emacs, rust).is_err() { + this_status = DiffStatus::Bad; + } + Ok(DiffResult { - status: DiffStatus::Good, - name: "underline".to_owned(), + status: this_status, + name: 
emacs_name.to_owned(), message: None, children: Vec::new(), }) } fn compare_verbatim<'s>( - _source: &'s str, + source: &'s str, emacs: &'s Token<'s>, rust: &'s Verbatim<'s>, ) -> Result> { + let mut this_status = DiffStatus::Good; + let emacs_name = "verbatim"; + if assert_name(emacs, emacs_name).is_err() { + this_status = DiffStatus::Bad; + } + + if assert_bounds(source, emacs, rust).is_err() { + this_status = DiffStatus::Bad; + } + Ok(DiffResult { - status: DiffStatus::Good, - name: "verbatim".to_owned(), + status: this_status, + name: emacs_name.to_owned(), message: None, children: Vec::new(), }) } fn compare_code<'s>( - _source: &'s str, + source: &'s str, emacs: &'s Token<'s>, rust: &'s Code<'s>, ) -> Result> { + let mut this_status = DiffStatus::Good; + let emacs_name = "code"; + if assert_name(emacs, emacs_name).is_err() { + this_status = DiffStatus::Bad; + } + + if assert_bounds(source, emacs, rust).is_err() { + this_status = DiffStatus::Bad; + } + Ok(DiffResult { - status: DiffStatus::Good, - name: "code".to_owned(), + status: this_status, + name: emacs_name.to_owned(), message: None, children: Vec::new(), }) } fn compare_strike_through<'s>( - _source: &'s str, + source: &'s str, emacs: &'s Token<'s>, rust: &'s StrikeThrough<'s>, ) -> Result> { + let mut this_status = DiffStatus::Good; + let emacs_name = "strike-through"; + if assert_name(emacs, emacs_name).is_err() { + this_status = DiffStatus::Bad; + } + + if assert_bounds(source, emacs, rust).is_err() { + this_status = DiffStatus::Bad; + } + Ok(DiffResult { - status: DiffStatus::Good, - name: "strike-through".to_owned(), + status: this_status, + name: emacs_name.to_owned(), message: None, children: Vec::new(), }) @@ -996,8 +1056,8 @@ fn compare_strike_through<'s>( fn compare_regular_link<'s>( _source: &'s str, - emacs: &'s Token<'s>, - rust: &'s RegularLink<'s>, + _emacs: &'s Token<'s>, + _rust: &'s RegularLink<'s>, ) -> Result> { Ok(DiffResult { status: DiffStatus::Good, diff --git 
a/src/parser/object.rs b/src/parser/object.rs index e07b109..5a3ec31 100644 --- a/src/parser/object.rs +++ b/src/parser/object.rs @@ -75,3 +75,39 @@ impl<'s> Source<'s> for Object<'s> { } } } + +impl<'s> Source<'s> for Bold<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} + +impl<'s> Source<'s> for Italic<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} + +impl<'s> Source<'s> for Underline<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} + +impl<'s> Source<'s> for StrikeThrough<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} + +impl<'s> Source<'s> for Code<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} + +impl<'s> Source<'s> for Verbatim<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} diff --git a/src/parser/object_parser.rs b/src/parser/object_parser.rs index dfbd866..a4ba457 100644 --- a/src/parser/object_parser.rs +++ b/src/parser/object_parser.rs @@ -18,10 +18,7 @@ pub fn standard_set_object<'r, 's>( not(|i| context.check_exit_matcher(i))(input)?; alt(( - map( - parser_with_context!(text_markup)(context), - Object::TextMarkup, - ), + parser_with_context!(text_markup)(context), map(parser_with_context!(plain_text)(context), Object::PlainText), ))(input) } @@ -35,10 +32,7 @@ pub fn minimal_set_object<'r, 's>( not(|i| context.check_exit_matcher(i))(input)?; alt(( - map( - parser_with_context!(text_markup)(context), - Object::TextMarkup, - ), + parser_with_context!(text_markup)(context), map(parser_with_context!(plain_text)(context), Object::PlainText), ))(input) } @@ -49,8 +43,5 @@ pub fn any_object_except_plain_text<'r, 's>( input: &'s str, ) -> Res<&'s str, Object<'s>> { // Used for exit matchers so this does not check exit matcher condition. 
- alt((map( - parser_with_context!(text_markup)(context), - Object::TextMarkup, - ),))(input) + alt((parser_with_context!(text_markup)(context),))(input) } diff --git a/src/parser/text_markup.rs b/src/parser/text_markup.rs index 303f4da..5e77d1b 100644 --- a/src/parser/text_markup.rs +++ b/src/parser/text_markup.rs @@ -3,6 +3,7 @@ use nom::bytes::complete::tag; use nom::character::complete::line_ending; use nom::character::complete::one_of; use nom::character::complete::space0; +use nom::combinator::map; use nom::combinator::peek; use nom::combinator::recognize; use nom::combinator::verify; @@ -21,12 +22,75 @@ use crate::parser::parser_with_context::parser_with_context; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; use crate::parser::util::get_one_before; +use crate::parser::Bold; +use crate::parser::Italic; +use crate::parser::Object; +use crate::parser::StrikeThrough; +use crate::parser::Underline; #[tracing::instrument(ret, level = "debug")] -pub fn text_markup<'r, 's>( +pub fn text_markup<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Object<'s>> { + alt(( + map(parser_with_context!(bold)(context), Object::Bold), + map(parser_with_context!(italic)(context), Object::Italic), + map(parser_with_context!(underline)(context), Object::Underline), + map( + parser_with_context!(strike_through)(context), + Object::StrikeThrough, + ), + // map(parser_with_context!(verbatim)(context), Object::Verbatim), + // map(parser_with_context!(code)(context), Object::Code), + ))(input) +} + +#[tracing::instrument(ret, level = "debug")] +pub fn bold<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Bold<'s>> { + let text_markup_object_specialized = text_markup_object("*"); + let (remaining, children) = text_markup_object_specialized(context, input)?; + let source = get_consumed(input, remaining); + Ok((remaining, Bold { source, children })) +} + +#[tracing::instrument(ret, level = "debug")] +pub fn italic<'r, 
's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Italic<'s>> { + let text_markup_object_specialized = text_markup_object("/"); + let (remaining, children) = text_markup_object_specialized(context, input)?; + let source = get_consumed(input, remaining); + Ok((remaining, Italic { source, children })) +} + +#[tracing::instrument(ret, level = "debug")] +pub fn underline<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Underline<'s>> { + let text_markup_object_specialized = text_markup_object("_"); + let (remaining, children) = text_markup_object_specialized(context, input)?; + let source = get_consumed(input, remaining); + Ok((remaining, Underline { source, children })) +} + +#[tracing::instrument(ret, level = "debug")] +pub fn strike_through<'r, 's>( context: Context<'r, 's>, input: &'s str, -) -> Res<&'s str, TextMarkupObject<'s>> { +) -> Res<&'s str, StrikeThrough<'s>> { + let text_markup_object_specialized = text_markup_object("+"); + let (remaining, children) = text_markup_object_specialized(context, input)?; + let source = get_consumed(input, remaining); + Ok((remaining, StrikeThrough { source, children })) +} + +fn text_markup_object( + marker_symbol: &str, +) -> impl for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, Vec>> { + let marker_symbol = marker_symbol.to_owned(); + move |context: Context, input: &str| _text_markup_object(context, input, marker_symbol.as_str()) +} + +#[tracing::instrument(ret, level = "debug")] +fn _text_markup_object<'r, 's, 'x>( + context: Context<'r, 's>, + input: &'s str, + marker_symbol: &'x str, +) -> Res<&'s str, Vec>> { let (remaining, _) = pre(context, input)?; let (remaining, open) = marker(remaining)?; let text_markup_end_specialized = text_markup_end(open); @@ -47,10 +111,7 @@ pub fn text_markup<'r, 's>( // TODO: Sometimes its plain text, not objects let (remaining, _close) = text_markup_end_specialized(context, remaining)?; let (remaining, _trailing_whitespace) = space0(remaining)?; - - 
let source = get_consumed(input, remaining); - - Ok((remaining, TextMarkupObject { source, children })) + Ok((remaining, children)) } #[tracing::instrument(ret, level = "debug")] From e6c5670a85cc47c375b8b77489fbfe803e748fb0 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 20:57:30 -0400 Subject: [PATCH 17/31] Support string-based text markup (code and verbatim). --- src/parser/text_markup.rs | 59 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/src/parser/text_markup.rs b/src/parser/text_markup.rs index 5e77d1b..b8e24c7 100644 --- a/src/parser/text_markup.rs +++ b/src/parser/text_markup.rs @@ -1,5 +1,6 @@ use nom::branch::alt; use nom::bytes::complete::tag; +use nom::character::complete::anychar; use nom::character::complete::line_ending; use nom::character::complete::one_of; use nom::character::complete::space0; @@ -23,10 +24,12 @@ use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; use crate::parser::util::get_one_before; use crate::parser::Bold; +use crate::parser::Code; use crate::parser::Italic; use crate::parser::Object; use crate::parser::StrikeThrough; use crate::parser::Underline; +use crate::parser::Verbatim; #[tracing::instrument(ret, level = "debug")] pub fn text_markup<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Object<'s>> { @@ -38,8 +41,8 @@ pub fn text_markup<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s parser_with_context!(strike_through)(context), Object::StrikeThrough, ), - // map(parser_with_context!(verbatim)(context), Object::Verbatim), - // map(parser_with_context!(code)(context), Object::Code), + map(parser_with_context!(verbatim)(context), Object::Verbatim), + map(parser_with_context!(code)(context), Object::Code), ))(input) } @@ -78,6 +81,22 @@ pub fn strike_through<'r, 's>( Ok((remaining, StrikeThrough { source, children })) } +#[tracing::instrument(ret, level = "debug")] +pub fn verbatim<'r, 
's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Verbatim<'s>> { + let text_markup_string_specialized = text_markup_string("+"); + let (remaining, contents) = text_markup_string_specialized(context, input)?; + let source = get_consumed(input, remaining); + Ok((remaining, Verbatim { source, contents })) +} + +#[tracing::instrument(ret, level = "debug")] +pub fn code<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Code<'s>> { + let text_markup_string_specialized = text_markup_string("+"); + let (remaining, contents) = text_markup_string_specialized(context, input)?; + let source = get_consumed(input, remaining); + Ok((remaining, Code { source, contents })) +} + fn text_markup_object( marker_symbol: &str, ) -> impl for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, Vec>> { @@ -114,6 +133,42 @@ fn _text_markup_object<'r, 's, 'x>( Ok((remaining, children)) } +fn text_markup_string( + marker_symbol: &str, +) -> impl for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, &'s str> { + let marker_symbol = marker_symbol.to_owned(); + move |context: Context, input: &str| _text_markup_string(context, input, marker_symbol.as_str()) +} + +#[tracing::instrument(ret, level = "debug")] +fn _text_markup_string<'r, 's, 'x>( + context: Context<'r, 's>, + input: &'s str, + marker_symbol: &'x str, +) -> Res<&'s str, &'s str> { + let (remaining, _) = pre(context, input)?; + let (remaining, open) = marker(remaining)?; + let text_markup_end_specialized = text_markup_end(open); + let parser_context = + context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Beta, + exit_matcher: &text_markup_end_specialized, + })); + + let (remaining, contents) = recognize(verify( + many_till( + anychar, + parser_with_context!(exit_matcher_parser)(&parser_context), + ), + |(children, _exit_contents)| !children.is_empty(), + ))(remaining)?; + + // TODO: Sometimes its plain text, not objects + let (remaining, _close) = 
text_markup_end_specialized(context, remaining)?; + let (remaining, _trailing_whitespace) = space0(remaining)?; + Ok((remaining, contents)) +} + #[tracing::instrument(ret, level = "debug")] pub fn pre<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> { let document_root = context.get_document_root().unwrap(); From 62c226fb478fec1d5844c94fe9fc153ead7f2e9f Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 21:01:29 -0400 Subject: [PATCH 18/31] Fix matching only the specific markup. --- src/parser/text_markup.rs | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/parser/text_markup.rs b/src/parser/text_markup.rs index b8e24c7..db3ce05 100644 --- a/src/parser/text_markup.rs +++ b/src/parser/text_markup.rs @@ -83,7 +83,7 @@ pub fn strike_through<'r, 's>( #[tracing::instrument(ret, level = "debug")] pub fn verbatim<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Verbatim<'s>> { - let text_markup_string_specialized = text_markup_string("+"); + let text_markup_string_specialized = text_markup_string("="); let (remaining, contents) = text_markup_string_specialized(context, input)?; let source = get_consumed(input, remaining); Ok((remaining, Verbatim { source, contents })) @@ -91,7 +91,7 @@ pub fn verbatim<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str #[tracing::instrument(ret, level = "debug")] pub fn code<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Code<'s>> { - let text_markup_string_specialized = text_markup_string("+"); + let text_markup_string_specialized = text_markup_string("~"); let (remaining, contents) = text_markup_string_specialized(context, input)?; let source = get_consumed(input, remaining); Ok((remaining, Code { source, contents })) @@ -111,7 +111,7 @@ fn _text_markup_object<'r, 's, 'x>( marker_symbol: &'x str, ) -> Res<&'s str, Vec>> { let (remaining, _) = pre(context, input)?; - let (remaining, open) = marker(remaining)?; + let 
(remaining, open) = tag(marker_symbol)(remaining)?; let text_markup_end_specialized = text_markup_end(open); let parser_context = context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { @@ -147,7 +147,7 @@ fn _text_markup_string<'r, 's, 'x>( marker_symbol: &'x str, ) -> Res<&'s str, &'s str> { let (remaining, _) = pre(context, input)?; - let (remaining, open) = marker(remaining)?; + let (remaining, open) = tag(marker_symbol)(remaining)?; let text_markup_end_specialized = text_markup_end(open); let parser_context = context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { @@ -195,11 +195,6 @@ pub fn post<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, () Ok((remaining, ())) } -#[tracing::instrument(ret, level = "debug")] -pub fn marker(input: &str) -> Res<&str, &str> { - alt((tag("*"), tag("/"), tag("_"), tag("="), tag("~"), tag("+")))(input) -} - fn text_markup_end( marker_symbol: &str, ) -> impl for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, &'s str> { From 72e656751a877d5d96623418bd246379a774e483 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 21:11:22 -0400 Subject: [PATCH 19/31] It looks like paragraphs inside empty drawers just capture the first new line and then the rest are trailing whitespace capture. 
--- .../element_container_priority/paragraph_drawer.org | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/org_mode_samples/element_container_priority/paragraph_drawer.org b/org_mode_samples/element_container_priority/paragraph_drawer.org index 574a2f2..717fc3d 100644 --- a/org_mode_samples/element_container_priority/paragraph_drawer.org +++ b/org_mode_samples/element_container_priority/paragraph_drawer.org @@ -2,4 +2,12 @@ foo :drawername: + + + + + + + + :end: From 189edaa24a1f0a12e208f725924252114edd630d Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 21:25:46 -0400 Subject: [PATCH 20/31] Simulate the trailing whitespace capture for empty drawer. --- src/parser/drawer.rs | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/parser/drawer.rs b/src/parser/drawer.rs index 9c93a94..4e6307d 100644 --- a/src/parser/drawer.rs +++ b/src/parser/drawer.rs @@ -1,29 +1,28 @@ -use crate::error::CustomError; -use crate::error::MyError; -use crate::error::Res; use nom::branch::alt; use nom::bytes::complete::tag; use nom::bytes::complete::tag_no_case; use nom::bytes::complete::take_while; use nom::character::complete::line_ending; use nom::character::complete::space0; -use nom::combinator::consumed; use nom::combinator::eof; use nom::combinator::recognize; use nom::multi::many_till; use nom::sequence::tuple; use super::Context; +use crate::error::CustomError; +use crate::error::MyError; +use crate::error::Res; use crate::parser::element_parser::element; use crate::parser::exiting::ExitClass; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ExitMatcherNode; use crate::parser::parser_with_context::parser_with_context; +use crate::parser::source::SetSource; use crate::parser::util::blank_line; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; use crate::parser::util::immediate_in_section; - use crate::parser::util::start_of_line; 
use crate::parser::util::WORD_CONSTITUENT_CHARACTERS; use crate::parser::Drawer; @@ -56,17 +55,20 @@ pub fn drawer<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, let element_matcher = parser_with_context!(element(true))(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); - let (remaining, children) = match consumed(many_till(blank_line, exit_matcher))(remaining) { - Ok((remaining, (whitespace, (_children, _exit_contents)))) => ( - remaining, - vec![Element::Paragraph(Paragraph::of_text(whitespace))], - ), - Err(_) => { - let (remaining, (children, _exit_contents)) = - many_till(element_matcher, exit_matcher)(remaining)?; - (remaining, children) - } - }; + let (remaining, children) = + match tuple((blank_line, many_till(blank_line, exit_matcher)))(remaining) { + Ok((remain, (first_line, (_trailing_whitespace, _exit_contents)))) => { + let mut element = Element::Paragraph(Paragraph::of_text(first_line)); + let source = get_consumed(remaining, remain); + element.set_source(source); + (remain, vec![element]) + } + Err(_) => { + let (remaining, (children, _exit_contents)) = + many_till(element_matcher, exit_matcher)(remaining)?; + (remaining, children) + } + }; let (remaining, _end) = drawer_end(&parser_context, remaining)?; let source = get_consumed(input, remaining); From cade02eb7279dd8c79f9fd88d78f0f25b5e9417a Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 21:39:31 -0400 Subject: [PATCH 21/31] Make the empty drawer honor an immediate exit condition. 
--- src/parser/drawer.rs | 33 +++++++++++++++++++-------------- toy_language.txt | 7 ++++++- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/src/parser/drawer.rs b/src/parser/drawer.rs index 4e6307d..69ebd4a 100644 --- a/src/parser/drawer.rs +++ b/src/parser/drawer.rs @@ -5,6 +5,7 @@ use nom::bytes::complete::take_while; use nom::character::complete::line_ending; use nom::character::complete::space0; use nom::combinator::eof; +use nom::combinator::not; use nom::combinator::recognize; use nom::multi::many_till; use nom::sequence::tuple; @@ -55,20 +56,24 @@ pub fn drawer<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, let element_matcher = parser_with_context!(element(true))(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); - let (remaining, children) = - match tuple((blank_line, many_till(blank_line, exit_matcher)))(remaining) { - Ok((remain, (first_line, (_trailing_whitespace, _exit_contents)))) => { - let mut element = Element::Paragraph(Paragraph::of_text(first_line)); - let source = get_consumed(remaining, remain); - element.set_source(source); - (remain, vec![element]) - } - Err(_) => { - let (remaining, (children, _exit_contents)) = - many_till(element_matcher, exit_matcher)(remaining)?; - (remaining, children) - } - }; + let (remaining, children) = match tuple(( + not(exit_matcher), + blank_line, + many_till(blank_line, exit_matcher), + ))(remaining) + { + Ok((remain, (_not_immediate_exit, first_line, (_trailing_whitespace, _exit_contents)))) => { + let mut element = Element::Paragraph(Paragraph::of_text(first_line)); + let source = get_consumed(remaining, remain); + element.set_source(source); + (remain, vec![element]) + } + Err(_) => { + let (remaining, (children, _exit_contents)) = + many_till(element_matcher, exit_matcher)(remaining)?; + (remaining, children) + } + }; let (remaining, _end) = drawer_end(&parser_context, remaining)?; let source = get_consumed(input, remaining); 
diff --git a/toy_language.txt b/toy_language.txt index 6c2c984..ecf1b97 100644 --- a/toy_language.txt +++ b/toy_language.txt @@ -1 +1,6 @@ -foo *bar* baz +[fn:1] footnote. +:drawername: + + +:end: +Is this still in the footnote? From 4d4d30c597d3fbb3a42abd8ab2de5754f7455eec Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 21:43:11 -0400 Subject: [PATCH 22/31] Simulate trailing whitespace in empty dynamic blocks just like drawers. --- src/parser/dynamic_block.rs | 45 ++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/src/parser/dynamic_block.rs b/src/parser/dynamic_block.rs index d523920..ed7fbe1 100644 --- a/src/parser/dynamic_block.rs +++ b/src/parser/dynamic_block.rs @@ -1,3 +1,17 @@ +use nom::branch::alt; +use nom::bytes::complete::is_not; +use nom::bytes::complete::tag_no_case; +use nom::character::complete::line_ending; +use nom::character::complete::space0; +use nom::character::complete::space1; +use nom::combinator::consumed; +use nom::combinator::eof; +use nom::combinator::not; +use nom::combinator::opt; +use nom::combinator::recognize; +use nom::multi::many_till; +use nom::sequence::tuple; + use super::Context; use crate::error::CustomError; use crate::error::MyError; @@ -9,25 +23,13 @@ use crate::parser::lesser_element::Paragraph; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ExitMatcherNode; use crate::parser::parser_with_context::parser_with_context; +use crate::parser::source::SetSource; use crate::parser::util::blank_line; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; use crate::parser::util::immediate_in_section; - use crate::parser::util::start_of_line; use crate::parser::Element; -use nom::branch::alt; -use nom::bytes::complete::is_not; -use nom::bytes::complete::tag_no_case; -use nom::character::complete::line_ending; -use nom::character::complete::space0; -use nom::character::complete::space1; -use 
nom::combinator::consumed; -use nom::combinator::eof; -use nom::combinator::opt; -use nom::combinator::recognize; -use nom::multi::many_till; -use nom::sequence::tuple; #[tracing::instrument(ret, level = "debug")] pub fn dynamic_block<'r, 's>( @@ -61,11 +63,18 @@ pub fn dynamic_block<'r, 's>( }; let element_matcher = parser_with_context!(element(true))(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); - let (remaining, children) = match consumed(many_till(blank_line, exit_matcher))(remaining) { - Ok((remaining, (whitespace, (_children, _exit_contents)))) => ( - remaining, - vec![Element::Paragraph(Paragraph::of_text(whitespace))], - ), + let (remaining, children) = match tuple(( + not(exit_matcher), + blank_line, + many_till(blank_line, exit_matcher), + ))(remaining) + { + Ok((remain, (_not_immediate_exit, first_line, (_trailing_whitespace, _exit_contents)))) => { + let mut element = Element::Paragraph(Paragraph::of_text(first_line)); + let source = get_consumed(remaining, remain); + element.set_source(source); + (remain, vec![element]) + } Err(_) => { let (remaining, (children, _exit_contents)) = many_till(element_matcher, exit_matcher)(remaining)?; From 0ca6ce504f5a4a192d3c1c1b93172209c09a4aa2 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 21:45:18 -0400 Subject: [PATCH 23/31] Simulate trailing whitespace in empty greater blocks just like drawers. 
--- build.rs | 1 + src/main.rs | 3 ++- src/parser/clock.rs | 1 - src/parser/comment.rs | 10 +++---- src/parser/diary_sexp.rs | 1 - src/parser/document.rs | 24 ++++++++--------- src/parser/element_parser.rs | 9 +++---- src/parser/fixed_width_area.rs | 1 - src/parser/footnote_definition.rs | 27 +++++++++---------- src/parser/greater_block.rs | 45 ++++++++++++++++++------------- src/parser/lesser_block.rs | 3 +-- src/parser/paragraph.rs | 14 +++++----- src/parser/parser_context.rs | 11 ++++---- src/parser/plain_list.rs | 37 +++++++++++++------------ src/parser/property_drawer.rs | 6 ++--- src/parser/sexp.rs | 3 ++- src/parser/table.rs | 3 +-- src/parser/util.rs | 14 +++++----- 18 files changed, 107 insertions(+), 106 deletions(-) diff --git a/build.rs b/build.rs index 3932a65..4a83920 100644 --- a/build.rs +++ b/build.rs @@ -2,6 +2,7 @@ use std::env; use std::fs::File; use std::io::Write; use std::path::Path; + use walkdir::WalkDir; fn main() { diff --git a/src/main.rs b/src/main.rs index 0916f22..59a2cd1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,8 @@ #![feature(round_char_boundary)] +use ::organic::parser::document; + use crate::init_tracing::init_telemetry; use crate::init_tracing::shutdown_telemetry; -use ::organic::parser::document; mod init_tracing; const TEST_DOC: &'static str = include_str!("../toy_language.txt"); diff --git a/src/parser/clock.rs b/src/parser/clock.rs index 22d2e2c..361f114 100644 --- a/src/parser/clock.rs +++ b/src/parser/clock.rs @@ -15,7 +15,6 @@ use super::Context; use crate::error::Res; use crate::parser::parser_with_context::parser_with_context; use crate::parser::util::get_consumed; - use crate::parser::util::start_of_line; use crate::parser::Clock; diff --git a/src/parser/comment.rs b/src/parser/comment.rs index 15a5abc..b5ce097 100644 --- a/src/parser/comment.rs +++ b/src/parser/comment.rs @@ -1,6 +1,3 @@ -use crate::error::CustomError; -use crate::error::MyError; -use crate::error::Res; use nom::branch::alt; use 
nom::bytes::complete::is_not; use nom::bytes::complete::tag; @@ -16,11 +13,13 @@ use nom::sequence::tuple; use super::util::get_consumed; use super::Context; +use crate::error::CustomError; +use crate::error::MyError; +use crate::error::Res; use crate::parser::parser_context::ContextElement; use crate::parser::parser_with_context::parser_with_context; use crate::parser::util::exit_matcher_parser; use crate::parser::util::immediate_in_section; - use crate::parser::util::start_of_line; use crate::parser::Comment; @@ -57,12 +56,11 @@ fn comment_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str #[cfg(test)] mod tests { + use super::*; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ContextTree; use crate::parser::parser_with_context::parser_with_context; - use super::*; - #[test] fn require_space_after_hash() { let input = "# Comment line diff --git a/src/parser/diary_sexp.rs b/src/parser/diary_sexp.rs index 856648b..87f4e83 100644 --- a/src/parser/diary_sexp.rs +++ b/src/parser/diary_sexp.rs @@ -10,7 +10,6 @@ use super::sexp::sexp; use super::Context; use crate::error::Res; use crate::parser::util::get_consumed; - use crate::parser::util::start_of_line; use crate::parser::DiarySexp; diff --git a/src/parser/document.rs b/src/parser/document.rs index 853db11..9954fab 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -1,15 +1,3 @@ -use crate::error::Res; -use crate::parser::comment::comment; -use crate::parser::element_parser::element; -use crate::parser::exiting::ExitClass; -use crate::parser::object_parser::standard_set_object; -use crate::parser::parser_context::ContextElement; -use crate::parser::parser_context::ContextTree; -use crate::parser::parser_context::ExitMatcherNode; -use crate::parser::planning::planning; -use crate::parser::property_drawer::property_drawer; -use crate::parser::util::blank_line; -use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting; use 
nom::branch::alt; use nom::bytes::complete::tag; use nom::character::complete::line_ending; @@ -34,6 +22,18 @@ use super::util::exit_matcher_parser; use super::util::get_consumed; use super::util::start_of_line; use super::Context; +use crate::error::Res; +use crate::parser::comment::comment; +use crate::parser::element_parser::element; +use crate::parser::exiting::ExitClass; +use crate::parser::object_parser::standard_set_object; +use crate::parser::parser_context::ContextElement; +use crate::parser::parser_context::ContextTree; +use crate::parser::parser_context::ExitMatcherNode; +use crate::parser::planning::planning; +use crate::parser::property_drawer::property_drawer; +use crate::parser::util::blank_line; +use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting; #[derive(Debug)] pub struct Document<'s> { diff --git a/src/parser/element_parser.rs b/src/parser/element_parser.rs index 176b592..0612a78 100644 --- a/src/parser/element_parser.rs +++ b/src/parser/element_parser.rs @@ -1,3 +1,7 @@ +use nom::branch::alt; +use nom::combinator::map; +use nom::multi::many0; + use super::clock::clock; use super::comment::comment; use super::diary_sexp::diary_sexp; @@ -20,15 +24,10 @@ use super::plain_list::plain_list; use super::source::SetSource; use super::util::get_consumed; use super::util::maybe_consume_trailing_whitespace_if_not_exiting; - use super::Context; use crate::error::Res; use crate::parser::parser_with_context::parser_with_context; use crate::parser::table::org_mode_table; -use nom::branch::alt; -use nom::combinator::map; - -use nom::multi::many0; pub fn element( can_be_paragraph: bool, diff --git a/src/parser/fixed_width_area.rs b/src/parser/fixed_width_area.rs index 147ef87..293aa0d 100644 --- a/src/parser/fixed_width_area.rs +++ b/src/parser/fixed_width_area.rs @@ -16,7 +16,6 @@ use crate::error::Res; use crate::parser::parser_with_context::parser_with_context; use crate::parser::util::exit_matcher_parser; use 
crate::parser::util::get_consumed; - use crate::parser::util::start_of_line; use crate::parser::FixedWidthArea; diff --git a/src/parser/footnote_definition.rs b/src/parser/footnote_definition.rs index 00f1077..833360e 100644 --- a/src/parser/footnote_definition.rs +++ b/src/parser/footnote_definition.rs @@ -1,3 +1,15 @@ +use nom::branch::alt; +use nom::bytes::complete::tag; +use nom::bytes::complete::tag_no_case; +use nom::bytes::complete::take_while; +use nom::character::complete::digit1; +use nom::character::complete::space0; +use nom::combinator::recognize; +use nom::combinator::verify; +use nom::multi::many1; +use nom::multi::many_till; +use nom::sequence::tuple; + use super::util::WORD_CONSTITUENT_CHARACTERS; use super::Context; use crate::error::CustomError; @@ -14,19 +26,7 @@ use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; use crate::parser::util::immediate_in_section; use crate::parser::util::maybe_consume_trailing_whitespace; - use crate::parser::util::start_of_line; -use nom::branch::alt; -use nom::bytes::complete::tag; -use nom::bytes::complete::tag_no_case; -use nom::bytes::complete::take_while; -use nom::character::complete::digit1; -use nom::character::complete::space0; -use nom::combinator::recognize; -use nom::combinator::verify; -use nom::multi::many1; -use nom::multi::many_till; -use nom::sequence::tuple; #[tracing::instrument(ret, level = "debug")] pub fn footnote_definition<'r, 's>( @@ -107,13 +107,12 @@ fn footnote_definition_end<'r, 's>( #[cfg(test)] mod tests { + use super::*; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ContextTree; use crate::parser::parser_with_context::parser_with_context; use crate::parser::Source; - use super::*; - #[test] fn two_paragraphs() { let input = "[fn:1] A footnote. 
diff --git a/src/parser/greater_block.rs b/src/parser/greater_block.rs index 8ca1fe1..06c8656 100644 --- a/src/parser/greater_block.rs +++ b/src/parser/greater_block.rs @@ -1,3 +1,17 @@ +use nom::branch::alt; +use nom::bytes::complete::is_not; +use nom::bytes::complete::tag_no_case; +use nom::character::complete::line_ending; +use nom::character::complete::space0; +use nom::character::complete::space1; +use nom::combinator::consumed; +use nom::combinator::eof; +use nom::combinator::not; +use nom::combinator::opt; +use nom::combinator::verify; +use nom::multi::many_till; +use nom::sequence::tuple; + use super::Context; use crate::error::CustomError; use crate::error::MyError; @@ -8,26 +22,14 @@ use crate::parser::greater_element::GreaterBlock; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ExitMatcherNode; use crate::parser::parser_with_context::parser_with_context; +use crate::parser::source::SetSource; use crate::parser::util::blank_line; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; use crate::parser::util::immediate_in_section; - use crate::parser::util::start_of_line; use crate::parser::Element; use crate::parser::Paragraph; -use nom::branch::alt; -use nom::bytes::complete::is_not; -use nom::bytes::complete::tag_no_case; -use nom::character::complete::line_ending; -use nom::character::complete::space0; -use nom::character::complete::space1; -use nom::combinator::consumed; -use nom::combinator::eof; -use nom::combinator::opt; -use nom::combinator::verify; -use nom::multi::many_till; -use nom::sequence::tuple; #[tracing::instrument(ret, level = "debug")] pub fn greater_block<'r, 's>( @@ -72,11 +74,18 @@ pub fn greater_block<'r, 's>( let element_matcher = parser_with_context!(element(true))(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); // Check for a completely empty block - let (remaining, children) = match 
consumed(many_till(blank_line, exit_matcher))(remaining) { - Ok((remaining, (whitespace, (_children, _exit_contents)))) => ( - remaining, - vec![Element::Paragraph(Paragraph::of_text(whitespace))], - ), + let (remaining, children) = match tuple(( + not(exit_matcher), + blank_line, + many_till(blank_line, exit_matcher), + ))(remaining) + { + Ok((remain, (_not_immediate_exit, first_line, (_trailing_whitespace, _exit_contents)))) => { + let mut element = Element::Paragraph(Paragraph::of_text(first_line)); + let source = get_consumed(remaining, remain); + element.set_source(source); + (remain, vec![element]) + } Err(_) => { let (remaining, (children, _exit_contents)) = many_till(element_matcher, exit_matcher)(remaining)?; diff --git a/src/parser/lesser_block.rs b/src/parser/lesser_block.rs index 764a556..0d936dc 100644 --- a/src/parser/lesser_block.rs +++ b/src/parser/lesser_block.rs @@ -1,4 +1,3 @@ -use crate::error::Res; use nom::branch::alt; use nom::bytes::complete::is_not; use nom::bytes::complete::tag_no_case; @@ -14,6 +13,7 @@ use nom::multi::many_till; use nom::sequence::tuple; use super::Context; +use crate::error::Res; use crate::parser::exiting::ExitClass; use crate::parser::lesser_element::CommentBlock; use crate::parser::lesser_element::ExampleBlock; @@ -30,7 +30,6 @@ use crate::parser::plain_text::plain_text; use crate::parser::util::blank_line; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; - use crate::parser::util::start_of_line; #[tracing::instrument(ret, level = "debug")] diff --git a/src/parser/paragraph.rs b/src/parser/paragraph.rs index ba9c557..36ee0d8 100644 --- a/src/parser/paragraph.rs +++ b/src/parser/paragraph.rs @@ -1,5 +1,3 @@ -use crate::error::Res; -use crate::parser::element_parser::element; use nom::branch::alt; use nom::combinator::eof; use nom::combinator::recognize; @@ -8,20 +6,20 @@ use nom::multi::many1; use nom::multi::many_till; use nom::sequence::tuple; +use 
super::lesser_element::Paragraph; +use super::util::blank_line; +use super::util::get_consumed; +use super::Context; +use crate::error::Res; +use crate::parser::element_parser::element; use crate::parser::exiting::ExitClass; use crate::parser::object_parser::standard_set_object; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ExitMatcherNode; use crate::parser::parser_with_context::parser_with_context; use crate::parser::util::exit_matcher_parser; - use crate::parser::util::start_of_line; -use super::lesser_element::Paragraph; -use super::util::blank_line; -use super::util::get_consumed; -use super::Context; - #[tracing::instrument(ret, level = "debug")] pub fn paragraph<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Paragraph<'s>> { let parser_context = diff --git a/src/parser/parser_context.rs b/src/parser/parser_context.rs index 1943eda..232db31 100644 --- a/src/parser/parser_context.rs +++ b/src/parser/parser_context.rs @@ -1,14 +1,15 @@ -use crate::error::CustomError; -use crate::error::MyError; -use crate::error::Res; use std::rc::Rc; +use nom::combinator::eof; +use nom::IResult; + use super::list::List; use super::list::Node; use super::Context; +use crate::error::CustomError; +use crate::error::MyError; +use crate::error::Res; use crate::parser::exiting::ExitClass; -use nom::combinator::eof; -use nom::IResult; type Matcher = dyn for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, &'s str>; diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index d84b04a..b606d76 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -1,20 +1,3 @@ -use super::greater_element::PlainList; -use super::greater_element::PlainListItem; -use super::parser_with_context::parser_with_context; - -use super::util::non_whitespace_character; -use super::Context; -use crate::error::CustomError; -use crate::error::MyError; -use crate::error::Res; -use crate::parser::element_parser::element; -use 
crate::parser::exiting::ExitClass; -use crate::parser::parser_context::ContextElement; -use crate::parser::parser_context::ExitMatcherNode; -use crate::parser::util::blank_line; -use crate::parser::util::exit_matcher_parser; -use crate::parser::util::get_consumed; -use crate::parser::util::start_of_line; use nom::branch::alt; use nom::bytes::complete::tag; use nom::character::complete::digit1; @@ -33,6 +16,23 @@ use nom::sequence::terminated; use nom::sequence::tuple; use tracing::span; +use super::greater_element::PlainList; +use super::greater_element::PlainListItem; +use super::parser_with_context::parser_with_context; +use super::util::non_whitespace_character; +use super::Context; +use crate::error::CustomError; +use crate::error::MyError; +use crate::error::Res; +use crate::parser::element_parser::element; +use crate::parser::exiting::ExitClass; +use crate::parser::parser_context::ContextElement; +use crate::parser::parser_context::ExitMatcherNode; +use crate::parser::util::blank_line; +use crate::parser::util::exit_matcher_parser; +use crate::parser::util::get_consumed; +use crate::parser::util::start_of_line; + #[tracing::instrument(ret, level = "debug")] pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, PlainList<'s>> { let parser_context = context @@ -278,13 +278,12 @@ fn get_context_item_indent<'r, 's>(context: Context<'r, 's>) -> Option<&'r usize #[cfg(test)] mod tests { + use super::*; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ContextTree; use crate::parser::parser_with_context::parser_with_context; use crate::parser::Source; - use super::*; - #[test] fn plain_list_item_empty() { let input = "1."; diff --git a/src/parser/property_drawer.rs b/src/parser/property_drawer.rs index fb347f8..ff8edbb 100644 --- a/src/parser/property_drawer.rs +++ b/src/parser/property_drawer.rs @@ -1,6 +1,3 @@ -use crate::error::CustomError; -use crate::error::MyError; -use crate::error::Res; use 
nom::branch::alt; use nom::bytes::complete::is_not; use nom::bytes::complete::tag; @@ -16,6 +13,9 @@ use nom::multi::many_till; use nom::sequence::tuple; use super::Context; +use crate::error::CustomError; +use crate::error::MyError; +use crate::error::Res; use crate::parser::exiting::ExitClass; use crate::parser::greater_element::NodeProperty; use crate::parser::greater_element::PropertyDrawer; diff --git a/src/parser/sexp.rs b/src/parser/sexp.rs index da5f2b7..fc97365 100644 --- a/src/parser/sexp.rs +++ b/src/parser/sexp.rs @@ -1,4 +1,3 @@ -use crate::error::Res; use std::collections::HashMap; use nom::branch::alt; @@ -17,6 +16,8 @@ use nom::sequence::delimited; use nom::sequence::preceded; use nom::sequence::tuple; +use crate::error::Res; + #[derive(Debug)] pub enum Token<'s> { Atom(&'s str), diff --git a/src/parser/table.rs b/src/parser/table.rs index 275e754..9c2025c 100644 --- a/src/parser/table.rs +++ b/src/parser/table.rs @@ -1,4 +1,3 @@ -use crate::error::Res; use nom::branch::alt; use nom::bytes::complete::is_not; use nom::bytes::complete::tag; @@ -14,6 +13,7 @@ use nom::multi::many_till; use nom::sequence::tuple; use super::Context; +use crate::error::Res; use crate::parser::exiting::ExitClass; use crate::parser::greater_element::TableRow; use crate::parser::lesser_element::TableCell; @@ -24,7 +24,6 @@ use crate::parser::parser_context::ExitMatcherNode; use crate::parser::parser_with_context::parser_with_context; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; - use crate::parser::util::start_of_line; use crate::parser::Table; diff --git a/src/parser/util.rs b/src/parser/util.rs index b1ed386..45b1f09 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -1,10 +1,3 @@ -use crate::parser::parser_with_context::parser_with_context; - -use super::parser_context::ContextElement; -use super::Context; -use crate::error::CustomError; -use crate::error::MyError; -use crate::error::Res; use nom::branch::alt; use 
nom::character::complete::line_ending; use nom::character::complete::multispace0; @@ -18,6 +11,13 @@ use nom::combinator::recognize; use nom::multi::many0; use nom::sequence::tuple; +use super::parser_context::ContextElement; +use super::Context; +use crate::error::CustomError; +use crate::error::MyError; +use crate::error::Res; +use crate::parser::parser_with_context::parser_with_context; + pub const WORD_CONSTITUENT_CHARACTERS: &str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; From 76c690870734c6c98dc453fa4a0c7ddfbeb4584e Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 21:50:34 -0400 Subject: [PATCH 24/31] Fix the test runner script to work on mixed case names. --- scripts/run_integration_test.bash | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/run_integration_test.bash b/scripts/run_integration_test.bash index 9185c65..ca16183 100755 --- a/scripts/run_integration_test.bash +++ b/scripts/run_integration_test.bash @@ -13,9 +13,9 @@ function get_test_names { test_file_full_path=$(readlink -f "$test_file") relative_to_samples=$(realpath --relative-to "$samples_dir" "$test_file_full_path") without_extension="${relative_to_samples%.org}" - echo "${without_extension/\//_}" + echo "${without_extension/\//_}" | tr '[:upper:]' '[:lower:]' else - echo "$test_file" + echo "$test_file" | tr '[:upper:]' '[:lower:]' fi done } From 208e2cfe87f17270a65bf29063452206c61bceac Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 21:51:53 -0400 Subject: [PATCH 25/31] Clean up imports. 
--- src/parser/dynamic_block.rs | 1 - src/parser/greater_block.rs | 1 - 2 files changed, 2 deletions(-) diff --git a/src/parser/dynamic_block.rs b/src/parser/dynamic_block.rs index ed7fbe1..d9eac68 100644 --- a/src/parser/dynamic_block.rs +++ b/src/parser/dynamic_block.rs @@ -4,7 +4,6 @@ use nom::bytes::complete::tag_no_case; use nom::character::complete::line_ending; use nom::character::complete::space0; use nom::character::complete::space1; -use nom::combinator::consumed; use nom::combinator::eof; use nom::combinator::not; use nom::combinator::opt; diff --git a/src/parser/greater_block.rs b/src/parser/greater_block.rs index 06c8656..4adb620 100644 --- a/src/parser/greater_block.rs +++ b/src/parser/greater_block.rs @@ -4,7 +4,6 @@ use nom::bytes::complete::tag_no_case; use nom::character::complete::line_ending; use nom::character::complete::space0; use nom::character::complete::space1; -use nom::combinator::consumed; use nom::combinator::eof; use nom::combinator::not; use nom::combinator::opt; From f96448154495f46f4552c9081b7fef7ed3baa6f6 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 22:06:34 -0400 Subject: [PATCH 26/31] Switch to using plain text with no additional exit matcher added. 
--- src/parser/lesser_block.rs | 18 +++++------------- src/parser/util.rs | 11 +++++++++++ 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src/parser/lesser_block.rs b/src/parser/lesser_block.rs index 0d936dc..fd80818 100644 --- a/src/parser/lesser_block.rs +++ b/src/parser/lesser_block.rs @@ -26,11 +26,11 @@ use crate::parser::object_parser::standard_set_object; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ExitMatcherNode; use crate::parser::parser_with_context::parser_with_context; -use crate::parser::plain_text::plain_text; use crate::parser::util::blank_line; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; use crate::parser::util::start_of_line; +use crate::parser::util::text_until_exit; #[tracing::instrument(ret, level = "debug")] pub fn verse_block<'r, 's>( @@ -102,9 +102,7 @@ pub fn comment_block<'r, 's>( None => None, }; - let (remaining, contents) = map(parser_with_context!(plain_text)(&parser_context), |obj| { - obj.source - })(remaining)?; + let (remaining, contents) = parser_with_context!(text_until_exit)(&parser_context)(remaining)?; let (remaining, _end) = lesser_block_end_specialized(&parser_context, remaining)?; let source = get_consumed(input, remaining); @@ -140,9 +138,7 @@ pub fn example_block<'r, 's>( None => None, }; - let (remaining, contents) = map(parser_with_context!(plain_text)(&parser_context), |obj| { - obj.source - })(remaining)?; + let (remaining, contents) = parser_with_context!(text_until_exit)(&parser_context)(remaining)?; let (remaining, _end) = lesser_block_end_specialized(&parser_context, remaining)?; let source = get_consumed(input, remaining); @@ -179,9 +175,7 @@ pub fn export_block<'r, 's>( None => None, }; - let (remaining, contents) = map(parser_with_context!(plain_text)(&parser_context), |obj| { - obj.source - })(remaining)?; + let (remaining, contents) = parser_with_context!(text_until_exit)(&parser_context)(remaining)?; let 
(remaining, _end) = lesser_block_end_specialized(&parser_context, remaining)?; let source = get_consumed(input, remaining); @@ -215,9 +209,7 @@ pub fn src_block<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s st None => None, }; - let (remaining, contents) = map(parser_with_context!(plain_text)(&parser_context), |obj| { - obj.source - })(remaining)?; + let (remaining, contents) = parser_with_context!(text_until_exit)(&parser_context)(remaining)?; let (remaining, _end) = lesser_block_end_specialized(&parser_context, remaining)?; let source = get_consumed(input, remaining); diff --git a/src/parser/util.rs b/src/parser/util.rs index 45b1f09..b5847f2 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -1,4 +1,5 @@ use nom::branch::alt; +use nom::character::complete::anychar; use nom::character::complete::line_ending; use nom::character::complete::multispace0; use nom::character::complete::none_of; @@ -8,7 +9,9 @@ use nom::combinator::not; use nom::combinator::opt; use nom::combinator::peek; use nom::combinator::recognize; +use nom::combinator::verify; use nom::multi::many0; +use nom::multi::many_till; use nom::sequence::tuple; use super::parser_context::ContextElement; @@ -203,6 +206,14 @@ pub fn whitespace_eof(input: &str) -> Res<&str, &str> { recognize(tuple((multispace0, eof)))(input) } +#[tracing::instrument(ret, level = "debug")] +pub fn text_until_exit<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + recognize(verify( + many_till(anychar, parser_with_context!(exit_matcher_parser)(context)), + |(children, _exit_contents)| !children.is_empty(), + ))(input) +} + #[cfg(test)] mod tests { use super::*; From da76d3714c1ecd17eb6f75cdee99fb9a84601121 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 22:10:49 -0400 Subject: [PATCH 27/31] Identify a problem. 
--- build.rs | 3 +-- toy_language.txt | 7 +------ 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/build.rs b/build.rs index 4a83920..b8c4dd5 100644 --- a/build.rs +++ b/build.rs @@ -78,8 +78,7 @@ fn is_expect_fail(name: &str) -> Option<&str> { "element_container_priority_footnote_definition_greater_block" => Some("Need to implement subscript."), "element_container_priority_greater_block_greater_block" => Some("Need to implement subscript."), "element_container_priority_section_greater_block" => Some("Need to implement subscript."), - "exit_matcher_investigation_bold_with_asterisk_inside" => Some("Need to implement bold."), - "exit_matcher_investigation_table_list" => Some("Need to implement bold."), + // "exit_matcher_investigation_bold_with_asterisk_inside" => Some("Need to implement bold."), "keyword_affiliated_keyword" => Some("Need to implement link."), "paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."), "text_markup_opening_and_closing" => Some("Need to implement bold and link."), diff --git a/toy_language.txt b/toy_language.txt index ecf1b97..d990b43 100644 --- a/toy_language.txt +++ b/toy_language.txt @@ -1,6 +1 @@ -[fn:1] footnote. -:drawername: - - -:end: -Is this still in the footnote? +foo *bar baz * lorem* ipsum From 9968aedd7412e9987d100cecf1c61d058432fc51 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 22:34:37 -0400 Subject: [PATCH 28/31] Make sure text markup doesn't have interior spaces. 
--- src/parser/text_markup.rs | 6 ++++++ src/parser/util.rs | 22 ++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/src/parser/text_markup.rs b/src/parser/text_markup.rs index db3ce05..1c36c51 100644 --- a/src/parser/text_markup.rs +++ b/src/parser/text_markup.rs @@ -2,9 +2,11 @@ use nom::branch::alt; use nom::bytes::complete::tag; use nom::character::complete::anychar; use nom::character::complete::line_ending; +use nom::character::complete::multispace1; use nom::character::complete::one_of; use nom::character::complete::space0; use nom::combinator::map; +use nom::combinator::not; use nom::combinator::peek; use nom::combinator::recognize; use nom::combinator::verify; @@ -30,6 +32,7 @@ use crate::parser::Object; use crate::parser::StrikeThrough; use crate::parser::Underline; use crate::parser::Verbatim; +use crate::parser::util::preceded_by_whitespace; #[tracing::instrument(ret, level = "debug")] pub fn text_markup<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Object<'s>> { @@ -112,6 +115,7 @@ fn _text_markup_object<'r, 's, 'x>( ) -> Res<&'s str, Vec>> { let (remaining, _) = pre(context, input)?; let (remaining, open) = tag(marker_symbol)(remaining)?; + let (remaining, _peek_not_whitespace) = peek(not(multispace1))(remaining)?; let text_markup_end_specialized = text_markup_end(open); let parser_context = context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { @@ -148,6 +152,7 @@ fn _text_markup_string<'r, 's, 'x>( ) -> Res<&'s str, &'s str> { let (remaining, _) = pre(context, input)?; let (remaining, open) = tag(marker_symbol)(remaining)?; + let (remaining, _peek_not_whitespace) = peek(not(multispace1))(remaining)?; let text_markup_end_specialized = text_markup_end(open); let parser_context = context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { @@ -208,6 +213,7 @@ fn _text_markup_end<'r, 's, 'x>( input: &'s str, marker_symbol: &'x str, ) -> Res<&'s str, &'s str> { + 
not(parser_with_context!(preceded_by_whitespace)(context))(input)?; let (remaining, _marker) = terminated( tag(marker_symbol), peek(parser_with_context!(post)(context)), diff --git a/src/parser/util.rs b/src/parser/util.rs index b5847f2..fe1169e 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -151,6 +151,28 @@ pub fn start_of_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&' Ok((input, ())) } +/// Check that the input is immediately preceded by whitespace (space, tab, carriage return, or line feed) +#[tracing::instrument(ret, level = "debug")] +pub fn preceded_by_whitespace<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, ()> { + let document_root = context.get_document_root().unwrap(); + let preceding_character = get_one_before(document_root, input) + .map(|slice| slice.chars().next()) + .flatten(); + match preceding_character { + Some('\n') | Some('\r') | Some(' ') | Some('\t') => {} + // If None, we are at the start of the file which is not allowed + None | Some(_) => { + return Err(nom::Err::Error(CustomError::MyError(MyError( + "Not preceded by whitespace.", + )))); + } + }; + Ok((input, ())) +} + /// Pull one non-whitespace character. /// /// This function only operates on spaces, tabs, carriage returns, and line feeds. It does not handle fancy unicode whitespace. From c2bf2c6994db993802049d031e1b6b575d4b298e Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 22:35:45 -0400 Subject: [PATCH 29/31] Enable a working test. 
--- build.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/build.rs b/build.rs index b8c4dd5..fe47ad4 100644 --- a/build.rs +++ b/build.rs @@ -78,7 +78,6 @@ fn is_expect_fail(name: &str) -> Option<&str> { "element_container_priority_footnote_definition_greater_block" => Some("Need to implement subscript."), "element_container_priority_greater_block_greater_block" => Some("Need to implement subscript."), "element_container_priority_section_greater_block" => Some("Need to implement subscript."), - // "exit_matcher_investigation_bold_with_asterisk_inside" => Some("Need to implement bold."), "keyword_affiliated_keyword" => Some("Need to implement link."), "paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."), "text_markup_opening_and_closing" => Some("Need to implement bold and link."), From 37b91d171d98e0aae6d673928ec58b99fb1c012f Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 22:56:36 -0400 Subject: [PATCH 30/31] Compare the objects in heading titles when diffing the asts. --- src/compare/diff.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 0583b74..1f13463 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -243,6 +243,22 @@ fn compare_heading<'s>( this_status = DiffStatus::Bad; } + let title = { + let children = emacs.as_list()?; + let attributes_child = children + .iter() + .nth(1) + .ok_or("Should have an attributes child.")?; + let attributes_map = attributes_child.as_map()?; + let title = attributes_map + .get(":title") + .ok_or("Missing :title attribute."); + *title? 
+ }; + for (emacs_child, rust_child) in title.as_list()?.iter().zip(rust.title.iter()) { + child_status.push(compare_object(source, emacs_child, rust_child)?); + } + for (emacs_child, rust_child) in children.iter().skip(2).zip(rust.children.iter()) { match rust_child { DocumentElement::Heading(rust_heading) => { From 1044625acb5059ccda224124764a8b4e4176206a Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Apr 2023 22:58:10 -0400 Subject: [PATCH 31/31] Clean up import. --- src/parser/lesser_block.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/parser/lesser_block.rs b/src/parser/lesser_block.rs index fd80818..ace7a8b 100644 --- a/src/parser/lesser_block.rs +++ b/src/parser/lesser_block.rs @@ -6,7 +6,6 @@ use nom::character::complete::space0; use nom::character::complete::space1; use nom::combinator::consumed; use nom::combinator::eof; -use nom::combinator::map; use nom::combinator::opt; use nom::combinator::verify; use nom::multi::many_till;