From cfcf6443cac1cc779aa9db981b80ea328f6c909e Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 15 Apr 2023 16:27:28 -0400 Subject: [PATCH 1/8] Added example org-mode file with comments. --- org_mode_samples/comment/Makefile | 23 +++++++++++++++++++ .../comment/multiline_comment.org | 5 ++++ 2 files changed, 28 insertions(+) create mode 100644 org_mode_samples/comment/Makefile create mode 100644 org_mode_samples/comment/multiline_comment.org diff --git a/org_mode_samples/comment/Makefile b/org_mode_samples/comment/Makefile new file mode 100644 index 0000000..c47a86c --- /dev/null +++ b/org_mode_samples/comment/Makefile @@ -0,0 +1,23 @@ +SHELL := bash +.ONESHELL: +.SHELLFLAGS := -eu -o pipefail -c +.DELETE_ON_ERROR: +MAKEFLAGS += --warn-undefined-variables +MAKEFLAGS += --no-builtin-rules +SRCFILES := $(wildcard *.org) +OUTFILES := $(patsubst %.org,%.tree.txt,$(SRCFILES)) + +ifeq ($(origin .RECIPEPREFIX), undefined) + $(error This Make does not support .RECIPEPREFIX. Please use GNU Make 4.0 or later) +endif +.RECIPEPREFIX = > + +.PHONY: all +all: $(OUTFILES) + +.PHONY: clean +clean: +> rm -rf $(OUTFILES) + +%.tree.txt: %.org ../common.el ../dump_org_ast.bash +> ../dump_org_ast.bash $< $@ diff --git a/org_mode_samples/comment/multiline_comment.org b/org_mode_samples/comment/multiline_comment.org new file mode 100644 index 0000000..6c5fdee --- /dev/null +++ b/org_mode_samples/comment/multiline_comment.org @@ -0,0 +1,5 @@ +# Comment +# +# At the top of the file +foo +# Another comment From 1b10b197f2eb4dec058ac105233fe2e906ce46e9 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 15 Apr 2023 16:31:38 -0400 Subject: [PATCH 2/8] Initial structure for adding support for comments. 
--- src/compare/diff.rs | 1 + src/parser/comment.rs | 1 + src/parser/element.rs | 11 ++++++++++- src/parser/lesser_element.rs | 5 +++++ src/parser/mod.rs | 2 ++ 5 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 src/parser/comment.rs diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 2520af2..51d5100 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -216,6 +216,7 @@ fn compare_element<'s>( Element::PlainList(obj) => compare_plain_list(source, emacs, obj), Element::GreaterBlock(obj) => compare_greater_block(source, emacs, obj), Element::FootnoteDefinition(obj) => compare_footnote_definition(source, emacs, obj), + Element::Comment(obj) => todo!(), } } diff --git a/src/parser/comment.rs b/src/parser/comment.rs new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/parser/comment.rs @@ -0,0 +1 @@ + diff --git a/src/parser/element.rs b/src/parser/element.rs index 9d2778b..24ece5b 100644 --- a/src/parser/element.rs +++ b/src/parser/element.rs @@ -1,15 +1,16 @@ -use super::PlainListItem; use super::error::Res; use super::footnote_definition::footnote_definition; use super::greater_block::greater_block; use super::greater_element::FootnoteDefinition; use super::greater_element::GreaterBlock; use super::greater_element::PlainList; +use super::lesser_element::Comment; use super::lesser_element::Paragraph; use super::paragraph::paragraph; use super::plain_list::plain_list; use super::source::Source; use super::Context; +use super::PlainListItem; use crate::parser::parser_with_context::parser_with_context; use nom::branch::alt; use nom::combinator::map; @@ -20,6 +21,7 @@ pub enum Element<'s> { PlainList(PlainList<'s>), GreaterBlock(GreaterBlock<'s>), FootnoteDefinition(FootnoteDefinition<'s>), + Comment(Comment<'s>), } impl<'s> Source<'s> for Element<'s> { @@ -29,6 +31,7 @@ impl<'s> Source<'s> for Element<'s> { Element::PlainList(obj) => obj.source, Element::GreaterBlock(obj) => obj.source, Element::FootnoteDefinition(obj) => 
obj.source, + Element::Comment(obj) => obj.source, } } } @@ -63,6 +66,12 @@ impl<'s> Source<'s> for FootnoteDefinition<'s> { } } +impl<'s> Source<'s> for Comment<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} + #[tracing::instrument(ret, level = "debug")] pub fn element<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Element<'s>> { let non_paragraph_matcher = parser_with_context!(non_paragraph_element)(context); diff --git a/src/parser/lesser_element.rs b/src/parser/lesser_element.rs index 5abc4b3..110e647 100644 --- a/src/parser/lesser_element.rs +++ b/src/parser/lesser_element.rs @@ -5,3 +5,8 @@ pub struct Paragraph<'s> { pub source: &'s str, pub children: Vec<Object<'s>>, } + +#[derive(Debug)] +pub struct Comment<'s> { + pub source: &'s str, +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 169d5ea..c6dae3e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,3 +1,4 @@ +mod comment; mod document; mod element; mod error; @@ -24,6 +25,7 @@ pub use greater_element::FootnoteDefinition; pub use greater_element::GreaterBlock; pub use greater_element::PlainList; pub use greater_element::PlainListItem; +pub use lesser_element::Comment; pub use lesser_element::Paragraph; pub use source::Source; type Context<'r, 's> = &'r parser_context::ContextTree<'r, 's>; From 26e0ad58004a6b9194fc000ae7fc4e95ce554d47 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 15 Apr 2023 16:34:33 -0400 Subject: [PATCH 3/8] Add an example exploring indentation with comments. 
--- org_mode_samples/comment/indented.org | 5 +++++ src/parser/comment.rs | 9 +++++++++ 2 files changed, 14 insertions(+) create mode 100644 org_mode_samples/comment/indented.org diff --git a/org_mode_samples/comment/indented.org b/org_mode_samples/comment/indented.org new file mode 100644 index 0000000..4360a35 --- /dev/null +++ b/org_mode_samples/comment/indented.org @@ -0,0 +1,5 @@ +# Comment + # indented line +# At the top of the file +foo +# Another comment diff --git a/src/parser/comment.rs b/src/parser/comment.rs index 8b13789..8152259 100644 --- a/src/parser/comment.rs +++ b/src/parser/comment.rs @@ -1 +1,10 @@ +use super::Context; +use crate::parser::error::Res; +use crate::parser::util::start_of_line; +use crate::parser::Comment; +#[tracing::instrument(ret, level = "debug")] +pub fn comment<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Comment<'s>> { + start_of_line(context, input)?; + todo!() +} From d1a7d0b835f14a4f5359d5a6ea5c63f9fa3f1a47 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 15 Apr 2023 16:37:25 -0400 Subject: [PATCH 4/8] Show that trailing whitespace belongs to comments. 
--- org_mode_samples/comment/indented.org | 3 +++ src/parser/comment.rs | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/org_mode_samples/comment/indented.org b/org_mode_samples/comment/indented.org index 4360a35..b98916d 100644 --- a/org_mode_samples/comment/indented.org +++ b/org_mode_samples/comment/indented.org @@ -1,5 +1,8 @@ # Comment # indented line # At the top of the file + + + foo # Another comment diff --git a/src/parser/comment.rs b/src/parser/comment.rs index 8152259..21ad4ab 100644 --- a/src/parser/comment.rs +++ b/src/parser/comment.rs @@ -8,3 +8,8 @@ pub fn comment<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, start_of_line(context, input)?; todo!() } + +fn comment_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + start_of_line(context, input)?; + todo!() +} From 33bc1af17d47b9b9e9a9c7b30370eb3070690fc0 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 15 Apr 2023 16:53:58 -0400 Subject: [PATCH 5/8] First stab at implementing comments. 
--- src/compare/diff.rs | 44 ++++++++++++++++++++++++++++++++++++++++++- src/parser/comment.rs | 27 +++++++++++++++++++++++--- src/parser/element.rs | 3 +++ 3 files changed, 70 insertions(+), 4 deletions(-) diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 51d5100..89f600a 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -1,5 +1,6 @@ use super::sexp::Token; use crate::compare::util::get_offsets; +use crate::parser::Comment; use crate::parser::Document; use crate::parser::DocumentElement; use crate::parser::Element; @@ -216,7 +217,7 @@ fn compare_element<'s>( Element::PlainList(obj) => compare_plain_list(source, emacs, obj), Element::GreaterBlock(obj) => compare_greater_block(source, emacs, obj), Element::FootnoteDefinition(obj) => compare_footnote_definition(source, emacs, obj), - Element::Comment(obj) => todo!(), + Element::Comment(obj) => compare_comment(source, emacs, obj), } } @@ -454,3 +455,44 @@ fn compare_footnote_definition<'s>( children: child_status, }) } + +fn compare_comment<'s>( + source: &'s str, + emacs: &'s Token<'s>, + rust: &'s Comment<'s>, +) -> Result<DiffResult, Box<dyn std::error::Error>> { + let children = emacs.as_list()?; + let first_child = children + .first() + .ok_or("Should have at least one child.")? + .as_atom()?; + if first_child != "comment" { + return Err("Comment should correspond to a comment cell.".into()); + } + let mut child_status = Vec::new(); + let mut this_status = DiffStatus::Good; + + let attributes_child = children + .iter() + .nth(1) + .ok_or("Should have an attributes child.")?; + let attributes_map = attributes_child.as_map()?; + let begin = attributes_map + .get(":begin") + .ok_or("Missing :begin attribute.")? + .as_atom()?; + let end = attributes_map + .get(":end") + .ok_or("Missing :end attribute.")? 
+ .as_atom()?; + let (rust_begin, rust_end) = get_offsets(source, rust); + if (rust_begin + 1).to_string() != begin || (rust_end + 1).to_string() != end { + this_status = DiffStatus::Bad; + } + + Ok(DiffResult { + status: this_status, + name: "comment".to_owned(), + children: child_status, + }) +} diff --git a/src/parser/comment.rs b/src/parser/comment.rs index 21ad4ab..d674c9b 100644 --- a/src/parser/comment.rs +++ b/src/parser/comment.rs @@ -1,15 +1,36 @@ +use nom::bytes::complete::is_not; +use nom::bytes::complete::tag; +use nom::character::complete::space0; +use nom::combinator::not; +use nom::multi::many0; +use nom::sequence::preceded; +use nom::sequence::tuple; + +use super::util::get_consumed; use super::Context; use crate::parser::error::Res; +use crate::parser::parser_with_context::parser_with_context; +use crate::parser::util::exit_matcher_parser; use crate::parser::util::start_of_line; use crate::parser::Comment; #[tracing::instrument(ret, level = "debug")] pub fn comment<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Comment<'s>> { - start_of_line(context, input)?; - todo!() + let comment_line_matcher = parser_with_context!(comment_line)(context); + let exit_matcher = parser_with_context!(exit_matcher_parser)(context); + let (remaining, first_line) = comment_line_matcher(input)?; + let (remaining, remaining_lines) = + many0(preceded(not(exit_matcher), comment_line_matcher))(remaining)?; + let source = get_consumed(input, remaining); + Ok((remaining, Comment { source })) } +#[tracing::instrument(ret, level = "debug")] fn comment_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { start_of_line(context, input)?; - todo!() + let (remaining, _indent) = space0(input)?; + let (remaining, (_hash, _leading_whitespace, content)) = + tuple((tag("#"), space0, is_not("\r\n")))(remaining)?; + let source = get_consumed(input, remaining); + Ok((remaining, source)) } diff --git a/src/parser/element.rs 
b/src/parser/element.rs index 24ece5b..cb7cf19 100644 --- a/src/parser/element.rs +++ b/src/parser/element.rs @@ -1,3 +1,4 @@ +use super::comment::comment; use super::error::Res; use super::footnote_definition::footnote_definition; use super::greater_block::greater_block; @@ -90,9 +91,11 @@ pub fn non_paragraph_element<'r, 's>( let plain_list_matcher = parser_with_context!(plain_list)(context); let greater_block_matcher = parser_with_context!(greater_block)(context); let footnote_definition_matcher = parser_with_context!(footnote_definition)(context); + let comment_matcher = parser_with_context!(comment)(context); alt(( map(plain_list_matcher, Element::PlainList), map(greater_block_matcher, Element::GreaterBlock), map(footnote_definition_matcher, Element::FootnoteDefinition), + map(comment_matcher, Element::Comment), ))(input) } From 4e460e4a8c66a59227c4347196fbf56d283550e2 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 15 Apr 2023 16:59:30 -0400 Subject: [PATCH 6/8] Consume line ending in comments. 
--- src/parser/comment.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/parser/comment.rs b/src/parser/comment.rs index d674c9b..cb7d093 100644 --- a/src/parser/comment.rs +++ b/src/parser/comment.rs @@ -1,7 +1,11 @@ +use nom::branch::alt; use nom::bytes::complete::is_not; use nom::bytes::complete::tag; +use nom::character::complete::line_ending; use nom::character::complete::space0; +use nom::combinator::eof; use nom::combinator::not; +use nom::combinator::opt; use nom::multi::many0; use nom::sequence::preceded; use nom::sequence::tuple; @@ -11,6 +15,7 @@ use super::Context; use crate::parser::error::Res; use crate::parser::parser_with_context::parser_with_context; use crate::parser::util::exit_matcher_parser; +use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting; use crate::parser::util::start_of_line; use crate::parser::Comment; @@ -21,6 +26,10 @@ pub fn comment<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, let (remaining, first_line) = comment_line_matcher(input)?; let (remaining, remaining_lines) = many0(preceded(not(exit_matcher), comment_line_matcher))(remaining)?; + + let (remaining, _trailing_ws) = + maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; + let source = get_consumed(input, remaining); Ok((remaining, Comment { source })) } @@ -29,8 +38,8 @@ pub fn comment<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, fn comment_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { start_of_line(context, input)?; let (remaining, _indent) = space0(input)?; - let (remaining, (_hash, _leading_whitespace, content)) = - tuple((tag("#"), space0, is_not("\r\n")))(remaining)?; + let (remaining, (_hash, _leading_whitespace, _content, _line_ending)) = + tuple((tag("#"), space0, opt(is_not("\r\n")), alt((line_ending, eof))))(remaining)?; let source = get_consumed(input, remaining); Ok((remaining, source)) } From 
6e4aa38fced85829e1970c2c59c6c1256932158d Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 15 Apr 2023 17:04:47 -0400 Subject: [PATCH 7/8] Add test demonstrating problem. --- .../comment/require_whitespace_after_hash.org | 3 ++ src/parser/comment.rs | 30 +++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 org_mode_samples/comment/require_whitespace_after_hash.org diff --git a/org_mode_samples/comment/require_whitespace_after_hash.org b/org_mode_samples/comment/require_whitespace_after_hash.org new file mode 100644 index 0000000..d06c06e --- /dev/null +++ b/org_mode_samples/comment/require_whitespace_after_hash.org @@ -0,0 +1,3 @@ +# Comment line +#not a comment +# Comment again diff --git a/src/parser/comment.rs b/src/parser/comment.rs index cb7d093..f304ac8 100644 --- a/src/parser/comment.rs +++ b/src/parser/comment.rs @@ -43,3 +43,33 @@ fn comment_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str let source = get_consumed(input, remaining); Ok((remaining, source)) } + +#[cfg(test)] +mod tests { + use crate::parser::parser_context::ContextElement; + use crate::parser::parser_context::ContextTree; + use crate::parser::parser_with_context::parser_with_context; + + use super::*; + + #[test] + fn require_space_after_hash() { + let input = "# Comment line +#not a comment +# Comment again"; + let initial_context: ContextTree<'_, '_> = ContextTree::new(); + let document_context = + initial_context.with_additional_node(ContextElement::DocumentRoot(input)); + let comment_matcher = + parser_with_context!(comment)(&document_context); + let (remaining, first_comment) = + comment_matcher(input).expect("Parse first comment"); + assert_eq!(remaining, r#"#not a comment +# Comment again"#); + assert_eq!( + first_comment.source, + "# Comment line +" + ); + } +} From 50d05b99bea4cadca676f9301815edfda85ab9bd Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 15 Apr 2023 17:08:22 -0400 Subject: [PATCH 8/8] Require space after hash in 
comment if comment line is not empty. --- src/parser/comment.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/parser/comment.rs b/src/parser/comment.rs index f304ac8..34c2ee0 100644 --- a/src/parser/comment.rs +++ b/src/parser/comment.rs @@ -3,6 +3,7 @@ use nom::bytes::complete::is_not; use nom::bytes::complete::tag; use nom::character::complete::line_ending; use nom::character::complete::space0; +use nom::character::complete::space1; use nom::combinator::eof; use nom::combinator::not; use nom::combinator::opt; @@ -38,8 +39,8 @@ pub fn comment<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, fn comment_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { start_of_line(context, input)?; let (remaining, _indent) = space0(input)?; - let (remaining, (_hash, _leading_whitespace, _content, _line_ending)) = - tuple((tag("#"), space0, opt(is_not("\r\n")), alt((line_ending, eof))))(remaining)?; + let (remaining, (_hash, _leading_whitespace_and_content, _line_ending)) = + tuple((tag("#"), opt(tuple((space1, is_not("\r\n")))), alt((line_ending, eof))))(remaining)?; let source = get_consumed(input, remaining); Ok((remaining, source)) }