From 93f1bcd7445876b151acb1fd1e31c36adab208c6 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 29 Sep 2023 20:47:18 -0400 Subject: [PATCH 01/11] Add getters for Document. --- src/types/document.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/types/document.rs b/src/types/document.rs index c8001e51..974d77b0 100644 --- a/src/types/document.rs +++ b/src/types/document.rs @@ -1,3 +1,4 @@ +use super::macros::ref_getter; use super::Element; use super::GetStandardProperties; use super::Object; @@ -8,9 +9,9 @@ pub type HeadlineLevel = u16; #[derive(Debug)] pub struct Document<'s> { - pub source: &'s str, - pub zeroth_section: Option>, - pub children: Vec>, + pub(crate) source: &'s str, + pub(crate) zeroth_section: Option>, + pub(crate) children: Vec>, } #[derive(Debug)] @@ -70,3 +71,8 @@ impl<'s> StandardProperties<'s> for Heading<'s> { self.source } } + +impl<'s> Document<'s> { + ref_getter!(get_zeroth_section, zeroth_section, Option>); + ref_getter!(get_children, children, Vec>); +} From d1dac0b8de96cfb22ed865dad16cd8a8e4011669 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 29 Sep 2023 20:23:02 -0400 Subject: [PATCH 02/11] Compare document category. --- org_mode_samples/document/category.org | 1 + src/compare/diff.rs | 23 ++++++++++++++++++++++- src/parser/document.rs | 1 + src/types/document.rs | 3 +++ 4 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 org_mode_samples/document/category.org diff --git a/org_mode_samples/document/category.org b/org_mode_samples/document/category.org new file mode 100644 index 00000000..8dfa4471 --- /dev/null +++ b/org_mode_samples/document/category.org @@ -0,0 +1 @@ +#+CATEGORY: theory diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 96757e95..ef37959b 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -438,7 +438,28 @@ pub fn compare_document<'s>( Ok(_) => {} } - // TODO: Compare :path :CATEGORY + // TODO: Compare :path + + // Compare category + let category = get_property_quoted_string(emacs, ":CATEGORY")?; + match (category.as_ref(), rust.category) { + (None, None) => {} + (None, Some(_)) | (Some(_), None) => { + this_status = DiffStatus::Bad; + message = Some(format!( + "Category mismatch (emacs != rust) {:?} != {:?}", + category, rust.category + )); + } + (Some(e), Some(r)) if e != r => { + this_status = DiffStatus::Bad; + message = Some(format!( + "Category mismatch (emacs != rust) {:?} != {:?}", + category, rust.category + )); + } + (Some(_), Some(_)) => {} + }; // Skipping "org-data" and its properties for (i, token) in children.iter().skip(2).enumerate() { diff --git a/src/parser/document.rs b/src/parser/document.rs index 4d6a64be..53eb4e21 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -148,6 +148,7 @@ fn _document<'b, 'g, 'r, 's>( remaining, Document { source: source.into(), + category: None, zeroth_section, children, }, diff --git a/src/types/document.rs b/src/types/document.rs index 974d77b0..bb0c99c6 100644 --- a/src/types/document.rs +++ b/src/types/document.rs @@ -1,4 +1,5 @@ use super::macros::ref_getter; +use super::macros::simple_getter; use super::Element; use super::GetStandardProperties; use super::Object; @@ -10,6 +11,7 @@ pub type HeadlineLevel = u16; #[derive(Debug)] pub struct Document<'s> { pub(crate) source: &'s str, + pub(crate) category: Option<&'s str>, pub(crate) zeroth_section: Option>, pub(crate) children: Vec>, } @@ -73,6 +75,7 @@ impl<'s> StandardProperties<'s> for Heading<'s> { } impl<'s> Document<'s> { + simple_getter!(get_category, category, Option<&'s str>); ref_getter!(get_zeroth_section, zeroth_section, Option>); ref_getter!(get_children, children, Vec>); } From 3fb2b5d31cc7c2131f0782a4625ad16f8314c54d Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 29 Sep 2023 21:14:55 -0400 Subject: [PATCH 03/11] Undo the getters change. The getters were a good idea, but if we are going to support editing later, we will need to expose the fields or write A LOT of boiler-plate. The getters also would prevent people from moving values out of the AST without even more boiler-plate. It is simply not worth it at this stage, so we will need to tolerate frequently changing semver versions as the public interface changes since *every* field in the AST is public. --- src/compare/diff.rs | 5 ++++- src/parser/document.rs | 1 + src/types/document.rs | 19 +++++++------------ src/types/greater_element.rs | 32 ++++++++------------------------ src/types/macros.rs | 21 --------------------- src/types/mod.rs | 1 - 6 files changed, 20 insertions(+), 59 deletions(-) delete mode 100644 src/types/macros.rs diff --git a/src/compare/diff.rs b/src/compare/diff.rs index ef37959b..29ea6fec 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -439,10 +439,13 @@ pub fn compare_document<'s>( } // TODO: Compare :path + // :path is a quoted string to the absolute path of the document. + let document_path = get_property_quoted_string(emacs, ":path")?; // Compare category + // :CATEGORY is specified either from "#+CATEGORY:" or it is the file name without the ".org" extension. let category = get_property_quoted_string(emacs, ":CATEGORY")?; - match (category.as_ref(), rust.category) { + match (category.as_ref(), rust.category.as_ref()) { (None, None) => {} (None, Some(_)) | (Some(_), None) => { this_status = DiffStatus::Bad; diff --git a/src/parser/document.rs b/src/parser/document.rs index 53eb4e21..31878d5e 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -149,6 +149,7 @@ fn _document<'b, 'g, 'r, 's>( Document { source: source.into(), category: None, + path: None, zeroth_section, children, }, diff --git a/src/types/document.rs b/src/types/document.rs index bb0c99c6..b5c9e97b 100644 --- a/src/types/document.rs +++ b/src/types/document.rs @@ -1,5 +1,5 @@ -use super::macros::ref_getter; -use super::macros::simple_getter; +use std::path::PathBuf; + use super::Element; use super::GetStandardProperties; use super::Object; @@ -10,10 +10,11 @@ pub type HeadlineLevel = u16; #[derive(Debug)] pub struct Document<'s> { - pub(crate) source: &'s str, - pub(crate) category: Option<&'s str>, - pub(crate) zeroth_section: Option>, - pub(crate) children: Vec>, + pub source: &'s str, + pub category: Option, + pub path: Option, + pub zeroth_section: Option>, + pub children: Vec>, } #[derive(Debug)] @@ -73,9 +74,3 @@ impl<'s> StandardProperties<'s> for Heading<'s> { self.source } } - -impl<'s> Document<'s> { - simple_getter!(get_category, category, Option<&'s str>); - ref_getter!(get_zeroth_section, zeroth_section, Option>); - ref_getter!(get_children, children, Vec>); -} diff --git a/src/types/greater_element.rs b/src/types/greater_element.rs index 2fbe2e14..85f4c579 100644 --- a/src/types/greater_element.rs +++ b/src/types/greater_element.rs @@ -1,7 +1,5 @@ use super::element::Element; use super::lesser_element::TableCell; -use super::macros::ref_getter; -use super::macros::simple_getter; use super::Keyword; use super::Object; use super::StandardProperties; @@ -25,14 +23,14 @@ pub type IndentationLevel = u16; #[derive(Debug)] pub struct PlainListItem<'s> { - pub(crate) source: &'s str, - pub(crate) indentation: IndentationLevel, - pub(crate) bullet: &'s str, - pub(crate) counter: Option, - pub(crate) checkbox: Option<(CheckboxType, &'s str)>, - pub(crate) tag: Vec>, - pub(crate) pre_blank: PlainListItemPreBlank, - pub(crate) children: Vec>, + pub source: &'s str, + pub indentation: IndentationLevel, + pub bullet: &'s str, + pub counter: Option, + pub checkbox: Option<(CheckboxType, &'s str)>, + pub tag: Vec>, + pub pre_blank: PlainListItemPreBlank, + pub children: Vec>, } pub type PlainListItemCounter = u16; @@ -161,20 +159,6 @@ impl<'s> StandardProperties<'s> for TableRow<'s> { } impl<'s> PlainListItem<'s> { - simple_getter!(get_indentation_level, indentation, IndentationLevel); - simple_getter!( - /// Get the bullet - /// - /// Example output: "1. " - get_bullet, - bullet, - &'s str - ); - simple_getter!(get_counter, counter, Option); - simple_getter!(get_pre_blank, pre_blank, PlainListItemPreBlank); - ref_getter!(get_tag, tag, Vec>); - ref_getter!(get_children, children, Vec>); - pub fn get_checkbox(&self) -> Option<&'s str> { self.checkbox.as_ref().map(|(_, checkbox)| *checkbox) } diff --git a/src/types/macros.rs b/src/types/macros.rs deleted file mode 100644 index ce1235a6..00000000 --- a/src/types/macros.rs +++ /dev/null @@ -1,21 +0,0 @@ -// TODO: Would be nice if I didn't have to specify a function name but it looks like concat_idents!() cannot be used to create an ident. -// TODO: Find out if proc macros could do this easier (for example, parsing out the field type) -macro_rules! simple_getter { - ($(#[$meta:meta])* $funcname: ident, $field:ident, $fieldtype:ty) => { - $(#[$meta])* - pub fn $funcname(&self) -> $fieldtype { - self.$field - } - }; -} -pub(crate) use simple_getter; - -macro_rules! ref_getter { - ($(#[$meta:meta])* $funcname: ident, $field:ident, $fieldtype:ty) => { - $(#[$meta])* - pub fn $funcname(&self) -> &$fieldtype { - &self.$field - } - }; -} -pub(crate) use ref_getter; diff --git a/src/types/mod.rs b/src/types/mod.rs index d0ea1f2a..7976ecd1 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -3,7 +3,6 @@ mod element; mod get_standard_properties; mod greater_element; mod lesser_element; -mod macros; mod object; mod source; mod standard_properties; From f1e35e317bce2e4a1177b26b7b4bbaee2259fbe1 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 29 Sep 2023 21:20:23 -0400 Subject: [PATCH 04/11] Compare document path. --- src/compare/diff.rs | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 29ea6fec..1f7e1ce9 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -438,9 +438,31 @@ pub fn compare_document<'s>( Ok(_) => {} } - // TODO: Compare :path + // Compare :path // :path is a quoted string to the absolute path of the document. let document_path = get_property_quoted_string(emacs, ":path")?; + let rust_document_path = rust.path.as_ref().map(|p| p.to_str()).flatten(); + match ( + document_path.as_ref().map(|s| s.as_str()), + rust_document_path, + ) { + (None, None) => {} + (None, Some(_)) | (Some(_), None) => { + this_status = DiffStatus::Bad; + message = Some(format!( + "Path mismatch (emacs != rust) {:?} != {:?}", + document_path, rust_document_path + )); + } + (Some(e), Some(r)) if e != r => { + this_status = DiffStatus::Bad; + message = Some(format!( + "Path mismatch (emacs != rust) {:?} != {:?}", + document_path, rust_document_path + )); + } + (Some(_), Some(_)) => {} + }; // Compare category // :CATEGORY is specified either from "#+CATEGORY:" or it is the file name without the ".org" extension. From fc7d4bd9494eca733f6f3b43b93dba235d7e4383 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 29 Sep 2023 21:46:52 -0400 Subject: [PATCH 05/11] Set Document path and category based on file path. --- src/compare/compare.rs | 3 ++- src/compare/parse.rs | 10 +++++--- src/lib.rs | 1 + src/parser/document.rs | 55 +++++++++++++++++++++++++++++++++++++----- src/parser/mod.rs | 2 ++ 5 files changed, 61 insertions(+), 10 deletions(-) diff --git a/src/compare/compare.rs b/src/compare/compare.rs index 02242675..9168155d 100644 --- a/src/compare/compare.rs +++ b/src/compare/compare.rs @@ -8,6 +8,7 @@ use crate::compare::parse::get_org_mode_version; use crate::compare::sexp::sexp; use crate::context::GlobalSettings; use crate::context::LocalFileAccessInterface; +use crate::parser::parse_file_with_settings; use crate::parser::parse_with_settings; pub fn run_anonymous_compare>( @@ -70,7 +71,7 @@ pub fn run_compare_on_file_with_settings>( global_settings.file_access = &file_access_interface; global_settings }; - let rust_parsed = parse_with_settings(org_contents, &global_settings)?; + let rust_parsed = parse_file_with_settings(org_contents, &global_settings, Some(org_path))?; let org_sexp = emacs_parse_file_org_document(org_path, &global_settings)?; let (_remaining, parsed_sexp) = sexp(org_sexp.as_str()).map_err(|e| e.to_string())?; diff --git a/src/compare/parse.rs b/src/compare/parse.rs index cfa3e176..3b6e0a3b 100644 --- a/src/compare/parse.rs +++ b/src/compare/parse.rs @@ -76,11 +76,17 @@ where r#"(progn (require 'org) (defun org-table-align () t) + (setq vc-handled-backends nil) {global_settings} + (find-file-read-only "{file_path}") (org-mode) (message "%s" (pp-to-string (org-element-parse-buffer))) )"#, - global_settings = global_settings_elisp(global_settings) + global_settings = global_settings_elisp(global_settings), + file_path = file_path + .as_os_str() + .to_str() + .expect("File name should be valid utf-8.") ); let mut cmd = Command::new("emacs"); let cmd = cmd @@ -89,8 +95,6 @@ where .arg("--no-site-file") .arg("--no-splash") .arg("--batch") - .arg("--insert") - .arg(file_path.as_os_str()) .arg("--eval") .arg(elisp_script); let out = cmd.output()?; diff --git a/src/lib.rs b/src/lib.rs index b8a8d108..da35ff75 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,6 @@ #![feature(exit_status_error)] #![feature(trait_alias)] +#![feature(path_file_prefix)] // TODO: #![warn(missing_docs)] #[cfg(feature = "compare")] diff --git a/src/parser/document.rs b/src/parser/document.rs index 31878d5e..a9442388 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -1,3 +1,5 @@ +use std::path::Path; + use nom::combinator::all_consuming; use nom::combinator::opt; use nom::multi::many0; @@ -25,30 +27,71 @@ use crate::types::Object; /// Parse a full org-mode document. /// -/// This is the main entry point for Organic. It will parse the full contents of the input string as an org-mode document. +/// This is a main entry point for Organic. It will parse the full contents of the input string as an org-mode document without an underlying file attached. #[allow(dead_code)] pub fn parse<'s>(input: &'s str) -> Result, Box> { - parse_with_settings(input, &GlobalSettings::default()) + parse_file_with_settings::<&Path>(input, &GlobalSettings::default(), None) +} + +/// Parse a full org-mode document. +/// +/// This is a main entry point for Organic. It will parse the full contents of the input string as an org-mode document at the file_path. +/// +/// file_path is not used for reading the file contents. It is only used for determining the document category and filling in the path attribute on the Document. +#[allow(dead_code)] +pub fn parse_file<'s, P: AsRef>( + input: &'s str, + file_path: Option

, +) -> Result, Box> { + parse_file_with_settings(input, &GlobalSettings::default(), file_path) } /// Parse a full org-mode document with starting settings. /// -/// This is the secondary entry point for Organic. It will parse the full contents of the input string as an org-mode document starting with the settings you supplied. +/// This is a secondary entry point for Organic. It will parse the full contents of the input string as an org-mode document starting with the settings you supplied without an underlying file attached. /// /// This will not prevent additional settings from being learned during parsing, for example when encountering a "#+TODO". #[allow(dead_code)] pub fn parse_with_settings<'g, 's>( input: &'s str, global_settings: &'g GlobalSettings<'g, 's>, +) -> Result, Box> { + parse_file_with_settings::<&Path>(input, global_settings, None) +} + +/// Parse a full org-mode document with starting settings. +/// +/// This is the secondary entry point for Organic. It will parse the full contents of the input string as an org-mode document at the file_path starting with the settings you supplied. +/// +/// This will not prevent additional settings from being learned during parsing, for example when encountering a "#+TODO". +/// +/// file_path is not used for reading the file contents. It is only used for determining the document category and filling in the path attribute on the Document. +#[allow(dead_code)] +pub fn parse_file_with_settings<'g, 's, P: AsRef>( + input: &'s str, + global_settings: &'g GlobalSettings<'g, 's>, + file_path: Option

, ) -> Result, Box> { let initial_context = ContextElement::document_context(); let initial_context = Context::new(global_settings, List::new(&initial_context)); let wrapped_input = OrgSource::new(input); - let ret = + let mut doc = all_consuming(parser_with_context!(document_org_source)(&initial_context))(wrapped_input) .map_err(|err| err.to_string()) - .map(|(_remaining, parsed_document)| parsed_document); - Ok(ret?) + .map(|(_remaining, parsed_document)| parsed_document)?; + if let Some(file_path) = file_path { + let full_path = file_path.as_ref().canonicalize()?; + if doc.category.is_none() { + let category = full_path + .file_prefix() + .expect("File should have a name.") + .to_str() + .expect("File name should be valid utf-8."); + doc.category = Some(category.to_owned()); + } + doc.path = Some(full_path); + } + Ok(doc) } /// Parse a full org-mode document. diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 8729294e..b3e27101 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -45,5 +45,7 @@ mod text_markup; mod timestamp; mod util; pub use document::parse; +pub use document::parse_file; +pub use document::parse_file_with_settings; pub use document::parse_with_settings; pub(crate) use org_source::OrgSource; From 6c775869607cc1041978518dfce5e9d5f3c8025c Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 29 Sep 2023 22:27:31 -0400 Subject: [PATCH 06/11] Improve error message. --- src/compare/diff.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 1f7e1ce9..5d68d32a 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -512,7 +512,11 @@ pub fn compare_document<'s>( .ok_or("Should have a corresponding heading.")?; child_status.push(compare_heading(rust.source, token, corresponding_heading)?); } else { - return Err("Document should only contain sections and headlines.".into()); + return Err(format!( + "Document should only contain sections and headlines, found: {}", + first_cell + ) + .into()); } } From 896250836b6c195128859593612057c7a62d5cf9 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 29 Sep 2023 22:54:50 -0400 Subject: [PATCH 07/11] Add support for parsing quoted strings containing escaped octals. --- src/compare/sexp.rs | 85 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 64 insertions(+), 21 deletions(-) diff --git a/src/compare/sexp.rs b/src/compare/sexp.rs index 0cd558ec..069863a4 100644 --- a/src/compare/sexp.rs +++ b/src/compare/sexp.rs @@ -1,9 +1,10 @@ use std::collections::HashMap; use nom::branch::alt; -use nom::bytes::complete::escaped; use nom::bytes::complete::tag; use nom::bytes::complete::take_till1; +use nom::character::complete::anychar; +use nom::character::complete::digit1; use nom::character::complete::multispace0; use nom::character::complete::multispace1; use nom::character::complete::one_of; @@ -11,6 +12,7 @@ use nom::combinator::map; use nom::combinator::not; use nom::combinator::opt; use nom::combinator::peek; +use nom::combinator::recognize; use nom::multi::separated_list1; use nom::sequence::delimited; use nom::sequence::preceded; @@ -18,6 +20,8 @@ use nom::sequence::tuple; use crate::error::Res; +const MAX_OCTAL_LENGTH: usize = 3; + #[derive(Debug)] pub enum Token<'s> { Atom(&'s str), @@ -35,6 +39,7 @@ pub struct TextWithProperties<'s> { enum ParseState { Normal, Escape, + Octal(Vec), } impl<'s> Token<'s> { @@ -116,7 +121,7 @@ fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str { } pub(crate) fn unquote(text: &str) -> Result> { - let mut out = String::with_capacity(text.len()); + let mut out: Vec = Vec::with_capacity(text.len()); if !text.starts_with(r#"""#) { return Err("Quoted text does not start with quote.".into()); } @@ -125,30 +130,53 @@ pub(crate) fn unquote(text: &str) -> Result> } let interior_text = &text[1..(text.len() - 1)]; let mut state = ParseState::Normal; - for current_char in interior_text.chars().into_iter() { + for current_char in interior_text.bytes().into_iter() { + // Check to see if octal finished state = match (state, current_char) { - (ParseState::Normal, '\\') => ParseState::Escape, + (ParseState::Octal(octal), b'0'..=b'7') if octal.len() < MAX_OCTAL_LENGTH => { + ParseState::Octal(octal) + } + (ParseState::Octal(octal), _) => { + let octal_number_string = String::from_utf8(octal)?; + let decoded_byte = u8::from_str_radix(&octal_number_string, 8)?; + out.push(decoded_byte); + ParseState::Normal + } + (state, _) => state, + }; + + state = match (state, current_char) { + (ParseState::Normal, b'\\') => ParseState::Escape, (ParseState::Normal, _) => { out.push(current_char); ParseState::Normal } - (ParseState::Escape, 'n') => { - out.push('\n'); + (ParseState::Escape, b'n') => { + out.push(b'\n'); ParseState::Normal } - (ParseState::Escape, '\\') => { - out.push('\\'); + (ParseState::Escape, b'\\') => { + out.push(b'\\'); ParseState::Normal } - (ParseState::Escape, '"') => { - out.push('"'); + (ParseState::Escape, b'"') => { + out.push(b'"'); ParseState::Normal } - _ => todo!(), + (ParseState::Escape, b'0'..=b'7') => { + let mut octal = Vec::with_capacity(MAX_OCTAL_LENGTH); + octal.push(current_char); + ParseState::Octal(octal) + } + (ParseState::Octal(mut octal), b'0'..=b'7') => { + octal.push(current_char); + ParseState::Octal(octal) + } + _ => panic!("Invalid state unquoting string."), }; } - Ok(out) + Ok(String::from_utf8(out)?) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] @@ -210,15 +238,30 @@ fn unquoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn quoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { - let (remaining, _) = tag(r#"""#)(input)?; - let (remaining, _) = escaped( - take_till1(|c| match c { - '\\' | '"' => true, - _ => false, - }), - '\\', - one_of(r#""n\\"#), - )(remaining)?; + let (mut remaining, _) = tag(r#"""#)(input)?; + let mut in_escape = false; + loop { + if in_escape { + let (remain, _) = alt((recognize(one_of(r#""n\\"#)), digit1))(remaining)?; + remaining = remain; + in_escape = false; + } else { + let end_quote = tag::<_, _, nom::error::Error<_>>(r#"""#)(remaining); + if end_quote.is_ok() { + break; + } + + let escape_backslash = tag::<_, _, nom::error::Error<_>>("\\")(remaining); + if let Ok((remain, _)) = escape_backslash { + remaining = remain; + in_escape = true; + continue; + } + + let (remain, _) = anychar(remaining)?; + remaining = remain; + } + } let (remaining, _) = tag(r#"""#)(remaining)?; let source = get_consumed(input, remaining); Ok((remaining, Token::Atom(source.into()))) From 846a8b3729eb5d528730670d3b357cd3ca04bf8a Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 30 Sep 2023 00:14:26 -0400 Subject: [PATCH 08/11] Support reading category from in-buffer-settings. --- src/context/global_settings.rs | 3 +++ src/parser/document.rs | 2 +- src/parser/in_buffer_settings.rs | 8 ++++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/context/global_settings.rs b/src/context/global_settings.rs index c12bbcf5..66c1d745 100644 --- a/src/context/global_settings.rs +++ b/src/context/global_settings.rs @@ -12,6 +12,8 @@ pub struct GlobalSettings<'g, 's> { pub radio_targets: Vec<&'g Vec>>, pub file_access: &'g dyn FileAccessInterface, pub in_progress_todo_keywords: BTreeSet, + pub category: Option, + pub complete_todo_keywords: BTreeSet, /// Set to true to allow for plain lists using single letters as the bullet in the same way that numbers are used. /// @@ -39,6 +41,7 @@ impl<'g, 's> GlobalSettings<'g, 's> { working_directory: None, }, in_progress_todo_keywords: BTreeSet::new(), + category: None, complete_todo_keywords: BTreeSet::new(), list_allow_alphabetical: false, tab_width: DEFAULT_TAB_WIDTH, diff --git a/src/parser/document.rs b/src/parser/document.rs index a9442388..b7d91a51 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -191,7 +191,7 @@ fn _document<'b, 'g, 'r, 's>( remaining, Document { source: source.into(), - category: None, + category: context.get_global_settings().category.clone(), path: None, zeroth_section, children, diff --git a/src/parser/in_buffer_settings.rs b/src/parser/in_buffer_settings.rs index 167d4284..b95ca742 100644 --- a/src/parser/in_buffer_settings.rs +++ b/src/parser/in_buffer_settings.rs @@ -111,6 +111,14 @@ pub(crate) fn apply_in_buffer_settings<'g, 's, 'sf>( } } + // Category + for kw in keywords + .iter() + .filter(|kw| kw.key.eq_ignore_ascii_case("category")) + { + new_settings.category = Some(kw.value.to_owned()); + } + Ok(new_settings) } From 6ed35f4674485c62a5a0f50d231fcb2e867fb03a Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 30 Sep 2023 00:16:19 -0400 Subject: [PATCH 09/11] Minor cleanup. --- src/compare/compare.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/compare/compare.rs b/src/compare/compare.rs index 9168155d..f77c8088 100644 --- a/src/compare/compare.rs +++ b/src/compare/compare.rs @@ -28,8 +28,7 @@ pub fn run_anonymous_compare_with_settings>( // TODO: This is a work-around to pretend that dos line endings do not exist. It would be better to handle the difference in line endings. let org_contents = org_contents.as_ref().replace("\r\n", "\n"); let org_contents = org_contents.as_str(); - eprintln!("Using emacs version: {}", get_emacs_version()?.trim()); - eprintln!("Using org-mode version: {}", get_org_mode_version()?.trim()); + print_versions()?; let rust_parsed = parse_with_settings(org_contents, global_settings)?; let org_sexp = emacs_parse_anonymous_org_document(org_contents, global_settings)?; let (_remaining, parsed_sexp) = sexp(org_sexp.as_str()).map_err(|e| e.to_string())?; @@ -54,8 +53,7 @@ pub fn run_compare_on_file_with_settings>( global_settings: &GlobalSettings, ) -> Result<(), Box> { let org_path = org_path.as_ref(); - eprintln!("Using emacs version: {}", get_emacs_version()?.trim()); - eprintln!("Using org-mode version: {}", get_org_mode_version()?.trim()); + print_versions()?; let parent_directory = org_path .parent() .ok_or("Should be contained inside a directory.")?; @@ -89,3 +87,9 @@ pub fn run_compare_on_file_with_settings>( Ok(()) } + +fn print_versions() -> Result<(), Box> { + eprintln!("Using emacs version: {}", get_emacs_version()?.trim()); + eprintln!("Using org-mode version: {}", get_org_mode_version()?.trim()); + Ok(()) +} From d38b0a84f67506edecf053df4e24c8e1c5ecfa62 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 30 Sep 2023 01:26:24 -0400 Subject: [PATCH 10/11] Fix handling file names with periods before the file extension. --- docker/organic_test/Dockerfile | 2 +- org_mode_samples/document/category_multiple.org | 5 +++++ src/parser/document.rs | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 org_mode_samples/document/category_multiple.org diff --git a/docker/organic_test/Dockerfile b/docker/organic_test/Dockerfile index 86f3a405..5d6a6fd6 100644 --- a/docker/organic_test/Dockerfile +++ b/docker/organic_test/Dockerfile @@ -88,7 +88,7 @@ ARG DOOMEMACS_PATH=/foreign_documents/doomemacs ARG DOOMEMACS_REPO=https://github.com/doomemacs/doomemacs.git RUN mkdir -p $DOOMEMACS_PATH && git -C $DOOMEMACS_PATH init --initial-branch=main && git -C $DOOMEMACS_PATH remote add origin $DOOMEMACS_REPO && git -C $DOOMEMACS_PATH fetch origin $DOOMEMACS_VERSION && git -C $DOOMEMACS_PATH checkout FETCH_HEAD -ARG WORG_VERSION=0c8d5679b536af450b61812246a3e02b8103f4b8 +ARG WORG_VERSION=ba6cda890f200d428a5d68e819eef15b5306055f ARG WORG_PATH=/foreign_documents/worg ARG WORG_REPO=https://git.sr.ht/~bzg/worg RUN mkdir -p $WORG_PATH && git -C $WORG_PATH init --initial-branch=main && git -C $WORG_PATH remote add origin $WORG_REPO && git -C $WORG_PATH fetch origin $WORG_VERSION && git -C $WORG_PATH checkout FETCH_HEAD diff --git a/org_mode_samples/document/category_multiple.org b/org_mode_samples/document/category_multiple.org new file mode 100644 index 00000000..591cac3a --- /dev/null +++ b/org_mode_samples/document/category_multiple.org @@ -0,0 +1,5 @@ +#+CATEGORY: foo +#+CATEGORY: bar +#+begin_src text +#+CATEGORY: baz +#+end_src diff --git a/src/parser/document.rs b/src/parser/document.rs index b7d91a51..f50291c3 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -83,7 +83,7 @@ pub fn parse_file_with_settings<'g, 's, P: AsRef>( let full_path = file_path.as_ref().canonicalize()?; if doc.category.is_none() { let category = full_path - .file_prefix() + .file_stem() .expect("File should have a name.") .to_str() .expect("File name should be valid utf-8."); From 186201a4b57e1218c2d52f6a380f795882d1df5f Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 30 Sep 2023 14:35:22 -0400 Subject: [PATCH 11/11] Remove category from global settings. This setting does not impact parsing so we can iterate over the final document to find the keywords. --- src/context/global_settings.rs | 3 --- src/parser/document.rs | 19 +++++++++++++++++-- src/parser/in_buffer_settings.rs | 8 -------- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/context/global_settings.rs b/src/context/global_settings.rs index 66c1d745..c12bbcf5 100644 --- a/src/context/global_settings.rs +++ b/src/context/global_settings.rs @@ -12,8 +12,6 @@ pub struct GlobalSettings<'g, 's> { pub radio_targets: Vec<&'g Vec>>, pub file_access: &'g dyn FileAccessInterface, pub in_progress_todo_keywords: BTreeSet, - pub category: Option, - pub complete_todo_keywords: BTreeSet, /// Set to true to allow for plain lists using single letters as the bullet in the same way that numbers are used. /// @@ -41,7 +39,6 @@ impl<'g, 's> GlobalSettings<'g, 's> { working_directory: None, }, in_progress_todo_keywords: BTreeSet::new(), - category: None, complete_todo_keywords: BTreeSet::new(), list_allow_alphabetical: false, tab_width: DEFAULT_TAB_WIDTH, diff --git a/src/parser/document.rs b/src/parser/document.rs index f50291c3..3eb3eeb4 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -149,7 +149,7 @@ fn document_org_source<'b, 'g, 'r, 's>( let new_context = context.with_global_settings(&new_settings); let context = &new_context; - let (remaining, document) = + let (remaining, mut document) = _document(context, input).map(|(rem, out)| (Into::<&str>::into(rem), out))?; { // If there are radio targets in this document then we need to parse the entire document again with the knowledge of the radio targets. @@ -173,6 +173,21 @@ fn document_org_source<'b, 'g, 'r, 's>( return Ok((remaining.into(), document)); } } + + // Find final in-buffer settings that do not impact parsing + document.category = Into::::into(&document) + .into_iter() + .filter_map(|ast_node| { + if let AstNode::Keyword(ast_node) = ast_node { + if ast_node.key.eq_ignore_ascii_case("category") { + return Some(ast_node); + } + } + None + }) + .last() + .map(|kw| kw.value.to_owned()); + Ok((remaining.into(), document)) } @@ -191,7 +206,7 @@ fn _document<'b, 'g, 'r, 's>( remaining, Document { source: source.into(), - category: context.get_global_settings().category.clone(), + category: None, path: None, zeroth_section, children, diff --git a/src/parser/in_buffer_settings.rs b/src/parser/in_buffer_settings.rs index b95ca742..167d4284 100644 --- a/src/parser/in_buffer_settings.rs +++ b/src/parser/in_buffer_settings.rs @@ -111,14 +111,6 @@ pub(crate) fn apply_in_buffer_settings<'g, 's, 'sf>( } } - // Category - for kw in keywords - .iter() - .filter(|kw| kw.key.eq_ignore_ascii_case("category")) - { - new_settings.category = Some(kw.value.to_owned()); - } - Ok(new_settings) }