diff --git a/docker/organic_test/Dockerfile b/docker/organic_test/Dockerfile index 86f3a405..5d6a6fd6 100644 --- a/docker/organic_test/Dockerfile +++ b/docker/organic_test/Dockerfile @@ -88,7 +88,7 @@ ARG DOOMEMACS_PATH=/foreign_documents/doomemacs ARG DOOMEMACS_REPO=https://github.com/doomemacs/doomemacs.git RUN mkdir -p $DOOMEMACS_PATH && git -C $DOOMEMACS_PATH init --initial-branch=main && git -C $DOOMEMACS_PATH remote add origin $DOOMEMACS_REPO && git -C $DOOMEMACS_PATH fetch origin $DOOMEMACS_VERSION && git -C $DOOMEMACS_PATH checkout FETCH_HEAD -ARG WORG_VERSION=0c8d5679b536af450b61812246a3e02b8103f4b8 +ARG WORG_VERSION=ba6cda890f200d428a5d68e819eef15b5306055f ARG WORG_PATH=/foreign_documents/worg ARG WORG_REPO=https://git.sr.ht/~bzg/worg RUN mkdir -p $WORG_PATH && git -C $WORG_PATH init --initial-branch=main && git -C $WORG_PATH remote add origin $WORG_REPO && git -C $WORG_PATH fetch origin $WORG_VERSION && git -C $WORG_PATH checkout FETCH_HEAD diff --git a/org_mode_samples/document/category.org b/org_mode_samples/document/category.org new file mode 100644 index 00000000..8dfa4471 --- /dev/null +++ b/org_mode_samples/document/category.org @@ -0,0 +1 @@ +#+CATEGORY: theory diff --git a/org_mode_samples/document/category_multiple.org b/org_mode_samples/document/category_multiple.org new file mode 100644 index 00000000..591cac3a --- /dev/null +++ b/org_mode_samples/document/category_multiple.org @@ -0,0 +1,5 @@ +#+CATEGORY: foo +#+CATEGORY: bar +#+begin_src text +#+CATEGORY: baz +#+end_src diff --git a/src/compare/compare.rs b/src/compare/compare.rs index 02242675..f77c8088 100644 --- a/src/compare/compare.rs +++ b/src/compare/compare.rs @@ -8,6 +8,7 @@ use crate::compare::parse::get_org_mode_version; use crate::compare::sexp::sexp; use crate::context::GlobalSettings; use crate::context::LocalFileAccessInterface; +use crate::parser::parse_file_with_settings; use crate::parser::parse_with_settings; pub fn run_anonymous_compare>( @@ -27,8 +28,7 @@ pub fn run_anonymous_compare_with_settings>( // TODO: This is a work-around to pretend that dos line endings do not exist. It would be better to handle the difference in line endings. let org_contents = org_contents.as_ref().replace("\r\n", "\n"); let org_contents = org_contents.as_str(); - eprintln!("Using emacs version: {}", get_emacs_version()?.trim()); - eprintln!("Using org-mode version: {}", get_org_mode_version()?.trim()); + print_versions()?; let rust_parsed = parse_with_settings(org_contents, global_settings)?; let org_sexp = emacs_parse_anonymous_org_document(org_contents, global_settings)?; let (_remaining, parsed_sexp) = sexp(org_sexp.as_str()).map_err(|e| e.to_string())?; @@ -53,8 +53,7 @@ pub fn run_compare_on_file_with_settings>( global_settings: &GlobalSettings, ) -> Result<(), Box> { let org_path = org_path.as_ref(); - eprintln!("Using emacs version: {}", get_emacs_version()?.trim()); - eprintln!("Using org-mode version: {}", get_org_mode_version()?.trim()); + print_versions()?; let parent_directory = org_path .parent() .ok_or("Should be contained inside a directory.")?; @@ -70,7 +69,7 @@ pub fn run_compare_on_file_with_settings>( global_settings.file_access = &file_access_interface; global_settings }; - let rust_parsed = parse_with_settings(org_contents, &global_settings)?; + let rust_parsed = parse_file_with_settings(org_contents, &global_settings, Some(org_path))?; let org_sexp = emacs_parse_file_org_document(org_path, &global_settings)?; let (_remaining, parsed_sexp) = sexp(org_sexp.as_str()).map_err(|e| e.to_string())?; @@ -88,3 +87,9 @@ pub fn run_compare_on_file_with_settings>( Ok(()) } + +fn print_versions() -> Result<(), Box> { + eprintln!("Using emacs version: {}", get_emacs_version()?.trim()); + eprintln!("Using org-mode version: {}", get_org_mode_version()?.trim()); + Ok(()) +} diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 96757e95..5d68d32a 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -438,7 +438,53 @@ pub fn compare_document<'s>( Ok(_) => {} } - // TODO: Compare :path :CATEGORY + // Compare :path + // :path is a quoted string to the absolute path of the document. + let document_path = get_property_quoted_string(emacs, ":path")?; + let rust_document_path = rust.path.as_ref().map(|p| p.to_str()).flatten(); + match ( + document_path.as_ref().map(|s| s.as_str()), + rust_document_path, + ) { + (None, None) => {} + (None, Some(_)) | (Some(_), None) => { + this_status = DiffStatus::Bad; + message = Some(format!( + "Path mismatch (emacs != rust) {:?} != {:?}", + document_path, rust_document_path + )); + } + (Some(e), Some(r)) if e != r => { + this_status = DiffStatus::Bad; + message = Some(format!( + "Path mismatch (emacs != rust) {:?} != {:?}", + document_path, rust_document_path + )); + } + (Some(_), Some(_)) => {} + }; + + // Compare category + // :CATEGORY is specified either from "#+CATEGORY:" or it is the file name without the ".org" extension. + let category = get_property_quoted_string(emacs, ":CATEGORY")?; + match (category.as_ref(), rust.category.as_ref()) { + (None, None) => {} + (None, Some(_)) | (Some(_), None) => { + this_status = DiffStatus::Bad; + message = Some(format!( + "Category mismatch (emacs != rust) {:?} != {:?}", + category, rust.category + )); + } + (Some(e), Some(r)) if e != r => { + this_status = DiffStatus::Bad; + message = Some(format!( + "Category mismatch (emacs != rust) {:?} != {:?}", + category, rust.category + )); + } + (Some(_), Some(_)) => {} + }; // Skipping "org-data" and its properties for (i, token) in children.iter().skip(2).enumerate() { @@ -466,7 +512,11 @@ pub fn compare_document<'s>( .ok_or("Should have a corresponding heading.")?; child_status.push(compare_heading(rust.source, token, corresponding_heading)?); } else { - return Err("Document should only contain sections and headlines.".into()); + return Err(format!( + "Document should only contain sections and headlines, found: {}", + first_cell + ) + .into()); } } diff --git a/src/compare/parse.rs b/src/compare/parse.rs index cfa3e176..3b6e0a3b 100644 --- a/src/compare/parse.rs +++ b/src/compare/parse.rs @@ -76,11 +76,17 @@ where r#"(progn (require 'org) (defun org-table-align () t) + (setq vc-handled-backends nil) {global_settings} + (find-file-read-only "{file_path}") (org-mode) (message "%s" (pp-to-string (org-element-parse-buffer))) )"#, - global_settings = global_settings_elisp(global_settings) + global_settings = global_settings_elisp(global_settings), + file_path = file_path + .as_os_str() + .to_str() + .expect("File name should be valid utf-8.") ); let mut cmd = Command::new("emacs"); let cmd = cmd @@ -89,8 +95,6 @@ where .arg("--no-site-file") .arg("--no-splash") .arg("--batch") - .arg("--insert") - .arg(file_path.as_os_str()) .arg("--eval") .arg(elisp_script); let out = cmd.output()?; diff --git a/src/compare/sexp.rs b/src/compare/sexp.rs index 0cd558ec..069863a4 100644 --- a/src/compare/sexp.rs +++ b/src/compare/sexp.rs @@ -1,9 +1,10 @@ use std::collections::HashMap; use nom::branch::alt; -use nom::bytes::complete::escaped; use nom::bytes::complete::tag; use nom::bytes::complete::take_till1; +use nom::character::complete::anychar; +use nom::character::complete::digit1; use nom::character::complete::multispace0; use nom::character::complete::multispace1; use nom::character::complete::one_of; @@ -11,6 +12,7 @@ use nom::combinator::map; use nom::combinator::not; use nom::combinator::opt; use nom::combinator::peek; +use nom::combinator::recognize; use nom::multi::separated_list1; use nom::sequence::delimited; use nom::sequence::preceded; @@ -18,6 +20,8 @@ use nom::sequence::tuple; use crate::error::Res; +const MAX_OCTAL_LENGTH: usize = 3; + #[derive(Debug)] pub enum Token<'s> { Atom(&'s str), @@ -35,6 +39,7 @@ pub struct TextWithProperties<'s> { enum ParseState { Normal, Escape, + Octal(Vec), } impl<'s> Token<'s> { @@ -116,7 +121,7 @@ fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str { } pub(crate) fn unquote(text: &str) -> Result> { - let mut out = String::with_capacity(text.len()); + let mut out: Vec = Vec::with_capacity(text.len()); if !text.starts_with(r#"""#) { return Err("Quoted text does not start with quote.".into()); } @@ -125,30 +130,53 @@ pub(crate) fn unquote(text: &str) -> Result> } let interior_text = &text[1..(text.len() - 1)]; let mut state = ParseState::Normal; - for current_char in interior_text.chars().into_iter() { + for current_char in interior_text.bytes().into_iter() { + // Check to see if octal finished state = match (state, current_char) { - (ParseState::Normal, '\\') => ParseState::Escape, + (ParseState::Octal(octal), b'0'..=b'7') if octal.len() < MAX_OCTAL_LENGTH => { + ParseState::Octal(octal) + } + (ParseState::Octal(octal), _) => { + let octal_number_string = String::from_utf8(octal)?; + let decoded_byte = u8::from_str_radix(&octal_number_string, 8)?; + out.push(decoded_byte); + ParseState::Normal + } + (state, _) => state, + }; + + state = match (state, current_char) { + (ParseState::Normal, b'\\') => ParseState::Escape, (ParseState::Normal, _) => { out.push(current_char); ParseState::Normal } - (ParseState::Escape, 'n') => { - out.push('\n'); + (ParseState::Escape, b'n') => { + out.push(b'\n'); ParseState::Normal } - (ParseState::Escape, '\\') => { - out.push('\\'); + (ParseState::Escape, b'\\') => { + out.push(b'\\'); ParseState::Normal } - (ParseState::Escape, '"') => { - out.push('"'); + (ParseState::Escape, b'"') => { + out.push(b'"'); ParseState::Normal } - _ => todo!(), + (ParseState::Escape, b'0'..=b'7') => { + let mut octal = Vec::with_capacity(MAX_OCTAL_LENGTH); + octal.push(current_char); + ParseState::Octal(octal) + } + (ParseState::Octal(mut octal), b'0'..=b'7') => { + octal.push(current_char); + ParseState::Octal(octal) + } + _ => panic!("Invalid state unquoting string."), }; } - Ok(out) + Ok(String::from_utf8(out)?) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] @@ -210,15 +238,30 @@ fn unquoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn quoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { - let (remaining, _) = tag(r#"""#)(input)?; - let (remaining, _) = escaped( - take_till1(|c| match c { - '\\' | '"' => true, - _ => false, - }), - '\\', - one_of(r#""n\\"#), - )(remaining)?; + let (mut remaining, _) = tag(r#"""#)(input)?; + let mut in_escape = false; + loop { + if in_escape { + let (remain, _) = alt((recognize(one_of(r#""n\\"#)), digit1))(remaining)?; + remaining = remain; + in_escape = false; + } else { + let end_quote = tag::<_, _, nom::error::Error<_>>(r#"""#)(remaining); + if end_quote.is_ok() { + break; + } + + let escape_backslash = tag::<_, _, nom::error::Error<_>>("\\")(remaining); + if let Ok((remain, _)) = escape_backslash { + remaining = remain; + in_escape = true; + continue; + } + + let (remain, _) = anychar(remaining)?; + remaining = remain; + } + } let (remaining, _) = tag(r#"""#)(remaining)?; let source = get_consumed(input, remaining); Ok((remaining, Token::Atom(source.into()))) diff --git a/src/lib.rs b/src/lib.rs index b8a8d108..da35ff75 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,6 @@ #![feature(exit_status_error)] #![feature(trait_alias)] +#![feature(path_file_prefix)] // TODO: #![warn(missing_docs)] #[cfg(feature = "compare")] diff --git a/src/parser/document.rs b/src/parser/document.rs index 4d6a64be..3eb3eeb4 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -1,3 +1,5 @@ +use std::path::Path; + use nom::combinator::all_consuming; use nom::combinator::opt; use nom::multi::many0; @@ -25,30 +27,71 @@ use crate::types::Object; /// Parse a full org-mode document. /// -/// This is the main entry point for Organic. It will parse the full contents of the input string as an org-mode document. +/// This is a main entry point for Organic. It will parse the full contents of the input string as an org-mode document without an underlying file attached. #[allow(dead_code)] pub fn parse<'s>(input: &'s str) -> Result, Box> { - parse_with_settings(input, &GlobalSettings::default()) + parse_file_with_settings::<&Path>(input, &GlobalSettings::default(), None) +} + +/// Parse a full org-mode document. +/// +/// This is a main entry point for Organic. It will parse the full contents of the input string as an org-mode document at the file_path. +/// +/// file_path is not used for reading the file contents. It is only used for determining the document category and filling in the path attribute on the Document. +#[allow(dead_code)] +pub fn parse_file<'s, P: AsRef>( + input: &'s str, + file_path: Option

, +) -> Result, Box> { + parse_file_with_settings(input, &GlobalSettings::default(), file_path) } /// Parse a full org-mode document with starting settings. /// -/// This is the secondary entry point for Organic. It will parse the full contents of the input string as an org-mode document starting with the settings you supplied. +/// This is a secondary entry point for Organic. It will parse the full contents of the input string as an org-mode document starting with the settings you supplied without an underlying file attached. /// /// This will not prevent additional settings from being learned during parsing, for example when encountering a "#+TODO". #[allow(dead_code)] pub fn parse_with_settings<'g, 's>( input: &'s str, global_settings: &'g GlobalSettings<'g, 's>, +) -> Result, Box> { + parse_file_with_settings::<&Path>(input, global_settings, None) +} + +/// Parse a full org-mode document with starting settings. +/// +/// This is the secondary entry point for Organic. It will parse the full contents of the input string as an org-mode document at the file_path starting with the settings you supplied. +/// +/// This will not prevent additional settings from being learned during parsing, for example when encountering a "#+TODO". +/// +/// file_path is not used for reading the file contents. It is only used for determining the document category and filling in the path attribute on the Document. +#[allow(dead_code)] +pub fn parse_file_with_settings<'g, 's, P: AsRef>( + input: &'s str, + global_settings: &'g GlobalSettings<'g, 's>, + file_path: Option

, ) -> Result, Box> { let initial_context = ContextElement::document_context(); let initial_context = Context::new(global_settings, List::new(&initial_context)); let wrapped_input = OrgSource::new(input); - let ret = + let mut doc = all_consuming(parser_with_context!(document_org_source)(&initial_context))(wrapped_input) .map_err(|err| err.to_string()) - .map(|(_remaining, parsed_document)| parsed_document); - Ok(ret?) + .map(|(_remaining, parsed_document)| parsed_document)?; + if let Some(file_path) = file_path { + let full_path = file_path.as_ref().canonicalize()?; + if doc.category.is_none() { + let category = full_path + .file_stem() + .expect("File should have a name.") + .to_str() + .expect("File name should be valid utf-8."); + doc.category = Some(category.to_owned()); + } + doc.path = Some(full_path); + } + Ok(doc) } /// Parse a full org-mode document. @@ -106,7 +149,7 @@ fn document_org_source<'b, 'g, 'r, 's>( let new_context = context.with_global_settings(&new_settings); let context = &new_context; - let (remaining, document) = + let (remaining, mut document) = _document(context, input).map(|(rem, out)| (Into::<&str>::into(rem), out))?; { // If there are radio targets in this document then we need to parse the entire document again with the knowledge of the radio targets. @@ -130,6 +173,21 @@ fn document_org_source<'b, 'g, 'r, 's>( return Ok((remaining.into(), document)); } } + + // Find final in-buffer settings that do not impact parsing + document.category = Into::::into(&document) + .into_iter() + .filter_map(|ast_node| { + if let AstNode::Keyword(ast_node) = ast_node { + if ast_node.key.eq_ignore_ascii_case("category") { + return Some(ast_node); + } + } + None + }) + .last() + .map(|kw| kw.value.to_owned()); + Ok((remaining.into(), document)) } @@ -148,6 +206,8 @@ fn _document<'b, 'g, 'r, 's>( remaining, Document { source: source.into(), + category: None, + path: None, zeroth_section, children, }, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 8729294e..b3e27101 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -45,5 +45,7 @@ mod text_markup; mod timestamp; mod util; pub use document::parse; +pub use document::parse_file; +pub use document::parse_file_with_settings; pub use document::parse_with_settings; pub(crate) use org_source::OrgSource; diff --git a/src/types/document.rs b/src/types/document.rs index c8001e51..b5c9e97b 100644 --- a/src/types/document.rs +++ b/src/types/document.rs @@ -1,3 +1,5 @@ +use std::path::PathBuf; + use super::Element; use super::GetStandardProperties; use super::Object; @@ -9,6 +11,8 @@ pub type HeadlineLevel = u16; #[derive(Debug)] pub struct Document<'s> { pub source: &'s str, + pub category: Option, + pub path: Option, pub zeroth_section: Option>, pub children: Vec>, } diff --git a/src/types/greater_element.rs b/src/types/greater_element.rs index 2fbe2e14..85f4c579 100644 --- a/src/types/greater_element.rs +++ b/src/types/greater_element.rs @@ -1,7 +1,5 @@ use super::element::Element; use super::lesser_element::TableCell; -use super::macros::ref_getter; -use super::macros::simple_getter; use super::Keyword; use super::Object; use super::StandardProperties; @@ -25,14 +23,14 @@ pub type IndentationLevel = u16; #[derive(Debug)] pub struct PlainListItem<'s> { - pub(crate) source: &'s str, - pub(crate) indentation: IndentationLevel, - pub(crate) bullet: &'s str, - pub(crate) counter: Option, - pub(crate) checkbox: Option<(CheckboxType, &'s str)>, - pub(crate) tag: Vec>, - pub(crate) pre_blank: PlainListItemPreBlank, - pub(crate) children: Vec>, + pub source: &'s str, + pub indentation: IndentationLevel, + pub bullet: &'s str, + pub counter: Option, + pub checkbox: Option<(CheckboxType, &'s str)>, + pub tag: Vec>, + pub pre_blank: PlainListItemPreBlank, + pub children: Vec>, } pub type PlainListItemCounter = u16; @@ -161,20 +159,6 @@ impl<'s> StandardProperties<'s> for TableRow<'s> { } impl<'s> PlainListItem<'s> { - simple_getter!(get_indentation_level, indentation, IndentationLevel); - simple_getter!( - /// Get the bullet - /// - /// Example output: "1. " - get_bullet, - bullet, - &'s str - ); - simple_getter!(get_counter, counter, Option); - simple_getter!(get_pre_blank, pre_blank, PlainListItemPreBlank); - ref_getter!(get_tag, tag, Vec>); - ref_getter!(get_children, children, Vec>); - pub fn get_checkbox(&self) -> Option<&'s str> { self.checkbox.as_ref().map(|(_, checkbox)| *checkbox) } diff --git a/src/types/macros.rs b/src/types/macros.rs deleted file mode 100644 index ce1235a6..00000000 --- a/src/types/macros.rs +++ /dev/null @@ -1,21 +0,0 @@ -// TODO: Would be nice if I didn't have to specify a function name but it looks like concat_idents!() cannot be used to create an ident. -// TODO: Find out if proc macros could do this easier (for example, parsing out the field type) -macro_rules! simple_getter { - ($(#[$meta:meta])* $funcname: ident, $field:ident, $fieldtype:ty) => { - $(#[$meta])* - pub fn $funcname(&self) -> $fieldtype { - self.$field - } - }; -} -pub(crate) use simple_getter; - -macro_rules! ref_getter { - ($(#[$meta:meta])* $funcname: ident, $field:ident, $fieldtype:ty) => { - $(#[$meta])* - pub fn $funcname(&self) -> &$fieldtype { - &self.$field - } - }; -} -pub(crate) use ref_getter; diff --git a/src/types/mod.rs b/src/types/mod.rs index d0ea1f2a..7976ecd1 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -3,7 +3,6 @@ mod element; mod get_standard_properties; mod greater_element; mod lesser_element; -mod macros; mod object; mod source; mod standard_properties;