Merge branch 'document_properties'
rustfmt Build rustfmt has succeeded Details
rust-build Build rust-build has succeeded Details
rust-test Build rust-test has succeeded Details
rust-foreign-document-test Build rust-foreign-document-test has failed Details

This commit is contained in:
Tom Alexander 2023-09-30 16:06:05 -04:00
commit 12ab9beada
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
14 changed files with 222 additions and 85 deletions

View File

@ -88,7 +88,7 @@ ARG DOOMEMACS_PATH=/foreign_documents/doomemacs
ARG DOOMEMACS_REPO=https://github.com/doomemacs/doomemacs.git
RUN mkdir -p $DOOMEMACS_PATH && git -C $DOOMEMACS_PATH init --initial-branch=main && git -C $DOOMEMACS_PATH remote add origin $DOOMEMACS_REPO && git -C $DOOMEMACS_PATH fetch origin $DOOMEMACS_VERSION && git -C $DOOMEMACS_PATH checkout FETCH_HEAD
ARG WORG_VERSION=0c8d5679b536af450b61812246a3e02b8103f4b8
ARG WORG_VERSION=ba6cda890f200d428a5d68e819eef15b5306055f
ARG WORG_PATH=/foreign_documents/worg
ARG WORG_REPO=https://git.sr.ht/~bzg/worg
RUN mkdir -p $WORG_PATH && git -C $WORG_PATH init --initial-branch=main && git -C $WORG_PATH remote add origin $WORG_REPO && git -C $WORG_PATH fetch origin $WORG_VERSION && git -C $WORG_PATH checkout FETCH_HEAD

View File

@ -0,0 +1 @@
#+CATEGORY: theory

View File

@ -0,0 +1,5 @@
#+CATEGORY: foo
#+CATEGORY: bar
#+begin_src text
#+CATEGORY: baz
#+end_src

View File

@ -8,6 +8,7 @@ use crate::compare::parse::get_org_mode_version;
use crate::compare::sexp::sexp;
use crate::context::GlobalSettings;
use crate::context::LocalFileAccessInterface;
use crate::parser::parse_file_with_settings;
use crate::parser::parse_with_settings;
pub fn run_anonymous_compare<P: AsRef<str>>(
@ -27,8 +28,7 @@ pub fn run_anonymous_compare_with_settings<P: AsRef<str>>(
// TODO: This is a work-around to pretend that dos line endings do not exist. It would be better to handle the difference in line endings.
let org_contents = org_contents.as_ref().replace("\r\n", "\n");
let org_contents = org_contents.as_str();
eprintln!("Using emacs version: {}", get_emacs_version()?.trim());
eprintln!("Using org-mode version: {}", get_org_mode_version()?.trim());
print_versions()?;
let rust_parsed = parse_with_settings(org_contents, global_settings)?;
let org_sexp = emacs_parse_anonymous_org_document(org_contents, global_settings)?;
let (_remaining, parsed_sexp) = sexp(org_sexp.as_str()).map_err(|e| e.to_string())?;
@ -53,8 +53,7 @@ pub fn run_compare_on_file_with_settings<P: AsRef<Path>>(
global_settings: &GlobalSettings,
) -> Result<(), Box<dyn std::error::Error>> {
let org_path = org_path.as_ref();
eprintln!("Using emacs version: {}", get_emacs_version()?.trim());
eprintln!("Using org-mode version: {}", get_org_mode_version()?.trim());
print_versions()?;
let parent_directory = org_path
.parent()
.ok_or("Should be contained inside a directory.")?;
@ -70,7 +69,7 @@ pub fn run_compare_on_file_with_settings<P: AsRef<Path>>(
global_settings.file_access = &file_access_interface;
global_settings
};
let rust_parsed = parse_with_settings(org_contents, &global_settings)?;
let rust_parsed = parse_file_with_settings(org_contents, &global_settings, Some(org_path))?;
let org_sexp = emacs_parse_file_org_document(org_path, &global_settings)?;
let (_remaining, parsed_sexp) = sexp(org_sexp.as_str()).map_err(|e| e.to_string())?;
@ -88,3 +87,9 @@ pub fn run_compare_on_file_with_settings<P: AsRef<Path>>(
Ok(())
}
/// Write the emacs and org-mode versions in use to stderr.
///
/// Each version is looked up and printed in turn (emacs first), with
/// surrounding whitespace trimmed. If either lookup fails, its error is
/// returned and nothing further is printed.
fn print_versions() -> Result<(), Box<dyn std::error::Error>> {
    let emacs_version = get_emacs_version()?;
    eprintln!("Using emacs version: {}", emacs_version.trim());

    let org_mode_version = get_org_mode_version()?;
    eprintln!("Using org-mode version: {}", org_mode_version.trim());

    Ok(())
}

View File

@ -438,7 +438,53 @@ pub fn compare_document<'s>(
Ok(_) => {}
}
// TODO: Compare :path :CATEGORY
// Compare :path
// :path is a quoted string to the absolute path of the document.
let document_path = get_property_quoted_string(emacs, ":path")?;
let rust_document_path = rust.path.as_ref().map(|p| p.to_str()).flatten();
match (
document_path.as_ref().map(|s| s.as_str()),
rust_document_path,
) {
(None, None) => {}
(None, Some(_)) | (Some(_), None) => {
this_status = DiffStatus::Bad;
message = Some(format!(
"Path mismatch (emacs != rust) {:?} != {:?}",
document_path, rust_document_path
));
}
(Some(e), Some(r)) if e != r => {
this_status = DiffStatus::Bad;
message = Some(format!(
"Path mismatch (emacs != rust) {:?} != {:?}",
document_path, rust_document_path
));
}
(Some(_), Some(_)) => {}
};
// Compare category
// :CATEGORY is specified either from "#+CATEGORY:" or it is the file name without the ".org" extension.
let category = get_property_quoted_string(emacs, ":CATEGORY")?;
match (category.as_ref(), rust.category.as_ref()) {
(None, None) => {}
(None, Some(_)) | (Some(_), None) => {
this_status = DiffStatus::Bad;
message = Some(format!(
"Category mismatch (emacs != rust) {:?} != {:?}",
category, rust.category
));
}
(Some(e), Some(r)) if e != r => {
this_status = DiffStatus::Bad;
message = Some(format!(
"Category mismatch (emacs != rust) {:?} != {:?}",
category, rust.category
));
}
(Some(_), Some(_)) => {}
};
// Skipping "org-data" and its properties
for (i, token) in children.iter().skip(2).enumerate() {
@ -466,7 +512,11 @@ pub fn compare_document<'s>(
.ok_or("Should have a corresponding heading.")?;
child_status.push(compare_heading(rust.source, token, corresponding_heading)?);
} else {
return Err("Document should only contain sections and headlines.".into());
return Err(format!(
"Document should only contain sections and headlines, found: {}",
first_cell
)
.into());
}
}

View File

@ -76,11 +76,17 @@ where
r#"(progn
(require 'org)
(defun org-table-align () t)
(setq vc-handled-backends nil)
{global_settings}
(find-file-read-only "{file_path}")
(org-mode)
(message "%s" (pp-to-string (org-element-parse-buffer)))
)"#,
global_settings = global_settings_elisp(global_settings)
global_settings = global_settings_elisp(global_settings),
file_path = file_path
.as_os_str()
.to_str()
.expect("File name should be valid utf-8.")
);
let mut cmd = Command::new("emacs");
let cmd = cmd
@ -89,8 +95,6 @@ where
.arg("--no-site-file")
.arg("--no-splash")
.arg("--batch")
.arg("--insert")
.arg(file_path.as_os_str())
.arg("--eval")
.arg(elisp_script);
let out = cmd.output()?;

View File

@ -1,9 +1,10 @@
use std::collections::HashMap;
use nom::branch::alt;
use nom::bytes::complete::escaped;
use nom::bytes::complete::tag;
use nom::bytes::complete::take_till1;
use nom::character::complete::anychar;
use nom::character::complete::digit1;
use nom::character::complete::multispace0;
use nom::character::complete::multispace1;
use nom::character::complete::one_of;
@ -11,6 +12,7 @@ use nom::combinator::map;
use nom::combinator::not;
use nom::combinator::opt;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::multi::separated_list1;
use nom::sequence::delimited;
use nom::sequence::preceded;
@ -18,6 +20,8 @@ use nom::sequence::tuple;
use crate::error::Res;
const MAX_OCTAL_LENGTH: usize = 3;
#[derive(Debug)]
pub enum Token<'s> {
Atom(&'s str),
@ -35,6 +39,7 @@ pub struct TextWithProperties<'s> {
enum ParseState {
Normal,
Escape,
Octal(Vec<u8>),
}
impl<'s> Token<'s> {
@ -116,7 +121,7 @@ fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str {
}
pub(crate) fn unquote(text: &str) -> Result<String, Box<dyn std::error::Error>> {
let mut out = String::with_capacity(text.len());
let mut out: Vec<u8> = Vec::with_capacity(text.len());
if !text.starts_with(r#"""#) {
return Err("Quoted text does not start with quote.".into());
}
@ -125,30 +130,53 @@ pub(crate) fn unquote(text: &str) -> Result<String, Box<dyn std::error::Error>>
}
let interior_text = &text[1..(text.len() - 1)];
let mut state = ParseState::Normal;
for current_char in interior_text.chars().into_iter() {
for current_char in interior_text.bytes().into_iter() {
// Check to see if octal finished
state = match (state, current_char) {
(ParseState::Normal, '\\') => ParseState::Escape,
(ParseState::Octal(octal), b'0'..=b'7') if octal.len() < MAX_OCTAL_LENGTH => {
ParseState::Octal(octal)
}
(ParseState::Octal(octal), _) => {
let octal_number_string = String::from_utf8(octal)?;
let decoded_byte = u8::from_str_radix(&octal_number_string, 8)?;
out.push(decoded_byte);
ParseState::Normal
}
(state, _) => state,
};
state = match (state, current_char) {
(ParseState::Normal, b'\\') => ParseState::Escape,
(ParseState::Normal, _) => {
out.push(current_char);
ParseState::Normal
}
(ParseState::Escape, 'n') => {
out.push('\n');
(ParseState::Escape, b'n') => {
out.push(b'\n');
ParseState::Normal
}
(ParseState::Escape, '\\') => {
out.push('\\');
(ParseState::Escape, b'\\') => {
out.push(b'\\');
ParseState::Normal
}
(ParseState::Escape, '"') => {
out.push('"');
(ParseState::Escape, b'"') => {
out.push(b'"');
ParseState::Normal
}
_ => todo!(),
(ParseState::Escape, b'0'..=b'7') => {
let mut octal = Vec::with_capacity(MAX_OCTAL_LENGTH);
octal.push(current_char);
ParseState::Octal(octal)
}
(ParseState::Octal(mut octal), b'0'..=b'7') => {
octal.push(current_char);
ParseState::Octal(octal)
}
_ => panic!("Invalid state unquoting string."),
};
}
Ok(out)
Ok(String::from_utf8(out)?)
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
@ -210,15 +238,30 @@ fn unquoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn quoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
let (remaining, _) = tag(r#"""#)(input)?;
let (remaining, _) = escaped(
take_till1(|c| match c {
'\\' | '"' => true,
_ => false,
}),
'\\',
one_of(r#""n\\"#),
)(remaining)?;
let (mut remaining, _) = tag(r#"""#)(input)?;
let mut in_escape = false;
loop {
if in_escape {
let (remain, _) = alt((recognize(one_of(r#""n\\"#)), digit1))(remaining)?;
remaining = remain;
in_escape = false;
} else {
let end_quote = tag::<_, _, nom::error::Error<_>>(r#"""#)(remaining);
if end_quote.is_ok() {
break;
}
let escape_backslash = tag::<_, _, nom::error::Error<_>>("\\")(remaining);
if let Ok((remain, _)) = escape_backslash {
remaining = remain;
in_escape = true;
continue;
}
let (remain, _) = anychar(remaining)?;
remaining = remain;
}
}
let (remaining, _) = tag(r#"""#)(remaining)?;
let source = get_consumed(input, remaining);
Ok((remaining, Token::Atom(source.into())))

View File

@ -1,5 +1,6 @@
#![feature(exit_status_error)]
#![feature(trait_alias)]
#![feature(path_file_prefix)]
// TODO: #![warn(missing_docs)]
#[cfg(feature = "compare")]

View File

@ -1,3 +1,5 @@
use std::path::Path;
use nom::combinator::all_consuming;
use nom::combinator::opt;
use nom::multi::many0;
@ -25,30 +27,71 @@ use crate::types::Object;
/// Parse a full org-mode document.
///
/// This is the main entry point for Organic. It will parse the full contents of the input string as an org-mode document.
/// This is a main entry point for Organic. It will parse the full contents of the input string as an org-mode document without an underlying file attached.
#[allow(dead_code)]
pub fn parse<'s>(input: &'s str) -> Result<Document<'s>, Box<dyn std::error::Error>> {
parse_with_settings(input, &GlobalSettings::default())
parse_file_with_settings::<&Path>(input, &GlobalSettings::default(), None)
}
/// Parse a full org-mode document that is associated with a file.
///
/// This is a main entry point for Organic. The full contents of the input string are parsed as an org-mode document located at file_path, starting from the default settings.
///
/// The text to parse always comes from input; file_path is never used for reading the file contents. It is handed on to parse_file_with_settings, which uses it for determining the document category and filling in the path attribute on the Document.
#[allow(dead_code)]
pub fn parse_file<'s, P: AsRef<Path>>(
    input: &'s str,
    file_path: Option<P>,
) -> Result<Document<'s>, Box<dyn std::error::Error>> {
    let default_settings = GlobalSettings::default();
    parse_file_with_settings(input, &default_settings, file_path)
}
/// Parse a full org-mode document with starting settings.
///
/// This is the secondary entry point for Organic. It will parse the full contents of the input string as an org-mode document starting with the settings you supplied.
/// This is a secondary entry point for Organic. It will parse the full contents of the input string as an org-mode document starting with the settings you supplied without an underlying file attached.
///
/// This will not prevent additional settings from being learned during parsing, for example when encountering a "#+TODO".
#[allow(dead_code)]
pub fn parse_with_settings<'g, 's>(
input: &'s str,
global_settings: &'g GlobalSettings<'g, 's>,
) -> Result<Document<'s>, Box<dyn std::error::Error>> {
// No file is attached here, so `None` is passed as the file_path.
// The `::<&Path>` turbofish names the path type explicitly because a bare
// `None` gives the compiler nothing to infer the generic `P` from.
parse_file_with_settings::<&Path>(input, global_settings, None)
}
/// Parse a full org-mode document with starting settings.
///
/// This is a secondary entry point for Organic. It will parse the full contents of the input string as an org-mode document at the file_path starting with the settings you supplied.
///
/// This will not prevent additional settings from being learned during parsing, for example when encountering a "#+TODO".
///
/// file_path is not used for reading the file contents; the text to parse always comes from input. When a file_path is supplied it is canonicalized and stored in the path attribute on the Document, and its file stem becomes the document category if the parsed document does not already have one. Canonicalizing consults the filesystem, so the path must refer to something that exists.
///
/// # Errors
///
/// Returns an error when:
/// - the input cannot be parsed in full as an org-mode document,
/// - file_path cannot be canonicalized,
/// - the category has to be derived from file_path but the canonical path has no file name, or that name is not valid utf-8.
#[allow(dead_code)]
pub fn parse_file_with_settings<'g, 's, P: AsRef<Path>>(
    input: &'s str,
    global_settings: &'g GlobalSettings<'g, 's>,
    file_path: Option<P>,
) -> Result<Document<'s>, Box<dyn std::error::Error>> {
    let initial_context = ContextElement::document_context();
    let initial_context = Context::new(global_settings, List::new(&initial_context));
    let wrapped_input = OrgSource::new(input);
    let mut doc =
        all_consuming(parser_with_context!(document_org_source)(&initial_context))(wrapped_input)
            .map_err(|err| err.to_string())
            .map(|(_remaining, parsed_document)| parsed_document)?;
    if let Some(file_path) = file_path {
        let full_path = file_path.as_ref().canonicalize()?;
        // Only fall back to the file name when the document has no category yet.
        if doc.category.is_none() {
            // Report unusable paths as errors instead of panicking: this is a
            // fallible library entry point and the path comes from the caller.
            let category = full_path
                .file_stem()
                .ok_or("File should have a name.")?
                .to_str()
                .ok_or("File name should be valid utf-8.")?;
            doc.category = Some(category.to_owned());
        }
        doc.path = Some(full_path);
    }
    Ok(doc)
}
/// Parse a full org-mode document.
@ -106,7 +149,7 @@ fn document_org_source<'b, 'g, 'r, 's>(
let new_context = context.with_global_settings(&new_settings);
let context = &new_context;
let (remaining, document) =
let (remaining, mut document) =
_document(context, input).map(|(rem, out)| (Into::<&str>::into(rem), out))?;
{
// If there are radio targets in this document then we need to parse the entire document again with the knowledge of the radio targets.
@ -130,6 +173,21 @@ fn document_org_source<'b, 'g, 'r, 's>(
return Ok((remaining.into(), document));
}
}
// Find final in-buffer settings that do not impact parsing
document.category = Into::<AstNode>::into(&document)
.into_iter()
.filter_map(|ast_node| {
if let AstNode::Keyword(ast_node) = ast_node {
if ast_node.key.eq_ignore_ascii_case("category") {
return Some(ast_node);
}
}
None
})
.last()
.map(|kw| kw.value.to_owned());
Ok((remaining.into(), document))
}
@ -148,6 +206,8 @@ fn _document<'b, 'g, 'r, 's>(
remaining,
Document {
source: source.into(),
category: None,
path: None,
zeroth_section,
children,
},

View File

@ -45,5 +45,7 @@ mod text_markup;
mod timestamp;
mod util;
pub use document::parse;
pub use document::parse_file;
pub use document::parse_file_with_settings;
pub use document::parse_with_settings;
pub(crate) use org_source::OrgSource;

View File

@ -1,3 +1,5 @@
use std::path::PathBuf;
use super::Element;
use super::GetStandardProperties;
use super::Object;
@ -9,6 +11,8 @@ pub type HeadlineLevel = u16;
#[derive(Debug)]
/// A parsed org-mode document.
pub struct Document<'s> {
pub source: &'s str,
/// Category of the document.
///
/// The parser fills this from the last keyword in the document whose key is "category" (compared ignoring ASCII case). When the document is parsed with a file path and no category has been set, the file stem of that path is used instead. Otherwise this is `None`.
pub category: Option<String>,
/// Canonicalized path of the file this document was parsed for, or `None` when it was parsed without an underlying file.
pub path: Option<PathBuf>,
pub zeroth_section: Option<Section<'s>>,
pub children: Vec<Heading<'s>>,
}

View File

@ -1,7 +1,5 @@
use super::element::Element;
use super::lesser_element::TableCell;
use super::macros::ref_getter;
use super::macros::simple_getter;
use super::Keyword;
use super::Object;
use super::StandardProperties;
@ -25,14 +23,14 @@ pub type IndentationLevel = u16;
#[derive(Debug)]
pub struct PlainListItem<'s> {
pub(crate) source: &'s str,
pub(crate) indentation: IndentationLevel,
pub(crate) bullet: &'s str,
pub(crate) counter: Option<PlainListItemCounter>,
pub(crate) checkbox: Option<(CheckboxType, &'s str)>,
pub(crate) tag: Vec<Object<'s>>,
pub(crate) pre_blank: PlainListItemPreBlank,
pub(crate) children: Vec<Element<'s>>,
pub source: &'s str,
pub indentation: IndentationLevel,
pub bullet: &'s str,
pub counter: Option<PlainListItemCounter>,
pub checkbox: Option<(CheckboxType, &'s str)>,
pub tag: Vec<Object<'s>>,
pub pre_blank: PlainListItemPreBlank,
pub children: Vec<Element<'s>>,
}
pub type PlainListItemCounter = u16;
@ -161,20 +159,6 @@ impl<'s> StandardProperties<'s> for TableRow<'s> {
}
impl<'s> PlainListItem<'s> {
simple_getter!(get_indentation_level, indentation, IndentationLevel);
simple_getter!(
/// Get the bullet
///
/// Example output: "1. "
get_bullet,
bullet,
&'s str
);
simple_getter!(get_counter, counter, Option<PlainListItemCounter>);
simple_getter!(get_pre_blank, pre_blank, PlainListItemPreBlank);
ref_getter!(get_tag, tag, Vec<Object<'s>>);
ref_getter!(get_children, children, Vec<Element<'s>>);
pub fn get_checkbox(&self) -> Option<&'s str> {
self.checkbox.as_ref().map(|(_, checkbox)| *checkbox)
}

View File

@ -1,21 +0,0 @@
// TODO: Would be nice if I didn't have to specify a function name but it looks like concat_idents!() cannot be used to create an ident.
// TODO: Find out if proc macros could do this easier (for example, parsing out the field type)
macro_rules! simple_getter {
($(#[$meta:meta])* $funcname: ident, $field:ident, $fieldtype:ty) => {
$(#[$meta])*
pub fn $funcname(&self) -> $fieldtype {
self.$field
}
};
}
pub(crate) use simple_getter;
macro_rules! ref_getter {
($(#[$meta:meta])* $funcname: ident, $field:ident, $fieldtype:ty) => {
$(#[$meta])*
pub fn $funcname(&self) -> &$fieldtype {
&self.$field
}
};
}
pub(crate) use ref_getter;

View File

@ -3,7 +3,6 @@ mod element;
mod get_standard_properties;
mod greater_element;
mod lesser_element;
mod macros;
mod object;
mod source;
mod standard_properties;