From e59d1b8fde22f61bef9b8f575f58ed5e77ec12ae Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 10 Apr 2023 13:38:31 -0400 Subject: [PATCH 01/18] Add a feature for compare. --- Cargo.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 627ec01..f466c66 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,6 @@ license = "0BSD" name = "toy" path = "src/main.rs" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] nom = "7.1.1" @@ -19,3 +18,5 @@ tracing-opentelemetry = "0.17.2" tracing-subscriber = {version="0.3.16", features=["env-filter"]} [features] +default = ["compare"] +compare = [] From 552ac974d547d70432801176cb16a7ca95c2fb2f Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 10 Apr 2023 13:45:36 -0400 Subject: [PATCH 02/18] Initial setup for a separate compare target. --- Cargo.toml | 5 +++++ src/compare/mod.rs | 1 + src/org_compare.rs | 6 ++++++ 3 files changed, 12 insertions(+) create mode 100644 src/compare/mod.rs create mode 100644 src/org_compare.rs diff --git a/Cargo.toml b/Cargo.toml index f466c66..540389a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,11 @@ name = "toy" path = "src/main.rs" +[[bin]] +name = "org_compare" +path = "src/org_compare.rs" + + [dependencies] nom = "7.1.1" opentelemetry = "0.17.0" diff --git a/src/compare/mod.rs b/src/compare/mod.rs new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/compare/mod.rs @@ -0,0 +1 @@ + diff --git a/src/org_compare.rs b/src/org_compare.rs new file mode 100644 index 0000000..96e940e --- /dev/null +++ b/src/org_compare.rs @@ -0,0 +1,6 @@ +#![feature(round_char_boundary)] +mod compare; + +fn main() -> Result<(), Box> { + Ok(()) +} From 96d2cc7c6a006bfe0e5adf49fdbd61d2f3b4ec80 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 10 Apr 2023 14:28:40 -0400 Subject: [PATCH 03/18] Dump the elisp when running compare. --- Cargo.toml | 1 + src/compare/mod.rs | 3 ++- src/compare/parse.rs | 26 ++++++++++++++++++++++++++ src/org_compare.rs | 3 +++ 4 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 src/compare/parse.rs diff --git a/Cargo.toml b/Cargo.toml index 540389a..7041f50 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,7 @@ name = "toy" version = "0.1.0" edition = "2021" license = "0BSD" +default-run = "toy" [[bin]] name = "toy" diff --git a/src/compare/mod.rs b/src/compare/mod.rs index 8b13789..e694bcb 100644 --- a/src/compare/mod.rs +++ b/src/compare/mod.rs @@ -1 +1,2 @@ - +mod parse; +pub use parse::emacs_parse_org_document; diff --git a/src/compare/parse.rs b/src/compare/parse.rs new file mode 100644 index 0000000..79633a9 --- /dev/null +++ b/src/compare/parse.rs @@ -0,0 +1,26 @@ +use std::path::Path; +use std::process::Command; + +pub fn emacs_parse_org_document<'a, C>(file_path: C) -> Result> +where + C: AsRef, +{ + let elisp_script = r#"(progn + (org-mode) + (message "%s" (pp-to-string (org-element-parse-buffer))) +)"#; + let mut cmd = Command::new("emacs"); + let proc = cmd + .arg("-q") + .arg("--no-site-file") + .arg("--no-splash") + .arg("--batch") + .arg("--insert") + .arg(file_path.as_ref().as_os_str()) + .arg("--eval") + .arg(elisp_script); + let out = proc.output()?; + out.status.exit_ok()?; + let org_sexp = out.stderr; + Ok(String::from_utf8(org_sexp)?) +} diff --git a/src/org_compare.rs b/src/org_compare.rs index 96e940e..7fd6685 100644 --- a/src/org_compare.rs +++ b/src/org_compare.rs @@ -1,6 +1,9 @@ #![feature(round_char_boundary)] +#![feature(exit_status_error)] +use compare::emacs_parse_org_document; mod compare; fn main() -> Result<(), Box> { + emacs_parse_org_document("./org_mode_samples/footnote_definition/simple.org")?; Ok(()) } From 751a3beffd7019768fca5aded6b1a28dd7d3e5f7 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 11 Apr 2023 14:50:37 -0400 Subject: [PATCH 04/18] Implement a very basic first stab at lisp parser. --- src/compare/error.rs | 25 +++++++++++++++++ src/compare/mod.rs | 2 ++ src/compare/parse.rs | 13 +++++++++ src/compare/sexp.rs | 65 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 105 insertions(+) create mode 100644 src/compare/error.rs create mode 100644 src/compare/sexp.rs diff --git a/src/compare/error.rs b/src/compare/error.rs new file mode 100644 index 0000000..eb23965 --- /dev/null +++ b/src/compare/error.rs @@ -0,0 +1,25 @@ +use nom::error::ErrorKind; +use nom::error::ParseError; +use nom::IResult; + +pub type Res = IResult>; + +#[derive(Debug, PartialEq)] +pub enum CustomError { + MyError(MyError), + Nom(I, ErrorKind), +} + +#[derive(Debug, PartialEq)] +pub struct MyError(pub I); + +impl ParseError for CustomError { + fn from_error_kind(input: I, kind: ErrorKind) -> Self { + CustomError::Nom(input, kind) + } + + fn append(_input: I, _kind: ErrorKind, mut other: Self) -> Self { + // Doesn't do append like VerboseError + other + } +} diff --git a/src/compare/mod.rs b/src/compare/mod.rs index e694bcb..d2ae927 100644 --- a/src/compare/mod.rs +++ b/src/compare/mod.rs @@ -1,2 +1,4 @@ +mod error; mod parse; +mod sexp; pub use parse::emacs_parse_org_document; diff --git a/src/compare/parse.rs b/src/compare/parse.rs index 79633a9..89367c7 100644 --- a/src/compare/parse.rs +++ b/src/compare/parse.rs @@ -1,7 +1,20 @@ use std::path::Path; use std::process::Command; +use crate::compare::sexp::sexp; + pub fn emacs_parse_org_document<'a, C>(file_path: C) -> Result> +where + C: AsRef, +{ + let org_sexp = emacs_parse_org_document_to_sexp(file_path)?; + let parsed_sexp = sexp(org_sexp.as_str()).expect("Parse failure"); + todo!() +} + +fn emacs_parse_org_document_to_sexp<'a, C>( + file_path: C, +) -> Result> where C: AsRef, { diff --git a/src/compare/sexp.rs b/src/compare/sexp.rs new file mode 100644 index 0000000..d50dbdb --- /dev/null +++ b/src/compare/sexp.rs @@ -0,0 +1,65 @@ +use nom::branch::alt; +use nom::bytes::complete::tag; +use nom::bytes::complete::take_till1; +use nom::character::complete::multispace0; +use nom::character::complete::multispace1; +use nom::multi::separated_list1; + +use super::error::Res; + +#[derive(Debug)] +pub enum Token<'s> { + Atom(&'s str), + List(Vec>), +} + +#[tracing::instrument(ret, level = "debug")] +pub fn sexp<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { + let (remaining, _) = multispace0(input)?; + let (remaining, tkn) = token(remaining)?; + let (remaining, _) = multispace0(remaining)?; + Ok((remaining, tkn)) +} + +#[tracing::instrument(ret, level = "debug")] +fn token<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { + alt((list, atom))(input) +} + +#[tracing::instrument(ret, level = "debug")] +fn list<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { + let (remaining, opening_paren) = tag("(")(input)?; + let (remaining, children) = separated_list1(multispace1, token)(remaining)?; + let (remaining, closing_paren) = tag(")")(remaining)?; + Ok((remaining, Token::List(children))) +} + +#[tracing::instrument(ret, level = "debug")] +fn atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { + unquoted_atom(input) +} + +#[tracing::instrument(ret, level = "debug")] +fn unquoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { + let (remaining, body) = take_till1(|c| match c { + ' ' | '\t' | '\r' | '\n' => true, + _ => false, + })(input)?; + Ok((remaining, Token::Atom(body))) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn simple() { + let input = " (foo bar baz ) "; + let (remaining, parsed) = sexp(input).expect("Parse the input"); + assert_eq!(remaining, ""); + assert!(match parsed { + Token::Atom(_) => false, + Token::List(_) => true, + }); + } +} From 5d7ca1b96627eca3eaf1a28ac568f73f81d257cd Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 11 Apr 2023 15:08:46 -0400 Subject: [PATCH 05/18] Fix handling of whitespace. --- src/compare/mod.rs | 1 + src/compare/sexp.rs | 6 +++++- src/init_tracing.rs | 34 ++++++++++++++++++++++++++++++++++ src/main.rs | 33 ++++----------------------------- src/org_compare.rs | 9 ++++++++- 5 files changed, 52 insertions(+), 31 deletions(-) create mode 100644 src/init_tracing.rs diff --git a/src/compare/mod.rs b/src/compare/mod.rs index d2ae927..d364ed7 100644 --- a/src/compare/mod.rs +++ b/src/compare/mod.rs @@ -2,3 +2,4 @@ mod error; mod parse; mod sexp; pub use parse::emacs_parse_org_document; +pub use sexp::sexp; diff --git a/src/compare/sexp.rs b/src/compare/sexp.rs index d50dbdb..136c1e7 100644 --- a/src/compare/sexp.rs +++ b/src/compare/sexp.rs @@ -3,7 +3,10 @@ use nom::bytes::complete::tag; use nom::bytes::complete::take_till1; use nom::character::complete::multispace0; use nom::character::complete::multispace1; +use nom::combinator::not; +use nom::combinator::peek; use nom::multi::separated_list1; +use nom::sequence::delimited; use super::error::Res; @@ -29,13 +32,14 @@ fn token<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { #[tracing::instrument(ret, level = "debug")] fn list<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { let (remaining, opening_paren) = tag("(")(input)?; - let (remaining, children) = separated_list1(multispace1, token)(remaining)?; + let (remaining, children) = delimited(multispace0, separated_list1(multispace1, token), multispace0)(remaining)?; let (remaining, closing_paren) = tag(")")(remaining)?; Ok((remaining, Token::List(children))) } #[tracing::instrument(ret, level = "debug")] fn atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { + not(peek(tag(")")))(input)?; unquoted_atom(input) } diff --git a/src/init_tracing.rs b/src/init_tracing.rs new file mode 100644 index 0000000..171de8d --- /dev/null +++ b/src/init_tracing.rs @@ -0,0 +1,34 @@ +use tracing_subscriber::layer::SubscriberExt; +use tracing_subscriber::util::SubscriberInitExt; +use tracing_subscriber::EnvFilter; + +pub fn init_telemetry() -> Result<(), Box> { + let env_filter = EnvFilter::try_from_default_env().unwrap_or(EnvFilter::new("WARN")); + + // let stdout = tracing_subscriber::fmt::Layer::new() + // .pretty() + // .with_file(true) + // .with_line_number(true) + // .with_thread_ids(false) + // .with_target(false); + + opentelemetry::global::set_text_map_propagator(opentelemetry_jaeger::Propagator::new()); + let tracer = opentelemetry_jaeger::new_pipeline() + .with_service_name("toy_language") + .install_simple()?; + + let opentelemetry = tracing_opentelemetry::layer().with_tracer(tracer); + + tracing_subscriber::registry() + .with(env_filter) + .with(opentelemetry) + // .with(stdout) + .try_init()?; + + Ok(()) +} + +pub fn shutdown_telemetry() -> Result<(), Box> { + opentelemetry::global::shutdown_tracer_provider(); + Ok(()) +} diff --git a/src/main.rs b/src/main.rs index 8f1e4f1..6201ac9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,9 +1,9 @@ #![feature(round_char_boundary)] +use crate::init_tracing::init_telemetry; +use crate::init_tracing::shutdown_telemetry; use crate::parser::document; -use tracing_subscriber::EnvFilter; +mod init_tracing; mod parser; -use tracing_subscriber::layer::SubscriberExt; -use tracing_subscriber::util::SubscriberInitExt; const TEST_DOC: &'static str = include_str!("../toy_language.txt"); @@ -12,31 +12,6 @@ fn main() -> Result<(), Box> { let parsed = document(TEST_DOC); println!("{}\n\n\n", TEST_DOC); println!("{:#?}", parsed); - opentelemetry::global::shutdown_tracer_provider(); - Ok(()) -} - -fn init_telemetry() -> Result<(), Box> { - let env_filter = EnvFilter::try_from_default_env().unwrap_or(EnvFilter::new("WARN")); - - // let stdout = tracing_subscriber::fmt::Layer::new() - // .pretty() - // .with_file(true) - // .with_line_number(true) - // .with_thread_ids(false) - // .with_target(false); - - opentelemetry::global::set_text_map_propagator(opentelemetry_jaeger::Propagator::new()); - let tracer = opentelemetry_jaeger::new_pipeline() - .with_service_name("toy_language") - .install_simple()?; - - let opentelemetry = tracing_opentelemetry::layer().with_tracer(tracer); - - tracing_subscriber::registry() - .with(env_filter) - .with(opentelemetry) - // .with(stdout) - .try_init()?; + shutdown_telemetry()?; Ok(()) } diff --git a/src/org_compare.rs b/src/org_compare.rs index 7fd6685..76ce3b9 100644 --- a/src/org_compare.rs +++ b/src/org_compare.rs @@ -1,9 +1,16 @@ #![feature(round_char_boundary)] #![feature(exit_status_error)] +use crate::init_tracing::init_telemetry; +use crate::init_tracing::shutdown_telemetry; use compare::emacs_parse_org_document; +use compare::sexp; mod compare; +mod init_tracing; fn main() -> Result<(), Box> { - emacs_parse_org_document("./org_mode_samples/footnote_definition/simple.org")?; + init_telemetry()?; + // emacs_parse_org_document("./org_mode_samples/footnote_definition/simple.org")?; + sexp(" (foo bar baz ) ")?; + shutdown_telemetry()?; Ok(()) } From 3bdb1e38411fa45480fdc41dbe1d20650bf8b64e Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 11 Apr 2023 15:23:16 -0400 Subject: [PATCH 06/18] Beginning of handling quoted strings. --- src/compare/sexp.rs | 56 +++++++++++++++++++++++++++++++++++++++++++-- src/org_compare.rs | 2 +- 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/src/compare/sexp.rs b/src/compare/sexp.rs index 136c1e7..2adf545 100644 --- a/src/compare/sexp.rs +++ b/src/compare/sexp.rs @@ -1,8 +1,10 @@ use nom::branch::alt; +use nom::bytes::complete::escaped; use nom::bytes::complete::tag; use nom::bytes::complete::take_till1; use nom::character::complete::multispace0; use nom::character::complete::multispace1; +use nom::character::complete::one_of; use nom::combinator::not; use nom::combinator::peek; use nom::multi::separated_list1; @@ -32,7 +34,11 @@ fn token<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { #[tracing::instrument(ret, level = "debug")] fn list<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { let (remaining, opening_paren) = tag("(")(input)?; - let (remaining, children) = delimited(multispace0, separated_list1(multispace1, token), multispace0)(remaining)?; + let (remaining, children) = delimited( + multispace0, + separated_list1(multispace1, token), + multispace0, + )(remaining)?; let (remaining, closing_paren) = tag(")")(remaining)?; Ok((remaining, Token::List(children))) } @@ -40,7 +46,7 @@ fn list<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { #[tracing::instrument(ret, level = "debug")] fn atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { not(peek(tag(")")))(input)?; - unquoted_atom(input) + alt((quoted_atom, unquoted_atom))(input) } #[tracing::instrument(ret, level = "debug")] @@ -52,6 +58,41 @@ fn unquoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { Ok((remaining, Token::Atom(body))) } +#[tracing::instrument(ret, level = "debug")] +fn quoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { + let (remaining, _) = tag(r#"""#)(input)?; + let (remaining, _) = escaped( + take_till1(|c| match c { + '\\' | '"' => true, + _ => false, + }), + '\\', + one_of(r#"""#), + )(remaining)?; + let (remaining, _) = tag(r#"""#)(remaining)?; + let source = get_consumed(input, remaining); + Ok((remaining, Token::Atom(source))) +} + +/// Get a slice of the string that was consumed in a parser using the original input to the parser and the remaining input after the parser. +fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str { + assert!(is_slice_of(input, remaining)); + let source = { + let offset = remaining.as_ptr() as usize - input.as_ptr() as usize; + &input[..offset] + }; + source +} + +/// Check if the child string slice is a slice of the parent string slice. +fn is_slice_of(parent: &str, child: &str) -> bool { + let parent_start = parent.as_ptr() as usize; + let parent_end = parent_start + parent.len(); + let child_start = child.as_ptr() as usize; + let child_end = child_start + child.len(); + child_start >= parent_start && child_end <= parent_end +} + #[cfg(test)] mod tests { use super::*; @@ -66,4 +107,15 @@ mod tests { Token::List(_) => true, }); } + + #[test] + fn quoted() { + let input = r#" ("foo" bar baz ) "#; + let (remaining, parsed) = sexp(input).expect("Parse the input"); + assert_eq!(remaining, ""); + assert!(match parsed { + Token::Atom(_) => false, + Token::List(_) => true, + }); + } } diff --git a/src/org_compare.rs b/src/org_compare.rs index 76ce3b9..6a06a0f 100644 --- a/src/org_compare.rs +++ b/src/org_compare.rs @@ -10,7 +10,7 @@ mod init_tracing; fn main() -> Result<(), Box> { init_telemetry()?; // emacs_parse_org_document("./org_mode_samples/footnote_definition/simple.org")?; - sexp(" (foo bar baz ) ")?; + sexp(r#" ("foo" bar baz ) "#)?; shutdown_telemetry()?; Ok(()) } From 8df02fa8b94f5d395c6c3a0f9a640630a4fa8c35 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 11 Apr 2023 16:02:57 -0400 Subject: [PATCH 07/18] Handle text with properties. --- src/compare/sexp.rs | 54 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 3 deletions(-) diff --git a/src/compare/sexp.rs b/src/compare/sexp.rs index 2adf545..e8cf1fa 100644 --- a/src/compare/sexp.rs +++ b/src/compare/sexp.rs @@ -5,10 +5,15 @@ use nom::bytes::complete::take_till1; use nom::character::complete::multispace0; use nom::character::complete::multispace1; use nom::character::complete::one_of; +use nom::combinator::map; use nom::combinator::not; +use nom::combinator::opt; use nom::combinator::peek; +use nom::combinator::verify; use nom::multi::separated_list1; use nom::sequence::delimited; +use nom::sequence::preceded; +use nom::sequence::tuple; use super::error::Res; @@ -16,6 +21,13 @@ use super::error::Res; pub enum Token<'s> { Atom(&'s str), List(Vec>), + TextWithProperties(TextWithProperties<'s>), +} + +#[derive(Debug)] +pub struct TextWithProperties<'s> { + text: &'s str, + properties: Vec>, } #[tracing::instrument(ret, level = "debug")] @@ -33,20 +45,20 @@ fn token<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { #[tracing::instrument(ret, level = "debug")] fn list<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { - let (remaining, opening_paren) = tag("(")(input)?; + let (remaining, _) = tag("(")(input)?; let (remaining, children) = delimited( multispace0, separated_list1(multispace1, token), multispace0, )(remaining)?; - let (remaining, closing_paren) = tag(")")(remaining)?; + let (remaining, _) = tag(")")(remaining)?; Ok((remaining, Token::List(children))) } #[tracing::instrument(ret, level = "debug")] fn atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { not(peek(tag(")")))(input)?; - alt((quoted_atom, unquoted_atom))(input) + alt((text_with_properties, quoted_atom, unquoted_atom))(input) } #[tracing::instrument(ret, level = "debug")] @@ -74,6 +86,29 @@ fn quoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { Ok((remaining, Token::Atom(source))) } +fn text_with_properties<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { + let (remaining, _) = tag("#(")(input)?; + let (remaining, (text, props)) = delimited( + multispace0, + tuple(( + map(quoted_atom, |atom| match atom { + Token::Atom(body) => body, + _ => unreachable!(), + }), + preceded(multispace1, opt(separated_list1(multispace1, token))), + )), + multispace0, + )(remaining)?; + let (remaining, _) = tag(")")(remaining)?; + Ok(( + remaining, + Token::TextWithProperties(TextWithProperties { + text, + properties: props.unwrap_or(Vec::new()), + }), + )) +} + /// Get a slice of the string that was consumed in a parser using the original input to the parser and the remaining input after the parser. fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str { assert!(is_slice_of(input, remaining)); @@ -105,6 +140,7 @@ mod tests { assert!(match parsed { Token::Atom(_) => false, Token::List(_) => true, + Token::TextWithProperties(_) => false, }); } @@ -116,6 +152,18 @@ mod tests { assert!(match parsed { Token::Atom(_) => false, Token::List(_) => true, + Token::TextWithProperties(_) => false, }); + let children = match parsed { + Token::List(children) => children, + _ => panic!("Should be a list."), + }; + assert_eq!( + match children.first() { + Some(Token::Atom(body)) => *body, + _ => panic!("First child should be an atom."), + }, + r#""foo""# + ) } } From 5305ae7627aa63f893bd64df579a395b2ea317d9 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 11 Apr 2023 16:28:56 -0400 Subject: [PATCH 08/18] Fix ending atoms at end of list. --- src/compare/parse.rs | 8 +++----- src/compare/sexp.rs | 6 +++--- src/org_compare.rs | 6 ++++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/compare/parse.rs b/src/compare/parse.rs index 89367c7..03e5b52 100644 --- a/src/compare/parse.rs +++ b/src/compare/parse.rs @@ -3,18 +3,16 @@ use std::process::Command; use crate::compare::sexp::sexp; -pub fn emacs_parse_org_document<'a, C>(file_path: C) -> Result> +pub fn compare_parse_org_document<'a, C>(file_path: C) -> Result> where C: AsRef, { - let org_sexp = emacs_parse_org_document_to_sexp(file_path)?; + let org_sexp = emacs_parse_org_document(file_path)?; let parsed_sexp = sexp(org_sexp.as_str()).expect("Parse failure"); todo!() } -fn emacs_parse_org_document_to_sexp<'a, C>( - file_path: C, -) -> Result> +pub fn emacs_parse_org_document<'a, C>(file_path: C) -> Result> where C: AsRef, { diff --git a/src/compare/sexp.rs b/src/compare/sexp.rs index e8cf1fa..4da408d 100644 --- a/src/compare/sexp.rs +++ b/src/compare/sexp.rs @@ -64,7 +64,7 @@ fn atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { #[tracing::instrument(ret, level = "debug")] fn unquoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { let (remaining, body) = take_till1(|c| match c { - ' ' | '\t' | '\r' | '\n' => true, + ' ' | '\t' | '\r' | '\n' | ')' => true, _ => false, })(input)?; Ok((remaining, Token::Atom(body))) @@ -75,11 +75,11 @@ fn quoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { let (remaining, _) = tag(r#"""#)(input)?; let (remaining, _) = escaped( take_till1(|c| match c { - '\\' | '"' => true, + '\\' | '"' | ')' => true, _ => false, }), '\\', - one_of(r#"""#), + one_of(r#""n"#), )(remaining)?; let (remaining, _) = tag(r#"""#)(remaining)?; let source = get_consumed(input, remaining); diff --git a/src/org_compare.rs b/src/org_compare.rs index 6a06a0f..0182ce3 100644 --- a/src/org_compare.rs +++ b/src/org_compare.rs @@ -9,8 +9,10 @@ mod init_tracing; fn main() -> Result<(), Box> { init_telemetry()?; - // emacs_parse_org_document("./org_mode_samples/footnote_definition/simple.org")?; - sexp(r#" ("foo" bar baz ) "#)?; + let org_sexp = emacs_parse_org_document("./org_mode_samples/footnote_definition/simple.org")?; + println!("{}", org_sexp); + let parsed_sexp = sexp(org_sexp.as_str()).expect("Parse failure"); + println!("{:#?}", parsed_sexp); shutdown_telemetry()?; Ok(()) } From 287cc8dea3bebac06462f3fb76e214b3299608af Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 11 Apr 2023 17:35:09 -0400 Subject: [PATCH 09/18] Just barely starting to diff the two parsed forms. --- src/compare/diff.rs | 25 +++++++++++++++++++++++++ src/compare/mod.rs | 2 ++ src/compare/sexp.rs | 16 ++++++++++++++++ src/org_compare.rs | 12 ++++++++++-- src/parser/mod.rs | 1 + 5 files changed, 54 insertions(+), 2 deletions(-) create mode 100644 src/compare/diff.rs diff --git a/src/compare/diff.rs b/src/compare/diff.rs new file mode 100644 index 0000000..6bd231c --- /dev/null +++ b/src/compare/diff.rs @@ -0,0 +1,25 @@ +use super::sexp::Token; +use crate::parser::Document; + +pub fn compare_document<'s>( + emacs: &'s Token<'s>, + rust: &'s Document<'s>, +) -> Result<(), Box> { + compare_document_indented(0, emacs, rust) +} + +fn compare_document_indented<'s>( + indentation: usize, + emacs: &'s Token<'s>, + rust: &'s Document<'s>, +) -> Result<(), Box> { + let children = emacs.as_list()?; + let first_child = children.first().ok_or("Should have at least one child")?; + let first_child_text = first_child.as_atom()?; + if first_child_text != "org-data" { + return Err("Document should correspond to an org-data cell.".into()); + } + // TODO: compare the children + + Ok(()) +} diff --git a/src/compare/mod.rs b/src/compare/mod.rs index d364ed7..4dbb825 100644 --- a/src/compare/mod.rs +++ b/src/compare/mod.rs @@ -1,5 +1,7 @@ +mod diff; mod error; mod parse; mod sexp; +pub use diff::compare_document; pub use parse::emacs_parse_org_document; pub use sexp::sexp; diff --git a/src/compare/sexp.rs b/src/compare/sexp.rs index 4da408d..5257940 100644 --- a/src/compare/sexp.rs +++ b/src/compare/sexp.rs @@ -30,6 +30,22 @@ pub struct TextWithProperties<'s> { properties: Vec>, } +impl<'s> Token<'s> { + pub fn as_list<'p>(&'p self) -> Result<&'p Vec>, Box> { + Ok(match self { + Token::List(children) => Ok(children), + _ => Err("wrong token type"), + }?) + } + + pub fn as_atom<'p>(&'p self) -> Result<&'s str, Box> { + Ok(match self { + Token::Atom(body) => Ok(*body), + _ => Err("wrong token type"), + }?) + } +} + #[tracing::instrument(ret, level = "debug")] pub fn sexp<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { let (remaining, _) = multispace0(input)?; diff --git a/src/org_compare.rs b/src/org_compare.rs index 0182ce3..7249f47 100644 --- a/src/org_compare.rs +++ b/src/org_compare.rs @@ -1,18 +1,26 @@ #![feature(round_char_boundary)] #![feature(exit_status_error)] +use crate::compare::compare_document; use crate::init_tracing::init_telemetry; use crate::init_tracing::shutdown_telemetry; +use crate::parser::document; use compare::emacs_parse_org_document; use compare::sexp; mod compare; mod init_tracing; +mod parser; fn main() -> Result<(), Box> { init_telemetry()?; - let org_sexp = emacs_parse_org_document("./org_mode_samples/footnote_definition/simple.org")?; + let org_path = "./org_mode_samples/footnote_definition/simple.org"; + let org_contents = std::fs::read_to_string(org_path)?; + let org_sexp = emacs_parse_org_document(org_path)?; println!("{}", org_sexp); - let parsed_sexp = sexp(org_sexp.as_str()).expect("Parse failure"); + let (_remaining, parsed_sexp) = sexp(org_sexp.as_str()).expect("Sexp Parse failure"); println!("{:#?}", parsed_sexp); + let (_remaining, rust_parsed) = document(org_contents.as_str()).expect("Org Parse failure"); + println!("{:#?}", rust_parsed); + compare_document(&parsed_sexp, &rust_parsed)?; shutdown_telemetry()?; Ok(()) } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 8f3915e..77672d8 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -15,4 +15,5 @@ mod plain_text; mod source; mod util; pub use document::document; +pub use document::Document; type Context<'r, 's> = &'r parser_context::ContextTree<'r, 's>; From be2d0141a5299255246433616bd92ed569a06cc7 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 11 Apr 2023 17:41:23 -0400 Subject: [PATCH 10/18] Iterate over argv for file paths. --- src/org_compare.rs | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/org_compare.rs b/src/org_compare.rs index 7249f47..7459de9 100644 --- a/src/org_compare.rs +++ b/src/org_compare.rs @@ -12,15 +12,16 @@ mod parser; fn main() -> Result<(), Box> { init_telemetry()?; - let org_path = "./org_mode_samples/footnote_definition/simple.org"; - let org_contents = std::fs::read_to_string(org_path)?; - let org_sexp = emacs_parse_org_document(org_path)?; - println!("{}", org_sexp); - let (_remaining, parsed_sexp) = sexp(org_sexp.as_str()).expect("Sexp Parse failure"); - println!("{:#?}", parsed_sexp); - let (_remaining, rust_parsed) = document(org_contents.as_str()).expect("Org Parse failure"); - println!("{:#?}", rust_parsed); - compare_document(&parsed_sexp, &rust_parsed)?; + for org_path in std::env::args().skip(1) { + let org_contents = std::fs::read_to_string(&org_path)?; + let org_sexp = emacs_parse_org_document(&org_path)?; + println!("{}", org_sexp); + let (_remaining, parsed_sexp) = sexp(org_sexp.as_str()).expect("Sexp Parse failure"); + println!("{:#?}", parsed_sexp); + let (_remaining, rust_parsed) = document(org_contents.as_str()).expect("Org Parse failure"); + println!("{:#?}", rust_parsed); + compare_document(&parsed_sexp, &rust_parsed)?; + } shutdown_telemetry()?; Ok(()) } From 6e62bd5ff2f49516d58b4d30b272495a6971d362 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 11 Apr 2023 18:27:01 -0400 Subject: [PATCH 11/18] Initial return and printing of tree diff output. --- src/compare/diff.rs | 120 ++++++++++++++++++++++++++++++++++++++++---- src/org_compare.rs | 4 +- src/parser/mod.rs | 1 + 3 files changed, 113 insertions(+), 12 deletions(-) diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 6bd231c..5c42e7d 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -1,25 +1,123 @@ use super::sexp::Token; use crate::parser::Document; +use crate::parser::Section; + +#[derive(Debug)] +pub struct DiffResult { + status: DiffStatus, + name: String, + children: Vec, +} + +#[derive(Debug)] +pub enum DiffStatus { + Good, + ChildBad, + Bad, +} + +impl DiffResult { + pub fn print(&self) -> Result<(), Box> { + self.print_indented(0) + } + + fn print_indented(&self, indentation: usize) -> Result<(), Box> { + println!( + "{}{:?} {}", + " ".repeat(indentation), + self.status, + self.name + ); + for child in self.children.iter() { + child.print_indented(indentation + 1)?; + } + Ok(()) + } +} pub fn compare_document<'s>( emacs: &'s Token<'s>, rust: &'s Document<'s>, -) -> Result<(), Box> { - compare_document_indented(0, emacs, rust) -} - -fn compare_document_indented<'s>( - indentation: usize, - emacs: &'s Token<'s>, - rust: &'s Document<'s>, -) -> Result<(), Box> { +) -> Result> { let children = emacs.as_list()?; - let first_child = children.first().ok_or("Should have at least one child")?; + let first_child = children.first().ok_or("Should have at least one child.")?; let first_child_text = first_child.as_atom()?; if first_child_text != "org-data" { return Err("Document should correspond to an org-data cell.".into()); } + let mut child_status = Vec::new(); // TODO: compare the children - Ok(()) + // Skipping "org-data" and the first parameter which is often nil + for (i, token) in children.iter().skip(2).enumerate() { + let section_or_headline = token.as_list()?; + let first_cell = section_or_headline + .first() + .ok_or("Should have at least one child.")? + .as_atom()?; + if first_cell == "section" { + if i != 0 { + return Err("Section cannot be after the first child of document.".into()); + } + child_status.push(compare_section( + token, + rust.zeroth_section + .as_ref() + .ok_or("No corresponding zeroth-section")?, + )?); + } else if first_cell == "headline" { + let corresponding_heading = rust + .children + .iter() + .nth(i - rust.zeroth_section.as_ref().map(|_| 1).unwrap_or(0)) + .ok_or("Should have a corresponding heading.")?; + child_status.push(compare_heading(token, rust)?); + } else { + return Err("Document should only contain sections and headlines.".into()); + } + } + + Ok(DiffResult { + status: DiffStatus::Good, + name: "document".to_owned(), + children: child_status, + }) +} + +pub fn compare_section<'s>( + emacs: &'s Token<'s>, + rust: &'s Section<'s>, +) -> Result> { + let children = emacs.as_list()?; + let first_child = children.first().ok_or("Should have at least one child.")?; + let first_child_text = first_child.as_atom()?; + if first_child_text != "section" { + return Err("Section should correspond to a section cell.".into()); + } + let mut child_status = Vec::new(); + + Ok(DiffResult { + status: DiffStatus::Good, + name: "section".to_owned(), + children: child_status, + }) +} + +pub fn compare_heading<'s>( + emacs: &'s Token<'s>, + rust: &'s Document<'s>, +) -> Result> { + let children = emacs.as_list()?; + let first_child = children.first().ok_or("Should have at least one child.")?; + let first_child_text = first_child.as_atom()?; + if first_child_text != "headline" { + return Err("Heading should correspond to a headline cell.".into()); + } + let mut child_status = Vec::new(); + + Ok(DiffResult { + status: DiffStatus::Good, + name: "heading".to_owned(), + children: child_status, + }) } diff --git a/src/org_compare.rs b/src/org_compare.rs index 7459de9..9f1f615 100644 --- a/src/org_compare.rs +++ b/src/org_compare.rs @@ -20,8 +20,10 @@ fn main() -> Result<(), Box> { println!("{:#?}", parsed_sexp); let (_remaining, rust_parsed) = document(org_contents.as_str()).expect("Org Parse failure"); println!("{:#?}", rust_parsed); - compare_document(&parsed_sexp, &rust_parsed)?; + let diff_result = compare_document(&parsed_sexp, &rust_parsed)?; + diff_result.print()?; } + println!("Done."); shutdown_telemetry()?; Ok(()) } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 77672d8..74c00c5 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -16,4 +16,5 @@ mod source; mod util; pub use document::document; pub use document::Document; +pub use document::Section; type Context<'r, 's> = &'r parser_context::ContextTree<'r, 's>; From 276e8abb137ee5f0151a307c7fd2e767b874e2bb Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 11 Apr 2023 18:34:11 -0400 Subject: [PATCH 12/18] Pass the source through to child diff status so I can calculate offsets. --- src/compare/diff.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 5c42e7d..6f624e9 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -60,6 +60,7 @@ pub fn compare_document<'s>( return Err("Section cannot be after the first child of document.".into()); } child_status.push(compare_section( + rust.source, token, rust.zeroth_section .as_ref() @@ -71,7 +72,7 @@ pub fn compare_document<'s>( .iter() .nth(i - rust.zeroth_section.as_ref().map(|_| 1).unwrap_or(0)) .ok_or("Should have a corresponding heading.")?; - child_status.push(compare_heading(token, rust)?); + child_status.push(compare_heading(rust.source, token, rust)?); } else { return Err("Document should only contain sections and headlines.".into()); } @@ -85,6 +86,7 @@ pub fn compare_document<'s>( } pub fn compare_section<'s>( + source: &'s str, emacs: &'s Token<'s>, rust: &'s Section<'s>, ) -> Result> { @@ -104,6 +106,7 @@ pub fn compare_section<'s>( } pub fn compare_heading<'s>( + source: &'s str, emacs: &'s Token<'s>, rust: &'s Document<'s>, ) -> Result> { From 52b401d548b0cf41d8378e29f19556a1c2f987ef Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 11 Apr 2023 19:16:04 -0400 Subject: [PATCH 13/18] comparing begin and end offsets for top-level sections and headlines. --- src/compare/diff.rs | 52 +++++++++++++++++++++++++++++++++++++----- src/compare/mod.rs | 1 + src/compare/sexp.rs | 27 ++++++++++++++++++++++ src/compare/util.rs | 21 +++++++++++++++++ src/parser/document.rs | 12 ++++++++++ src/parser/mod.rs | 2 ++ 6 files changed, 109 insertions(+), 6 deletions(-) create mode 100644 src/compare/util.rs diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 6f624e9..798a29c 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -1,6 +1,8 @@ use super::sexp::Token; +use crate::compare::util::get_offsets; use crate::parser::Document; use crate::parser::Section; +use crate::parser::Heading; #[derive(Debug)] pub struct DiffResult { @@ -46,7 +48,7 @@ pub fn compare_document<'s>( return Err("Document should correspond to an org-data cell.".into()); } let mut child_status = Vec::new(); - // TODO: compare the children + let mut this_status = DiffStatus::Good; // Skipping "org-data" and the first parameter which is often nil for (i, token) in children.iter().skip(2).enumerate() { @@ -72,14 +74,14 @@ pub fn compare_document<'s>( .iter() .nth(i - rust.zeroth_section.as_ref().map(|_| 1).unwrap_or(0)) .ok_or("Should have a corresponding heading.")?; - child_status.push(compare_heading(rust.source, token, rust)?); + child_status.push(compare_heading(rust.source, token, corresponding_heading)?); } else { return Err("Document should only contain sections and headlines.".into()); } } Ok(DiffResult { - status: DiffStatus::Good, + status: this_status, name: "document".to_owned(), children: child_status, }) @@ -97,9 +99,28 @@ pub fn compare_section<'s>( return Err("Section should correspond to a section cell.".into()); } let mut child_status = Vec::new(); + let mut this_status = DiffStatus::Good; + + let attributes_child = children + .iter() + .nth(1) + .ok_or("Should have an attributes child.")?; + let attributes_map = attributes_child.as_map()?; + let begin = attributes_map + .get(":begin") + .ok_or("Missing :begin attribute.")? + .as_atom()?; + let end = attributes_map + .get(":end") + .ok_or("Missing :end attribute.")? + .as_atom()?; + let (rust_begin, rust_end) = get_offsets(source, rust); + if (rust_begin + 1).to_string() != begin || (rust_end + 1).to_string() != end { + this_status = DiffStatus::Bad; + } Ok(DiffResult { - status: DiffStatus::Good, + status: this_status, name: "section".to_owned(), children: child_status, }) @@ -108,7 +129,7 @@ pub fn compare_section<'s>( pub fn compare_heading<'s>( source: &'s str, emacs: &'s Token<'s>, - rust: &'s Document<'s>, + rust: &'s Heading<'s>, ) -> Result> { let children = emacs.as_list()?; let first_child = children.first().ok_or("Should have at least one child.")?; @@ -117,9 +138,28 @@ pub fn compare_heading<'s>( return Err("Heading should correspond to a headline cell.".into()); } let mut child_status = Vec::new(); + let mut this_status = DiffStatus::Good; + + let attributes_child = children + .iter() + .nth(1) + .ok_or("Should have an attributes child.")?; + let attributes_map = attributes_child.as_map()?; + let begin = attributes_map + .get(":begin") + .ok_or("Missing :begin attribute.")? + .as_atom()?; + let end = attributes_map + .get(":end") + .ok_or("Missing :end attribute.")? + .as_atom()?; + let (rust_begin, rust_end) = get_offsets(source, rust); + if (rust_begin + 1).to_string() != begin || (rust_end + 1).to_string() != end { + this_status = DiffStatus::Bad; + } Ok(DiffResult { - status: DiffStatus::Good, + status: this_status, name: "heading".to_owned(), children: child_status, }) diff --git a/src/compare/mod.rs b/src/compare/mod.rs index 4dbb825..07f9207 100644 --- a/src/compare/mod.rs +++ b/src/compare/mod.rs @@ -2,6 +2,7 @@ mod diff; mod error; mod parse; mod sexp; +mod util; pub use diff::compare_document; pub use parse::emacs_parse_org_document; pub use sexp::sexp; diff --git a/src/compare/sexp.rs b/src/compare/sexp.rs index 5257940..948181d 100644 --- a/src/compare/sexp.rs +++ b/src/compare/sexp.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; + use nom::branch::alt; use nom::bytes::complete::escaped; use nom::bytes::complete::tag; @@ -44,6 +46,31 @@ impl<'s> Token<'s> { _ => Err("wrong token type"), }?) } + + pub fn as_map<'p>( + &'p self, + ) -> Result>, Box> { + let mut hashmap = HashMap::new(); + + let children = self.as_list()?; + if children.len() % 2 != 0 { + return Err("Expecting an even number of children".into()); + } + let mut key: Option<&str> = None; + for child in children.iter() { + match key { + None => { + key = Some(child.as_atom()?); + } + Some(key_val) => { + key = None; + hashmap.insert(key_val, child); + } + }; + } + + Ok(hashmap) + } } #[tracing::instrument(ret, level = "debug")] diff --git a/src/compare/util.rs b/src/compare/util.rs new file mode 100644 index 0000000..1dd9463 --- /dev/null +++ b/src/compare/util.rs @@ -0,0 +1,21 @@ +use crate::parser::Source; + +/// Check if the child string slice is a slice of the parent string slice. +fn is_slice_of(parent: &str, child: &str) -> bool { + let parent_start = parent.as_ptr() as usize; + let parent_end = parent_start + parent.len(); + let child_start = child.as_ptr() as usize; + let child_end = child_start + child.len(); + child_start >= parent_start && child_end <= parent_end +} + +/// Get the offset into source that the rust object exists at. +/// +/// These offsets are zero-based unlike the elisp ones. +pub fn get_offsets<'s, S: Source<'s>>(source: &'s str, rust_object: &'s S) -> (usize, usize) { + let rust_object_source = rust_object.get_source(); + assert!(is_slice_of(source, rust_object_source)); + let offset = rust_object_source.as_ptr() as usize - source.as_ptr() as usize; + let end = offset + rust_object_source.len(); + (offset, end) +} diff --git a/src/parser/document.rs b/src/parser/document.rs index 83a4854..cf5addd 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -74,6 +74,18 @@ impl<'s> Source<'s> for DocumentElement<'s> { } } +impl<'s> Source<'s> for Section<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} + +impl<'s> Source<'s> for Heading<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} + #[tracing::instrument(ret, level = "debug")] #[allow(dead_code)] pub fn document(input: &str) -> Res<&str, Document> { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 74c00c5..63bf5d8 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -17,4 +17,6 @@ mod util; pub use document::document; pub use document::Document; pub use document::Section; +pub use document::Heading; +pub use source::Source; type Context<'r, 's> = &'r parser_context::ContextTree<'r, 's>; From 52df37655333798ab27844e2c873b4c6facc1483 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 11 Apr 2023 19:22:42 -0400 Subject: [PATCH 14/18] Calculate bad children on-the-fly. --- src/compare/diff.rs | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 798a29c..c4b15c8 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -11,10 +11,9 @@ pub struct DiffResult { children: Vec, } -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum DiffStatus { Good, - ChildBad, Bad, } @@ -24,10 +23,22 @@ impl DiffResult { } fn print_indented(&self, indentation: usize) -> Result<(), Box> { + let status_text = { + match self.status { + DiffStatus::Good => { + if self.has_bad_children() { + "BADCHILD" + } else { + "GOOD" + } + }, + DiffStatus::Bad => "BAD", + } + }; println!( - "{}{:?} {}", + "{}{} {}", " ".repeat(indentation), - self.status, + status_text, self.name ); for child in self.children.iter() { @@ -35,6 +46,10 @@ impl DiffResult { } Ok(()) } + + pub fn has_bad_children(&self) -> bool { + self.children.iter().any(|child| {child.status == DiffStatus::Bad || child.has_bad_children()}) + } } pub fn compare_document<'s>( From a456acd1e954ba59ffe1213609fbff0243f1a142 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 11 Apr 2023 19:51:20 -0400 Subject: [PATCH 15/18] Do not print the parsed sexp. These were particularly spammy. --- src/org_compare.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/org_compare.rs b/src/org_compare.rs index 9f1f615..b065720 100644 --- a/src/org_compare.rs +++ b/src/org_compare.rs @@ -17,7 +17,6 @@ fn main() -> Result<(), Box> { let org_sexp = emacs_parse_org_document(&org_path)?; println!("{}", org_sexp); let (_remaining, parsed_sexp) = sexp(org_sexp.as_str()).expect("Sexp Parse failure"); - println!("{:#?}", parsed_sexp); let (_remaining, rust_parsed) = document(org_contents.as_str()).expect("Org Parse failure"); println!("{:#?}", rust_parsed); let diff_result = compare_document(&parsed_sexp, &rust_parsed)?; From 4dbf8b9c69acb6c5e7bf15dc0a024b21b5086d6b Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Wed, 12 Apr 2023 11:17:21 -0400 Subject: [PATCH 16/18] Capture the trailing whitespace for the file as regular trailing whitespace for elements. This makes our ranges for sections and headlines match up with the emacs org-mode parser for the example at org_mode_samples/sections_and_headings/sections_and_headings.org. --- src/parser/parser_context.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/parser/parser_context.rs b/src/parser/parser_context.rs index 24d988d..9f578f9 100644 --- a/src/parser/parser_context.rs +++ b/src/parser/parser_context.rs @@ -1,9 +1,8 @@ use std::rc::Rc; +use nom::combinator::eof; use nom::IResult; -use crate::parser::util::whitespace_eof; - use super::error::CustomError; use super::error::MyError; use super::error::Res; @@ -25,7 +24,7 @@ impl<'r, 's> ContextTree<'r, 's> { pub fn branch_from(trunk: &Rc>>) -> Self { ContextTree { - tree: List::branch_from(trunk) + tree: List::branch_from(trunk), } } @@ -67,7 +66,7 @@ impl<'r, 's> ContextTree<'r, 's> { i: &'s str, ) -> IResult<&'s str, &'s str, CustomError<&'s str>> { // Special check for EOF. We don't just make this a document-level exit matcher since the IgnoreParent ChainBehavior could cause early exit matchers to not run. - let at_end_of_file = whitespace_eof(i); + let at_end_of_file = eof(i); if at_end_of_file.is_ok() { return at_end_of_file; } From c4e6549feb32f56d6f32b88cbf10c7fc836bb071 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Wed, 12 Apr 2023 11:35:02 -0400 Subject: [PATCH 17/18] Compare children of heading. --- src/compare/diff.rs | 27 ++++++++++++++++++--------- src/parser/mod.rs | 3 ++- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/src/compare/diff.rs b/src/compare/diff.rs index c4b15c8..3e69a6a 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -1,8 +1,9 @@ use super::sexp::Token; use crate::compare::util::get_offsets; use crate::parser::Document; -use crate::parser::Section; +use crate::parser::DocumentElement; use crate::parser::Heading; +use crate::parser::Section; #[derive(Debug)] pub struct DiffResult { @@ -31,16 +32,11 @@ impl DiffResult { } else { "GOOD" } - }, + } DiffStatus::Bad => "BAD", } }; - println!( - "{}{} {}", - " ".repeat(indentation), - status_text, - self.name - ); + println!("{}{} {}", " ".repeat(indentation), status_text, self.name); for child in self.children.iter() { child.print_indented(indentation + 1)?; } @@ -48,7 +44,9 @@ impl DiffResult { } pub fn has_bad_children(&self) -> bool { - self.children.iter().any(|child| {child.status == DiffStatus::Bad || child.has_bad_children()}) + self.children + .iter() + .any(|child| child.status == DiffStatus::Bad || child.has_bad_children()) } } @@ -173,6 +171,17 @@ pub fn compare_heading<'s>( this_status = DiffStatus::Bad; } + for (emacs_child, rust_child) in children.iter().skip(2).zip(rust.children.iter()) { + match rust_child { + DocumentElement::Heading(rust_heading) => { + child_status.push(compare_heading(source, emacs_child, rust_heading)?); + }, + DocumentElement::Section(rust_section) => { + child_status.push(compare_section(source, emacs_child, rust_section)?); + }, + }; + } + Ok(DiffResult { status: this_status, name: "heading".to_owned(), diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 63bf5d8..9c810ce 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -16,7 +16,8 @@ mod source; mod util; pub use document::document; pub use document::Document; -pub use document::Section; +pub use document::DocumentElement; pub use document::Heading; +pub use document::Section; pub use source::Source; type Context<'r, 's> = &'r parser_context::ContextTree<'r, 's>; From 2a601475fd60eaced4946d48df7242d1b7e20f12 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Wed, 12 Apr 2023 11:46:49 -0400 Subject: [PATCH 18/18] Compare children of sections. --- src/compare/diff.rs | 60 +++++++++++++++++++++++++++++++++++++++++++ src/parser/element.rs | 6 +++++ src/parser/mod.rs | 2 ++ 3 files changed, 68 insertions(+) diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 3e69a6a..415082e 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -4,6 +4,8 @@ use crate::parser::Document; use crate::parser::DocumentElement; use crate::parser::Heading; use crate::parser::Section; +use crate::parser::Paragraph; +use crate::parser::Element; #[derive(Debug)] pub struct DiffResult { @@ -132,6 +134,10 @@ pub fn compare_section<'s>( this_status = DiffStatus::Bad; } + for (emacs_child, rust_child) in children.iter().skip(2).zip(rust.children.iter()) { + child_status.push(compare_element(source, emacs_child, rust_child)?); + } + Ok(DiffResult { status: this_status, name: "section".to_owned(), @@ -188,3 +194,57 @@ pub fn compare_heading<'s>( children: child_status, }) } + +pub fn compare_element<'s>( + source: &'s str, + emacs: &'s Token<'s>, + rust: &'s Element<'s>, +) -> Result> { + match rust { + Element::Paragraph(obj) => compare_paragraph(source, emacs, obj), + Element::PlainList(_) => todo!(), + Element::GreaterBlock(_) => todo!(), + Element::FootnoteDefinition(_) => todo!(), + } +} + +pub fn compare_paragraph<'s>( + source: &'s str, + emacs: &'s Token<'s>, + rust: &'s Paragraph<'s>, +) -> Result> { + let children = emacs.as_list()?; + let first_child = children.first().ok_or("Should have at least one child.")?.as_atom()?; + if first_child != "paragraph" { + return Err("Paragraph should correspond to a paragraph cell.".into()); + } + let mut child_status = Vec::new(); + let mut this_status = DiffStatus::Good; + + let attributes_child = children + .iter() + .nth(1) + .ok_or("Should have an attributes child.")?; + let attributes_map = attributes_child.as_map()?; + let begin = attributes_map + .get(":begin") + .ok_or("Missing :begin attribute.")? + .as_atom()?; + let end = attributes_map + .get(":end") + .ok_or("Missing :end attribute.")? + .as_atom()?; + let (rust_begin, rust_end) = get_offsets(source, rust); + if (rust_begin + 1).to_string() != begin || (rust_end + 1).to_string() != end { + this_status = DiffStatus::Bad; + } + + for (emacs_child, rust_child) in children.iter().skip(2).zip(rust.children.iter()) { + } + + Ok(DiffResult { + status: this_status, + name: "paragraph".to_owned(), + children: child_status, + }) +} diff --git a/src/parser/element.rs b/src/parser/element.rs index f92a106..9a2d189 100644 --- a/src/parser/element.rs +++ b/src/parser/element.rs @@ -32,6 +32,12 @@ impl<'s> Source<'s> for Element<'s> { } } +impl<'s> Source<'s> for Paragraph<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} + #[tracing::instrument(ret, level = "debug")] pub fn element<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Element<'s>> { let non_paragraph_matcher = parser_with_context!(non_paragraph_element)(context); diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 9c810ce..c966331 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -19,5 +19,7 @@ pub use document::Document; pub use document::DocumentElement; pub use document::Heading; pub use document::Section; +pub use element::Element; +pub use lesser_element::Paragraph; pub use source::Source; type Context<'r, 's> = &'r parser_context::ContextTree<'r, 's>;