diff --git a/Cargo.toml b/Cargo.toml index 627ec01..7041f50 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,12 +3,17 @@ name = "toy" version = "0.1.0" edition = "2021" license = "0BSD" +default-run = "toy" [[bin]] name = "toy" path = "src/main.rs" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[[bin]] +name = "org_compare" +path = "src/org_compare.rs" + [dependencies] nom = "7.1.1" @@ -19,3 +24,5 @@ tracing-opentelemetry = "0.17.2" tracing-subscriber = {version="0.3.16", features=["env-filter"]} [features] +default = ["compare"] +compare = [] diff --git a/src/compare/diff.rs b/src/compare/diff.rs new file mode 100644 index 0000000..415082e --- /dev/null +++ b/src/compare/diff.rs @@ -0,0 +1,250 @@ +use super::sexp::Token; +use crate::compare::util::get_offsets; +use crate::parser::Document; +use crate::parser::DocumentElement; +use crate::parser::Heading; +use crate::parser::Section; +use crate::parser::Paragraph; +use crate::parser::Element; + +#[derive(Debug)] +pub struct DiffResult { + status: DiffStatus, + name: String, + children: Vec, +} + +#[derive(Debug, PartialEq)] +pub enum DiffStatus { + Good, + Bad, +} + +impl DiffResult { + pub fn print(&self) -> Result<(), Box> { + self.print_indented(0) + } + + fn print_indented(&self, indentation: usize) -> Result<(), Box> { + let status_text = { + match self.status { + DiffStatus::Good => { + if self.has_bad_children() { + "BADCHILD" + } else { + "GOOD" + } + } + DiffStatus::Bad => "BAD", + } + }; + println!("{}{} {}", " ".repeat(indentation), status_text, self.name); + for child in self.children.iter() { + child.print_indented(indentation + 1)?; + } + Ok(()) + } + + pub fn has_bad_children(&self) -> bool { + self.children + .iter() + .any(|child| child.status == DiffStatus::Bad || child.has_bad_children()) + } +} + +pub fn compare_document<'s>( + emacs: &'s Token<'s>, + rust: &'s Document<'s>, +) -> Result> { + let children = emacs.as_list()?; + let first_child = children.first().ok_or("Should have at least one child.")?; + let first_child_text = first_child.as_atom()?; + if first_child_text != "org-data" { + return Err("Document should correspond to an org-data cell.".into()); + } + let mut child_status = Vec::new(); + let mut this_status = DiffStatus::Good; + + // Skipping "org-data" and the first parameter which is often nil + for (i, token) in children.iter().skip(2).enumerate() { + let section_or_headline = token.as_list()?; + let first_cell = section_or_headline + .first() + .ok_or("Should have at least one child.")? + .as_atom()?; + if first_cell == "section" { + if i != 0 { + return Err("Section cannot be after the first child of document.".into()); + } + child_status.push(compare_section( + rust.source, + token, + rust.zeroth_section + .as_ref() + .ok_or("No corresponding zeroth-section")?, + )?); + } else if first_cell == "headline" { + let corresponding_heading = rust + .children + .iter() + .nth(i - rust.zeroth_section.as_ref().map(|_| 1).unwrap_or(0)) + .ok_or("Should have a corresponding heading.")?; + child_status.push(compare_heading(rust.source, token, corresponding_heading)?); + } else { + return Err("Document should only contain sections and headlines.".into()); + } + } + + Ok(DiffResult { + status: this_status, + name: "document".to_owned(), + children: child_status, + }) +} + +pub fn compare_section<'s>( + source: &'s str, + emacs: &'s Token<'s>, + rust: &'s Section<'s>, +) -> Result> { + let children = emacs.as_list()?; + let first_child = children.first().ok_or("Should have at least one child.")?; + let first_child_text = first_child.as_atom()?; + if first_child_text != "section" { + return Err("Section should correspond to a section cell.".into()); + } + let mut child_status = Vec::new(); + let mut this_status = DiffStatus::Good; + + let attributes_child = children + .iter() + .nth(1) + .ok_or("Should have an attributes child.")?; + let attributes_map = attributes_child.as_map()?; + let begin = attributes_map + .get(":begin") + .ok_or("Missing :begin attribute.")? + .as_atom()?; + let end = attributes_map + .get(":end") + .ok_or("Missing :end attribute.")? + .as_atom()?; + let (rust_begin, rust_end) = get_offsets(source, rust); + if (rust_begin + 1).to_string() != begin || (rust_end + 1).to_string() != end { + this_status = DiffStatus::Bad; + } + + for (emacs_child, rust_child) in children.iter().skip(2).zip(rust.children.iter()) { + child_status.push(compare_element(source, emacs_child, rust_child)?); + } + + Ok(DiffResult { + status: this_status, + name: "section".to_owned(), + children: child_status, + }) +} + +pub fn compare_heading<'s>( + source: &'s str, + emacs: &'s Token<'s>, + rust: &'s Heading<'s>, +) -> Result> { + let children = emacs.as_list()?; + let first_child = children.first().ok_or("Should have at least one child.")?; + let first_child_text = first_child.as_atom()?; + if first_child_text != "headline" { + return Err("Heading should correspond to a headline cell.".into()); + } + let mut child_status = Vec::new(); + let mut this_status = DiffStatus::Good; + + let attributes_child = children + .iter() + .nth(1) + .ok_or("Should have an attributes child.")?; + let attributes_map = attributes_child.as_map()?; + let begin = attributes_map + .get(":begin") + .ok_or("Missing :begin attribute.")? + .as_atom()?; + let end = attributes_map + .get(":end") + .ok_or("Missing :end attribute.")? + .as_atom()?; + let (rust_begin, rust_end) = get_offsets(source, rust); + if (rust_begin + 1).to_string() != begin || (rust_end + 1).to_string() != end { + this_status = DiffStatus::Bad; + } + + for (emacs_child, rust_child) in children.iter().skip(2).zip(rust.children.iter()) { + match rust_child { + DocumentElement::Heading(rust_heading) => { + child_status.push(compare_heading(source, emacs_child, rust_heading)?); + }, + DocumentElement::Section(rust_section) => { + child_status.push(compare_section(source, emacs_child, rust_section)?); + }, + }; + } + + Ok(DiffResult { + status: this_status, + name: "heading".to_owned(), + children: child_status, + }) +} + +pub fn compare_element<'s>( + source: &'s str, + emacs: &'s Token<'s>, + rust: &'s Element<'s>, +) -> Result> { + match rust { + Element::Paragraph(obj) => compare_paragraph(source, emacs, obj), + Element::PlainList(_) => todo!(), + Element::GreaterBlock(_) => todo!(), + Element::FootnoteDefinition(_) => todo!(), + } +} + +pub fn compare_paragraph<'s>( + source: &'s str, + emacs: &'s Token<'s>, + rust: &'s Paragraph<'s>, +) -> Result> { + let children = emacs.as_list()?; + let first_child = children.first().ok_or("Should have at least one child.")?.as_atom()?; + if first_child != "paragraph" { + return Err("Paragraph should correspond to a paragraph cell.".into()); + } + let mut child_status = Vec::new(); + let mut this_status = DiffStatus::Good; + + let attributes_child = children + .iter() + .nth(1) + .ok_or("Should have an attributes child.")?; + let attributes_map = attributes_child.as_map()?; + let begin = attributes_map + .get(":begin") + .ok_or("Missing :begin attribute.")? + .as_atom()?; + let end = attributes_map + .get(":end") + .ok_or("Missing :end attribute.")? + .as_atom()?; + let (rust_begin, rust_end) = get_offsets(source, rust); + if (rust_begin + 1).to_string() != begin || (rust_end + 1).to_string() != end { + this_status = DiffStatus::Bad; + } + + for (emacs_child, rust_child) in children.iter().skip(2).zip(rust.children.iter()) { + } + + Ok(DiffResult { + status: this_status, + name: "paragraph".to_owned(), + children: child_status, + }) +} diff --git a/src/compare/error.rs b/src/compare/error.rs new file mode 100644 index 0000000..eb23965 --- /dev/null +++ b/src/compare/error.rs @@ -0,0 +1,25 @@ +use nom::error::ErrorKind; +use nom::error::ParseError; +use nom::IResult; + +pub type Res = IResult>; + +#[derive(Debug, PartialEq)] +pub enum CustomError { + MyError(MyError), + Nom(I, ErrorKind), +} + +#[derive(Debug, PartialEq)] +pub struct MyError(pub I); + +impl ParseError for CustomError { + fn from_error_kind(input: I, kind: ErrorKind) -> Self { + CustomError::Nom(input, kind) + } + + fn append(_input: I, _kind: ErrorKind, mut other: Self) -> Self { + // Doesn't do append like VerboseError + other + } +} diff --git a/src/compare/mod.rs b/src/compare/mod.rs new file mode 100644 index 0000000..07f9207 --- /dev/null +++ b/src/compare/mod.rs @@ -0,0 +1,8 @@ +mod diff; +mod error; +mod parse; +mod sexp; +mod util; +pub use diff::compare_document; +pub use parse::emacs_parse_org_document; +pub use sexp::sexp; diff --git a/src/compare/parse.rs b/src/compare/parse.rs new file mode 100644 index 0000000..03e5b52 --- /dev/null +++ b/src/compare/parse.rs @@ -0,0 +1,37 @@ +use std::path::Path; +use std::process::Command; + +use crate::compare::sexp::sexp; + +pub fn compare_parse_org_document<'a, C>(file_path: C) -> Result> +where + C: AsRef, +{ + let org_sexp = emacs_parse_org_document(file_path)?; + let parsed_sexp = sexp(org_sexp.as_str()).expect("Parse failure"); + todo!() +} + +pub fn emacs_parse_org_document<'a, C>(file_path: C) -> Result> +where + C: AsRef, +{ + let elisp_script = r#"(progn + (org-mode) + (message "%s" (pp-to-string (org-element-parse-buffer))) +)"#; + let mut cmd = Command::new("emacs"); + let proc = cmd + .arg("-q") + .arg("--no-site-file") + .arg("--no-splash") + .arg("--batch") + .arg("--insert") + .arg(file_path.as_ref().as_os_str()) + .arg("--eval") + .arg(elisp_script); + let out = proc.output()?; + out.status.exit_ok()?; + let org_sexp = out.stderr; + Ok(String::from_utf8(org_sexp)?) +} diff --git a/src/compare/sexp.rs b/src/compare/sexp.rs new file mode 100644 index 0000000..948181d --- /dev/null +++ b/src/compare/sexp.rs @@ -0,0 +1,212 @@ +use std::collections::HashMap; + +use nom::branch::alt; +use nom::bytes::complete::escaped; +use nom::bytes::complete::tag; +use nom::bytes::complete::take_till1; +use nom::character::complete::multispace0; +use nom::character::complete::multispace1; +use nom::character::complete::one_of; +use nom::combinator::map; +use nom::combinator::not; +use nom::combinator::opt; +use nom::combinator::peek; +use nom::combinator::verify; +use nom::multi::separated_list1; +use nom::sequence::delimited; +use nom::sequence::preceded; +use nom::sequence::tuple; + +use super::error::Res; + +#[derive(Debug)] +pub enum Token<'s> { + Atom(&'s str), + List(Vec>), + TextWithProperties(TextWithProperties<'s>), +} + +#[derive(Debug)] +pub struct TextWithProperties<'s> { + text: &'s str, + properties: Vec>, +} + +impl<'s> Token<'s> { + pub fn as_list<'p>(&'p self) -> Result<&'p Vec>, Box> { + Ok(match self { + Token::List(children) => Ok(children), + _ => Err("wrong token type"), + }?) + } + + pub fn as_atom<'p>(&'p self) -> Result<&'s str, Box> { + Ok(match self { + Token::Atom(body) => Ok(*body), + _ => Err("wrong token type"), + }?) + } + + pub fn as_map<'p>( + &'p self, + ) -> Result>, Box> { + let mut hashmap = HashMap::new(); + + let children = self.as_list()?; + if children.len() % 2 != 0 { + return Err("Expecting an even number of children".into()); + } + let mut key: Option<&str> = None; + for child in children.iter() { + match key { + None => { + key = Some(child.as_atom()?); + } + Some(key_val) => { + key = None; + hashmap.insert(key_val, child); + } + }; + } + + Ok(hashmap) + } +} + +#[tracing::instrument(ret, level = "debug")] +pub fn sexp<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { + let (remaining, _) = multispace0(input)?; + let (remaining, tkn) = token(remaining)?; + let (remaining, _) = multispace0(remaining)?; + Ok((remaining, tkn)) +} + +#[tracing::instrument(ret, level = "debug")] +fn token<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { + alt((list, atom))(input) +} + +#[tracing::instrument(ret, level = "debug")] +fn list<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { + let (remaining, _) = tag("(")(input)?; + let (remaining, children) = delimited( + multispace0, + separated_list1(multispace1, token), + multispace0, + )(remaining)?; + let (remaining, _) = tag(")")(remaining)?; + Ok((remaining, Token::List(children))) +} + +#[tracing::instrument(ret, level = "debug")] +fn atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { + not(peek(tag(")")))(input)?; + alt((text_with_properties, quoted_atom, unquoted_atom))(input) +} + +#[tracing::instrument(ret, level = "debug")] +fn unquoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { + let (remaining, body) = take_till1(|c| match c { + ' ' | '\t' | '\r' | '\n' | ')' => true, + _ => false, + })(input)?; + Ok((remaining, Token::Atom(body))) +} + +#[tracing::instrument(ret, level = "debug")] +fn quoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { + let (remaining, _) = tag(r#"""#)(input)?; + let (remaining, _) = escaped( + take_till1(|c| match c { + '\\' | '"' | ')' => true, + _ => false, + }), + '\\', + one_of(r#""n"#), + )(remaining)?; + let (remaining, _) = tag(r#"""#)(remaining)?; + let source = get_consumed(input, remaining); + Ok((remaining, Token::Atom(source))) +} + +fn text_with_properties<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { + let (remaining, _) = tag("#(")(input)?; + let (remaining, (text, props)) = delimited( + multispace0, + tuple(( + map(quoted_atom, |atom| match atom { + Token::Atom(body) => body, + _ => unreachable!(), + }), + preceded(multispace1, opt(separated_list1(multispace1, token))), + )), + multispace0, + )(remaining)?; + let (remaining, _) = tag(")")(remaining)?; + Ok(( + remaining, + Token::TextWithProperties(TextWithProperties { + text, + properties: props.unwrap_or(Vec::new()), + }), + )) +} + +/// Get a slice of the string that was consumed in a parser using the original input to the parser and the remaining input after the parser. +fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str { + assert!(is_slice_of(input, remaining)); + let source = { + let offset = remaining.as_ptr() as usize - input.as_ptr() as usize; + &input[..offset] + }; + source +} + +/// Check if the child string slice is a slice of the parent string slice. +fn is_slice_of(parent: &str, child: &str) -> bool { + let parent_start = parent.as_ptr() as usize; + let parent_end = parent_start + parent.len(); + let child_start = child.as_ptr() as usize; + let child_end = child_start + child.len(); + child_start >= parent_start && child_end <= parent_end +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn simple() { + let input = " (foo bar baz ) "; + let (remaining, parsed) = sexp(input).expect("Parse the input"); + assert_eq!(remaining, ""); + assert!(match parsed { + Token::Atom(_) => false, + Token::List(_) => true, + Token::TextWithProperties(_) => false, + }); + } + + #[test] + fn quoted() { + let input = r#" ("foo" bar baz ) "#; + let (remaining, parsed) = sexp(input).expect("Parse the input"); + assert_eq!(remaining, ""); + assert!(match parsed { + Token::Atom(_) => false, + Token::List(_) => true, + Token::TextWithProperties(_) => false, + }); + let children = match parsed { + Token::List(children) => children, + _ => panic!("Should be a list."), + }; + assert_eq!( + match children.first() { + Some(Token::Atom(body)) => *body, + _ => panic!("First child should be an atom."), + }, + r#""foo""# + ) + } +} diff --git a/src/compare/util.rs b/src/compare/util.rs new file mode 100644 index 0000000..1dd9463 --- /dev/null +++ b/src/compare/util.rs @@ -0,0 +1,21 @@ +use crate::parser::Source; + +/// Check if the child string slice is a slice of the parent string slice. +fn is_slice_of(parent: &str, child: &str) -> bool { + let parent_start = parent.as_ptr() as usize; + let parent_end = parent_start + parent.len(); + let child_start = child.as_ptr() as usize; + let child_end = child_start + child.len(); + child_start >= parent_start && child_end <= parent_end +} + +/// Get the offset into source that the rust object exists at. +/// +/// These offsets are zero-based unlike the elisp ones. +pub fn get_offsets<'s, S: Source<'s>>(source: &'s str, rust_object: &'s S) -> (usize, usize) { + let rust_object_source = rust_object.get_source(); + assert!(is_slice_of(source, rust_object_source)); + let offset = rust_object_source.as_ptr() as usize - source.as_ptr() as usize; + let end = offset + rust_object_source.len(); + (offset, end) +} diff --git a/src/init_tracing.rs b/src/init_tracing.rs new file mode 100644 index 0000000..171de8d --- /dev/null +++ b/src/init_tracing.rs @@ -0,0 +1,34 @@ +use tracing_subscriber::layer::SubscriberExt; +use tracing_subscriber::util::SubscriberInitExt; +use tracing_subscriber::EnvFilter; + +pub fn init_telemetry() -> Result<(), Box> { + let env_filter = EnvFilter::try_from_default_env().unwrap_or(EnvFilter::new("WARN")); + + // let stdout = tracing_subscriber::fmt::Layer::new() + // .pretty() + // .with_file(true) + // .with_line_number(true) + // .with_thread_ids(false) + // .with_target(false); + + opentelemetry::global::set_text_map_propagator(opentelemetry_jaeger::Propagator::new()); + let tracer = opentelemetry_jaeger::new_pipeline() + .with_service_name("toy_language") + .install_simple()?; + + let opentelemetry = tracing_opentelemetry::layer().with_tracer(tracer); + + tracing_subscriber::registry() + .with(env_filter) + .with(opentelemetry) + // .with(stdout) + .try_init()?; + + Ok(()) +} + +pub fn shutdown_telemetry() -> Result<(), Box> { + opentelemetry::global::shutdown_tracer_provider(); + Ok(()) +} diff --git a/src/main.rs b/src/main.rs index 8f1e4f1..6201ac9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,9 +1,9 @@ #![feature(round_char_boundary)] +use crate::init_tracing::init_telemetry; +use crate::init_tracing::shutdown_telemetry; use crate::parser::document; -use tracing_subscriber::EnvFilter; +mod init_tracing; mod parser; -use tracing_subscriber::layer::SubscriberExt; -use tracing_subscriber::util::SubscriberInitExt; const TEST_DOC: &'static str = include_str!("../toy_language.txt"); @@ -12,31 +12,6 @@ fn main() -> Result<(), Box> { let parsed = document(TEST_DOC); println!("{}\n\n\n", TEST_DOC); println!("{:#?}", parsed); - opentelemetry::global::shutdown_tracer_provider(); - Ok(()) -} - -fn init_telemetry() -> Result<(), Box> { - let env_filter = EnvFilter::try_from_default_env().unwrap_or(EnvFilter::new("WARN")); - - // let stdout = tracing_subscriber::fmt::Layer::new() - // .pretty() - // .with_file(true) - // .with_line_number(true) - // .with_thread_ids(false) - // .with_target(false); - - opentelemetry::global::set_text_map_propagator(opentelemetry_jaeger::Propagator::new()); - let tracer = opentelemetry_jaeger::new_pipeline() - .with_service_name("toy_language") - .install_simple()?; - - let opentelemetry = tracing_opentelemetry::layer().with_tracer(tracer); - - tracing_subscriber::registry() - .with(env_filter) - .with(opentelemetry) - // .with(stdout) - .try_init()?; + shutdown_telemetry()?; Ok(()) } diff --git a/src/org_compare.rs b/src/org_compare.rs new file mode 100644 index 0000000..b065720 --- /dev/null +++ b/src/org_compare.rs @@ -0,0 +1,28 @@ +#![feature(round_char_boundary)] +#![feature(exit_status_error)] +use crate::compare::compare_document; +use crate::init_tracing::init_telemetry; +use crate::init_tracing::shutdown_telemetry; +use crate::parser::document; +use compare::emacs_parse_org_document; +use compare::sexp; +mod compare; +mod init_tracing; +mod parser; + +fn main() -> Result<(), Box> { + init_telemetry()?; + for org_path in std::env::args().skip(1) { + let org_contents = std::fs::read_to_string(&org_path)?; + let org_sexp = emacs_parse_org_document(&org_path)?; + println!("{}", org_sexp); + let (_remaining, parsed_sexp) = sexp(org_sexp.as_str()).expect("Sexp Parse failure"); + let (_remaining, rust_parsed) = document(org_contents.as_str()).expect("Org Parse failure"); + println!("{:#?}", rust_parsed); + let diff_result = compare_document(&parsed_sexp, &rust_parsed)?; + diff_result.print()?; + } + println!("Done."); + shutdown_telemetry()?; + Ok(()) +} diff --git a/src/parser/document.rs b/src/parser/document.rs index 83a4854..cf5addd 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -74,6 +74,18 @@ impl<'s> Source<'s> for DocumentElement<'s> { } } +impl<'s> Source<'s> for Section<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} + +impl<'s> Source<'s> for Heading<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} + #[tracing::instrument(ret, level = "debug")] #[allow(dead_code)] pub fn document(input: &str) -> Res<&str, Document> { diff --git a/src/parser/element.rs b/src/parser/element.rs index f92a106..9a2d189 100644 --- a/src/parser/element.rs +++ b/src/parser/element.rs @@ -32,6 +32,12 @@ impl<'s> Source<'s> for Element<'s> { } } +impl<'s> Source<'s> for Paragraph<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} + #[tracing::instrument(ret, level = "debug")] pub fn element<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Element<'s>> { let non_paragraph_matcher = parser_with_context!(non_paragraph_element)(context); diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 8f3915e..c966331 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -15,4 +15,11 @@ mod plain_text; mod source; mod util; pub use document::document; +pub use document::Document; +pub use document::DocumentElement; +pub use document::Heading; +pub use document::Section; +pub use element::Element; +pub use lesser_element::Paragraph; +pub use source::Source; type Context<'r, 's> = &'r parser_context::ContextTree<'r, 's>; diff --git a/src/parser/parser_context.rs b/src/parser/parser_context.rs index 24d988d..9f578f9 100644 --- a/src/parser/parser_context.rs +++ b/src/parser/parser_context.rs @@ -1,9 +1,8 @@ use std::rc::Rc; +use nom::combinator::eof; use nom::IResult; -use crate::parser::util::whitespace_eof; - use super::error::CustomError; use super::error::MyError; use super::error::Res; @@ -25,7 +24,7 @@ impl<'r, 's> ContextTree<'r, 's> { pub fn branch_from(trunk: &Rc>>) -> Self { ContextTree { - tree: List::branch_from(trunk) + tree: List::branch_from(trunk), } } @@ -67,7 +66,7 @@ impl<'r, 's> ContextTree<'r, 's> { i: &'s str, ) -> IResult<&'s str, &'s str, CustomError<&'s str>> { // Special check for EOF. We don't just make this a document-level exit matcher since the IgnoreParent ChainBehavior could cause early exit matchers to not run. - let at_end_of_file = whitespace_eof(i); + let at_end_of_file = eof(i); if at_end_of_file.is_ok() { return at_end_of_file; }