Merge branch 'compare'

This commit is contained in:
Tom Alexander 2023-04-12 12:20:55 -04:00
commit 7d5eb7c6bb
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
14 changed files with 655 additions and 34 deletions

View File

@ -3,12 +3,17 @@ name = "toy"
version = "0.1.0"
edition = "2021"
license = "0BSD"
default-run = "toy"
[[bin]]
name = "toy"
path = "src/main.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[[bin]]
name = "org_compare"
path = "src/org_compare.rs"
[dependencies]
nom = "7.1.1"
@ -19,3 +24,5 @@ tracing-opentelemetry = "0.17.2"
tracing-subscriber = {version="0.3.16", features=["env-filter"]}
[features]
default = ["compare"]
compare = []

250
src/compare/diff.rs Normal file
View File

@ -0,0 +1,250 @@
use super::sexp::Token;
use crate::compare::util::get_offsets;
use crate::parser::Document;
use crate::parser::DocumentElement;
use crate::parser::Heading;
use crate::parser::Section;
use crate::parser::Paragraph;
use crate::parser::Element;
/// Outcome of comparing one node of the Emacs parse tree against the
/// corresponding node produced by the Rust parser.
#[derive(Debug)]
pub struct DiffResult {
    /// Whether this node itself matched (children are tracked separately).
    status: DiffStatus,
    /// Label printed for this node, e.g. "document" or "section".
    name: String,
    /// Results for the nodes nested under this one.
    children: Vec<DiffResult>,
}

/// Verdict for a single compared node.
#[derive(Debug, PartialEq)]
pub enum DiffStatus {
    Good,
    Bad,
}

impl DiffResult {
    /// Prints this result and all of its descendants to stdout, one node per
    /// line, nested nodes indented below their parent.
    pub fn print(&self) -> Result<(), Box<dyn std::error::Error>> {
        self.print_indented(0)
    }

    /// Prints this node at the given nesting depth, then recurses into the
    /// children one level deeper.
    fn print_indented(&self, indentation: usize) -> Result<(), Box<dyn std::error::Error>> {
        let label = match self.status {
            DiffStatus::Bad => "BAD",
            // A node that matched itself is still flagged when anything below it failed.
            DiffStatus::Good if self.has_bad_children() => "BADCHILD",
            DiffStatus::Good => "GOOD",
        };
        let padding = " ".repeat(indentation);
        println!("{}{} {}", padding, label, self.name);
        self.children
            .iter()
            .try_for_each(|child| child.print_indented(indentation + 1))
    }

    /// Returns `true` when any descendant (at any depth) has a `Bad` status.
    ///
    /// The status of `self` is not considered.
    pub fn has_bad_children(&self) -> bool {
        for child in &self.children {
            if child.status == DiffStatus::Bad || child.has_bad_children() {
                return true;
            }
        }
        false
    }
}
/// Compares the Emacs `org-data` cell against the document produced by the
/// Rust parser.
///
/// The Emacs list is expected to look like `(org-data <params> children...)`
/// where each child is either a `section` (only allowed as the first child)
/// or a `headline`.
///
/// The document itself is marked `Bad` when the number of Emacs children
/// differs from the number of Rust children (zeroth section plus headings).
///
/// # Errors
///
/// Returns an error when the Emacs token does not have the expected shape or
/// when an Emacs child has no counterpart in the Rust document.
pub fn compare_document<'s>(
    emacs: &'s Token<'s>,
    rust: &'s Document<'s>,
) -> Result<DiffResult, Box<dyn std::error::Error>> {
    let children = emacs.as_list()?;
    let first_child = children.first().ok_or("Should have at least one child.")?;
    let first_child_text = first_child.as_atom()?;
    if first_child_text != "org-data" {
        return Err("Document should correspond to an org-data cell.".into());
    }
    let mut child_status = Vec::new();
    // When the Rust document has a zeroth section, it occupies the first
    // child slot, so headings are shifted by one.
    let heading_offset = usize::from(rust.zeroth_section.is_some());
    // Skipping "org-data" and the first parameter which is often nil
    for (i, token) in children.iter().skip(2).enumerate() {
        let section_or_headline = token.as_list()?;
        let first_cell = section_or_headline
            .first()
            .ok_or("Should have at least one child.")?
            .as_atom()?;
        if first_cell == "section" {
            if i != 0 {
                return Err("Section cannot be after the first child of document.".into());
            }
            child_status.push(compare_section(
                rust.source,
                token,
                rust.zeroth_section
                    .as_ref()
                    .ok_or("No corresponding zeroth-section")?,
            )?);
        } else if first_cell == "headline" {
            // checked_sub: Emacs may start with a headline while Rust parsed a
            // zeroth section; report that as an error instead of underflowing.
            let corresponding_heading = i
                .checked_sub(heading_offset)
                .and_then(|index| rust.children.iter().nth(index))
                .ok_or("Should have a corresponding heading.")?;
            child_status.push(compare_heading(rust.source, token, corresponding_heading)?);
        } else {
            return Err("Document should only contain sections and headlines.".into());
        }
    }
    // Every Emacs child produced exactly one entry above, so a length
    // mismatch means the Rust document has children Emacs does not.
    let rust_child_count = heading_offset + rust.children.iter().count();
    let this_status = if child_status.len() == rust_child_count {
        DiffStatus::Good
    } else {
        DiffStatus::Bad
    };
    Ok(DiffResult {
        status: this_status,
        name: "document".to_owned(),
        children: child_status,
    })
}
/// Compares an Emacs `section` cell against a section from the Rust parser.
///
/// The section is marked `Bad` when its `:begin`/`:end` positions disagree
/// with the Rust offsets, or when the two sides have a different number of
/// children. Children are compared pairwise with [`compare_element`].
///
/// # Errors
///
/// Returns an error when the Emacs token is not a `section` list with an
/// attribute list containing `:begin` and `:end`, or when a child comparison
/// fails.
pub fn compare_section<'s>(
    source: &'s str,
    emacs: &'s Token<'s>,
    rust: &'s Section<'s>,
) -> Result<DiffResult, Box<dyn std::error::Error>> {
    let children = emacs.as_list()?;
    let first_child = children.first().ok_or("Should have at least one child.")?;
    let first_child_text = first_child.as_atom()?;
    if first_child_text != "section" {
        return Err("Section should correspond to a section cell.".into());
    }
    let attributes_child = children
        .iter()
        .nth(1)
        .ok_or("Should have an attributes child.")?;
    let attributes_map = attributes_child.as_map()?;
    let begin = attributes_map
        .get(":begin")
        .ok_or("Missing :begin attribute.")?
        .as_atom()?;
    let end = attributes_map
        .get(":end")
        .ok_or("Missing :end attribute.")?
        .as_atom()?;
    let (rust_begin, rust_end) = get_offsets(source, rust);
    // Emacs positions are 1-based and count characters, while the Rust
    // offsets are 0-based byte offsets; convert before comparing so that
    // multi-byte text does not produce false mismatches.
    let emacs_position = |byte_offset: usize| source[..byte_offset].chars().count() + 1;
    let offsets_match = emacs_position(rust_begin).to_string() == begin
        && emacs_position(rust_end).to_string() == end;
    // The attribute child exists, so there are at least two entries to skip.
    let emacs_children = &children[2..];
    // zip() below stops at the shorter side, so check the counts explicitly.
    let same_child_count = emacs_children.len() == rust.children.iter().count();
    let this_status = if offsets_match && same_child_count {
        DiffStatus::Good
    } else {
        DiffStatus::Bad
    };
    let child_status = emacs_children
        .iter()
        .zip(rust.children.iter())
        .map(|(emacs_child, rust_child)| compare_element(source, emacs_child, rust_child))
        .collect::<Result<Vec<_>, _>>()?;
    Ok(DiffResult {
        status: this_status,
        name: "section".to_owned(),
        children: child_status,
    })
}
/// Compares an Emacs `headline` cell against a heading from the Rust parser.
///
/// The heading is marked `Bad` when its `:begin`/`:end` positions disagree
/// with the Rust offsets, or when the two sides have a different number of
/// children. Children are compared pairwise, dispatching on the Rust child
/// to [`compare_heading`] or [`compare_section`].
///
/// # Errors
///
/// Returns an error when the Emacs token is not a `headline` list with an
/// attribute list containing `:begin` and `:end`, or when a child comparison
/// fails.
pub fn compare_heading<'s>(
    source: &'s str,
    emacs: &'s Token<'s>,
    rust: &'s Heading<'s>,
) -> Result<DiffResult, Box<dyn std::error::Error>> {
    let children = emacs.as_list()?;
    let first_child = children.first().ok_or("Should have at least one child.")?;
    let first_child_text = first_child.as_atom()?;
    if first_child_text != "headline" {
        return Err("Heading should correspond to a headline cell.".into());
    }
    let attributes_child = children
        .iter()
        .nth(1)
        .ok_or("Should have an attributes child.")?;
    let attributes_map = attributes_child.as_map()?;
    let begin = attributes_map
        .get(":begin")
        .ok_or("Missing :begin attribute.")?
        .as_atom()?;
    let end = attributes_map
        .get(":end")
        .ok_or("Missing :end attribute.")?
        .as_atom()?;
    let (rust_begin, rust_end) = get_offsets(source, rust);
    // Emacs positions are 1-based and count characters, while the Rust
    // offsets are 0-based byte offsets; convert before comparing so that
    // multi-byte text does not produce false mismatches.
    let emacs_position = |byte_offset: usize| source[..byte_offset].chars().count() + 1;
    let offsets_match = emacs_position(rust_begin).to_string() == begin
        && emacs_position(rust_end).to_string() == end;
    // The attribute child exists, so there are at least two entries to skip.
    let emacs_children = &children[2..];
    // zip() below stops at the shorter side, so check the counts explicitly.
    let same_child_count = emacs_children.len() == rust.children.iter().count();
    let this_status = if offsets_match && same_child_count {
        DiffStatus::Good
    } else {
        DiffStatus::Bad
    };
    let child_status = emacs_children
        .iter()
        .zip(rust.children.iter())
        .map(|(emacs_child, rust_child)| match rust_child {
            DocumentElement::Heading(rust_heading) => {
                compare_heading(source, emacs_child, rust_heading)
            }
            DocumentElement::Section(rust_section) => {
                compare_section(source, emacs_child, rust_section)
            }
        })
        .collect::<Result<Vec<_>, _>>()?;
    Ok(DiffResult {
        status: this_status,
        name: "heading".to_owned(),
        children: child_status,
    })
}
/// Dispatches the comparison of a single element to the function handling
/// its concrete type.
///
/// Only paragraphs are implemented; every other element variant currently
/// hits `todo!()` and panics.
pub fn compare_element<'s>(
    source: &'s str,
    emacs: &'s Token<'s>,
    rust: &'s Element<'s>,
) -> Result<DiffResult, Box<dyn std::error::Error>> {
    match rust {
        Element::PlainList(_) | Element::GreaterBlock(_) | Element::FootnoteDefinition(_) => {
            todo!()
        }
        Element::Paragraph(paragraph) => compare_paragraph(source, emacs, paragraph),
    }
}
/// Compares an Emacs `paragraph` cell against a paragraph from the Rust
/// parser.
///
/// Only the `:begin`/`:end` positions are checked; the paragraph's children
/// are not compared yet, so the returned result never has children.
///
/// # Errors
///
/// Returns an error when the Emacs token is not a `paragraph` list with an
/// attribute list containing `:begin` and `:end`.
pub fn compare_paragraph<'s>(
    source: &'s str,
    emacs: &'s Token<'s>,
    rust: &'s Paragraph<'s>,
) -> Result<DiffResult, Box<dyn std::error::Error>> {
    let children = emacs.as_list()?;
    let first_child = children
        .first()
        .ok_or("Should have at least one child.")?
        .as_atom()?;
    if first_child != "paragraph" {
        return Err("Paragraph should correspond to a paragraph cell.".into());
    }
    let attributes_child = children
        .iter()
        .nth(1)
        .ok_or("Should have an attributes child.")?;
    let attributes_map = attributes_child.as_map()?;
    let begin = attributes_map
        .get(":begin")
        .ok_or("Missing :begin attribute.")?
        .as_atom()?;
    let end = attributes_map
        .get(":end")
        .ok_or("Missing :end attribute.")?
        .as_atom()?;
    let (rust_begin, rust_end) = get_offsets(source, rust);
    // Emacs positions are 1-based and count characters, while the Rust
    // offsets are 0-based byte offsets; convert before comparing so that
    // multi-byte text does not produce false mismatches.
    let emacs_position = |byte_offset: usize| source[..byte_offset].chars().count() + 1;
    let offsets_match = emacs_position(rust_begin).to_string() == begin
        && emacs_position(rust_end).to_string() == end;
    let this_status = if offsets_match {
        DiffStatus::Good
    } else {
        DiffStatus::Bad
    };
    Ok(DiffResult {
        status: this_status,
        name: "paragraph".to_owned(),
        children: Vec::new(),
    })
}

25
src/compare/error.rs Normal file
View File

@ -0,0 +1,25 @@
use nom::error::ErrorKind;
use nom::error::ParseError;
use nom::IResult;
/// Parser result alias: a nom `IResult` over input `T` and output `U` whose
/// error type is [`CustomError`] carrying the same input type.
pub type Res<T, U> = IResult<T, U, CustomError<T>>;
/// Error type used by the parsers through the [`Res`] alias.
#[derive(Debug, PartialEq)]
pub enum CustomError<I> {
    /// A project-specific error wrapping a [`MyError`].
    MyError(MyError<I>),
    /// An error raised by nom itself: the input and the nom error kind, as
    /// built by `ParseError::from_error_kind`.
    Nom(I, ErrorKind),
}
/// Payload of the `CustomError::MyError` variant: a newtype around a single
/// public value of type `I`.
// NOTE(review): presumably `I` is the parser input at the failure point — confirm against the call sites.
#[derive(Debug, PartialEq)]
pub struct MyError<I>(pub I);
/// Lets nom combinators build and combine [`CustomError`] values.
impl<I> ParseError<I> for CustomError<I> {
    /// Wraps the failing input and nom error kind in `CustomError::Nom`.
    fn from_error_kind(input: I, kind: ErrorKind) -> Self {
        CustomError::Nom(input, kind)
    }

    /// Keeps the existing error unchanged; the new input and kind are dropped.
    fn append(_input: I, _kind: ErrorKind, other: Self) -> Self {
        // Doesn't do append like VerboseError
        other
    }
}

8
src/compare/mod.rs Normal file
View File

@ -0,0 +1,8 @@
mod diff;
mod error;
mod parse;
mod sexp;
mod util;
pub use diff::compare_document;
pub use parse::emacs_parse_org_document;
pub use sexp::sexp;

37
src/compare/parse.rs Normal file
View File

@ -0,0 +1,37 @@
use std::path::Path;
use std::process::Command;
use crate::compare::sexp::sexp;
/// Unfinished entry point: asks Emacs for the parse tree of `file_path`,
/// parses the returned s-expression, and then hits `todo!()`.
///
/// # Errors
///
/// Propagates any error from [`emacs_parse_org_document`].
///
/// # Panics
///
/// Panics when the s-expression cannot be parsed, and always panics at the
/// trailing `todo!()` otherwise.
pub fn compare_parse_org_document<'a, C>(file_path: C) -> Result<String, Box<dyn std::error::Error>>
where
    C: AsRef<Path>,
{
    let emacs_output = emacs_parse_org_document(file_path)?;
    let _parsed_tree = sexp(&emacs_output).expect("Parse failure");
    todo!()
}
/// Runs `emacs` in batch mode on `file_path` and returns the pretty-printed
/// `org-element-parse-buffer` tree as text.
///
/// The file is inserted into a buffer, org-mode is enabled and the tree is
/// emitted with `message`; the text is read back from the process's stderr.
///
/// # Errors
///
/// Returns an error when the process cannot be started, exits with a
/// non-success status, or writes output that is not valid UTF-8.
pub fn emacs_parse_org_document<'a, C>(file_path: C) -> Result<String, Box<dyn std::error::Error>>
where
    C: AsRef<Path>,
{
    let elisp_script = r#"(progn
(org-mode)
(message "%s" (pp-to-string (org-element-parse-buffer)))
)"#;
    let output = Command::new("emacs")
        .args(["-q", "--no-site-file", "--no-splash", "--batch", "--insert"])
        .arg(file_path.as_ref().as_os_str())
        .arg("--eval")
        .arg(elisp_script)
        .output()?;
    output.status.exit_ok()?;
    let org_sexp = String::from_utf8(output.stderr)?;
    Ok(org_sexp)
}

212
src/compare/sexp.rs Normal file
View File

@ -0,0 +1,212 @@
use std::collections::HashMap;
use nom::branch::alt;
use nom::bytes::complete::escaped;
use nom::bytes::complete::tag;
use nom::bytes::complete::take_till1;
use nom::character::complete::multispace0;
use nom::character::complete::multispace1;
use nom::character::complete::one_of;
use nom::combinator::map;
use nom::combinator::not;
use nom::combinator::opt;
use nom::combinator::peek;
use nom::combinator::verify;
use nom::multi::separated_list1;
use nom::sequence::delimited;
use nom::sequence::preceded;
use nom::sequence::tuple;
use super::error::Res;
/// One node of a parsed s-expression. All text borrows from the parsed input.
#[derive(Debug)]
pub enum Token<'s> {
    /// A bare or quoted atom; quoted atoms keep their surrounding quotes.
    Atom(&'s str),
    /// A parenthesized list of tokens.
    List(Vec<Token<'s>>),
    /// A `#("text" props...)` form.
    TextWithProperties(TextWithProperties<'s>),
}

/// Contents of a `#("text" props...)` form.
#[derive(Debug)]
pub struct TextWithProperties<'s> {
    /// The quoted text, including its quotes.
    text: &'s str,
    /// The tokens following the text, if any.
    properties: Vec<Token<'s>>,
}

impl<'s> Token<'s> {
    /// Returns the children when this token is a list.
    ///
    /// # Errors
    ///
    /// Fails with "wrong token type" for any other variant.
    pub fn as_list<'p>(&'p self) -> Result<&'p Vec<Token<'s>>, Box<dyn std::error::Error>> {
        if let Token::List(children) = self {
            Ok(children)
        } else {
            Err("wrong token type".into())
        }
    }

    /// Returns the text when this token is an atom.
    ///
    /// # Errors
    ///
    /// Fails with "wrong token type" for any other variant.
    pub fn as_atom<'p>(&'p self) -> Result<&'s str, Box<dyn std::error::Error>> {
        if let Token::Atom(body) = self {
            Ok(*body)
        } else {
            Err("wrong token type".into())
        }
    }

    /// Interprets a list as alternating keys and values and collects it into
    /// a map from key text to value token.
    ///
    /// # Errors
    ///
    /// Fails when this token is not a list, when the list has an odd number
    /// of entries, or when a key is not an atom.
    pub fn as_map<'p>(
        &'p self,
    ) -> Result<HashMap<&'s str, &'p Token<'s>>, Box<dyn std::error::Error>> {
        let entries = self.as_list()?;
        if entries.len() % 2 != 0 {
            return Err("Expecting an even number of children".into());
        }
        let mut pairs = HashMap::new();
        let mut remaining = entries.iter();
        // The length is even, so keys and values always come in pairs.
        while let (Some(key_token), Some(value)) = (remaining.next(), remaining.next()) {
            pairs.insert(key_token.as_atom()?, value);
        }
        Ok(pairs)
    }
}
#[tracing::instrument(ret, level = "debug")]
pub fn sexp<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
let (remaining, _) = multispace0(input)?;
let (remaining, tkn) = token(remaining)?;
let (remaining, _) = multispace0(remaining)?;
Ok((remaining, tkn))
}
/// Parses one token: tries a parenthesized list first and falls back to an
/// atom.
#[tracing::instrument(ret, level = "debug")]
fn token<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
    alt((list, atom))(input)
}
#[tracing::instrument(ret, level = "debug")]
fn list<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
let (remaining, _) = tag("(")(input)?;
let (remaining, children) = delimited(
multispace0,
separated_list1(multispace1, token),
multispace0,
)(remaining)?;
let (remaining, _) = tag(")")(remaining)?;
Ok((remaining, Token::List(children)))
}
#[tracing::instrument(ret, level = "debug")]
fn atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
not(peek(tag(")")))(input)?;
alt((text_with_properties, quoted_atom, unquoted_atom))(input)
}
#[tracing::instrument(ret, level = "debug")]
fn unquoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
let (remaining, body) = take_till1(|c| match c {
' ' | '\t' | '\r' | '\n' | ')' => true,
_ => false,
})(input)?;
Ok((remaining, Token::Atom(body)))
}
#[tracing::instrument(ret, level = "debug")]
fn quoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
let (remaining, _) = tag(r#"""#)(input)?;
let (remaining, _) = escaped(
take_till1(|c| match c {
'\\' | '"' | ')' => true,
_ => false,
}),
'\\',
one_of(r#""n"#),
)(remaining)?;
let (remaining, _) = tag(r#"""#)(remaining)?;
let source = get_consumed(input, remaining);
Ok((remaining, Token::Atom(source)))
}
/// Parses a `#("text" props...)` form into `Token::TextWithProperties`.
///
/// The quoted text must be followed by whitespace; the property tokens after
/// it are optional and default to an empty list.
fn text_with_properties<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
    let quoted_text = map(quoted_atom, |atom| match atom {
        Token::Atom(body) => body,
        // quoted_atom only ever produces Token::Atom.
        _ => unreachable!(),
    });
    let trailing_properties = preceded(multispace1, opt(separated_list1(multispace1, token)));
    let (remaining, (text, props)) = delimited(
        tag("#("),
        delimited(
            multispace0,
            tuple((quoted_text, trailing_properties)),
            multispace0,
        ),
        tag(")"),
    )(input)?;
    let parsed = TextWithProperties {
        text,
        properties: props.unwrap_or_default(),
    };
    Ok((remaining, Token::TextWithProperties(parsed)))
}
/// Returns the prefix of `input` that a parser consumed, given the parser's
/// original `input` and the `remaining` input it handed back.
///
/// # Panics
///
/// Panics when `remaining` does not lie inside `input`.
fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str {
    assert!(is_slice_of(input, remaining));
    // Both slices share one buffer, so the pointer distance is the number of
    // bytes consumed.
    let consumed_len = remaining.as_ptr() as usize - input.as_ptr() as usize;
    &input[..consumed_len]
}
/// Reports whether `child` lies entirely within the memory spanned by
/// `parent`.
fn is_slice_of(parent: &str, child: &str) -> bool {
    // Address range [start, end) occupied by a string slice.
    let bounds = |s: &str| {
        let start = s.as_ptr() as usize;
        (start, start + s.len())
    };
    let (parent_start, parent_end) = bounds(parent);
    let (child_start, child_end) = bounds(child);
    parent_start <= child_start && child_end <= parent_end
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A list of bare atoms with surrounding whitespace parses completely.
    #[test]
    fn simple() {
        let input = " (foo bar baz ) ";
        let (remaining, parsed) = sexp(input).expect("Parse the input");
        assert_eq!(remaining, "");
        assert!(matches!(parsed, Token::List(_)));
    }

    /// A quoted atom inside a list keeps its surrounding quotes.
    #[test]
    fn quoted() {
        let input = r#" ("foo" bar baz ) "#;
        let (remaining, parsed) = sexp(input).expect("Parse the input");
        assert_eq!(remaining, "");
        assert!(matches!(parsed, Token::List(_)));
        let children = match parsed {
            Token::List(children) => children,
            _ => panic!("Should be a list."),
        };
        let first_atom = match children.first() {
            Some(Token::Atom(body)) => *body,
            _ => panic!("First child should be an atom."),
        };
        assert_eq!(first_atom, r#""foo""#)
    }
}

21
src/compare/util.rs Normal file
View File

@ -0,0 +1,21 @@
use crate::parser::Source;
/// Reports whether `child` lies entirely within the memory spanned by
/// `parent`.
fn is_slice_of(parent: &str, child: &str) -> bool {
    // Address range [start, end) occupied by a string slice.
    let bounds = |s: &str| {
        let start = s.as_ptr() as usize;
        (start, start + s.len())
    };
    let (parent_start, parent_end) = bounds(parent);
    let (child_start, child_end) = bounds(child);
    parent_start <= child_start && child_end <= parent_end
}
/// Locates `rust_object` inside `source` and returns its `(begin, end)` byte
/// offsets, with `end` exclusive.
///
/// These offsets are zero-based unlike the elisp ones.
///
/// # Panics
///
/// Panics when the object's source text is not a slice of `source`.
pub fn get_offsets<'s, S: Source<'s>>(source: &'s str, rust_object: &'s S) -> (usize, usize) {
    let object_text = rust_object.get_source();
    assert!(is_slice_of(source, object_text));
    // Pointer distance from the start of the document to the object's text.
    let begin = object_text.as_ptr() as usize - source.as_ptr() as usize;
    (begin, begin + object_text.len())
}

34
src/init_tracing.rs Normal file
View File

@ -0,0 +1,34 @@
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::util::SubscriberInitExt;
use tracing_subscriber::EnvFilter;
/// Installs the global tracing subscriber: an environment-driven filter
/// combined with an OpenTelemetry layer exporting through a Jaeger pipeline
/// registered under the service name "toy_language".
///
/// The filter is read from the default environment variable and falls back
/// to "WARN" when that cannot be used.
///
/// # Errors
///
/// Returns an error when the Jaeger pipeline cannot be installed or when the
/// subscriber cannot be initialized.
pub fn init_telemetry() -> Result<(), Box<dyn std::error::Error>> {
    // Build the fallback filter lazily, only when the environment one is unusable.
    let env_filter =
        EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("WARN"));

    // Optional stdout layer, kept disabled:
    // let stdout = tracing_subscriber::fmt::Layer::new()
    // .pretty()
    // .with_file(true)
    // .with_line_number(true)
    // .with_thread_ids(false)
    // .with_target(false);

    opentelemetry::global::set_text_map_propagator(opentelemetry_jaeger::Propagator::new());
    let tracer = opentelemetry_jaeger::new_pipeline()
        .with_service_name("toy_language")
        .install_simple()?;
    let opentelemetry = tracing_opentelemetry::layer().with_tracer(tracer);
    tracing_subscriber::registry()
        .with(env_filter)
        .with(opentelemetry)
        // .with(stdout)
        .try_init()?;
    Ok(())
}
/// Shuts down the global OpenTelemetry tracer provider.
///
/// Always returns `Ok(())`.
pub fn shutdown_telemetry() -> Result<(), Box<dyn std::error::Error>> {
    opentelemetry::global::shutdown_tracer_provider();
    Ok(())
}

View File

@ -1,9 +1,9 @@
#![feature(round_char_boundary)]
use crate::init_tracing::init_telemetry;
use crate::init_tracing::shutdown_telemetry;
use crate::parser::document;
use tracing_subscriber::EnvFilter;
mod init_tracing;
mod parser;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::util::SubscriberInitExt;
const TEST_DOC: &'static str = include_str!("../toy_language.txt");
@ -12,31 +12,6 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let parsed = document(TEST_DOC);
println!("{}\n\n\n", TEST_DOC);
println!("{:#?}", parsed);
opentelemetry::global::shutdown_tracer_provider();
Ok(())
}
fn init_telemetry() -> Result<(), Box<dyn std::error::Error>> {
let env_filter = EnvFilter::try_from_default_env().unwrap_or(EnvFilter::new("WARN"));
// let stdout = tracing_subscriber::fmt::Layer::new()
// .pretty()
// .with_file(true)
// .with_line_number(true)
// .with_thread_ids(false)
// .with_target(false);
opentelemetry::global::set_text_map_propagator(opentelemetry_jaeger::Propagator::new());
let tracer = opentelemetry_jaeger::new_pipeline()
.with_service_name("toy_language")
.install_simple()?;
let opentelemetry = tracing_opentelemetry::layer().with_tracer(tracer);
tracing_subscriber::registry()
.with(env_filter)
.with(opentelemetry)
// .with(stdout)
.try_init()?;
shutdown_telemetry()?;
Ok(())
}

28
src/org_compare.rs Normal file
View File

@ -0,0 +1,28 @@
#![feature(round_char_boundary)]
#![feature(exit_status_error)]
use crate::compare::compare_document;
use crate::init_tracing::init_telemetry;
use crate::init_tracing::shutdown_telemetry;
use crate::parser::document;
use compare::emacs_parse_org_document;
use compare::sexp;
mod compare;
mod init_tracing;
mod parser;
/// Compares the Emacs and Rust parses of every org file named on the command
/// line and prints the resulting diff trees.
///
/// Telemetry is shut down even when a comparison fails, so the failing run
/// still gets the chance to flush its traces.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    init_telemetry()?;
    let outcome = compare_files(std::env::args().skip(1));
    if outcome.is_ok() {
        println!("Done.");
    }
    let shutdown = shutdown_telemetry();
    // Report the comparison error first; fall back to the shutdown result.
    outcome.and(shutdown)
}

/// Runs the Emacs-vs-Rust comparison for each path, stopping at the first
/// failure.
fn compare_files(
    org_paths: impl Iterator<Item = String>,
) -> Result<(), Box<dyn std::error::Error>> {
    for org_path in org_paths {
        let org_contents = std::fs::read_to_string(&org_path)?;
        let org_sexp = emacs_parse_org_document(&org_path)?;
        println!("{}", org_sexp);
        // Parse errors borrow from the local strings, so render them to an
        // owned message before propagating.
        let (_remaining, parsed_sexp) = sexp(org_sexp.as_str())
            .map_err(|err| format!("Sexp Parse failure: {}", err))?;
        let (_remaining, rust_parsed) = document(org_contents.as_str())
            .map_err(|err| format!("Org Parse failure: {}", err))?;
        println!("{:#?}", rust_parsed);
        let diff_result = compare_document(&parsed_sexp, &rust_parsed)?;
        diff_result.print()?;
    }
    Ok(())
}

View File

@ -74,6 +74,18 @@ impl<'s> Source<'s> for DocumentElement<'s> {
}
}
impl<'s> Source<'s> for Section<'s> {
    /// Returns the section's `source` field.
    fn get_source(&'s self) -> &'s str {
        self.source
    }
}
impl<'s> Source<'s> for Heading<'s> {
    /// Returns the heading's `source` field.
    fn get_source(&'s self) -> &'s str {
        self.source
    }
}
#[tracing::instrument(ret, level = "debug")]
#[allow(dead_code)]
pub fn document(input: &str) -> Res<&str, Document> {

View File

@ -32,6 +32,12 @@ impl<'s> Source<'s> for Element<'s> {
}
}
impl<'s> Source<'s> for Paragraph<'s> {
    /// Returns the paragraph's `source` field.
    fn get_source(&'s self) -> &'s str {
        self.source
    }
}
#[tracing::instrument(ret, level = "debug")]
pub fn element<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Element<'s>> {
let non_paragraph_matcher = parser_with_context!(non_paragraph_element)(context);

View File

@ -15,4 +15,11 @@ mod plain_text;
mod source;
mod util;
pub use document::document;
pub use document::Document;
pub use document::DocumentElement;
pub use document::Heading;
pub use document::Section;
pub use element::Element;
pub use lesser_element::Paragraph;
pub use source::Source;
type Context<'r, 's> = &'r parser_context::ContextTree<'r, 's>;

View File

@ -1,9 +1,8 @@
use std::rc::Rc;
use nom::combinator::eof;
use nom::IResult;
use crate::parser::util::whitespace_eof;
use super::error::CustomError;
use super::error::MyError;
use super::error::Res;
@ -25,7 +24,7 @@ impl<'r, 's> ContextTree<'r, 's> {
pub fn branch_from(trunk: &Rc<Node<ContextElement<'r, 's>>>) -> Self {
ContextTree {
tree: List::branch_from(trunk)
tree: List::branch_from(trunk),
}
}
@ -67,7 +66,7 @@ impl<'r, 's> ContextTree<'r, 's> {
i: &'s str,
) -> IResult<&'s str, &'s str, CustomError<&'s str>> {
// Special check for EOF. We don't just make this a document-level exit matcher since the IgnoreParent ChainBehavior could cause early exit matchers to not run.
let at_end_of_file = whitespace_eof(i);
let at_end_of_file = eof(i);
if at_end_of_file.is_ok() {
return at_end_of_file;
}