Update the diff program to use exit code 1 for non-matching diff.

This commit is contained in:
Tom Alexander 2023-04-12 14:50:56 -04:00
parent 05c9ec86b8
commit 1a38ca43d6
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
10 changed files with 24 additions and 756 deletions

View File

@ -54,6 +54,13 @@ impl DiffResult {
.iter()
.any(|child| child.status == DiffStatus::Bad || child.has_bad_children())
}
/// Reports whether this diff result, or anything beneath it, is a mismatch.
///
/// Returns `true` when this node's own status is `DiffStatus::Bad` or when
/// `has_bad_children` reports a bad descendant. Returns `false` only when the
/// node and all of its descendants are good.
pub fn is_bad(&self) -> bool {
    match self.status {
        // A good node is still bad overall if any descendant is bad. The result of
        // `has_bad_children` must not be negated: negating it reports fully
        // matching trees as bad and trees with bad descendants as good.
        DiffStatus::Good => self.has_bad_children(),
        DiffStatus::Bad => true,
    }
}
}
pub fn compare_document<'s>(

View File

@ -1,5 +1,7 @@
#![feature(round_char_boundary)]
#![feature(exit_status_error)]
use std::process::ExitCode;
use crate::compare::compare_document;
use crate::init_tracing::init_telemetry;
use crate::init_tracing::shutdown_telemetry;
@ -10,7 +12,13 @@ mod compare;
mod init_tracing;
mod parser;
fn main() -> Result<(), Box<dyn std::error::Error>> {
fn main() -> ExitCode {
let result = _main().expect("Ran into an error comparing the parses.");
ExitCode::from(result)
}
fn _main() -> Result<u8, Box<dyn std::error::Error>> {
let mut ran_into_problems: bool = false;
init_telemetry()?;
for org_path in std::env::args().skip(1) {
let org_contents = std::fs::read_to_string(&org_path)?;
@ -21,8 +29,15 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
println!("{:#?}", rust_parsed);
let diff_result = compare_document(&parsed_sexp, &rust_parsed)?;
diff_result.print()?;
if diff_result.is_bad() {
ran_into_problems = true;
}
}
println!("Done.");
shutdown_telemetry()?;
Ok(())
if ran_into_problems {
Ok(1)
} else {
Ok(0)
}
}

View File

@ -1,108 +0,0 @@
use super::combinator::context_many_till;
use super::error::CustomError;
use super::error::MyError;
use super::error::Res;
use super::parser_context::ChainBehavior;
use super::parser_context::ContextElement;
use super::parser_context::ExitMatcherNode;
use super::text::symbol;
use super::text::text_element;
use super::token::Bold;
use super::token::TextElement;
use super::token::Token;
use super::util::in_section;
use super::Context;
use crate::parser::parser_with_context::parser_with_context;
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::combinator::map;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::sequence::tuple;
/// Parses a bold section beginning at `i`.
///
/// The opening marker is matched by `context_bold_start`. The body is then parsed
/// with `text_element` until `context_bold_end` matches, using a context extended
/// with a bold exit matcher and a `"bold"` section marker. The returned `Bold`
/// holds the whole recognized slice, markers included.
pub fn bold<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, Bold<'s>> {
    let opening = parser_with_context!(context_bold_start)(&context);
    let exit_node = ContextElement::ExitMatcherNode(ExitMatcherNode {
        exit_matcher: ChainBehavior::AndParent(Some(&context_bold_end)),
    });
    let inner_context = context
        .with_additional_node(exit_node)
        .with_additional_node(ContextElement::Context("bold"));
    let (remaining, source) = recognize(tuple((opening, |input| {
        context_many_till(&inner_context, text_element, context_bold_end)(input)
    })))(i)?;
    Ok((remaining, Bold { source }))
}
/// Returns `true` when bold may open here: the preceding element counts as
/// whitespace and we are not already inside a `"bold"` section.
fn can_start_bold<'r, 's>(context: Context<'r, 's>) -> bool {
    if !_preceded_by_whitespace(context) {
        return false;
    }
    !in_section(context, "bold")
}
/// Matches the opening bold marker, but only where `can_start_bold` allows it.
///
/// Returns the recognized marker slice, or a `"Cannot start bold"` error when the
/// context forbids opening bold here.
fn context_bold_start<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
    if !can_start_bold(context) {
        // TODO: Make this a specific error instead of just a generic MyError
        return Err(nom::Err::Error(CustomError::MyError(MyError(
            "Cannot start bold",
        ))));
    }
    recognize(bold_start)(input)
}
/// Matches the closing bold marker and checks what follows it.
///
/// The marker is consumed; the character after it is only peeked at, so the
/// returned remainder starts immediately after the marker.
fn context_bold_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
    let (after_marker, marker) = recognize(bold_end)(input)?;
    // The closing asterisk must be followed by whitespace, or by whatever ends the
    // enclosing section (as reported by the exit matcher).
    peek(alt((
        tag(" "),
        tag("\t"),
        tag("\n"),
        |rest| context.check_exit_matcher(rest),
    )))(after_marker)?;
    Ok((after_marker, marker))
}
/// Matches a single `*` and wraps it as a `TextElement::Symbol`.
fn bold_start(input: &str) -> Res<&str, TextElement> {
    symbol("*")(input).map(|(rest, asterisk)| (rest, TextElement::Symbol(asterisk)))
}
/// Matches a single `*` and wraps it as a `TextElement::Symbol`.
fn bold_end(input: &str) -> Res<&str, TextElement> {
    symbol("*")(input).map(|(rest, asterisk)| (rest, TextElement::Symbol(asterisk)))
}
/// Walks the context nodes and decides whether the current position is preceded
/// by whitespace.
///
/// The first previously parsed element encountered settles the answer: a space or
/// a line break yields `true`, any other text element yields `false`. Reaching
/// the start of a paragraph or the document root also yields `true`. Exit
/// matchers, section markers and list-item markers are skipped. Returns `false`
/// if the walk ends without a decision.
fn _preceded_by_whitespace<'r, 's>(context: Context<'r, 's>) -> bool {
    for node in context.iter() {
        match node.get_data() {
            ContextElement::PreviousElementNode(previous) => {
                return match &previous.element {
                    Token::TextElement(TextElement::Space(_) | TextElement::LineBreak(_)) => true,
                    Token::TextElement(
                        TextElement::Span(_)
                        | TextElement::Symbol(_)
                        | TextElement::Bold(_)
                        | TextElement::Link(_),
                    ) => false,
                    Token::Paragraph(_) => unreachable!(),
                };
            }
            ContextElement::StartOfParagraph | ContextElement::DocumentRoot(_) => return true,
            ContextElement::ExitMatcherNode(_)
            | ContextElement::Context(_)
            | ContextElement::ListItem(_) => {}
        }
    }
    false
}

View File

@ -1,60 +0,0 @@
use crate::parser::parser_with_context::parser_with_context;
use super::combinator::context_many_till;
use super::error::CustomError;
use super::error::MyError;
use super::error::Res;
use super::parser_context::ChainBehavior;
use super::parser_context::ContextElement;
use super::parser_context::ExitMatcherNode;
use super::text::symbol;
use super::text::text_element;
use super::token::Link;
use super::token::TextElement;
use super::util::in_section;
use super::Context;
use nom::combinator::map;
use nom::combinator::recognize;
use nom::sequence::tuple;
/// Parses a link section beginning at `i`.
///
/// The opening marker is matched by `context_link_start`. The body is then parsed
/// with `text_element` until `context_link_end` matches, using a context extended
/// with a link exit matcher and a `"link"` section marker. The returned `Link`
/// holds the whole recognized slice, markers included.
pub fn link<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, Link<'s>> {
    let opening = parser_with_context!(context_link_start)(&context);
    let exit_node = ContextElement::ExitMatcherNode(ExitMatcherNode {
        exit_matcher: ChainBehavior::AndParent(Some(&context_link_end)),
    });
    let inner_context = context
        .with_additional_node(exit_node)
        .with_additional_node(ContextElement::Context("link"));
    let (remaining, source) = recognize(tuple((opening, |input| {
        context_many_till(&inner_context, text_element, context_link_end)(input)
    })))(i)?;
    Ok((remaining, Link { source }))
}
/// Returns `true` when a link may open here, i.e. we are not already inside a
/// `"link"` section.
fn can_start_link<'r, 's>(context: Context<'r, 's>) -> bool {
    let already_in_link = in_section(context, "link");
    !already_in_link
}
/// Matches the opening link marker, but only where `can_start_link` allows it.
///
/// Returns the recognized marker slice, or a `"Cannot start link"` error when the
/// context forbids opening a link here.
fn context_link_start<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
    if !can_start_link(context) {
        // TODO: Make this a specific error instead of just a generic MyError
        return Err(nom::Err::Error(CustomError::MyError(MyError(
            "Cannot start link",
        ))));
    }
    recognize(link_start)(input)
}
/// Matches the closing link marker and returns the recognized slice. The context
/// is accepted for signature compatibility but not consulted.
fn context_link_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
    recognize(link_end)(input)
}
/// Matches a single `[` and wraps it as a `TextElement::Symbol`.
fn link_start(input: &str) -> Res<&str, TextElement> {
    symbol("[")(input).map(|(rest, bracket)| (rest, TextElement::Symbol(bracket)))
}
/// Matches a single `]` and wraps it as a `TextElement::Symbol`.
fn link_end(input: &str) -> Res<&str, TextElement> {
    symbol("]")(input).map(|(rest, bracket)| (rest, TextElement::Symbol(bracket)))
}

View File

@ -1,125 +0,0 @@
use super::parser_context::ContextElement;
use super::parser_context::PreviousElementNode;
use super::token::Token;
use super::Context;
use nom::error::ErrorKind;
use nom::error::ParseError;
use nom::IResult;
use nom::InputLength;
/// Applies `many_matcher` repeatedly until it fails, collecting every match as a `Token`.
///
/// After each successful match the element is pushed onto the context as a
/// `ContextElement::PreviousElementNode`, so the next attempt runs against a
/// context that includes everything matched so far.
///
/// Returns the remaining input and the collected tokens. If the matcher fails
/// before producing any element, that failure is returned instead.
///
/// NOTE(review): the loop only stops on `Err`. Unlike `context_many_till` there is
/// no check that the matcher consumed input, so a matcher that succeeds without
/// consuming would apparently loop forever. Confirm callers rule that out.
pub fn context_many1<'r, 's, I, O, E, M>(
context: Context<'r, 's>,
mut many_matcher: M,
) -> impl FnMut(I) -> IResult<I, Vec<Token<'s>>, E> + 'r
where
I: Clone + InputLength,
E: ParseError<I>,
M: for<'x> Fn(Context<'x, 's>, I) -> IResult<I, O, E> + 'r,
O: Into<Token<'s>>,
{
move |mut i: I| {
// Holds the failure that ended the loop; only reported if nothing matched.
let mut err = None;
// TODO: Can I eliminate the clone? I think this is incrementing the reference count
let mut current_context = context.clone();
// Despite the clone, the Rc should still point to the same value.
assert!(current_context.ptr_eq(context));
loop {
match many_matcher(&current_context, i.clone()) {
Ok((remaining, many_elem)) => {
// Record the match on the context and advance the input.
current_context = current_context.with_additional_node(
ContextElement::PreviousElementNode(PreviousElementNode {
element: many_elem.into(),
}),
);
i = remaining;
}
the_error @ Err(_) => {
err = Some(the_error);
break;
}
}
}
// Recover the matched elements from the nodes added on top of the caller's context.
let mut elements: Vec<Token<'s>> = current_context
.into_iter_until(context)
.filter_map(|context_element| match context_element {
ContextElement::PreviousElementNode(elem) => Some(elem.element),
_ => None,
})
.collect();
if elements.is_empty() {
// Nothing matched at all: surface the matcher's error.
if let Some(err) = err {
err?;
}
}
// Presumably the context is walked newest-first, hence the reversal into match order.
elements.reverse();
Ok((i, elements))
}
}
/// Applies `many_matcher` repeatedly until `till_matcher` succeeds.
///
/// Each round tries `till_matcher` first. When it succeeds, the nodes this
/// function pushed onto the context are popped again, their tokens are collected,
/// and the result is `(remaining, (tokens, finish))`, where `finish` is the
/// `till_matcher` output.
///
/// When `till_matcher` fails with a recoverable `nom::Err::Error`, `many_matcher`
/// is tried instead:
/// - a recoverable error from it is wrapped with `ErrorKind::ManyTill`;
/// - any other error is returned unchanged;
/// - a success that consumed no input is rejected with `ErrorKind::ManyTill`;
/// - otherwise the element is pushed onto the context and the loop continues.
///
/// Any non-recoverable failure from `till_matcher` is returned unchanged.
pub fn context_many_till<'r, 's, I, O, E, F, M, T>(
context: Context<'r, 's>,
mut many_matcher: M,
mut till_matcher: T,
) -> impl FnMut(I) -> IResult<I, (Vec<Token<'s>>, F), E> + 'r
where
I: Clone + InputLength,
E: ParseError<I>,
M: for<'x> Fn(Context<'x, 's>, I) -> IResult<I, O, E> + 'r,
T: for<'x> Fn(Context<'x, 's>, I) -> IResult<I, F, E> + 'r,
O: Into<Token<'s>>,
{
move |mut i: I| {
// TODO: Can I eliminate the clone? I think this is incrementing the reference count
let mut current_context = context.clone();
// Despite the clone, the Rc should still point to the same value, otherwise we'll get stuck in an endless loop.
assert!(current_context.ptr_eq(context));
loop {
// Input length before this round, used below to detect a non-consuming match.
let len = i.input_len();
match till_matcher(&current_context, i.clone()) {
Ok((remaining, finish)) => {
let mut ret = Vec::new();
// Unwind back to the caller's context, keeping the tokens recorded on the way.
while !current_context.ptr_eq(context) {
let (context_element, next_context) = current_context.pop_front();
let context_element = context_element.expect("We only pop off context elements created in this function, so they are all Some()");
current_context = next_context;
match context_element {
ContextElement::PreviousElementNode(PreviousElementNode {
element: token,
}) => {
ret.push(token);
}
_ => {}
};
}
// Tokens were popped newest-first; restore match order.
ret.reverse();
return Ok((remaining, (ret, finish)));
}
Err(nom::Err::Error(_)) => {
// The terminator did not match here, so try to consume one more element.
match many_matcher(&current_context, i.clone()) {
Err(nom::Err::Error(err)) => {
return Err(nom::Err::Error(E::append(i, ErrorKind::ManyTill, err)))
}
Err(e) => return Err(e),
Ok((remaining, many_elem)) => {
// infinite loop check: the parser must always consume
if remaining.input_len() == len {
return Err(nom::Err::Error(E::from_error_kind(
remaining,
ErrorKind::ManyTill,
)));
}
current_context = current_context.with_additional_node(
ContextElement::PreviousElementNode(PreviousElementNode {
element: many_elem.into(),
}),
);
i = remaining;
}
}
}
Err(e) => return Err(e),
};
}
}
}

View File

@ -1,29 +0,0 @@
//! A single element of text.
use super::combinator::context_many1;
use super::error::Res;
use super::paragraph::paragraph;
use super::parser_context::ContextElement;
use super::parser_context::ContextTree;
use super::token::Paragraph;
use super::token::Token;
use super::Context;
use nom::IResult;
/// Trait-object type for a parser function that takes a `Context` plus input `I`
/// and returns an `IResult<I, O, E>`.
type UnboundMatcher<'r, 's, I, O, E> = dyn Fn(Context<'r, 's>, I) -> IResult<I, O, E>;
// TODO: Implement FromStr for Document
/// Parses `input` as a document made of one or more paragraphs.
///
/// A fresh context tree is created with a `DocumentRoot` node for `input`, and
/// `paragraph` is applied repeatedly through `context_many1`. Returns the
/// unparsed remainder together with the paragraphs in the order they were parsed.
pub fn document(input: &str) -> Res<&str, Vec<Paragraph>> {
    let root: ContextTree<'_, '_> = ContextTree::new();
    let document_context = root.with_additional_node(ContextElement::DocumentRoot(input));
    let (remaining, tokens) = context_many1(&document_context, paragraph)(input)?;
    let mut paragraphs = Vec::with_capacity(tokens.len());
    for token in tokens {
        match token {
            Token::Paragraph(parsed) => paragraphs.push(parsed),
            // The matcher above only ever yields paragraphs.
            Token::TextElement(_) => unreachable!(),
        }
    }
    Ok((remaining, paragraphs))
}

View File

@ -1,67 +0,0 @@
use super::combinator::context_many_till;
use super::error::Res;
use super::parser_context::ChainBehavior;
use super::parser_context::ContextElement;
use super::parser_context::ExitMatcherNode;
use super::text::blank_line;
use super::text::line_break;
use super::text::text_element;
use super::token::Paragraph;
use super::token::TextElement;
use super::token::Token;
use super::Context;
use nom::branch::alt;
use nom::combinator::consumed;
use nom::combinator::eof;
use nom::combinator::map;
use nom::combinator::not;
use nom::combinator::recognize;
use nom::multi::many1;
use nom::sequence::tuple;
/// Parses one paragraph: text elements up to and including the paragraph end.
///
/// Fails immediately at end of input. The returned `Paragraph` carries the full
/// consumed slice, the parsed text elements and the slice matched by the
/// paragraph terminator.
pub fn paragraph<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, Paragraph<'s>> {
    // many_till cannot match a zero-length string, so reject end-of-input up front.
    not(eof)(i)?;
    let exit_node = ContextElement::ExitMatcherNode(ExitMatcherNode {
        exit_matcher: ChainBehavior::AndParent(Some(&context_paragraph_end)),
    });
    let paragraph_context = context
        .with_additional_node(exit_node)
        .with_additional_node(ContextElement::StartOfParagraph);
    let (remaining, (source, (tokens, terminator))) = consumed(context_many_till(
        &paragraph_context,
        text_element,
        context_paragraph_end,
    ))(i)?;
    let mut contents = Vec::with_capacity(tokens.len());
    for token in tokens {
        match token {
            Token::TextElement(element) => contents.push(element),
            Token::Paragraph(_) => panic!("There should only be text elements in paragraphs."),
        }
    }
    let parsed = Paragraph {
        contents,
        paragraph_end: terminator,
        source,
    };
    Ok((remaining, parsed))
}
/// Context-taking adapter around `paragraph_end`; the context itself is not
/// consulted.
fn context_paragraph_end<'r, 's>(
    context: Context<'r, 's>,
    input: &'s str,
) -> Res<&'s str, &'s str> {
    let (remaining, matched) = paragraph_end(input)?;
    Ok((remaining, matched))
}
/// Matches the end of a paragraph: a line break followed by one or more blank
/// lines, or the end of input. Returns the matched slice.
pub fn paragraph_end(input: &str) -> Res<&str, &str> {
    let break_then_blank_lines = recognize(tuple((
        map(line_break, TextElement::LineBreak),
        many1(blank_line),
    )));
    alt((break_then_blank_lines, eof))(input)
}

View File

@ -1,161 +0,0 @@
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::character::complete::anychar;
use nom::character::complete::digit1;
use nom::character::complete::line_ending;
use nom::character::complete::one_of;
use nom::character::complete::space0;
use nom::combinator::eof;
use nom::combinator::map;
use nom::combinator::not;
use nom::combinator::opt;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::combinator::verify;
use nom::multi::many1;
use nom::sequence::tuple;
use super::combinator::context_many_till;
use super::error::CustomError;
use super::error::MyError;
use super::error::Res;
use super::parser_context::ContextElement;
use super::parser_with_context::parser_with_context;
use super::text::blank_line;
use super::text::line_break;
use super::text::space;
use super::text::text_element;
use super::token::ListItem;
use super::token::PlainList;
use super::token::TextElement;
use super::token::Token;
use super::Context;
/// Parser entry point for a plain list. Not implemented yet: calling it panics
/// via `todo!()`.
#[allow(dead_code)]
pub fn plain_list<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, PlainList<'s>> {
// todo
todo!()
}
/// Parses a single plain-list item.
///
/// Reads the leading indentation, then the bullet, an optional counter-set, an
/// optional check box, an optional tag, a mandatory space and finally the item's
/// text elements up to `item_end`. The length of the leading whitespace is
/// recorded on the context as the item's indent level while the contents are
/// parsed.
#[allow(dead_code)]
pub fn item<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, ListItem<'s>> {
    let (after_indent, leading_whitespace) = space0(i)?;
    let list_item_context =
        context.with_additional_node(ContextElement::ListItem(leading_whitespace.len()));
    let (
        remaining,
        (bullet_text, counter_set_part, check_box_part, tag_part, _separator, (tokens, _end)),
    ) = tuple((
        bullet,
        opt(tuple((space, counter_set))),
        opt(tuple((space, check_box))),
        opt(tuple((space, item_tag))),
        space,
        context_many_till(&list_item_context, text_element, item_end),
    ))(after_indent)?;

    let mut contents = Vec::with_capacity(tokens.len());
    for token in tokens {
        match token {
            Token::TextElement(element) => contents.push(element),
            Token::Paragraph(_) => panic!("There should only be text elements in items."),
        }
    }

    // Everything between the start of `i` and the start of the remainder was consumed.
    let consumed_len = remaining.as_ptr() as usize - i.as_ptr() as usize;
    let source = &i[..consumed_len];

    Ok((
        remaining,
        ListItem {
            source,
            leading_whitespace,
            bullet: bullet_text,
            counter_set: counter_set_part.map(|(_space, value)| value),
            check_box: check_box_part.map(|(_space, value)| value),
            item_tag: tag_part.map(|(_space, value)| value),
            contents,
        },
    ))
}
/// Matches a list counter: one lowercase ASCII letter, or a run of digits.
fn counter<'s>(i: &'s str) -> Res<&'s str, &'s str> {
    let single_lowercase_letter = recognize(one_of("abcdefghijklmnopqrstuvwxyz"));
    alt((single_lowercase_letter, digit1))(i)
}
/// Matches a list bullet: `*`, `-`, `+`, or a counter followed by `.` or `)`.
fn bullet<'s>(i: &'s str) -> Res<&'s str, &'s str> {
    let ordered_bullet = recognize(tuple((counter, alt((tag("."), tag(")"))))));
    alt((tag("*"), tag("-"), tag("+"), ordered_bullet))(i)
}
/// Matches a counter-set cookie of the form `[@` counter `]` and returns the
/// whole matched slice.
fn counter_set<'s>(i: &'s str) -> Res<&'s str, &'s str> {
    let mut cookie = recognize(tuple((tag("[@"), counter, tag("]"))));
    cookie(i)
}
/// Matches one of the three check-box forms: `[ ]`, `[X]` or `[-]`.
fn check_box<'s>(i: &'s str) -> Res<&'s str, &'s str> {
    let mut any_box = recognize(alt((tag("[ ]"), tag("[X]"), tag("[-]"))));
    any_box(i)
}
/// Matches an item tag: the tag text followed by the tag separator. Returns both
/// as a single slice.
fn item_tag<'s>(i: &'s str) -> Res<&'s str, &'s str> {
    let mut text_then_separator = recognize(tuple((tag_text, tag_separator)));
    text_then_separator(i)
}
/// Matches one or more tag-text characters and returns them as a single slice.
fn tag_text<'s>(i: &'s str) -> Res<&'s str, &'s str> {
    let mut one_or_more_characters = recognize(many1(tag_text_character));
    one_or_more_characters(i)
}
/// Matches any single character, provided the input does not start with a tag
/// separator or a line ending.
fn tag_text_character<'s>(i: &'s str) -> Res<&'s str, &'s str> {
    let separator_or_line_end = alt((tag_separator, line_ending));
    not(separator_or_line_end)(i)?;
    recognize(anychar)(i)
}
/// Matches the literal tag separator ` :: ` (with a space on each side).
fn tag_separator<'s>(i: &'s str) -> Res<&'s str, &'s str> {
    let separator = tag(" :: ");
    separator(i)
}
/// Matches the end of a list item's contents.
///
/// An item ends at the end of the whole plain list, or at a line ending that is
/// followed by either a line indented no deeper than the current item or by
/// another item. In the last two cases only the line ending is consumed; what
/// follows is peeked at.
pub fn item_end<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, &'s str> {
    let next_item = parser_with_context!(item)(&context);
    let shallower_line = parser_with_context!(line_indented_lte)(&context);
    let before_shallower_line = recognize(tuple((line_ending, peek(shallower_line))));
    // TODO: Do we still need the item_matcher entry here? If we remove it, then child items should become part of the body of the parent item which would match the description on https://orgmode.org/worg/org-syntax.html
    let before_next_item = recognize(tuple((line_ending, peek(next_item))));
    alt((
        // TODO: This should ends the highest plain list
        plain_list_end,
        before_shallower_line,
        before_next_item,
    ))(i)
}
fn line_indented_lte<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, &'s str> {
let current_item_indent_level: &usize = get_context_item_indent(context).ok_or(
nom::Err::Error(CustomError::MyError(MyError("NotInPlainListItem"))),
)?;
let matched = recognize(verify(
tuple((space0::<&str, _>, anychar)),
|(_space0, _anychar)| _space0.len() <= *current_item_indent_level,
))(i)?;
Ok(matched)
}
/// Returns the indent level stored in the first `ContextElement::ListItem` found
/// while walking the context, or `None` if there is none.
fn get_context_item_indent<'r, 's>(context: Context<'r, 's>) -> Option<&'r usize> {
    context.iter().find_map(|node| match node.get_data() {
        ContextElement::ListItem(depth) => Some(depth),
        _ => None,
    })
}
/// Matches the end of a plain list: a line break followed by at least two blank
/// lines, or the end of input. Returns the matched slice.
pub fn plain_list_end(input: &str) -> Res<&str, &str> {
    let break_then_two_blank_lines = recognize(tuple((
        map(line_break, TextElement::LineBreak),
        blank_line,
        many1(blank_line),
    )));
    alt((break_then_two_blank_lines, eof))(input)
}

View File

@ -1,68 +0,0 @@
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::character::complete::alphanumeric1;
use nom::character::complete::line_ending;
use nom::character::complete::space1;
use nom::combinator::map;
use nom::combinator::not;
use nom::combinator::recognize;
use nom::multi::many_till;
use super::bold::bold;
use super::error::Res;
use super::link::link;
use super::parser_with_context::parser_with_context;
use super::token::BlankLine;
use super::token::LineBreak;
use super::token::Space;
use super::token::Span;
use super::token::Symbol;
use super::token::TextElement;
use super::Context;
pub fn line_break(input: &str) -> Res<&str, LineBreak> {
map(line_ending, |s: &str| LineBreak { source: s })(input)
}
pub fn space(input: &str) -> Res<&str, Space> {
map(space1, |s: &str| Space { source: s })(input)
}
fn span(input: &str) -> Res<&str, Span> {
map(alphanumeric1, |s: &str| Span { source: s })(input)
}
/// Builds a parser that matches the literal `symbol_tag` and wraps the matched
/// slice in a `Symbol`.
pub fn symbol(symbol_tag: &'static str) -> impl for<'a> Fn(&'a str) -> Res<&'a str, Symbol<'a>> {
    move |i: &str| tag(symbol_tag)(i).map(|(rest, source)| (rest, Symbol { source }))
}
/// Matches a line that holds nothing but optional whitespace and ends in a line break.
///
/// The parser does not check that it starts at the beginning of a line; the
/// caller is responsible for that.
pub fn blank_line(input: &str) -> Res<&str, BlankLine> {
    let mut whitespace_then_break = recognize(many_till(
        map(space, TextElement::Space),
        map(line_break, TextElement::LineBreak),
    ));
    whitespace_then_break(input).map(|(rest, source)| (rest, BlankLine { source }))
}
/// Parses the next text element at `i`.
///
/// Fails immediately if the context's exit matcher matches here. Otherwise the
/// alternatives are tried in order: bold, link, alphanumeric span, the symbols
/// `*`, `[` and `]`, whitespace, and finally a line break.
pub fn text_element<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, TextElement<'s>> {
    not(|input| context.check_exit_matcher(input))(i)?;

    let bold_parser = parser_with_context!(bold)(&context);
    let link_parser = parser_with_context!(link)(&context);

    let mut any_element = alt((
        map(bold_parser, TextElement::Bold),
        map(link_parser, TextElement::Link),
        map(span, TextElement::Span),
        map(symbol("*"), TextElement::Symbol),
        map(symbol("["), TextElement::Symbol),
        map(symbol("]"), TextElement::Symbol),
        map(space, TextElement::Space),
        map(line_break, TextElement::LineBreak),
    ));
    any_element(i)
}

View File

@ -1,136 +0,0 @@
/// A parsed unit: either a single text element or a whole paragraph.
#[derive(Debug)]
pub enum Token<'a> {
/// An inline element such as a span, space, symbol, bold or link.
TextElement(TextElement<'a>),
/// A complete paragraph.
Paragraph(Paragraph<'a>),
}
/// Wraps a text element in `Token::TextElement`.
///
/// Implemented as `From` rather than `Into`: the standard library's blanket impl
/// still provides `Into<Token<'a>>` for `TextElement<'a>`, so existing `.into()`
/// calls and `Into<Token>` bounds keep working.
impl<'a> From<TextElement<'a>> for Token<'a> {
    fn from(element: TextElement<'a>) -> Self {
        Token::TextElement(element)
    }
}
/// Wraps a paragraph in `Token::Paragraph`.
///
/// Implemented as `From` rather than `Into`: the standard library's blanket impl
/// still provides `Into<Token<'a>>` for `Paragraph<'a>`, so existing `.into()`
/// calls and `Into<Token>` bounds keep working.
impl<'a> From<Paragraph<'a>> for Token<'a> {
    fn from(paragraph: Paragraph<'a>) -> Self {
        Token::Paragraph(paragraph)
    }
}
/// An inline element of text. Every variant wraps a struct holding the slice of
/// source text it was parsed from.
#[derive(Debug)]
pub enum TextElement<'a> {
/// A run of alphanumeric characters.
Span(Span<'a>),
/// A run of horizontal whitespace.
Space(Space<'a>),
/// A line ending.
LineBreak(LineBreak<'a>),
/// A literal symbol such as `*`, `[` or `]`.
Symbol(Symbol<'a>),
/// A bold section.
Bold(Bold<'a>),
/// A link section.
Link(Link<'a>),
}
/// A run of alphanumeric characters, as produced by the `span` parser.
#[derive(Debug)]
pub struct Span<'a> {
/// The matched slice of the input text.
pub source: &'a str,
}
/// A run of spaces or tabs, as produced by the `space` parser.
#[derive(Debug)]
pub struct Space<'a> {
/// The matched slice of the input text.
pub source: &'a str,
}
/// A single line ending, as produced by the `line_break` parser.
#[derive(Debug)]
pub struct LineBreak<'a> {
/// The matched slice of the input text.
pub source: &'a str,
}
/// A literal symbol, as produced by a parser built with `symbol`.
#[derive(Debug)]
pub struct Symbol<'a> {
/// The matched slice of the input text.
pub source: &'a str,
}
/// A line containing only optional whitespace followed by a line break, as
/// produced by the `blank_line` parser.
#[derive(Debug)]
pub struct BlankLine<'a> {
/// The matched slice of the input text, line break included.
pub source: &'a str,
}
/// Holds a borrowed slice of source text.
///
/// NOTE(review): nothing visible here constructs this type; confirm it is still
/// used.
#[derive(Debug)]
pub struct Sequence<'a> {
/// The slice of input text this value refers to.
pub source: &'a str,
}
/// A bold section, as produced by the `bold` parser.
#[derive(Debug)]
pub struct Bold<'a> {
/// The full slice recognized for the section, from its opening marker through
/// its closing marker.
pub source: &'a str,
}
/// A link section, as produced by the `link` parser.
#[derive(Debug)]
pub struct Link<'a> {
/// The full slice recognized for the section, from its opening marker through
/// its closing marker.
pub source: &'a str,
}
/// A paragraph, as produced by the `paragraph` parser.
#[derive(Debug)]
pub struct Paragraph<'a> {
/// Everything the paragraph parser consumed, terminator included.
pub source: &'a str,
/// The text elements that make up the paragraph body, in order.
pub contents: Vec<TextElement<'a>>,
/// The slice matched by the paragraph terminator.
pub paragraph_end: &'a str,
}
/// Implemented by parsed values that can report the source text they came from.
pub trait Source<'a> {
/// Returns the slice of input text this value was parsed from. The borrow of
/// `self` is tied to the same lifetime `'a` as the returned slice.
fn get_source(&'a self) -> &'a str;
}
impl<'a> Source<'a> for TextElement<'a> {
fn get_source(&'a self) -> &'a str {
match self {
TextElement::Span(elem) => elem.source,
TextElement::Space(elem) => elem.source,
TextElement::LineBreak(elem) => elem.source,
TextElement::Symbol(elem) => elem.source,
TextElement::Bold(elem) => elem.source,
TextElement::Link(elem) => elem.source,
}
}
}
impl<'a> Source<'a> for Paragraph<'a> {
fn get_source(&'a self) -> &'a str {
self.source
}
}
/// A plain list, the result type of the (not yet implemented) `plain_list` parser.
#[derive(Debug)]
pub struct PlainList<'a> {
/// The slice of input text this list refers to.
pub source: &'a str,
}
impl<'a> Source<'a> for PlainList<'a> {
fn get_source(&'a self) -> &'a str {
self.source
}
}
/// A single plain-list item, as produced by the `item` parser.
#[derive(Debug)]
pub struct ListItem<'a> {
/// Everything the item parser consumed.
pub source: &'a str,
/// The whitespace before the bullet; its length is the item's indent level.
pub leading_whitespace: &'a str,
/// The bullet text.
pub bullet: &'a str,
/// The counter-set cookie, if one was present.
pub counter_set: Option<&'a str>,
/// The check box, if one was present.
pub check_box: Option<&'a str>,
/// The item tag including its separator, if one was present.
pub item_tag: Option<&'a str>,
/// The text elements that make up the item body, in order.
pub contents: Vec<TextElement<'a>>,
}
impl<'a> Source<'a> for ListItem<'a> {
fn get_source(&'a self) -> &'a str {
self.source
}
}
/// Holds a borrowed slice of source text for a list counter.
///
/// NOTE(review): nothing visible here constructs this type; confirm it is still
/// used.
#[derive(Debug)]
pub struct ListCounter<'a> {
/// The slice of input text this value refers to.
pub source: &'a str,
}
impl<'a> Source<'a> for ListCounter<'a> {
fn get_source(&'a self) -> &'a str {
self.source
}
}