Remove wasm_test's dependency on compare module.
All checks were successful
clippy Build clippy has succeeded
rust-foreign-document-test Build rust-foreign-document-test has succeeded
rust-build Build rust-build has succeeded
rust-test Build rust-test has succeeded

This commit is contained in:
Tom Alexander
2023-12-31 11:11:25 -05:00
parent f4e0dddd9d
commit 945121202d
12 changed files with 102 additions and 96 deletions

View File

@@ -1,8 +1,6 @@
use std::path::Path;
use crate::compare::diff::compare_document;
use crate::compare::sexp::sexp;
use crate::context::GlobalSettings;
use crate::context::LocalFileAccessInterface;
use crate::parser::parse_file_with_settings;
@@ -10,6 +8,7 @@ use crate::parser::parse_with_settings;
use crate::util::cli::emacs_parse_anonymous_org_document;
use crate::util::cli::emacs_parse_file_org_document;
use crate::util::cli::print_versions;
use crate::util::elisp::sexp;
use crate::util::terminal::foreground_color;
use crate::util::terminal::reset_color;

View File

@@ -9,8 +9,6 @@ use super::diff::artificial_owned_diff_scope;
use super::diff::compare_ast_node;
use super::diff::DiffEntry;
use super::diff::DiffStatus;
use super::sexp::unquote;
use super::sexp::Token;
use super::util::get_property;
use super::util::get_property_numeric;
use super::util::get_property_quoted_string;
@@ -20,6 +18,8 @@ use crate::types::CharOffsetInLine;
use crate::types::LineNumber;
use crate::types::RetainLabels;
use crate::types::SwitchNumberLines;
use crate::util::elisp::unquote;
use crate::util::elisp::Token;
#[derive(Debug)]
pub(crate) enum EmacsField<'s> {

View File

@@ -16,8 +16,6 @@ use super::compare_field::compare_property_retain_labels;
use super::compare_field::compare_property_set_of_quoted_string;
use super::compare_field::compare_property_single_ast_node;
use super::compare_field::compare_property_unquoted_atom;
use super::sexp::unquote;
use super::sexp::Token;
use super::util::affiliated_keywords_names;
use super::util::assert_no_children;
use super::util::compare_additional_properties;
@@ -107,6 +105,8 @@ use crate::types::Verbatim;
use crate::types::VerseBlock;
use crate::types::WarningDelayType;
use crate::types::Year;
use crate::util::elisp::unquote;
use crate::util::elisp::Token;
use crate::util::elisp_fact::ElispFact;
use crate::util::elisp_fact::GetElispFact;
use crate::util::terminal::foreground_color;

View File

@@ -3,7 +3,6 @@ mod compare;
mod compare_field;
mod diff;
mod macros;
mod sexp;
mod util;
pub use compare::run_anonymous_compare;
pub use compare::run_anonymous_compare_with_settings;
@@ -11,9 +10,3 @@ pub use compare::run_compare_on_file;
pub use compare::run_compare_on_file_with_settings;
pub use compare::silent_anonymous_compare;
pub use compare::silent_compare_on_file;
pub use sexp::sexp;
pub(crate) use sexp::unquote;
pub(crate) use sexp::TextWithProperties;
pub use sexp::Token;
pub(crate) use util::maybe_token_to_usize;
pub(crate) use util::EmacsStandardProperties;

View File

@@ -1,497 +0,0 @@
use std::borrow::Cow;
use std::collections::HashMap;
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::bytes::complete::take_till1;
use nom::character::complete::anychar;
use nom::character::complete::digit1;
use nom::character::complete::multispace0;
use nom::character::complete::multispace1;
use nom::character::complete::one_of;
use nom::combinator::map;
use nom::combinator::not;
use nom::combinator::opt;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::multi::separated_list1;
use nom::sequence::delimited;
use nom::sequence::preceded;
use nom::sequence::tuple;
use crate::error::Res;
/// Maximum number of digits collected for a single octal escape sequence while unquoting a string.
const MAX_OCTAL_LENGTH: usize = 3;
/// A single node of a parsed s-expression.
///
/// Every variant borrows from the parsed source text (lifetime `'s`) rather than copying it.
#[derive(Debug)]
pub enum Token<'s> {
/// An unquoted atom, a quoted string, or `#<...>` hash notation, stored as the raw source slice.
/// Quoted strings keep their surrounding quotes and escape sequences.
Atom(&'s str),
/// The child tokens of a parenthesized `( ... )` form.
List(Vec<Token<'s>>),
/// A `#("text" properties...)` form.
TextWithProperties(TextWithProperties<'s>),
/// The child tokens of a bracketed `[ ... ]` form.
Vector(Vec<Token<'s>>),
}
/// The payload of a `#("text" properties...)` form.
#[derive(Debug)]
pub struct TextWithProperties<'s> {
/// The quoted string exactly as it appears in the source, including its surrounding quotes.
pub(crate) text: &'s str,
/// The tokens that follow the string inside the form; empty when there are none.
pub(crate) properties: Vec<Token<'s>>,
}
impl<'s> Token<'s> {
    /// Borrow the children of a [`Token::Vector`].
    ///
    /// Any other variant yields an error that includes the debug form of the token.
    pub(crate) fn as_vector<'p>(
        &'p self,
    ) -> Result<&'p Vec<Token<'s>>, Box<dyn std::error::Error>> {
        match self {
            Token::Vector(children) => Ok(children),
            other => Err(format!("wrong token type, expected vector: {:?}", other).into()),
        }
    }

    /// Borrow the children of a [`Token::List`].
    ///
    /// Any other variant yields an error that includes the debug form of the token.
    pub(crate) fn as_list<'p>(&'p self) -> Result<&'p Vec<Token<'s>>, Box<dyn std::error::Error>> {
        match self {
            Token::List(children) => Ok(children),
            other => Err(format!("wrong token type, expected list: {:?}", other).into()),
        }
    }

    /// Return the source text of a [`Token::Atom`].
    ///
    /// Any other variant yields an error that includes the debug form of the token.
    pub(crate) fn as_atom<'p>(&'p self) -> Result<&'s str, Box<dyn std::error::Error>> {
        match self {
            Token::Atom(body) => Ok(*body),
            other => Err(format!("wrong token type, expected atom: {:?}", other).into()),
        }
    }

    /// Borrow the payload of a [`Token::TextWithProperties`].
    ///
    /// Any other variant yields an error that includes the debug form of the token.
    pub(crate) fn as_text<'p>(
        &'p self,
    ) -> Result<&'p TextWithProperties<'s>, Box<dyn std::error::Error>> {
        match self {
            Token::TextWithProperties(body) => Ok(body),
            other => Err(format!("wrong token type, expected text: {:?}", other).into()),
        }
    }

    /// Interpret a list of alternating keys and values as a map from key to value.
    ///
    /// Fails if the token is not a list, if the list has an odd number of children, or if any
    /// key position holds something other than an atom. A repeated key keeps its last value.
    pub(crate) fn as_map<'p>(
        &'p self,
    ) -> Result<HashMap<&'s str, &'p Token<'s>>, Box<dyn std::error::Error>> {
        let children = self.as_list()?;
        if children.len() % 2 != 0 {
            return Err("Expecting an even number of children".into());
        }
        let mut hashmap = HashMap::new();
        // The length is even, so every chunk is exactly one (key, value) pair.
        for pair in children.chunks_exact(2) {
            hashmap.insert(pair[0].as_atom()?, &pair[1]);
        }
        Ok(hashmap)
    }
}
/// Report whether `child` lies entirely within the memory occupied by `parent`.
fn is_slice_of(parent: &str, child: &str) -> bool {
    let outer = parent.as_bytes().as_ptr_range();
    let inner = child.as_bytes().as_ptr_range();
    outer.start <= inner.start && inner.end <= outer.end
}
/// Return the leading part of `input` that a parser consumed, given the `remaining` input the
/// parser handed back.
///
/// `remaining` must point into `input`; this is only verified in debug builds.
fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str {
    debug_assert!(is_slice_of(input, remaining));
    // The consumed length is the distance between the two start addresses.
    let input_address = input.as_ptr() as usize;
    let remaining_address = remaining.as_ptr() as usize;
    let consumed_len = remaining_address - input_address;
    &input[..consumed_len]
}
/// State of the byte-by-byte scanner used by `unquote`.
///
/// The states without an `out` buffer are used while nothing has been decoded yet. Once the
/// first escape has been decoded, the `HasEscape*` states carry the output bytes built so far.
#[derive(Debug)]
enum UnquoteState {
/// No backslash is pending and nothing has been decoded yet.
Normal,
/// A backslash was just read while in `Normal`.
Escape,
/// At least one escape has been decoded and no backslash is pending.
HasEscape {
// Decoded output bytes accumulated so far.
out: Vec<u8>,
},
/// A backslash was just read while in `HasEscape`.
HasEscapeEscape {
// Decoded output bytes accumulated so far (the pending backslash is not included).
out: Vec<u8>,
},
/// Collecting the digits of an octal escape that was entered from `Escape`.
Octal {
// Byte offset of the escape's backslash within the interior text.
octal_begin_offset: usize,
// The octal digits read so far, as ASCII bytes.
octal: Vec<u8>,
},
/// Collecting the digits of an octal escape that was entered from `HasEscapeEscape`.
HasEscapeOctal {
// Decoded output bytes accumulated before this escape.
out: Vec<u8>,
// The octal digits read so far, as ASCII bytes.
octal: Vec<u8>,
},
}
/// Strip the surrounding double quotes from a quoted string and decode its escape sequences.
///
/// The recognized escapes are `\n`, `\\`, `\"` and octal escapes of up to `MAX_OCTAL_LENGTH`
/// digits. When nothing needs decoding the interior text is returned borrowed; otherwise a new
/// string is allocated. A dangling backslash at the very end of the text is kept literally.
///
/// # Errors
///
/// Returns an error when `text` is not wrapped in a pair of double quotes, when a backslash is
/// followed by an unsupported character, when an octal escape does not fit in one byte, or when
/// the decoded bytes are not valid UTF-8.
pub(crate) fn unquote(text: &str) -> Result<Cow<'_, str>, Box<dyn std::error::Error>> {
    if !text.starts_with('"') {
        return Err("Quoted text does not start with quote.".into());
    }
    // A lone `"` both starts and ends with a quote, so the length check is what guarantees that
    // the opening and closing quotes are distinct (and that the slice below cannot panic).
    if text.len() < 2 || !text.ends_with('"') {
        return Err("Quoted text does not end with quote.".into());
    }
    let interior_text = &text[1..(text.len() - 1)];
    let mut state = UnquoteState::Normal;
    for (offset, current_char) in interior_text.bytes().enumerate() {
        // First pass: if an octal escape is in progress, decide whether this byte continues it.
        // When it does not (non-octal byte or the digit limit is reached), decode the escape
        // and fall back to HasEscape so the second pass treats this byte as ordinary input.
        state = match (state, current_char) {
            (
                UnquoteState::Octal {
                    octal_begin_offset,
                    octal,
                },
                b'0'..=b'7',
            ) if octal.len() < MAX_OCTAL_LENGTH => UnquoteState::Octal {
                octal_begin_offset,
                octal,
            },
            (
                UnquoteState::Octal {
                    octal_begin_offset,
                    octal,
                },
                _,
            ) => {
                let decoded_byte = decode_octal_escape(octal)?;
                // This was the first escape: seed the output with the untouched prefix.
                let mut out: Vec<u8> = Vec::with_capacity(interior_text.len());
                out.extend_from_slice(&interior_text.as_bytes()[..octal_begin_offset]);
                out.push(decoded_byte);
                UnquoteState::HasEscape { out }
            }
            (UnquoteState::HasEscapeOctal { out, octal }, b'0'..=b'7')
                if octal.len() < MAX_OCTAL_LENGTH =>
            {
                UnquoteState::HasEscapeOctal { out, octal }
            }
            (UnquoteState::HasEscapeOctal { mut out, octal }, _) => {
                out.push(decode_octal_escape(octal)?);
                UnquoteState::HasEscape { out }
            }
            (state, _) => state,
        };
        // Second pass: consume the current byte.
        state = match (state, current_char) {
            (UnquoteState::Normal, b'\\') => UnquoteState::Escape,
            (UnquoteState::Normal, _) => UnquoteState::Normal,
            (UnquoteState::HasEscape { out }, b'\\') => UnquoteState::HasEscapeEscape { out },
            (UnquoteState::HasEscape { mut out }, _) => {
                out.push(current_char);
                UnquoteState::HasEscape { out }
            }
            (UnquoteState::Escape, b'n') => {
                let mut out: Vec<u8> = Vec::with_capacity(interior_text.len());
                // Subtract 1 from offset to account for backslash.
                out.extend_from_slice(&interior_text.as_bytes()[..(offset - 1)]);
                out.push(b'\n');
                UnquoteState::HasEscape { out }
            }
            (UnquoteState::HasEscapeEscape { mut out }, b'n') => {
                out.push(b'\n');
                UnquoteState::HasEscape { out }
            }
            (UnquoteState::Escape, b'\\') => {
                let mut out: Vec<u8> = Vec::with_capacity(interior_text.len());
                // Subtract 1 from offset to account for backslash.
                out.extend_from_slice(&interior_text.as_bytes()[..(offset - 1)]);
                out.push(b'\\');
                UnquoteState::HasEscape { out }
            }
            (UnquoteState::HasEscapeEscape { mut out }, b'\\') => {
                out.push(b'\\');
                UnquoteState::HasEscape { out }
            }
            (UnquoteState::Escape, b'"') => {
                let mut out: Vec<u8> = Vec::with_capacity(interior_text.len());
                // Subtract 1 from offset to account for backslash.
                out.extend_from_slice(&interior_text.as_bytes()[..(offset - 1)]);
                out.push(b'"');
                UnquoteState::HasEscape { out }
            }
            (UnquoteState::HasEscapeEscape { mut out }, b'"') => {
                out.push(b'"');
                UnquoteState::HasEscape { out }
            }
            (UnquoteState::Escape, b'0'..=b'7') => {
                let mut octal = Vec::with_capacity(MAX_OCTAL_LENGTH);
                octal.push(current_char);
                // Subtract 1 from offset to account for backslash.
                UnquoteState::Octal {
                    octal_begin_offset: offset - 1,
                    octal,
                }
            }
            (UnquoteState::HasEscapeEscape { out }, b'0'..=b'7') => {
                let mut octal = Vec::with_capacity(MAX_OCTAL_LENGTH);
                octal.push(current_char);
                UnquoteState::HasEscapeOctal { out, octal }
            }
            (
                UnquoteState::Octal {
                    octal_begin_offset,
                    mut octal,
                },
                b'0'..=b'7',
            ) => {
                octal.push(current_char);
                UnquoteState::Octal {
                    octal_begin_offset,
                    octal,
                }
            }
            (UnquoteState::HasEscapeOctal { out, mut octal }, b'0'..=b'7') => {
                octal.push(current_char);
                UnquoteState::HasEscapeOctal { out, octal }
            }
            // Only reachable when a backslash is followed by an unsupported character. The
            // input comes from outside the program, so report it instead of panicking.
            (state, _) => {
                return Err(format!(
                    "Invalid state unquoting string: {:?} | {} | {:?}",
                    state, offset, interior_text
                )
                .into())
            }
        };
    }
    match state {
        // Nothing was decoded (a dangling trailing backslash is kept as-is).
        UnquoteState::Normal | UnquoteState::Escape => Ok(Cow::Borrowed(interior_text)),
        // The text ended in the middle of its first escape, an octal one: decode it now.
        UnquoteState::Octal {
            octal_begin_offset,
            octal,
        } => {
            let decoded_byte = decode_octal_escape(octal)?;
            let mut out: Vec<u8> = Vec::with_capacity(octal_begin_offset + 1);
            out.extend_from_slice(&interior_text.as_bytes()[..octal_begin_offset]);
            out.push(decoded_byte);
            Ok(Cow::Owned(String::from_utf8(out)?))
        }
        UnquoteState::HasEscape { out } => Ok(Cow::Owned(String::from_utf8(out)?)),
        UnquoteState::HasEscapeEscape { mut out } => {
            // Dangling trailing backslash: keep it literally.
            out.push(b'\\');
            Ok(Cow::Owned(String::from_utf8(out)?))
        }
        // The text ended in the middle of a later octal escape: decode it now.
        UnquoteState::HasEscapeOctal { mut out, octal } => {
            out.push(decode_octal_escape(octal)?);
            Ok(Cow::Owned(String::from_utf8(out)?))
        }
    }
}

/// Decode the digits of one octal escape (without its backslash) into the byte they encode.
///
/// Fails when the value does not fit in a `u8`, for example `400`.
fn decode_octal_escape(octal: Vec<u8>) -> Result<u8, Box<dyn std::error::Error>> {
    let octal_number_string = String::from_utf8(octal)?;
    Ok(u8::from_str_radix(&octal_number_string, 8)?)
}
/// Parse one s-expression token, skipping any whitespace before and after it.
///
/// Returns the unparsed remainder of the input together with the token.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn sexp<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
    let (after_leading_space, _) = multispace0(input)?;
    let (after_token, parsed) = token(after_leading_space)?;
    let (rest, _) = multispace0(after_token)?;
    Ok((rest, parsed))
}
/// Parse a single token, trying a list first, then a vector, then an atom.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn token<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
    let mut any_token = alt((list, vector, atom));
    any_token(input)
}
/// Parse a parenthesized list of one or more whitespace-separated tokens.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn list<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
    // Whitespace is optional right after `(` and right before `)`.
    let padded_children = delimited(
        multispace0,
        separated_list1(multispace1, token),
        multispace0,
    );
    let (rest, children) = delimited(tag("("), padded_children, tag(")"))(input)?;
    Ok((rest, Token::List(children)))
}
/// Parse a bracketed vector of one or more whitespace-separated tokens.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn vector<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
    // Whitespace is optional right after `[` and right before `]`.
    let padded_children = delimited(
        multispace0,
        separated_list1(multispace1, token),
        multispace0,
    );
    let (rest, children) = delimited(tag("["), padded_children, tag("]"))(input)?;
    Ok((rest, Token::Vector(children)))
}
/// Parse any atom-like token: text with properties, hash notation, a quoted string, or an
/// unquoted atom, tried in that order.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
    // An atom may not begin with the character that closes a list or vector.
    let mut reject_closing_delimiter = not(peek(one_of(")]")));
    reject_closing_delimiter(input)?;
    let mut any_atom = alt((
        text_with_properties,
        hash_notation,
        quoted_atom,
        unquoted_atom,
    ));
    any_atom(input)
}
/// Parse a non-empty run of characters up to the next space, tab, carriage return, newline,
/// `)` or `]`.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn unquoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
    let ends_atom = |c: char| " \t\r\n)]".contains(c);
    take_till1(ends_atom)(input).map(|(rest, body)| (rest, Token::Atom(body)))
}
/// Parse a double-quoted string, returning it as an atom that still includes both quotes and
/// all escape sequences exactly as written.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn quoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
    let (mut cursor, _) = tag(r#"""#)(input)?;
    // Advance until the cursor sits on the first unescaped double quote.
    while !cursor.starts_with('"') {
        if let Some(escaped) = cursor.strip_prefix('\\') {
            // After a backslash only `"`, `n`, `\` or a run of digits is accepted.
            let (after_escape, _) = alt((recognize(one_of(r#""n\\"#)), digit1))(escaped)?;
            cursor = after_escape;
        } else {
            // Any other character is part of the string; this fails at end of input.
            let (after_char, _) = anychar(cursor)?;
            cursor = after_char;
        }
    }
    let (remaining, _) = tag(r#"""#)(cursor)?;
    let source = get_consumed(input, remaining);
    Ok((remaining, Token::Atom(source)))
}
/// Parse `#<...>` hash notation, returning the whole form (delimiters included) as an atom.
///
/// At least one character is required between `#<` and the first `>`.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn hash_notation<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
    let (after_open, _) = tag("#<")(input)?;
    let (at_close, _) = take_till1(|c: char| c == '>')(after_open)?;
    let (rest, _) = tag(">")(at_close)?;
    Ok((rest, Token::Atom(get_consumed(input, rest))))
}
/// Parse a `#("text" properties...)` form into a [`Token::TextWithProperties`].
///
/// Whitespace is required after the quoted string; the property tokens after it are optional.
fn text_with_properties(input: &str) -> Res<&str, Token<'_>> {
    // `quoted_atom` only ever produces `Token::Atom`, so the other arm cannot be reached.
    let quoted_text = map(quoted_atom, |atom| match atom {
        Token::Atom(body) => body,
        _ => unreachable!(),
    });
    let property_list = preceded(multispace1, opt(separated_list1(multispace1, token)));
    let padded_body = delimited(
        multispace0,
        tuple((quoted_text, property_list)),
        multispace0,
    );
    let (remaining, (text, props)) = delimited(tag("#("), padded_body, tag(")"))(input)?;
    let properties = props.unwrap_or_default();
    Ok((
        remaining,
        Token::TextWithProperties(TextWithProperties { text, properties }),
    ))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A padded list of bare atoms parses completely into a list token.
    #[test]
    fn simple() {
        let input = "  (foo bar baz )  ";
        let (remaining, parsed) = sexp(input).expect("Parse the input");
        assert_eq!(remaining, "");
        assert!(matches!(parsed, Token::List(_)));
    }

    /// A quoted string keeps its surrounding quotes in the resulting atom.
    #[test]
    fn quoted() {
        let input = r#"  ("foo" bar baz )  "#;
        let (remaining, parsed) = sexp(input).expect("Parse the input");
        assert_eq!(remaining, "");
        assert!(matches!(parsed, Token::List(_)));
        let children = if let Token::List(children) = parsed {
            children
        } else {
            panic!("Should be a list.")
        };
        let first = match children.first() {
            Some(Token::Atom(body)) => *body,
            _ => panic!("First child should be an atom."),
        };
        assert_eq!(first, r#""foo""#)
    }

    /// Parentheses inside a quoted string do not terminate the enclosing list.
    #[test]
    fn quoted_containing_paren() {
        let input = r#"  (foo "b(a)r" baz )  "#;
        let (remaining, parsed) = sexp(input).expect("Parse the input");
        assert_eq!(remaining, "");
        assert!(matches!(parsed, Token::List(_)));
        let children = if let Token::List(children) = parsed {
            children
        } else {
            panic!("Should be a list.")
        };
        let first = match children.first() {
            Some(Token::Atom(body)) => *body,
            _ => panic!("First child should be an atom."),
        };
        assert_eq!(first, r#"foo"#);
        let second = match children.get(1) {
            Some(Token::Atom(body)) => *body,
            _ => panic!("Second child should be an atom."),
        };
        assert_eq!(second, r#""b(a)r""#);
        let third = match children.get(2) {
            Some(Token::Atom(body)) => *body,
            _ => panic!("Third child should be an atom."),
        };
        assert_eq!(third, r#"baz"#);
    }

    /// Escaped backslashes inside a quoted string are preserved verbatim in the atom.
    #[test]
    fn string_containing_escaped_characters() {
        let input = r#"  (foo "\\( x=2 \\)" bar)  "#;
        let (remaining, parsed) = sexp(input).expect("Parse the input");
        assert_eq!(remaining, "");
        assert!(matches!(parsed, Token::List(_)));
        let children = if let Token::List(children) = parsed {
            children
        } else {
            panic!("Should be a list.")
        };
        let second = match children.get(1) {
            Some(Token::Atom(body)) => *body,
            _ => panic!("First child should be an atom."),
        };
        assert_eq!(second, r#""\\( x=2 \\)""#)
    }
}

View File

@@ -8,13 +8,14 @@ use super::compare_field::compare_property_quoted_string;
use super::compare_field::ComparePropertiesResult;
use super::diff::DiffEntry;
use super::diff::DiffStatus;
use super::sexp::Token;
use crate::compare::diff::compare_ast_node;
use crate::compare::sexp::unquote;
use crate::types::AffiliatedKeywordValue;
use crate::types::AstNode;
use crate::types::GetAffiliatedKeywords;
use crate::types::StandardProperties;
use crate::util::elisp::get_emacs_standard_properties;
use crate::util::elisp::unquote;
use crate::util::elisp::Token;
use crate::util::elisp_fact::GetElispFact;
/// Check if the child string slice is a slice of the parent string slice.
@@ -145,80 +146,6 @@ fn assert_post_blank<'b, 's, S: StandardProperties<'s> + ?Sized>(
Ok(())
}
/// The standard properties read from an element in the Emacs parse output.
///
/// Each field is `None` when the value was absent or was the atom `nil`.
pub(crate) struct EmacsStandardProperties {
/// First entry of the `:standard-properties` vector, or the `:begin` attribute.
pub(crate) begin: Option<usize>,
/// Second entry of the `:standard-properties` vector, or the `:post-affiliated` attribute.
#[allow(dead_code)]
pub(crate) post_affiliated: Option<usize>,
/// Third entry of the `:standard-properties` vector, or the `:contents-begin` attribute.
#[allow(dead_code)]
pub(crate) contents_begin: Option<usize>,
/// Fourth entry of the `:standard-properties` vector, or the `:contents-end` attribute.
#[allow(dead_code)]
pub(crate) contents_end: Option<usize>,
/// Fifth entry of the `:standard-properties` vector, or the `:end` attribute.
pub(crate) end: Option<usize>,
/// Sixth entry of the `:standard-properties` vector, or the `:post-blank` attribute.
#[allow(dead_code)]
pub(crate) post_blank: Option<usize>,
}
/// Extract the standard properties from an Emacs element token.
///
/// The second child of the element list is read as the attribute map. When that map has a
/// `:standard-properties` vector, its first six entries are used in order; otherwise the
/// individual `:begin`, `:end`, `:contents-begin`, `:contents-end`, `:post-blank` and
/// `:post-affiliated` attributes are looked up.
///
/// Fails if the token does not have that shape or if a value is neither `nil` nor a number.
pub(crate) fn get_emacs_standard_properties(
    emacs: &Token<'_>,
) -> Result<EmacsStandardProperties, Box<dyn std::error::Error>> {
    let attributes_map = emacs
        .as_list()?
        .get(1)
        .ok_or("Should have an attributes child.")?
        .as_map()?;
    match attributes_map.get(":standard-properties") {
        Some(standard_properties) => {
            // The vector is positional, so the fields are filled in the order they are read.
            let mut std_props = standard_properties.as_vector()?.iter();
            Ok(EmacsStandardProperties {
                begin: maybe_token_to_usize(std_props.next())?,
                post_affiliated: maybe_token_to_usize(std_props.next())?,
                contents_begin: maybe_token_to_usize(std_props.next())?,
                contents_end: maybe_token_to_usize(std_props.next())?,
                end: maybe_token_to_usize(std_props.next())?,
                post_blank: maybe_token_to_usize(std_props.next())?,
            })
        }
        None => {
            let lookup = |key: &str| maybe_token_to_usize(attributes_map.get(key).copied());
            Ok(EmacsStandardProperties {
                begin: lookup(":begin")?,
                end: lookup(":end")?,
                contents_begin: lookup(":contents-begin")?,
                contents_end: lookup(":contents-end")?,
                post_blank: lookup(":post-blank")?,
                post_affiliated: lookup(":post-affiliated")?,
            })
        }
    }
}
/// Convert an optional atom token into an optional number.
///
/// A missing token and the atom `nil` both give `Ok(None)`. Fails if the token is not an atom
/// or if its text does not parse as a `usize`.
pub(crate) fn maybe_token_to_usize(
    token: Option<&Token<'_>>,
) -> Result<Option<usize>, Box<dyn std::error::Error>> {
    let text = match token {
        Some(token) => token.as_atom()?,
        None => return Ok(None),
    };
    if text == "nil" {
        return Ok(None);
    }
    Ok(Some(text.parse::<usize>()?))
}
/// Get a named property from the emacs token.
///
/// Returns Ok(None) if value is nil or absent.