diff --git a/src/compare/sexp.rs b/src/compare/sexp.rs
index 136c1e7b..2adf5454 100644
--- a/src/compare/sexp.rs
+++ b/src/compare/sexp.rs
@@ -1,8 +1,10 @@
 use nom::branch::alt;
+use nom::bytes::complete::escaped;
 use nom::bytes::complete::tag;
 use nom::bytes::complete::take_till1;
 use nom::character::complete::multispace0;
 use nom::character::complete::multispace1;
+use nom::character::complete::one_of;
 use nom::combinator::not;
 use nom::combinator::peek;
 use nom::multi::separated_list1;
@@ -32,7 +34,11 @@ fn token<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
 #[tracing::instrument(ret, level = "debug")]
 fn list<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
     let (remaining, opening_paren) = tag("(")(input)?;
-    let (remaining, children) = delimited(multispace0, separated_list1(multispace1, token), multispace0)(remaining)?;
+    let (remaining, children) = delimited(
+        multispace0,
+        separated_list1(multispace1, token),
+        multispace0,
+    )(remaining)?;
     let (remaining, closing_paren) = tag(")")(remaining)?;
     Ok((remaining, Token::List(children)))
 }
@@ -40,7 +46,7 @@ fn list<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
 #[tracing::instrument(ret, level = "debug")]
 fn atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
     not(peek(tag(")")))(input)?;
-    unquoted_atom(input)
+    alt((quoted_atom, unquoted_atom))(input)
 }
 
 #[tracing::instrument(ret, level = "debug")]
@@ -52,6 +58,47 @@ fn unquoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
     Ok((remaining, Token::Atom(body)))
 }
 
+/// Parse a double-quoted atom such as `"foo"`, `""` or `"say \"hi\""`.
+///
+/// Inside the quotes a backslash may escape a double quote or another backslash.
+/// On success the returned atom borrows the full source text from `input`, with the
+/// surrounding quotes and the escape sequences included.
+#[tracing::instrument(ret, level = "debug")]
+fn quoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
+    let (remaining, _) = tag(r#"""#)(input)?;
+    // `escaped` reports an error when it matches nothing, so it is wrapped in `opt` to
+    // accept the empty string `""`.
+    let (remaining, _) = nom::combinator::opt(escaped(
+        take_till1(|c| matches!(c, '\\' | '"')),
+        '\\',
+        one_of("\"\\"),
+    ))(remaining)?;
+    let (remaining, _) = tag(r#"""#)(remaining)?;
+    let source = get_consumed(input, remaining);
+    Ok((remaining, Token::Atom(source)))
+}
+
+/// Get the slice of `input` that a parser consumed, given the original `input` handed to
+/// the parser and the `remaining` input it returned.
+///
+/// # Panics
+///
+/// Panics if `remaining` does not lie within `input`.
+fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str {
+    assert!(is_slice_of(input, remaining));
+    let offset = remaining.as_ptr() as usize - input.as_ptr() as usize;
+    &input[..offset]
+}
+
+/// Check if the child string slice lies within the address range of the parent string slice.
+fn is_slice_of(parent: &str, child: &str) -> bool {
+    let parent_start = parent.as_ptr() as usize;
+    let parent_end = parent_start + parent.len();
+    let child_start = child.as_ptr() as usize;
+    let child_end = child_start + child.len();
+    child_start >= parent_start && child_end <= parent_end
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -66,4 +113,36 @@ mod tests {
             Token::List(_) => true,
         });
     }
+
+    #[test]
+    fn quoted() {
+        let input = r#" ("foo" bar baz ) "#;
+        let (remaining, parsed) = sexp(input).expect("Parse the input");
+        assert_eq!(remaining, "");
+        assert!(match parsed {
+            Token::Atom(_) => false,
+            Token::List(_) => true,
+        });
+    }
+
+    #[test]
+    fn quoted_empty() {
+        let (remaining, parsed) = quoted_atom(r#""" bar"#).expect("Parse the input");
+        assert_eq!(remaining, " bar");
+        assert!(match parsed {
+            Token::Atom(body) => body == r#""""#,
+            Token::List(_) => false,
+        });
+    }
+
+    #[test]
+    fn quoted_with_escapes() {
+        let input = r#""a \"b\" \\" bar"#;
+        let (remaining, parsed) = quoted_atom(input).expect("Parse the input");
+        assert_eq!(remaining, " bar");
+        assert!(match parsed {
+            Token::Atom(body) => body == r#""a \"b\" \\""#,
+            Token::List(_) => false,
+        });
+    }
 }
diff --git a/src/org_compare.rs b/src/org_compare.rs
index 76ce3b91..6a06a0f8 100644
--- a/src/org_compare.rs
+++ b/src/org_compare.rs
@@ -10,7 +10,7 @@ mod init_tracing;
 fn main() -> Result<(), Box> {
     init_telemetry()?;
     // emacs_parse_org_document("./org_mode_samples/footnote_definition/simple.org")?;
-    sexp(" (foo bar baz ) ")?;
+    sexp(r#" ("foo" bar baz ) "#)?;
     shutdown_telemetry()?;
     Ok(())
 }