Beginning of handling quoted strings.

This commit is contained in:
Tom Alexander 2023-04-11 15:23:16 -04:00
parent 5d7ca1b966
commit 3bdb1e3841
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
2 changed files with 55 additions and 3 deletions

View File

@ -1,8 +1,10 @@
use nom::branch::alt; use nom::branch::alt;
use nom::bytes::complete::escaped;
use nom::bytes::complete::tag; use nom::bytes::complete::tag;
use nom::bytes::complete::take_till1; use nom::bytes::complete::take_till1;
use nom::character::complete::multispace0; use nom::character::complete::multispace0;
use nom::character::complete::multispace1; use nom::character::complete::multispace1;
use nom::character::complete::one_of;
use nom::combinator::not; use nom::combinator::not;
use nom::combinator::peek; use nom::combinator::peek;
use nom::multi::separated_list1; use nom::multi::separated_list1;
@ -32,7 +34,11 @@ fn token<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
#[tracing::instrument(ret, level = "debug")] #[tracing::instrument(ret, level = "debug")]
fn list<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { fn list<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
let (remaining, opening_paren) = tag("(")(input)?; let (remaining, opening_paren) = tag("(")(input)?;
let (remaining, children) = delimited(multispace0, separated_list1(multispace1, token), multispace0)(remaining)?; let (remaining, children) = delimited(
multispace0,
separated_list1(multispace1, token),
multispace0,
)(remaining)?;
let (remaining, closing_paren) = tag(")")(remaining)?; let (remaining, closing_paren) = tag(")")(remaining)?;
Ok((remaining, Token::List(children))) Ok((remaining, Token::List(children)))
} }
@ -40,7 +46,7 @@ fn list<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
#[tracing::instrument(ret, level = "debug")] #[tracing::instrument(ret, level = "debug")]
fn atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { fn atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
not(peek(tag(")")))(input)?; not(peek(tag(")")))(input)?;
unquoted_atom(input) alt((quoted_atom, unquoted_atom))(input)
} }
#[tracing::instrument(ret, level = "debug")] #[tracing::instrument(ret, level = "debug")]
@ -52,6 +58,41 @@ fn unquoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
Ok((remaining, Token::Atom(body))) Ok((remaining, Token::Atom(body)))
} }
#[tracing::instrument(ret, level = "debug")]
fn quoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
let (remaining, _) = tag(r#"""#)(input)?;
let (remaining, _) = escaped(
take_till1(|c| match c {
'\\' | '"' => true,
_ => false,
}),
'\\',
one_of(r#"""#),
)(remaining)?;
let (remaining, _) = tag(r#"""#)(remaining)?;
let source = get_consumed(input, remaining);
Ok((remaining, Token::Atom(source)))
}
/// Return the leading part of `input` that a parser consumed.
///
/// `input` is the text originally handed to the parser and `remaining` is what the parser left
/// over; the result is everything in `input` that precedes the start of `remaining`.
///
/// # Panics
///
/// Panics if `remaining` does not lie within the memory span of `input`.
fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str {
    assert!(is_slice_of(input, remaining));
    // The assertion above guarantees `remaining` starts at or after `input`, so the
    // subtraction cannot underflow.
    let consumed_len = remaining.as_ptr() as usize - input.as_ptr() as usize;
    &input[..consumed_len]
}
/// Report whether `child` lies entirely within the memory span of `parent`.
///
/// Only addresses are compared, never contents: the result is `true` when the first byte of
/// `child` is at or after the first byte of `parent` and the end of `child` is at or before the
/// end of `parent`.
fn is_slice_of(parent: &str, child: &str) -> bool {
    let outer = parent.as_bytes().as_ptr_range();
    let inner = child.as_bytes().as_ptr_range();
    outer.start <= inner.start && inner.end <= outer.end
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@ -66,4 +107,15 @@ mod tests {
Token::List(_) => true, Token::List(_) => true,
}); });
} }
/// A list whose first element is a double-quoted atom parses fully and yields a list token.
#[test]
fn quoted() {
    let input = r#" ("foo" bar baz ) "#;
    let (leftover, token) = sexp(input).expect("Parse the input");
    // The whole input, including the surrounding whitespace, must be consumed.
    assert_eq!(leftover, "");
    // The top-level token must be a list rather than a bare atom.
    assert!(matches!(token, Token::List(_)));
}
} }

View File

@ -10,7 +10,7 @@ mod init_tracing;
fn main() -> Result<(), Box<dyn std::error::Error>> { fn main() -> Result<(), Box<dyn std::error::Error>> {
init_telemetry()?; init_telemetry()?;
// emacs_parse_org_document("./org_mode_samples/footnote_definition/simple.org")?; // emacs_parse_org_document("./org_mode_samples/footnote_definition/simple.org")?;
sexp(" (foo bar baz ) ")?; sexp(r#" ("foo" bar baz ) "#)?;
shutdown_telemetry()?; shutdown_telemetry()?;
Ok(()) Ok(())
} }