use std::collections::HashMap; use nom::branch::alt; use nom::bytes::complete::escaped; use nom::bytes::complete::tag; use nom::bytes::complete::take_till1; use nom::character::complete::multispace0; use nom::character::complete::multispace1; use nom::character::complete::one_of; use nom::combinator::map; use nom::combinator::not; use nom::combinator::opt; use nom::combinator::peek; use nom::combinator::verify; use nom::multi::separated_list1; use nom::sequence::delimited; use nom::sequence::preceded; use nom::sequence::tuple; use super::error::Res; #[derive(Debug)] pub enum Token<'s> { Atom(&'s str), List(Vec>), TextWithProperties(TextWithProperties<'s>), } #[derive(Debug)] pub struct TextWithProperties<'s> { text: &'s str, properties: Vec>, } impl<'s> Token<'s> { pub fn as_list<'p>(&'p self) -> Result<&'p Vec>, Box> { Ok(match self { Token::List(children) => Ok(children), _ => Err("wrong token type"), }?) } pub fn as_atom<'p>(&'p self) -> Result<&'s str, Box> { Ok(match self { Token::Atom(body) => Ok(*body), _ => Err("wrong token type"), }?) } pub fn as_map<'p>( &'p self, ) -> Result>, Box> { let mut hashmap = HashMap::new(); let children = self.as_list()?; if children.len() % 2 != 0 { return Err("Expecting an even number of children".into()); } let mut key: Option<&str> = None; for child in children.iter() { match key { None => { key = Some(child.as_atom()?); } Some(key_val) => { key = None; hashmap.insert(key_val, child); } }; } Ok(hashmap) } } #[tracing::instrument(ret, level = "debug")] pub fn sexp<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { let (remaining, _) = multispace0(input)?; let (remaining, tkn) = token(remaining)?; let (remaining, _) = multispace0(remaining)?; Ok((remaining, tkn)) } #[tracing::instrument(ret, level = "debug")] fn token<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { alt((list, atom))(input) } #[tracing::instrument(ret, level = "debug")] fn list<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { let (remaining, _) = tag("(")(input)?; let (remaining, children) = delimited( multispace0, separated_list1(multispace1, token), multispace0, )(remaining)?; let (remaining, _) = tag(")")(remaining)?; Ok((remaining, Token::List(children))) } #[tracing::instrument(ret, level = "debug")] fn atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { not(peek(tag(")")))(input)?; alt((text_with_properties, quoted_atom, unquoted_atom))(input) } #[tracing::instrument(ret, level = "debug")] fn unquoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { let (remaining, body) = take_till1(|c| match c { ' ' | '\t' | '\r' | '\n' | ')' => true, _ => false, })(input)?; Ok((remaining, Token::Atom(body))) } #[tracing::instrument(ret, level = "debug")] fn quoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { let (remaining, _) = tag(r#"""#)(input)?; let (remaining, _) = escaped( take_till1(|c| match c { '\\' | '"' | ')' => true, _ => false, }), '\\', one_of(r#""n"#), )(remaining)?; let (remaining, _) = tag(r#"""#)(remaining)?; let source = get_consumed(input, remaining); Ok((remaining, Token::Atom(source))) } fn text_with_properties<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { let (remaining, _) = tag("#(")(input)?; let (remaining, (text, props)) = delimited( multispace0, tuple(( map(quoted_atom, |atom| match atom { Token::Atom(body) => body, _ => unreachable!(), }), preceded(multispace1, opt(separated_list1(multispace1, token))), )), multispace0, )(remaining)?; let (remaining, _) = tag(")")(remaining)?; Ok(( remaining, Token::TextWithProperties(TextWithProperties { text, properties: props.unwrap_or(Vec::new()), }), )) } /// Get a slice of the string that was consumed in a parser using the original input to the parser and the remaining input after the parser. fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str { assert!(is_slice_of(input, remaining)); let source = { let offset = remaining.as_ptr() as usize - input.as_ptr() as usize; &input[..offset] }; source } /// Check if the child string slice is a slice of the parent string slice. fn is_slice_of(parent: &str, child: &str) -> bool { let parent_start = parent.as_ptr() as usize; let parent_end = parent_start + parent.len(); let child_start = child.as_ptr() as usize; let child_end = child_start + child.len(); child_start >= parent_start && child_end <= parent_end } #[cfg(test)] mod tests { use super::*; #[test] fn simple() { let input = " (foo bar baz ) "; let (remaining, parsed) = sexp(input).expect("Parse the input"); assert_eq!(remaining, ""); assert!(match parsed { Token::Atom(_) => false, Token::List(_) => true, Token::TextWithProperties(_) => false, }); } #[test] fn quoted() { let input = r#" ("foo" bar baz ) "#; let (remaining, parsed) = sexp(input).expect("Parse the input"); assert_eq!(remaining, ""); assert!(match parsed { Token::Atom(_) => false, Token::List(_) => true, Token::TextWithProperties(_) => false, }); let children = match parsed { Token::List(children) => children, _ => panic!("Should be a list."), }; assert_eq!( match children.first() { Some(Token::Atom(body)) => *body, _ => panic!("First child should be an atom."), }, r#""foo""# ) } }