From 91e9645c371288bac9aaeda4dab5afd79e3b8027 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 17 Aug 2023 05:18:51 -0400 Subject: [PATCH] Recursing through the ast to find the lists. --- src/owner_tree.rs | 195 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 145 insertions(+), 50 deletions(-) diff --git a/src/owner_tree.rs b/src/owner_tree.rs index d45fe90..f067782 100644 --- a/src/owner_tree.rs +++ b/src/owner_tree.rs @@ -7,30 +7,32 @@ pub fn build_owner_tree<'a>( ast_raw: &'a str, ) -> Result> { let (_remaining, parsed_sexp) = sexp_with_padding(ast_raw)?; - let lists = find_lists_in_document(&parsed_sexp)?; + let lists = find_lists_in_document(body, &parsed_sexp)?; Ok(OwnerTree { + input: body.to_owned(), ast: ast_raw.to_owned(), - children: lists, + lists, }) } #[derive(Serialize)] pub struct OwnerTree { + input: String, ast: String, - children: Vec, + lists: Vec, } #[derive(Serialize)] pub struct PlainList { position: SourceRange, - children: Vec, + items: Vec, } #[derive(Serialize)] pub struct PlainListItem { position: SourceRange, - children: Vec, + lists: Vec, } #[derive(Serialize)] @@ -42,6 +44,7 @@ pub struct SourceRange { } fn find_lists_in_document<'a>( + original_source: &str, current_token: &Token<'a>, ) -> Result, Box> { // DFS looking for top-level lists @@ -53,12 +56,91 @@ fn find_lists_in_document<'a>( // skip 2 to skip token name and standard properties for child_token in children.iter().skip(2) { - found_lists.extend(recurse_token(child_token)?); + found_lists.extend(recurse_token(original_source, child_token)?); } Ok(found_lists) } +fn recurse_token<'a>( + original_source: &str, + current_token: &Token<'a>, +) -> Result, Box> { + match current_token { + Token::Atom(_) | Token::TextWithProperties(_) => Ok(Vec::new()), + Token::List(_) => { + let new_lists = find_lists_in_list(original_source, current_token)?; + Ok(new_lists) + } + Token::Vector(_) => { + let new_lists = find_lists_in_vector(original_source, current_token)?; + Ok(new_lists) + } + } +} + +fn find_lists_in_list<'a>( + original_source: &str, + current_token: &Token<'a>, +) -> Result, Box> { + let mut found_lists = Vec::new(); + let children = current_token.as_list()?; + if assert_name(current_token, "plain-list").is_ok() { + // Found a list! + let mut found_items = Vec::new(); + // skip 2 to skip token name and standard properties + for child_token in children.iter().skip(2) { + found_items.push(get_item_in_list(original_source, child_token)?); + } + + found_lists.push(PlainList { + position: get_bounds(original_source, current_token)?, + items: found_items, + }); + } else { + // skip 2 to skip token name and standard properties + for child_token in children.iter().skip(2) { + found_lists.extend(recurse_token(original_source, child_token)?); + } + } + + Ok(found_lists) +} + +fn find_lists_in_vector<'a>( + original_source: &str, + current_token: &Token<'a>, +) -> Result, Box> { + let mut found_lists = Vec::new(); + let children = current_token.as_vector()?; + + for child_token in children.iter() { + found_lists.extend(recurse_token(original_source, child_token)?); + } + + Ok(found_lists) +} + +fn get_item_in_list<'a>( + original_source: &str, + current_token: &Token<'a>, +) -> Result> { + let mut found_lists = Vec::new(); + let children = current_token.as_list()?; + let token_name = "item"; + assert_name(current_token, token_name)?; + + // skip 2 to skip token name and standard properties + for child_token in children.iter().skip(2) { + found_lists.extend(recurse_token(original_source, child_token)?); + } + + Ok(PlainListItem { + position: get_bounds(original_source, current_token)?, + lists: found_lists, + }) +} + fn assert_name<'s>(emacs: &'s Token<'s>, name: &str) -> Result<(), Box> { let children = emacs.as_list()?; let first_child = children @@ -75,48 +157,61 @@ fn assert_name<'s>(emacs: &'s Token<'s>, name: &str) -> Result<(), Box( - current_token: &Token<'a>, -) -> Result, Box> { - match current_token { - Token::Atom(_) | Token::TextWithProperties(_) => Ok(Vec::new()), - Token::List(_) => { - let new_lists = find_lists_in_list(current_token)?; - Ok(new_lists) - } - Token::Vector(_) => { - let new_lists = find_lists_in_vector(current_token)?; - Ok(new_lists) - } - } -} - -fn find_lists_in_list<'a>( - current_token: &Token<'a>, -) -> Result, Box> { - let mut found_lists = Vec::new(); - let children = current_token.as_list()?; - if assert_name(current_token, "plain-list").is_ok() { - // Found a list! - } - - // skip 2 to skip token name and standard properties - for child_token in children.iter().skip(2) { - found_lists.extend(recurse_token(child_token)?); - } - - Ok(found_lists) -} - -fn find_lists_in_vector<'a>( - current_token: &Token<'a>, -) -> Result, Box> { - let mut found_lists = Vec::new(); - let children = current_token.as_vector()?; - - for child_token in children.iter() { - found_lists.extend(recurse_token(child_token)?); - } - - Ok(found_lists) +fn get_bounds<'s>( + original_source: &'s str, + emacs: &'s Token<'s>, +) -> Result> { + let children = emacs.as_list()?; + let attributes_child = children + .iter() + .nth(1) + .ok_or("Should have an attributes child.")?; + let attributes_map = attributes_child.as_map()?; + let standard_properties = attributes_map.get(":standard-properties"); + let (begin, end) = if standard_properties.is_some() { + let std_props = standard_properties + .expect("if statement proves its Some") + .as_vector()?; + let begin = std_props + .get(0) + .ok_or("Missing first element in standard properties")? + .as_atom()?; + let end = std_props + .get(1) + .ok_or("Missing first element in standard properties")? + .as_atom()?; + (begin, end) + } else { + let begin = attributes_map + .get(":begin") + .ok_or("Missing :begin attribute.")? + .as_atom()?; + let end = attributes_map + .get(":end") + .ok_or("Missing :end attribute.")? + .as_atom()?; + (begin, end) + }; + let begin = begin.parse::()?; + let end = end.parse::()?; + let start_line = original_source + .chars() + .into_iter() + .take(usize::try_from(begin)? - 1) + .filter(|x| *x == '\n') + .count() + + 1; + let end_line = original_source + .chars() + .into_iter() + .take(usize::try_from(end)? - 1) + .filter(|x| *x == '\n') + .count() + + 1; + Ok(SourceRange { + start_line: u32::try_from(start_line)?, + end_line: u32::try_from(end_line)?, + start_character: begin, + end_character: end, + }) }