Fix handling of plain text.

Highlighting characters.
Rendering ast tree.
2023-08-18 21:22:53 -04:00 · 2023-08-18 21:06:43 -04:00 · 2023-08-18 19:23:31 -04:00 · 2023-08-18 19:11:51 -04:00 · 2023-08-18 18:32:23 -04:00
4 changed files with 231 additions and 112 deletions
--- a/src/owner_tree.rs
+++ b/src/owner_tree.rs
@@ -7,12 +7,13 @@ pub fn build_owner_tree<'a>(
    ast_raw: &'a str,
 ) -> Result<OwnerTree, Box<dyn std::error::Error + 'a>> {
    let (_remaining, parsed_sexp) = sexp_with_padding(ast_raw)?;
-    let lists = find_lists_in_document(body, &parsed_sexp)?;
+    assert_name(&parsed_sexp, "org-data")?;
+    let ast_node = build_ast_node(body, None, &parsed_sexp)?;

    Ok(OwnerTree {
        input: body.to_owned(),
        ast: ast_raw.to_owned(),
-        lists,
+        tree: ast_node,
    })
 }

@@ -20,7 +21,14 @@ pub fn build_owner_tree<'a>(
 pub struct OwnerTree {
    input: String,
    ast: String,
-    lists: Vec<PlainList>,
+    tree: AstNode,
+}
+
+#[derive(Serialize)]
+pub struct AstNode {
+    name: String,
+    position: SourceRange,
+    children: Vec<AstNode>,
 }

 #[derive(Serialize)]
@@ -43,102 +51,66 @@ pub struct SourceRange {
    end_character: u32, // Exclusive
 }

-fn find_lists_in_document<'a>(
+fn build_ast_node<'a>(
    original_source: &str,
+    parent_contents_begin: Option<u32>,
    current_token: &Token<'a>,
-) -> Result<Vec<PlainList>, Box<dyn std::error::Error>> {
-    // DFS looking for top-level lists
-
-    let mut found_lists = Vec::new();
-    let children = current_token.as_list()?;
-    let token_name = "org-data";
-    assert_name(current_token, token_name)?;
-
-    // skip 2 to skip token name and standard properties
-    for child_token in children.iter().skip(2) {
-        found_lists.extend(recurse_token(original_source, child_token)?);
-    }
-
-    Ok(found_lists)
-}
-
-fn recurse_token<'a>(
-    original_source: &str,
-    current_token: &Token<'a>,
-) -> Result<Vec<PlainList>, Box<dyn std::error::Error>> {
-    match current_token {
-        Token::Atom(_) | Token::TextWithProperties(_) => Ok(Vec::new()),
-        Token::List(_) => {
-            let new_lists = find_lists_in_list(original_source, current_token)?;
-            Ok(new_lists)
+) -> Result<AstNode, Box<dyn std::error::Error>> {
+    let maybe_plain_text = current_token.as_text();
+    let ast_node = match maybe_plain_text {
+        Ok(plain_text) => {
+            let parent_contents_begin = parent_contents_begin
+                .ok_or("parent_contents_begin should be set for all plain text nodes.")?;
+            let parameters = &plain_text.properties;
+            let begin = parent_contents_begin
+                + parameters
+                    .get(0)
+                    .ok_or("Missing first element past the text.")?
+                    .as_atom()?
+                    .parse::<u32>()?;
+            let end = parent_contents_begin
+                + parameters
+                    .get(1)
+                    .ok_or("Missing second element past the text.")?
+                    .as_atom()?
+                    .parse::<u32>()?;
+            let (start_line, end_line) = get_line_numbers(original_source, begin, end)?;
+            AstNode {
+                name: "plain-text".to_owned(),
+                position: SourceRange {
+                    start_line,
+                    end_line,
+                    start_character: begin,
+                    end_character: end,
+                },
+                children: Vec::new(),
+            }
        }
-        Token::Vector(_) => {
-            let new_lists = find_lists_in_vector(original_source, current_token)?;
-            Ok(new_lists)
+        Err(_) => {
+            // Not plain text, so it must be a list
+            let parameters = current_token.as_list()?;
+            let name = parameters
+                .first()
+                .ok_or("Should have at least one child.")?
+                .as_atom()?;
+            let position = get_bounds(original_source, current_token)?;
+            let mut children = Vec::new();
+            let mut contents_begin = get_contents_begin(current_token)?;
+            for child in parameters.into_iter().skip(2) {
+                let new_ast_node = build_ast_node(original_source, Some(contents_begin), child)?;
+                contents_begin = new_ast_node.position.end_character;
+                children.push(new_ast_node);
+            }
+
+            AstNode {
+                name: name.to_owned(),
+                position,
+                children,
+            }
        }
-    }
-}
+    };

-fn find_lists_in_list<'a>(
-    original_source: &str,
-    current_token: &Token<'a>,
-) -> Result<Vec<PlainList>, Box<dyn std::error::Error>> {
-    let mut found_lists = Vec::new();
-    let children = current_token.as_list()?;
-    if assert_name(current_token, "plain-list").is_ok() {
-        // Found a list!
-        let mut found_items = Vec::new();
-        // skip 2 to skip token name and standard properties
-        for child_token in children.iter().skip(2) {
-            found_items.push(get_item_in_list(original_source, child_token)?);
-        }
-
-        found_lists.push(PlainList {
-            position: get_bounds(original_source, current_token)?,
-            items: found_items,
-        });
-    } else {
-        // skip 2 to skip token name and standard properties
-        for child_token in children.iter().skip(2) {
-            found_lists.extend(recurse_token(original_source, child_token)?);
-        }
-    }
-
-    Ok(found_lists)
-}
-
-fn find_lists_in_vector<'a>(
-    original_source: &str,
-    current_token: &Token<'a>,
-) -> Result<Vec<PlainList>, Box<dyn std::error::Error>> {
-    let mut found_lists = Vec::new();
-    let children = current_token.as_vector()?;
-
-    for child_token in children.iter() {
-        found_lists.extend(recurse_token(original_source, child_token)?);
-    }
-
-    Ok(found_lists)
-}
-
-fn get_item_in_list<'a>(
-    original_source: &str,
-    current_token: &Token<'a>,
-) -> Result<PlainListItem, Box<dyn std::error::Error>> {
-    let mut found_lists = Vec::new();
-    let children = current_token.as_list()?;
-    let token_name = "item";
-    assert_name(current_token, token_name)?;
-
-    // skip 2 to skip token name and standard properties
-    for child_token in children.iter().skip(2) {
-        found_lists.extend(recurse_token(original_source, child_token)?);
-    }
-
-    Ok(PlainListItem {
-        position: get_bounds(original_source, current_token)?,
-        lists: found_lists,
-    })
+    Ok(ast_node)
 }

 fn assert_name<'s>(emacs: &'s Token<'s>, name: &str) -> Result<(), Box<dyn std::error::Error>> {
@@ -194,6 +166,47 @@ fn get_bounds<'s>(
    };
    let begin = begin.parse::<u32>()?;
    let end = end.parse::<u32>()?;
+    let (start_line, end_line) = get_line_numbers(original_source, begin, end)?;
+    Ok(SourceRange {
+        start_line,
+        end_line,
+        start_character: begin,
+        end_character: end,
+    })
+}
+
+fn get_contents_begin<'s>(emacs: &'s Token<'s>) -> Result<u32, Box<dyn std::error::Error>> {
+    let children = emacs.as_list()?;
+    let attributes_child = children
+        .iter()
+        .nth(1)
+        .ok_or("Should have an attributes child.")?;
+    let attributes_map = attributes_child.as_map()?;
+    let standard_properties = attributes_map.get(":standard-properties");
+    let contents_begin = if standard_properties.is_some() {
+        let std_props = standard_properties
+            .expect("if statement proves its Some")
+            .as_vector()?;
+        let contents_begin = std_props
+            .get(2)
+            .ok_or("Missing third element in standard properties")?
+            .as_atom()?;
+        contents_begin
+    } else {
+        let contents_begin = attributes_map
+            .get(":contents-begin")
+            .ok_or("Missing :contents-begin attribute.")?
+            .as_atom()?;
+        contents_begin
+    };
+    Ok(contents_begin.parse::<u32>()?)
+}
+
+fn get_line_numbers<'s>(
+    original_source: &'s str,
+    begin: u32,
+    end: u32,
+) -> Result<(u32, u32), Box<dyn std::error::Error>> {
    let start_line = original_source
        .chars()
        .into_iter()
@@ -208,10 +221,5 @@ fn get_bounds<'s>(
        .filter(|x| *x == '\n')
        .count()
        + 1;
-    Ok(SourceRange {
-        start_line: u32::try_from(start_line)?,
-        end_line: u32::try_from(end_line)?,
-        start_character: begin,
-        end_character: end,
-    })
+    Ok((u32::try_from(start_line)?, u32::try_from(end_line)?))
 }
--- a/static/index.html
+++ b/static/index.html
@@ -14,7 +14,7 @@
        <div id="parse-output" class="code_block" style="counter-set: code_line_number 0;"></div>
      </div>
      <div>
-        tree goes here
+        <div id="ast-tree" class="ast_tree"></div>
      </div>
    </div>
  </body>
--- a/static/script.js
+++ b/static/script.js
@@ -1,6 +1,7 @@
 let inFlightRequest = null;
 const inputElement = document.querySelector("#org-input");
 const outputElement = document.querySelector("#parse-output");
+const astTreeElement = document.querySelector("#ast-tree");

 function abortableFetch(request, options) {
    const controller = new AbortController();
@@ -12,9 +13,20 @@ function abortableFetch(request, options) {
    };
 }

-async function renderParseResponse(response) {
-    console.log(response);
+function clearOutput() {
+    clearActiveAstNode();
    outputElement.innerHTML = "";
+    astTreeElement.innerHTML = "";
+}
+
+function renderParseResponse(response) {
+    clearOutput();
+    console.log(response);
+    renderSourceBox(response);
+    renderAstTree(response);
+}
+
+function renderSourceBox(response) {
    const lines = response.input.split(/\r?\n/);
    const numLines = lines.length;
    const numDigits = Math.log10(numLines) + 1;
@@ -23,18 +35,83 @@ async function renderParseResponse(response) {

    for (let line of lines) {
        let wrappedLine = document.createElement("code");
-        wrappedLine.textContent = line ? line : "\n";
+        if (line !== "" && line !== null) {
+            for (let chr of line) {
+                // Please forgive me: every character gets its own span so that individual characters can be highlighted.
+                let wrappedCharacter = document.createElement("span");
+                wrappedCharacter.textContent = chr;
+                wrappedLine.appendChild(wrappedCharacter);
+            }
+        } else {
+            let wrappedCharacter = document.createElement("span");
+            wrappedCharacter.textContent = "\n";
+            wrappedLine.appendChild(wrappedCharacter);
+        }
        outputElement.appendChild(wrappedLine);
    }
 }

+function renderAstTree(response) {
+    renderAstNode(response.input, 0, response.tree);
+}
+
+function renderAstNode(originalSource, depth, astNode) {
+    const nodeElem = document.createElement("div");
+    nodeElem.classList.add("ast_node");
+
+    let sourceForNode = originalSource.slice(astNode.position.start_character - 1, astNode.position.end_character - 1);
+    // Since sourceForNode is a string, JSON.stringify will escape with backslashes and wrap the text in quotation marks, ensuring that the string ends up on a single line. Coincidentally, this is the behavior we want.
+    let escapedSource = JSON.stringify(sourceForNode);
+
+    nodeElem.innerText = `${astNode.name}: ${escapedSource}`;
+    nodeElem.style.marginLeft = `${depth * 20}px`;
+    nodeElem.dataset.startLine = astNode.position.start_line;
+    nodeElem.dataset.endLine = astNode.position.end_line;
+    nodeElem.dataset.startCharacter = astNode.position.start_character;
+    nodeElem.dataset.endCharacter = astNode.position.end_character;
+
+    nodeElem.addEventListener("click", () => {
+        setActiveAstNode(nodeElem, originalSource);
+    });
+
+    astTreeElement.appendChild(nodeElem);
+    for (let child of astNode.children) {
+        renderAstNode(originalSource, depth + 1, child);
+    }
+}
+
+function clearActiveAstNode() {
+    for (let elem of document.querySelectorAll("#ast-tree .ast_node.highlighted")) {
+        elem.classList.remove("highlighted");
+    }
+    for (let elem of document.querySelectorAll("#parse-output > code.highlighted")) {
+        elem.classList.remove("highlighted");
+    }
+    for (let elem of document.querySelectorAll("#parse-output > code > span")) {
+        elem.classList.remove("highlighted");
+    }
+}
+
+function setActiveAstNode(elem, originalSource) {
+    clearActiveAstNode();
+    elem.classList.add("highlighted");
+    let startLine = parseInt(elem.dataset.startLine, 10);
+    let endLine = parseInt(elem.dataset.endLine, 10);
+    let startCharacter = parseInt(elem.dataset.startCharacter, 10);
+    let endCharacter = parseInt(elem.dataset.endCharacter, 10);
+    for (let line = startLine; line < endLine; ++line) {
+        highlightLine("parse-output", line - 1);
+    }
+    highlightCharacters("parse-output", originalSource, startCharacter, endCharacter);
+}
+
 inputElement.addEventListener("input", async () => {
    let orgSource = inputElement.value;
    if (inFlightRequest != null) {
        inFlightRequest.abort();
        inFlightRequest = null;
    }
-    outputElement.innerHTML = "";
+    clearOutput();

    let newRequest = abortableFetch("/parse", {
        method: "POST",
@@ -55,12 +132,24 @@ inputElement.addEventListener("input", async () => {

 function highlightLine(htmlName, lineOffset) {
  const childOffset = lineOffset + 1;
-    const codeLineElement = document.querySelector(`.${htmlName} > code:nth-child(${childOffset})`);
+    const codeLineElement = document.querySelector(`#${htmlName} > code:nth-child(${childOffset})`);
  codeLineElement?.classList.add("highlighted")
 }

-function unhighlightLine(htmlName, lineOffset) {
-  const childOffset = lineOffset + 1;
-    const codeLineElement = document.querySelector(`.${htmlName} > code:nth-child(${childOffset})`);
-  codeLineElement?.classList.remove("highlighted")
+function highlightCharacters(htmlName, originalSource, startCharacter, endCharacter) {
+    let sourceBefore = originalSource.slice(0, startCharacter - 1);
+    let precedingLineBreak = sourceBefore.lastIndexOf("\n");
+    let characterIndexOnLine = precedingLineBreak !== -1 ? startCharacter - precedingLineBreak - 1 : startCharacter;
+    let lineNumber = (sourceBefore.match(/\r?\n/g) || '').length + 1;
+
+    for (let characterIndex = startCharacter; characterIndex < endCharacter; ++characterIndex) {
+        document.querySelector(`#${htmlName} > code:nth-child(${lineNumber}) > span:nth-child(${characterIndexOnLine})`)?.classList.add("highlighted");
+        if (originalSource[characterIndex - 1] == "\n") {
+            ++lineNumber;
+            characterIndexOnLine = 1;
+        } else {
+            ++characterIndexOnLine;
+        }
+    }
+
 }
--- a/static/style.css
+++ b/static/style.css
@@ -47,18 +47,40 @@ h7 {
 }

 .code_block > code.highlighted {
-    background: #307351ff;
+    /* We aren't using this because we are going to highlight individual characters, but we still need to set the highlighted class on the code elem so the line numbers on the left get highlighted to make empty lines more obvious. */
+    /* background: #307351ff; */
 }

 .code_block > code.highlighted::before {
    background: #307351ff;
 }

+.code_block > code > span.highlighted {
+    background: #307351ff;
+}
+
 .output_container {
    display: flex;
    flex-direction: row;
 }

 .output_container > * {
-    flex: 1 0;
+    flex: 1 0 0;
+}
+
+.ast_tree {
+    padding: 5px;
+}
+
+.ast_node {
+    cursor: pointer;
+    background: #eeeeee;
+    margin-bottom: 5px;
+    border: 1px solid #000000;
+    padding: 2px;
+}
+
+.ast_node.highlighted {
+    background: #307351ff;
+    color: #ffffff;
 }
Author	SHA1	Message	Date
Tom Alexander	9032b00e1b	Fix handling of plain text.	2023-08-18 21:22:53 -04:00
Tom Alexander	acdc8b8993	Highlighting characters.	2023-08-18 21:06:43 -04:00
Tom Alexander	676dffa15f	Rendering ast tree.	2023-08-18 19:23:31 -04:00
Tom Alexander	ab836f2794	Switch to returning the whole tree from rust instead of just the lists.	2023-08-18 19:11:51 -04:00
Tom Alexander	0ee33949e9	Beginning of rendering the ast list.	2023-08-18 18:32:23 -04:00