From b06424cb1779d229eb19ddf926f3f492e9742565 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 22 Feb 2025 15:09:00 -0500 Subject: [PATCH] Initial highlighting code. The dust auto-escaping is causing this naive approach to fail so I will have to create a distinction between highlighted code and not-highlighted code. --- Cargo.lock | 138 ++++++++++++++++++++++++++++++++-- Cargo.toml | 2 + src/intermediate/src_block.rs | 68 +++++++++++++++++ 3 files changed, 201 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c1bd77d..a1a742c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,15 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + [[package]] name = "anstream" version = "0.6.15" @@ -123,6 +132,15 @@ version = "1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "428d9aa8fbc0670b7b8d6030a7fadd0f86151cae55e4dbbece15f3780a3dfaf3" +[[package]] +name = "cc" +version = "1.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c736e259eea577f443d5c86c304f9f4ae0295c43f3ba05c21f1d66b5f06001af" +dependencies = [ + "shlex", +] + [[package]] name = "cfg-if" version = "1.0.0" @@ -445,6 +463,8 @@ dependencies = [ "serde_json", "tokio", "toml", + "tree-sitter-highlight", + "tree-sitter-nix", "url", ] @@ -517,9 +537,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "proc-macro2" -version = "1.0.88" +version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c3a7fc5db1e57d5a779a352c8cdb57b29aa4c40cc69c3a68a7fedc815fbf2f9" +checksum = 
"60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" dependencies = [ "unicode-ident", ] @@ -539,6 +559,35 @@ version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "941ba9d78d8e2f7ce474c015eea4d9c6d25b6a3327f9832ee29a4de27f91bbb8" +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + [[package]] name = "rustc-demangle" version = "0.1.24" @@ -582,10 +631,11 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.130" +version = "1.0.139" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "610f75ff4a8e3cb29b85da56eabdd1bff5b06739059a4b8e2967fef32e5d9944" +checksum = "44f86c3acccc9c65b153fe1b85a3be07fe5515274ec9f0653b4a0875731c72a6" dependencies = [ + "indexmap", "itoa", "memchr", "ryu", @@ -601,6 +651,12 @@ dependencies = [ "serde", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "slab" version = "0.4.9" @@ -617,10 +673,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] -name = "syn" -version = "2.0.79" +name = 
"streaming-iterator" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + +[[package]] +name = "syn" +version = "2.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1" dependencies = [ "proc-macro2", "quote", @@ -633,6 +695,26 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" +[[package]] +name = "thiserror" +version = "2.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "tinyvec" version = "1.8.0" @@ -693,6 +775,48 @@ dependencies = [ "winnow", ] +[[package]] +name = "tree-sitter" +version = "0.25.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5168a515fe492af54c5cc8800ff8c840be09fa5168de45838afaecd3e008bce4" +dependencies = [ + "cc", + "regex", + "regex-syntax", + "serde_json", + "streaming-iterator", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-highlight" +version = "0.25.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "457164f56e8dbbd0dc620c239bd7e2eb6025b76e4d1593a690bd4d9ed37bf168" +dependencies = [ + "regex", + "streaming-iterator", + "thiserror", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-language" +version = "0.1.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4013970217383f67b18aef68f6fb2e8d409bc5755227092d32efb0422ba24b8" + +[[package]] +name = "tree-sitter-nix" +version = "0.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a8d4f8705d377d63242a075331d2d8c1dcc9828fd74aa13d7145185b3d9c004" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "unicode-bidi" version = "0.3.17" diff --git a/Cargo.toml b/Cargo.toml index 47d1263..5b71650 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,8 @@ serde = { version = "1.0.189", default-features = false, features = ["std", "der serde_json = "1.0.107" tokio = { version = "1.30.0", default-features = false, features = ["rt", "rt-multi-thread", "fs", "io-util"] } toml = "0.8.2" +tree-sitter-highlight = "0.25.2" +tree-sitter-nix = "0.0.2" url = "2.5.0" # Optimized build for any sort of release. diff --git a/src/intermediate/src_block.rs b/src/intermediate/src_block.rs index b0d948b..0055d46 100644 --- a/src/intermediate/src_block.rs +++ b/src/intermediate/src_block.rs @@ -1,6 +1,12 @@ +use std::borrow::Cow; + use super::macros::intermediate; use crate::error::CustomError; use organic::types::StandardProperties; +use tree_sitter_highlight::Highlight; +use tree_sitter_highlight::HighlightConfiguration; +use tree_sitter_highlight::HighlightEvent; +use tree_sitter_highlight::Highlighter; #[derive(Debug, Clone)] pub(crate) struct ISrcBlock { @@ -59,6 +65,14 @@ intermediate!( }) .collect(); let language = original.language.map(str::to_owned); + + let lines = match language.as_ref().map(String::as_str) { + Some("nix") => { + // foo + highlight_nix(lines)? 
+ } + _ => lines, + }; Ok(ISrcBlock { lines, language, @@ -76,3 +90,57 @@ fn ascii_whitespace_value(c: char) -> usize { _ => unreachable!("Only ascii whitespace can reach this code."), } } + +fn highlight_nix(lines: Vec<String>) -> Result<Vec<String>, CustomError> { + let highlight_names = ["comment", "keyword"]; + // Need 1 highlighter per thread + let mut highlighter = Highlighter::new(); + let language = tree_sitter_nix::LANGUAGE.into(); + let mut config = + HighlightConfiguration::new(language, "nix", tree_sitter_nix::HIGHLIGHTS_QUERY, "", "") + .unwrap(); + config.configure(&highlight_names); + + let combined_text = lines.join(""); + + let highlights = highlighter + .highlight(&config, combined_text.as_bytes(), None, |_| None) + .unwrap(); + + let mut highlighted_text = Vec::new(); + for event in highlights { + match event.unwrap() { + HighlightEvent::Source { start, end } => { + highlighted_text.push(Cow::Borrowed(&combined_text[start..end])); + } + HighlightEvent::HighlightStart(s) => { + let class_name = format!("srchl_{}", highlight_names[s.0]); + highlighted_text.push(Cow::Owned(format!(r#"<span class="{}">"#, class_name))); + } + HighlightEvent::HighlightEnd => { + highlighted_text.push(Cow::Borrowed(r#"</span>"#)); + } + } + } + + let highlighted_text = highlighted_text.join(""); + let lines = highlighted_text + .split_inclusive('\n') + .map(str::to_owned) + .collect(); + Ok(lines) +} + +// use tree_sitter::Parser; +// fn dump_nix<B>(body: B) -> Result<(), CustomError> +// where +// B: AsRef<str>, +// { +// let mut parser = Parser::new(); +// parser +// .set_language(&tree_sitter_nix::LANGUAGE.into()) +// .expect("Error loading Nix grammar"); +// let mut tree = parser.parse(body.as_ref(), None).unwrap(); +// println!("{}", tree.root_node()); +// Ok(()) +// }