natter/src/intermediate/src_block.rs
Tom Alexander b06424cb17
Initial highlighting code.
The dust auto-escaping is causing this naive approach to fail so I will have to create a distinction between highlighted code and not-highlighted code.
2025-02-22 15:09:00 -05:00

147 lines
4.8 KiB
Rust

use std::borrow::Cow;
use super::macros::intermediate;
use crate::error::CustomError;
use organic::types::StandardProperties;
use tree_sitter_highlight::Highlight;
use tree_sitter_highlight::HighlightConfiguration;
use tree_sitter_highlight::HighlightEvent;
use tree_sitter_highlight::Highlighter;
// Intermediate representation of an org-mode source block, built from an
// `organic::types::SrcBlock` by the `intermediate!` invocation in this file.
#[derive(Debug, Clone)]
pub(crate) struct ISrcBlock {
// The block's text, one entry per line. Each entry keeps its line terminator
// (the text is split with `split_inclusive('\n')`) and has the block's common
// leading indentation removed. For `nix` blocks the entries come from
// `highlight_nix` and may contain `<span class="srchl_…">` markup.
pub(crate) lines: Vec<String>,
// Language name copied from the parsed block, or `None` when the block does
// not declare one.
pub(crate) language: Option<String>,
// Value of `get_post_blank()` on the parsed block.
// NOTE(review): presumably the count of blank lines following the block; confirm against organic.
pub(crate) post_blank: organic::types::PostBlank,
}
// Builds an `ISrcBlock` from a parsed `organic::types::SrcBlock`. The
// surrounding function is generated by the `intermediate!` macro; the body
// below does the following:
//
// 1. Splits the block's value into lines, keeping each line terminator.
// 2. Measures the indentation of every non-blank line (space = 1, tab = 8,
//    see `ascii_whitespace_value`).
// 3. Strips the smallest indentation found on any non-blank line from all
//    lines and re-emits the remaining indentation as spaces.
// 4. Sends `nix` blocks through `highlight_nix`; other languages are left
//    untouched.
intermediate!(
    ISrcBlock,
    &'orig organic::types::SrcBlock<'parse>,
    original,
    _intermediate_context,
    {
        let source_code = original.get_value();

        // Pair every line with its indentation width. Whitespace-only lines
        // get `None` so they can be told apart from real content that starts
        // at column zero; their text is kept unchanged.
        let prefix_content_pairs: Vec<_> = source_code
            .split_inclusive('\n')
            .map(|line| match line.find(|c: char| !c.is_ascii_whitespace()) {
                Some(content_start) => {
                    let (leading_whitespace, content) = line.split_at(content_start);
                    let width = leading_whitespace
                        .chars()
                        .map(ascii_whitespace_value)
                        .sum::<usize>();
                    (Some(width), content)
                }
                None => (None, line),
            })
            .collect();

        // Only non-blank lines take part in the minimum. A non-blank line
        // with zero indentation must count, otherwise the relative
        // indentation of the other lines would be stripped.
        let common_whitespace_prefix = prefix_content_pairs
            .iter()
            .filter_map(|(leading_whitespace, _content)| *leading_whitespace)
            .min()
            .unwrap_or(0);

        let lines: Vec<_> = prefix_content_pairs
            .into_iter()
            .map(|(leading_whitespace, content)| {
                // Blank lines carry no measured indentation and get no padding.
                let padding = leading_whitespace
                    .map_or(0, |width| width.saturating_sub(common_whitespace_prefix));
                let mut line = String::with_capacity(padding + content.len());
                line.extend(std::iter::repeat(' ').take(padding));
                line.push_str(content);
                line
            })
            .collect();

        let language = original.language.map(str::to_owned);
        let lines = match language.as_deref() {
            Some("nix") => highlight_nix(lines)?,
            _ => lines,
        };

        Ok(ISrcBlock {
            lines,
            language,
            post_blank: original.get_post_blank(),
        })
    }
);
/// Returns the indentation width contributed by one ASCII whitespace character.
///
/// A space counts as one column and a tab as eight. Carriage return, line
/// feed and form feed contribute nothing.
///
/// # Panics
///
/// Panics when `c` is not ASCII whitespace; callers are expected to pass only
/// characters taken from a line's leading whitespace.
fn ascii_whitespace_value(c: char) -> usize {
    const FORM_FEED: char = '\x0C';

    if c == ' ' {
        return 1;
    }
    if c == '\t' {
        return 8;
    }
    if matches!(c, '\r' | '\n' | FORM_FEED) {
        return 0;
    }
    unreachable!("Only ascii whitespace can reach this code.")
}
/// Adds syntax-highlighting markup to the lines of a Nix source block.
///
/// The lines are concatenated, highlighted with the tree-sitter Nix grammar
/// and split back into lines at every `'\n'` (terminators are kept). Only the
/// `comment` and `keyword` highlight names are configured; each highlighted
/// range is wrapped in `<span class="srchl_NAME">…</span>`.
///
/// The source text itself is copied through verbatim: no HTML escaping is
/// applied here. A span that covers several lines is opened on one returned
/// line and closed on a later one.
///
/// # Panics
///
/// Panics if the highlight configuration cannot be built, if highlighting
/// cannot be started, or if the highlighter yields an error event. As
/// written, failures are never reported through the `Err` variant.
fn highlight_nix(lines: Vec<String>) -> Result<Vec<String>, CustomError> {
    let highlight_names = ["comment", "keyword"];

    let grammar = tree_sitter_nix::LANGUAGE.into();
    let mut config =
        HighlightConfiguration::new(grammar, "nix", tree_sitter_nix::HIGHLIGHTS_QUERY, "", "")
            .unwrap();
    config.configure(&highlight_names);

    // Need 1 highlighter per thread
    let mut highlighter = Highlighter::new();
    let combined_text = lines.concat();
    let events = highlighter
        .highlight(&config, combined_text.as_bytes(), None, |_| None)
        .unwrap();

    // Turn every event into a text fragment and concatenate them in order.
    let highlighted_text: String = events
        .map(|event| match event.unwrap() {
            HighlightEvent::Source { start, end } => Cow::Borrowed(&combined_text[start..end]),
            HighlightEvent::HighlightStart(s) => {
                let class_name = format!("srchl_{}", highlight_names[s.0]);
                Cow::Owned(format!(r#"<span class="{}">"#, class_name))
            }
            HighlightEvent::HighlightEnd => Cow::Borrowed(r#"</span>"#),
        })
        .collect();

    Ok(highlighted_text
        .split_inclusive('\n')
        .map(String::from)
        .collect())
}
// use tree_sitter::Parser;
// fn dump_nix<B>(body: B) -> Result<(), CustomError>
// where
// B: AsRef<str>,
// {
// let mut parser = Parser::new();
// parser
// .set_language(&tree_sitter_nix::LANGUAGE.into())
// .expect("Error loading Nix grammar");
// let mut tree = parser.parse(body.as_ref(), None).unwrap();
// println!("{}", tree.root_node());
// Ok(())
// }