natter/src/intermediate/src_block.rs
2025-02-22 16:55:53 -05:00

210 lines
6.5 KiB
Rust

use std::borrow::Borrow;
use std::borrow::Cow;
use super::macros::intermediate;
use crate::error::CustomError;
use organic::types::StandardProperties;
use tree_sitter_highlight::Highlight;
use tree_sitter_highlight::HighlightConfiguration;
use tree_sitter_highlight::HighlightEvent;
use tree_sitter_highlight::Highlighter;
/// Intermediate representation of an Org source block.
///
/// Built from an `organic::types::SrcBlock` by the `intermediate!` invocation in this file.
#[derive(Debug, Clone)]
pub(crate) struct ISrcBlock {
/// The block's source text, one entry per line, as produced by the highlighting helpers.
pub(crate) lines: Vec<ISrcLine>,
/// The language copied from the original block, if it declared one.
pub(crate) language: Option<String>,
/// The value returned by `get_post_blank()` on the original block.
pub(crate) post_blank: organic::types::PostBlank,
}
/// A single line of a source block, stored as an ordered sequence of segments.
#[derive(Debug, Clone)]
pub(crate) struct ISrcLine {
/// Text and highlight markers, in the order they appear on the line.
pub(crate) children: Vec<ISrcSegment>,
}
/// One piece of a source line: either literal text or a highlight marker.
#[derive(Debug, Clone)]
pub(crate) enum ISrcSegment {
/// Literal source text. The highlighters in this file keep a line's trailing `'\n'` in this text.
RawText(String),
/// Marks the start of a highlighted region; `name` is the highlight name (e.g. `"comment"`).
HighlightStart { name: String },
/// Marks the end of the most recently started highlighted region.
HighlightEnd,
}
intermediate!(
    ISrcBlock,
    &'orig organic::types::SrcBlock<'parse>,
    original,
    _intermediate_context,
    {
        let source_code = original.get_value();

        // Split every line into (indentation width, remaining text).
        //
        // Lines that contain nothing but whitespace get `None` as their width so they can be
        // told apart from content lines that start in column 0. Only content lines may
        // influence the common indentation.
        let indentation_and_content: Vec<(Option<usize>, &str)> = source_code
            .split_inclusive('\n')
            .map(|line| match line.find(|c: char| !c.is_ascii_whitespace()) {
                Some(content_start) => {
                    let (leading_whitespace, content) = line.split_at(content_start);
                    let width = leading_whitespace
                        .chars()
                        .map(ascii_whitespace_value)
                        .sum::<usize>();
                    (Some(width), content)
                }
                None => (None, line),
            })
            .collect();

        // The indentation shared by every content line. An unindented content line forces this
        // to 0, which keeps the relative indentation of the other lines intact.
        let common_whitespace_prefix = indentation_and_content
            .iter()
            .filter_map(|(indentation, _content)| *indentation)
            .min()
            .unwrap_or(0);

        // Rebuild each line with the common indentation removed. Whatever indentation remains
        // is re-emitted as spaces (tabs were already expanded when measuring the width).
        // Whitespace-only lines are passed through untouched.
        let lines: Vec<String> = indentation_and_content
            .into_iter()
            .map(|(indentation, content)| {
                let extra_indentation = indentation
                    .map_or(0, |width| width.saturating_sub(common_whitespace_prefix));
                let mut line = String::with_capacity(extra_indentation + content.len());
                line.extend(std::iter::repeat(' ').take(extra_indentation));
                line.push_str(content);
                line
            })
            .collect();

        let language = original.language.map(str::to_owned);

        // Nix is the only language with a syntax highlighter here. If highlighting returns an
        // error we deliberately fall through to the plain rendering below.
        if language.as_deref() == Some("nix") {
            if let Ok(highlighted) = highlight_nix(&lines) {
                return Ok(ISrcBlock {
                    lines: highlighted,
                    language,
                    post_blank: original.get_post_blank(),
                });
            }
        }

        let highlighted = highlight_plain(&lines)?;
        Ok(ISrcBlock {
            lines: highlighted,
            language,
            post_blank: original.get_post_blank(),
        })
    }
);
impl ISrcLine {
    /// Creates a line that does not hold any segments yet.
    pub(crate) fn new() -> ISrcLine {
        let children = Vec::new();
        ISrcLine { children }
    }
}
/// Returns how many columns of indentation a single ASCII whitespace character contributes.
///
/// A space counts as 1 and a tab as 8; carriage return, line feed and form feed count as 0.
///
/// # Panics
///
/// Panics when `c` is anything other than one of the characters listed above.
fn ascii_whitespace_value(c: char) -> usize {
    const SPACE_WIDTH: usize = 1;
    const TAB_WIDTH: usize = 8;
    match c {
        ' ' => SPACE_WIDTH,
        '\t' => TAB_WIDTH,
        // '\x0C' is the form feed character.
        '\r' | '\n' | '\x0C' => 0,
        _ => unreachable!("Only ascii whitespace can reach this code."),
    }
}
fn highlight_plain<L>(lines: &[L]) -> Result<Vec<ISrcLine>, CustomError>
where
std::string::String: for<'a> From<&'a L>,
{
Ok(lines
.into_iter()
.map(|l| {
let mut line = ISrcLine::new();
line.children.push(ISrcSegment::RawText(l.into()));
line
})
.collect())
}
/// Syntax-highlights Nix source with tree-sitter and returns it split into lines.
///
/// `lines` are concatenated as-is (each entry is expected to carry its own trailing `'\n'`),
/// highlighted as one document, and then split back on `'\n'`. Each returned [`ISrcLine`]
/// keeps its trailing newline inside its last [`ISrcSegment::RawText`].
///
/// Every returned line is self-contained: a highlight that spans a line break is closed at
/// the end of the line and reopened at the start of the next one, so `HighlightStart` and
/// `HighlightEnd` are balanced within each line. A final line that lacks a trailing newline is
/// still returned.
///
/// # Panics
///
/// Panics if the highlight configuration cannot be built or if the highlighter reports an
/// error; these failures are not converted into `Err`.
fn highlight_nix<L>(lines: &[L]) -> Result<Vec<ISrcLine>, CustomError>
where
    L: Borrow<str>,
{
    let highlight_names = [
        "comment",
        "keyword",
        "property",
        "string",
        "string.special.path",
        // "string.special.uri",
    ];
    // Need 1 highlighter per thread
    let mut highlighter = Highlighter::new();
    let language = tree_sitter_nix::LANGUAGE.into();
    let mut config =
        HighlightConfiguration::new(language, "nix", tree_sitter_nix::HIGHLIGHTS_QUERY, "", "")
            .unwrap();
    config.configure(&highlight_names);
    let combined_text = lines.join("");
    let highlights = highlighter
        .highlight(&config, combined_text.as_bytes(), None, |_| None)
        .unwrap();
    let mut highlighted_text: Vec<ISrcLine> = Vec::with_capacity(lines.len());
    let mut current_line = ISrcLine::new();
    // Names of the highlights that are currently open, innermost last.
    let mut open_highlights: Vec<&str> = Vec::new();
    for event in highlights {
        match event.unwrap() {
            HighlightEvent::Source { start, end } => {
                let mut span = &combined_text[start..end];
                while let Some(line_break_index) = span.find('\n') {
                    let (first_line, rest) = span.split_at(line_break_index + 1);
                    current_line
                        .children
                        .push(ISrcSegment::RawText(first_line.to_owned()));
                    // Close everything that is still open so this line is balanced...
                    for _ in &open_highlights {
                        current_line.children.push(ISrcSegment::HighlightEnd);
                    }
                    highlighted_text.push(current_line);
                    current_line = ISrcLine::new();
                    // ...and reopen it, outermost first, on the following line.
                    for name in &open_highlights {
                        current_line.children.push(ISrcSegment::HighlightStart {
                            name: (*name).to_owned(),
                        });
                    }
                    span = rest;
                }
                if !span.is_empty() {
                    current_line
                        .children
                        .push(ISrcSegment::RawText(span.to_owned()));
                }
            }
            HighlightEvent::HighlightStart(s) => {
                // The highlighter reports an index into the names passed to `configure`.
                let name = highlight_names[s.0];
                open_highlights.push(name);
                current_line.children.push(ISrcSegment::HighlightStart {
                    name: name.to_owned(),
                });
            }
            HighlightEvent::HighlightEnd => {
                open_highlights.pop();
                current_line.children.push(ISrcSegment::HighlightEnd);
            }
        }
    }
    // Text after the last '\n' has not been pushed yet. A leftover line that only contains
    // highlight markers (reopened after the final newline and closed again) carries no text
    // and is dropped.
    let has_text = current_line
        .children
        .iter()
        .any(|segment| matches!(segment, ISrcSegment::RawText(_)));
    if has_text {
        highlighted_text.push(current_line);
    }
    Ok(highlighted_text)
}
// use tree_sitter::Parser;
// fn dump_nix<B>(body: B) -> Result<(), CustomError>
// where
// B: AsRef<str>,
// {
// let mut parser = Parser::new();
// parser
// .set_language(&tree_sitter_nix::LANGUAGE.into())
// .expect("Error loading Nix grammar");
// let mut tree = parser.parse(body.as_ref(), None).unwrap();
// println!("{}", tree.root_node());
// Ok(())
// }