2023-10-17 13:32:01 -04:00
|
|
|
use std::collections::HashMap;
|
|
|
|
|
use std::sync::Mutex;
|
|
|
|
|
|
|
|
|
|
use super::EventType;
|
|
|
|
|
use crate::parser::OrgSource;
|
|
|
|
|
|
|
|
|
|
#[derive(Debug, Eq, Hash, PartialEq)]
|
|
|
|
|
struct EventKey {
|
|
|
|
|
event_type: EventType,
|
|
|
|
|
byte_offset: usize,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Number of times a particular event has been recorded.
pub(crate) type EventCount = usize;
|
|
|
|
|
|
|
|
|
|
/// Process-wide tally of recorded events, guarded by a mutex.
///
/// Starts out as `None` because the map cannot be built in a constant initializer here;
/// the map is created lazily the first time it is needed.
static GLOBAL_DATA: Mutex<Option<HashMap<EventKey, EventCount>>> = Mutex::new(None);
|
|
|
|
|
|
|
|
|
|
pub(crate) fn record_event(event_type: EventType, input: OrgSource<'_>) {
|
|
|
|
|
let mut db = GLOBAL_DATA.lock().unwrap();
|
|
|
|
|
let db = db.get_or_insert_with(HashMap::new);
|
|
|
|
|
let key = EventKey {
|
|
|
|
|
event_type,
|
|
|
|
|
byte_offset: input.get_byte_offset(),
|
|
|
|
|
};
|
|
|
|
|
*db.entry(key).or_insert(0) += 1;
|
|
|
|
|
}
|
|
|
|
|
|
2024-01-03 20:06:53 -05:00
|
|
|
pub(crate) fn report(original_document: &str) {
|
2023-10-17 13:32:01 -04:00
|
|
|
let mut db = GLOBAL_DATA.lock().unwrap();
|
|
|
|
|
let db = db.get_or_insert_with(HashMap::new);
|
2023-12-15 19:57:35 -05:00
|
|
|
let mut results: Vec<_> = db.iter().collect();
|
2023-10-17 13:32:01 -04:00
|
|
|
results.sort_by_key(|(_k, v)| *v);
|
|
|
|
|
// This would put the most common at the top, but that is a pain when there is already a lot of output from the parser.
|
|
|
|
|
// results.sort_by(|(_ak, av), (_bk, bv)| bv.cmp(av));
|
|
|
|
|
for (key, count) in results {
|
|
|
|
|
println!(
|
2023-10-17 13:35:40 -04:00
|
|
|
"{:?} {} character offset: {} byte offset: {}",
|
2023-10-17 13:32:01 -04:00
|
|
|
key.event_type,
|
|
|
|
|
count,
|
2023-10-17 13:35:40 -04:00
|
|
|
original_document[..key.byte_offset].chars().count() + 1,
|
|
|
|
|
key.byte_offset
|
2023-10-17 13:32:01 -04:00
|
|
|
)
|
|
|
|
|
}
|
|
|
|
|
}
|