Record element start events and report them when the event_count feature is enabled.

This commit is contained in:
Tom Alexander 2023-10-17 13:32:01 -04:00
parent 17db05c2c7
commit a2f53361eb
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
7 changed files with 70 additions and 1 deletions

View File

@ -59,6 +59,7 @@ default = []
compare = ["tokio/process", "tokio/macros"]
foreign_document_test = ["compare", "dep:futures", "tokio/sync", "dep:walkdir", "tokio/process"]
tracing = ["dep:opentelemetry", "dep:opentelemetry-otlp", "dep:opentelemetry-semantic-conventions", "dep:tokio", "dep:tracing", "dep:tracing-opentelemetry", "dep:tracing-subscriber"]
event_count = []
# Optimized build for any sort of release.
[profile.release-lto]

View File

@ -0,0 +1,42 @@
use std::collections::HashMap;
use std::sync::Mutex;
use super::EventType;
use crate::parser::OrgSource;
#[derive(Debug, Eq, Hash, PartialEq)]
struct EventKey {
event_type: EventType,
byte_offset: usize,
}
pub(crate) type EventCount = usize;
static GLOBAL_DATA: Mutex<Option<HashMap<EventKey, EventCount>>> = Mutex::new(None);
pub(crate) fn record_event(event_type: EventType, input: OrgSource<'_>) {
let mut db = GLOBAL_DATA.lock().unwrap();
let db = db.get_or_insert_with(HashMap::new);
let key = EventKey {
event_type,
byte_offset: input.get_byte_offset(),
};
*db.entry(key).or_insert(0) += 1;
}
pub fn report(original_document: &str) {
let mut db = GLOBAL_DATA.lock().unwrap();
let db = db.get_or_insert_with(HashMap::new);
let mut results: Vec<_> = db.iter().map(|(k, v)| (k, v)).collect();
results.sort_by_key(|(_k, v)| *v);
// This would put the most common at the top, but that is a pain when there is already a lot of output from the parser.
// results.sort_by(|(_ak, av), (_bk, bv)| bv.cmp(av));
for (key, count) in results {
println!(
"{:?} {} character offset: {}",
key.event_type,
count,
original_document[..key.byte_offset].chars().count() + 1
)
}
}

View File

@ -0,0 +1,7 @@
#[derive(Debug, Eq, Hash, PartialEq)]
pub(crate) enum EventType {
ElementStart,
ElementFinish,
ObjectStart,
ObjectFinish,
}

6
src/event_count/mod.rs Normal file
View File

@ -0,0 +1,6 @@
mod database;
mod event_type;
pub(crate) use database::record_event;
pub use database::report;
pub(crate) use event_type::EventType;

View File

@ -13,6 +13,8 @@ pub mod compare;
mod context;
mod error;
#[cfg(feature = "event_count")]
pub mod event_count;
mod iter;
pub mod parser;
pub mod types;

View File

@ -54,8 +54,11 @@ fn read_stdin_to_string() -> Result<String, Box<dyn std::error::Error>> {
}
fn run_anonymous_parse<P: AsRef<str>>(org_contents: P) -> Result<(), Box<dyn std::error::Error>> {
let rust_parsed = parse(org_contents.as_ref())?;
let org_contents = org_contents.as_ref();
let rust_parsed = parse(org_contents)?;
println!("{:#?}", rust_parsed);
#[cfg(feature = "event_count")]
organic::event_count::report(org_contents);
Ok(())
}
@ -75,5 +78,7 @@ fn run_parse_on_file<P: AsRef<Path>>(org_path: P) -> Result<(), Box<dyn std::err
};
let rust_parsed = parse_with_settings(org_contents, &global_settings)?;
println!("{:#?}", rust_parsed);
#[cfg(feature = "event_count")]
organic::event_count::report(org_contents);
Ok(())
}

View File

@ -27,6 +27,10 @@ use super::table::detect_table;
use crate::context::RefContext;
use crate::error::CustomError;
use crate::error::Res;
#[cfg(feature = "event_count")]
use crate::event_count::record_event;
#[cfg(feature = "event_count")]
use crate::event_count::EventType;
use crate::parser::affiliated_keyword::affiliated_keywords;
use crate::parser::macros::ak_element;
use crate::parser::macros::element;
@ -51,6 +55,8 @@ fn _element<'b, 'g, 'r, 's>(
input: OrgSource<'s>,
can_be_paragraph: bool,
) -> Res<OrgSource<'s>, Element<'s>> {
#[cfg(feature = "event_count")]
record_event(EventType::ElementStart, input);
let (post_affiliated_keywords_input, affiliated_keywords) =
affiliated_keywords(context, input)?;