From a2f53361eb25b51a4e17ac79370d4e964b1288e0 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 17 Oct 2023 13:32:01 -0400 Subject: [PATCH] Record element start events and report them when the event_count feature is enabled. --- Cargo.toml | 1 + src/event_count/database.rs | 42 +++++++++++++++++++++++++++++++++++ src/event_count/event_type.rs | 7 ++++++ src/event_count/mod.rs | 6 +++++ src/lib.rs | 2 ++ src/main.rs | 7 +++++- src/parser/element_parser.rs | 6 +++++ 7 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 src/event_count/database.rs create mode 100644 src/event_count/event_type.rs create mode 100644 src/event_count/mod.rs diff --git a/Cargo.toml b/Cargo.toml index fdda8d7..6f0573e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -59,6 +59,7 @@ default = [] compare = ["tokio/process", "tokio/macros"] foreign_document_test = ["compare", "dep:futures", "tokio/sync", "dep:walkdir", "tokio/process"] tracing = ["dep:opentelemetry", "dep:opentelemetry-otlp", "dep:opentelemetry-semantic-conventions", "dep:tokio", "dep:tracing", "dep:tracing-opentelemetry", "dep:tracing-subscriber"] +event_count = [] # Optimized build for any sort of release. 
[profile.release-lto] diff --git a/src/event_count/database.rs b/src/event_count/database.rs new file mode 100644 index 0000000..e5d0486 --- /dev/null +++ b/src/event_count/database.rs @@ -0,0 +1,42 @@ +use std::collections::HashMap; +use std::sync::Mutex; + +use super::EventType; +use crate::parser::OrgSource; + +#[derive(Debug, Eq, Hash, PartialEq)] +struct EventKey { + event_type: EventType, + byte_offset: usize, +} + +pub(crate) type EventCount = usize; + +static GLOBAL_DATA: Mutex<Option<HashMap<EventKey, EventCount>>> = Mutex::new(None); + +pub(crate) fn record_event(event_type: EventType, input: OrgSource<'_>) { + let mut db = GLOBAL_DATA.lock().unwrap(); + let db = db.get_or_insert_with(HashMap::new); + let key = EventKey { + event_type, + byte_offset: input.get_byte_offset(), + }; + *db.entry(key).or_insert(0) += 1; +} + +pub fn report(original_document: &str) { + let mut db = GLOBAL_DATA.lock().unwrap(); + let db = db.get_or_insert_with(HashMap::new); + let mut results: Vec<_> = db.iter().map(|(k, v)| (k, v)).collect(); + results.sort_by_key(|(_k, v)| *v); + // This would put the most common at the top, but that is a pain when there is already a lot of output from the parser.
+ // results.sort_by(|(_ak, av), (_bk, bv)| bv.cmp(av)); + for (key, count) in results { + println!( + "{:?} {} character offset: {}", + key.event_type, + count, + original_document[..key.byte_offset].chars().count() + 1 + ) + } +} diff --git a/src/event_count/event_type.rs b/src/event_count/event_type.rs new file mode 100644 index 0000000..82cef50 --- /dev/null +++ b/src/event_count/event_type.rs @@ -0,0 +1,7 @@ +#[derive(Debug, Eq, Hash, PartialEq)] +pub(crate) enum EventType { + ElementStart, + ElementFinish, + ObjectStart, + ObjectFinish, +} diff --git a/src/event_count/mod.rs b/src/event_count/mod.rs new file mode 100644 index 0000000..8987f8b --- /dev/null +++ b/src/event_count/mod.rs @@ -0,0 +1,6 @@ +mod database; +mod event_type; + +pub(crate) use database::record_event; +pub use database::report; +pub(crate) use event_type::EventType; diff --git a/src/lib.rs b/src/lib.rs index bd81614..6795061 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,6 +13,8 @@ pub mod compare; mod context; mod error; +#[cfg(feature = "event_count")] +pub mod event_count; mod iter; pub mod parser; pub mod types; diff --git a/src/main.rs b/src/main.rs index b034b18..ecf67c1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -54,8 +54,11 @@ fn read_stdin_to_string() -> Result> { } fn run_anonymous_parse>(org_contents: P) -> Result<(), Box> { - let rust_parsed = parse(org_contents.as_ref())?; + let org_contents = org_contents.as_ref(); + let rust_parsed = parse(org_contents)?; println!("{:#?}", rust_parsed); + #[cfg(feature = "event_count")] + organic::event_count::report(org_contents); Ok(()) } @@ -75,5 +78,7 @@ fn run_parse_on_file>(org_path: P) -> Result<(), Box( input: OrgSource<'s>, can_be_paragraph: bool, ) -> Res, Element<'s>> { + #[cfg(feature = "event_count")] + record_event(EventType::ElementStart, input); let (post_affiliated_keywords_input, affiliated_keywords) = affiliated_keywords(context, input)?;