From 369d3e8c502b4134af5b31f9ad82cd7abaabd79d Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 17 Oct 2023 10:57:04 -0400 Subject: [PATCH 01/11] Add a full-document parse benchmark. --- scripts/perf.bash | 2 +- src/parser/document.rs | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/scripts/perf.bash b/scripts/perf.bash index aa7ae329..fab93e72 100755 --- a/scripts/perf.bash +++ b/scripts/perf.bash @@ -14,7 +14,7 @@ function main { additional_flags+=(--profile "$PROFILE") fi (cd "$DIR/../" && cargo build --no-default-features "${additional_flags[@]}") - perf record --freq=2000 --call-graph dwarf --output="$DIR/../perf.data" "$DIR/../target/${PROFILE}/parse" "${@}" + perf record --freq=70000 --call-graph dwarf --output="$DIR/../perf.data" "$DIR/../target/${PROFILE}/parse" "${@}" # Convert to a format firefox will read # flags to consider --show-info diff --git a/src/parser/document.rs b/src/parser/document.rs index a2122d31..898b42e5 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -208,3 +208,17 @@ fn _document<'b, 'g, 'r, 's>( }, )) } + +#[cfg(test)] +mod tests { + use test::Bencher; + + use super::*; + + #[bench] + fn bench_full_document(b: &mut Bencher) { + let input = include_str!("../../org_mode_samples/element_container_priority/README.org"); + + b.iter(|| assert!(parse(input).is_ok())); + } +} From bc9bd4f97b602dd5daf6e9482213c8b161aa0a85 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 17 Oct 2023 11:10:18 -0400 Subject: [PATCH 02/11] Eliminate some closures. --- src/parser/affiliated_keyword.rs | 31 +++++++++++++++++++++++++++++++ src/parser/element_parser.rs | 9 +++------ src/parser/keyword.rs | 3 ++- 3 files changed, 36 insertions(+), 7 deletions(-) diff --git a/src/parser/affiliated_keyword.rs b/src/parser/affiliated_keyword.rs index ded3fa89..62fbaf05 100644 --- a/src/parser/affiliated_keyword.rs +++ b/src/parser/affiliated_keyword.rs @@ -14,17 +14,48 @@ use nom::multi::many0; use nom::multi::many_till; use nom::sequence::tuple; +use super::keyword::affiliated_keyword; use super::object_parser::standard_set_object; use super::util::confine_context; +use super::OrgSource; use crate::context::bind_context; use crate::context::Context; use crate::context::ContextElement; use crate::context::GlobalSettings; use crate::context::List; +use crate::context::RefContext; +use crate::error::Res; use crate::types::AffiliatedKeywordValue; use crate::types::AffiliatedKeywords; use crate::types::Keyword; +#[cfg_attr( + feature = "tracing", + tracing::instrument(ret, level = "debug", skip(context)) +)] +pub(crate) fn affiliated_keywords<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, Vec>> { + let mut ret = Vec::new(); + let mut remaining = input; + + loop { + let result = affiliated_keyword(context, remaining); + match result { + Ok((remain, kw)) => { + remaining = remain; + ret.push(kw); + } + Err(_) => { + break; + } + } + } + + Ok((remaining, ret)) +} + pub(crate) fn parse_affiliated_keywords<'g, 's, AK>( global_settings: &'g GlobalSettings<'g, 's>, input: AK, diff --git a/src/parser/element_parser.rs b/src/parser/element_parser.rs index 5cf47290..d65e93f0 100644 --- a/src/parser/element_parser.rs +++ b/src/parser/element_parser.rs @@ -1,5 +1,3 @@ -use nom::multi::many0; - use super::babel_call::babel_call; use super::clock::clock; use super::comment::comment; @@ -14,7 +12,6 @@ use super::footnote_definition::detect_footnote_definition; use super::footnote_definition::footnote_definition; use super::greater_block::greater_block; use super::horizontal_rule::horizontal_rule; -use super::keyword::affiliated_keyword; use super::keyword::keyword; use super::latex_environment::latex_environment; use super::lesser_block::comment_block; @@ -27,10 +24,10 @@ use super::paragraph::paragraph; use super::plain_list::detect_plain_list; use super::plain_list::plain_list; use super::table::detect_table; -use crate::context::parser_with_context; use crate::context::RefContext; use crate::error::CustomError; use crate::error::Res; +use crate::parser::affiliated_keyword::affiliated_keywords; use crate::parser::macros::ak_element; use crate::parser::macros::element; use crate::parser::table::org_mode_table; @@ -55,7 +52,7 @@ fn _element<'b, 'g, 'r, 's>( can_be_paragraph: bool, ) -> Res, Element<'s>> { let (post_affiliated_keywords_input, affiliated_keywords) = - many0(parser_with_context!(affiliated_keyword)(context))(input)?; + affiliated_keywords(context, input)?; let mut affiliated_keywords = affiliated_keywords.into_iter(); @@ -270,7 +267,7 @@ fn _detect_element<'b, 'g, 'r, 's>( can_be_paragraph: bool, ) -> Res, ()> { let (post_affiliated_keywords_input, affiliated_keywords) = - many0(parser_with_context!(affiliated_keyword)(context))(input)?; + affiliated_keywords(context, input)?; let mut affiliated_keywords = affiliated_keywords.into_iter(); diff --git a/src/parser/keyword.rs b/src/parser/keyword.rs index d09ed8d7..0424f942 100644 --- a/src/parser/keyword.rs +++ b/src/parser/keyword.rs @@ -22,6 +22,7 @@ use super::org_source::BracketDepth; use super::org_source::OrgSource; use super::util::get_consumed; use super::util::maybe_consume_trailing_whitespace_if_not_exiting; +use crate::context::bind_context; use crate::context::parser_with_context; use crate::context::RefContext; use crate::error::CustomError; @@ -107,7 +108,7 @@ pub(crate) fn affiliated_keyword<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Keyword<'s>> { - filtered_keyword(parser_with_context!(affiliated_key)(context))(input) + filtered_keyword(bind_context!(affiliated_key, context))(input) } #[cfg_attr( From 50d2831081b3444590ea12dc1fbbd1a1c824e078 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 17 Oct 2023 11:30:23 -0400 Subject: [PATCH 03/11] Cleanup. --- src/parser/keyword.rs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/parser/keyword.rs b/src/parser/keyword.rs index 0424f942..1374ffe4 100644 --- a/src/parser/keyword.rs +++ b/src/parser/keyword.rs @@ -22,6 +22,7 @@ use super::org_source::BracketDepth; use super::org_source::OrgSource; use super::util::get_consumed; use super::util::maybe_consume_trailing_whitespace_if_not_exiting; +use super::util::org_line_ending; use crate::context::bind_context; use crate::context::parser_with_context; use crate::context::RefContext; @@ -50,9 +51,7 @@ fn _filtered_keyword<'s, F: Fn(OrgSource<'s>) -> Res, OrgSource<'s // TODO: When key is a member of org-element-parsed-keywords, value can contain the standard set objects, excluding footnote references. let (remaining, (consumed_input, (_, _, parsed_key, _))) = consumed(tuple((space0, tag("#+"), key_parser, tag(":"))))(input)?; - if let Ok((remaining, _)) = - tuple((space0::<_, CustomError>, alt((line_ending, eof))))(remaining) - { + if let Ok((remaining, _)) = tuple((space0::<_, CustomError>, org_line_ending))(remaining) { return Ok(( remaining, Keyword { @@ -64,11 +63,9 @@ fn _filtered_keyword<'s, F: Fn(OrgSource<'s>) -> Res, OrgSource<'s )); } let (remaining, _ws) = space0(remaining)?; - let (remaining, parsed_value) = recognize(many_till( - anychar, - peek(tuple((space0, alt((line_ending, eof))))), - ))(remaining)?; - let (remaining, _ws) = tuple((space0, alt((line_ending, eof))))(remaining)?; + let (remaining, parsed_value) = + recognize(many_till(anychar, peek(tuple((space0, org_line_ending)))))(remaining)?; + let (remaining, _ws) = tuple((space0, org_line_ending))(remaining)?; Ok(( remaining, Keyword { From f65d0bb82dc93511efb2c7a615e00c66a55c2682 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 17 Oct 2023 11:33:26 -0400 Subject: [PATCH 04/11] Remove redundant call to space0. --- src/parser/keyword.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/parser/keyword.rs b/src/parser/keyword.rs index 1374ffe4..2cf29f0f 100644 --- a/src/parser/keyword.rs +++ b/src/parser/keyword.rs @@ -4,11 +4,9 @@ use nom::bytes::complete::tag; use nom::bytes::complete::tag_no_case; use nom::bytes::complete::take_while1; use nom::character::complete::anychar; -use nom::character::complete::line_ending; use nom::character::complete::one_of; use nom::character::complete::space0; use nom::combinator::consumed; -use nom::combinator::eof; use nom::combinator::map; use nom::combinator::not; use nom::combinator::peek; @@ -24,7 +22,6 @@ use super::util::get_consumed; use super::util::maybe_consume_trailing_whitespace_if_not_exiting; use super::util::org_line_ending; use crate::context::bind_context; -use crate::context::parser_with_context; use crate::context::RefContext; use crate::error::CustomError; use crate::error::Res; @@ -51,7 +48,8 @@ fn _filtered_keyword<'s, F: Fn(OrgSource<'s>) -> Res, OrgSource<'s // TODO: When key is a member of org-element-parsed-keywords, value can contain the standard set objects, excluding footnote references. let (remaining, (consumed_input, (_, _, parsed_key, _))) = consumed(tuple((space0, tag("#+"), key_parser, tag(":"))))(input)?; - if let Ok((remaining, _)) = tuple((space0::<_, CustomError>, org_line_ending))(remaining) { + let (remaining, _ws) = space0(remaining)?; + if let Ok((remaining, _)) = org_line_ending(remaining) { return Ok(( remaining, Keyword { @@ -62,7 +60,6 @@ fn _filtered_keyword<'s, F: Fn(OrgSource<'s>) -> Res, OrgSource<'s }, )); } - let (remaining, _ws) = space0(remaining)?; let (remaining, parsed_value) = recognize(many_till(anychar, peek(tuple((space0, org_line_ending)))))(remaining)?; let (remaining, _ws) = tuple((space0, org_line_ending))(remaining)?; From 05c64f53b1903f40c1b2656fc5471d683356fb4e Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 17 Oct 2023 11:40:11 -0400 Subject: [PATCH 05/11] Remove boxed error from CustomError. --- src/error/error.rs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/error/error.rs b/src/error/error.rs index 579518a6..9b7d4a06 100644 --- a/src/error/error.rs +++ b/src/error/error.rs @@ -10,7 +10,6 @@ pub enum CustomError { Text(String), Static(&'static str), IO(std::io::Error), - BoxedError(Box), Parser(ErrorKind), } @@ -36,9 +35,3 @@ impl From<&'static str> for CustomError { CustomError::Static(value) } } - -impl From> for CustomError { - fn from(value: Box) -> Self { - CustomError::BoxedError(value) - } -} From d20b4a410b9a72cf89336e653c655ba04382616f Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 17 Oct 2023 11:56:36 -0400 Subject: [PATCH 06/11] Remove pointless map_err calls. --- src/error/error.rs | 6 ++++++ src/parser/document.rs | 15 ++------------- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/src/error/error.rs b/src/error/error.rs index 9b7d4a06..65f52ef0 100644 --- a/src/error/error.rs +++ b/src/error/error.rs @@ -35,3 +35,9 @@ impl From<&'static str> for CustomError { CustomError::Static(value) } } + +impl From for CustomError { + fn from(value: String) -> Self { + CustomError::Text(value) + } +} diff --git a/src/parser/document.rs b/src/parser/document.rs index 898b42e5..2117b920 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -129,23 +129,12 @@ fn document_org_source<'b, 'g, 'r, 's>( }) .collect::, _>>()?; for setup_file in setup_files.iter().map(String::as_str) { - let (_, setup_file_settings) = - scan_for_in_buffer_settings(setup_file.into()).map_err(|err| { - eprintln!("{}", err); - nom::Err::Error(CustomError::Static( - "TODO: make this take an owned string so I can dump err.to_string() into it.", - )) - })?; + let (_, setup_file_settings) = scan_for_in_buffer_settings(setup_file.into())?; final_settings.extend(setup_file_settings); } final_settings.extend(document_settings); let new_settings = apply_in_buffer_settings(final_settings, context.get_global_settings()) - .map_err(|err| { - eprintln!("{}", err); - nom::Err::Error(CustomError::Static( - "TODO: make this take an owned string so I can dump err.to_string() into it.", - )) - })?; + .map_err(|err| nom::Err::Error(CustomError::from(err)))?; let new_context = context.with_global_settings(&new_settings); let context = &new_context; From 6139ea328d66cfa7d1a93fe52e1c11859e2d7967 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 17 Oct 2023 12:22:52 -0400 Subject: [PATCH 07/11] Unify some more error handling. --- src/parser/document.rs | 8 +++----- src/parser/in_buffer_settings.rs | 24 +++++++++++++++--------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/src/parser/document.rs b/src/parser/document.rs index 2117b920..be995477 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -134,7 +134,7 @@ fn document_org_source<'b, 'g, 'r, 's>( } final_settings.extend(document_settings); let new_settings = apply_in_buffer_settings(final_settings, context.get_global_settings()) - .map_err(|err| nom::Err::Error(CustomError::from(err)))?; + .map_err(nom::Err::Error)?; let new_context = context.with_global_settings(&new_settings); let context = &new_context; @@ -159,15 +159,13 @@ fn document_org_source<'b, 'g, 'r, 's>( let parser_context = context.with_global_settings(&new_global_settings); let (remaining, mut document) = _document(&parser_context, input) .map(|(rem, out)| (Into::<&str>::into(rem), out))?; - apply_post_parse_in_buffer_settings(&mut document) - .map_err(|err| nom::Err::::Failure(err.into()))?; + apply_post_parse_in_buffer_settings(&mut document); return Ok((remaining.into(), document)); } } // Find final in-buffer settings that do not impact parsing - apply_post_parse_in_buffer_settings(&mut document) - .map_err(|err| nom::Err::::Failure(err.into()))?; + apply_post_parse_in_buffer_settings(&mut document); Ok((remaining.into(), document)) } diff --git a/src/parser/in_buffer_settings.rs b/src/parser/in_buffer_settings.rs index b746496f..7ab27a7b 100644 --- a/src/parser/in_buffer_settings.rs +++ b/src/parser/in_buffer_settings.rs @@ -88,7 +88,7 @@ fn in_buffer_settings_key<'s>(input: OrgSource<'s>) -> Res, OrgSou pub(crate) fn apply_in_buffer_settings<'g, 's, 'sf>( keywords: Vec>, original_settings: &'g GlobalSettings<'g, 's>, -) -> Result, String> { +) -> Result, CustomError> { let mut new_settings = original_settings.clone(); // Todo Keywords @@ -98,7 +98,11 @@ pub(crate) fn apply_in_buffer_settings<'g, 's, 'sf>( || kw.key.eq_ignore_ascii_case("typ_todo") }) { let (_, (in_progress_words, complete_words)) = - todo_keywords(kw.value).map_err(|err| err.to_string())?; + todo_keywords(kw.value).map_err(|err| match err { + nom::Err::Incomplete(_) => CustomError::Text(err.to_string()), + nom::Err::Error(e) => e, + nom::Err::Failure(e) => e, + })?; new_settings .in_progress_todo_keywords .extend(in_progress_words.into_iter().map(str::to_string)); @@ -112,9 +116,14 @@ pub(crate) fn apply_in_buffer_settings<'g, 's, 'sf>( .iter() .filter(|kw| kw.key.eq_ignore_ascii_case("startup")) { - let (_remaining, settings) = - separated_list0(space1::<&str, nom::error::Error<_>>, is_not(" \t"))(kw.value) - .map_err(|err: nom::Err<_>| err.to_string())?; + let (_remaining, settings) = separated_list0(space1::<&str, CustomError>, is_not(" \t"))( + kw.value, + ) + .map_err(|err: nom::Err<_>| match err { + nom::Err::Incomplete(_) => CustomError::Text(err.to_string()), + nom::Err::Error(e) => e, + nom::Err::Failure(e) => e, + })?; if settings.contains(&"odd") { new_settings.odd_levels_only = HeadlineLevelFilter::Odd; } @@ -139,9 +148,7 @@ pub(crate) fn apply_in_buffer_settings<'g, 's, 'sf>( /// Apply in-buffer settings that do not impact parsing and therefore can be applied after parsing. #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -pub(crate) fn apply_post_parse_in_buffer_settings<'g, 's, 'sf>( - document: &mut Document<'s>, -) -> Result<(), &'static str> { +pub(crate) fn apply_post_parse_in_buffer_settings<'g, 's, 'sf>(document: &mut Document<'s>) { document.category = Into::::into(&*document) .into_iter() .filter_map(|ast_node| { @@ -154,7 +161,6 @@ pub(crate) fn apply_post_parse_in_buffer_settings<'g, 's, 'sf>( }) .last() .map(|kw| kw.value.to_owned()); - Ok(()) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] From 17db05c2c77551658e90004e605d8bae2de0e307 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 17 Oct 2023 12:42:34 -0400 Subject: [PATCH 08/11] Unify more error handling. --- src/parser/headline.rs | 17 ++++++++--------- src/parser/in_buffer_settings.rs | 6 +++++- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/parser/headline.rs b/src/parser/headline.rs index 7b4ffc40..aca0fa3f 100644 --- a/src/parser/headline.rs +++ b/src/parser/headline.rs @@ -18,12 +18,13 @@ use nom::sequence::tuple; use super::org_source::OrgSource; use super::section::section; +use super::util::exit_matcher_parser; use super::util::get_consumed; use super::util::org_line_ending; use super::util::org_space; use super::util::org_space_or_line_ending; use super::util::start_of_line; -use crate::context::parser_with_context; +use crate::context::bind_context; use crate::context::ContextElement; use crate::context::ExitClass; use crate::context::ExitMatcherNode; @@ -61,10 +62,10 @@ fn _heading<'b, 'g, 'r, 's>( let mut scheduled = None; let mut deadline = None; let mut closed = None; - not(|i| context.check_exit_matcher(i))(input)?; + not(bind_context!(exit_matcher_parser, context))(input)?; let (remaining, pre_headline) = headline(context, input, parent_star_count)?; - let section_matcher = parser_with_context!(section)(context); - let heading_matcher = parser_with_context!(heading(pre_headline.star_count))(context); + let section_matcher = bind_context!(section, context); + let heading_matcher = bind_context!(heading(pre_headline.star_count), context); let (remaining, maybe_section) = opt(map(section_matcher, DocumentElement::Section))(remaining)?; let (remaining, _ws) = opt(tuple((start_of_line, many0(blank_line))))(remaining)?; @@ -154,7 +155,7 @@ fn headline<'b, 'g, 'r, 's>( let (remaining, (_, (headline_level, star_count, _), _)) = tuple(( start_of_line, verify( - parser_with_context!(headline_level)(&parser_context), + bind_context!(headline_level, &parser_context), |(_, count, _)| *count > parent_star_count, ), peek(org_space), @@ -162,7 +163,7 @@ fn headline<'b, 'g, 'r, 's>( let (remaining, maybe_todo_keyword) = opt(tuple(( space1, - parser_with_context!(heading_keyword)(&parser_context), + bind_context!(heading_keyword, &parser_context), peek(org_space_or_line_ending), )))(remaining)?; @@ -176,9 +177,7 @@ fn headline<'b, 'g, 'r, 's>( let (remaining, maybe_title) = opt(tuple(( space1, - consumed(many1(parser_with_context!(standard_set_object)( - &parser_context, - ))), + consumed(many1(bind_context!(standard_set_object, &parser_context))), )))(remaining)?; let (remaining, maybe_tags) = opt(tuple((space0, tags)))(remaining)?; diff --git a/src/parser/in_buffer_settings.rs b/src/parser/in_buffer_settings.rs index 7ab27a7b..e6719403 100644 --- a/src/parser/in_buffer_settings.rs +++ b/src/parser/in_buffer_settings.rs @@ -137,7 +137,11 @@ pub(crate) fn apply_in_buffer_settings<'g, 's, 'sf>( .iter() .filter(|kw| kw.key.eq_ignore_ascii_case("link")) { - let (_, (link_key, link_value)) = link_template(kw.value).map_err(|e| e.to_string())?; + let (_, (link_key, link_value)) = link_template(kw.value).map_err(|err| match err { + nom::Err::Incomplete(_) => CustomError::Text(err.to_string()), + nom::Err::Error(e) => e, + nom::Err::Failure(e) => e, + })?; new_settings .link_templates .insert(link_key.to_owned(), link_value.to_owned()); From a2f53361eb25b51a4e17ac79370d4e964b1288e0 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 17 Oct 2023 13:32:01 -0400 Subject: [PATCH 09/11] Record element start events and report them when the event_count feature is enabled. --- Cargo.toml | 1 + src/event_count/database.rs | 42 +++++++++++++++++++++++++++++++++++ src/event_count/event_type.rs | 7 ++++++ src/event_count/mod.rs | 6 +++++ src/lib.rs | 2 ++ src/main.rs | 7 +++++- src/parser/element_parser.rs | 6 +++++ 7 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 src/event_count/database.rs create mode 100644 src/event_count/event_type.rs create mode 100644 src/event_count/mod.rs diff --git a/Cargo.toml b/Cargo.toml index fdda8d74..6f0573e2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -59,6 +59,7 @@ default = [] compare = ["tokio/process", "tokio/macros"] foreign_document_test = ["compare", "dep:futures", "tokio/sync", "dep:walkdir", "tokio/process"] tracing = ["dep:opentelemetry", "dep:opentelemetry-otlp", "dep:opentelemetry-semantic-conventions", "dep:tokio", "dep:tracing", "dep:tracing-opentelemetry", "dep:tracing-subscriber"] +event_count = [] # Optimized build for any sort of release. [profile.release-lto] diff --git a/src/event_count/database.rs b/src/event_count/database.rs new file mode 100644 index 00000000..e5d04869 --- /dev/null +++ b/src/event_count/database.rs @@ -0,0 +1,42 @@ +use std::collections::HashMap; +use std::sync::Mutex; + +use super::EventType; +use crate::parser::OrgSource; + +#[derive(Debug, Eq, Hash, PartialEq)] +struct EventKey { + event_type: EventType, + byte_offset: usize, +} + +pub(crate) type EventCount = usize; + +static GLOBAL_DATA: Mutex>> = Mutex::new(None); + +pub(crate) fn record_event(event_type: EventType, input: OrgSource<'_>) { + let mut db = GLOBAL_DATA.lock().unwrap(); + let db = db.get_or_insert_with(HashMap::new); + let key = EventKey { + event_type, + byte_offset: input.get_byte_offset(), + }; + *db.entry(key).or_insert(0) += 1; +} + +pub fn report(original_document: &str) { + let mut db = GLOBAL_DATA.lock().unwrap(); + let db = db.get_or_insert_with(HashMap::new); + let mut results: Vec<_> = db.iter().map(|(k, v)| (k, v)).collect(); + results.sort_by_key(|(_k, v)| *v); + // This would put the most common at the top, but that is a pain when there is already a lot of output from the parser. + // results.sort_by(|(_ak, av), (_bk, bv)| bv.cmp(av)); + for (key, count) in results { + println!( + "{:?} {} character offset: {}", + key.event_type, + count, + original_document[..key.byte_offset].chars().count() + 1 + ) + } +} diff --git a/src/event_count/event_type.rs b/src/event_count/event_type.rs new file mode 100644 index 00000000..82cef50e --- /dev/null +++ b/src/event_count/event_type.rs @@ -0,0 +1,7 @@ +#[derive(Debug, Eq, Hash, PartialEq)] +pub(crate) enum EventType { + ElementStart, + ElementFinish, + ObjectStart, + ObjectFinish, +} diff --git a/src/event_count/mod.rs b/src/event_count/mod.rs new file mode 100644 index 00000000..8987f8ba --- /dev/null +++ b/src/event_count/mod.rs @@ -0,0 +1,6 @@ +mod database; +mod event_type; + +pub(crate) use database::record_event; +pub use database::report; +pub(crate) use event_type::EventType; diff --git a/src/lib.rs b/src/lib.rs index bd816145..6795061f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,6 +13,8 @@ pub mod compare; mod context; mod error; +#[cfg(feature = "event_count")] +pub mod event_count; mod iter; pub mod parser; pub mod types; diff --git a/src/main.rs b/src/main.rs index b034b183..ecf67c1a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -54,8 +54,11 @@ fn read_stdin_to_string() -> Result> { } fn run_anonymous_parse>(org_contents: P) -> Result<(), Box> { - let rust_parsed = parse(org_contents.as_ref())?; + let org_contents = org_contents.as_ref(); + let rust_parsed = parse(org_contents)?; println!("{:#?}", rust_parsed); + #[cfg(feature = "event_count")] + organic::event_count::report(org_contents); Ok(()) } @@ -75,5 +78,7 @@ fn run_parse_on_file>(org_path: P) -> Result<(), Box( input: OrgSource<'s>, can_be_paragraph: bool, ) -> Res, Element<'s>> { + #[cfg(feature = "event_count")] + record_event(EventType::ElementStart, input); let (post_affiliated_keywords_input, affiliated_keywords) = affiliated_keywords(context, input)?; From 0208020e3e6db801b17851f2f06c19561d525163 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 17 Oct 2023 13:35:40 -0400 Subject: [PATCH 10/11] Also print byte offset. --- src/event_count/database.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/event_count/database.rs b/src/event_count/database.rs index e5d04869..1b763e0d 100644 --- a/src/event_count/database.rs +++ b/src/event_count/database.rs @@ -33,10 +33,11 @@ pub fn report(original_document: &str) { // results.sort_by(|(_ak, av), (_bk, bv)| bv.cmp(av)); for (key, count) in results { println!( - "{:?} {} character offset: {}", + "{:?} {} character offset: {} byte offset: {}", key.event_type, count, - original_document[..key.byte_offset].chars().count() + 1 + original_document[..key.byte_offset].chars().count() + 1, + key.byte_offset ) } } From 01464057ad09eb9e116122c23016a3e5ce6bcc1f Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 17 Oct 2023 13:43:33 -0400 Subject: [PATCH 11/11] Remove unused event types. --- src/event_count/event_type.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/event_count/event_type.rs b/src/event_count/event_type.rs index 82cef50e..02a66309 100644 --- a/src/event_count/event_type.rs +++ b/src/event_count/event_type.rs @@ -1,7 +1,4 @@ #[derive(Debug, Eq, Hash, PartialEq)] pub(crate) enum EventType { ElementStart, - ElementFinish, - ObjectStart, - ObjectFinish, }