diff --git a/Cargo.toml b/Cargo.toml index fdda8d74..6f0573e2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -59,6 +59,7 @@ default = [] compare = ["tokio/process", "tokio/macros"] foreign_document_test = ["compare", "dep:futures", "tokio/sync", "dep:walkdir", "tokio/process"] tracing = ["dep:opentelemetry", "dep:opentelemetry-otlp", "dep:opentelemetry-semantic-conventions", "dep:tokio", "dep:tracing", "dep:tracing-opentelemetry", "dep:tracing-subscriber"] +event_count = [] # Optimized build for any sort of release. [profile.release-lto] diff --git a/scripts/perf.bash b/scripts/perf.bash index aa7ae329..fab93e72 100755 --- a/scripts/perf.bash +++ b/scripts/perf.bash @@ -14,7 +14,7 @@ function main { additional_flags+=(--profile "$PROFILE") fi (cd "$DIR/../" && cargo build --no-default-features "${additional_flags[@]}") - perf record --freq=2000 --call-graph dwarf --output="$DIR/../perf.data" "$DIR/../target/${PROFILE}/parse" "${@}" + perf record --freq=70000 --call-graph dwarf --output="$DIR/../perf.data" "$DIR/../target/${PROFILE}/parse" "${@}" # Convert to a format firefox will read # flags to consider --show-info diff --git a/src/error/error.rs b/src/error/error.rs index 579518a6..65f52ef0 100644 --- a/src/error/error.rs +++ b/src/error/error.rs @@ -10,7 +10,6 @@ pub enum CustomError { Text(String), Static(&'static str), IO(std::io::Error), - BoxedError(Box), Parser(ErrorKind), } @@ -37,8 +36,8 @@ impl From<&'static str> for CustomError { } } -impl From> for CustomError { - fn from(value: Box) -> Self { - CustomError::BoxedError(value) +impl From for CustomError { + fn from(value: String) -> Self { + CustomError::Text(value) } } diff --git a/src/event_count/database.rs b/src/event_count/database.rs new file mode 100644 index 00000000..1b763e0d --- /dev/null +++ b/src/event_count/database.rs @@ -0,0 +1,43 @@ +use std::collections::HashMap; +use std::sync::Mutex; + +use super::EventType; +use crate::parser::OrgSource; + +#[derive(Debug, Eq, Hash, PartialEq)] +struct EventKey { + event_type: EventType, + byte_offset: usize, +} + +pub(crate) type EventCount = usize; + +static GLOBAL_DATA: Mutex>> = Mutex::new(None); + +pub(crate) fn record_event(event_type: EventType, input: OrgSource<'_>) { + let mut db = GLOBAL_DATA.lock().unwrap(); + let db = db.get_or_insert_with(HashMap::new); + let key = EventKey { + event_type, + byte_offset: input.get_byte_offset(), + }; + *db.entry(key).or_insert(0) += 1; +} + +pub fn report(original_document: &str) { + let mut db = GLOBAL_DATA.lock().unwrap(); + let db = db.get_or_insert_with(HashMap::new); + let mut results: Vec<_> = db.iter().map(|(k, v)| (k, v)).collect(); + results.sort_by_key(|(_k, v)| *v); + // This would put the most common at the top, but that is a pain when there is already a lot of output from the parser. + // results.sort_by(|(_ak, av), (_bk, bv)| bv.cmp(av)); + for (key, count) in results { + println!( + "{:?} {} character offset: {} byte offset: {}", + key.event_type, + count, + original_document[..key.byte_offset].chars().count() + 1, + key.byte_offset + ) + } +} diff --git a/src/event_count/event_type.rs b/src/event_count/event_type.rs new file mode 100644 index 00000000..02a66309 --- /dev/null +++ b/src/event_count/event_type.rs @@ -0,0 +1,4 @@ +#[derive(Debug, Eq, Hash, PartialEq)] +pub(crate) enum EventType { + ElementStart, +} diff --git a/src/event_count/mod.rs b/src/event_count/mod.rs new file mode 100644 index 00000000..8987f8ba --- /dev/null +++ b/src/event_count/mod.rs @@ -0,0 +1,6 @@ +mod database; +mod event_type; + +pub(crate) use database::record_event; +pub use database::report; +pub(crate) use event_type::EventType; diff --git a/src/lib.rs b/src/lib.rs index bd816145..6795061f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,6 +13,8 @@ pub mod compare; mod context; mod error; +#[cfg(feature = "event_count")] +pub mod event_count; mod iter; pub mod parser; pub mod types; diff --git a/src/main.rs b/src/main.rs index b034b183..ecf67c1a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -54,8 +54,11 @@ fn read_stdin_to_string() -> Result> { } fn run_anonymous_parse>(org_contents: P) -> Result<(), Box> { - let rust_parsed = parse(org_contents.as_ref())?; + let org_contents = org_contents.as_ref(); + let rust_parsed = parse(org_contents)?; println!("{:#?}", rust_parsed); + #[cfg(feature = "event_count")] + organic::event_count::report(org_contents); Ok(()) } @@ -75,5 +78,7 @@ fn run_parse_on_file>(org_path: P) -> Result<(), Box( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, Vec>> { + let mut ret = Vec::new(); + let mut remaining = input; + + loop { + let result = affiliated_keyword(context, remaining); + match result { + Ok((remain, kw)) => { + remaining = remain; + ret.push(kw); + } + Err(_) => { + break; + } + } + } + + Ok((remaining, ret)) +} + pub(crate) fn parse_affiliated_keywords<'g, 's, AK>( global_settings: &'g GlobalSettings<'g, 's>, input: AK, diff --git a/src/parser/document.rs b/src/parser/document.rs index a2122d31..be995477 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -129,23 +129,12 @@ fn document_org_source<'b, 'g, 'r, 's>( }) .collect::, _>>()?; for setup_file in setup_files.iter().map(String::as_str) { - let (_, setup_file_settings) = - scan_for_in_buffer_settings(setup_file.into()).map_err(|err| { - eprintln!("{}", err); - nom::Err::Error(CustomError::Static( - "TODO: make this take an owned string so I can dump err.to_string() into it.", - )) - })?; + let (_, setup_file_settings) = scan_for_in_buffer_settings(setup_file.into())?; final_settings.extend(setup_file_settings); } final_settings.extend(document_settings); let new_settings = apply_in_buffer_settings(final_settings, context.get_global_settings()) - .map_err(|err| { - eprintln!("{}", err); - nom::Err::Error(CustomError::Static( - "TODO: make this take an owned string so I can dump err.to_string() into it.", - )) - })?; + .map_err(nom::Err::Error)?; let new_context = context.with_global_settings(&new_settings); let context = &new_context; @@ -170,15 +159,13 @@ fn document_org_source<'b, 'g, 'r, 's>( let parser_context = context.with_global_settings(&new_global_settings); let (remaining, mut document) = _document(&parser_context, input) .map(|(rem, out)| (Into::<&str>::into(rem), out))?; - apply_post_parse_in_buffer_settings(&mut document) - .map_err(|err| nom::Err::::Failure(err.into()))?; + apply_post_parse_in_buffer_settings(&mut document); return Ok((remaining.into(), document)); } } // Find final in-buffer settings that do not impact parsing - apply_post_parse_in_buffer_settings(&mut document) - .map_err(|err| nom::Err::::Failure(err.into()))?; + apply_post_parse_in_buffer_settings(&mut document); Ok((remaining.into(), document)) } @@ -208,3 +195,17 @@ fn _document<'b, 'g, 'r, 's>( }, )) } + +#[cfg(test)] +mod tests { + use test::Bencher; + + use super::*; + + #[bench] + fn bench_full_document(b: &mut Bencher) { + let input = include_str!("../../org_mode_samples/element_container_priority/README.org"); + + b.iter(|| assert!(parse(input).is_ok())); + } +} diff --git a/src/parser/element_parser.rs b/src/parser/element_parser.rs index 5cf47290..92988dba 100644 --- a/src/parser/element_parser.rs +++ b/src/parser/element_parser.rs @@ -1,5 +1,3 @@ -use nom::multi::many0; - use super::babel_call::babel_call; use super::clock::clock; use super::comment::comment; @@ -14,7 +12,6 @@ use super::footnote_definition::detect_footnote_definition; use super::footnote_definition::footnote_definition; use super::greater_block::greater_block; use super::horizontal_rule::horizontal_rule; -use super::keyword::affiliated_keyword; use super::keyword::keyword; use super::latex_environment::latex_environment; use super::lesser_block::comment_block; @@ -27,10 +24,14 @@ use super::paragraph::paragraph; use super::plain_list::detect_plain_list; use super::plain_list::plain_list; use super::table::detect_table; -use crate::context::parser_with_context; use crate::context::RefContext; use crate::error::CustomError; use crate::error::Res; +#[cfg(feature = "event_count")] +use crate::event_count::record_event; +#[cfg(feature = "event_count")] +use crate::event_count::EventType; +use crate::parser::affiliated_keyword::affiliated_keywords; use crate::parser::macros::ak_element; use crate::parser::macros::element; use crate::parser::table::org_mode_table; @@ -54,8 +55,10 @@ fn _element<'b, 'g, 'r, 's>( input: OrgSource<'s>, can_be_paragraph: bool, ) -> Res, Element<'s>> { + #[cfg(feature = "event_count")] + record_event(EventType::ElementStart, input); let (post_affiliated_keywords_input, affiliated_keywords) = - many0(parser_with_context!(affiliated_keyword)(context))(input)?; + affiliated_keywords(context, input)?; let mut affiliated_keywords = affiliated_keywords.into_iter(); @@ -270,7 +273,7 @@ fn _detect_element<'b, 'g, 'r, 's>( can_be_paragraph: bool, ) -> Res, ()> { let (post_affiliated_keywords_input, affiliated_keywords) = - many0(parser_with_context!(affiliated_keyword)(context))(input)?; + affiliated_keywords(context, input)?; let mut affiliated_keywords = affiliated_keywords.into_iter(); diff --git a/src/parser/headline.rs b/src/parser/headline.rs index 7b4ffc40..aca0fa3f 100644 --- a/src/parser/headline.rs +++ b/src/parser/headline.rs @@ -18,12 +18,13 @@ use nom::sequence::tuple; use super::org_source::OrgSource; use super::section::section; +use super::util::exit_matcher_parser; use super::util::get_consumed; use super::util::org_line_ending; use super::util::org_space; use super::util::org_space_or_line_ending; use super::util::start_of_line; -use crate::context::parser_with_context; +use crate::context::bind_context; use crate::context::ContextElement; use crate::context::ExitClass; use crate::context::ExitMatcherNode; @@ -61,10 +62,10 @@ fn _heading<'b, 'g, 'r, 's>( let mut scheduled = None; let mut deadline = None; let mut closed = None; - not(|i| context.check_exit_matcher(i))(input)?; + not(bind_context!(exit_matcher_parser, context))(input)?; let (remaining, pre_headline) = headline(context, input, parent_star_count)?; - let section_matcher = parser_with_context!(section)(context); - let heading_matcher = parser_with_context!(heading(pre_headline.star_count))(context); + let section_matcher = bind_context!(section, context); + let heading_matcher = bind_context!(heading(pre_headline.star_count), context); let (remaining, maybe_section) = opt(map(section_matcher, DocumentElement::Section))(remaining)?; let (remaining, _ws) = opt(tuple((start_of_line, many0(blank_line))))(remaining)?; @@ -154,7 +155,7 @@ fn headline<'b, 'g, 'r, 's>( let (remaining, (_, (headline_level, star_count, _), _)) = tuple(( start_of_line, verify( - parser_with_context!(headline_level)(&parser_context), + bind_context!(headline_level, &parser_context), |(_, count, _)| *count > parent_star_count, ), peek(org_space), @@ -162,7 +163,7 @@ fn headline<'b, 'g, 'r, 's>( let (remaining, maybe_todo_keyword) = opt(tuple(( space1, - parser_with_context!(heading_keyword)(&parser_context), + bind_context!(heading_keyword, &parser_context), peek(org_space_or_line_ending), )))(remaining)?; @@ -176,9 +177,7 @@ fn headline<'b, 'g, 'r, 's>( let (remaining, maybe_title) = opt(tuple(( space1, - consumed(many1(parser_with_context!(standard_set_object)( - &parser_context, - ))), + consumed(many1(bind_context!(standard_set_object, &parser_context))), )))(remaining)?; let (remaining, maybe_tags) = opt(tuple((space0, tags)))(remaining)?; diff --git a/src/parser/in_buffer_settings.rs b/src/parser/in_buffer_settings.rs index b746496f..e6719403 100644 --- a/src/parser/in_buffer_settings.rs +++ b/src/parser/in_buffer_settings.rs @@ -88,7 +88,7 @@ fn in_buffer_settings_key<'s>(input: OrgSource<'s>) -> Res, OrgSou pub(crate) fn apply_in_buffer_settings<'g, 's, 'sf>( keywords: Vec>, original_settings: &'g GlobalSettings<'g, 's>, -) -> Result, String> { +) -> Result, CustomError> { let mut new_settings = original_settings.clone(); // Todo Keywords @@ -98,7 +98,11 @@ pub(crate) fn apply_in_buffer_settings<'g, 's, 'sf>( || kw.key.eq_ignore_ascii_case("typ_todo") }) { let (_, (in_progress_words, complete_words)) = - todo_keywords(kw.value).map_err(|err| err.to_string())?; + todo_keywords(kw.value).map_err(|err| match err { + nom::Err::Incomplete(_) => CustomError::Text(err.to_string()), + nom::Err::Error(e) => e, + nom::Err::Failure(e) => e, + })?; new_settings .in_progress_todo_keywords .extend(in_progress_words.into_iter().map(str::to_string)); @@ -112,9 +116,14 @@ pub(crate) fn apply_in_buffer_settings<'g, 's, 'sf>( .iter() .filter(|kw| kw.key.eq_ignore_ascii_case("startup")) { - let (_remaining, settings) = - separated_list0(space1::<&str, nom::error::Error<_>>, is_not(" \t"))(kw.value) - .map_err(|err: nom::Err<_>| err.to_string())?; + let (_remaining, settings) = separated_list0(space1::<&str, CustomError>, is_not(" \t"))( + kw.value, + ) + .map_err(|err: nom::Err<_>| match err { + nom::Err::Incomplete(_) => CustomError::Text(err.to_string()), + nom::Err::Error(e) => e, + nom::Err::Failure(e) => e, + })?; if settings.contains(&"odd") { new_settings.odd_levels_only = HeadlineLevelFilter::Odd; } @@ -128,7 +137,11 @@ pub(crate) fn apply_in_buffer_settings<'g, 's, 'sf>( .iter() .filter(|kw| kw.key.eq_ignore_ascii_case("link")) { - let (_, (link_key, link_value)) = link_template(kw.value).map_err(|e| e.to_string())?; + let (_, (link_key, link_value)) = link_template(kw.value).map_err(|err| match err { + nom::Err::Incomplete(_) => CustomError::Text(err.to_string()), + nom::Err::Error(e) => e, + nom::Err::Failure(e) => e, + })?; new_settings .link_templates .insert(link_key.to_owned(), link_value.to_owned()); @@ -139,9 +152,7 @@ pub(crate) fn apply_in_buffer_settings<'g, 's, 'sf>( /// Apply in-buffer settings that do not impact parsing and therefore can be applied after parsing. #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -pub(crate) fn apply_post_parse_in_buffer_settings<'g, 's, 'sf>( - document: &mut Document<'s>, -) -> Result<(), &'static str> { +pub(crate) fn apply_post_parse_in_buffer_settings<'g, 's, 'sf>(document: &mut Document<'s>) { document.category = Into::::into(&*document) .into_iter() .filter_map(|ast_node| { @@ -154,7 +165,6 @@ pub(crate) fn apply_post_parse_in_buffer_settings<'g, 's, 'sf>( }) .last() .map(|kw| kw.value.to_owned()); - Ok(()) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] diff --git a/src/parser/keyword.rs b/src/parser/keyword.rs index d09ed8d7..2cf29f0f 100644 --- a/src/parser/keyword.rs +++ b/src/parser/keyword.rs @@ -4,11 +4,9 @@ use nom::bytes::complete::tag; use nom::bytes::complete::tag_no_case; use nom::bytes::complete::take_while1; use nom::character::complete::anychar; -use nom::character::complete::line_ending; use nom::character::complete::one_of; use nom::character::complete::space0; use nom::combinator::consumed; -use nom::combinator::eof; use nom::combinator::map; use nom::combinator::not; use nom::combinator::peek; @@ -22,7 +20,8 @@ use super::org_source::BracketDepth; use super::org_source::OrgSource; use super::util::get_consumed; use super::util::maybe_consume_trailing_whitespace_if_not_exiting; -use crate::context::parser_with_context; +use super::util::org_line_ending; +use crate::context::bind_context; use crate::context::RefContext; use crate::error::CustomError; use crate::error::Res; @@ -49,9 +48,8 @@ fn _filtered_keyword<'s, F: Fn(OrgSource<'s>) -> Res, OrgSource<'s // TODO: When key is a member of org-element-parsed-keywords, value can contain the standard set objects, excluding footnote references. let (remaining, (consumed_input, (_, _, parsed_key, _))) = consumed(tuple((space0, tag("#+"), key_parser, tag(":"))))(input)?; - if let Ok((remaining, _)) = - tuple((space0::<_, CustomError>, alt((line_ending, eof))))(remaining) - { + let (remaining, _ws) = space0(remaining)?; + if let Ok((remaining, _)) = org_line_ending(remaining) { return Ok(( remaining, Keyword { @@ -62,12 +60,9 @@ fn _filtered_keyword<'s, F: Fn(OrgSource<'s>) -> Res, OrgSource<'s }, )); } - let (remaining, _ws) = space0(remaining)?; - let (remaining, parsed_value) = recognize(many_till( - anychar, - peek(tuple((space0, alt((line_ending, eof))))), - ))(remaining)?; - let (remaining, _ws) = tuple((space0, alt((line_ending, eof))))(remaining)?; + let (remaining, parsed_value) = + recognize(many_till(anychar, peek(tuple((space0, org_line_ending)))))(remaining)?; + let (remaining, _ws) = tuple((space0, org_line_ending))(remaining)?; Ok(( remaining, Keyword { @@ -107,7 +102,7 @@ pub(crate) fn affiliated_keyword<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Keyword<'s>> { - filtered_keyword(parser_with_context!(affiliated_key)(context))(input) + filtered_keyword(bind_context!(affiliated_key, context))(input) } #[cfg_attr(