Merge branch 'perf_improvement'
rustfmt Build rustfmt has succeeded Details
clippy Build clippy has succeeded Details
rust-build Build rust-build has succeeded Details
rust-foreign-document-test Build rust-foreign-document-test has failed Details
rust-test Build rust-test has succeeded Details

This commit is contained in:
Tom Alexander 2023-10-17 13:54:44 -04:00
commit 44f7412a5c
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
14 changed files with 160 additions and 61 deletions

View File

@ -59,6 +59,7 @@ default = []
compare = ["tokio/process", "tokio/macros"]
foreign_document_test = ["compare", "dep:futures", "tokio/sync", "dep:walkdir", "tokio/process"]
tracing = ["dep:opentelemetry", "dep:opentelemetry-otlp", "dep:opentelemetry-semantic-conventions", "dep:tokio", "dep:tracing", "dep:tracing-opentelemetry", "dep:tracing-subscriber"]
event_count = []
# Optimized build for any sort of release.
[profile.release-lto]

View File

@ -14,7 +14,7 @@ function main {
additional_flags+=(--profile "$PROFILE")
fi
(cd "$DIR/../" && cargo build --no-default-features "${additional_flags[@]}")
perf record --freq=2000 --call-graph dwarf --output="$DIR/../perf.data" "$DIR/../target/${PROFILE}/parse" "${@}"
perf record --freq=70000 --call-graph dwarf --output="$DIR/../perf.data" "$DIR/../target/${PROFILE}/parse" "${@}"
# Convert to a format firefox will read
# flags to consider --show-info

View File

@ -10,7 +10,6 @@ pub enum CustomError {
Text(String),
Static(&'static str),
IO(std::io::Error),
BoxedError(Box<dyn std::error::Error>),
Parser(ErrorKind),
}
@ -37,8 +36,8 @@ impl From<&'static str> for CustomError {
}
}
impl From<Box<dyn std::error::Error>> for CustomError {
fn from(value: Box<dyn std::error::Error>) -> Self {
CustomError::BoxedError(value)
impl From<String> for CustomError {
fn from(value: String) -> Self {
CustomError::Text(value)
}
}

View File

@ -0,0 +1,43 @@
use std::collections::HashMap;
use std::sync::Mutex;
use super::EventType;
use crate::parser::OrgSource;
/// Key under which event occurrences are counted: the kind of event paired
/// with the byte offset at which it was recorded.
///
/// Used as the key type of the `HashMap` stored in `GLOBAL_DATA`, which is why
/// it derives `Eq` and `Hash`.
#[derive(Debug, Eq, Hash, PartialEq)]
struct EventKey {
/// Kind of event that was recorded.
event_type: EventType,
/// Byte offset reported by the input (`get_byte_offset()`) when the event was recorded.
byte_offset: usize,
}
/// Number of times a particular `EventKey` has been recorded.
pub(crate) type EventCount = usize;
/// Mutex-guarded table mapping each `EventKey` to its count.
///
/// Starts out as `None`; the map is created on first use via
/// `get_or_insert_with(HashMap::new)`.
static GLOBAL_DATA: Mutex<Option<HashMap<EventKey, EventCount>>> = Mutex::new(None);
/// Bump the counter for `event_type` at the current byte offset of `input`.
///
/// The counter table is created on first use. A key seen for the first time
/// starts at a count of one.
///
/// # Panics
///
/// Panics if the mutex guarding the table has been poisoned.
pub(crate) fn record_event(event_type: EventType, input: OrgSource<'_>) {
    let mut guard = GLOBAL_DATA.lock().unwrap();
    let counts = guard.get_or_insert_with(HashMap::new);
    let byte_offset = input.get_byte_offset();
    counts
        .entry(EventKey {
            event_type,
            byte_offset,
        })
        .and_modify(|seen| *seen += 1)
        .or_insert(1);
}
pub fn report(original_document: &str) {
let mut db = GLOBAL_DATA.lock().unwrap();
let db = db.get_or_insert_with(HashMap::new);
let mut results: Vec<_> = db.iter().map(|(k, v)| (k, v)).collect();
results.sort_by_key(|(_k, v)| *v);
// This would put the most common at the top, but that is a pain when there is already a lot of output from the parser.
// results.sort_by(|(_ak, av), (_bk, bv)| bv.cmp(av));
for (key, count) in results {
println!(
"{:?} {} character offset: {} byte offset: {}",
key.event_type,
count,
original_document[..key.byte_offset].chars().count() + 1,
key.byte_offset
)
}
}

View File

@ -0,0 +1,4 @@
/// Kinds of events that can be counted.
///
/// This is a fieldless enum, so it is `Copy`: values can be passed around and
/// stored in keys without moving or borrowing.
#[derive(Debug, Clone, Copy, Eq, Hash, PartialEq)]
pub(crate) enum EventType {
    /// Recorded each time parsing of an element is attempted.
    ElementStart,
}

6
src/event_count/mod.rs Normal file
View File

@ -0,0 +1,6 @@
mod database;
mod event_type;
pub(crate) use database::record_event;
pub use database::report;
pub(crate) use event_type::EventType;

View File

@ -13,6 +13,8 @@ pub mod compare;
mod context;
mod error;
#[cfg(feature = "event_count")]
pub mod event_count;
mod iter;
pub mod parser;
pub mod types;

View File

@ -54,8 +54,11 @@ fn read_stdin_to_string() -> Result<String, Box<dyn std::error::Error>> {
}
fn run_anonymous_parse<P: AsRef<str>>(org_contents: P) -> Result<(), Box<dyn std::error::Error>> {
let rust_parsed = parse(org_contents.as_ref())?;
let org_contents = org_contents.as_ref();
let rust_parsed = parse(org_contents)?;
println!("{:#?}", rust_parsed);
#[cfg(feature = "event_count")]
organic::event_count::report(org_contents);
Ok(())
}
@ -75,5 +78,7 @@ fn run_parse_on_file<P: AsRef<Path>>(org_path: P) -> Result<(), Box<dyn std::err
};
let rust_parsed = parse_with_settings(org_contents, &global_settings)?;
println!("{:#?}", rust_parsed);
#[cfg(feature = "event_count")]
organic::event_count::report(org_contents);
Ok(())
}

View File

@ -14,17 +14,48 @@ use nom::multi::many0;
use nom::multi::many_till;
use nom::sequence::tuple;
use super::keyword::affiliated_keyword;
use super::object_parser::standard_set_object;
use super::util::confine_context;
use super::OrgSource;
use crate::context::bind_context;
use crate::context::Context;
use crate::context::ContextElement;
use crate::context::GlobalSettings;
use crate::context::List;
use crate::context::RefContext;
use crate::error::Res;
use crate::types::AffiliatedKeywordValue;
use crate::types::AffiliatedKeywords;
use crate::types::Keyword;
/// Parse as many consecutive affiliated keywords as possible starting at `input`.
///
/// `affiliated_keyword` is applied repeatedly, each time to the input left over
/// from the previous success. The first error ends the run and is discarded, so
/// this function always returns `Ok`: the unconsumed input together with the
/// keywords collected so far (possibly none).
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(ret, level = "debug", skip(context))
)]
pub(crate) fn affiliated_keywords<'b, 'g, 'r, 's>(
    context: RefContext<'b, 'g, 'r, 's>,
    input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Vec<Keyword<'s>>> {
    let mut keywords = Vec::new();
    let mut cursor = input;
    while let Ok((rest, keyword)) = affiliated_keyword(context, cursor) {
        cursor = rest;
        keywords.push(keyword);
    }
    Ok((cursor, keywords))
}
pub(crate) fn parse_affiliated_keywords<'g, 's, AK>(
global_settings: &'g GlobalSettings<'g, 's>,
input: AK,

View File

@ -129,23 +129,12 @@ fn document_org_source<'b, 'g, 'r, 's>(
})
.collect::<Result<Vec<_>, _>>()?;
for setup_file in setup_files.iter().map(String::as_str) {
let (_, setup_file_settings) =
scan_for_in_buffer_settings(setup_file.into()).map_err(|err| {
eprintln!("{}", err);
nom::Err::Error(CustomError::Static(
"TODO: make this take an owned string so I can dump err.to_string() into it.",
))
})?;
let (_, setup_file_settings) = scan_for_in_buffer_settings(setup_file.into())?;
final_settings.extend(setup_file_settings);
}
final_settings.extend(document_settings);
let new_settings = apply_in_buffer_settings(final_settings, context.get_global_settings())
.map_err(|err| {
eprintln!("{}", err);
nom::Err::Error(CustomError::Static(
"TODO: make this take an owned string so I can dump err.to_string() into it.",
))
})?;
.map_err(nom::Err::Error)?;
let new_context = context.with_global_settings(&new_settings);
let context = &new_context;
@ -170,15 +159,13 @@ fn document_org_source<'b, 'g, 'r, 's>(
let parser_context = context.with_global_settings(&new_global_settings);
let (remaining, mut document) = _document(&parser_context, input)
.map(|(rem, out)| (Into::<&str>::into(rem), out))?;
apply_post_parse_in_buffer_settings(&mut document)
.map_err(|err| nom::Err::<CustomError>::Failure(err.into()))?;
apply_post_parse_in_buffer_settings(&mut document);
return Ok((remaining.into(), document));
}
}
// Find final in-buffer settings that do not impact parsing
apply_post_parse_in_buffer_settings(&mut document)
.map_err(|err| nom::Err::<CustomError>::Failure(err.into()))?;
apply_post_parse_in_buffer_settings(&mut document);
Ok((remaining.into(), document))
}
@ -208,3 +195,17 @@ fn _document<'b, 'g, 'r, 's>(
},
))
}
#[cfg(test)]
mod tests {
    use test::Bencher;

    use super::*;

    /// Benchmark a complete parse of a bundled sample document, asserting on
    /// every iteration that the parse succeeds.
    #[bench]
    fn bench_full_document(b: &mut Bencher) {
        let sample = include_str!("../../org_mode_samples/element_container_priority/README.org");
        b.iter(|| {
            let outcome = parse(sample);
            assert!(outcome.is_ok())
        });
    }
}

View File

@ -1,5 +1,3 @@
use nom::multi::many0;
use super::babel_call::babel_call;
use super::clock::clock;
use super::comment::comment;
@ -14,7 +12,6 @@ use super::footnote_definition::detect_footnote_definition;
use super::footnote_definition::footnote_definition;
use super::greater_block::greater_block;
use super::horizontal_rule::horizontal_rule;
use super::keyword::affiliated_keyword;
use super::keyword::keyword;
use super::latex_environment::latex_environment;
use super::lesser_block::comment_block;
@ -27,10 +24,14 @@ use super::paragraph::paragraph;
use super::plain_list::detect_plain_list;
use super::plain_list::plain_list;
use super::table::detect_table;
use crate::context::parser_with_context;
use crate::context::RefContext;
use crate::error::CustomError;
use crate::error::Res;
#[cfg(feature = "event_count")]
use crate::event_count::record_event;
#[cfg(feature = "event_count")]
use crate::event_count::EventType;
use crate::parser::affiliated_keyword::affiliated_keywords;
use crate::parser::macros::ak_element;
use crate::parser::macros::element;
use crate::parser::table::org_mode_table;
@ -54,8 +55,10 @@ fn _element<'b, 'g, 'r, 's>(
input: OrgSource<'s>,
can_be_paragraph: bool,
) -> Res<OrgSource<'s>, Element<'s>> {
#[cfg(feature = "event_count")]
record_event(EventType::ElementStart, input);
let (post_affiliated_keywords_input, affiliated_keywords) =
many0(parser_with_context!(affiliated_keyword)(context))(input)?;
affiliated_keywords(context, input)?;
let mut affiliated_keywords = affiliated_keywords.into_iter();
@ -270,7 +273,7 @@ fn _detect_element<'b, 'g, 'r, 's>(
can_be_paragraph: bool,
) -> Res<OrgSource<'s>, ()> {
let (post_affiliated_keywords_input, affiliated_keywords) =
many0(parser_with_context!(affiliated_keyword)(context))(input)?;
affiliated_keywords(context, input)?;
let mut affiliated_keywords = affiliated_keywords.into_iter();

View File

@ -18,12 +18,13 @@ use nom::sequence::tuple;
use super::org_source::OrgSource;
use super::section::section;
use super::util::exit_matcher_parser;
use super::util::get_consumed;
use super::util::org_line_ending;
use super::util::org_space;
use super::util::org_space_or_line_ending;
use super::util::start_of_line;
use crate::context::parser_with_context;
use crate::context::bind_context;
use crate::context::ContextElement;
use crate::context::ExitClass;
use crate::context::ExitMatcherNode;
@ -61,10 +62,10 @@ fn _heading<'b, 'g, 'r, 's>(
let mut scheduled = None;
let mut deadline = None;
let mut closed = None;
not(|i| context.check_exit_matcher(i))(input)?;
not(bind_context!(exit_matcher_parser, context))(input)?;
let (remaining, pre_headline) = headline(context, input, parent_star_count)?;
let section_matcher = parser_with_context!(section)(context);
let heading_matcher = parser_with_context!(heading(pre_headline.star_count))(context);
let section_matcher = bind_context!(section, context);
let heading_matcher = bind_context!(heading(pre_headline.star_count), context);
let (remaining, maybe_section) =
opt(map(section_matcher, DocumentElement::Section))(remaining)?;
let (remaining, _ws) = opt(tuple((start_of_line, many0(blank_line))))(remaining)?;
@ -154,7 +155,7 @@ fn headline<'b, 'g, 'r, 's>(
let (remaining, (_, (headline_level, star_count, _), _)) = tuple((
start_of_line,
verify(
parser_with_context!(headline_level)(&parser_context),
bind_context!(headline_level, &parser_context),
|(_, count, _)| *count > parent_star_count,
),
peek(org_space),
@ -162,7 +163,7 @@ fn headline<'b, 'g, 'r, 's>(
let (remaining, maybe_todo_keyword) = opt(tuple((
space1,
parser_with_context!(heading_keyword)(&parser_context),
bind_context!(heading_keyword, &parser_context),
peek(org_space_or_line_ending),
)))(remaining)?;
@ -176,9 +177,7 @@ fn headline<'b, 'g, 'r, 's>(
let (remaining, maybe_title) = opt(tuple((
space1,
consumed(many1(parser_with_context!(standard_set_object)(
&parser_context,
))),
consumed(many1(bind_context!(standard_set_object, &parser_context))),
)))(remaining)?;
let (remaining, maybe_tags) = opt(tuple((space0, tags)))(remaining)?;

View File

@ -88,7 +88,7 @@ fn in_buffer_settings_key<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSou
pub(crate) fn apply_in_buffer_settings<'g, 's, 'sf>(
keywords: Vec<Keyword<'sf>>,
original_settings: &'g GlobalSettings<'g, 's>,
) -> Result<GlobalSettings<'g, 's>, String> {
) -> Result<GlobalSettings<'g, 's>, CustomError> {
let mut new_settings = original_settings.clone();
// Todo Keywords
@ -98,7 +98,11 @@ pub(crate) fn apply_in_buffer_settings<'g, 's, 'sf>(
|| kw.key.eq_ignore_ascii_case("typ_todo")
}) {
let (_, (in_progress_words, complete_words)) =
todo_keywords(kw.value).map_err(|err| err.to_string())?;
todo_keywords(kw.value).map_err(|err| match err {
nom::Err::Incomplete(_) => CustomError::Text(err.to_string()),
nom::Err::Error(e) => e,
nom::Err::Failure(e) => e,
})?;
new_settings
.in_progress_todo_keywords
.extend(in_progress_words.into_iter().map(str::to_string));
@ -112,9 +116,14 @@ pub(crate) fn apply_in_buffer_settings<'g, 's, 'sf>(
.iter()
.filter(|kw| kw.key.eq_ignore_ascii_case("startup"))
{
let (_remaining, settings) =
separated_list0(space1::<&str, nom::error::Error<_>>, is_not(" \t"))(kw.value)
.map_err(|err: nom::Err<_>| err.to_string())?;
let (_remaining, settings) = separated_list0(space1::<&str, CustomError>, is_not(" \t"))(
kw.value,
)
.map_err(|err: nom::Err<_>| match err {
nom::Err::Incomplete(_) => CustomError::Text(err.to_string()),
nom::Err::Error(e) => e,
nom::Err::Failure(e) => e,
})?;
if settings.contains(&"odd") {
new_settings.odd_levels_only = HeadlineLevelFilter::Odd;
}
@ -128,7 +137,11 @@ pub(crate) fn apply_in_buffer_settings<'g, 's, 'sf>(
.iter()
.filter(|kw| kw.key.eq_ignore_ascii_case("link"))
{
let (_, (link_key, link_value)) = link_template(kw.value).map_err(|e| e.to_string())?;
let (_, (link_key, link_value)) = link_template(kw.value).map_err(|err| match err {
nom::Err::Incomplete(_) => CustomError::Text(err.to_string()),
nom::Err::Error(e) => e,
nom::Err::Failure(e) => e,
})?;
new_settings
.link_templates
.insert(link_key.to_owned(), link_value.to_owned());
@ -139,9 +152,7 @@ pub(crate) fn apply_in_buffer_settings<'g, 's, 'sf>(
/// Apply in-buffer settings that do not impact parsing and therefore can be applied after parsing.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub(crate) fn apply_post_parse_in_buffer_settings<'g, 's, 'sf>(
document: &mut Document<'s>,
) -> Result<(), &'static str> {
pub(crate) fn apply_post_parse_in_buffer_settings<'g, 's, 'sf>(document: &mut Document<'s>) {
document.category = Into::<AstNode>::into(&*document)
.into_iter()
.filter_map(|ast_node| {
@ -154,7 +165,6 @@ pub(crate) fn apply_post_parse_in_buffer_settings<'g, 's, 'sf>(
})
.last()
.map(|kw| kw.value.to_owned());
Ok(())
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]

View File

@ -4,11 +4,9 @@ use nom::bytes::complete::tag;
use nom::bytes::complete::tag_no_case;
use nom::bytes::complete::take_while1;
use nom::character::complete::anychar;
use nom::character::complete::line_ending;
use nom::character::complete::one_of;
use nom::character::complete::space0;
use nom::combinator::consumed;
use nom::combinator::eof;
use nom::combinator::map;
use nom::combinator::not;
use nom::combinator::peek;
@ -22,7 +20,8 @@ use super::org_source::BracketDepth;
use super::org_source::OrgSource;
use super::util::get_consumed;
use super::util::maybe_consume_trailing_whitespace_if_not_exiting;
use crate::context::parser_with_context;
use super::util::org_line_ending;
use crate::context::bind_context;
use crate::context::RefContext;
use crate::error::CustomError;
use crate::error::Res;
@ -49,9 +48,8 @@ fn _filtered_keyword<'s, F: Fn(OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s
// TODO: When key is a member of org-element-parsed-keywords, value can contain the standard set objects, excluding footnote references.
let (remaining, (consumed_input, (_, _, parsed_key, _))) =
consumed(tuple((space0, tag("#+"), key_parser, tag(":"))))(input)?;
if let Ok((remaining, _)) =
tuple((space0::<_, CustomError>, alt((line_ending, eof))))(remaining)
{
let (remaining, _ws) = space0(remaining)?;
if let Ok((remaining, _)) = org_line_ending(remaining) {
return Ok((
remaining,
Keyword {
@ -62,12 +60,9 @@ fn _filtered_keyword<'s, F: Fn(OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s
},
));
}
let (remaining, _ws) = space0(remaining)?;
let (remaining, parsed_value) = recognize(many_till(
anychar,
peek(tuple((space0, alt((line_ending, eof))))),
))(remaining)?;
let (remaining, _ws) = tuple((space0, alt((line_ending, eof))))(remaining)?;
let (remaining, parsed_value) =
recognize(many_till(anychar, peek(tuple((space0, org_line_ending)))))(remaining)?;
let (remaining, _ws) = tuple((space0, org_line_ending))(remaining)?;
Ok((
remaining,
Keyword {
@ -107,7 +102,7 @@ pub(crate) fn affiliated_keyword<'b, 'g, 'r, 's>(
context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Keyword<'s>> {
filtered_keyword(parser_with_context!(affiliated_key)(context))(input)
filtered_keyword(bind_context!(affiliated_key, context))(input)
}
#[cfg_attr(