From 8cd0e4ec638cecc26934c607adf9c7c332e1573d Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sun, 24 Sep 2023 02:58:32 -0400 Subject: [PATCH] Optimize scanning for in-buffer settings by scanning forward for possible keywords. Previously we stepped through the document character by character which involved a lot of extra processing inside OrgSource. By scanning for possible keywords, we can skip many of the intermediate steps. --- src/parser/in_buffer_settings.rs | 45 +++++++++++++++++++------ src/parser/org_source.rs | 56 ++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 10 deletions(-) diff --git a/src/parser/in_buffer_settings.rs b/src/parser/in_buffer_settings.rs index 40f8505..824c2da 100644 --- a/src/parser/in_buffer_settings.rs +++ b/src/parser/in_buffer_settings.rs @@ -1,17 +1,15 @@ use nom::branch::alt; use nom::bytes::complete::is_not; use nom::bytes::complete::tag_no_case; -use nom::character::complete::anychar; +use nom::bytes::complete::take_until; use nom::character::complete::space1; -use nom::combinator::map; -use nom::multi::many0; -use nom::multi::many_till; use nom::multi::separated_list0; use super::keyword::filtered_keyword; use super::keyword_todo::todo_keywords; use super::OrgSource; use crate::context::HeadlineLevelFilter; +use crate::error::CustomError; use crate::error::Res; use crate::types::Keyword; use crate::GlobalSettings; @@ -20,13 +18,40 @@ use crate::GlobalSettings; pub(crate) fn scan_for_in_buffer_settings<'s>( input: OrgSource<'s>, ) -> Res<OrgSource<'s>, Vec<Keyword<'s>>> { - // TODO: Optimization idea: since this is slicing the OrgSource at each character, it might be more efficient to do a parser that uses a search function like take_until, and wrap it in a function similar to consumed but returning the input along with the normal output, then pass all of that into a verify that confirms we were at the start of a line using the input we just returned. + // TODO: Write some tests to make sure this is functioning properly.
- let keywords = many0(map( - many_till(anychar, filtered_keyword(in_buffer_settings_key)), - |(_, kw)| kw, - ))(input); - keywords + let mut keywords = Vec::new(); + let mut remaining = input; + loop { + // Skip text until possible in_buffer_setting + let start_of_pound = take_until::<_, _, CustomError<_>>("#+")(remaining); + let start_of_pound = if let Ok((start_of_pound, _)) = start_of_pound { + start_of_pound + } else { + break; + }; + // Go backwards to the start of the line and run the filtered_keyword parser + let start_of_line = start_of_pound.get_start_of_line(); + + let (remain, maybe_kw) = match filtered_keyword(in_buffer_settings_key)(start_of_line) { + Ok((remain, kw)) => (remain, Some(kw)), + Err(_) => { + let end_of_line = take_until::<_, _, CustomError<_>>("\n")(start_of_pound); + if let Ok((end_of_line, _)) = end_of_line { + (end_of_line, None) + } else { + break; + } + } + }; + + if let Some(kw) = maybe_kw { + keywords.push(kw); + } + remaining = remain; + } + + Ok((remaining, keywords)) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] diff --git a/src/parser/org_source.rs b/src/parser/org_source.rs index 9f8c0f6..252a5fc 100644 --- a/src/parser/org_source.rs +++ b/src/parser/org_source.rs @@ -1,6 +1,7 @@ use std::ops::RangeBounds; use nom::Compare; +use nom::FindSubstring; use nom::InputIter; use nom::InputLength; use nom::InputTake; @@ -77,6 +78,55 @@ impl<'s> OrgSource<'s> { self.slice(..(other.start - self.start)) } + pub(crate) fn get_start_of_line(&self) -> OrgSource<'s> { + let skipped_text = self.text_since_line_break(); + let mut bracket_depth = self.bracket_depth; + let mut brace_depth = self.brace_depth; + let mut parenthesis_depth = self.parenthesis_depth; + // Since we're going backwards, this does the opposite. 
+ for byte in skipped_text.bytes() { + match byte { + b'\n' => { + panic!("Should not hit a line break when only going back to the start of the line."); + } + b'[' => { + bracket_depth -= 1; + } + b']' => { + bracket_depth += 1; + } + b'{' => { + brace_depth -= 1; + } + b'}' => { + brace_depth += 1; + } + b'(' => { + parenthesis_depth -= 1; + } + b')' => { + parenthesis_depth += 1; + } + _ => {} + }; + } + + OrgSource { + full_source: self.full_source, + start: self.start_of_line, + end: self.end, + start_of_line: self.start_of_line, + preceding_character: if self.start_of_line > 0 { + Some('\n') + } else { + None + }, + bracket_depth, + brace_depth, + parenthesis_depth, + } + } + pub(crate) fn get_bracket_depth(&self) -> BracketDepth { self.bracket_depth } @@ -310,6 +360,12 @@ impl<'s> InputTakeAtPosition for OrgSource<'s> { } } +impl<'n, 's> FindSubstring<&'n str> for OrgSource<'s> { + fn find_substring(&self, substr: &'n str) -> Option<usize> { + Into::<&str>::into(self).find(substr) + } +} + pub(crate) fn convert_error<'a, I: Into<CustomError<&'a str>>>( err: nom::Err<I>, ) -> nom::Err<CustomError<&'a str>> {