Optimize scanning for in-buffer settings by scanning forward for possible keywords.

Previously we stepped through the document character by character, which involved a lot of extra processing inside OrgSource. By scanning for possible keywords, we can skip many of the intermediate steps.
This commit is contained in:
Tom Alexander
2023-09-24 02:58:32 -04:00
parent f9460b88d7
commit 8cd0e4ec63
2 changed files with 91 additions and 10 deletions

View File

@@ -1,17 +1,15 @@
use nom::branch::alt;
use nom::bytes::complete::is_not;
use nom::bytes::complete::tag_no_case;
use nom::character::complete::anychar;
use nom::bytes::complete::take_until;
use nom::character::complete::space1;
use nom::combinator::map;
use nom::multi::many0;
use nom::multi::many_till;
use nom::multi::separated_list0;
use super::keyword::filtered_keyword;
use super::keyword_todo::todo_keywords;
use super::OrgSource;
use crate::context::HeadlineLevelFilter;
use crate::error::CustomError;
use crate::error::Res;
use crate::types::Keyword;
use crate::GlobalSettings;
@@ -20,13 +18,40 @@ use crate::GlobalSettings;
/// Scans `input` for in-buffer setting keywords and collects them in the order they are found.
///
/// Candidates are located by searching forward for the literal `#+`, then parsed with
/// `filtered_keyword(in_buffer_settings_key)` from the start of that line. Lines that fail to
/// parse are skipped.
///
/// Returns the input left over once scanning stops, paired with the collected keywords.
pub(crate) fn scan_for_in_buffer_settings<'s>(
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Vec<Keyword<'s>>> {
// TODO: Optimization idea: since this is slicing the OrgSource at each character, it might be more efficient to do a parser that uses a search function like take_until, and wrap it in a function similar to consumed but returning the input along with the normal output, then pass all of that into a verify that confirms we were at the start of a line using the input we just returned.
// TODO: Write some tests to make sure this is functioning properly.
// NOTE(review): this listing looks like a diff rendered without +/- markers. The
// many0/many_till expression below appears to be the previous character-by-character
// implementation that the loop after it replaces — confirm against the real file.
let keywords = many0(map(
many_till(anychar, filtered_keyword(in_buffer_settings_key)),
|(_, kw)| kw,
))(input);
keywords
let mut keywords = Vec::new();
let mut remaining = input;
loop {
// Skip text until possible in_buffer_setting
let start_of_pound = take_until::<_, _, CustomError<_>>("#+")(remaining);
let start_of_pound = if let Ok((start_of_pound, _)) = start_of_pound {
start_of_pound
} else {
// No further "#+" in the remaining input, so there is nothing left to scan.
break;
};
// Go backwards to the start of the line and run the filtered_keyword parser
let start_of_line = start_of_pound.get_start_of_line();
let (remain, maybe_kw) = match filtered_keyword(in_buffer_settings_key)(start_of_line) {
// Parsed a keyword: continue scanning from whatever the keyword parser left over.
Ok((remain, kw)) => (remain, Some(kw)),
Err(_) => {
// Not a matching keyword: resume scanning at the next "\n" after this "#+".
let end_of_line = take_until::<_, _, CustomError<_>>("\n")(start_of_pound);
if let Ok((end_of_line, _)) = end_of_line {
(end_of_line, None)
} else {
// No newline follows, so this "#+" was on the final line of the input.
break;
}
}
};
if let Some(kw) = maybe_kw {
keywords.push(kw);
}
// Advance the cursor so the next iteration searches past this line.
remaining = remain;
}
// The loop only exits via `break`, so this path always reports success.
Ok((remaining, keywords))
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]