Optimize scanning for in-buffer settings by scanning forward for possible keywords.
Previously, we stepped through the document character by character, which involved a lot of extra processing inside OrgSource. By scanning for possible keywords, we can skip many of the intermediate steps.
This commit is contained in:
parent
f9460b88d7
commit
8cd0e4ec63
@ -1,17 +1,15 @@
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::is_not;
|
||||
use nom::bytes::complete::tag_no_case;
|
||||
use nom::character::complete::anychar;
|
||||
use nom::bytes::complete::take_until;
|
||||
use nom::character::complete::space1;
|
||||
use nom::combinator::map;
|
||||
use nom::multi::many0;
|
||||
use nom::multi::many_till;
|
||||
use nom::multi::separated_list0;
|
||||
|
||||
use super::keyword::filtered_keyword;
|
||||
use super::keyword_todo::todo_keywords;
|
||||
use super::OrgSource;
|
||||
use crate::context::HeadlineLevelFilter;
|
||||
use crate::error::CustomError;
|
||||
use crate::error::Res;
|
||||
use crate::types::Keyword;
|
||||
use crate::GlobalSettings;
|
||||
@ -20,13 +18,40 @@ use crate::GlobalSettings;
|
||||
/// Collect every keyword in `input` that `filtered_keyword(in_buffer_settings_key)` accepts.
///
/// Returns the position where scanning stopped together with the collected keywords.
///
/// NOTE(review): this span is a rendered diff, so it appears to contain two bodies for the
/// same function: the `many0`/`many_till` version (which looks like the removed side) and the
/// `loop` version (which looks like the added side). Confirm against the repository before
/// treating either as authoritative.
pub(crate) fn scan_for_in_buffer_settings<'s>(
|
||||
input: OrgSource<'s>,
|
||||
) -> Res<OrgSource<'s>, Vec<Keyword<'s>>> {
|
||||
// TODO: Optimization idea: since this is slicing the OrgSource at each character, it might be more efficient to do a parser that uses a search function like take_until, and wrap it in a function similar to consumed but returning the input along with the normal output, then pass all of that into a verify that confirms we were at the start of a line using the input we just returned.
|
||||
// TODO: Write some tests to make sure this is functioning properly.
|
||||
|
||||
// Presumably the previous implementation: advance one character at a time with `anychar`
// until `filtered_keyword` matches, keep only the keyword, and repeat with `many0`.
let keywords = many0(map(
|
||||
many_till(anychar, filtered_keyword(in_buffer_settings_key)),
|
||||
|(_, kw)| kw,
|
||||
))(input);
|
||||
keywords
|
||||
// Presumably the replacement implementation: jump between occurrences of "#+" instead of
// visiting every character.
let mut keywords = Vec::new();
|
||||
let mut remaining = input;
|
||||
loop {
|
||||
// Skip text until possible in_buffer_setting
|
||||
let start_of_pound = take_until::<_, _, CustomError<_>>("#+")(remaining);
|
||||
// No further "#+" in the remaining input means there is nothing left to scan.
let start_of_pound = if let Ok((start_of_pound, _)) = start_of_pound {
|
||||
start_of_pound
|
||||
} else {
|
||||
break;
|
||||
};
|
||||
// Go backwards to the start of the line and run the filtered_keyword parser
|
||||
let start_of_line = start_of_pound.get_start_of_line();
|
||||
|
||||
let (remain, maybe_kw) = match filtered_keyword(in_buffer_settings_key)(start_of_line) {
|
||||
Ok((remain, kw)) => (remain, Some(kw)),
|
||||
Err(_) => {
|
||||
// This line did not parse as a matching keyword: resume from the next line break
// after the "#+", or stop if there is no later line break.
let end_of_line = take_until::<_, _, CustomError<_>>("\n")(start_of_pound);
|
||||
if let Ok((end_of_line, _)) = end_of_line {
|
||||
(end_of_line, None)
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(kw) = maybe_kw {
|
||||
keywords.push(kw);
|
||||
}
|
||||
// Continue scanning from wherever this iteration stopped.
remaining = remain;
|
||||
}
|
||||
|
||||
// `remaining` is the position reached by the last completed iteration.
Ok((remaining, keywords))
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
|
@ -1,6 +1,7 @@
|
||||
use std::ops::RangeBounds;
|
||||
|
||||
use nom::Compare;
|
||||
use nom::FindSubstring;
|
||||
use nom::InputIter;
|
||||
use nom::InputLength;
|
||||
use nom::InputTake;
|
||||
@ -77,6 +78,55 @@ impl<'s> OrgSource<'s> {
|
||||
self.slice(..(other.start - self.start))
|
||||
}
|
||||
|
||||
pub(crate) fn get_start_of_line(&self) -> OrgSource<'s> {
|
||||
let skipped_text = self.text_since_line_break();
|
||||
let mut bracket_depth = self.bracket_depth;
|
||||
let mut brace_depth = self.brace_depth;
|
||||
let mut parenthesis_depth = self.parenthesis_depth;
|
||||
// Since we're going backwards, this does the opposite.
|
||||
for byte in skipped_text.bytes() {
|
||||
match byte {
|
||||
b'\n' => {
|
||||
panic!("Should not hit a line break when only going back to the start of the line.");
|
||||
}
|
||||
b'[' => {
|
||||
bracket_depth -= 1;
|
||||
}
|
||||
b']' => {
|
||||
bracket_depth += 1;
|
||||
}
|
||||
b'{' => {
|
||||
brace_depth -= 1;
|
||||
}
|
||||
b'}' => {
|
||||
brace_depth += 1;
|
||||
}
|
||||
b'(' => {
|
||||
parenthesis_depth -= 1;
|
||||
}
|
||||
b')' => {
|
||||
parenthesis_depth += 1;
|
||||
}
|
||||
_ => {}
|
||||
};
|
||||
}
|
||||
|
||||
OrgSource {
|
||||
full_source: self.full_source,
|
||||
start: self.start_of_line,
|
||||
end: self.end,
|
||||
start_of_line: self.start_of_line,
|
||||
preceding_character: if self.start_of_line > 0 {
|
||||
Some('\n')
|
||||
} else {
|
||||
None
|
||||
},
|
||||
bracket_depth,
|
||||
brace_depth,
|
||||
parenthesis_depth,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn get_bracket_depth(&self) -> BracketDepth {
|
||||
self.bracket_depth
|
||||
}
|
||||
@ -310,6 +360,12 @@ impl<'s> InputTakeAtPosition for OrgSource<'s> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Substring search for `OrgSource`, delegating to `str::find` on its `&str` conversion.
impl<'n, 's> FindSubstring<&'n str> for OrgSource<'s> {
    /// Return the result of `str::find` for `substr` on this source's `&str` conversion.
    fn find_substring(&self, substr: &'n str) -> Option<usize> {
        let haystack: &str = Into::into(self);
        haystack.find(substr)
    }
}
|
||||
|
||||
pub(crate) fn convert_error<'a, I: Into<CustomError<&'a str>>>(
|
||||
err: nom::Err<I>,
|
||||
) -> nom::Err<CustomError<&'a str>> {
|
||||
|
Loading…
Reference in New Issue
Block a user