Optimize scanning for in-buffer settings by scanning forward for possible keywords.
Previously we stepped through the document character by character, which involved a lot of extra processing inside OrgSource. By scanning for possible keywords, we can skip many of the intermediate steps.
This commit is contained in:
parent
f9460b88d7
commit
8cd0e4ec63
@ -1,17 +1,15 @@
|
|||||||
use nom::branch::alt;
|
use nom::branch::alt;
|
||||||
use nom::bytes::complete::is_not;
|
use nom::bytes::complete::is_not;
|
||||||
use nom::bytes::complete::tag_no_case;
|
use nom::bytes::complete::tag_no_case;
|
||||||
use nom::character::complete::anychar;
|
use nom::bytes::complete::take_until;
|
||||||
use nom::character::complete::space1;
|
use nom::character::complete::space1;
|
||||||
use nom::combinator::map;
|
|
||||||
use nom::multi::many0;
|
|
||||||
use nom::multi::many_till;
|
|
||||||
use nom::multi::separated_list0;
|
use nom::multi::separated_list0;
|
||||||
|
|
||||||
use super::keyword::filtered_keyword;
|
use super::keyword::filtered_keyword;
|
||||||
use super::keyword_todo::todo_keywords;
|
use super::keyword_todo::todo_keywords;
|
||||||
use super::OrgSource;
|
use super::OrgSource;
|
||||||
use crate::context::HeadlineLevelFilter;
|
use crate::context::HeadlineLevelFilter;
|
||||||
|
use crate::error::CustomError;
|
||||||
use crate::error::Res;
|
use crate::error::Res;
|
||||||
use crate::types::Keyword;
|
use crate::types::Keyword;
|
||||||
use crate::GlobalSettings;
|
use crate::GlobalSettings;
|
||||||
@ -20,13 +18,40 @@ use crate::GlobalSettings;
|
|||||||
pub(crate) fn scan_for_in_buffer_settings<'s>(
|
pub(crate) fn scan_for_in_buffer_settings<'s>(
|
||||||
input: OrgSource<'s>,
|
input: OrgSource<'s>,
|
||||||
) -> Res<OrgSource<'s>, Vec<Keyword<'s>>> {
|
) -> Res<OrgSource<'s>, Vec<Keyword<'s>>> {
|
||||||
// TODO: Optimization idea: since this is slicing the OrgSource at each character, it might be more efficient to do a parser that uses a search function like take_until, and wrap it in a function similar to consumed but returning the input along with the normal output, then pass all of that into a verify that confirms we were at the start of a line using the input we just returned.
|
// TODO: Write some tests to make sure this is functioning properly.
|
||||||
|
|
||||||
let keywords = many0(map(
|
let mut keywords = Vec::new();
|
||||||
many_till(anychar, filtered_keyword(in_buffer_settings_key)),
|
let mut remaining = input;
|
||||||
|(_, kw)| kw,
|
loop {
|
||||||
))(input);
|
// Skip text until possible in_buffer_setting
|
||||||
keywords
|
let start_of_pound = take_until::<_, _, CustomError<_>>("#+")(remaining);
|
||||||
|
let start_of_pound = if let Ok((start_of_pound, _)) = start_of_pound {
|
||||||
|
start_of_pound
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
};
|
||||||
|
// Go backwards to the start of the line and run the filtered_keyword parser
|
||||||
|
let start_of_line = start_of_pound.get_start_of_line();
|
||||||
|
|
||||||
|
let (remain, maybe_kw) = match filtered_keyword(in_buffer_settings_key)(start_of_line) {
|
||||||
|
Ok((remain, kw)) => (remain, Some(kw)),
|
||||||
|
Err(_) => {
|
||||||
|
let end_of_line = take_until::<_, _, CustomError<_>>("\n")(start_of_pound);
|
||||||
|
if let Ok((end_of_line, _)) = end_of_line {
|
||||||
|
(end_of_line, None)
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Some(kw) = maybe_kw {
|
||||||
|
keywords.push(kw);
|
||||||
|
}
|
||||||
|
remaining = remain;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok((remaining, keywords))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
use std::ops::RangeBounds;
|
use std::ops::RangeBounds;
|
||||||
|
|
||||||
use nom::Compare;
|
use nom::Compare;
|
||||||
|
use nom::FindSubstring;
|
||||||
use nom::InputIter;
|
use nom::InputIter;
|
||||||
use nom::InputLength;
|
use nom::InputLength;
|
||||||
use nom::InputTake;
|
use nom::InputTake;
|
||||||
@ -77,6 +78,55 @@ impl<'s> OrgSource<'s> {
|
|||||||
self.slice(..(other.start - self.start))
|
self.slice(..(other.start - self.start))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return a copy of this `OrgSource` rewound to the start of the current line.
///
/// The returned value shares `full_source` and `end` with `self`, but its
/// `start` is set to `self.start_of_line`. The bracket, brace and parenthesis
/// depths are recomputed by undoing the effect of every delimiter found in
/// `self.text_since_line_break()`.
///
/// # Panics
///
/// Panics if `self.text_since_line_break()` contains a `\n` byte.
pub(crate) fn get_start_of_line(&self) -> OrgSource<'s> {
|
||||||
|
// Presumably the text between the start of the current line and the current
// position; verify against `text_since_line_break`.
let skipped_text = self.text_since_line_break();
|
||||||
|
let mut bracket_depth = self.bracket_depth;
|
||||||
|
let mut brace_depth = self.brace_depth;
|
||||||
|
let mut parenthesis_depth = self.parenthesis_depth;
|
||||||
|
// We are rewinding over the skipped text, so each delimiter is applied in
// reverse: an opening delimiter lowers its depth, a closing one raises it.
// NOTE(review): if the depth types are unsigned, `-= 1` can underflow unless
// the forward depth tracking is exactly mirrored here; confirm.
for byte in skipped_text.bytes() {
|
||||||
|
match byte {
|
||||||
|
b'\n' => {
|
||||||
|
panic!("Should not hit a line break when only going back to the start of the line.");
|
||||||
|
}
|
||||||
|
b'[' => {
|
||||||
|
bracket_depth -= 1;
|
||||||
|
}
|
||||||
|
b']' => {
|
||||||
|
bracket_depth += 1;
|
||||||
|
}
|
||||||
|
b'{' => {
|
||||||
|
brace_depth -= 1;
|
||||||
|
}
|
||||||
|
b'}' => {
|
||||||
|
brace_depth += 1;
|
||||||
|
}
|
||||||
|
b'(' => {
|
||||||
|
parenthesis_depth -= 1;
|
||||||
|
}
|
||||||
|
b')' => {
|
||||||
|
parenthesis_depth += 1;
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
OrgSource {
|
||||||
|
full_source: self.full_source,
|
||||||
|
start: self.start_of_line,
|
||||||
|
end: self.end,
|
||||||
|
start_of_line: self.start_of_line,
|
||||||
|
// A line that does not begin at offset 0 is reported as preceded by a
// line break; at offset 0 there is no preceding character.
preceding_character: if self.start_of_line > 0 {
|
||||||
|
Some('\n')
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
},
|
||||||
|
bracket_depth,
|
||||||
|
brace_depth,
|
||||||
|
parenthesis_depth,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn get_bracket_depth(&self) -> BracketDepth {
|
pub(crate) fn get_bracket_depth(&self) -> BracketDepth {
|
||||||
self.bracket_depth
|
self.bracket_depth
|
||||||
}
|
}
|
||||||
@ -310,6 +360,12 @@ impl<'s> InputTakeAtPosition for OrgSource<'s> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Substring search over an `OrgSource`, implemented via nom's `FindSubstring`
/// trait so that search-based parsers such as `take_until` can accept an
/// `OrgSource` as input.
impl<'n, 's> FindSubstring<&'n str> for OrgSource<'s> {
|
||||||
|
/// Return the byte offset of the first occurrence of `substr`, or `None`
/// if it does not occur.
///
/// The search runs on the `&str` produced by converting `self`, so the
/// offset is relative to that string (presumably the current slice of the
/// source; verify against the `Into<&str>` impl).
fn find_substring(&self, substr: &'n str) -> Option<usize> {
|
||||||
|
Into::<&str>::into(self).find(substr)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn convert_error<'a, I: Into<CustomError<&'a str>>>(
|
pub(crate) fn convert_error<'a, I: Into<CustomError<&'a str>>>(
|
||||||
err: nom::Err<I>,
|
err: nom::Err<I>,
|
||||||
) -> nom::Err<CustomError<&'a str>> {
|
) -> nom::Err<CustomError<&'a str>> {
|
||||||
|
Loading…
Reference in New Issue
Block a user