Compare commits

...

4 Commits

Author SHA1 Message Date
Tom Alexander
dd91e506bd
Merge branch 'scan_optimization'
All checks were successful
rustfmt Build rustfmt has succeeded
rust-test Build rust-test has succeeded
rust-build Build rust-build has succeeded
rust-foreign-document-test Build rust-foreign-document-test has succeeded
2023-09-24 03:11:46 -04:00
Tom Alexander
cd781a7dcf
Add simple test to prove the scan for in-buffer settings is still working. 2023-09-24 03:09:51 -04:00
Tom Alexander
8cd0e4ec63
Optimize scanning for in-buffer settings by scanning forward for possible keywords.
Previously we stepped through the document character by character, which involved a lot of extra processing inside OrgSource. By scanning for possible keywords, we can skip many of the intermediate steps.
2023-09-24 02:58:32 -04:00
Tom Alexander
f9460b88d7
Add a TODO for a performance optimization. 2023-09-24 01:59:26 -04:00
3 changed files with 122 additions and 10 deletions

View File

@ -107,6 +107,7 @@ fn _element<'b, 'g, 'r, 's>(
match map(paragraph_matcher, Element::Paragraph)(remaining) { match map(paragraph_matcher, Element::Paragraph)(remaining) {
the_ok @ Ok(_) => the_ok, the_ok @ Ok(_) => the_ok,
Err(_) => { Err(_) => {
// TODO: Because this function expects a single element, if there are multiple affiliated keywords before an element that cannot have affiliated keywords, we end up re-parsing the affiliated keywords many times.
affiliated_keywords.clear(); affiliated_keywords.clear();
map(affiliated_keyword_matcher, Element::Keyword)(input) map(affiliated_keyword_matcher, Element::Keyword)(input)
} }

View File

@ -1,17 +1,15 @@
use nom::branch::alt; use nom::branch::alt;
use nom::bytes::complete::is_not; use nom::bytes::complete::is_not;
use nom::bytes::complete::tag_no_case; use nom::bytes::complete::tag_no_case;
use nom::character::complete::anychar; use nom::bytes::complete::take_until;
use nom::character::complete::space1; use nom::character::complete::space1;
use nom::combinator::map;
use nom::multi::many0;
use nom::multi::many_till;
use nom::multi::separated_list0; use nom::multi::separated_list0;
use super::keyword::filtered_keyword; use super::keyword::filtered_keyword;
use super::keyword_todo::todo_keywords; use super::keyword_todo::todo_keywords;
use super::OrgSource; use super::OrgSource;
use crate::context::HeadlineLevelFilter; use crate::context::HeadlineLevelFilter;
use crate::error::CustomError;
use crate::error::Res; use crate::error::Res;
use crate::types::Keyword; use crate::types::Keyword;
use crate::GlobalSettings; use crate::GlobalSettings;
@ -20,13 +18,40 @@ use crate::GlobalSettings;
pub(crate) fn scan_for_in_buffer_settings<'s>( pub(crate) fn scan_for_in_buffer_settings<'s>(
input: OrgSource<'s>, input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Vec<Keyword<'s>>> { ) -> Res<OrgSource<'s>, Vec<Keyword<'s>>> {
// TODO: Optimization idea: since this is slicing the OrgSource at each character, it might be more efficient to do a parser that uses a search function like take_until, and wrap it in a function similar to consumed but returning the input along with the normal output, then pass all of that into a verify that confirms we were at the start of a line using the input we just returned. // TODO: Write some tests to make sure this is functioning properly.
let keywords = many0(map( let mut keywords = Vec::new();
many_till(anychar, filtered_keyword(in_buffer_settings_key)), let mut remaining = input;
|(_, kw)| kw, loop {
))(input); // Skip text until possible in_buffer_setting
keywords let start_of_pound = take_until::<_, _, CustomError<_>>("#+")(remaining);
let start_of_pound = if let Ok((start_of_pound, _)) = start_of_pound {
start_of_pound
} else {
break;
};
// Go backwards to the start of the line and run the filtered_keyword parser
let start_of_line = start_of_pound.get_start_of_line();
let (remain, maybe_kw) = match filtered_keyword(in_buffer_settings_key)(start_of_line) {
Ok((remain, kw)) => (remain, Some(kw)),
Err(_) => {
let end_of_line = take_until::<_, _, CustomError<_>>("\n")(start_of_pound);
if let Ok((end_of_line, _)) = end_of_line {
(end_of_line, None)
} else {
break;
}
}
};
if let Some(kw) = maybe_kw {
keywords.push(kw);
}
remaining = remain;
}
Ok((remaining, keywords))
} }
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
@ -88,3 +113,33 @@ pub(crate) fn apply_in_buffer_settings<'g, 's, 'sf>(
Ok(new_settings) Ok(new_settings)
} }
#[cfg(test)]
mod tests {
    use super::*;

    /// Scans a small document and checks both the unconsumed remainder and
    /// the keys of the keywords that were collected.
    #[test]
    fn scan_test() -> Result<(), Box<dyn std::error::Error>> {
        let document = r#"
foo
#+archive: bar
baz #+category: lorem
#+label: ipsum
#+todo: dolar
cat
"#;
        let (rest, found) = scan_for_in_buffer_settings(OrgSource::new(document))?;

        // Everything up to and including the last matched keyword line is consumed.
        let rest_text: &str = Into::<&str>::into(rest);
        assert_eq!(rest_text, "cat\n");

        let mut keys = Vec::with_capacity(found.len());
        for kw in found.iter() {
            keys.push(kw.key);
        }
        // "category" is absent because its "#+" is not the first
        // non-whitespace text on the line.
        //
        // "label" is absent because it is not an in-buffer setting.
        assert_eq!(keys, vec!["archive", "todo"]);
        Ok(())
    }
}

View File

@ -1,6 +1,7 @@
use std::ops::RangeBounds; use std::ops::RangeBounds;
use nom::Compare; use nom::Compare;
use nom::FindSubstring;
use nom::InputIter; use nom::InputIter;
use nom::InputLength; use nom::InputLength;
use nom::InputTake; use nom::InputTake;
@ -77,6 +78,55 @@ impl<'s> OrgSource<'s> {
self.slice(..(other.start - self.start)) self.slice(..(other.start - self.start))
} }
/// Build a new `OrgSource` positioned at the start of the line this source is
/// currently on.
///
/// The returned value shares `full_source` and `end` with `self`; its `start`
/// and `start_of_line` are both `self.start_of_line`. The bracket, brace, and
/// parenthesis depths are re-derived by undoing the effect of every delimiter
/// found in `self.text_since_line_break()` (presumably the text between the
/// start of the line and the current position — confirm against that helper).
///
/// `preceding_character` is `Some('\n')` unless the line starts at offset 0,
/// in which case it is `None`.
///
/// # Panics
///
/// Panics if the text returned by `text_since_line_break()` contains a line
/// break.
pub(crate) fn get_start_of_line(&self) -> OrgSource<'s> {
    let (mut bracket_depth, mut brace_depth, mut parenthesis_depth) = (
        self.bracket_depth,
        self.brace_depth,
        self.parenthesis_depth,
    );

    // We are rewinding over this text, so each delimiter has the inverse of
    // its usual effect: openers lower the depth and closers raise it.
    for byte in self.text_since_line_break().bytes() {
        match byte {
            b'\n' => panic!("Should not hit a line break when only going back to the start of the line."),
            b'[' => bracket_depth -= 1,
            b']' => bracket_depth += 1,
            b'{' => brace_depth -= 1,
            b'}' => brace_depth += 1,
            b'(' => parenthesis_depth -= 1,
            b')' => parenthesis_depth += 1,
            _ => {}
        }
    }

    let line_start = self.start_of_line;
    // Offset 0 has nothing in front of it; any other line start is treated as
    // directly following a line break.
    let preceding_character = match line_start {
        0 => None,
        _ => Some('\n'),
    };

    OrgSource {
        full_source: self.full_source,
        start: line_start,
        end: self.end,
        start_of_line: line_start,
        preceding_character,
        bracket_depth,
        brace_depth,
        parenthesis_depth,
    }
}
pub(crate) fn get_bracket_depth(&self) -> BracketDepth { pub(crate) fn get_bracket_depth(&self) -> BracketDepth {
self.bracket_depth self.bracket_depth
} }
@ -310,6 +360,12 @@ impl<'s> InputTakeAtPosition for OrgSource<'s> {
} }
} }
/// Substring search over the text currently covered by an `OrgSource`.
impl<'n, 's> FindSubstring<&'n str> for OrgSource<'s> {
    /// Returns the byte offset of the first occurrence of `substr` within
    /// this source's text, or `None` when it does not occur.
    fn find_substring(&self, substr: &'n str) -> Option<usize> {
        // View the source as a plain string slice and defer to `str::find`.
        let haystack: &str = Into::<&str>::into(self);
        haystack.find(substr)
    }
}
pub(crate) fn convert_error<'a, I: Into<CustomError<&'a str>>>( pub(crate) fn convert_error<'a, I: Into<CustomError<&'a str>>>(
err: nom::Err<I>, err: nom::Err<I>,
) -> nom::Err<CustomError<&'a str>> { ) -> nom::Err<CustomError<&'a str>> {