From 275b4b53d1b9499015f21a069d5d4244f57333f2 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 4 Sep 2023 19:17:23 -0400 Subject: [PATCH] Use a single function for finding all keywords. --- src/compare/diff.rs | 2 ++ src/parser/document.rs | 43 +++++++++++++++++------ src/parser/in_buffer_settings.rs | 28 +++++++++++++++ src/parser/keyword.rs | 60 +++++++++++++++++++------------- src/parser/mod.rs | 1 + src/parser/setup_file.rs | 29 ++++++--------- src/types/lesser_element.rs | 2 ++ 7 files changed, 111 insertions(+), 54 deletions(-) create mode 100644 src/parser/in_buffer_settings.rs diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 4402200..704335d 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -1392,6 +1392,8 @@ fn compare_keyword<'s>( Ok(_) => {} }; + // TODO: Compare key and value + Ok(DiffResult { status: this_status, name: emacs_name.to_owned(), diff --git a/src/parser/document.rs b/src/parser/document.rs index 5f0d1c0..9fd8021 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -18,6 +18,7 @@ use nom::multi::many_till; use nom::multi::separated_list1; use nom::sequence::tuple; +use super::in_buffer_settings::scan_for_in_buffer_settings; use super::org_source::OrgSource; use super::setup_file::scan_for_setup_file; use super::token::AllTokensIterator; @@ -92,6 +93,8 @@ pub fn parse_with_settings<'g, 's>( /// Parse a full org-mode document. /// /// Use this entry point when you want to have direct control over the starting context or if you want to use this integrated with other nom parsers. For general-purpose usage, the `parse` and `parse_with_settings` functions are a lot simpler. +/// +/// This will not prevent additional settings from being learned during parsing, for example when encountering a "#+TODO". #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] #[allow(dead_code)] pub fn document<'b, 'g, 'r, 's>( @@ -109,24 +112,42 @@ fn document_org_source<'b, 'g, 'r, 's>( input: OrgSource<'s>, ) -> Res, Document<'s>> { let setup_file = scan_for_setup_file(input); - if setup_file.is_ok() { + let setup_file = if setup_file.is_ok() { let (_remaining, setup_file) = setup_file.expect("If-statement proves this is okay."); let setup_file_contents = context .get_global_settings() .file_access .read_file(Into::<&str>::into(setup_file)) .map_err(|err| nom::Err::>>::Failure(err.into()))?; - let parsed_setup_file = _document(context, setup_file_contents.as_str().into()); - if parsed_setup_file.is_err() { - return Err(nom::Err::Error(CustomError::MyError(MyError( - "Failed to parse the setup file.".into(), - )))); - } - let (_remaining, parsed_setup_file) = - parsed_setup_file.expect("The if-statement proves this is ok."); - - println!("TODO: Process setup_file: {:#?}", parsed_setup_file); + Some(setup_file_contents) + } else { + None + }; + let setup_file_settings = setup_file + .as_ref() + .map(|input| input.as_str().into()) + .map(scan_for_in_buffer_settings) + .map_or(Ok(None), |r| r.map(Some)) + .map_err(|_err| { + nom::Err::Error(CustomError::MyError(MyError( + "TODO: make this take an owned string so I can dump err.to_string() into it." + .into(), + ))) + })?; + let (_, document_settings) = scan_for_in_buffer_settings(input)?; + let mut final_settings = Vec::with_capacity( + document_settings.len() + + match setup_file_settings { + Some((_, ref setup_file_settings)) => setup_file_settings.len(), + None => 0, + }, + ); + if let Some((_, setup_file_settings)) = setup_file_settings { + final_settings.extend(setup_file_settings.into_iter()); } + + // TODO: read the keywords into settings and apply them to the GlobalSettings. + let (remaining, document) = _document(context, input).map(|(rem, out)| (Into::<&str>::into(rem), out))?; { diff --git a/src/parser/in_buffer_settings.rs b/src/parser/in_buffer_settings.rs new file mode 100644 index 0000000..e653e2f --- /dev/null +++ b/src/parser/in_buffer_settings.rs @@ -0,0 +1,28 @@ +use nom::branch::alt; +use nom::bytes::complete::tag_no_case; +use nom::character::complete::anychar; +use nom::combinator::map; +use nom::multi::many0; +use nom::multi::many_till; + +use super::keyword::filtered_keyword; +use super::OrgSource; +use crate::error::Res; +use crate::types::Keyword; + +pub fn scan_for_in_buffer_settings<'s>( + input: OrgSource<'s>, +) -> Res, Vec>> { + // TODO: Optimization idea: since this is slicing the OrgSource at each character, it might be more efficient to do a parser that uses a search function like take_until, and wrap it in a function similar to consumed but returning the input along with the normal output, then pass all of that into a verify that confirms we were at the start of a line using the input we just returned. + + let keywords = many0(map( + many_till(anychar, filtered_keyword(in_buffer_settings_key)), + |(_, kw)| kw, + ))(input); + keywords +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn in_buffer_settings_key<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { + alt((tag_no_case("todo"), tag_no_case("setupfile")))(input) +} diff --git a/src/parser/keyword.rs b/src/parser/keyword.rs index ee7a22d..0d1a11d 100644 --- a/src/parser/keyword.rs +++ b/src/parser/keyword.rs @@ -7,6 +7,7 @@ use nom::character::complete::anychar; use nom::character::complete::line_ending; use nom::character::complete::space0; use nom::character::complete::space1; +use nom::combinator::consumed; use nom::combinator::eof; use nom::combinator::not; use nom::combinator::peek; @@ -16,6 +17,7 @@ use nom::sequence::tuple; use super::org_source::BracketDepth; use super::org_source::OrgSource; +use crate::context::Matcher; use crate::context::RefContext; use crate::error::CustomError; use crate::error::MyError; @@ -29,19 +31,26 @@ const ORG_ELEMENT_AFFILIATED_KEYWORDS: [&'static str; 13] = [ ]; const ORG_ELEMENT_DUAL_KEYWORDS: [&'static str; 2] = ["caption", "results"]; -#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -pub fn keyword<'b, 'g, 'r, 's>( - _context: RefContext<'b, 'g, 'r, 's>, +pub fn filtered_keyword( + key_parser: F, +) -> impl for<'s> Fn(OrgSource<'s>) -> Res, Keyword<'s>> { + move |input| _filtered_keyword(&key_parser, input) +} + +#[cfg_attr( + feature = "tracing", + tracing::instrument(ret, level = "debug", skip(key_parser)) +)] +fn _filtered_keyword<'s, F: Matcher>( + key_parser: F, input: OrgSource<'s>, ) -> Res, Keyword<'s>> { start_of_line(input)?; // TODO: When key is a member of org-element-parsed-keywords, value can contain the standard set objects, excluding footnote references. - let (remaining, rule) = recognize(tuple(( + let (remaining, (consumed_input, (_, _, parsed_key, _, parsed_value, _))) = consumed(tuple(( space0, tag("#+"), - not(peek(tag_no_case("call"))), - not(peek(tag_no_case("begin"))), - is_not(" \t\r\n:"), + key_parser, tag(":"), alt((recognize(tuple((space1, is_not("\r\n")))), space0)), alt((line_ending, eof)), @@ -49,33 +58,36 @@ pub fn keyword<'b, 'g, 'r, 's>( Ok(( remaining, Keyword { - source: rule.into(), + source: consumed_input.into(), + key: parsed_key.into(), + value: parsed_value.into(), }, )) } +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +pub fn keyword<'b, 'g, 'r, 's>( + _context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, Keyword<'s>> { + filtered_keyword(regular_keyword_key)(input) +} + #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] pub fn affiliated_keyword<'b, 'g, 'r, 's>( _context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Keyword<'s>> { - start_of_line(input)?; + filtered_keyword(affiliated_key)(input) +} - // TODO: When key is a member of org-element-parsed-keywords, value can contain the standard set objects, excluding footnote references. - let (remaining, rule) = recognize(tuple(( - space0, - tag("#+"), - affiliated_key, - tag(":"), - alt((recognize(tuple((space1, is_not("\r\n")))), space0)), - alt((line_ending, eof)), - )))(input)?; - Ok(( - remaining, - Keyword { - source: rule.into(), - }, - )) +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn regular_keyword_key<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { + recognize(tuple(( + not(peek(tag_no_case("call"))), + not(peek(tag_no_case("begin"))), + is_not(" \t\r\n:"), + )))(input) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 124fdb6..ebda885 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -15,6 +15,7 @@ mod footnote_definition; mod footnote_reference; mod greater_block; mod horizontal_rule; +mod in_buffer_settings; mod inline_babel_call; mod inline_source_block; mod keyword; diff --git a/src/parser/setup_file.rs b/src/parser/setup_file.rs index e422a38..509ab17 100644 --- a/src/parser/setup_file.rs +++ b/src/parser/setup_file.rs @@ -1,31 +1,22 @@ -use nom::branch::alt; -use nom::bytes::complete::is_not; use nom::bytes::complete::tag_no_case; use nom::character::complete::anychar; -use nom::character::complete::line_ending; -use nom::character::complete::space1; -use nom::combinator::eof; +use nom::combinator::map; use nom::multi::many_till; -use nom::sequence::tuple; +use super::keyword::filtered_keyword; use super::OrgSource; use crate::error::Res; -use crate::parser::util::start_of_line; #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -pub fn scan_for_setup_file<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { - let (remaining, setup) = many_till(anychar, setup_file)(input) - .map(|(remaining, (_, setup_file))| (remaining, setup_file))?; +pub fn scan_for_setup_file<'s>(input: OrgSource<'s>) -> Res, &'s str> { + let (remaining, setup) = map( + many_till(anychar, filtered_keyword(setupfile_key)), + |(_, kw)| kw.value, + )(input)?; Ok((remaining, setup)) } -fn setup_file<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { - let (remaining, (_, _, _, setup_file, _)) = tuple(( - start_of_line, - tag_no_case("#+SETUPFILE:"), - space1, - is_not("\r\n"), - alt((line_ending, eof)), - ))(input)?; - Ok((remaining, setup_file)) +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn setupfile_key<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { + tag_no_case("setupfile")(input) } diff --git a/src/types/lesser_element.rs b/src/types/lesser_element.rs index 33f6529..06ae33b 100644 --- a/src/types/lesser_element.rs +++ b/src/types/lesser_element.rs @@ -87,6 +87,8 @@ pub struct HorizontalRule<'s> { #[derive(Debug)] pub struct Keyword<'s> { pub source: &'s str, + pub key: &'s str, + pub value: &'s str, } #[derive(Debug)]