diff --git a/src/context/global_settings.rs b/src/context/global_settings.rs index cb32e6d8..88eadde7 100644 --- a/src/context/global_settings.rs +++ b/src/context/global_settings.rs @@ -2,6 +2,7 @@ use std::collections::BTreeSet; use super::FileAccessInterface; use super::LocalFileAccessInterface; +use crate::types::IndentationLevel; use crate::types::Object; // TODO: Ultimately, I think we'll need most of this: https://orgmode.org/manual/In_002dbuffer-Settings.html @@ -16,6 +17,11 @@ pub struct GlobalSettings<'g, 's> { /// /// Corresponds to the org-list-allow-alphabetical elisp variable. pub org_list_allow_alphabetical: bool, + + /// How many spaces a tab should be equal to. + /// + /// Corresponds to the tab-width elisp variable. + pub tab_width: IndentationLevel, } impl<'g, 's> GlobalSettings<'g, 's> { @@ -28,6 +34,7 @@ impl<'g, 's> GlobalSettings<'g, 's> { in_progress_todo_keywords: BTreeSet::new(), complete_todo_keywords: BTreeSet::new(), org_list_allow_alphabetical: false, + tab_width: 8, } } } diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 6508dce6..74a819e9 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -22,6 +22,7 @@ use super::element_parser::element; use super::object_parser::standard_set_object; use super::org_source::OrgSource; use super::util::include_input; +use super::util::indentation_level; use super::util::non_whitespace_character; use crate::context::parser_with_context; use crate::context::ContextElement; @@ -39,6 +40,7 @@ use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting; use crate::parser::util::org_space; use crate::parser::util::start_of_line; use crate::types::CheckboxType; +use crate::types::IndentationLevel; use crate::types::Object; use crate::types::PlainList; use crate::types::PlainListItem; @@ -87,7 +89,7 @@ pub(crate) fn plain_list<'b, 'g, 'r, 's>( let parser_context = parser_context.with_additional_node(&contexts[2]); // children stores tuple of (input string, parsed object) so we can re-parse the final item let mut children = Vec::new(); - let mut first_item_indentation: Option = None; + let mut first_item_indentation: Option = None; let mut remaining = input; // The final list item does not consume trailing blank lines (which instead get consumed by the list). We have three options here: @@ -148,9 +150,7 @@ fn plain_list_item<'b, 'g, 'r, 's>( input: OrgSource<'s>, ) -> Res, PlainListItem<'s>> { start_of_line(input)?; - let (remaining, leading_whitespace) = space0(input)?; - // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09) - let indent_level = leading_whitespace.len(); + let (remaining, (indent_level, _leading_whitespace)) = indentation_level(context, input)?; let (remaining, bull) = verify( parser_with_context!(bullet)(context), |bull: &OrgSource<'_>| Into::<&str>::into(bull) != "*" || indent_level > 0, @@ -287,7 +287,7 @@ fn plain_list_end<'b, 'g, 'r, 's>( )))(input) } -const fn plain_list_item_end(indent_level: usize) -> impl ContextMatcher { +const fn plain_list_item_end(indent_level: IndentationLevel) -> impl ContextMatcher { let line_indented_lte_matcher = line_indented_lte(indent_level); move |context, input: OrgSource<'_>| { _plain_list_item_end(context, input, &line_indented_lte_matcher) @@ -310,20 +310,23 @@ fn _plain_list_item_end<'b, 'g, 'r, 's>( )))(input) } -const fn line_indented_lte(indent_level: usize) -> impl ContextMatcher { +const fn line_indented_lte(indent_level: IndentationLevel) -> impl ContextMatcher { move |context, input: OrgSource<'_>| _line_indented_lte(context, input, indent_level) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn _line_indented_lte<'b, 'g, 'r, 's>( - _context: RefContext<'b, 'g, 'r, 's>, + context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, - indent_level: usize, + indent_level: IndentationLevel, ) -> Res, OrgSource<'s>> { let matched = recognize(verify( - tuple((space0::, _>, non_whitespace_character)), + tuple(( + parser_with_context!(indentation_level)(context), + non_whitespace_character, + )), // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09) - |(_space0, _anychar)| _space0.len() <= indent_level, + |((indentation_level, _leading_whitespace), _anychar)| *indentation_level <= indent_level, ))(input)?; Ok(matched) diff --git a/src/parser/util.rs b/src/parser/util.rs index aeb2d9e0..5faef14f 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -20,6 +20,7 @@ use crate::context::RefContext; use crate::error::CustomError; use crate::error::MyError; use crate::error::Res; +use crate::types::IndentationLevel; pub(crate) const WORD_CONSTITUENT_CHARACTERS: &str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; @@ -240,7 +241,7 @@ pub(crate) fn org_space<'s>(input: OrgSource<'s>) -> Res, char> { pub(crate) fn org_space_or_line_ending<'s>( input: OrgSource<'s>, ) -> Res, OrgSource<'s>> { - alt((recognize(one_of(" \t")), org_line_ending))(input) + alt((recognize(org_space), org_line_ending))(input) } /// Match a line break or the end of the file. @@ -249,3 +250,20 @@ pub(crate) fn org_space_or_line_ending<'s>( pub(crate) fn org_line_ending<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { alt((line_ending, eof))(input) } + +/// Match the whitespace at the beginning of a line and give it an indentation level. +pub(crate) fn indentation_level<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, (IndentationLevel, OrgSource<'s>)> { + let (remaining, leading_whitespace) = space0(input)?; + let indentation_level = Into::<&str>::into(leading_whitespace) + .chars() + .map(|c| match c { + ' ' => 1, + '\t' => context.get_global_settings().tab_width, + _ => unreachable!(), + }) + .sum(); + Ok((remaining, (indentation_level, leading_whitespace))) +} diff --git a/src/types/greater_element.rs b/src/types/greater_element.rs index 6807f79e..dfbd904c 100644 --- a/src/types/greater_element.rs +++ b/src/types/greater_element.rs @@ -10,10 +10,13 @@ pub struct PlainList<'s> { pub children: Vec>, } +/// The width that something is indented. For example, a single tab character could be a value of 4 or 8. +pub type IndentationLevel = u16; + #[derive(Debug)] pub struct PlainListItem<'s> { pub source: &'s str, - pub indentation: usize, + pub indentation: IndentationLevel, pub bullet: &'s str, pub checkbox: Option<(CheckboxType, &'s str)>, pub tag: Vec>, diff --git a/src/types/mod.rs b/src/types/mod.rs index facf202c..83666107 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -16,6 +16,7 @@ pub use greater_element::Drawer; pub use greater_element::DynamicBlock; pub use greater_element::FootnoteDefinition; pub use greater_element::GreaterBlock; +pub use greater_element::IndentationLevel; pub use greater_element::NodeProperty; pub use greater_element::PlainList; pub use greater_element::PlainListItem;