Introduce the tab_width setting and give tabs a greater value when counting indentation level.

This commit is contained in:
Tom Alexander 2023-09-15 21:52:42 -04:00
parent c9ce32c881
commit d443dbd468
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
5 changed files with 44 additions and 12 deletions

View File

@ -2,6 +2,7 @@ use std::collections::BTreeSet;
use super::FileAccessInterface; use super::FileAccessInterface;
use super::LocalFileAccessInterface; use super::LocalFileAccessInterface;
use crate::types::IndentationLevel;
use crate::types::Object; use crate::types::Object;
// TODO: Ultimately, I think we'll need most of this: https://orgmode.org/manual/In_002dbuffer-Settings.html // TODO: Ultimately, I think we'll need most of this: https://orgmode.org/manual/In_002dbuffer-Settings.html
@ -16,6 +17,11 @@ pub struct GlobalSettings<'g, 's> {
/// ///
/// Corresponds to the org-list-allow-alphabetical elisp variable. /// Corresponds to the org-list-allow-alphabetical elisp variable.
pub org_list_allow_alphabetical: bool, pub org_list_allow_alphabetical: bool,
/// How many spaces a tab should be equal to.
///
/// Corresponds to the tab-width elisp variable.
pub tab_width: IndentationLevel,
} }
impl<'g, 's> GlobalSettings<'g, 's> { impl<'g, 's> GlobalSettings<'g, 's> {
@ -28,6 +34,7 @@ impl<'g, 's> GlobalSettings<'g, 's> {
in_progress_todo_keywords: BTreeSet::new(), in_progress_todo_keywords: BTreeSet::new(),
complete_todo_keywords: BTreeSet::new(), complete_todo_keywords: BTreeSet::new(),
org_list_allow_alphabetical: false, org_list_allow_alphabetical: false,
tab_width: 8,
} }
} }
} }

View File

@ -22,6 +22,7 @@ use super::element_parser::element;
use super::object_parser::standard_set_object; use super::object_parser::standard_set_object;
use super::org_source::OrgSource; use super::org_source::OrgSource;
use super::util::include_input; use super::util::include_input;
use super::util::indentation_level;
use super::util::non_whitespace_character; use super::util::non_whitespace_character;
use crate::context::parser_with_context; use crate::context::parser_with_context;
use crate::context::ContextElement; use crate::context::ContextElement;
@ -39,6 +40,7 @@ use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting;
use crate::parser::util::org_space; use crate::parser::util::org_space;
use crate::parser::util::start_of_line; use crate::parser::util::start_of_line;
use crate::types::CheckboxType; use crate::types::CheckboxType;
use crate::types::IndentationLevel;
use crate::types::Object; use crate::types::Object;
use crate::types::PlainList; use crate::types::PlainList;
use crate::types::PlainListItem; use crate::types::PlainListItem;
@ -87,7 +89,7 @@ pub(crate) fn plain_list<'b, 'g, 'r, 's>(
let parser_context = parser_context.with_additional_node(&contexts[2]); let parser_context = parser_context.with_additional_node(&contexts[2]);
// children stores tuple of (input string, parsed object) so we can re-parse the final item // children stores tuple of (input string, parsed object) so we can re-parse the final item
let mut children = Vec::new(); let mut children = Vec::new();
let mut first_item_indentation: Option<usize> = None; let mut first_item_indentation: Option<IndentationLevel> = None;
let mut remaining = input; let mut remaining = input;
// The final list item does not consume trailing blank lines (which instead get consumed by the list). We have three options here: // The final list item does not consume trailing blank lines (which instead get consumed by the list). We have three options here:
@ -148,9 +150,7 @@ fn plain_list_item<'b, 'g, 'r, 's>(
input: OrgSource<'s>, input: OrgSource<'s>,
) -> Res<OrgSource<'s>, PlainListItem<'s>> { ) -> Res<OrgSource<'s>, PlainListItem<'s>> {
start_of_line(input)?; start_of_line(input)?;
let (remaining, leading_whitespace) = space0(input)?; let (remaining, (indent_level, _leading_whitespace)) = indentation_level(context, input)?;
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
let indent_level = leading_whitespace.len();
let (remaining, bull) = verify( let (remaining, bull) = verify(
parser_with_context!(bullet)(context), parser_with_context!(bullet)(context),
|bull: &OrgSource<'_>| Into::<&str>::into(bull) != "*" || indent_level > 0, |bull: &OrgSource<'_>| Into::<&str>::into(bull) != "*" || indent_level > 0,
@ -287,7 +287,7 @@ fn plain_list_end<'b, 'g, 'r, 's>(
)))(input) )))(input)
} }
const fn plain_list_item_end(indent_level: usize) -> impl ContextMatcher { const fn plain_list_item_end(indent_level: IndentationLevel) -> impl ContextMatcher {
let line_indented_lte_matcher = line_indented_lte(indent_level); let line_indented_lte_matcher = line_indented_lte(indent_level);
move |context, input: OrgSource<'_>| { move |context, input: OrgSource<'_>| {
_plain_list_item_end(context, input, &line_indented_lte_matcher) _plain_list_item_end(context, input, &line_indented_lte_matcher)
@ -310,20 +310,23 @@ fn _plain_list_item_end<'b, 'g, 'r, 's>(
)))(input) )))(input)
} }
const fn line_indented_lte(indent_level: usize) -> impl ContextMatcher { const fn line_indented_lte(indent_level: IndentationLevel) -> impl ContextMatcher {
move |context, input: OrgSource<'_>| _line_indented_lte(context, input, indent_level) move |context, input: OrgSource<'_>| _line_indented_lte(context, input, indent_level)
} }
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn _line_indented_lte<'b, 'g, 'r, 's>( fn _line_indented_lte<'b, 'g, 'r, 's>(
_context: RefContext<'b, 'g, 'r, 's>, context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>, input: OrgSource<'s>,
indent_level: usize, indent_level: IndentationLevel,
) -> Res<OrgSource<'s>, OrgSource<'s>> { ) -> Res<OrgSource<'s>, OrgSource<'s>> {
let matched = recognize(verify( let matched = recognize(verify(
tuple((space0::<OrgSource<'_>, _>, non_whitespace_character)), tuple((
parser_with_context!(indentation_level)(context),
non_whitespace_character,
)),
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09) // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
|(_space0, _anychar)| _space0.len() <= indent_level, |((indentation_level, _leading_whitespace), _anychar)| *indentation_level <= indent_level,
))(input)?; ))(input)?;
Ok(matched) Ok(matched)

View File

@ -20,6 +20,7 @@ use crate::context::RefContext;
use crate::error::CustomError; use crate::error::CustomError;
use crate::error::MyError; use crate::error::MyError;
use crate::error::Res; use crate::error::Res;
use crate::types::IndentationLevel;
pub(crate) const WORD_CONSTITUENT_CHARACTERS: &str = pub(crate) const WORD_CONSTITUENT_CHARACTERS: &str =
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
@ -240,7 +241,7 @@ pub(crate) fn org_space<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, char> {
pub(crate) fn org_space_or_line_ending<'s>( pub(crate) fn org_space_or_line_ending<'s>(
input: OrgSource<'s>, input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> { ) -> Res<OrgSource<'s>, OrgSource<'s>> {
alt((recognize(one_of(" \t")), org_line_ending))(input) alt((recognize(org_space), org_line_ending))(input)
} }
/// Match a line break or the end of the file. /// Match a line break or the end of the file.
@ -249,3 +250,20 @@ pub(crate) fn org_space_or_line_ending<'s>(
pub(crate) fn org_line_ending<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> { pub(crate) fn org_line_ending<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
alt((line_ending, eof))(input) alt((line_ending, eof))(input)
} }
/// Match the whitespace at the beginning of a line and give it an indentation level.
///
/// Consumes the (possibly empty) leading run of spaces and tabs and returns the remaining
/// input together with `(indentation level, matched whitespace)`. Each space counts as 1 and
/// each tab counts as the `tab_width` global setting.
///
/// The total saturates at `IndentationLevel::MAX` instead of overflowing, so a very long run
/// of leading whitespace can neither panic (overflow checks enabled) nor wrap around to a
/// small indentation (overflow checks disabled).
pub(crate) fn indentation_level<'b, 'g, 'r, 's>(
    context: RefContext<'b, 'g, 'r, 's>,
    input: OrgSource<'s>,
) -> Res<OrgSource<'s>, (IndentationLevel, OrgSource<'s>)> {
    let (remaining, leading_whitespace) = space0(input)?;
    // Read the setting once rather than once per tab character.
    let tab_width = context.get_global_settings().tab_width;
    // NOTE(review): every tab is counted as a fixed `tab_width`; Emacs presumably advances a
    // tab to the next tab stop when computing columns — confirm the fixed width is intended.
    let indentation_level = Into::<&str>::into(leading_whitespace)
        .chars()
        .fold(0, |level: IndentationLevel, c| {
            let width = match c {
                ' ' => 1,
                '\t' => tab_width,
                // space0 only matches space and tab (0x20 and 0x09).
                _ => unreachable!(),
            };
            level.saturating_add(width)
        });
    Ok((remaining, (indentation_level, leading_whitespace)))
}

View File

@ -10,10 +10,13 @@ pub struct PlainList<'s> {
pub children: Vec<PlainListItem<'s>>, pub children: Vec<PlainListItem<'s>>,
} }
/// The width that something is indented, counted in space-equivalents.
///
/// A space contributes 1 and a tab contributes the configured `tab_width`, so a single tab
/// character could be a value of 4 or 8, for example.
pub type IndentationLevel = u16;
#[derive(Debug)] #[derive(Debug)]
pub struct PlainListItem<'s> { pub struct PlainListItem<'s> {
pub source: &'s str, pub source: &'s str,
pub indentation: usize, pub indentation: IndentationLevel,
pub bullet: &'s str, pub bullet: &'s str,
pub checkbox: Option<(CheckboxType, &'s str)>, pub checkbox: Option<(CheckboxType, &'s str)>,
pub tag: Vec<Object<'s>>, pub tag: Vec<Object<'s>>,

View File

@ -16,6 +16,7 @@ pub use greater_element::Drawer;
pub use greater_element::DynamicBlock; pub use greater_element::DynamicBlock;
pub use greater_element::FootnoteDefinition; pub use greater_element::FootnoteDefinition;
pub use greater_element::GreaterBlock; pub use greater_element::GreaterBlock;
pub use greater_element::IndentationLevel;
pub use greater_element::NodeProperty; pub use greater_element::NodeProperty;
pub use greater_element::PlainList; pub use greater_element::PlainList;
pub use greater_element::PlainListItem; pub use greater_element::PlainListItem;