Add test showing we are not handling the odd startup option for headline depth.

Introduce the tab_width setting and give tabs a greater value when counting indentation level.
Remove redundant org_spaces functions.
2023-09-15 22:08:42 -04:00 · 2023-09-15 21:59:48 -04:00 · 2023-09-15 21:28:40 -04:00 · 2023-09-15 21:14:44 -04:00 · 2023-09-15 21:08:52 -04:00
11 changed files with 66 additions and 60 deletions
--- a/org_mode_samples/sections_and_headings/odd_level_depth.org
+++ b/org_mode_samples/sections_and_headings/odd_level_depth.org
@ -0,0 +1,6 @@
 #+STARTUP: odd
 * Foo
 ***** Bar
 * Baz
 *** Lorem
 * Ipsum
--- a/src/context/global_settings.rs
+++ b/src/context/global_settings.rs
@ -2,6 +2,7 @@ use std::collections::BTreeSet;
 use super::FileAccessInterface;
 use super::LocalFileAccessInterface;
 use crate::types::IndentationLevel;
 use crate::types::Object;
 // TODO: Ultimately, I think we'll need most of this: https://orgmode.org/manual/In_002dbuffer-Settings.html
@ -16,6 +17,11 @@ pub struct GlobalSettings<'g, 's> {
    ///
    /// Corresponds to the org-list-allow-alphabetical elisp variable.
    pub org_list_allow_alphabetical: bool,
    /// How many spaces a tab should be equal to.
    ///
    /// Corresponds to the tab-width elisp variable.
    pub tab_width: IndentationLevel,
 }
 impl<'g, 's> GlobalSettings<'g, 's> {
@ -28,6 +34,7 @@ impl<'g, 's> GlobalSettings<'g, 's> {
            in_progress_todo_keywords: BTreeSet::new(),
            complete_todo_keywords: BTreeSet::new(),
            org_list_allow_alphabetical: false,
            tab_width: 8,
        }
    }
 }
--- a/src/parser/fixed_width_area.rs
+++ b/src/parser/fixed_width_area.rs
@ -3,6 +3,7 @@ use nom::bytes::complete::is_not;
 use nom::bytes::complete::tag;
 use nom::character::complete::line_ending;
 use nom::character::complete::space0;
 use nom::character::complete::space1;
 use nom::combinator::eof;
 use nom::combinator::not;
 use nom::combinator::recognize;
@ -12,8 +13,6 @@ use nom::sequence::tuple;
 use super::org_source::OrgSource;
 use super::util::org_line_ending;
 use super::util::org_spaces0;
 use super::util::org_spaces1;
 use crate::context::parser_with_context;
 use crate::context::RefContext;
 use crate::error::Res;
@ -51,7 +50,7 @@ fn fixed_width_area_line<'b, 'g, 'r, 's>(
    let (remaining, _indent) = space0(input)?;
    let (remaining, _) = tuple((
        tag(":"),
-        alt((recognize(tuple((org_spaces1, is_not("\r\n")))), org_spaces0)),
+        alt((recognize(tuple((space1, is_not("\r\n")))), space0)),
        org_line_ending,
    ))(remaining)?;
    let source = get_consumed(input, remaining);
--- a/src/parser/footnote_definition.rs
+++ b/src/parser/footnote_definition.rs
@ -2,7 +2,6 @@ use nom::branch::alt;
 use nom::bytes::complete::tag;
 use nom::bytes::complete::tag_no_case;
 use nom::bytes::complete::take_while;
 use nom::character::complete::digit1;
 use nom::character::complete::space0;
 use nom::combinator::opt;
 use nom::combinator::recognize;
@ -94,10 +93,7 @@ pub(crate) fn footnote_definition<'b, 'g, 'r, 's>(
 #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
 pub(crate) fn label<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
-    alt((
+    take_while(|c| WORD_CONSTITUENT_CHARACTERS.contains(c) || "-_".contains(c))(input)
        digit1,
        take_while(|c| WORD_CONSTITUENT_CHARACTERS.contains(c) || "-_".contains(c)),
    ))(input)
 }
 #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
--- a/src/parser/greater_block.rs
+++ b/src/parser/greater_block.rs
@ -127,7 +127,6 @@ fn parameters<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
 }
 /// Build the matcher used to detect the end of the greater block called `name`.
 ///
 /// The returned closure forwards its `context` and `input` to
 /// `_greater_block_end` together with the captured `name`.
 fn greater_block_end<'c>(name: &'c str) -> impl ContextMatcher + 'c {
    // TODO: Can this be done without making an owned copy?
    // NOTE(review): the closure below only captures the borrowed `name` (tied to 'c);
    // no owned copy is made in this function, so the TODO above looks stale.
    move |context, input: OrgSource<'_>| _greater_block_end(context, input, name)
 }
--- a/src/parser/headline.rs
+++ b/src/parser/headline.rs
@ -1,6 +1,7 @@
 use nom::branch::alt;
 use nom::bytes::complete::tag;
 use nom::character::complete::anychar;
 use nom::character::complete::space0;
 use nom::character::complete::space1;
 use nom::combinator::map;
 use nom::combinator::not;
@ -20,8 +21,6 @@ use super::util::get_consumed;
 use super::util::org_line_ending;
 use super::util::org_space;
 use super::util::org_space_or_line_ending;
 use super::util::org_spaces0;
 use super::util::org_spaces1;
 use super::util::start_of_line;
 use crate::context::parser_with_context;
 use crate::context::ContextElement;
@ -134,27 +133,27 @@ fn headline<'b, 'g, 'r, 's>(
    ))(input)?;
    let (remaining, maybe_todo_keyword) = opt(tuple((
-        org_spaces1,
+        space1,
        parser_with_context!(heading_keyword)(&parser_context),
        peek(org_space_or_line_ending),
    )))(remaining)?;
-    let (remaining, maybe_priority) = opt(tuple((org_spaces1, priority_cookie)))(remaining)?;
+    let (remaining, maybe_priority) = opt(tuple((space1, priority_cookie)))(remaining)?;
    let (remaining, maybe_comment) = opt(tuple((
-        org_spaces1,
+        space1,
        tag("COMMENT"),
        peek(org_space_or_line_ending),
    )))(remaining)?;
    let (remaining, maybe_title) = opt(tuple((
-        org_spaces1,
+        space1,
        many1(parser_with_context!(standard_set_object)(&parser_context)),
    )))(remaining)?;
-    let (remaining, maybe_tags) = opt(tuple((org_spaces0, tags)))(remaining)?;
+    let (remaining, maybe_tags) = opt(tuple((space0, tags)))(remaining)?;
-    let (remaining, _) = tuple((org_spaces0, org_line_ending))(remaining)?;
+    let (remaining, _) = tuple((space0, org_line_ending))(remaining)?;
    Ok((
        remaining,
@ -180,11 +179,7 @@ fn headline_title_end<'b, 'g, 'r, 's>(
    _context: RefContext<'b, 'g, 'r, 's>,
    input: OrgSource<'s>,
 ) -> Res<OrgSource<'s>, OrgSource<'s>> {
-    recognize(tuple((
+    recognize(tuple((space0, opt(tuple((tags, space0))), org_line_ending)))(input)
        org_spaces0,
        opt(tuple((tags, org_spaces0))),
        org_line_ending,
    )))(input)
 }
 #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
--- a/src/parser/plain_list.rs
+++ b/src/parser/plain_list.rs
@ -22,6 +22,7 @@ use super::element_parser::element;
 use super::object_parser::standard_set_object;
 use super::org_source::OrgSource;
 use super::util::include_input;
 use super::util::indentation_level;
 use super::util::non_whitespace_character;
 use crate::context::parser_with_context;
 use crate::context::ContextElement;
@ -39,6 +40,7 @@ use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting;
 use crate::parser::util::org_space;
 use crate::parser::util::start_of_line;
 use crate::types::CheckboxType;
 use crate::types::IndentationLevel;
 use crate::types::Object;
 use crate::types::PlainList;
 use crate::types::PlainListItem;
@ -87,7 +89,7 @@ pub(crate) fn plain_list<'b, 'g, 'r, 's>(
    let parser_context = parser_context.with_additional_node(&contexts[2]);
    // children stores tuple of (input string, parsed object) so we can re-parse the final item
    let mut children = Vec::new();
-    let mut first_item_indentation: Option<usize> = None;
+    let mut first_item_indentation: Option<IndentationLevel> = None;
    let mut remaining = input;
    // The final list item does not consume trailing blank lines (which instead get consumed by the list). We have three options here:
@ -148,9 +150,7 @@ fn plain_list_item<'b, 'g, 'r, 's>(
    input: OrgSource<'s>,
 ) -> Res<OrgSource<'s>, PlainListItem<'s>> {
    start_of_line(input)?;
-    let (remaining, leading_whitespace) = space0(input)?;
+    let (remaining, (indent_level, _leading_whitespace)) = indentation_level(context, input)?;
    // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
    let indent_level = leading_whitespace.len();
    let (remaining, bull) = verify(
        parser_with_context!(bullet)(context),
        |bull: &OrgSource<'_>| Into::<&str>::into(bull) != "*" || indent_level > 0,
@ -287,7 +287,7 @@ fn plain_list_end<'b, 'g, 'r, 's>(
    )))(input)
 }
-const fn plain_list_item_end(indent_level: usize) -> impl ContextMatcher {
+const fn plain_list_item_end(indent_level: IndentationLevel) -> impl ContextMatcher {
    let line_indented_lte_matcher = line_indented_lte(indent_level);
    move |context, input: OrgSource<'_>| {
        _plain_list_item_end(context, input, &line_indented_lte_matcher)
@ -310,20 +310,23 @@ fn _plain_list_item_end<'b, 'g, 'r, 's>(
    )))(input)
 }
-const fn line_indented_lte(indent_level: usize) -> impl ContextMatcher {
+const fn line_indented_lte(indent_level: IndentationLevel) -> impl ContextMatcher {
    move |context, input: OrgSource<'_>| _line_indented_lte(context, input, indent_level)
 }
 #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
 fn _line_indented_lte<'b, 'g, 'r, 's>(
-    _context: RefContext<'b, 'g, 'r, 's>,
+    context: RefContext<'b, 'g, 'r, 's>,
    input: OrgSource<'s>,
-    indent_level: usize,
+    indent_level: IndentationLevel,
 ) -> Res<OrgSource<'s>, OrgSource<'s>> {
    let matched = recognize(verify(
-        tuple((space0::<OrgSource<'_>, _>, non_whitespace_character)),
+        tuple((
            parser_with_context!(indentation_level)(context),
            non_whitespace_character,
        )),
        // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
-        |(_space0, _anychar)| _space0.len() <= indent_level,
+        |((indentation_level, _leading_whitespace), _anychar)| *indentation_level <= indent_level,
    ))(input)?;
    Ok(matched)
--- a/src/parser/planning.rs
+++ b/src/parser/planning.rs
@ -1,6 +1,8 @@
 use nom::branch::alt;
 use nom::bytes::complete::tag;
 use nom::bytes::complete::tag_no_case;
 use nom::character::complete::space0;
 use nom::character::complete::space1;
 use nom::multi::many1;
 use nom::sequence::tuple;
@ -8,8 +10,6 @@ use super::org_source::OrgSource;
 use super::timestamp::timestamp;
 use super::util::maybe_consume_trailing_whitespace_if_not_exiting;
 use super::util::org_line_ending;
 use super::util::org_spaces0;
 use super::util::org_spaces1;
 use crate::context::parser_with_context;
 use crate::context::RefContext;
 use crate::error::Res;
@ -23,10 +23,10 @@ pub(crate) fn planning<'b, 'g, 'r, 's>(
    input: OrgSource<'s>,
 ) -> Res<OrgSource<'s>, Planning<'s>> {
    start_of_line(input)?;
-    let (remaining, _leading_whitespace) = org_spaces0(input)?;
+    let (remaining, _leading_whitespace) = space0(input)?;
    let (remaining, _planning_parameters) =
        many1(parser_with_context!(planning_parameter)(context))(remaining)?;
-    let (remaining, _trailing_ws) = tuple((org_spaces0, org_line_ending))(remaining)?;
+    let (remaining, _trailing_ws) = tuple((space0, org_line_ending))(remaining)?;
    let (remaining, _trailing_ws) =
        maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
@ -50,7 +50,7 @@ fn planning_parameter<'b, 'g, 'r, 's>(
        tag_no_case("SCHEDULED"),
        tag_no_case("CLOSED"),
    ))(input)?;
-    let (remaining, _gap) = tuple((tag(":"), org_spaces1))(remaining)?;
+    let (remaining, _gap) = tuple((tag(":"), space1))(remaining)?;
    let (remaining, _timestamp) = timestamp(context, remaining)?;
    let source = get_consumed(input, remaining);
    Ok((remaining, source))
--- a/src/parser/util.rs
+++ b/src/parser/util.rs
@ -1,5 +1,4 @@
 use nom::branch::alt;
 use nom::bytes::complete::is_a;
 use nom::character::complete::anychar;
 use nom::character::complete::line_ending;
 use nom::character::complete::none_of;
@ -10,11 +9,9 @@ use nom::combinator::not;
 use nom::combinator::opt;
 use nom::combinator::peek;
 use nom::combinator::recognize;
 use nom::combinator::verify;
 use nom::multi::many0;
 use nom::multi::many_till;
 use nom::sequence::tuple;
 use nom::Slice;
 use super::org_source::OrgSource;
 use crate::context::parser_with_context;
@ -23,6 +20,7 @@ use crate::context::RefContext;
 use crate::error::CustomError;
 use crate::error::MyError;
 use crate::error::Res;
 use crate::types::IndentationLevel;
 pub(crate) const WORD_CONSTITUENT_CHARACTERS: &str =
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
@ -243,25 +241,7 @@ pub(crate) fn org_space<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, char> {
 pub(crate) fn org_space_or_line_ending<'s>(
    input: OrgSource<'s>,
 ) -> Res<OrgSource<'s>, OrgSource<'s>> {
-    alt((recognize(one_of(" \t")), org_line_ending))(input)
+    alt((recognize(org_space), org_line_ending))(input)
 }
 /// Match as many spaces and tabs as possible. No minimum match.
 ///
 /// In org-mode syntax, spaces and tabs are interchangeable.
 ///
 /// Returns the remaining input and the matched whitespace. When the input does
 /// not start with a space or tab, the match is a zero-length slice and the
 /// input is returned unconsumed.
 pub(crate) fn org_spaces0<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
    // Greedily take a run of spaces/tabs.
    let found = is_a(" \t")(input);
    if found.is_ok() {
        return found;
    }
    // `is_a` did not succeed (presumably because nothing matched), so report an
    // empty match instead of an error: slice(..0) is the zero-length prefix of input.
    Ok((input, input.slice(..0)))
 }
 /// Match as many spaces and tabs as possible. Minimum 1 character.
 ///
 /// In org-mode syntax, spaces and tabs are interchangeable.
 ///
 /// Fails when the input does not begin with at least one space or tab.
 pub(crate) fn org_spaces1<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
    // `verify` rejects an empty match so the one-character minimum is enforced explicitly.
    verify(is_a(" \t"), |res: &OrgSource<'_>| res.len() > 0)(input)
 }
 /// Match a line break or the end of the file.
@ -270,3 +250,20 @@ pub(crate) fn org_spaces1<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSou
 pub(crate) fn org_line_ending<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
    // Try a real line ending first; if that fails, accept the end of the input instead.
    alt((line_ending, eof))(input)
 }
 /// Match the whitespace at the beginning of a line and give it an indentation level.
 ///
 /// Consumes the leading whitespace matched by `space0` and returns the remaining
 /// input together with a tuple of:
 /// - the indentation level, where each space counts as 1 and each tab counts as
 ///   the `tab_width` value from the global settings, and
 /// - the matched whitespace itself.
 ///
 /// An input with no leading whitespace yields an indentation level of 0.
 pub(crate) fn indentation_level<'b, 'g, 'r, 's>(
    context: RefContext<'b, 'g, 'r, 's>,
    input: OrgSource<'s>,
 ) -> Res<OrgSource<'s>, (IndentationLevel, OrgSource<'s>)> {
    let (remaining, leading_whitespace) = space0(input)?;
    // Weigh each whitespace character rather than counting bytes, so a tab can be
    // worth more than a single space.
    let indentation_level = Into::<&str>::into(leading_whitespace)
        .chars()
        .map(|c| match c {
            ' ' => 1,
            '\t' => context.get_global_settings().tab_width,
            // Assumes `space0` only ever matches spaces and tabs — TODO confirm
            // against the `space0` imported in this module.
            _ => unreachable!(),
        })
        // NOTE(review): IndentationLevel is a u16, so a very long run of leading
        // whitespace could overflow this sum — confirm whether inputs are bounded.
        .sum();
    Ok((remaining, (indentation_level, leading_whitespace)))
 }
--- a/src/types/greater_element.rs
+++ b/src/types/greater_element.rs
@ -10,10 +10,13 @@ pub struct PlainList<'s> {
    pub children: Vec<PlainListItem<'s>>,
 }
 /// The width that something is indented. For example, a single tab character could be a value of 4 or 8.
 pub type IndentationLevel = u16;
 #[derive(Debug)]
 pub struct PlainListItem<'s> {
    pub source: &'s str,
-    pub indentation: usize,
+    pub indentation: IndentationLevel,
    pub bullet: &'s str,
    pub checkbox: Option<(CheckboxType, &'s str)>,
    pub tag: Vec<Object<'s>>,
--- a/src/types/mod.rs
+++ b/src/types/mod.rs
@ -16,6 +16,7 @@ pub use greater_element::Drawer;
 pub use greater_element::DynamicBlock;
 pub use greater_element::FootnoteDefinition;
 pub use greater_element::GreaterBlock;
 pub use greater_element::IndentationLevel;
 pub use greater_element::NodeProperty;
 pub use greater_element::PlainList;
 pub use greater_element::PlainListItem;
Author	SHA1	Message	Date
Tom Alexander	8450785186	Add test showing we are not handling the odd startup option for headline depth. Some checks failed rust-test Build rust-test has failed Details rust-build Build rust-build has succeeded Details rust-foreign-document-test Build rust-foreign-document-test has failed Details	2023-09-15 22:08:42 -04:00
Tom Alexander	d443dbd468	Introduce the tab_width setting and give tabs a greater value when counting indentation level.	2023-09-15 21:59:48 -04:00
Tom Alexander	c9ce32c881	Remove redundant org_spaces functions. Turns out the nom space0/space1 parsers accept tab characters already.	2023-09-15 21:28:40 -04:00
Tom Alexander	85454a0a27	Fix footnote reference function label matcher. Previously when a label started with a number but contained other characters, this parser would fail because it would not match the entire label.	2023-09-15 21:14:44 -04:00
Tom Alexander	fdebf6dec5	Delete already solved TODO.	2023-09-15 21:08:52 -04:00