Compare commits

..

5 Commits

Author SHA1 Message Date
Tom Alexander
8450785186
Add test showing we are not handling the odd startup option for headline depth.
Some checks failed
rust-test Build rust-test has failed
rust-build Build rust-build has succeeded
rust-foreign-document-test Build rust-foreign-document-test has failed
2023-09-15 22:08:42 -04:00
Tom Alexander
d443dbd468
Introduce the tab_width setting and give tabs a greater value when counting indentation level. 2023-09-15 21:59:48 -04:00
Tom Alexander
c9ce32c881
Remove redundant org_spaces functions.
Turns out the nom space0/space1 parsers accept tab characters already.
2023-09-15 21:28:40 -04:00
Tom Alexander
85454a0a27
Fix footnote reference function label matcher.
Previously when a label started with a number but contained other characters, this parser would fail because it would not match the entire label.
2023-09-15 21:14:44 -04:00
Tom Alexander
fdebf6dec5
Delete already solved TODO. 2023-09-15 21:08:52 -04:00
11 changed files with 66 additions and 60 deletions

View File

@ -0,0 +1,6 @@
#+STARTUP: odd
* Foo
***** Bar
* Baz
*** Lorem
* Ipsum

View File

@ -2,6 +2,7 @@ use std::collections::BTreeSet;
use super::FileAccessInterface; use super::FileAccessInterface;
use super::LocalFileAccessInterface; use super::LocalFileAccessInterface;
use crate::types::IndentationLevel;
use crate::types::Object; use crate::types::Object;
// TODO: Ultimately, I think we'll need most of this: https://orgmode.org/manual/In_002dbuffer-Settings.html // TODO: Ultimately, I think we'll need most of this: https://orgmode.org/manual/In_002dbuffer-Settings.html
@ -16,6 +17,11 @@ pub struct GlobalSettings<'g, 's> {
/// ///
/// Corresponds to the org-list-allow-alphabetical elisp variable. /// Corresponds to the org-list-allow-alphabetical elisp variable.
pub org_list_allow_alphabetical: bool, pub org_list_allow_alphabetical: bool,
/// How many spaces a tab should be equal to.
///
/// Corresponds to the tab-width elisp variable.
pub tab_width: IndentationLevel,
} }
impl<'g, 's> GlobalSettings<'g, 's> { impl<'g, 's> GlobalSettings<'g, 's> {
@ -28,6 +34,7 @@ impl<'g, 's> GlobalSettings<'g, 's> {
in_progress_todo_keywords: BTreeSet::new(), in_progress_todo_keywords: BTreeSet::new(),
complete_todo_keywords: BTreeSet::new(), complete_todo_keywords: BTreeSet::new(),
org_list_allow_alphabetical: false, org_list_allow_alphabetical: false,
tab_width: 8,
} }
} }
} }

View File

@ -3,6 +3,7 @@ use nom::bytes::complete::is_not;
use nom::bytes::complete::tag; use nom::bytes::complete::tag;
use nom::character::complete::line_ending; use nom::character::complete::line_ending;
use nom::character::complete::space0; use nom::character::complete::space0;
use nom::character::complete::space1;
use nom::combinator::eof; use nom::combinator::eof;
use nom::combinator::not; use nom::combinator::not;
use nom::combinator::recognize; use nom::combinator::recognize;
@ -12,8 +13,6 @@ use nom::sequence::tuple;
use super::org_source::OrgSource; use super::org_source::OrgSource;
use super::util::org_line_ending; use super::util::org_line_ending;
use super::util::org_spaces0;
use super::util::org_spaces1;
use crate::context::parser_with_context; use crate::context::parser_with_context;
use crate::context::RefContext; use crate::context::RefContext;
use crate::error::Res; use crate::error::Res;
@ -51,7 +50,7 @@ fn fixed_width_area_line<'b, 'g, 'r, 's>(
let (remaining, _indent) = space0(input)?; let (remaining, _indent) = space0(input)?;
let (remaining, _) = tuple(( let (remaining, _) = tuple((
tag(":"), tag(":"),
alt((recognize(tuple((org_spaces1, is_not("\r\n")))), org_spaces0)), alt((recognize(tuple((space1, is_not("\r\n")))), space0)),
org_line_ending, org_line_ending,
))(remaining)?; ))(remaining)?;
let source = get_consumed(input, remaining); let source = get_consumed(input, remaining);

View File

@ -2,7 +2,6 @@ use nom::branch::alt;
use nom::bytes::complete::tag; use nom::bytes::complete::tag;
use nom::bytes::complete::tag_no_case; use nom::bytes::complete::tag_no_case;
use nom::bytes::complete::take_while; use nom::bytes::complete::take_while;
use nom::character::complete::digit1;
use nom::character::complete::space0; use nom::character::complete::space0;
use nom::combinator::opt; use nom::combinator::opt;
use nom::combinator::recognize; use nom::combinator::recognize;
@ -94,10 +93,7 @@ pub(crate) fn footnote_definition<'b, 'g, 'r, 's>(
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub(crate) fn label<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> { pub(crate) fn label<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
alt(( take_while(|c| WORD_CONSTITUENT_CHARACTERS.contains(c) || "-_".contains(c))(input)
digit1,
take_while(|c| WORD_CONSTITUENT_CHARACTERS.contains(c) || "-_".contains(c)),
))(input)
} }
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]

View File

@ -127,7 +127,6 @@ fn parameters<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
} }
fn greater_block_end<'c>(name: &'c str) -> impl ContextMatcher + 'c { fn greater_block_end<'c>(name: &'c str) -> impl ContextMatcher + 'c {
// TODO: Can this be done without making an owned copy?
move |context, input: OrgSource<'_>| _greater_block_end(context, input, name) move |context, input: OrgSource<'_>| _greater_block_end(context, input, name)
} }

View File

@ -1,6 +1,7 @@
use nom::branch::alt; use nom::branch::alt;
use nom::bytes::complete::tag; use nom::bytes::complete::tag;
use nom::character::complete::anychar; use nom::character::complete::anychar;
use nom::character::complete::space0;
use nom::character::complete::space1; use nom::character::complete::space1;
use nom::combinator::map; use nom::combinator::map;
use nom::combinator::not; use nom::combinator::not;
@ -20,8 +21,6 @@ use super::util::get_consumed;
use super::util::org_line_ending; use super::util::org_line_ending;
use super::util::org_space; use super::util::org_space;
use super::util::org_space_or_line_ending; use super::util::org_space_or_line_ending;
use super::util::org_spaces0;
use super::util::org_spaces1;
use super::util::start_of_line; use super::util::start_of_line;
use crate::context::parser_with_context; use crate::context::parser_with_context;
use crate::context::ContextElement; use crate::context::ContextElement;
@ -134,27 +133,27 @@ fn headline<'b, 'g, 'r, 's>(
))(input)?; ))(input)?;
let (remaining, maybe_todo_keyword) = opt(tuple(( let (remaining, maybe_todo_keyword) = opt(tuple((
org_spaces1, space1,
parser_with_context!(heading_keyword)(&parser_context), parser_with_context!(heading_keyword)(&parser_context),
peek(org_space_or_line_ending), peek(org_space_or_line_ending),
)))(remaining)?; )))(remaining)?;
let (remaining, maybe_priority) = opt(tuple((org_spaces1, priority_cookie)))(remaining)?; let (remaining, maybe_priority) = opt(tuple((space1, priority_cookie)))(remaining)?;
let (remaining, maybe_comment) = opt(tuple(( let (remaining, maybe_comment) = opt(tuple((
org_spaces1, space1,
tag("COMMENT"), tag("COMMENT"),
peek(org_space_or_line_ending), peek(org_space_or_line_ending),
)))(remaining)?; )))(remaining)?;
let (remaining, maybe_title) = opt(tuple(( let (remaining, maybe_title) = opt(tuple((
org_spaces1, space1,
many1(parser_with_context!(standard_set_object)(&parser_context)), many1(parser_with_context!(standard_set_object)(&parser_context)),
)))(remaining)?; )))(remaining)?;
let (remaining, maybe_tags) = opt(tuple((org_spaces0, tags)))(remaining)?; let (remaining, maybe_tags) = opt(tuple((space0, tags)))(remaining)?;
let (remaining, _) = tuple((org_spaces0, org_line_ending))(remaining)?; let (remaining, _) = tuple((space0, org_line_ending))(remaining)?;
Ok(( Ok((
remaining, remaining,
@ -180,11 +179,7 @@ fn headline_title_end<'b, 'g, 'r, 's>(
_context: RefContext<'b, 'g, 'r, 's>, _context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>, input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> { ) -> Res<OrgSource<'s>, OrgSource<'s>> {
recognize(tuple(( recognize(tuple((space0, opt(tuple((tags, space0))), org_line_ending)))(input)
org_spaces0,
opt(tuple((tags, org_spaces0))),
org_line_ending,
)))(input)
} }
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]

View File

@ -22,6 +22,7 @@ use super::element_parser::element;
use super::object_parser::standard_set_object; use super::object_parser::standard_set_object;
use super::org_source::OrgSource; use super::org_source::OrgSource;
use super::util::include_input; use super::util::include_input;
use super::util::indentation_level;
use super::util::non_whitespace_character; use super::util::non_whitespace_character;
use crate::context::parser_with_context; use crate::context::parser_with_context;
use crate::context::ContextElement; use crate::context::ContextElement;
@ -39,6 +40,7 @@ use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting;
use crate::parser::util::org_space; use crate::parser::util::org_space;
use crate::parser::util::start_of_line; use crate::parser::util::start_of_line;
use crate::types::CheckboxType; use crate::types::CheckboxType;
use crate::types::IndentationLevel;
use crate::types::Object; use crate::types::Object;
use crate::types::PlainList; use crate::types::PlainList;
use crate::types::PlainListItem; use crate::types::PlainListItem;
@ -87,7 +89,7 @@ pub(crate) fn plain_list<'b, 'g, 'r, 's>(
let parser_context = parser_context.with_additional_node(&contexts[2]); let parser_context = parser_context.with_additional_node(&contexts[2]);
// children stores tuple of (input string, parsed object) so we can re-parse the final item // children stores tuple of (input string, parsed object) so we can re-parse the final item
let mut children = Vec::new(); let mut children = Vec::new();
let mut first_item_indentation: Option<usize> = None; let mut first_item_indentation: Option<IndentationLevel> = None;
let mut remaining = input; let mut remaining = input;
// The final list item does not consume trailing blank lines (which instead get consumed by the list). We have three options here: // The final list item does not consume trailing blank lines (which instead get consumed by the list). We have three options here:
@ -148,9 +150,7 @@ fn plain_list_item<'b, 'g, 'r, 's>(
input: OrgSource<'s>, input: OrgSource<'s>,
) -> Res<OrgSource<'s>, PlainListItem<'s>> { ) -> Res<OrgSource<'s>, PlainListItem<'s>> {
start_of_line(input)?; start_of_line(input)?;
let (remaining, leading_whitespace) = space0(input)?; let (remaining, (indent_level, _leading_whitespace)) = indentation_level(context, input)?;
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
let indent_level = leading_whitespace.len();
let (remaining, bull) = verify( let (remaining, bull) = verify(
parser_with_context!(bullet)(context), parser_with_context!(bullet)(context),
|bull: &OrgSource<'_>| Into::<&str>::into(bull) != "*" || indent_level > 0, |bull: &OrgSource<'_>| Into::<&str>::into(bull) != "*" || indent_level > 0,
@ -287,7 +287,7 @@ fn plain_list_end<'b, 'g, 'r, 's>(
)))(input) )))(input)
} }
const fn plain_list_item_end(indent_level: usize) -> impl ContextMatcher { const fn plain_list_item_end(indent_level: IndentationLevel) -> impl ContextMatcher {
let line_indented_lte_matcher = line_indented_lte(indent_level); let line_indented_lte_matcher = line_indented_lte(indent_level);
move |context, input: OrgSource<'_>| { move |context, input: OrgSource<'_>| {
_plain_list_item_end(context, input, &line_indented_lte_matcher) _plain_list_item_end(context, input, &line_indented_lte_matcher)
@ -310,20 +310,23 @@ fn _plain_list_item_end<'b, 'g, 'r, 's>(
)))(input) )))(input)
} }
const fn line_indented_lte(indent_level: usize) -> impl ContextMatcher { const fn line_indented_lte(indent_level: IndentationLevel) -> impl ContextMatcher {
move |context, input: OrgSource<'_>| _line_indented_lte(context, input, indent_level) move |context, input: OrgSource<'_>| _line_indented_lte(context, input, indent_level)
} }
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn _line_indented_lte<'b, 'g, 'r, 's>( fn _line_indented_lte<'b, 'g, 'r, 's>(
_context: RefContext<'b, 'g, 'r, 's>, context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>, input: OrgSource<'s>,
indent_level: usize, indent_level: IndentationLevel,
) -> Res<OrgSource<'s>, OrgSource<'s>> { ) -> Res<OrgSource<'s>, OrgSource<'s>> {
let matched = recognize(verify( let matched = recognize(verify(
tuple((space0::<OrgSource<'_>, _>, non_whitespace_character)), tuple((
parser_with_context!(indentation_level)(context),
non_whitespace_character,
)),
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09) // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
|(_space0, _anychar)| _space0.len() <= indent_level, |((indentation_level, _leading_whitespace), _anychar)| *indentation_level <= indent_level,
))(input)?; ))(input)?;
Ok(matched) Ok(matched)

View File

@ -1,6 +1,8 @@
use nom::branch::alt; use nom::branch::alt;
use nom::bytes::complete::tag; use nom::bytes::complete::tag;
use nom::bytes::complete::tag_no_case; use nom::bytes::complete::tag_no_case;
use nom::character::complete::space0;
use nom::character::complete::space1;
use nom::multi::many1; use nom::multi::many1;
use nom::sequence::tuple; use nom::sequence::tuple;
@ -8,8 +10,6 @@ use super::org_source::OrgSource;
use super::timestamp::timestamp; use super::timestamp::timestamp;
use super::util::maybe_consume_trailing_whitespace_if_not_exiting; use super::util::maybe_consume_trailing_whitespace_if_not_exiting;
use super::util::org_line_ending; use super::util::org_line_ending;
use super::util::org_spaces0;
use super::util::org_spaces1;
use crate::context::parser_with_context; use crate::context::parser_with_context;
use crate::context::RefContext; use crate::context::RefContext;
use crate::error::Res; use crate::error::Res;
@ -23,10 +23,10 @@ pub(crate) fn planning<'b, 'g, 'r, 's>(
input: OrgSource<'s>, input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Planning<'s>> { ) -> Res<OrgSource<'s>, Planning<'s>> {
start_of_line(input)?; start_of_line(input)?;
let (remaining, _leading_whitespace) = org_spaces0(input)?; let (remaining, _leading_whitespace) = space0(input)?;
let (remaining, _planning_parameters) = let (remaining, _planning_parameters) =
many1(parser_with_context!(planning_parameter)(context))(remaining)?; many1(parser_with_context!(planning_parameter)(context))(remaining)?;
let (remaining, _trailing_ws) = tuple((org_spaces0, org_line_ending))(remaining)?; let (remaining, _trailing_ws) = tuple((space0, org_line_ending))(remaining)?;
let (remaining, _trailing_ws) = let (remaining, _trailing_ws) =
maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
@ -50,7 +50,7 @@ fn planning_parameter<'b, 'g, 'r, 's>(
tag_no_case("SCHEDULED"), tag_no_case("SCHEDULED"),
tag_no_case("CLOSED"), tag_no_case("CLOSED"),
))(input)?; ))(input)?;
let (remaining, _gap) = tuple((tag(":"), org_spaces1))(remaining)?; let (remaining, _gap) = tuple((tag(":"), space1))(remaining)?;
let (remaining, _timestamp) = timestamp(context, remaining)?; let (remaining, _timestamp) = timestamp(context, remaining)?;
let source = get_consumed(input, remaining); let source = get_consumed(input, remaining);
Ok((remaining, source)) Ok((remaining, source))

View File

@ -1,5 +1,4 @@
use nom::branch::alt; use nom::branch::alt;
use nom::bytes::complete::is_a;
use nom::character::complete::anychar; use nom::character::complete::anychar;
use nom::character::complete::line_ending; use nom::character::complete::line_ending;
use nom::character::complete::none_of; use nom::character::complete::none_of;
@ -10,11 +9,9 @@ use nom::combinator::not;
use nom::combinator::opt; use nom::combinator::opt;
use nom::combinator::peek; use nom::combinator::peek;
use nom::combinator::recognize; use nom::combinator::recognize;
use nom::combinator::verify;
use nom::multi::many0; use nom::multi::many0;
use nom::multi::many_till; use nom::multi::many_till;
use nom::sequence::tuple; use nom::sequence::tuple;
use nom::Slice;
use super::org_source::OrgSource; use super::org_source::OrgSource;
use crate::context::parser_with_context; use crate::context::parser_with_context;
@ -23,6 +20,7 @@ use crate::context::RefContext;
use crate::error::CustomError; use crate::error::CustomError;
use crate::error::MyError; use crate::error::MyError;
use crate::error::Res; use crate::error::Res;
use crate::types::IndentationLevel;
pub(crate) const WORD_CONSTITUENT_CHARACTERS: &str = pub(crate) const WORD_CONSTITUENT_CHARACTERS: &str =
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
@ -243,25 +241,7 @@ pub(crate) fn org_space<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, char> {
pub(crate) fn org_space_or_line_ending<'s>( pub(crate) fn org_space_or_line_ending<'s>(
input: OrgSource<'s>, input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> { ) -> Res<OrgSource<'s>, OrgSource<'s>> {
alt((recognize(one_of(" \t")), org_line_ending))(input) alt((recognize(org_space), org_line_ending))(input)
}
/// Match as many spaces and tabs as possible. No minimum match.
///
/// In org-mode syntax, spaces and tabs are interchangeable.
pub(crate) fn org_spaces0<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
let found = is_a(" \t")(input);
if found.is_ok() {
return found;
}
Ok((input, input.slice(..0)))
}
/// Match as many spaces and tabs as possible. Minimum 1 character.
///
/// In org-mode syntax, spaces and tabs are interchangeable.
pub(crate) fn org_spaces1<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
verify(is_a(" \t"), |res: &OrgSource<'_>| res.len() > 0)(input)
} }
/// Match a line break or the end of the file. /// Match a line break or the end of the file.
@ -270,3 +250,20 @@ pub(crate) fn org_spaces1<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSou
pub(crate) fn org_line_ending<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> { pub(crate) fn org_line_ending<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
alt((line_ending, eof))(input) alt((line_ending, eof))(input)
} }
/// Match the whitespace at the beginning of a line and give it an indentation level.
///
/// Consumes the (possibly empty) leading run of whitespace with `space0`. Each space counts
/// as 1 and each tab counts as the `tab_width` from the global settings.
///
/// Returns the remaining input together with a tuple of the computed indentation level and
/// the matched leading whitespace. The level saturates at `IndentationLevel::MAX` instead of
/// overflowing on extremely long runs of whitespace.
pub(crate) fn indentation_level<'b, 'g, 'r, 's>(
    context: RefContext<'b, 'g, 'r, 's>,
    input: OrgSource<'s>,
) -> Res<OrgSource<'s>, (IndentationLevel, OrgSource<'s>)> {
    let (remaining, leading_whitespace) = space0(input)?;
    // Look the setting up once rather than once per tab character.
    let tab_width = context.get_global_settings().tab_width;
    // Accumulate with saturating_add: a plain sum would panic (debug) or wrap around
    // (release) if the whitespace run is wide enough to exceed the integer range.
    let indentation_level = Into::<&str>::into(leading_whitespace)
        .chars()
        .fold(0, |level: IndentationLevel, c| {
            level.saturating_add(match c {
                ' ' => 1,
                '\t' => tab_width,
                // space0 only matches space and tab characters.
                _ => unreachable!(),
            })
        });
    Ok((remaining, (indentation_level, leading_whitespace)))
}

View File

@ -10,10 +10,13 @@ pub struct PlainList<'s> {
pub children: Vec<PlainListItem<'s>>, pub children: Vec<PlainListItem<'s>>,
} }
/// The width that something is indented. For example, a single tab character could be a value of 4 or 8.
///
/// The unit is space-equivalents: when indentation is measured, one space counts as 1 and
/// one tab counts as the `tab_width` global setting, which uses this same type.
pub type IndentationLevel = u16;
#[derive(Debug)] #[derive(Debug)]
pub struct PlainListItem<'s> { pub struct PlainListItem<'s> {
pub source: &'s str, pub source: &'s str,
pub indentation: usize, pub indentation: IndentationLevel,
pub bullet: &'s str, pub bullet: &'s str,
pub checkbox: Option<(CheckboxType, &'s str)>, pub checkbox: Option<(CheckboxType, &'s str)>,
pub tag: Vec<Object<'s>>, pub tag: Vec<Object<'s>>,

View File

@ -16,6 +16,7 @@ pub use greater_element::Drawer;
pub use greater_element::DynamicBlock; pub use greater_element::DynamicBlock;
pub use greater_element::FootnoteDefinition; pub use greater_element::FootnoteDefinition;
pub use greater_element::GreaterBlock; pub use greater_element::GreaterBlock;
pub use greater_element::IndentationLevel;
pub use greater_element::NodeProperty; pub use greater_element::NodeProperty;
pub use greater_element::PlainList; pub use greater_element::PlainList;
pub use greater_element::PlainListItem; pub use greater_element::PlainListItem;