diff --git a/src/parser/element.rs b/src/parser/element.rs
index 49f543d..b38505c 100644
--- a/src/parser/element.rs
+++ b/src/parser/element.rs
@@ -1,5 +1,7 @@
 use super::error::Res;
+use super::footnote_definition::footnote_definition;
 use super::greater_block::greater_block;
+use super::greater_element::FootnoteDefinition;
 use super::greater_element::GreaterBlock;
 use super::greater_element::PlainList;
 use super::lesser_element::Paragraph;
@@ -16,6 +18,7 @@ pub enum Element<'s> {
     Paragraph(Paragraph<'s>),
     PlainList(PlainList<'s>),
     GreaterBlock(GreaterBlock<'s>),
+    FootnoteDefinition(FootnoteDefinition<'s>),
     /// The whitespace that follows an element.
     ///
     /// This isn't a real org-mode element. Except for items in plain lists, trailing blank lines belong to the preceding element. It is a separate `Element` in this enum to make parsing easier.
@@ -29,6 +32,7 @@ impl<'s> Source<'s> for Element<'s> {
             Element::PlainList(obj) => obj.source,
             Element::GreaterBlock(obj) => obj.source,
             Element::TrailingWhitespace(src) => src,
+            Element::FootnoteDefinition(obj) => obj.source,
         }
     }
 }
@@ -50,8 +54,10 @@ pub fn non_paragraph_element<'r, 's>(
 ) -> Res<&'s str, Element<'s>> {
     let plain_list_matcher = parser_with_context!(plain_list)(context);
     let greater_block_matcher = parser_with_context!(greater_block)(context);
+    let footnote_definition_matcher = parser_with_context!(footnote_definition)(context);
     alt((
         map(plain_list_matcher, Element::PlainList),
         map(greater_block_matcher, Element::GreaterBlock),
+        map(footnote_definition_matcher, Element::FootnoteDefinition),
     ))(input)
 }
diff --git a/src/parser/footnote_definition.rs b/src/parser/footnote_definition.rs
new file mode 100644
index 0000000..3567cc8
--- /dev/null
+++ b/src/parser/footnote_definition.rs
@@ -0,0 +1,83 @@
+use super::error::Res;
+use super::util::WORD_CONSTITUENT_CHARACTERS;
+use super::Context;
+use crate::parser::element::element;
+use crate::parser::greater_element::FootnoteDefinition;
+use crate::parser::parser_context::ChainBehavior;
+use crate::parser::parser_context::ContextElement;
+use crate::parser::parser_context::ExitMatcherNode;
+use crate::parser::parser_with_context::parser_with_context;
+use crate::parser::util::blank_line;
+use crate::parser::util::exit_matcher_parser;
+use crate::parser::util::get_consumed;
+use crate::parser::util::start_of_line;
+use crate::parser::util::whitespace_eof;
+use nom::branch::alt;
+use nom::bytes::complete::tag;
+use nom::bytes::complete::tag_no_case;
+use nom::bytes::complete::take_while1;
+use nom::character::complete::space0;
+use nom::combinator::recognize;
+use nom::combinator::verify;
+use nom::multi::many1;
+use nom::multi::many_till;
+use nom::sequence::tuple;
+
+/// Parse a footnote definition: `[fn:LABEL]` at the start of a line, followed by child elements.
+///
+/// Children are parsed with `element` until `exit_matcher_parser` succeeds in a context that
+/// registers `footnote_definition_end`. The terminator (two or more blank lines, or only
+/// whitespace up to the end of the input) is then consumed as part of the definition.
+#[tracing::instrument(ret, level = "debug")]
+pub fn footnote_definition<'r, 's>(
+    context: Context<'r, 's>,
+    input: &'s str,
+) -> Res<&'s str, FootnoteDefinition<'s>> {
+    start_of_line(context, input)?;
+    // Cannot be indented: `[fn:` must directly follow the start of the line.
+    let (remaining, (_lead_in, lbl, _lead_out, _ws)) =
+        tuple((tag_no_case("[fn:"), label, tag("]"), space0))(input)?;
+    let parser_context =
+        context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
+            exit_matcher: ChainBehavior::IgnoreParent(Some(&footnote_definition_end)),
+        }));
+    let element_matcher = parser_with_context!(element)(&parser_context);
+    let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context);
+    let (remaining, (children, _exit_contents)) =
+        many_till(element_matcher, exit_matcher)(remaining)?;
+    // Consume the terminator with the caller's context, not the child context built above.
+    let footnote_definition_end_matcher = parser_with_context!(footnote_definition_end)(context);
+    let (remaining, _end) = alt((footnote_definition_end_matcher, whitespace_eof))(remaining)?;
+    let source = get_consumed(input, remaining);
+    Ok((
+        remaining,
+        FootnoteDefinition {
+            source,
+            label: lbl,
+            children,
+        },
+    ))
+}
+
+/// Parse a footnote label: one or more word-constituent characters, `-`, or `_`.
+///
+/// Digits are word-constituent characters, so numeric labels need no separate branch. An
+/// empty label (`[fn:]`) is rejected.
+#[tracing::instrument(ret, level = "debug")]
+fn label<'s>(input: &'s str) -> Res<&'s str, &'s str> {
+    take_while1(|c: char| WORD_CONSTITUENT_CHARACTERS.contains(c) || "-_".contains(c))(input)
+}
+
+/// Recognize the end of a footnote definition: two or more blank lines beginning at the
+/// start of a line.
+#[tracing::instrument(ret, level = "debug")]
+fn footnote_definition_end<'r, 's>(
+    context: Context<'r, 's>,
+    input: &'s str,
+) -> Res<&'s str, &'s str> {
+    let start_of_line_matcher = parser_with_context!(start_of_line)(context);
+    recognize(tuple((
+        start_of_line_matcher,
+        verify(many1(blank_line), |lines: &Vec<&str>| lines.len() >= 2),
+    )))(input)
+}
diff --git a/src/parser/greater_element.rs b/src/parser/greater_element.rs
index 9e828c7..119d03e 100644
--- a/src/parser/greater_element.rs
+++ b/src/parser/greater_element.rs
@@ -21,3 +21,14 @@ pub struct GreaterBlock<'s> {
     pub parameters: Option<&'s str>,
     pub children: Vec<Element<'s>>,
 }
+
+/// A footnote definition (`[fn:LABEL] contents`).
+#[derive(Debug)]
+pub struct FootnoteDefinition<'s> {
+    /// The full source text matched for this definition.
+    pub source: &'s str,
+    /// The label between `[fn:` and `]`.
+    pub label: &'s str,
+    /// The elements contained in the definition.
+    pub children: Vec<Element<'s>>,
+}
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index fbb3fe7..8f3915e 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -1,6 +1,7 @@
 mod document;
 mod element;
 mod error;
+mod footnote_definition;
 mod greater_block;
 mod greater_element;
 mod lesser_element;
diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs
index 02cf02e..215280a 100644
--- a/src/parser/plain_list.rs
+++ b/src/parser/plain_list.rs
@@ -27,6 +27,7 @@ use nom::multi::many_till;
 use nom::sequence::tuple;
 
 pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, PlainList<'s>> {
+    // TODO: Are we handling 2 blank lines causing the end of all plain lists?
     let (mut remaining, first_item) = plain_list_item(context, input)?;
     let first_item_indentation = first_item.indentation;
     let plain_list_item_matcher = parser_with_context!(plain_list_item)(context);
diff --git a/src/parser/util.rs b/src/parser/util.rs
index 8abbe1a..9e4838e 100644
--- a/src/parser/util.rs
+++ b/src/parser/util.rs
@@ -1,5 +1,11 @@
+use super::error::CustomError;
+use super::error::MyError;
+use super::error::Res;
+use super::parser_context::ContextElement;
+use super::Context;
 use nom::branch::alt;
 use nom::character::complete::line_ending;
+use nom::character::complete::multispace0;
 use nom::character::complete::none_of;
 use nom::character::complete::space0;
 use nom::combinator::eof;
@@ -9,11 +15,9 @@ use nom::combinator::recognize;
 use nom::multi::many0;
 use nom::sequence::tuple;
 
-use super::error::CustomError;
-use super::error::MyError;
-use super::error::Res;
-use super::parser_context::ContextElement;
-use super::Context;
+/// ASCII letters and digits: the characters the parsers treat as word constituents.
+pub const WORD_CONSTITUENT_CHARACTERS: &str =
+    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
 
 /// Check if we are below a section of the given section type regardless of depth
 pub fn in_section<'r, 's, 'x>(context: Context<'r, 's>, section_name: &'x str) -> bool {
@@ -162,6 +166,12 @@ pub fn regurgitate<'s>(input: &'s str, remaining: &'s str) -> &'s str {
     }
 }
 
+/// Match any trailing whitespace (including line breaks) followed by the end of the input.
+#[tracing::instrument(ret, level = "debug")]
+pub fn whitespace_eof(input: &str) -> Res<&str, &str> {
+    recognize(tuple((multispace0, eof)))(input)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;