organic/src/parser/document.rs

402 lines
14 KiB
Rust
Raw Normal View History

2023-03-23 23:35:32 +00:00
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::character::complete::anychar;
2023-03-23 23:53:20 +00:00
use nom::character::complete::line_ending;
use nom::character::complete::space0;
2023-03-23 23:53:20 +00:00
use nom::character::complete::space1;
use nom::combinator::eof;
2023-03-24 21:19:46 +00:00
use nom::combinator::map;
2023-03-24 00:12:42 +00:00
use nom::combinator::not;
2023-03-24 21:34:56 +00:00
use nom::combinator::opt;
2023-03-23 23:35:32 +00:00
use nom::combinator::recognize;
2023-03-24 21:19:46 +00:00
use nom::combinator::verify;
use nom::multi::many0;
use nom::multi::many1;
2023-03-23 23:35:32 +00:00
use nom::multi::many1_count;
use nom::multi::many_till;
use nom::multi::separated_list1;
2023-03-23 23:35:32 +00:00
use nom::sequence::tuple;
2023-03-23 21:51:49 +00:00
use super::element::Element;
use super::object::Object;
use super::org_source::convert_error;
use super::org_source::OrgSource;
2023-03-23 23:35:32 +00:00
use super::parser_with_context::parser_with_context;
2023-03-23 21:51:49 +00:00
use super::source::Source;
use super::token::AllTokensIterator;
use super::token::Token;
use super::util::exit_matcher_parser;
2023-03-25 15:25:10 +00:00
use super::util::get_consumed;
2023-03-25 18:10:22 +00:00
use super::util::start_of_line;
2023-03-23 23:35:32 +00:00
use super::Context;
use crate::error::Res;
use crate::parser::comment::comment;
use crate::parser::element_parser::element;
use crate::parser::exiting::ExitClass;
use crate::parser::object_parser::standard_set_object;
use crate::parser::parser_context::ContextElement;
use crate::parser::parser_context::ContextTree;
use crate::parser::parser_context::ExitMatcherNode;
use crate::parser::planning::planning;
use crate::parser::property_drawer::property_drawer;
use crate::parser::util::blank_line;
use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting;
2022-10-15 00:17:48 +00:00
2023-03-23 21:51:49 +00:00
#[derive(Debug)]
pub struct Document<'s> {
pub source: &'s str,
pub zeroth_section: Option<Section<'s>>,
pub children: Vec<Heading<'s>>,
}
#[derive(Debug)]
pub struct Heading<'s> {
pub source: &'s str,
2023-03-24 21:19:46 +00:00
pub stars: usize,
pub todo_keyword: Option<&'s str>,
// TODO: add todo-type enum
2023-03-25 16:18:47 +00:00
pub title: Vec<Object<'s>>,
2023-08-25 10:20:06 +00:00
pub tags: Vec<&'s str>,
2023-03-23 21:51:49 +00:00
pub children: Vec<DocumentElement<'s>>,
}
#[derive(Debug)]
pub struct Section<'s> {
pub source: &'s str,
pub children: Vec<Element<'s>>,
}
#[derive(Debug)]
pub enum DocumentElement<'s> {
Heading(Heading<'s>),
Section(Section<'s>),
}
2023-03-23 21:51:49 +00:00
impl<'s> Source<'s> for Document<'s> {
fn get_source(&'s self) -> &'s str {
self.source
}
}
2022-12-18 09:22:28 +00:00
2023-03-23 21:51:49 +00:00
impl<'s> Source<'s> for DocumentElement<'s> {
fn get_source(&'s self) -> &'s str {
match self {
DocumentElement::Heading(obj) => obj.source,
DocumentElement::Section(obj) => obj.source,
}
}
}
2023-03-23 21:59:39 +00:00
impl<'s> Source<'s> for Section<'s> {
fn get_source(&'s self) -> &'s str {
self.source
}
}
impl<'s> Source<'s> for Heading<'s> {
fn get_source(&'s self) -> &'s str {
self.source
}
}
2023-08-11 00:04:59 +00:00
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
2023-03-24 00:12:42 +00:00
#[allow(dead_code)]
2023-03-23 21:59:39 +00:00
pub fn document(input: &str) -> Res<&str, Document> {
let initial_context: ContextTree<'_, '_> = ContextTree::new();
let wrapped_input = OrgSource::new(input);
let (remaining, document) = _document(&initial_context, wrapped_input)
.map(|(rem, out)| (Into::<&str>::into(rem), out))
.map_err(convert_error)?;
2023-07-14 23:06:58 +00:00
{
// If there are radio targets in this document then we need to parse the entire document again with the knowledge of the radio targets.
let all_radio_targets: Vec<&Vec<Object<'_>>> = document
.iter_tokens()
.filter_map(|tkn| match tkn {
Token::Object(obj) => Some(obj),
_ => None,
})
.filter_map(|obj| match obj {
Object::RadioTarget(rt) => Some(rt),
_ => None,
})
.map(|rt| &rt.children)
.collect();
if !all_radio_targets.is_empty() {
let initial_context = initial_context
2023-07-14 23:06:58 +00:00
.with_additional_node(ContextElement::RadioTarget(all_radio_targets));
let (remaining, document) = _document(&initial_context, wrapped_input)
.map(|(rem, out)| (Into::<&str>::into(rem), out))
.map_err(convert_error)?;
return Ok((remaining.into(), document));
2023-07-14 23:06:58 +00:00
}
}
Ok((remaining.into(), document))
2023-07-14 23:06:58 +00:00
}
2023-08-11 00:04:59 +00:00
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn _document<'r, 's>(
context: Context<'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Document<'s>> {
2023-07-14 23:06:58 +00:00
let zeroth_section_matcher = parser_with_context!(zeroth_section)(context);
let heading_matcher = parser_with_context!(heading(0))(context);
let (remaining, _blank_lines) = many0(blank_line)(input)?;
let (remaining, zeroth_section) = opt(zeroth_section_matcher)(remaining)?;
let (remaining, children) = many0(heading_matcher)(remaining)?;
2023-03-24 21:34:56 +00:00
let source = get_consumed(input, remaining);
Ok((
remaining,
Document {
source: source.into(),
2023-03-24 21:34:56 +00:00
zeroth_section,
children,
},
))
2023-03-23 21:59:39 +00:00
}
2023-03-23 23:35:32 +00:00
2023-08-11 00:04:59 +00:00
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn zeroth_section<'r, 's>(
context: Context<'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Section<'s>> {
// TODO: The zeroth section is specialized so it probably needs its own parser
let parser_context = context
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
.with_additional_node(ContextElement::Context("section"))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Document,
exit_matcher: &section_end,
}));
2023-04-19 23:03:51 +00:00
let without_consuming_whitespace_context =
parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(false));
2023-04-22 05:45:38 +00:00
let element_matcher = parser_with_context!(element(true))(&parser_context);
let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context);
let (remaining, comment_and_property_drawer_element) = opt(tuple((
2023-04-19 23:03:51 +00:00
opt(parser_with_context!(comment)(
&without_consuming_whitespace_context,
)),
parser_with_context!(property_drawer)(context),
many0(blank_line),
)))(input)?;
let (remaining, (mut children, _exit_contents)) = verify(
many_till(element_matcher, exit_matcher),
2023-04-19 23:03:51 +00:00
|(children, _exit_contents)| {
!children.is_empty() || comment_and_property_drawer_element.is_some()
2023-04-19 23:03:51 +00:00
},
)(remaining)?;
comment_and_property_drawer_element.map(|(comment, property_drawer, _ws)| {
children.insert(0, Element::PropertyDrawer(property_drawer));
comment
.map(Element::Comment)
.map(|ele| children.insert(0, ele));
});
let (remaining, _trailing_ws) =
maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
let source = get_consumed(input, remaining);
Ok((
remaining,
Section {
source: source.into(),
children,
},
))
}
2023-08-11 00:04:59 +00:00
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn section<'r, 's>(
context: Context<'r, 's>,
mut input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Section<'s>> {
2023-03-23 23:35:32 +00:00
// TODO: The zeroth section is specialized so it probably needs its own parser
let parser_context = context
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
.with_additional_node(ContextElement::Context("section"))
2023-03-23 23:35:32 +00:00
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Document,
exit_matcher: &section_end,
}));
2023-04-22 05:45:38 +00:00
let element_matcher = parser_with_context!(element(true))(&parser_context);
let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context);
let (mut remaining, (planning_element, property_drawer_element)) = tuple((
opt(parser_with_context!(planning)(&parser_context)),
opt(parser_with_context!(property_drawer)(&parser_context)),
))(input)?;
if planning_element.is_none() && property_drawer_element.is_none() {
let (remain, _ws) = many0(blank_line)(remaining)?;
remaining = remain;
input = remain;
}
2023-04-19 23:03:51 +00:00
let (remaining, (mut children, _exit_contents)) = verify(
many_till(element_matcher, exit_matcher),
|(children, _exit_contents)| {
!children.is_empty() || property_drawer_element.is_some() || planning_element.is_some()
},
2023-04-19 23:03:51 +00:00
)(remaining)?;
property_drawer_element
.map(Element::PropertyDrawer)
.map(|ele| children.insert(0, ele));
planning_element
.map(Element::Planning)
.map(|ele| children.insert(0, ele));
let (remaining, _trailing_ws) =
maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
2023-03-24 21:00:27 +00:00
let source = get_consumed(input, remaining);
Ok((
remaining,
Section {
source: source.into(),
children,
},
))
2023-03-23 23:35:32 +00:00
}
2023-08-11 00:04:59 +00:00
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn section_end<'r, 's>(
_context: Context<'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
recognize(detect_headline)(input)
2023-03-23 23:35:32 +00:00
}
const fn heading(
parent_stars: usize,
) -> impl for<'r, 's> Fn(Context<'r, 's>, OrgSource<'s>) -> Res<OrgSource<'s>, Heading<'s>> {
move |context: Context, input: OrgSource<'_>| _heading(context, input, parent_stars)
}
2023-08-11 00:04:59 +00:00
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn _heading<'r, 's>(
context: Context<'r, 's>,
input: OrgSource<'s>,
parent_stars: usize,
) -> Res<OrgSource<'s>, Heading<'s>> {
2023-03-24 00:12:42 +00:00
not(|i| context.check_exit_matcher(i))(input)?;
let (remaining, (star_count, _ws, maybe_todo_keyword, title, heading_tags)) =
headline(context, input, parent_stars)?;
2023-03-24 21:19:46 +00:00
let section_matcher = parser_with_context!(section)(context);
let heading_matcher = parser_with_context!(heading(star_count))(context);
2023-03-24 21:19:46 +00:00
let (remaining, children) = many0(alt((
map(heading_matcher, DocumentElement::Heading),
2023-03-24 21:19:46 +00:00
map(section_matcher, DocumentElement::Section),
)))(remaining)?;
let source = get_consumed(input, remaining);
Ok((
remaining,
Heading {
source: source.into(),
2023-03-24 21:19:46 +00:00
stars: star_count,
todo_keyword: maybe_todo_keyword
.map(|(todo_keyword, _ws)| Into::<&str>::into(todo_keyword)),
2023-03-25 16:18:47 +00:00
title,
2023-08-25 10:20:06 +00:00
tags: heading_tags,
2023-03-24 21:19:46 +00:00
children,
},
))
2023-03-23 23:53:20 +00:00
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn detect_headline<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, ()> {
tuple((start_of_line, many1(tag("*")), space1))(input)?;
Ok((input, ()))
}
2023-08-11 00:04:59 +00:00
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn headline<'r, 's>(
context: Context<'r, 's>,
input: OrgSource<'s>,
parent_stars: usize,
) -> Res<
OrgSource<'s>,
(
usize,
OrgSource<'s>,
Option<(OrgSource<'s>, OrgSource<'s>)>,
Vec<Object<'s>>,
Vec<&'s str>,
),
> {
2023-03-23 23:53:20 +00:00
let parser_context =
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Document,
exit_matcher: &headline_title_end,
2023-03-23 23:53:20 +00:00
}));
let standard_set_object_matcher = parser_with_context!(standard_set_object)(&parser_context);
2023-03-23 23:53:20 +00:00
let (
remaining,
(_sol, star_count, ws, maybe_todo_keyword, title, maybe_tags, _ws, _line_ending),
) = tuple((
start_of_line,
verify(many1_count(tag("*")), |star_count| {
*star_count > parent_stars
}),
2023-03-23 23:53:20 +00:00
space1,
opt(tuple((heading_keyword, space1))),
many1(standard_set_object_matcher),
opt(tuple((space0, tags))),
space0,
alt((line_ending, eof)),
2023-03-24 20:37:34 +00:00
))(input)?;
2023-08-25 10:20:06 +00:00
Ok((
remaining,
(
star_count,
ws,
maybe_todo_keyword,
2023-08-25 10:20:06 +00:00
title,
maybe_tags
.map(|(_ws, tags)| {
tags.into_iter()
.map(|single_tag| Into::<&str>::into(single_tag))
.collect()
})
.unwrap_or(Vec::new()),
),
))
2023-03-23 23:35:32 +00:00
}
2023-08-11 00:04:59 +00:00
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn headline_title_end<'r, 's>(
_context: Context<'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
recognize(tuple((
opt(tuple((space0, tags, space0))),
alt((line_ending, eof)),
)))(input)
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn tags<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, Vec<OrgSource<'s>>> {
let (remaining, (_open, tags, _close)) =
tuple((tag(":"), separated_list1(tag(":"), single_tag), tag(":")))(input)?;
Ok((remaining, tags))
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn single_tag<'r, 's>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
recognize(many1(verify(anychar, |c| {
c.is_alphanumeric() || "_@#%".contains(*c)
})))(input)
2023-03-23 23:53:20 +00:00
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn heading_keyword<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
// TODO: This should take into account the value of "#+TODO:" ref https://orgmode.org/manual/Per_002dfile-keywords.html and possibly the configurable variable org-todo-keywords ref https://orgmode.org/manual/Workflow-states.html. Case is significant.
alt((tag("TODO"), tag("DONE")))(input)
}
impl<'s> Document<'s> {
pub fn iter_tokens<'r>(&'r self) -> impl Iterator<Item = Token<'r, 's>> {
AllTokensIterator::new(Token::Document(self))
}
}