organic/src/parser/document.rs
2023-07-14 12:28:52 -04:00

287 lines
9.7 KiB
Rust

use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::character::complete::line_ending;
use nom::character::complete::space1;
use nom::combinator::eof;
use nom::combinator::map;
use nom::combinator::not;
use nom::combinator::opt;
use nom::combinator::recognize;
use nom::combinator::verify;
use nom::multi::many0;
use nom::multi::many1;
use nom::multi::many1_count;
use nom::multi::many_till;
use nom::sequence::tuple;
use super::element::Element;
use super::object::Object;
use super::parser_with_context::parser_with_context;
use super::source::Source;
use super::token::Token;
use super::util::exit_matcher_parser;
use super::util::get_consumed;
use super::util::start_of_line;
use super::Context;
use crate::error::Res;
use crate::parser::comment::comment;
use crate::parser::element_parser::element;
use crate::parser::exiting::ExitClass;
use crate::parser::object_parser::standard_set_object;
use crate::parser::parser_context::ContextElement;
use crate::parser::parser_context::ContextTree;
use crate::parser::parser_context::ExitMatcherNode;
use crate::parser::planning::planning;
use crate::parser::property_drawer::property_drawer;
use crate::parser::util::blank_line;
use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting;
#[derive(Debug)]
pub struct Document<'s> {
pub source: &'s str,
pub zeroth_section: Option<Section<'s>>,
pub children: Vec<Heading<'s>>,
}
#[derive(Debug)]
pub struct Heading<'s> {
pub source: &'s str,
pub stars: usize,
pub title: Vec<Object<'s>>,
pub children: Vec<DocumentElement<'s>>,
}
#[derive(Debug)]
pub struct Section<'s> {
pub source: &'s str,
pub children: Vec<Element<'s>>,
}
#[derive(Debug)]
pub enum DocumentElement<'s> {
Heading(Heading<'s>),
Section(Section<'s>),
}
impl<'s> Source<'s> for Document<'s> {
fn get_source(&'s self) -> &'s str {
self.source
}
}
impl<'s> Source<'s> for DocumentElement<'s> {
fn get_source(&'s self) -> &'s str {
match self {
DocumentElement::Heading(obj) => obj.source,
DocumentElement::Section(obj) => obj.source,
}
}
}
impl<'s> Source<'s> for Section<'s> {
fn get_source(&'s self) -> &'s str {
self.source
}
}
impl<'s> Source<'s> for Heading<'s> {
fn get_source(&'s self) -> &'s str {
self.source
}
}
#[tracing::instrument(ret, level = "debug")]
#[allow(dead_code)]
pub fn document(input: &str) -> Res<&str, Document> {
let initial_context: ContextTree<'_, '_> = ContextTree::new();
let document_context =
initial_context.with_additional_node(ContextElement::DocumentRoot(input));
let zeroth_section_matcher = parser_with_context!(zeroth_section)(&document_context);
let heading_matcher = parser_with_context!(heading)(&document_context);
let (remaining, _blank_lines) = many0(blank_line)(input)?;
let (remaining, zeroth_section) = opt(zeroth_section_matcher)(remaining)?;
let (remaining, children) = many0(heading_matcher)(remaining)?;
let source = get_consumed(input, remaining);
Ok((
remaining,
Document {
source,
zeroth_section,
children,
},
))
}
#[tracing::instrument(ret, level = "debug")]
fn zeroth_section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Section<'s>> {
// TODO: The zeroth section is specialized so it probably needs its own parser
let parser_context = context
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
.with_additional_node(ContextElement::Context("section"))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Document,
exit_matcher: &section_end,
}));
let without_consuming_whitespace_context =
parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(false));
let element_matcher = parser_with_context!(element(true))(&parser_context);
let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context);
let (remaining, comment_and_property_drawer_element) = opt(tuple((
opt(parser_with_context!(comment)(
&without_consuming_whitespace_context,
)),
parser_with_context!(property_drawer)(&without_consuming_whitespace_context),
many0(blank_line),
)))(input)?;
let (remaining, (mut children, _exit_contents)) = verify(
many_till(element_matcher, exit_matcher),
|(children, _exit_contents)| {
!children.is_empty() || comment_and_property_drawer_element.is_some()
},
)(remaining)?;
comment_and_property_drawer_element.map(|(comment, property_drawer, _ws)| {
children.insert(0, Element::PropertyDrawer(property_drawer));
comment
.map(Element::Comment)
.map(|ele| children.insert(0, ele));
});
let (remaining, _trailing_ws) =
maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
let source = get_consumed(input, remaining);
Ok((remaining, Section { source, children }))
}
#[tracing::instrument(ret, level = "debug")]
fn section<'r, 's>(context: Context<'r, 's>, mut input: &'s str) -> Res<&'s str, Section<'s>> {
// TODO: The zeroth section is specialized so it probably needs its own parser
let parser_context = context
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
.with_additional_node(ContextElement::Context("section"))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Document,
exit_matcher: &section_end,
}));
let element_matcher = parser_with_context!(element(true))(&parser_context);
let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context);
let (mut remaining, (planning_element, property_drawer_element)) = tuple((
opt(parser_with_context!(planning)(&parser_context)),
opt(parser_with_context!(property_drawer)(&parser_context)),
))(input)?;
if planning_element.is_none() && property_drawer_element.is_none() {
let (remain, _ws) = many0(blank_line)(remaining)?;
remaining = remain;
input = remain;
}
let (remaining, (mut children, _exit_contents)) = verify(
many_till(element_matcher, exit_matcher),
|(children, _exit_contents)| {
!children.is_empty() || property_drawer_element.is_some() || planning_element.is_some()
},
)(remaining)?;
property_drawer_element
.map(Element::PropertyDrawer)
.map(|ele| children.insert(0, ele));
planning_element
.map(Element::Planning)
.map(|ele| children.insert(0, ele));
let (remaining, _trailing_ws) =
maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
let source = get_consumed(input, remaining);
Ok((remaining, Section { source, children }))
}
#[tracing::instrument(ret, level = "debug")]
fn section_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
let headline_matcher = parser_with_context!(headline)(context);
recognize(headline_matcher)(input)
}
#[tracing::instrument(ret, level = "debug")]
fn heading<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Heading<'s>> {
not(|i| context.check_exit_matcher(i))(input)?;
let (remaining, (star_count, _ws, title)) = headline(context, input)?;
let section_matcher = parser_with_context!(section)(context);
let heading_matcher = parser_with_context!(heading)(context);
let (remaining, children) = many0(alt((
map(
verify(heading_matcher, |h| h.stars > star_count),
DocumentElement::Heading,
),
map(section_matcher, DocumentElement::Section),
)))(remaining)?;
let source = get_consumed(input, remaining);
Ok((
remaining,
Heading {
source,
stars: star_count,
title,
children,
},
))
}
#[tracing::instrument(ret, level = "debug")]
fn headline<'r, 's>(
context: Context<'r, 's>,
input: &'s str,
) -> Res<&'s str, (usize, &'s str, Vec<Object<'s>>)> {
let parser_context =
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Document,
exit_matcher: &headline_end,
}));
let standard_set_object_matcher = parser_with_context!(standard_set_object)(&parser_context);
let start_of_line_matcher = parser_with_context!(start_of_line)(&parser_context);
let (remaining, (_sol, star_count, ws, title, _line_ending)) = tuple((
start_of_line_matcher,
many1_count(tag("*")),
space1,
many1(standard_set_object_matcher),
alt((line_ending, eof)),
))(input)?;
Ok((remaining, (star_count, ws, title)))
}
#[tracing::instrument(ret, level = "debug")]
fn headline_end<'r, 's>(_context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
line_ending(input)
}
impl<'s> Document<'s> {
pub fn iter_tokens<'r>(&'r self) -> impl Iterator<Item = Token<'r, 's>> {
self.zeroth_section
.iter()
.map(Token::Section)
.chain(self.children.iter().map(Token::Heading))
}
}
impl<'s> Heading<'s> {
pub fn iter_tokens<'r>(&'r self) -> impl Iterator<Item = Token<'r, 's>> {
self.title.iter().map(Token::Object).chain(self.children.iter().map(
|de| {
match de {
DocumentElement::Heading(obj) => Token::Heading(obj),
DocumentElement::Section(obj) => Token::Section(obj),
}
}
))
}
}
impl<'s> Section<'s> {
pub fn iter_tokens<'r>(&'r self) -> impl Iterator<Item = Token<'r, 's>> {
self.children.iter().map(Token::Element)
}
}