organic/src/parser/document.rs

199 lines
6.4 KiB
Rust
Raw Normal View History

2023-03-23 23:35:32 +00:00
use nom::branch::alt;
use nom::bytes::complete::tag;
2023-03-23 23:53:20 +00:00
use nom::character::complete::line_ending;
use nom::character::complete::space1;
2023-03-23 23:35:32 +00:00
use nom::combinator::eof;
2023-03-24 21:19:46 +00:00
use nom::combinator::map;
2023-03-24 00:12:42 +00:00
use nom::combinator::not;
2023-03-24 21:34:56 +00:00
use nom::combinator::opt;
2023-03-23 23:35:32 +00:00
use nom::combinator::recognize;
2023-03-24 21:19:46 +00:00
use nom::combinator::verify;
use nom::multi::many0;
use nom::multi::many1;
2023-03-23 23:35:32 +00:00
use nom::multi::many1_count;
use nom::multi::many_till;
2023-03-23 23:35:32 +00:00
use nom::sequence::tuple;
2023-03-24 21:00:27 +00:00
use crate::parser::element::element;
2023-03-23 23:53:20 +00:00
use crate::parser::object::standard_set_object;
2023-03-23 23:35:32 +00:00
use crate::parser::parser_context::ChainBehavior;
2023-03-23 21:59:39 +00:00
use crate::parser::parser_context::ContextElement;
use crate::parser::parser_context::ContextTree;
2023-03-23 23:35:32 +00:00
use crate::parser::parser_context::ExitMatcherNode;
use crate::parser::util::element_trailing_whitespace;
2023-03-23 21:59:39 +00:00
2023-03-23 21:51:49 +00:00
use super::element::Element;
2023-03-23 21:59:39 +00:00
use super::error::Res;
use super::object::Object;
2023-03-23 23:35:32 +00:00
use super::parser_with_context::parser_with_context;
2023-03-23 21:51:49 +00:00
use super::source::Source;
use super::util::exit_matcher_parser;
2023-03-25 15:25:10 +00:00
use super::util::get_consumed;
2023-03-25 18:10:22 +00:00
use super::util::start_of_line;
use super::util::trailing_whitespace;
2023-03-23 23:35:32 +00:00
use super::Context;
2022-10-15 00:17:48 +00:00
2023-03-23 21:51:49 +00:00
/// Top-level result produced by the `document` parser.
#[derive(Debug)]
pub struct Document<'s> {
    /// Slice of the input reported by `get_consumed` for the whole parse.
    pub source: &'s str,
    /// Section parsed before any heading; `None` when no such section matched.
    pub zeroth_section: Option<Section<'s>>,
    /// Headings parsed after the optional leading section.
    pub children: Vec<Heading<'s>>,
}
#[derive(Debug)]
pub struct Heading<'s> {
pub source: &'s str,
2023-03-24 21:19:46 +00:00
pub stars: usize,
2023-03-25 16:18:47 +00:00
pub title: Vec<Object<'s>>,
2023-03-23 21:51:49 +00:00
pub children: Vec<DocumentElement<'s>>,
}
/// A run of elements produced by the `section` parser.
#[derive(Debug)]
pub struct Section<'s> {
    /// Slice of the input reported by `get_consumed` for this section.
    pub source: &'s str,
    /// Parsed elements, in input order; an element may be followed by an
    /// `Element::TrailingWhitespace` entry.
    pub children: Vec<Element<'s>>,
}
/// A child of a [`Heading`]: either a nested heading or a section.
#[derive(Debug)]
pub enum DocumentElement<'s> {
    /// A nested heading.
    Heading(Heading<'s>),
    /// A section of elements.
    Section(Section<'s>),
}
2023-03-23 21:51:49 +00:00
impl<'s> Source<'s> for Document<'s> {
    /// Returns the `source` slice stored on the document.
    fn get_source(&'s self) -> &'s str {
        let text: &'s str = self.source;
        text
    }
}
2022-12-18 09:22:28 +00:00
2023-03-23 21:51:49 +00:00
impl<'s> Source<'s> for DocumentElement<'s> {
    /// Returns the `source` slice of whichever variant is held.
    fn get_source(&'s self) -> &'s str {
        let text = match self {
            Self::Heading(heading) => heading.source,
            Self::Section(section) => section.source,
        };
        text
    }
}
2023-03-23 21:59:39 +00:00
2023-03-27 19:08:29 +00:00
#[tracing::instrument(ret, level = "debug")]
2023-03-24 00:12:42 +00:00
#[allow(dead_code)]
2023-03-23 21:59:39 +00:00
pub fn document(input: &str) -> Res<&str, Document> {
let initial_context: ContextTree<'_, '_> = ContextTree::new();
let document_context =
initial_context.with_additional_node(ContextElement::DocumentRoot(input));
2023-03-24 21:34:56 +00:00
let section_matcher = parser_with_context!(section)(&document_context);
let heading_matcher = parser_with_context!(heading)(&document_context);
let (remaining, zeroth_section) = opt(section_matcher)(input)?;
let (remaining, children) = many0(heading_matcher)(remaining)?;
2023-03-24 21:34:56 +00:00
let source = get_consumed(input, remaining);
Ok((
remaining,
Document {
source,
zeroth_section,
children,
},
))
2023-03-23 21:59:39 +00:00
}
2023-03-23 23:35:32 +00:00
2023-03-27 19:08:29 +00:00
#[tracing::instrument(ret, level = "debug")]
2023-03-23 23:35:32 +00:00
fn section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Section<'s>> {
// TODO: The zeroth section is specialized so it probably needs its own parser
let parser_context = context
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
.with_additional_node(ContextElement::Context("section"))
2023-03-23 23:35:32 +00:00
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
exit_matcher: ChainBehavior::AndParent(Some(&section_end)),
}));
2023-03-24 21:00:27 +00:00
let element_matcher = parser_with_context!(element)(&parser_context);
let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context);
let trailing_matcher = parser_with_context!(element_trailing_whitespace)(&parser_context);
let (remaining, (children, _exit_contents)) = verify(
many_till(
tuple((
element_matcher,
opt(map(trailing_matcher, Element::TrailingWhitespace)),
)),
exit_matcher,
),
|(children, _exit_contents)| !children.is_empty(),
)(input)?;
let flattened_children: Vec<Element> = children
.into_iter()
.flat_map(|tpl| {
let mut flattened_children = Vec::with_capacity(2);
flattened_children.push(tpl.0);
if let Some(bar) = tpl.1 {
flattened_children.push(bar);
}
flattened_children.into_iter()
})
.collect();
2023-03-24 21:00:27 +00:00
let source = get_consumed(input, remaining);
Ok((
remaining,
Section {
source,
children: flattened_children,
},
))
2023-03-23 23:35:32 +00:00
}
2023-03-27 19:08:29 +00:00
#[tracing::instrument(ret, level = "debug")]
2023-03-23 23:35:32 +00:00
fn section_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
let headline_matcher = parser_with_context!(headline)(context);
alt((recognize(headline_matcher), eof))(input)
2023-03-23 23:35:32 +00:00
}
2023-03-27 19:08:29 +00:00
#[tracing::instrument(ret, level = "debug")]
2023-03-23 23:35:32 +00:00
fn heading<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Heading<'s>> {
2023-03-24 00:12:42 +00:00
not(|i| context.check_exit_matcher(i))(input)?;
let (remaining, (star_count, _ws, title, _ws2)) = headline(context, input)?;
2023-03-24 21:19:46 +00:00
let section_matcher = parser_with_context!(section)(context);
let heading_matcher = parser_with_context!(heading)(context);
let (remaining, children) = many0(alt((
map(
verify(heading_matcher, |h| h.stars > star_count),
DocumentElement::Heading,
),
map(section_matcher, DocumentElement::Section),
)))(remaining)?;
let source = get_consumed(input, remaining);
Ok((
remaining,
Heading {
2023-03-24 21:34:56 +00:00
source,
2023-03-24 21:19:46 +00:00
stars: star_count,
2023-03-25 16:18:47 +00:00
title,
2023-03-24 21:19:46 +00:00
children,
},
))
2023-03-23 23:53:20 +00:00
}
2023-03-27 19:08:29 +00:00
#[tracing::instrument(ret, level = "debug")]
fn headline<'r, 's>(
context: Context<'r, 's>,
input: &'s str,
) -> Res<&'s str, (usize, &'s str, Vec<Object<'s>>, &'s str)> {
2023-03-23 23:53:20 +00:00
let parser_context =
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
exit_matcher: ChainBehavior::AndParent(Some(&headline_end)),
}));
let standard_set_object_matcher = parser_with_context!(standard_set_object)(&parser_context);
2023-03-24 20:37:34 +00:00
let start_of_line_matcher = parser_with_context!(start_of_line)(&parser_context);
2023-03-23 23:53:20 +00:00
2023-03-24 20:37:34 +00:00
let (remaining, (_sol, star_count, ws, title, ws2)) = tuple((
start_of_line_matcher,
2023-03-23 23:35:32 +00:00
many1_count(tag("*")),
2023-03-23 23:53:20 +00:00
space1,
many1(standard_set_object_matcher),
trailing_whitespace,
2023-03-24 20:37:34 +00:00
))(input)?;
Ok((remaining, (star_count, ws, title, ws2)))
2023-03-23 23:35:32 +00:00
}
2023-03-27 19:08:29 +00:00
#[tracing::instrument(ret, level = "debug")]
2023-03-25 18:10:22 +00:00
fn headline_end<'r, 's>(_context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
2023-03-25 15:22:59 +00:00
alt((line_ending, eof))(input)
2023-03-23 23:53:20 +00:00
}