organic/src/parser/text.rs

147 lines
3.6 KiB
Rust
Raw Normal View History

2022-10-15 04:01:37 +00:00
use std::cell::RefCell;
use std::rc::Rc;
2022-07-16 03:26:49 +00:00
/*
Hypothetical link parser:
    fn link = many_till(text_element, link_end)
But what if a bold starts inside the link?
    fn bold = many_till(text_element, bold_end)
could consume the link_end before the link parser ever sees it.
Should break conditions be passed along, e.g. by passing link_end into bold's parser?
I'll try a very simple language first, where asterisks always start/end bold and links are just the text between [ and ]. Paragraphs will have a blank line between them.
*/
use nom::bytes::complete::tag;
use nom::character::complete::alphanumeric1;
use nom::character::complete::line_ending;
use nom::character::complete::space1;
use nom::combinator::map;
use nom::combinator::recognize;
use nom::error::VerboseError;
use nom::multi::many1;
2022-07-16 03:26:49 +00:00
use nom::multi::many_till;
use nom::sequence::tuple;
use nom::IResult;
2022-11-25 22:54:26 +00:00
use tracing::instrument;
2022-07-16 03:26:49 +00:00
2022-11-24 20:14:53 +00:00
use super::nom_context::ContextTree;
2022-11-24 20:40:07 +00:00
use super::nom_context::OrgModeContextTree;
use super::parser_with_context::parser_with_context;
2022-10-15 18:16:52 +00:00
use super::text_element_parser::paragraph;
2022-07-16 03:26:49 +00:00
/// Parser result type used by the functions in this module: a nom `IResult`
/// over input `T` and output `U` whose error type is `VerboseError<T>`.
pub type Res<T, U> = IResult<T, U, VerboseError<T>>;
/// One parsed piece of text.
///
/// Each variant wraps the struct of the same name, all of which borrow from
/// the input string for `'a`.
#[derive(Debug)]
pub enum TextElement<'a> {
    /// A run of text as produced by [`span`].
    Span(Span<'a>),
    /// Horizontal whitespace as produced by [`space`].
    Space(Space<'a>),
    /// A line ending as produced by [`line_break`].
    LineBreak(LineBreak<'a>),
    /// A literal marker as produced by [`symbol`] (e.g. `*`, `[`, `]`).
    Symbol(Symbol<'a>),
    /// Bold text.
    Bold(Bold<'a>),
    /// A link.
    Link(Link<'a>),
}
/// Slice of the input matched by [`span`] (nom's `alphanumeric1`).
#[derive(Debug)]
pub struct Span<'a> {
    /// The matched text, borrowed from the input.
    contents: &'a str,
}
/// Slice of the input matched by [`space`] (nom's `space1`).
#[derive(Debug)]
pub struct Space<'a> {
    /// The matched whitespace, borrowed from the input.
    contents: &'a str,
}
/// Slice of the input matched by [`line_break`] (nom's `line_ending`).
#[derive(Debug)]
pub struct LineBreak<'a> {
    /// The matched line ending, borrowed from the input.
    contents: &'a str,
}
/// Slice of the input matched by a parser built with [`symbol`].
#[derive(Debug)]
pub struct Symbol<'a> {
    /// The matched literal, borrowed from the input.
    contents: &'a str,
}
/// Result of [`blank_line`]: the whitespace elements found on the line,
/// followed by the terminating line break as the last element.
#[derive(Debug)]
pub struct BlankLine<'a> {
    /// `TextElement::Space` items, then one trailing `TextElement::LineBreak`.
    contents: Vec<TextElement<'a>>,
}
/// A borrowed slice of input text.
///
/// NOTE(review): nothing in the visible part of this file constructs a
/// `Sequence`; confirm its intended use against the other parser modules.
#[derive(Debug)]
pub struct Sequence<'a> {
    /// The text, borrowed from the input.
    pub contents: &'a str,
}
/// Payload of `TextElement::Bold`: a borrowed slice of input text.
///
/// NOTE(review): the parser that builds this is not in the visible part of
/// this file, so exactly which text `contents` covers should be confirmed there.
#[derive(Debug)]
pub struct Bold<'a> {
    /// The text, borrowed from the input.
    pub contents: &'a str,
}
/// Payload of `TextElement::Link`: a borrowed slice of input text.
///
/// NOTE(review): the parser that builds this is not in the visible part of
/// this file, so exactly which text `contents` covers should be confirmed there.
#[derive(Debug)]
pub struct Link<'a> {
    /// The text, borrowed from the input.
    pub contents: &'a str,
}
pub fn line_break(input: &str) -> Res<&str, LineBreak> {
map(line_ending, |s: &str| LineBreak { contents: s })(input)
}
pub fn space(input: &str) -> Res<&str, Space> {
map(space1, |s: &str| Space { contents: s })(input)
}
pub fn span(input: &str) -> Res<&str, Span> {
map(alphanumeric1, |s: &str| Span { contents: s })(input)
}
/// Builds a parser that matches the literal `symbol_tag` at the start of its
/// input and wraps the matched text in a [`Symbol`].
pub fn symbol(symbol_tag: &'static str) -> impl for<'a> Fn(&'a str) -> Res<&'a str, Symbol<'a>> {
    move |input: &str| {
        tag(symbol_tag)(input).map(|(rest, matched)| (rest, Symbol { contents: matched }))
    }
}
/// Parses a line that holds nothing but optional whitespace and then a line
/// break.
///
/// The returned [`BlankLine`] lists every whitespace element that was matched,
/// with the line break appended as the final element.
///
/// This parser does not check that it starts at the beginning of a line; the
/// caller is responsible for that.
fn blank_line(input: &str) -> Res<&str, BlankLine> {
    let whitespace = map(space, TextElement::Space);
    let terminator = map(line_break, TextElement::LineBreak);
    // Collect whitespace elements until the terminating line break matches.
    let (rest, (mut elements, newline)) = many_till(whitespace, terminator)(input)?;
    elements.push(newline);
    Ok((rest, BlankLine { contents: elements }))
}
/// Matches the opening bold marker `*` and returns it as a
/// `TextElement::Symbol`.
pub fn bold_start(input: &str) -> Res<&str, TextElement> {
    let (rest, marker) = symbol("*")(input)?;
    Ok((rest, TextElement::Symbol(marker)))
}
/// Matches the closing bold marker `*` and returns it as a
/// `TextElement::Symbol`.
pub fn bold_end(input: &str) -> Res<&str, TextElement> {
    let (rest, marker) = symbol("*")(input)?;
    Ok((rest, TextElement::Symbol(marker)))
}
/// Matches the opening link marker `[` and returns it as a
/// `TextElement::Symbol`.
pub fn link_start(input: &str) -> Res<&str, TextElement> {
    let (rest, marker) = symbol("[")(input)?;
    Ok((rest, TextElement::Symbol(marker)))
}
/// Matches the closing link marker `]` and returns it as a
/// `TextElement::Symbol`.
pub fn link_end(input: &str) -> Res<&str, TextElement> {
    let (rest, marker) = symbol("]")(input)?;
    Ok((rest, TextElement::Symbol(marker)))
}
2022-07-17 01:32:23 +00:00
pub fn paragraph_end(input: &str) -> Res<&str, &str> {
2022-07-16 03:26:49 +00:00
recognize(tuple((map(line_break, TextElement::LineBreak), blank_line)))(input)
}
pub fn document(input: &str) -> Res<&str, Vec<(Vec<TextElement>, &str)>> {
2022-11-24 20:14:53 +00:00
let initial_context = ContextTree::new();
2022-11-24 20:40:07 +00:00
let paragraph_context = initial_context.with_additional_fail_matcher(&paragraph_end);
let ret = many1(parser_with_context!(paragraph)(&paragraph_context))(input);
2022-10-15 18:16:52 +00:00
ret
}