Initial setup for the parser.

This commit is contained in:
Tom Alexander 2022-07-15 23:26:49 -04:00
commit ee9e6297a6
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
13 changed files with 350 additions and 0 deletions

3
.dockerignore Normal file
View File

@ -0,0 +1,3 @@
**/.git
target
Cargo.lock

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
/target
Cargo.lock

17
Cargo.toml Normal file
View File

@ -0,0 +1,17 @@
[package]
name = "toy"
version = "0.1.0"
edition = "2021"
[[bin]]
name = "toy"
path = "src/main.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
log = "0.4.17"
nom = "7.1.1"
pretty_env_logger = "0.4.0"
[features]

13
rustfmt.toml Normal file
View File

@ -0,0 +1,13 @@
imports_granularity = "Item"
# In rustfmt 2.0 I will want to adjust these settings.
#
# max_width controls the max length of a line before rustfmt gives up
# but that also scales the length of a bunch of other lines
# automatically due to width_heuristics. I want to find a way to enable
# rustfmt to work on longer lines when necessary without making my
# regular code too wide.
#
# max_width = 100
# error_on_line_overflow = true
# width_heuristics = "Off"

14
src/main.rs Normal file
View File

@ -0,0 +1,14 @@
use nom::multi::many1;
use crate::parser::paragraph;
mod parser;
/// Sample document embedded at compile time from `../toy_language.txt`
/// (the path is resolved relative to this source file).
///
/// The `'static` lifetime is implied for references in `const` items, so it
/// is not spelled out.
const TEST_DOC: &str = include_str!("../toy_language.txt");
/// Entry point: initializes logging, echoes the embedded sample document,
/// then pretty-prints the result of applying `paragraph` one or more times
/// to that document.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    pretty_env_logger::init();

    // Echo the raw input first so it can be compared with the parse output.
    println!("{}\n\n\n", TEST_DOC);

    let parse_result = many1(paragraph)(TEST_DOC);
    println!("{:#?}", parse_result);

    Ok(())
}

16
src/parser/bold_parser.rs Normal file
View File

@ -0,0 +1,16 @@
//! Text between asterisks to make it bold.
use super::failable_sequence::failable_sequence;
use super::nom_context::NomContext;
use super::text::bold_end;
use super::text::bold_start;
use super::text_element_parser::text_element;
use nom::branch::alt;
use nom::combinator::map;
use nom::combinator::not;
use nom::combinator::recognize;
use nom::error::VerboseError;
use nom::sequence::tuple;
// Sequence
//
// Generates `bold(context)`, a context-aware parser built by
// `failable_sequence!`: it matches `bold_start`, then `text_element`s up to
// `bold_end`, and yields the whole matched text as a `Sequence`.
failable_sequence!(bold, i, context, bold_start, text_element, bold_end);

View File

@ -0,0 +1,52 @@
/// Generates a context-aware "sequence" parser function named `$name`.
///
/// Macro arguments:
/// - `$name`: name of the generated `pub fn`.
/// - `$inp`: identifier bound to the input of the returned closure.
/// - `$context`: identifier bound to the `&NomContext<F>` parameter.
/// - `$begin_matcher`: parser for the opening delimiter.
/// - `$element_matcher`: function that takes `&NomContext<F>` and returns the
///   parser for one element of the sequence.
/// - `$success_matcher`: parser for the closing delimiter.
///
/// The generated function returns a closure that matches `$begin_matcher`
/// followed by elements up to `$success_matcher`, and yields the entire
/// matched text as a `crate::parser::text::Sequence`. Both the element and
/// the closing delimiter are only attempted at positions where the context's
/// fail matcher does not match.
///
/// `NomContext`, `VerboseError`, `alt`, `recognize`, `map`, `not` and `tuple`
/// are written unqualified in the expansion, so the invoking module is
/// expected to have them in scope.
macro_rules! failable_sequence {
($name:ident,$inp:ident,$context:ident,$begin_matcher:expr,$element_matcher:expr,$success_matcher:expr) => {
pub fn $name<'b, F>(
$context: &'b NomContext<F>,
) -> impl for<'a> FnMut(
&'a str,
) -> nom::IResult<
&'a str,
crate::parser::text::Sequence<'a>,
VerboseError<&'a str>,
> + 'b
where
F: for<'a> nom::Parser<&'a str, &'a str, VerboseError<&'a str>>,
{
// Combined matcher: the context's existing fail matcher, or this
// sequence's own closing delimiter.
let fail_matcher = $context.fail_matcher.clone();
let new_fail_matcher = alt((
|i| fail_matcher.borrow_mut().parse(i),
recognize($success_matcher),
));
move |$inp: &str| {
// Same fail matcher as the incoming context, with bold matching disabled.
let new_context = $context.with_no_bold();
// let other_new_context = NomContext::with_additional_fail_matcher(
// |i: &str| recognize($success_matcher)(i),
// $context,
// );
// NOTE(review): `other_new_context` is not referenced again below; the
// element matcher is built from `new_context`, so the combined matcher
// above does not take part in the parse yet — confirm this is intended
// for the initial setup.
let other_new_context = super::nom_context::NomContext::new(new_fail_matcher);
let element_matcher = recognize($element_matcher(&new_context));
// Handle on the incoming context's fail matcher, used to guard both
// branches of `many_till` below.
let local_fail_matcher = $context.fail_matcher.clone();
let ret = map(
// `recognize` discards the structured output and keeps the matched text.
recognize(tuple((
$begin_matcher,
nom::multi::many_till(
// An element is only accepted where the fail matcher does not match.
nom::sequence::preceded(
not(|i| local_fail_matcher.borrow_mut().parse(i)),
element_matcher,
),
// The closing delimiter is guarded by the same check.
nom::sequence::preceded(
not(|i| local_fail_matcher.borrow_mut().parse(i)),
$success_matcher,
),
),
))),
|s: &str| crate::parser::text::Sequence { contents: s },
)($inp)?;
Ok(ret)
}
}
};
}
pub(crate) use failable_sequence;

7
src/parser/mod.rs Normal file
View File

@ -0,0 +1,7 @@
mod bold_parser;
mod failable_sequence;
mod nom_context;
mod parser_with_context;
mod text;
mod text_element_parser;
pub use text::paragraph;

34
src/parser/nom_context.rs Normal file
View File

@ -0,0 +1,34 @@
use nom::error::VerboseError;
use nom::Parser;
use std::cell::RefCell;
use std::rc::Rc;
/// State handed down to context-aware parsers.
///
/// Cloning is shallow with respect to `fail_matcher`: the clone shares the
/// same underlying matcher through the `Rc`.
#[derive(Clone)]
pub struct NomContext<F> {
    /// Matcher that parsers consult before attempting a match (see
    /// `text_element`); held behind `Rc<RefCell<_>>` so derived contexts can
    /// share it.
    pub fail_matcher: Rc<RefCell<F>>,

    /// You can't have nested bolds in org-mode
    pub can_match_bold: bool,

    /// Link counterpart of `can_match_bold`. NOTE(review): no code visible
    /// here reads this flag yet.
    pub can_match_link: bool,
}
impl<F> NomContext<F>
where
    F: for<'a> Parser<&'a str, &'a str, VerboseError<&'a str>>,
{
    /// Wraps `fail_matcher` in a fresh context with both bold and link
    /// matching enabled.
    pub fn new(fail_matcher: F) -> Self {
        let shared_matcher = Rc::new(RefCell::new(fail_matcher));
        Self {
            fail_matcher: shared_matcher,
            can_match_bold: true,
            can_match_link: true,
        }
    }

    /// Returns a context that shares this one's fail matcher and link flag
    /// but has bold matching switched off.
    pub fn with_no_bold(&self) -> Self {
        Self {
            fail_matcher: Rc::clone(&self.fail_matcher),
            can_match_bold: false,
            can_match_link: self.can_match_link,
        }
    }
}

View File

@ -0,0 +1,14 @@
/// Generates a `pub fn $name(context)` that returns a parser closure.
///
/// Macro arguments:
/// - `$name`: name of the generated function.
/// - `$typ`: output type of the returned parser.
/// - `$inp`: identifier bound to the closure's `&str` input inside `$fnbody`.
/// - `$context`: identifier bound to the `&NomContext<F>` argument inside `$fnbody`.
/// - `$fnbody`: block used verbatim as the closure body; it must evaluate to
///   `IResult<&str, $typ, VerboseError<&str>>`.
///
/// `NomContext`, `IResult` and `VerboseError` are written unqualified in the
/// expansion, so the invoking module is expected to have them in scope.
macro_rules! parser_with_context {
($name:ident,$typ:ty,$inp:ident,$context:ident,$fnbody:block) => {
pub fn $name<F>(
$context: &NomContext<F>,
// The `+ '_` bound ties the returned closure to the borrow of `$context`.
) -> impl for<'a> FnMut(&'a str) -> IResult<&'a str, $typ, VerboseError<&'a str>> + '_
where
F: for<'a> nom::Parser<&'a str, &'a str, VerboseError<&'a str>>,
{
|$inp: &str| $fnbody
}
};
}
pub(crate) use parser_with_context;

134
src/parser/text.rs Normal file
View File

@ -0,0 +1,134 @@
/*
hypothetical link:
fn link = many_till(text_element, link_end)
but what if you start a bold?
fn bold = many_till(text_element, bold_end) could eat the link_end
Do I pass along break-conditions? Passing link_end into bold's parser?
I'll try a very simple language first where asterisks always start/end bold and links are just between [ and ]. Paragraphs will have a blank line between them.
*/
use nom::bytes::complete::tag;
use nom::character::complete::alphanumeric1;
use nom::character::complete::line_ending;
use nom::character::complete::space1;
use nom::combinator::map;
use nom::combinator::recognize;
use nom::error::VerboseError;
use nom::multi::many_till;
use nom::sequence::tuple;
use nom::IResult;
pub type Res<T, U> = IResult<T, U, VerboseError<T>>;
/// One parsed piece of text; each variant wraps the struct of the same name.
#[derive(Debug)]
pub enum TextElement<'a> {
    /// A run of alphanumeric characters (see `span`).
    Span(Span<'a>),
    /// A run of spaces/tabs (see `space`).
    Space(Space<'a>),
    /// A single line ending (see `line_break`).
    LineBreak(LineBreak<'a>),
    /// A literal marker such as `*`, `[` or `]` (see `symbol`).
    Symbol(Symbol<'a>),
    /// Bold text.
    Bold(Bold<'a>),
    /// A link.
    Link(Link<'a>),
}
/// Text matched by the `span` parser.
#[derive(Debug)]
pub struct Span<'a> {
    /// The matched slice of the input.
    contents: &'a str,
}
/// Whitespace matched by the `space` parser.
#[derive(Debug)]
pub struct Space<'a> {
    /// The matched slice of the input.
    contents: &'a str,
}
/// Line ending matched by the `line_break` parser.
#[derive(Debug)]
pub struct LineBreak<'a> {
    /// The matched slice of the input.
    contents: &'a str,
}
/// Literal marker matched by a parser built with `symbol`.
#[derive(Debug)]
pub struct Symbol<'a> {
    /// The matched slice of the input.
    contents: &'a str,
}
/// Output of `blank_line`: the whitespace elements of the line followed by
/// its terminating line break.
#[derive(Debug)]
pub struct BlankLine<'a> {
    /// Elements in input order; the last one is the line break.
    contents: Vec<TextElement<'a>>,
}
/// Output of parsers generated by `failable_sequence!`.
#[derive(Debug)]
pub struct Sequence<'a> {
    /// The full text matched by the sequence, delimiters included.
    pub contents: &'a str,
}
/// Payload of `TextElement::Bold`.
#[derive(Debug)]
pub struct Bold<'a> {
    /// Slice of the input belonging to the bold text.
    pub contents: &'a str,
}
/// Payload of `TextElement::Link`.
#[derive(Debug)]
pub struct Link<'a> {
    /// Slice of the input belonging to the link.
    contents: &'a str,
}
pub fn line_break(input: &str) -> Res<&str, LineBreak> {
map(line_ending, |s: &str| LineBreak { contents: s })(input)
}
pub fn space(input: &str) -> Res<&str, Space> {
map(space1, |s: &str| Space { contents: s })(input)
}
pub fn span(input: &str) -> Res<&str, Span> {
map(alphanumeric1, |s: &str| Span { contents: s })(input)
}
/// Builds a parser that matches the literal `symbol_tag` at the start of its
/// input and yields the matched text as a [`Symbol`].
pub fn symbol(symbol_tag: &'static str) -> impl for<'a> Fn(&'a str) -> Res<&'a str, Symbol<'a>> {
    move |input: &str| {
        let matched: Res<&str, &str> = tag(symbol_tag)(input);
        matched.map(|(remaining, literal)| (remaining, Symbol { contents: literal }))
    }
}
/// A line containing only whitespace and then a line break
///
/// It is up to the caller to ensure this is called at the start of a line.
fn blank_line(input: &str) -> Res<&str, BlankLine> {
map(
many_till(
map(space, TextElement::Space),
map(line_break, TextElement::LineBreak),
),
|(mut whitespace, end_of_line)| {
whitespace.push(end_of_line);
BlankLine {
contents: whitespace,
}
},
)(input)
}
/// Matches the `*` that opens bold text, yielding it as a `TextElement::Symbol`.
pub fn bold_start(input: &str) -> Res<&str, TextElement> {
    let asterisk = symbol("*");
    asterisk(input).map(|(remaining, marker)| (remaining, TextElement::Symbol(marker)))
}
/// Matches the `*` that closes bold text, yielding it as a `TextElement::Symbol`.
pub fn bold_end(input: &str) -> Res<&str, TextElement> {
    let asterisk = symbol("*");
    asterisk(input).map(|(remaining, marker)| (remaining, TextElement::Symbol(marker)))
}
/// Matches the `[` that opens a link, yielding it as a `TextElement::Symbol`.
pub fn link_start(input: &str) -> Res<&str, TextElement> {
    let open_bracket = symbol("[");
    open_bracket(input).map(|(remaining, marker)| (remaining, TextElement::Symbol(marker)))
}
/// Matches the `]` that closes a link, yielding it as a `TextElement::Symbol`.
pub fn link_end(input: &str) -> Res<&str, TextElement> {
    let close_bracket = symbol("]");
    close_bracket(input).map(|(remaining, marker)| (remaining, TextElement::Symbol(marker)))
}
/// Placeholder for the paragraph parser.
///
/// # Panics
///
/// Not implemented yet: the body is `todo!()`, so every call panics.
pub fn paragraph(input: &str) -> Res<&str, (Vec<TextElement>, &str)> {
todo!()
// Sketch of the intended implementation (`TextElementParser` is not defined
// in the code visible here):
// many_till(TextElementParser::new(paragraph_end), paragraph_end)(input)
}
/// Recognizes the end of a paragraph: a line break immediately followed by a
/// blank line. On success the output is the text consumed by both.
fn paragraph_end(input: &str) -> Res<&str, &str> {
    let closing_line_break = map(line_break, TextElement::LineBreak);
    let terminator = tuple((closing_line_break, blank_line));
    recognize(terminator)(input)
}

View File

@ -0,0 +1,33 @@
//! A single element of text.
use super::nom_context::NomContext;
use super::parser_with_context::parser_with_context;
use super::text::line_break;
use super::text::space;
use super::text::span;
use super::text::symbol;
use super::text::TextElement;
use nom::branch::alt;
use nom::combinator::map;
use nom::combinator::not;
use nom::error::VerboseError;
use nom::IResult;
// Generates `text_element(context)`: a parser for exactly one `TextElement`.
// It refuses to match wherever the context's fail matcher matches, and
// otherwise tries the alternatives below in order.
parser_with_context!(text_element, TextElement, i, context, {
// `not` errors out (propagated by `?`) when the fail matcher matches here.
not(|i| context.fail_matcher.borrow_mut().parse(i))(i)?;
alt((
// Bold and link alternatives are disabled for now.
// map(
// BoldParser::new(slf.context.fail_matcher.clone()),
// TextElement::Bold,
// ),
// map(
// LinkParser::new(slf.context.fail_matcher.clone()),
// TextElement::Link,
// ),
map(span, TextElement::Span),
// Delimiter characters are emitted as plain symbols.
map(symbol("*"), TextElement::Symbol),
map(symbol("["), TextElement::Symbol),
map(symbol("]"), TextElement::Symbol),
map(space, TextElement::Space),
map(line_break, TextElement::LineBreak),
))(i)
});

11
toy_language.txt Normal file
View File

@ -0,0 +1,11 @@
prologue *goes here* I guess *bold
text*
I guess *regular
text*
[foo *bar] baz* car
*nesting *bold entrances* and* exits