Initial setup for the parser.

2022-07-15 23:26:49 -04:00 · 2022-07-15 23:26:49 -04:00 · ee9e6297a6
commit ee9e6297a6
13 changed files with 350 additions and 0 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -0,0 +1,3 @@
+**/.git
+target
+Cargo.lock
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,2 @@
+/target
+Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@ -0,0 +1,17 @@
+[package]
+name = "toy"
+version = "0.1.0"
+edition = "2021"
+
+[[bin]]
+name = "toy"
+path = "src/main.rs"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+log = "0.4.17"
+nom = "7.1.1"
+pretty_env_logger = "0.4.0"
+
+[features]
--- a/rustfmt.toml
+++ b/rustfmt.toml
@ -0,0 +1,13 @@
+imports_granularity = "Item"
+
+# In rustfmt 2.0 I will want to adjust these settings.
+#
+# max_width controls the max length of a line before rustfmt gives up
+# but that also scales the length of a bunch of other lines
+# automatically due to width_heuristics. I want to find a way to enable
+# rustfmt to work on longer lines when necessary without making my
+# regular code too wide.
+#
+# max_width = 100
+# error_on_line_overflow = true
+# width_heuristics = "Off"
--- a/src/main.rs
+++ b/src/main.rs
@ -0,0 +1,14 @@
+use nom::multi::many1;
+
+use crate::parser::paragraph;
+
+mod parser;
+
+const TEST_DOC: &'static str = include_str!("../toy_language.txt");
+
+/// Entry point: initialises logging, echoes the embedded test document, then
+/// pretty-prints the result of parsing it as one or more paragraphs.
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    pretty_env_logger::init();
+
+    // Show the raw input first so the parse result below can be compared to it.
+    println!("{}\n\n\n", TEST_DOC);
+
+    let parsed = many1(paragraph)(TEST_DOC);
+    println!("{:#?}", parsed);
+
+    Ok(())
+}
--- a/src/parser/bold_parser.rs
+++ b/src/parser/bold_parser.rs
@ -0,0 +1,16 @@
+//! Text between asterisks to make it bold.
+use super::failable_sequence::failable_sequence;
+use super::nom_context::NomContext;
+use super::text::bold_end;
+use super::text::bold_start;
+use super::text_element_parser::text_element;
+use nom::branch::alt;
+use nom::combinator::map;
+use nom::combinator::not;
+use nom::combinator::recognize;
+use nom::error::VerboseError;
+use nom::sequence::tuple;
+
+// Sequence
+//
+// Expands to `pub fn bold(context)`, which returns a parser that matches
+// `bold_start`, then `text_element`s until `bold_end`, and yields the whole
+// matched slice as a `Sequence`. `i` and `context` only name the generated
+// closure argument and function parameter.
+
+failable_sequence!(bold, i, context, bold_start, text_element, bold_end);
--- a/src/parser/failable_sequence.rs
+++ b/src/parser/failable_sequence.rs
@ -0,0 +1,52 @@
+/// Defines a context-aware parser constructor `$name` for a delimited run of
+/// text elements.
+///
+/// Arguments:
+/// * `$name` - name of the generated `pub fn`.
+/// * `$inp` - identifier used for the generated closure's input parameter.
+/// * `$context` - identifier used for the generated function's `&NomContext<F>`
+///   parameter.
+/// * `$begin_matcher` - parser expression for the opening delimiter.
+/// * `$element_matcher` - expression that is called with `&NomContext<F>` and
+///   returns the parser for one element inside the sequence.
+/// * `$success_matcher` - parser expression for the closing delimiter.
+///
+/// The generated function returns a parser that yields a
+/// `crate::parser::text::Sequence` whose `contents` is the entire matched
+/// slice (opening delimiter, elements and closing delimiter).
+///
+/// The expansion uses `NomContext`, `VerboseError`, `alt`, `recognize`, `map`,
+/// `not` and `tuple` unqualified, so those names must be in scope wherever the
+/// macro is invoked.
+macro_rules! failable_sequence {
+    ($name:ident,$inp:ident,$context:ident,$begin_matcher:expr,$element_matcher:expr,$success_matcher:expr) => {
+        pub fn $name<'b, F>(
+            $context: &'b NomContext<F>,
+        ) -> impl for<'a> FnMut(
+            &'a str,
+        ) -> nom::IResult<
+            &'a str,
+            crate::parser::text::Sequence<'a>,
+            VerboseError<&'a str>,
+        > + 'b
+        where
+            F: for<'a> nom::Parser<&'a str, &'a str, VerboseError<&'a str>>,
+        {
+            // Combined matcher: the inherited fail matcher OR this sequence's
+            // own closing delimiter.
+            // NOTE(review): this is only fed into `other_new_context` below,
+            // which the returned parser never reads - looks like work in
+            // progress; confirm before relying on it.
+            let fail_matcher = $context.fail_matcher.clone();
+            let new_fail_matcher = alt((
+                |i| fail_matcher.borrow_mut().parse(i),
+                recognize($success_matcher),
+            ));
+
+            move |$inp: &str| {
+                // Elements inside the sequence are parsed with bold disabled.
+                let new_context = $context.with_no_bold();
+                // let other_new_context = NomContext::with_additional_fail_matcher(
+                //     |i: &str| recognize($success_matcher)(i),
+                //     $context,
+                // );
+                let other_new_context = super::nom_context::NomContext::new(new_fail_matcher);
+                let element_matcher = recognize($element_matcher(&new_context));
+                let local_fail_matcher = $context.fail_matcher.clone();
+                // Opening delimiter, then elements until the closing delimiter.
+                // Both the element and the closing delimiter are guarded by
+                // `not(fail_matcher)`, so neither can match at a position where
+                // the context's fail matcher succeeds. `recognize` turns the
+                // whole match into the consumed slice.
+                let ret = map(
+                    recognize(tuple((
+                        $begin_matcher,
+                        nom::multi::many_till(
+                            nom::sequence::preceded(
+                                not(|i| local_fail_matcher.borrow_mut().parse(i)),
+                                element_matcher,
+                            ),
+                            nom::sequence::preceded(
+                                not(|i| local_fail_matcher.borrow_mut().parse(i)),
+                                $success_matcher,
+                            ),
+                        ),
+                    ))),
+                    |s: &str| crate::parser::text::Sequence { contents: s },
+                )($inp)?;
+                Ok(ret)
+            }
+        }
+    };
+}
+
+pub(crate) use failable_sequence;
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@ -0,0 +1,7 @@
+mod bold_parser;
+mod failable_sequence;
+mod nom_context;
+mod parser_with_context;
+mod text;
+mod text_element_parser;
+pub use text::paragraph;
--- a/src/parser/nom_context.rs
+++ b/src/parser/nom_context.rs
@ -0,0 +1,34 @@
+use nom::error::VerboseError;
+use nom::Parser;
+use std::cell::RefCell;
+use std::rc::Rc;
+
+/// State handed to context-aware parsers.
+#[derive(Clone)]
+pub struct NomContext<F> {
+    /// Parser that context-aware parsers consult through `not`: they refuse to
+    /// match at a position where it succeeds. Held behind `Rc<RefCell<_>>` so
+    /// derived contexts share the same matcher.
+    pub fail_matcher: Rc<RefCell<F>>,
+
+    /// You can't have nested bolds in org-mode
+    pub can_match_bold: bool,
+    /// Link counterpart of `can_match_bold`.
+    pub can_match_link: bool,
+}
+
+impl<F> NomContext<F>
+where
+    F: for<'a> Parser<&'a str, &'a str, VerboseError<&'a str>>,
+{
+    /// Creates a context around `fail_matcher` with bold and link matching
+    /// both enabled.
+    pub fn new(fail_matcher: F) -> Self {
+        let fail_matcher = Rc::new(RefCell::new(fail_matcher));
+        Self {
+            fail_matcher,
+            can_match_bold: true,
+            can_match_link: true,
+        }
+    }
+
+    /// Returns a copy of this context that shares the same fail matcher but
+    /// has bold matching disabled; the link flag is carried over unchanged.
+    pub fn with_no_bold(&self) -> Self {
+        Self {
+            fail_matcher: Rc::clone(&self.fail_matcher),
+            can_match_bold: false,
+            can_match_link: self.can_match_link,
+        }
+    }
+}
--- a/src/parser/parser_with_context.rs
+++ b/src/parser/parser_with_context.rs
@ -0,0 +1,14 @@
+/// Defines a `pub fn $name(context)` that returns a parser closure whose body
+/// is `$fnbody`.
+///
+/// Arguments:
+/// * `$name` - name of the generated function.
+/// * `$typ` - output type of the returned parser.
+/// * `$inp` - identifier for the closure's `&str` input, usable in `$fnbody`.
+/// * `$context` - identifier for the `&NomContext<F>` parameter, usable in
+///   `$fnbody`.
+/// * `$fnbody` - block evaluated as the closure body; it must produce an
+///   `IResult<&str, $typ, VerboseError<&str>>`.
+///
+/// The expansion uses `NomContext`, `IResult` and `VerboseError` unqualified,
+/// so those names must be in scope wherever the macro is invoked.
+macro_rules! parser_with_context {
+    ($name:ident,$typ:ty,$inp:ident,$context:ident,$fnbody:block) => {
+        pub fn $name<F>(
+            $context: &NomContext<F>,
+        ) -> impl for<'a> FnMut(&'a str) -> IResult<&'a str, $typ, VerboseError<&'a str>> + '_
+        where
+            F: for<'a> nom::Parser<&'a str, &'a str, VerboseError<&'a str>>,
+        {
+            // NOTE(review): the closure is not `move`; confirm that the way it
+            // captures `$context` satisfies the `'_` bound on the return type.
+            |$inp: &str| $fnbody
+        }
+    };
+}
+
+pub(crate) use parser_with_context;
--- a/src/parser/text.rs
+++ b/src/parser/text.rs
@ -0,0 +1,134 @@
+/*
+
+hypothetical link:
+fn link = many_till(text_element, link_end)
+
+but what if you start a bold?
+fn bold = many_till(text_element, bold_end) could eat the link_end
+
+Do I pass along break-conditions? Passing link_end into bold's parser?
+
+I'll try a very simple language first where asterisks always start/end bold and links are just between [ and ]. Paragraphs will have a blank line between them.
+
+*/
+use nom::bytes::complete::tag;
+use nom::character::complete::alphanumeric1;
+use nom::character::complete::line_ending;
+use nom::character::complete::space1;
+use nom::combinator::map;
+use nom::combinator::recognize;
+use nom::error::VerboseError;
+use nom::multi::many_till;
+use nom::sequence::tuple;
+use nom::IResult;
+
+pub type Res<T, U> = IResult<T, U, VerboseError<T>>;
+
+/// One parsed piece of text. Each variant wraps the struct of the same name.
+#[derive(Debug)]
+pub enum TextElement<'a> {
+    Span(Span<'a>),
+    Space(Space<'a>),
+    LineBreak(LineBreak<'a>),
+    Symbol(Symbol<'a>),
+    Bold(Bold<'a>),
+    Link(Link<'a>),
+}
+
+/// A run of alphanumeric characters, as produced by `span`.
+#[derive(Debug)]
+pub struct Span<'a> {
+    contents: &'a str,
+}
+
+/// Horizontal whitespace matched by nom's `space1`, as produced by `space`.
+#[derive(Debug)]
+pub struct Space<'a> {
+    contents: &'a str,
+}
+
+/// A single line ending, as produced by `line_break`.
+#[derive(Debug)]
+pub struct LineBreak<'a> {
+    contents: &'a str,
+}
+
+/// A literal tag matched by a parser returned from `symbol`.
+#[derive(Debug)]
+pub struct Symbol<'a> {
+    contents: &'a str,
+}
+
+/// The elements of a blank line: any leading `Space` elements followed by the
+/// terminating `LineBreak`, as assembled by `blank_line`.
+#[derive(Debug)]
+pub struct BlankLine<'a> {
+    contents: Vec<TextElement<'a>>,
+}
+
+/// The raw text matched by a parser generated with `failable_sequence!`,
+/// delimiters included.
+#[derive(Debug)]
+pub struct Sequence<'a> {
+    pub contents: &'a str,
+}
+
+/// Bold text. Nothing in this commit constructs it yet.
+#[derive(Debug)]
+pub struct Bold<'a> {
+    pub contents: &'a str,
+}
+
+/// A link. Nothing in this commit constructs it yet.
+#[derive(Debug)]
+pub struct Link<'a> {
+    contents: &'a str,
+}
+
+/// Parses one line ending and wraps the matched text in a `LineBreak`.
+pub fn line_break(input: &str) -> Res<&str, LineBreak> {
+    line_ending(input).map(|(rest, matched)| (rest, LineBreak { contents: matched }))
+}
+
+/// Parses the whitespace accepted by nom's `space1` and wraps it in a `Space`.
+pub fn space(input: &str) -> Res<&str, Space> {
+    space1(input).map(|(rest, matched)| (rest, Space { contents: matched }))
+}
+
+/// Parses one or more alphanumeric characters and wraps them in a `Span`.
+pub fn span(input: &str) -> Res<&str, Span> {
+    alphanumeric1(input).map(|(rest, matched)| (rest, Span { contents: matched }))
+}
+
+/// Builds a parser that matches the literal `symbol_tag` and wraps the matched
+/// text in a `Symbol`.
+pub fn symbol(symbol_tag: &'static str) -> impl for<'a> Fn(&'a str) -> Res<&'a str, Symbol<'a>> {
+    move |i: &str| {
+        tag(symbol_tag)(i).map(|(rest, matched)| (rest, Symbol { contents: matched }))
+    }
+}
+
+/// Matches a line made up solely of optional whitespace and a line break.
+///
+/// The caller must make sure parsing starts at the beginning of a line; this
+/// function does not check that itself.
+fn blank_line(input: &str) -> Res<&str, BlankLine> {
+    let (remaining, (mut contents, newline)) = many_till(
+        map(space, TextElement::Space),
+        map(line_break, TextElement::LineBreak),
+    )(input)?;
+
+    // The terminating line break is kept as the last element of the line.
+    contents.push(newline);
+    Ok((remaining, BlankLine { contents }))
+}
+
+/// Matches the `*` that opens bold text, returned as a `TextElement::Symbol`.
+pub fn bold_start(input: &str) -> Res<&str, TextElement> {
+    let (rest, marker) = symbol("*")(input)?;
+    Ok((rest, TextElement::Symbol(marker)))
+}
+
+/// Matches the `*` that closes bold text, returned as a `TextElement::Symbol`.
+pub fn bold_end(input: &str) -> Res<&str, TextElement> {
+    let (rest, marker) = symbol("*")(input)?;
+    Ok((rest, TextElement::Symbol(marker)))
+}
+
+/// Matches the `[` that opens a link, returned as a `TextElement::Symbol`.
+pub fn link_start(input: &str) -> Res<&str, TextElement> {
+    let (rest, marker) = symbol("[")(input)?;
+    Ok((rest, TextElement::Symbol(marker)))
+}
+
+/// Matches the `]` that closes a link, returned as a `TextElement::Symbol`.
+pub fn link_end(input: &str) -> Res<&str, TextElement> {
+    let (rest, marker) = symbol("]")(input)?;
+    Ok((rest, TextElement::Symbol(marker)))
+}
+
+/// Parser for one paragraph, declared to yield the paragraph's elements
+/// together with a trailing `&str`.
+///
+/// # Panics
+///
+/// Not implemented yet: the body is `todo!()`, so every call panics.
+pub fn paragraph(input: &str) -> Res<&str, (Vec<TextElement>, &str)> {
+    todo!()
+    // Sketch of the intended implementation (refers to a type that does not exist yet):
+    // many_till(TextElementParser::new(paragraph_end), paragraph_end)(input)
+}
+
+/// Matches the end of a paragraph, i.e. a line break followed by a blank line,
+/// and returns the matched text.
+fn paragraph_end(input: &str) -> Res<&str, &str> {
+    let closing_break = map(line_break, TextElement::LineBreak);
+    recognize(tuple((closing_break, blank_line)))(input)
+}
--- a/src/parser/text_element_parser.rs
+++ b/src/parser/text_element_parser.rs
@ -0,0 +1,33 @@
+//! A single element of text.
+use super::nom_context::NomContext;
+use super::parser_with_context::parser_with_context;
+use super::text::line_break;
+use super::text::space;
+use super::text::span;
+use super::text::symbol;
+use super::text::TextElement;
+use nom::branch::alt;
+use nom::combinator::map;
+use nom::combinator::not;
+use nom::error::VerboseError;
+use nom::IResult;
+
+// Generates `text_element(context)`, a parser for a single `TextElement`.
+//
+// The parser first fails if the context's fail matcher matches at the current
+// position. Otherwise it tries, in order: an alphanumeric span, the symbols
+// `*`, `[` and `]`, whitespace, and a line break. Bold and link parsing are
+// not wired in yet (see the commented-out alternatives).
+parser_with_context!(text_element, TextElement, i, context, {
+    // Bail out before consuming anything if the fail matcher would succeed here.
+    not(|i| context.fail_matcher.borrow_mut().parse(i))(i)?;
+    alt((
+        // map(
+        //     BoldParser::new(slf.context.fail_matcher.clone()),
+        //     TextElement::Bold,
+        // ),
+        // map(
+        //     LinkParser::new(slf.context.fail_matcher.clone()),
+        //     TextElement::Link,
+        // ),
+        map(span, TextElement::Span),
+        map(symbol("*"), TextElement::Symbol),
+        map(symbol("["), TextElement::Symbol),
+        map(symbol("]"), TextElement::Symbol),
+        map(space, TextElement::Space),
+        map(line_break, TextElement::LineBreak),
+    ))(i)
+});
--- a/toy_language.txt
+++ b/toy_language.txt
@ -0,0 +1,11 @@
+prologue *goes here* I guess *bold
+text*
+
+I guess *regular
+
+text*
+
+[foo *bar] baz* car
+
+
+*nesting *bold entrances* and* exits