Initial setup for the parser.
This commit is contained in:
commit
ee9e6297a6
3
.dockerignore
Normal file
3
.dockerignore
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
**/.git
|
||||||
|
target
|
||||||
|
Cargo.lock
|
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
/target
|
||||||
|
Cargo.lock
|
17
Cargo.toml
Normal file
17
Cargo.toml
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
[package]
|
||||||
|
name = "toy"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "toy"
|
||||||
|
path = "src/main.rs"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
log = "0.4.17"
|
||||||
|
nom = "7.1.1"
|
||||||
|
pretty_env_logger = "0.4.0"
|
||||||
|
|
||||||
|
[features]
|
13
rustfmt.toml
Normal file
13
rustfmt.toml
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
imports_granularity = "Item"
|
||||||
|
|
||||||
|
# In rustfmt 2.0 I will want to adjust these settings.
|
||||||
|
#
|
||||||
|
# max_width controls the max length of a line before rustfmt gives up
|
||||||
|
# but that also scales the length of a bunch of other lines
|
||||||
|
# automatically due to width_heuristics. I want to find a way to enable
|
||||||
|
# rustfmt to work on longer lines when necessary without making my
|
||||||
|
# regular code too wide.
|
||||||
|
#
|
||||||
|
# max_width = 100
|
||||||
|
# error_on_line_overflow = true
|
||||||
|
# width_heuristics = "Off"
|
14
src/main.rs
Normal file
14
src/main.rs
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
use nom::multi::many1;
|
||||||
|
|
||||||
|
use crate::parser::paragraph;
|
||||||
|
|
||||||
|
mod parser;
|
||||||
|
|
||||||
|
/// Sample document embedded at compile time from `toy_language.txt` in the
/// crate root; `main` prints it and feeds it to the parser.
// References in a `const` are always `'static`, so the lifetime is left implicit.
const TEST_DOC: &str = include_str!("../toy_language.txt");
|
||||||
|
|
||||||
|
/// Entry point: initialises logging, echoes the embedded sample document and
/// then pretty-prints the result of parsing it as one or more paragraphs.
///
/// Always returns `Ok(())`; a parse failure is printed as part of the debug
/// output rather than propagated.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    pretty_env_logger::init();

    // Show the raw input first so it can be compared with the parse below.
    println!("{}\n\n\n", TEST_DOC);

    let parsed = many1(paragraph)(TEST_DOC);
    println!("{:#?}", parsed);

    Ok(())
}
|
16
src/parser/bold_parser.rs
Normal file
16
src/parser/bold_parser.rs
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
//! Text between asterisks to make it bold.
|
||||||
|
use super::failable_sequence::failable_sequence;
|
||||||
|
use super::nom_context::NomContext;
|
||||||
|
use super::text::bold_end;
|
||||||
|
use super::text::bold_start;
|
||||||
|
use super::text_element_parser::text_element;
|
||||||
|
use nom::branch::alt;
|
||||||
|
use nom::combinator::map;
|
||||||
|
use nom::combinator::not;
|
||||||
|
use nom::combinator::recognize;
|
||||||
|
use nom::error::VerboseError;
|
||||||
|
use nom::sequence::tuple;
|
||||||
|
|
||||||
|
// Sequence
|
||||||
|
|
||||||
|
// Expands to `pub fn bold(context: &NomContext<F>)`, which returns a parser
// that matches `bold_start`, then repeated `text_element`s up to `bold_end`,
// and yields the whole matched slice wrapped in a `Sequence`.
// `i` and `context` name the closure input and the context parameter inside
// the expansion.
failable_sequence!(bold, i, context, bold_start, text_element, bold_end);
|
52
src/parser/failable_sequence.rs
Normal file
52
src/parser/failable_sequence.rs
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
/// Generates a context-aware parser for a delimited run of elements that an
/// enclosing construct is allowed to cut short.
///
/// Arguments, in order:
/// * `$name` - name of the generated `pub fn`.
/// * `$inp` - identifier bound to the input `&str` inside the generated parser.
/// * `$context` - identifier bound to the `&NomContext<F>` parameter.
/// * `$begin_matcher` - parser for the opening delimiter.
/// * `$element_matcher` - function taking `&NomContext<F>` and returning the
///   parser for a single inner element.
/// * `$success_matcher` - parser for the closing delimiter.
///
/// The generated function returns a parser that matches `$begin_matcher`, then
/// elements until `$success_matcher` matches, and yields the entire matched
/// slice (delimiters included) as a `Sequence`. Before every element and
/// before the closing delimiter the context's `fail_matcher` is consulted; if
/// it matches at that position the whole sequence fails.
///
/// The expansion uses `NomContext`, `VerboseError`, `map`, `not`, `recognize`
/// and `tuple` by their bare names, so the invoking module must import them.
macro_rules! failable_sequence {
    ($name:ident,$inp:ident,$context:ident,$begin_matcher:expr,$element_matcher:expr,$success_matcher:expr) => {
        pub fn $name<'b, F>(
            $context: &'b NomContext<F>,
        ) -> impl for<'a> FnMut(
            &'a str,
        ) -> nom::IResult<
            &'a str,
            crate::parser::text::Sequence<'a>,
            VerboseError<&'a str>,
        > + 'b
        where
            F: for<'a> nom::Parser<&'a str, &'a str, VerboseError<&'a str>>,
        {
            // TODO: inner elements should also stop at `$success_matcher`.
            // That needs a context whose fail matcher is
            // `alt((<parent fail matcher>, recognize($success_matcher)))`.
            // It cannot be built once out here and moved into a fresh
            // `NomContext` on every call, because that would make the closure
            // `FnOnce` instead of the `FnMut` promised by the return type.
            move |$inp: &str| {
                // Inner elements get a context with bold matching disabled.
                let new_context = $context.with_no_bold();
                let element_matcher = recognize($element_matcher(&new_context));
                // Shared handle to the enclosing construct's terminator.
                let local_fail_matcher = $context.fail_matcher.clone();
                // Evaluate in a `let` statement rather than as the tail
                // expression: the temporary parser borrows the locals above
                // and has to be dropped before they are.
                let ret = map(
                    recognize(tuple((
                        $begin_matcher,
                        nom::multi::many_till(
                            nom::sequence::preceded(
                                not(|i| local_fail_matcher.borrow_mut().parse(i)),
                                element_matcher,
                            ),
                            nom::sequence::preceded(
                                not(|i| local_fail_matcher.borrow_mut().parse(i)),
                                $success_matcher,
                            ),
                        ),
                    ))),
                    |s: &str| crate::parser::text::Sequence { contents: s },
                )($inp)?;
                Ok(ret)
            }
        }
    };
}
|
||||||
|
|
||||||
|
pub(crate) use failable_sequence;
|
7
src/parser/mod.rs
Normal file
7
src/parser/mod.rs
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
mod bold_parser;
|
||||||
|
mod failable_sequence;
|
||||||
|
mod nom_context;
|
||||||
|
mod parser_with_context;
|
||||||
|
mod text;
|
||||||
|
mod text_element_parser;
|
||||||
|
pub use text::paragraph;
|
34
src/parser/nom_context.rs
Normal file
34
src/parser/nom_context.rs
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
use nom::error::VerboseError;
|
||||||
|
use nom::Parser;
|
||||||
|
use std::cell::RefCell;
|
||||||
|
use std::rc::Rc;
|
||||||
|
|
||||||
|
/// State handed down to context-aware parsers.
///
/// `F` is the type of the "fail matcher": a parser that the context-aware
/// parsers consult (negated) before matching, so that an enclosing construct
/// can stop its children from consuming its terminator.
pub struct NomContext<F> {
    /// Shared, interior-mutable handle to the fail matcher. Contexts created
    /// by cloning this handle all point at the same parser.
    pub fail_matcher: Rc<RefCell<F>>,

    /// You can't have nested bolds in org-mode
    pub can_match_bold: bool,
    /// Link counterpart of `can_match_bold`.
    // NOTE(review): presumably meant to forbid nested links; confirm once a
    // link parser reads this flag.
    pub can_match_link: bool,
}

// Implemented by hand: `#[derive(Clone)]` would require `F: Clone`, but only
// the `Rc` handle is cloned, never the matcher itself.
impl<F> Clone for NomContext<F> {
    fn clone(&self) -> Self {
        NomContext {
            fail_matcher: Rc::clone(&self.fail_matcher),
            can_match_bold: self.can_match_bold,
            can_match_link: self.can_match_link,
        }
    }
}
|
||||||
|
|
||||||
|
impl<F> NomContext<F>
|
||||||
|
where
|
||||||
|
F: for<'a> Parser<&'a str, &'a str, VerboseError<&'a str>>,
|
||||||
|
{
|
||||||
|
pub fn new(fail_matcher: F) -> Self {
|
||||||
|
NomContext {
|
||||||
|
fail_matcher: Rc::new(RefCell::new(fail_matcher)),
|
||||||
|
can_match_bold: true,
|
||||||
|
can_match_link: true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn with_no_bold(&self) -> NomContext<F> {
|
||||||
|
NomContext {
|
||||||
|
fail_matcher: self.fail_matcher.clone(),
|
||||||
|
can_match_bold: false,
|
||||||
|
can_match_link: self.can_match_link,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
14
src/parser/parser_with_context.rs
Normal file
14
src/parser/parser_with_context.rs
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
/// Declares a parser that needs access to a `NomContext`.
///
/// Arguments, in order:
/// * `$name` - name of the generated `pub fn`.
/// * `$typ` - output type of the parser.
/// * `$inp` - identifier the body uses for the input `&str`.
/// * `$context` - identifier the body uses for the `&NomContext<F>`.
/// * `$fnbody` - block evaluating to
///   `IResult<&str, $typ, VerboseError<&str>>`.
///
/// The generated function takes the context by reference and returns a
/// closure that runs `$fnbody` on each input.
///
/// The expansion uses `NomContext`, `IResult` and `VerboseError` by their bare
/// names, so the invoking module must import them.
macro_rules! parser_with_context {
    ($name:ident,$typ:ty,$inp:ident,$context:ident,$fnbody:block) => {
        pub fn $name<F>(
            $context: &NomContext<F>,
        ) -> impl for<'a> FnMut(&'a str) -> IResult<&'a str, $typ, VerboseError<&'a str>> + '_
        where
            F: for<'a> nom::Parser<&'a str, &'a str, VerboseError<&'a str>>,
        {
            // `move` copies the `&NomContext` into the closure, so the
            // returned parser owns its reference instead of borrowing the
            // parameter binding (same shape as `failable_sequence!`).
            move |$inp: &str| $fnbody
        }
    };
}
|
||||||
|
|
||||||
|
pub(crate) use parser_with_context;
|
134
src/parser/text.rs
Normal file
134
src/parser/text.rs
Normal file
@ -0,0 +1,134 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
hypothetical link:
|
||||||
|
fn link = many_till(text_element, link_end)
|
||||||
|
|
||||||
|
but what if you start a bold?
|
||||||
|
fn bold = many_till(text_element, bold_end) could eat the link_end
|
||||||
|
|
||||||
|
Do I pass along break-conditions? Passing link_end into bold's parser?
|
||||||
|
|
||||||
|
I'll try a very simple language first where asterisks always start/end bold and links are just between [ and ]. Paragraphs will have a blank line between them.
|
||||||
|
|
||||||
|
*/
|
||||||
|
use nom::bytes::complete::tag;
|
||||||
|
use nom::character::complete::alphanumeric1;
|
||||||
|
use nom::character::complete::line_ending;
|
||||||
|
use nom::character::complete::space1;
|
||||||
|
use nom::combinator::map;
|
||||||
|
use nom::combinator::recognize;
|
||||||
|
use nom::error::VerboseError;
|
||||||
|
use nom::multi::many_till;
|
||||||
|
use nom::sequence::tuple;
|
||||||
|
use nom::IResult;
|
||||||
|
|
||||||
|
/// Parser result over input type `T` yielding `U`, with the error type fixed
/// to `VerboseError<T>`.
pub type Res<T, U> = IResult<T, U, VerboseError<T>>;
|
||||||
|
|
||||||
|
/// One parsed piece of inline text. Each variant wraps the struct of the same
/// name.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TextElement<'a> {
    Span(Span<'a>),
    Space(Space<'a>),
    LineBreak(LineBreak<'a>),
    Symbol(Symbol<'a>),
    Bold(Bold<'a>),
    Link(Link<'a>),
}

/// A run of alphanumeric characters, as produced by `span`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Span<'a> {
    contents: &'a str,
}

/// A run of whitespace within a line, as produced by `space`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Space<'a> {
    contents: &'a str,
}

/// A single line ending, as produced by `line_break`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LineBreak<'a> {
    contents: &'a str,
}

/// A literal marker such as `*`, `[` or `]`, as produced by `symbol`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Symbol<'a> {
    contents: &'a str,
}

/// A whitespace-only line: its spaces followed by the terminating line break,
/// as produced by `blank_line`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BlankLine<'a> {
    contents: Vec<TextElement<'a>>,
}

/// The complete slice matched by a parser generated with
/// `failable_sequence!`, delimiters included.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Sequence<'a> {
    pub contents: &'a str,
}

/// Bold text.
// NOTE(review): nothing in this file constructs `Bold` yet.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Bold<'a> {
    pub contents: &'a str,
}

/// A link.
// NOTE(review): nothing in this file constructs `Link` yet.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Link<'a> {
    contents: &'a str,
}
|
||||||
|
|
||||||
|
pub fn line_break(input: &str) -> Res<&str, LineBreak> {
|
||||||
|
map(line_ending, |s: &str| LineBreak { contents: s })(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn space(input: &str) -> Res<&str, Space> {
|
||||||
|
map(space1, |s: &str| Space { contents: s })(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn span(input: &str) -> Res<&str, Span> {
|
||||||
|
map(alphanumeric1, |s: &str| Span { contents: s })(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn symbol(symbol_tag: &'static str) -> impl for<'a> Fn(&'a str) -> Res<&'a str, Symbol<'a>> {
|
||||||
|
move |i: &str| map(tag(symbol_tag), |s: &str| Symbol { contents: s })(i)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A line containing only whitespace and then a line break
|
||||||
|
///
|
||||||
|
/// It is up to the caller to ensure this is called at the start of a line.
|
||||||
|
fn blank_line(input: &str) -> Res<&str, BlankLine> {
|
||||||
|
map(
|
||||||
|
many_till(
|
||||||
|
map(space, TextElement::Space),
|
||||||
|
map(line_break, TextElement::LineBreak),
|
||||||
|
),
|
||||||
|
|(mut whitespace, end_of_line)| {
|
||||||
|
whitespace.push(end_of_line);
|
||||||
|
BlankLine {
|
||||||
|
contents: whitespace,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn bold_start(input: &str) -> Res<&str, TextElement> {
|
||||||
|
map(symbol("*"), TextElement::Symbol)(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn bold_end(input: &str) -> Res<&str, TextElement> {
|
||||||
|
map(symbol("*"), TextElement::Symbol)(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn link_start(input: &str) -> Res<&str, TextElement> {
|
||||||
|
map(symbol("["), TextElement::Symbol)(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn link_end(input: &str) -> Res<&str, TextElement> {
|
||||||
|
map(symbol("]"), TextElement::Symbol)(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses a single paragraph.
///
/// Not implemented yet: every call panics via `todo!()`.
// NOTE(review): going by the sketch below, the output is presumably the
// paragraph's elements plus the text matched by `paragraph_end`; confirm when
// this is implemented.
pub fn paragraph(input: &str) -> Res<&str, (Vec<TextElement>, &str)> {
    // Intended shape once the element parser can be driven from here:
    //   many_till(TextElementParser::new(paragraph_end), paragraph_end)(input)
    todo!()
}
|
||||||
|
|
||||||
|
fn paragraph_end(input: &str) -> Res<&str, &str> {
|
||||||
|
recognize(tuple((map(line_break, TextElement::LineBreak), blank_line)))(input)
|
||||||
|
}
|
33
src/parser/text_element_parser.rs
Normal file
33
src/parser/text_element_parser.rs
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
//! A single element of text.
|
||||||
|
use super::nom_context::NomContext;
|
||||||
|
use super::parser_with_context::parser_with_context;
|
||||||
|
use super::text::line_break;
|
||||||
|
use super::text::space;
|
||||||
|
use super::text::span;
|
||||||
|
use super::text::symbol;
|
||||||
|
use super::text::TextElement;
|
||||||
|
use nom::branch::alt;
|
||||||
|
use nom::combinator::map;
|
||||||
|
use nom::combinator::not;
|
||||||
|
use nom::error::VerboseError;
|
||||||
|
use nom::IResult;
|
||||||
|
|
||||||
|
// Parser for one inline element. It refuses to match wherever the context's
// fail matcher matches; otherwise it tries each kind of element in order.
parser_with_context!(text_element, TextElement, i, context, {
    // Stop here if the enclosing construct's terminator starts at this point.
    not(|i| context.fail_matcher.borrow_mut().parse(i))(i)?;

    // TODO: try bold and link first (mapped to `TextElement::Bold` and
    // `TextElement::Link`, each given the context's fail matcher) once those
    // parsers can be called from here.
    let mut any_element = alt((
        map(span, TextElement::Span),
        map(symbol("*"), TextElement::Symbol),
        map(symbol("["), TextElement::Symbol),
        map(symbol("]"), TextElement::Symbol),
        map(space, TextElement::Space),
        map(line_break, TextElement::LineBreak),
    ));
    any_element(i)
});
|
11
toy_language.txt
Normal file
11
toy_language.txt
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
prologue *goes here* I guess *bold
|
||||||
|
text*
|
||||||
|
|
||||||
|
I guess *regular
|
||||||
|
|
||||||
|
text*
|
||||||
|
|
||||||
|
[foo *bar] baz* car
|
||||||
|
|
||||||
|
|
||||||
|
*nesting *bold entrances* and* exits
|
Loading…
Reference in New Issue
Block a user