Initial setup for the parser.
This commit is contained in:
commit
ee9e6297a6
3
.dockerignore
Normal file
3
.dockerignore
Normal file
@ -0,0 +1,3 @@
|
||||
**/.git
|
||||
target
|
||||
Cargo.lock
|
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
/target
|
||||
Cargo.lock
|
17
Cargo.toml
Normal file
17
Cargo.toml
Normal file
@ -0,0 +1,17 @@
|
||||
[package]
|
||||
name = "toy"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[[bin]]
|
||||
name = "toy"
|
||||
path = "src/main.rs"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
log = "0.4.17"
|
||||
nom = "7.1.1"
|
||||
pretty_env_logger = "0.4.0"
|
||||
|
||||
[features]
|
13
rustfmt.toml
Normal file
13
rustfmt.toml
Normal file
@ -0,0 +1,13 @@
|
||||
imports_granularity = "Item"
|
||||
|
||||
# In rustfmt 2.0 I will want to adjust these settings.
|
||||
#
|
||||
# max_width controls the max length of a line before rustfmt gives up
|
||||
# but that also scales the length of a bunch of other lines
|
||||
# automatically due to width_heuristics. I want to find a way to enable
|
||||
# rustfmt to work on longer lines when necessary without making my
|
||||
# regular code too wide.
|
||||
#
|
||||
# max_width = 100
|
||||
# error_on_line_overflow = true
|
||||
# width_heuristics = "Off"
|
14
src/main.rs
Normal file
14
src/main.rs
Normal file
@ -0,0 +1,14 @@
|
||||
use nom::multi::many1;
|
||||
|
||||
use crate::parser::paragraph;
|
||||
|
||||
mod parser;
|
||||
|
||||
/// Contents of `toy_language.txt`, embedded into the binary at compile time.
const TEST_DOC: &str = include_str!("../toy_language.txt");
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
pretty_env_logger::init();
|
||||
println!("{}\n\n\n", TEST_DOC);
|
||||
println!("{:#?}", many1(paragraph)(TEST_DOC));
|
||||
Ok(())
|
||||
}
|
16
src/parser/bold_parser.rs
Normal file
16
src/parser/bold_parser.rs
Normal file
@ -0,0 +1,16 @@
|
||||
//! Text between asterisks to make it bold.
|
||||
use super::failable_sequence::failable_sequence;
|
||||
use super::nom_context::NomContext;
|
||||
use super::text::bold_end;
|
||||
use super::text::bold_start;
|
||||
use super::text_element_parser::text_element;
|
||||
use nom::branch::alt;
|
||||
use nom::combinator::map;
|
||||
use nom::combinator::not;
|
||||
use nom::combinator::recognize;
|
||||
use nom::error::VerboseError;
|
||||
use nom::sequence::tuple;
|
||||
|
||||
// Sequence
|
||||
|
||||
// Generates `pub fn bold(context)`: a parser factory for a sequence that opens
// with `bold_start`, contains `text_element`s, and closes with `bold_end`.
// `i` and `context` are the identifiers the macro uses for the input and the
// `NomContext` parameter inside the generated function.
failable_sequence!(bold, i, context, bold_start, text_element, bold_end);
|
52
src/parser/failable_sequence.rs
Normal file
52
src/parser/failable_sequence.rs
Normal file
@ -0,0 +1,52 @@
|
||||
/// Generates `pub fn $name(context)`, a parser factory for a delimited run of
/// elements whose whole matched text is returned as a
/// `crate::parser::text::Sequence`.
///
/// Macro arguments, in order:
/// * `$name` - name of the generated function.
/// * `$inp` - identifier bound to the input `&str` inside the generated closure.
/// * `$context` - identifier of the generated function's `&NomContext<F>` parameter.
/// * `$begin_matcher` - parser for the opening delimiter.
/// * `$element_matcher` - function taking `&NomContext<F>` and returning the
///   parser for one inner element.
/// * `$success_matcher` - parser for the closing delimiter.
///
/// Before every attempt to match either an inner element or the closing
/// delimiter, the context's `fail_matcher` is tried under `not(...)`; if it
/// matches there, neither alternative can succeed and the sequence fails.
///
/// The expansion refers to `NomContext`, `VerboseError`, `map`, `not`,
/// `recognize` and `tuple` by their bare names, so the invoking module must
/// have them imported.
macro_rules! failable_sequence {
    ($name:ident,$inp:ident,$context:ident,$begin_matcher:expr,$element_matcher:expr,$success_matcher:expr) => {
        pub fn $name<'b, F>(
            $context: &'b NomContext<F>,
        ) -> impl for<'a> FnMut(
            &'a str,
        ) -> nom::IResult<
            &'a str,
            crate::parser::text::Sequence<'a>,
            VerboseError<&'a str>,
        > + 'b
        where
            F: for<'a> nom::Parser<&'a str, &'a str, VerboseError<&'a str>>,
        {
            move |$inp: &str| {
                // Inner elements get a context whose `can_match_bold` flag is cleared.
                // NOTE(review): this is bold-specific although the macro is generic
                // over its delimiters - confirm before reusing it for other sequences.
                let new_context = $context.with_no_bold();

                // TODO: also hand the inner elements a fail matcher extended with
                // `recognize($success_matcher)`, so nested parsers stop at this
                // sequence's closing delimiter. The earlier attempt at this was
                // never used and moved a non-cloneable matcher into this `FnMut`
                // closure, so it has been dropped until the context supports it.
                let element_matcher = recognize($element_matcher(&new_context));
                let local_fail_matcher = $context.fail_matcher.clone();

                // Kept as a separate statement rather than a tail expression so the
                // combinator temporaries are dropped before the locals they borrow.
                let ret = map(
                    recognize(tuple((
                        $begin_matcher,
                        nom::multi::many_till(
                            nom::sequence::preceded(
                                not(|i| local_fail_matcher.borrow_mut().parse(i)),
                                element_matcher,
                            ),
                            nom::sequence::preceded(
                                not(|i| local_fail_matcher.borrow_mut().parse(i)),
                                $success_matcher,
                            ),
                        ),
                    ))),
                    |s: &str| crate::parser::text::Sequence { contents: s },
                )($inp)?;
                Ok(ret)
            }
        }
    };
}
|
||||
|
||||
pub(crate) use failable_sequence;
|
7
src/parser/mod.rs
Normal file
7
src/parser/mod.rs
Normal file
@ -0,0 +1,7 @@
|
||||
mod bold_parser;
|
||||
mod failable_sequence;
|
||||
mod nom_context;
|
||||
mod parser_with_context;
|
||||
mod text;
|
||||
mod text_element_parser;
|
||||
pub use text::paragraph;
|
34
src/parser/nom_context.rs
Normal file
34
src/parser/nom_context.rs
Normal file
@ -0,0 +1,34 @@
|
||||
use nom::error::VerboseError;
|
||||
use nom::Parser;
|
||||
use std::cell::RefCell;
|
||||
use std::rc::Rc;
|
||||
|
||||
/// State threaded through the context-aware parsers.
///
/// `fail_matcher` is shared behind `Rc<RefCell<..>>`, so clones of a context
/// refer to the same matcher instance.
pub struct NomContext<F> {
    /// Parser that context-aware parsers consult (under `not`) before matching.
    pub fail_matcher: Rc<RefCell<F>>,

    /// You can't have nested bolds in org-mode
    pub can_match_bold: bool,
    pub can_match_link: bool,
}

// Written by hand instead of `#[derive(Clone)]`: the derive would require
// `F: Clone`, but cloning only bumps the `Rc` and copies the flags, so the
// matcher itself never needs to be cloneable.
impl<F> Clone for NomContext<F> {
    fn clone(&self) -> Self {
        NomContext {
            fail_matcher: Rc::clone(&self.fail_matcher),
            can_match_bold: self.can_match_bold,
            can_match_link: self.can_match_link,
        }
    }
}
|
||||
|
||||
impl<F> NomContext<F>
|
||||
where
|
||||
F: for<'a> Parser<&'a str, &'a str, VerboseError<&'a str>>,
|
||||
{
|
||||
pub fn new(fail_matcher: F) -> Self {
|
||||
NomContext {
|
||||
fail_matcher: Rc::new(RefCell::new(fail_matcher)),
|
||||
can_match_bold: true,
|
||||
can_match_link: true,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_no_bold(&self) -> NomContext<F> {
|
||||
NomContext {
|
||||
fail_matcher: self.fail_matcher.clone(),
|
||||
can_match_bold: false,
|
||||
can_match_link: self.can_match_link,
|
||||
}
|
||||
}
|
||||
}
|
14
src/parser/parser_with_context.rs
Normal file
14
src/parser/parser_with_context.rs
Normal file
@ -0,0 +1,14 @@
|
||||
/// Generates `pub fn $name(context)`, which returns a closure parsing a
/// `&str` into `$typ` using `$fnbody`.
///
/// Macro arguments, in order:
/// * `$name` - name of the generated function.
/// * `$typ` - output type of the generated parser.
/// * `$inp` - identifier bound to the input `&str` inside `$fnbody`.
/// * `$context` - identifier of the `&NomContext<F>` parameter, usable in `$fnbody`.
/// * `$fnbody` - block evaluating to the parser's `IResult`.
///
/// The expansion refers to `NomContext`, `IResult` and `VerboseError` by their
/// bare names, so the invoking module must have them imported.
macro_rules! parser_with_context {
    ($name:ident,$typ:ty,$inp:ident,$context:ident,$fnbody:block) => {
        pub fn $name<F>(
            $context: &NomContext<F>,
        ) -> impl for<'a> FnMut(&'a str) -> IResult<&'a str, $typ, VerboseError<&'a str>> + '_
        where
            F: for<'a> nom::Parser<&'a str, &'a str, VerboseError<&'a str>>,
        {
            // `move` copies the context reference into the closure (matching
            // `failable_sequence!`), so a body that uses `$context` as a whole
            // does not end up borrowing this function's parameter.
            move |$inp: &str| $fnbody
        }
    };
}
|
||||
|
||||
pub(crate) use parser_with_context;
|
134
src/parser/text.rs
Normal file
134
src/parser/text.rs
Normal file
@ -0,0 +1,134 @@
|
||||
/*
|
||||
|
||||
hypothetical link:
|
||||
fn link = many_till(text_element, link_end)
|
||||
|
||||
but what if you start a bold?
|
||||
fn bold = many_till(text_element, bold_end) could eat the link_end
|
||||
|
||||
Do I pass along break-conditions? Passing link_end into bold's parser?
|
||||
|
||||
I'll try a very simple language first where asterisks always start/end bold and links are just between [ and ]. Paragraphs will have a blank line between them.
|
||||
|
||||
*/
|
||||
use nom::bytes::complete::tag;
|
||||
use nom::character::complete::alphanumeric1;
|
||||
use nom::character::complete::line_ending;
|
||||
use nom::character::complete::space1;
|
||||
use nom::combinator::map;
|
||||
use nom::combinator::recognize;
|
||||
use nom::error::VerboseError;
|
||||
use nom::multi::many_till;
|
||||
use nom::sequence::tuple;
|
||||
use nom::IResult;
|
||||
|
||||
/// Parser result with input type `T`, output type `U`, and nom's `VerboseError<T>` as the error.
pub type Res<T, U> = IResult<T, U, VerboseError<T>>;
|
||||
|
||||
/// A single parsed piece of inline text.
///
/// Each variant wraps the struct of the same name defined below; all of them
/// borrow from the input string. `Clone`, `PartialEq` and `Eq` are derived on
/// the whole data model so parse results can be duplicated and compared.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TextElement<'a> {
    Span(Span<'a>),
    Space(Space<'a>),
    LineBreak(LineBreak<'a>),
    Symbol(Symbol<'a>),
    Bold(Bold<'a>),
    Link(Link<'a>),
}

/// Text matched by the `span` parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Span<'a> {
    contents: &'a str,
}

/// Text matched by the `space` parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Space<'a> {
    contents: &'a str,
}

/// Text matched by the `line_break` parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LineBreak<'a> {
    contents: &'a str,
}

/// A literal matched by a parser built with `symbol`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Symbol<'a> {
    contents: &'a str,
}

/// The elements of a blank line: its whitespace followed by the terminating
/// line break, as collected by `blank_line`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BlankLine<'a> {
    contents: Vec<TextElement<'a>>,
}

/// The full matched text of a delimited sequence, as produced by parsers
/// generated with the `failable_sequence!` macro.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Sequence<'a> {
    pub contents: &'a str,
}

/// Payload of [`TextElement::Bold`]. No parser visible here constructs it yet.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Bold<'a> {
    pub contents: &'a str,
}

/// Payload of [`TextElement::Link`]. No parser visible here constructs it yet.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Link<'a> {
    contents: &'a str,
}
|
||||
|
||||
pub fn line_break(input: &str) -> Res<&str, LineBreak> {
|
||||
map(line_ending, |s: &str| LineBreak { contents: s })(input)
|
||||
}
|
||||
|
||||
pub fn space(input: &str) -> Res<&str, Space> {
|
||||
map(space1, |s: &str| Space { contents: s })(input)
|
||||
}
|
||||
|
||||
pub fn span(input: &str) -> Res<&str, Span> {
|
||||
map(alphanumeric1, |s: &str| Span { contents: s })(input)
|
||||
}
|
||||
|
||||
pub fn symbol(symbol_tag: &'static str) -> impl for<'a> Fn(&'a str) -> Res<&'a str, Symbol<'a>> {
|
||||
move |i: &str| map(tag(symbol_tag), |s: &str| Symbol { contents: s })(i)
|
||||
}
|
||||
|
||||
/// A line containing only whitespace and then a line break
|
||||
///
|
||||
/// It is up to the caller to ensure this is called at the start of a line.
|
||||
fn blank_line(input: &str) -> Res<&str, BlankLine> {
|
||||
map(
|
||||
many_till(
|
||||
map(space, TextElement::Space),
|
||||
map(line_break, TextElement::LineBreak),
|
||||
),
|
||||
|(mut whitespace, end_of_line)| {
|
||||
whitespace.push(end_of_line);
|
||||
BlankLine {
|
||||
contents: whitespace,
|
||||
}
|
||||
},
|
||||
)(input)
|
||||
}
|
||||
|
||||
pub fn bold_start(input: &str) -> Res<&str, TextElement> {
|
||||
map(symbol("*"), TextElement::Symbol)(input)
|
||||
}
|
||||
|
||||
pub fn bold_end(input: &str) -> Res<&str, TextElement> {
|
||||
map(symbol("*"), TextElement::Symbol)(input)
|
||||
}
|
||||
|
||||
pub fn link_start(input: &str) -> Res<&str, TextElement> {
|
||||
map(symbol("["), TextElement::Symbol)(input)
|
||||
}
|
||||
|
||||
pub fn link_end(input: &str) -> Res<&str, TextElement> {
|
||||
map(symbol("]"), TextElement::Symbol)(input)
|
||||
}
|
||||
|
||||
/// Placeholder for the paragraph parser.
///
/// Not implemented yet: calling it panics via `todo!()`.
pub fn paragraph(input: &str) -> Res<&str, (Vec<TextElement>, &str)> {
    // Planned shape: collect text elements with `many_till` until
    // `paragraph_end` matches, using `paragraph_end` as the fail matcher too.
    todo!()
}
|
||||
|
||||
fn paragraph_end(input: &str) -> Res<&str, &str> {
|
||||
recognize(tuple((map(line_break, TextElement::LineBreak), blank_line)))(input)
|
||||
}
|
33
src/parser/text_element_parser.rs
Normal file
33
src/parser/text_element_parser.rs
Normal file
@ -0,0 +1,33 @@
|
||||
//! A single element of text.
|
||||
use super::nom_context::NomContext;
|
||||
use super::parser_with_context::parser_with_context;
|
||||
use super::text::line_break;
|
||||
use super::text::space;
|
||||
use super::text::span;
|
||||
use super::text::symbol;
|
||||
use super::text::TextElement;
|
||||
use nom::branch::alt;
|
||||
use nom::combinator::map;
|
||||
use nom::combinator::not;
|
||||
use nom::error::VerboseError;
|
||||
use nom::IResult;
|
||||
|
||||
// Generates `pub fn text_element(context)`: a parser for one `TextElement`.
parser_with_context!(text_element, TextElement, i, context, {
    // Produce nothing at a position where the context's fail matcher succeeds.
    let mut not_at_fail_point = not(|i| context.fail_matcher.borrow_mut().parse(i));
    not_at_fail_point(i)?;

    // TODO: try bold and link parsers (sharing this context's fail matcher)
    // ahead of the plain alternatives once they are wired in.
    alt((
        map(span, TextElement::Span),
        map(symbol("*"), TextElement::Symbol),
        map(symbol("["), TextElement::Symbol),
        map(symbol("]"), TextElement::Symbol),
        map(space, TextElement::Space),
        map(line_break, TextElement::LineBreak),
    ))(i)
});
|
11
toy_language.txt
Normal file
11
toy_language.txt
Normal file
@ -0,0 +1,11 @@
|
||||
prologue *goes here* I guess *bold
|
||||
text*
|
||||
|
||||
I guess *regular
|
||||
|
||||
text*
|
||||
|
||||
[foo *bar] baz* car
|
||||
|
||||
|
||||
*nesting *bold entrances* and* exits
|
Loading…
Reference in New Issue
Block a user