organic/src/parser/token.rs

113 lines
2.7 KiB
Rust
Raw Normal View History

2022-12-04 04:53:52 +00:00
#[derive(Debug)]
pub enum Token<'a> {
TextElement(TextElement<'a>),
Paragraph(Paragraph<'a>),
2022-12-04 04:53:52 +00:00
}
/// Wraps a [`TextElement`] in [`Token::TextElement`].
///
/// `From` is implemented rather than `Into`: the standard library's blanket
/// impl supplies `Into<Token<'a>> for TextElement<'a>` automatically, so
/// existing `.into()` call sites keep working and `Token::from(..)` becomes
/// available as well.
impl<'a> From<TextElement<'a>> for Token<'a> {
    fn from(element: TextElement<'a>) -> Self {
        Token::TextElement(element)
    }
}
/// Wraps a [`Paragraph`] in [`Token::Paragraph`].
///
/// `From` is implemented rather than `Into`: the standard library's blanket
/// impl supplies `Into<Token<'a>> for Paragraph<'a>` automatically, so
/// existing `.into()` call sites keep working and `Token::from(..)` becomes
/// available as well.
impl<'a> From<Paragraph<'a>> for Token<'a> {
    fn from(paragraph: Paragraph<'a>) -> Self {
        Token::Paragraph(paragraph)
    }
}
/// An inline element of text.
///
/// Every variant wraps the struct of the same name, and each of those structs
/// carries the borrowed text it covers in a `contents` field.
#[derive(Debug)]
pub enum TextElement<'a> {
    /// Wraps a [`Span`].
    Span(Span<'a>),
    /// Wraps a [`Space`].
    Space(Space<'a>),
    /// Wraps a [`LineBreak`].
    LineBreak(LineBreak<'a>),
    /// Wraps a [`Symbol`].
    Symbol(Symbol<'a>),
    /// Wraps a [`Bold`].
    Bold(Bold<'a>),
    /// Wraps a [`Link`].
    Link(Link<'a>),
}
/// Text element carried by `TextElement::Span`.
///
/// NOTE(review): presumably a run of ordinary text — confirm against the parser.
#[derive(Debug)]
pub struct Span<'a> {
    /// The borrowed text this element covers.
    pub contents: &'a str,
}
/// Text element carried by `TextElement::Space`.
///
/// NOTE(review): presumably a run of whitespace — confirm against the parser.
#[derive(Debug)]
pub struct Space<'a> {
    /// The borrowed text this element covers.
    pub contents: &'a str,
}
/// Text element carried by `TextElement::LineBreak`.
///
/// NOTE(review): presumably a line terminator — confirm against the parser.
#[derive(Debug)]
pub struct LineBreak<'a> {
    /// The borrowed text this element covers.
    pub contents: &'a str,
}
/// Text element carried by `TextElement::Symbol`.
///
/// NOTE(review): presumably a punctuation/markup character — confirm against
/// the parser.
#[derive(Debug)]
pub struct Symbol<'a> {
    /// The borrowed text this element covers.
    pub contents: &'a str,
}
/// A node holding a borrowed piece of text.
///
/// It is not a `TextElement` variant in this file.
/// NOTE(review): presumably an empty line between blocks — confirm against
/// the parser.
#[derive(Debug)]
pub struct BlankLine<'a> {
    /// The borrowed text this node covers.
    pub contents: &'a str,
}
/// A node holding a borrowed piece of text.
///
/// It is not a `TextElement` variant in this file.
/// NOTE(review): exact meaning is not visible here — confirm against the parser.
#[derive(Debug)]
pub struct Sequence<'a> {
    /// The borrowed text this node covers.
    pub contents: &'a str,
}
/// Text element carried by `TextElement::Bold`.
///
/// NOTE(review): whether `contents` includes the surrounding bold markers is
/// not visible here — confirm against the parser.
#[derive(Debug)]
pub struct Bold<'a> {
    /// The borrowed text this element covers.
    pub contents: &'a str,
}
/// Text element carried by `TextElement::Link`.
///
/// NOTE(review): whether `contents` includes the surrounding link brackets is
/// not visible here — confirm against the parser.
#[derive(Debug)]
pub struct Link<'a> {
    /// The borrowed text this element covers.
    pub contents: &'a str,
}
/// A paragraph: a list of text elements followed by its terminating text.
#[derive(Debug)]
pub struct Paragraph<'a> {
    /// The text elements that make up the paragraph body.
    ///
    /// `Paragraph::get_source` treats the first element as the start of the
    /// paragraph's source text, so these are expected to be in source order.
    pub contents: Vec<TextElement<'a>>,
    /// The text that follows the last element.
    ///
    /// It is returned on its own as the paragraph's source when `contents` is
    /// empty, and otherwise counted after the elements.
    /// NOTE(review): presumably the blank line(s) or end-of-input that closes
    /// the paragraph — confirm against the parser.
    pub paragraph_end: &'a str,
}
/// Implemented by parsed nodes that can report the source text they cover.
pub trait Source<'a> {
    /// Returns the slice of source text this node covers.
    ///
    /// The receiver is borrowed for the same lifetime `'a` as the returned
    /// slice.
    fn get_source(&'a self) -> &'a str;
}
impl<'a> Source<'a> for TextElement<'a> {
    /// Returns the `contents` of whichever element this variant wraps.
    fn get_source(&'a self) -> &'a str {
        // Deliberately no catch-all arm: adding a variant must force an update here.
        match self {
            TextElement::Span(span) => span.contents,
            TextElement::Space(space) => space.contents,
            TextElement::LineBreak(line_break) => line_break.contents,
            TextElement::Symbol(symbol) => symbol.contents,
            TextElement::Bold(bold) => bold.contents,
            TextElement::Link(link) => link.contents,
        }
    }
}
impl<'a> Source<'a> for Paragraph<'a> {
    /// Returns the source text covered by the whole paragraph: every text
    /// element in `contents` followed by `paragraph_end`, as one slice.
    ///
    /// When `contents` is empty, `paragraph_end` is returned on its own.
    ///
    /// The slice is rebuilt from the individual pieces rather than stored, so
    /// the pieces must sit back-to-back in memory. Empty pieces contribute
    /// nothing and are ignored.
    ///
    /// # Panics
    ///
    /// Panics if the non-empty pieces are not contiguous, or if the combined
    /// bytes are not valid UTF-8.
    fn get_source(&'a self) -> &'a str {
        if self.contents.is_empty() {
            return self.paragraph_end;
        }
        // TODO: Is there a better way to do this? Maybe there is a good way in nom to get both
        // the recognize() value and the parsed values so we can just store a &str to the source.
        let pieces = self
            .contents
            .iter()
            .map(|text_element| text_element.get_source())
            .chain(std::iter::once(self.paragraph_end));

        // Falls back to the first element's pointer when every piece is empty.
        let mut start = self.contents[0].get_source().as_ptr();
        let mut len = 0usize;
        for piece in pieces {
            if piece.is_empty() {
                continue;
            }
            if len == 0 {
                // First non-empty piece: this is where the paragraph's text begins.
                start = piece.as_ptr();
            } else {
                // `wrapping_add` only computes an address; nothing is dereferenced here.
                assert!(
                    start.wrapping_add(len) == piece.as_ptr(),
                    "A paragraph's text elements and paragraph end must be contiguous in the source."
                );
            }
            len += piece.len();
        }

        // SAFETY: every non-empty piece is a live `&'a str`, and the loop above verified that
        // each one begins exactly where the previous one ended, so `start..start + len` is
        // made up entirely of readable, initialized bytes that stay borrowed for `'a`.
        // This still relies on the parser invariant that all pieces were sliced from the same
        // source buffer (adjacency alone cannot prove a single allocation).
        let bytes = unsafe { std::slice::from_raw_parts(start, len) };
        std::str::from_utf8(bytes)
            .expect("A token should always be made with valid utf-8 source material.")
    }
}