From f7de564deb0c310b487c895bbbb66ef7f802ecf5 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sun, 18 Dec 2022 03:43:13 -0500 Subject: [PATCH] Add a source trait to ensure we can re-create the source document using the parsed objects. --- src/parser/token.rs | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/src/parser/token.rs b/src/parser/token.rs index 90e8d90..db1bf19 100644 --- a/src/parser/token.rs +++ b/src/parser/token.rs @@ -71,3 +71,42 @@ pub struct Paragraph<'a> { pub contents: Vec>, pub paragraph_end: &'a str, } + +pub trait Source<'a> { + fn get_source(&'a self) -> &'a str; +} + +impl<'a> Source<'a> for TextElement<'a> { + fn get_source(&'a self) -> &'a str { + match self { + TextElement::Span(elem) => elem.contents, + TextElement::Space(elem) => elem.contents, + TextElement::LineBreak(elem) => elem.contents, + TextElement::Symbol(elem) => elem.contents, + TextElement::Bold(elem) => elem.contents, + TextElement::Link(elem) => elem.contents, + } + } +} + +impl<'a> Source<'a> for Paragraph<'a> { + fn get_source(&'a self) -> &'a str { + if self.contents.is_empty() { + return self.paragraph_end; + } + // TODO: Is there a better way to do this? At a minimum I should be checking that the pointers are contiguous instead of blindly adding their lengths but maybe theres a good way in nom to get both the recognize() value and the parsed values so we can just store a &str to the source. + let start = self.contents[0].get_source().as_ptr(); + let len = self + .contents + .iter() + .map(|text_element| text_element.get_source().len()) + .sum::() + + self.paragraph_end.len(); + let full_source = unsafe { + let slice = std::slice::from_raw_parts(start, len); + std::str::from_utf8(slice) + .expect("A token should always be made with valid utf-8 source material.") + }; + full_source + } +}