Compare commits

...

9 Commits

Author SHA1 Message Date
Tom Alexander
e608b73d1a
Implement all-token iteration.
Some checks failed
rust-test Build rust-test has failed
Radio targets are now being properly detected and they trigger re-parses but the tests do not yet pass.
2023-07-14 20:45:31 -04:00
Tom Alexander
b27f911ff3
Finish implementing token iteration. 2023-07-14 20:24:06 -04:00
Tom Alexander
08e6efe5f5
Filling in more of the iter_tokens tree. 2023-07-14 20:18:30 -04:00
Tom Alexander
0e73b83bf3
Filling in more of the iter_tokens tree. 2023-07-14 20:09:24 -04:00
Tom Alexander
793e560bd5
Boxing made it work. 2023-07-14 19:57:27 -04:00
Tom Alexander
0073af19e2
Running into an issue returning different iterators from the same function. 2023-07-14 19:54:41 -04:00
Tom Alexander
76187a0cb9
Enable radio_link_simple test.
This test does not yet pass, but this is goal-setting.
2023-07-14 19:11:51 -04:00
Tom Alexander
688779ba40
Fix tests. 2023-07-14 19:09:54 -04:00
Tom Alexander
bd04451d58
Implement the second parsing pass. 2023-07-14 19:06:58 -04:00
7 changed files with 206 additions and 39 deletions

View File

@ -80,7 +80,6 @@ fn is_expect_fail(name: &str) -> Option<&str> {
"element_container_priority_section_greater_block" => Some("Need to implement subscript."),
"paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."),
"radio_link_before_and_after" => Some("Matching the contents of radio targets not yet implemented."),
"radio_link_simple" => Some("Matching the contents of radio targets not yet implemented."),
"radio_link_identical_or_semantically_identical" => Some("Would require having the 2-pass parsing implemented."),
_ => None,
}

View File

@ -18,6 +18,7 @@ use super::element::Element;
use super::object::Object;
use super::parser_with_context::parser_with_context;
use super::source::Source;
use super::token::AllTokensIterator;
use super::token::Token;
use super::util::exit_matcher_parser;
use super::util::get_consumed;
@ -96,8 +97,35 @@ pub fn document(input: &str) -> Res<&str, Document> {
let initial_context: ContextTree<'_, '_> = ContextTree::new();
let document_context =
initial_context.with_additional_node(ContextElement::DocumentRoot(input));
let zeroth_section_matcher = parser_with_context!(zeroth_section)(&document_context);
let heading_matcher = parser_with_context!(heading)(&document_context);
let (remaining, document) = _document(&document_context, input)?;
{
// If there are radio targets in this document then we need to parse the entire document again with the knowledge of the radio targets.
let all_radio_targets: Vec<&Vec<Object<'_>>> = document
.iter_tokens()
.filter_map(|tkn| match tkn {
Token::Object(obj) => Some(obj),
_ => None,
})
.filter_map(|obj| match obj {
Object::RadioTarget(rt) => Some(rt),
_ => None,
})
.map(|rt| &rt.children)
.collect();
if !all_radio_targets.is_empty() {
let document_context = document_context
.with_additional_node(ContextElement::RadioTarget(all_radio_targets));
let (remaining, document) = _document(&document_context, input)?;
return Ok((remaining, document));
}
}
Ok((remaining, document))
}
#[tracing::instrument(ret, level = "debug")]
pub fn _document<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Document<'s>> {
let zeroth_section_matcher = parser_with_context!(zeroth_section)(context);
let heading_matcher = parser_with_context!(heading)(context);
let (remaining, _blank_lines) = many0(blank_line)(input)?;
let (remaining, zeroth_section) = opt(zeroth_section_matcher)(remaining)?;
let (remaining, children) = many0(heading_matcher)(remaining)?;
@ -259,28 +287,6 @@ fn headline_end<'r, 's>(_context: Context<'r, 's>, input: &'s str) -> Res<&'s st
impl<'s> Document<'s> {
pub fn iter_tokens<'r>(&'r self) -> impl Iterator<Item = Token<'r, 's>> {
self.zeroth_section
.iter()
.map(Token::Section)
.chain(self.children.iter().map(Token::Heading))
}
}
impl<'s> Heading<'s> {
pub fn iter_tokens<'r>(&'r self) -> impl Iterator<Item = Token<'r, 's>> {
self.title.iter().map(Token::Object).chain(self.children.iter().map(
|de| {
match de {
DocumentElement::Heading(obj) => Token::Heading(obj),
DocumentElement::Section(obj) => Token::Section(obj),
}
}
))
}
}
impl<'s> Section<'s> {
pub fn iter_tokens<'r>(&'r self) -> impl Iterator<Item = Token<'r, 's>> {
self.children.iter().map(Token::Element)
AllTokensIterator::new(Token::Document(self))
}
}

View File

@ -20,6 +20,7 @@ use super::lesser_element::SrcBlock;
use super::lesser_element::VerseBlock;
use super::source::SetSource;
use super::source::Source;
use super::token::Token;
use super::Drawer;
#[derive(Debug)]

View File

@ -140,7 +140,7 @@ pub enum ContextElement<'r, 's> {
/// If any are found, this will force a 2nd parse through the
/// org-mode document since text needs to be re-parsed to look for
/// radio links matching the contents of radio targets.
RadioTarget(Vec<Vec<Object<'s>>>),
RadioTarget(Vec<&'r Vec<Object<'s>>>),
}
pub struct ExitMatcherNode<'r> {

View File

@ -131,12 +131,11 @@ mod tests {
#[test]
fn plain_text_radio_target() {
let input = "foo bar baz";
let radio_target_match = vec![Object::PlainText(PlainText { source: "bar" })];
let initial_context: ContextTree<'_, '_> = ContextTree::new();
let document_context = initial_context
.with_additional_node(ContextElement::DocumentRoot(input))
.with_additional_node(ContextElement::RadioTarget(vec![vec![Object::PlainText(
PlainText { source: "bar" },
)]]));
.with_additional_node(ContextElement::RadioTarget(vec![&radio_target_match]));
let paragraph_matcher = parser_with_context!(element(true))(&document_context);
let (remaining, first_paragraph) = paragraph_matcher(input).expect("Parse first paragraph");
let first_paragraph = match first_paragraph {
@ -161,15 +160,14 @@ mod tests {
#[test]
fn bold_radio_target() {
let input = "foo *bar* baz";
let radio_target_match = vec![Object::Bold(Bold {
source: "*bar*",
children: vec![Object::PlainText(PlainText { source: "bar" })],
})];
let initial_context: ContextTree<'_, '_> = ContextTree::new();
let document_context = initial_context
.with_additional_node(ContextElement::DocumentRoot(input))
.with_additional_node(ContextElement::RadioTarget(vec![vec![Object::Bold(
Bold {
source: "*bar*",
children: vec![Object::PlainText(PlainText { source: "bar" })],
},
)]]));
.with_additional_node(ContextElement::RadioTarget(vec![&radio_target_match]));
let paragraph_matcher = parser_with_context!(element(true))(&document_context);
let (remaining, first_paragraph) = paragraph_matcher(input).expect("Parse first paragraph");
let first_paragraph = match first_paragraph {

View File

@ -1,12 +1,177 @@
use std::collections::VecDeque;
use super::Document;
use super::Element;
use super::Heading;
use super::Object;
use super::PlainListItem;
use super::Section;
use super::TableCell;
use super::TableRow;
use crate::parser::DocumentElement;
/// A borrowed handle to one node of a parsed document tree.
///
/// Each variant wraps a shared reference (lifetime `'r`) to a node type that
/// is itself parameterized by `'s`, so a `Token` never owns the node it
/// points at.
pub enum Token<'r, 's> {
/// The document node.
Document(&'r Document<'s>),
/// A heading node.
Heading(&'r Heading<'s>),
/// A section node.
Section(&'r Section<'s>),
/// An object node.
Object(&'r Object<'s>),
/// An element node.
Element(&'r Element<'s>),
/// A single item of a plain list.
PlainListItem(&'r PlainListItem<'s>),
/// A single row of a table.
TableRow(&'r TableRow<'s>),
/// A single cell of a table row.
TableCell(&'r TableCell<'s>),
}
impl<'r, 's> Token<'r, 's> {
pub fn iter_tokens(&self) -> Box<dyn Iterator<Item = Token<'r, 's>> + '_> {
match self {
Token::Document(document) => Box::new(
document
.zeroth_section
.iter()
.map(Token::Section)
.chain(document.children.iter().map(Token::Heading)),
),
Token::Heading(heading) => Box::new(heading.title.iter().map(Token::Object).chain(
heading.children.iter().map(|de| match de {
DocumentElement::Heading(ref obj) => Token::Heading(obj),
DocumentElement::Section(ref obj) => Token::Section(obj),
}),
)),
Token::Section(section) => Box::new(section.children.iter().map(Token::Element)),
Token::Object(obj) => match obj {
Object::Bold(inner) => Box::new(inner.children.iter().map(Token::Object)),
Object::Italic(inner) => Box::new(inner.children.iter().map(Token::Object)),
Object::Underline(inner) => Box::new(inner.children.iter().map(Token::Object)),
Object::StrikeThrough(inner) => Box::new(inner.children.iter().map(Token::Object)),
Object::Code(_) => Box::new(std::iter::empty()),
Object::Verbatim(_) => Box::new(std::iter::empty()),
Object::PlainText(_) => Box::new(std::iter::empty()),
Object::RegularLink(_) => Box::new(std::iter::empty()),
Object::RadioLink(inner) => Box::new(inner.children.iter().map(Token::Object)),
Object::RadioTarget(inner) => Box::new(inner.children.iter().map(Token::Object)),
Object::PlainLink(_) => Box::new(std::iter::empty()),
Object::AngleLink(_) => Box::new(std::iter::empty()),
Object::OrgMacro(_) => Box::new(std::iter::empty()),
},
Token::Element(elem) => match elem {
Element::Paragraph(inner) => Box::new(inner.children.iter().map(Token::Object)),
Element::PlainList(inner) => {
Box::new(inner.children.iter().map(Token::PlainListItem))
}
Element::GreaterBlock(inner) => Box::new(inner.children.iter().map(Token::Element)),
Element::DynamicBlock(inner) => Box::new(inner.children.iter().map(Token::Element)),
Element::FootnoteDefinition(inner) => {
Box::new(inner.children.iter().map(Token::Element))
}
Element::Comment(_) => Box::new(std::iter::empty()),
Element::Drawer(inner) => Box::new(inner.children.iter().map(Token::Element)),
Element::PropertyDrawer(_) => Box::new(std::iter::empty()),
Element::Table(inner) => Box::new(inner.children.iter().map(Token::TableRow)),
Element::VerseBlock(inner) => Box::new(inner.children.iter().map(Token::Object)),
Element::CommentBlock(_) => Box::new(std::iter::empty()),
Element::ExampleBlock(_) => Box::new(std::iter::empty()),
Element::ExportBlock(_) => Box::new(std::iter::empty()),
Element::SrcBlock(_) => Box::new(std::iter::empty()),
Element::Clock(_) => Box::new(std::iter::empty()),
Element::DiarySexp(_) => Box::new(std::iter::empty()),
Element::Planning(_) => Box::new(std::iter::empty()),
Element::FixedWidthArea(_) => Box::new(std::iter::empty()),
Element::HorizontalRule(_) => Box::new(std::iter::empty()),
Element::Keyword(_) => Box::new(std::iter::empty()),
Element::LatexEnvironment(_) => Box::new(std::iter::empty()),
},
Token::PlainListItem(elem) => Box::new(elem.children.iter().map(Token::Element)),
Token::TableRow(elem) => Box::new(elem.children.iter().map(Token::TableCell)),
Token::TableCell(elem) => Box::new(elem.children.iter().map(Token::Object)),
}
}
pub fn all_tokens_no_order(&self) -> Box<dyn Iterator<Item = Token<'r, 's>> + '_> {
match self {
Token::Document(document) => Box::new(
document
.zeroth_section
.iter()
.map(Token::Section)
.chain(document.children.iter().map(Token::Heading)),
),
Token::Heading(heading) => Box::new(heading.title.iter().map(Token::Object).chain(
heading.children.iter().map(|de| match de {
DocumentElement::Heading(ref obj) => Token::Heading(obj),
DocumentElement::Section(ref obj) => Token::Section(obj),
}),
)),
Token::Section(section) => Box::new(section.children.iter().map(Token::Element)),
Token::Object(obj) => match obj {
Object::Bold(inner) => Box::new(inner.children.iter().map(Token::Object)),
Object::Italic(inner) => Box::new(inner.children.iter().map(Token::Object)),
Object::Underline(inner) => Box::new(inner.children.iter().map(Token::Object)),
Object::StrikeThrough(inner) => Box::new(inner.children.iter().map(Token::Object)),
Object::Code(_) => Box::new(std::iter::empty()),
Object::Verbatim(_) => Box::new(std::iter::empty()),
Object::PlainText(_) => Box::new(std::iter::empty()),
Object::RegularLink(_) => Box::new(std::iter::empty()),
Object::RadioLink(inner) => Box::new(inner.children.iter().map(Token::Object)),
Object::RadioTarget(inner) => Box::new(inner.children.iter().map(Token::Object)),
Object::PlainLink(_) => Box::new(std::iter::empty()),
Object::AngleLink(_) => Box::new(std::iter::empty()),
Object::OrgMacro(_) => Box::new(std::iter::empty()),
},
Token::Element(elem) => match elem {
Element::Paragraph(inner) => Box::new(inner.children.iter().map(Token::Object)),
Element::PlainList(inner) => {
Box::new(inner.children.iter().map(Token::PlainListItem))
}
Element::GreaterBlock(inner) => Box::new(inner.children.iter().map(Token::Element)),
Element::DynamicBlock(inner) => Box::new(inner.children.iter().map(Token::Element)),
Element::FootnoteDefinition(inner) => {
Box::new(inner.children.iter().map(Token::Element))
}
Element::Comment(_) => Box::new(std::iter::empty()),
Element::Drawer(inner) => Box::new(inner.children.iter().map(Token::Element)),
Element::PropertyDrawer(_) => Box::new(std::iter::empty()),
Element::Table(inner) => Box::new(inner.children.iter().map(Token::TableRow)),
Element::VerseBlock(inner) => Box::new(inner.children.iter().map(Token::Object)),
Element::CommentBlock(_) => Box::new(std::iter::empty()),
Element::ExampleBlock(_) => Box::new(std::iter::empty()),
Element::ExportBlock(_) => Box::new(std::iter::empty()),
Element::SrcBlock(_) => Box::new(std::iter::empty()),
Element::Clock(_) => Box::new(std::iter::empty()),
Element::DiarySexp(_) => Box::new(std::iter::empty()),
Element::Planning(_) => Box::new(std::iter::empty()),
Element::FixedWidthArea(_) => Box::new(std::iter::empty()),
Element::HorizontalRule(_) => Box::new(std::iter::empty()),
Element::Keyword(_) => Box::new(std::iter::empty()),
Element::LatexEnvironment(_) => Box::new(std::iter::empty()),
},
Token::PlainListItem(elem) => Box::new(elem.children.iter().map(Token::Element)),
Token::TableRow(elem) => Box::new(elem.children.iter().map(Token::TableCell)),
Token::TableCell(elem) => Box::new(elem.children.iter().map(Token::Object)),
}
}
}
/// Iterator state for walking a token together with all of its descendants.
pub struct AllTokensIterator<'r, 's> {
// Tokens that have been discovered but not yet handed out, in FIFO order.
queued_tokens: VecDeque<Token<'r, 's>>,
}
impl<'r, 's> AllTokensIterator<'r, 's> {
pub fn new(tkn: Token<'r, 's>) -> Self {
let mut queued_tokens = VecDeque::new();
queued_tokens.push_back(tkn);
AllTokensIterator { queued_tokens }
}
}
impl<'r, 's> Iterator for AllTokensIterator<'r, 's> {
    type Item = Token<'r, 's>;

    /// Yields the token at the front of the queue, or `None` once the queue
    /// is empty.
    ///
    /// Before a token is returned, its direct children are appended to the
    /// back of the queue, so every token is yielded before any of its own
    /// children (first-in, first-out order).
    fn next(&mut self) -> Option<Self::Item> {
        let current = self.queued_tokens.pop_front()?;
        for child in current.iter_tokens() {
            self.queued_tokens.push_back(child);
        }
        Some(current)
    }
}

View File

@ -1,3 +1 @@
foo *bar /baz *lorem* ipsum/ dolar* alpha
foo *bar /baz _lorem_ ipsum/ dolar* alpha
foo <<<*bar* baz>>> lorem ipsum *bar* baz dolar.