diff --git a/build.rs b/build.rs index 615d8a7..fe47ad4 100644 --- a/build.rs +++ b/build.rs @@ -2,6 +2,7 @@ use std::env; use std::fs::File; use std::io::Write; use std::path::Path; + use walkdir::WalkDir; fn main() { @@ -72,6 +73,14 @@ fn is_expect_fail(name: &str) -> Option<&str> { match name { "drawer_drawer_with_headline_inside" => Some("Apparently lines with :end: become their own paragraph. This odd behavior needs to be investigated more."), "element_container_priority_footnote_definition_dynamic_block" => Some("Apparently broken begin lines become their own paragraph."), + "element_container_priority_drawer_greater_block" => Some("Need to implement subscript."), + "element_container_priority_dynamic_block_greater_block" => Some("Need to implement subscript."), + "element_container_priority_footnote_definition_greater_block" => Some("Need to implement subscript."), + "element_container_priority_greater_block_greater_block" => Some("Need to implement subscript."), + "element_container_priority_section_greater_block" => Some("Need to implement subscript."), + "keyword_affiliated_keyword" => Some("Need to implement link."), + "paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."), + "text_markup_opening_and_closing" => Some("Need to implement bold and link."), _ => None, } } diff --git a/org_mode_samples/element_container_priority/paragraph_drawer.org b/org_mode_samples/element_container_priority/paragraph_drawer.org index 574a2f2..717fc3d 100644 --- a/org_mode_samples/element_container_priority/paragraph_drawer.org +++ b/org_mode_samples/element_container_priority/paragraph_drawer.org @@ -2,4 +2,12 @@ foo :drawername: + + + + + + + + :end: diff --git a/org_mode_samples/text_markup/all_variants.org b/org_mode_samples/text_markup/all_variants.org new file mode 100644 index 0000000..caa226b --- /dev/null +++ 
b/org_mode_samples/text_markup/all_variants.org @@ -0,0 +1,6 @@ +*bold* +/italic/ +_underline_ +=verbatim= +~code~ ++strike-through+ diff --git a/org_mode_samples/text_markup/opening_and_closing.org b/org_mode_samples/text_markup/opening_and_closing.org new file mode 100644 index 0000000..fc07a3b --- /dev/null +++ b/org_mode_samples/text_markup/opening_and_closing.org @@ -0,0 +1,13 @@ +prologue *goes here* I guess *bold +text* + +bold*wont* start *or stop*when there is text outside it + +I guess *regular + +text* + +[[foo][foo *bar]] baz* car + + +*nesting *bold entrances* and* exits diff --git a/org_mode_samples/text_markup/simple.org b/org_mode_samples/text_markup/simple.org new file mode 100644 index 0000000..6c2c984 --- /dev/null +++ b/org_mode_samples/text_markup/simple.org @@ -0,0 +1 @@ +foo *bar* baz diff --git a/rustfmt.toml b/rustfmt.toml index da3d1e7..5795ff1 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -1,4 +1,5 @@ imports_granularity = "Item" +group_imports = "StdExternalCrate" # In rustfmt 2.0 I will want to adjust these settings. 
# diff --git a/scripts/run_integration_test.bash b/scripts/run_integration_test.bash index 9185c65..ca16183 100755 --- a/scripts/run_integration_test.bash +++ b/scripts/run_integration_test.bash @@ -13,9 +13,9 @@ function get_test_names { test_file_full_path=$(readlink -f "$test_file") relative_to_samples=$(realpath --relative-to "$samples_dir" "$test_file_full_path") without_extension="${relative_to_samples%.org}" - echo "${without_extension/\//_}" + echo "${without_extension/\//_}" | tr '[:upper:]' '[:lower:]' else - echo "$test_file" + echo "$test_file" | tr '[:upper:]' '[:lower:]' fi done } diff --git a/src/compare/diff.rs b/src/compare/diff.rs index b68ec67..1f13463 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -1,7 +1,9 @@ use super::util::assert_bounds; use super::util::assert_name; use crate::parser::sexp::Token; +use crate::parser::Bold; use crate::parser::Clock; +use crate::parser::Code; use crate::parser::Comment; use crate::parser::CommentBlock; use crate::parser::DiarySexp; @@ -17,18 +19,25 @@ use crate::parser::FootnoteDefinition; use crate::parser::GreaterBlock; use crate::parser::Heading; use crate::parser::HorizontalRule; +use crate::parser::Italic; use crate::parser::Keyword; use crate::parser::LatexEnvironment; +use crate::parser::Object; use crate::parser::Paragraph; use crate::parser::PlainList; use crate::parser::PlainListItem; +use crate::parser::PlainText; use crate::parser::Planning; use crate::parser::PropertyDrawer; +use crate::parser::RegularLink; use crate::parser::Section; use crate::parser::SrcBlock; +use crate::parser::StrikeThrough; use crate::parser::Table; use crate::parser::TableCell; use crate::parser::TableRow; +use crate::parser::Underline; +use crate::parser::Verbatim; use crate::parser::VerseBlock; #[derive(Debug)] @@ -64,7 +73,13 @@ impl DiffResult { DiffStatus::Bad => "BAD", } }; - println!("{}{} {}", " ".repeat(indentation), status_text, self.name); + println!( + "{}{} {} {}", + " ".repeat(indentation), + 
status_text, + self.name, + self.message.as_ref().map(|m| m.as_str()).unwrap_or("") + ); for child in self.children.iter() { child.print_indented(indentation + 1)?; } @@ -85,6 +100,53 @@ impl DiffResult { } } +fn compare_element<'s>( + source: &'s str, + emacs: &'s Token<'s>, + rust: &'s Element<'s>, +) -> Result> { + match rust { + Element::Paragraph(obj) => compare_paragraph(source, emacs, obj), + Element::PlainList(obj) => compare_plain_list(source, emacs, obj), + Element::GreaterBlock(obj) => compare_greater_block(source, emacs, obj), + Element::DynamicBlock(obj) => compare_dynamic_block(source, emacs, obj), + Element::FootnoteDefinition(obj) => compare_footnote_definition(source, emacs, obj), + Element::Comment(obj) => compare_comment(source, emacs, obj), + Element::Drawer(obj) => compare_drawer(source, emacs, obj), + Element::PropertyDrawer(obj) => compare_property_drawer(source, emacs, obj), + Element::Table(obj) => compare_table(source, emacs, obj), + Element::VerseBlock(obj) => compare_verse_block(source, emacs, obj), + Element::CommentBlock(obj) => compare_comment_block(source, emacs, obj), + Element::ExampleBlock(obj) => compare_example_block(source, emacs, obj), + Element::ExportBlock(obj) => compare_export_block(source, emacs, obj), + Element::SrcBlock(obj) => compare_src_block(source, emacs, obj), + Element::Clock(obj) => compare_clock(source, emacs, obj), + Element::DiarySexp(obj) => compare_diary_sexp(source, emacs, obj), + Element::Planning(obj) => compare_planning(source, emacs, obj), + Element::FixedWidthArea(obj) => compare_fixed_width_area(source, emacs, obj), + Element::HorizontalRule(obj) => compare_horizontal_rule(source, emacs, obj), + Element::Keyword(obj) => compare_keyword(source, emacs, obj), + Element::LatexEnvironment(obj) => compare_latex_environment(source, emacs, obj), + } +} + +fn compare_object<'s>( + source: &'s str, + emacs: &'s Token<'s>, + rust: &'s Object<'s>, +) -> Result> { + match rust { + Object::Bold(obj) => 
compare_bold(source, emacs, obj), + Object::Italic(obj) => compare_italic(source, emacs, obj), + Object::Underline(obj) => compare_underline(source, emacs, obj), + Object::Verbatim(obj) => compare_verbatim(source, emacs, obj), + Object::Code(obj) => compare_code(source, emacs, obj), + Object::StrikeThrough(obj) => compare_strike_through(source, emacs, obj), + Object::PlainText(obj) => compare_plain_text(source, emacs, obj), + Object::RegularLink(obj) => compare_regular_link(source, emacs, obj), + } +} + pub fn compare_document<'s>( emacs: &'s Token<'s>, rust: &'s Document<'s>, @@ -181,6 +243,22 @@ fn compare_heading<'s>( this_status = DiffStatus::Bad; } + let title = { + let children = emacs.as_list()?; + let attributes_child = children + .iter() + .nth(1) + .ok_or("Should have an attributes child.")?; + let attributes_map = attributes_child.as_map()?; + let title = attributes_map + .get(":title") + .ok_or("Missing :title attribute."); + *title? + }; + for (emacs_child, rust_child) in title.as_list()?.iter().zip(rust.title.iter()) { + child_status.push(compare_object(source, emacs_child, rust_child)?); + } + for (emacs_child, rust_child) in children.iter().skip(2).zip(rust.children.iter()) { match rust_child { DocumentElement::Heading(rust_heading) => { @@ -200,43 +278,13 @@ fn compare_heading<'s>( }) } -fn compare_element<'s>( - source: &'s str, - emacs: &'s Token<'s>, - rust: &'s Element<'s>, -) -> Result> { - match rust { - Element::Paragraph(obj) => compare_paragraph(source, emacs, obj), - Element::PlainList(obj) => compare_plain_list(source, emacs, obj), - Element::GreaterBlock(obj) => compare_greater_block(source, emacs, obj), - Element::DynamicBlock(obj) => compare_dynamic_block(source, emacs, obj), - Element::FootnoteDefinition(obj) => compare_footnote_definition(source, emacs, obj), - Element::Comment(obj) => compare_comment(source, emacs, obj), - Element::Drawer(obj) => compare_drawer(source, emacs, obj), - Element::PropertyDrawer(obj) => 
compare_property_drawer(source, emacs, obj), - Element::Table(obj) => compare_table(source, emacs, obj), - Element::VerseBlock(obj) => compare_verse_block(source, emacs, obj), - Element::CommentBlock(obj) => compare_comment_block(source, emacs, obj), - Element::ExampleBlock(obj) => compare_example_block(source, emacs, obj), - Element::ExportBlock(obj) => compare_export_block(source, emacs, obj), - Element::SrcBlock(obj) => compare_src_block(source, emacs, obj), - Element::Clock(obj) => compare_clock(source, emacs, obj), - Element::DiarySexp(obj) => compare_diary_sexp(source, emacs, obj), - Element::Planning(obj) => compare_planning(source, emacs, obj), - Element::FixedWidthArea(obj) => compare_fixed_width_area(source, emacs, obj), - Element::HorizontalRule(obj) => compare_horizontal_rule(source, emacs, obj), - Element::Keyword(obj) => compare_keyword(source, emacs, obj), - Element::LatexEnvironment(obj) => compare_latex_environment(source, emacs, obj), - } -} - fn compare_paragraph<'s>( source: &'s str, emacs: &'s Token<'s>, rust: &'s Paragraph<'s>, ) -> Result> { let children = emacs.as_list()?; - let child_status = Vec::new(); + let mut child_status = Vec::new(); let mut this_status = DiffStatus::Good; let emacs_name = "paragraph"; if assert_name(emacs, emacs_name).is_err() { @@ -247,7 +295,9 @@ fn compare_paragraph<'s>( this_status = DiffStatus::Bad; } - for (_emacs_child, _rust_child) in children.iter().skip(2).zip(rust.children.iter()) {} + for (emacs_child, rust_child) in children.iter().skip(2).zip(rust.children.iter()) { + child_status.push(compare_object(source, emacs_child, rust_child)?); + } Ok(DiffResult { status: this_status, @@ -856,3 +906,179 @@ fn compare_latex_environment<'s>( children: child_status, }) } + +fn compare_plain_text<'s>( + _source: &'s str, + emacs: &'s Token<'s>, + rust: &'s PlainText<'s>, +) -> Result> { + let mut this_status = DiffStatus::Good; + let mut message = None; + let text = emacs.as_text()?; + let unquoted_text = 
text.unquote()?; + if unquoted_text != rust.source { + this_status = DiffStatus::Bad; + message = Some(format!( + "(emacs != rust) {:?} != {:?}", + unquoted_text, rust.source + )); + } + + Ok(DiffResult { + status: this_status, + name: "plain-text".to_owned(), + message, + children: Vec::new(), + }) +} + +fn compare_bold<'s>( + source: &'s str, + emacs: &'s Token<'s>, + rust: &'s Bold<'s>, +) -> Result> { + let mut this_status = DiffStatus::Good; + let emacs_name = "bold"; + if assert_name(emacs, emacs_name).is_err() { + this_status = DiffStatus::Bad; + } + + if assert_bounds(source, emacs, rust).is_err() { + this_status = DiffStatus::Bad; + } + + Ok(DiffResult { + status: this_status, + name: emacs_name.to_owned(), + message: None, + children: Vec::new(), + }) +} + +fn compare_italic<'s>( + source: &'s str, + emacs: &'s Token<'s>, + rust: &'s Italic<'s>, +) -> Result> { + let mut this_status = DiffStatus::Good; + let emacs_name = "italic"; + if assert_name(emacs, emacs_name).is_err() { + this_status = DiffStatus::Bad; + } + + if assert_bounds(source, emacs, rust).is_err() { + this_status = DiffStatus::Bad; + } + + Ok(DiffResult { + status: this_status, + name: emacs_name.to_owned(), + message: None, + children: Vec::new(), + }) +} + +fn compare_underline<'s>( + source: &'s str, + emacs: &'s Token<'s>, + rust: &'s Underline<'s>, +) -> Result> { + let mut this_status = DiffStatus::Good; + let emacs_name = "underline"; + if assert_name(emacs, emacs_name).is_err() { + this_status = DiffStatus::Bad; + } + + if assert_bounds(source, emacs, rust).is_err() { + this_status = DiffStatus::Bad; + } + + Ok(DiffResult { + status: this_status, + name: emacs_name.to_owned(), + message: None, + children: Vec::new(), + }) +} + +fn compare_verbatim<'s>( + source: &'s str, + emacs: &'s Token<'s>, + rust: &'s Verbatim<'s>, +) -> Result> { + let mut this_status = DiffStatus::Good; + let emacs_name = "verbatim"; + if assert_name(emacs, emacs_name).is_err() { + this_status = 
DiffStatus::Bad; + } + + if assert_bounds(source, emacs, rust).is_err() { + this_status = DiffStatus::Bad; + } + + Ok(DiffResult { + status: this_status, + name: emacs_name.to_owned(), + message: None, + children: Vec::new(), + }) +} + +fn compare_code<'s>( + source: &'s str, + emacs: &'s Token<'s>, + rust: &'s Code<'s>, +) -> Result> { + let mut this_status = DiffStatus::Good; + let emacs_name = "code"; + if assert_name(emacs, emacs_name).is_err() { + this_status = DiffStatus::Bad; + } + + if assert_bounds(source, emacs, rust).is_err() { + this_status = DiffStatus::Bad; + } + + Ok(DiffResult { + status: this_status, + name: emacs_name.to_owned(), + message: None, + children: Vec::new(), + }) +} + +fn compare_strike_through<'s>( + source: &'s str, + emacs: &'s Token<'s>, + rust: &'s StrikeThrough<'s>, +) -> Result> { + let mut this_status = DiffStatus::Good; + let emacs_name = "strike-through"; + if assert_name(emacs, emacs_name).is_err() { + this_status = DiffStatus::Bad; + } + + if assert_bounds(source, emacs, rust).is_err() { + this_status = DiffStatus::Bad; + } + + Ok(DiffResult { + status: this_status, + name: emacs_name.to_owned(), + message: None, + children: Vec::new(), + }) +} + +fn compare_regular_link<'s>( + _source: &'s str, + _emacs: &'s Token<'s>, + _rust: &'s RegularLink<'s>, +) -> Result> { + Ok(DiffResult { + status: DiffStatus::Good, + name: "regular-link".to_owned(), + message: None, + children: Vec::new(), + }) +} diff --git a/src/main.rs b/src/main.rs index 0916f22..59a2cd1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,8 @@ #![feature(round_char_boundary)] +use ::organic::parser::document; + use crate::init_tracing::init_telemetry; use crate::init_tracing::shutdown_telemetry; -use ::organic::parser::document; mod init_tracing; const TEST_DOC: &'static str = include_str!("../toy_language.txt"); diff --git a/src/parser/clock.rs b/src/parser/clock.rs index 22d2e2c..361f114 100644 --- a/src/parser/clock.rs +++ b/src/parser/clock.rs @@ -15,7 
+15,6 @@ use super::Context; use crate::error::Res; use crate::parser::parser_with_context::parser_with_context; use crate::parser::util::get_consumed; - use crate::parser::util::start_of_line; use crate::parser::Clock; diff --git a/src/parser/comment.rs b/src/parser/comment.rs index 15a5abc..b5ce097 100644 --- a/src/parser/comment.rs +++ b/src/parser/comment.rs @@ -1,6 +1,3 @@ -use crate::error::CustomError; -use crate::error::MyError; -use crate::error::Res; use nom::branch::alt; use nom::bytes::complete::is_not; use nom::bytes::complete::tag; @@ -16,11 +13,13 @@ use nom::sequence::tuple; use super::util::get_consumed; use super::Context; +use crate::error::CustomError; +use crate::error::MyError; +use crate::error::Res; use crate::parser::parser_context::ContextElement; use crate::parser::parser_with_context::parser_with_context; use crate::parser::util::exit_matcher_parser; use crate::parser::util::immediate_in_section; - use crate::parser::util::start_of_line; use crate::parser::Comment; @@ -57,12 +56,11 @@ fn comment_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str #[cfg(test)] mod tests { + use super::*; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ContextTree; use crate::parser::parser_with_context::parser_with_context; - use super::*; - #[test] fn require_space_after_hash() { let input = "# Comment line diff --git a/src/parser/diary_sexp.rs b/src/parser/diary_sexp.rs index 856648b..87f4e83 100644 --- a/src/parser/diary_sexp.rs +++ b/src/parser/diary_sexp.rs @@ -10,7 +10,6 @@ use super::sexp::sexp; use super::Context; use crate::error::Res; use crate::parser::util::get_consumed; - use crate::parser::util::start_of_line; use crate::parser::DiarySexp; diff --git a/src/parser/document.rs b/src/parser/document.rs index 853db11..9954fab 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -1,15 +1,3 @@ -use crate::error::Res; -use crate::parser::comment::comment; -use 
crate::parser::element_parser::element; -use crate::parser::exiting::ExitClass; -use crate::parser::object_parser::standard_set_object; -use crate::parser::parser_context::ContextElement; -use crate::parser::parser_context::ContextTree; -use crate::parser::parser_context::ExitMatcherNode; -use crate::parser::planning::planning; -use crate::parser::property_drawer::property_drawer; -use crate::parser::util::blank_line; -use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting; use nom::branch::alt; use nom::bytes::complete::tag; use nom::character::complete::line_ending; @@ -34,6 +22,18 @@ use super::util::exit_matcher_parser; use super::util::get_consumed; use super::util::start_of_line; use super::Context; +use crate::error::Res; +use crate::parser::comment::comment; +use crate::parser::element_parser::element; +use crate::parser::exiting::ExitClass; +use crate::parser::object_parser::standard_set_object; +use crate::parser::parser_context::ContextElement; +use crate::parser::parser_context::ContextTree; +use crate::parser::parser_context::ExitMatcherNode; +use crate::parser::planning::planning; +use crate::parser::property_drawer::property_drawer; +use crate::parser::util::blank_line; +use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting; #[derive(Debug)] pub struct Document<'s> { diff --git a/src/parser/drawer.rs b/src/parser/drawer.rs index 9c93a94..69ebd4a 100644 --- a/src/parser/drawer.rs +++ b/src/parser/drawer.rs @@ -1,29 +1,29 @@ -use crate::error::CustomError; -use crate::error::MyError; -use crate::error::Res; use nom::branch::alt; use nom::bytes::complete::tag; use nom::bytes::complete::tag_no_case; use nom::bytes::complete::take_while; use nom::character::complete::line_ending; use nom::character::complete::space0; -use nom::combinator::consumed; use nom::combinator::eof; +use nom::combinator::not; use nom::combinator::recognize; use nom::multi::many_till; use nom::sequence::tuple; use super::Context; +use 
crate::error::CustomError; +use crate::error::MyError; +use crate::error::Res; use crate::parser::element_parser::element; use crate::parser::exiting::ExitClass; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ExitMatcherNode; use crate::parser::parser_with_context::parser_with_context; +use crate::parser::source::SetSource; use crate::parser::util::blank_line; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; use crate::parser::util::immediate_in_section; - use crate::parser::util::start_of_line; use crate::parser::util::WORD_CONSTITUENT_CHARACTERS; use crate::parser::Drawer; @@ -56,11 +56,18 @@ pub fn drawer<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, let element_matcher = parser_with_context!(element(true))(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); - let (remaining, children) = match consumed(many_till(blank_line, exit_matcher))(remaining) { - Ok((remaining, (whitespace, (_children, _exit_contents)))) => ( - remaining, - vec![Element::Paragraph(Paragraph::of_text(whitespace))], - ), + let (remaining, children) = match tuple(( + not(exit_matcher), + blank_line, + many_till(blank_line, exit_matcher), + ))(remaining) + { + Ok((remain, (_not_immediate_exit, first_line, (_trailing_whitespace, _exit_contents)))) => { + let mut element = Element::Paragraph(Paragraph::of_text(first_line)); + let source = get_consumed(remaining, remain); + element.set_source(source); + (remain, vec![element]) + } Err(_) => { let (remaining, (children, _exit_contents)) = many_till(element_matcher, exit_matcher)(remaining)?; diff --git a/src/parser/dynamic_block.rs b/src/parser/dynamic_block.rs index d523920..d9eac68 100644 --- a/src/parser/dynamic_block.rs +++ b/src/parser/dynamic_block.rs @@ -1,3 +1,16 @@ +use nom::branch::alt; +use nom::bytes::complete::is_not; +use nom::bytes::complete::tag_no_case; +use 
nom::character::complete::line_ending; +use nom::character::complete::space0; +use nom::character::complete::space1; +use nom::combinator::eof; +use nom::combinator::not; +use nom::combinator::opt; +use nom::combinator::recognize; +use nom::multi::many_till; +use nom::sequence::tuple; + use super::Context; use crate::error::CustomError; use crate::error::MyError; @@ -9,25 +22,13 @@ use crate::parser::lesser_element::Paragraph; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ExitMatcherNode; use crate::parser::parser_with_context::parser_with_context; +use crate::parser::source::SetSource; use crate::parser::util::blank_line; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; use crate::parser::util::immediate_in_section; - use crate::parser::util::start_of_line; use crate::parser::Element; -use nom::branch::alt; -use nom::bytes::complete::is_not; -use nom::bytes::complete::tag_no_case; -use nom::character::complete::line_ending; -use nom::character::complete::space0; -use nom::character::complete::space1; -use nom::combinator::consumed; -use nom::combinator::eof; -use nom::combinator::opt; -use nom::combinator::recognize; -use nom::multi::many_till; -use nom::sequence::tuple; #[tracing::instrument(ret, level = "debug")] pub fn dynamic_block<'r, 's>( @@ -61,11 +62,18 @@ pub fn dynamic_block<'r, 's>( }; let element_matcher = parser_with_context!(element(true))(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); - let (remaining, children) = match consumed(many_till(blank_line, exit_matcher))(remaining) { - Ok((remaining, (whitespace, (_children, _exit_contents)))) => ( - remaining, - vec![Element::Paragraph(Paragraph::of_text(whitespace))], - ), + let (remaining, children) = match tuple(( + not(exit_matcher), + blank_line, + many_till(blank_line, exit_matcher), + ))(remaining) + { + Ok((remain, (_not_immediate_exit, first_line, (_trailing_whitespace, 
_exit_contents)))) => { + let mut element = Element::Paragraph(Paragraph::of_text(first_line)); + let source = get_consumed(remaining, remain); + element.set_source(source); + (remain, vec![element]) + } Err(_) => { let (remaining, (children, _exit_contents)) = many_till(element_matcher, exit_matcher)(remaining)?; diff --git a/src/parser/element_parser.rs b/src/parser/element_parser.rs index 626a117..0612a78 100644 --- a/src/parser/element_parser.rs +++ b/src/parser/element_parser.rs @@ -1,3 +1,7 @@ +use nom::branch::alt; +use nom::combinator::map; +use nom::multi::many0; + use super::clock::clock; use super::comment::comment; use super::diary_sexp::diary_sexp; @@ -20,84 +24,86 @@ use super::plain_list::plain_list; use super::source::SetSource; use super::util::get_consumed; use super::util::maybe_consume_trailing_whitespace_if_not_exiting; - use super::Context; use crate::error::Res; use crate::parser::parser_with_context::parser_with_context; use crate::parser::table::org_mode_table; -use nom::branch::alt; -use nom::combinator::map; - -use nom::multi::many0; pub fn element( can_be_paragraph: bool, ) -> impl for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, Element<'s>> { - move |context: Context, input: &str| { - let plain_list_matcher = parser_with_context!(plain_list)(context); - let greater_block_matcher = parser_with_context!(greater_block)(context); - let dynamic_block_matcher = parser_with_context!(dynamic_block)(context); - let footnote_definition_matcher = parser_with_context!(footnote_definition)(context); - let comment_matcher = parser_with_context!(comment)(context); - let drawer_matcher = parser_with_context!(drawer)(context); - let table_matcher = parser_with_context!(org_mode_table)(context); - let verse_block_matcher = parser_with_context!(verse_block)(context); - let comment_block_matcher = parser_with_context!(comment_block)(context); - let example_block_matcher = parser_with_context!(example_block)(context); - let export_block_matcher = 
parser_with_context!(export_block)(context); - let src_block_matcher = parser_with_context!(src_block)(context); - let clock_matcher = parser_with_context!(clock)(context); - let diary_sexp_matcher = parser_with_context!(diary_sexp)(context); - let fixed_width_area_matcher = parser_with_context!(fixed_width_area)(context); - let horizontal_rule_matcher = parser_with_context!(horizontal_rule)(context); - let keyword_matcher = parser_with_context!(keyword)(context); - let paragraph_matcher = parser_with_context!(paragraph)(context); - let latex_environment_matcher = parser_with_context!(latex_environment)(context); - - let (remaining, mut affiliated_keywords) = many0(keyword_matcher)(input)?; - let (remaining, mut element) = match alt(( - map(plain_list_matcher, Element::PlainList), - map(greater_block_matcher, Element::GreaterBlock), - map(dynamic_block_matcher, Element::DynamicBlock), - map(footnote_definition_matcher, Element::FootnoteDefinition), - map(comment_matcher, Element::Comment), - map(drawer_matcher, Element::Drawer), - map(table_matcher, Element::Table), - map(verse_block_matcher, Element::VerseBlock), - map(comment_block_matcher, Element::CommentBlock), - map(example_block_matcher, Element::ExampleBlock), - map(export_block_matcher, Element::ExportBlock), - map(src_block_matcher, Element::SrcBlock), - map(clock_matcher, Element::Clock), - map(diary_sexp_matcher, Element::DiarySexp), - map(fixed_width_area_matcher, Element::FixedWidthArea), - map(horizontal_rule_matcher, Element::HorizontalRule), - map(latex_environment_matcher, Element::LatexEnvironment), - ))(remaining) - { - the_ok @ Ok(_) => the_ok, - Err(_) => { - if can_be_paragraph { - match map(paragraph_matcher, Element::Paragraph)(remaining) { - the_ok @ Ok(_) => the_ok, - Err(_) => { - affiliated_keywords.clear(); - map(keyword_matcher, Element::Keyword)(input) - } - } - } else { - affiliated_keywords.clear(); - map(keyword_matcher, Element::Keyword)(input) - } - } - }?; - - let (remaining, 
_trailing_ws) = - maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; - - let source = get_consumed(input, remaining); - element.set_source(source); - - Ok((remaining, element)) - } + move |context: Context, input: &str| _element(context, input, can_be_paragraph) +} + +#[tracing::instrument(ret, level = "debug")] +fn _element<'r, 's>( + context: Context<'r, 's>, + input: &'s str, + can_be_paragraph: bool, +) -> Res<&'s str, Element<'s>> { + let plain_list_matcher = parser_with_context!(plain_list)(context); + let greater_block_matcher = parser_with_context!(greater_block)(context); + let dynamic_block_matcher = parser_with_context!(dynamic_block)(context); + let footnote_definition_matcher = parser_with_context!(footnote_definition)(context); + let comment_matcher = parser_with_context!(comment)(context); + let drawer_matcher = parser_with_context!(drawer)(context); + let table_matcher = parser_with_context!(org_mode_table)(context); + let verse_block_matcher = parser_with_context!(verse_block)(context); + let comment_block_matcher = parser_with_context!(comment_block)(context); + let example_block_matcher = parser_with_context!(example_block)(context); + let export_block_matcher = parser_with_context!(export_block)(context); + let src_block_matcher = parser_with_context!(src_block)(context); + let clock_matcher = parser_with_context!(clock)(context); + let diary_sexp_matcher = parser_with_context!(diary_sexp)(context); + let fixed_width_area_matcher = parser_with_context!(fixed_width_area)(context); + let horizontal_rule_matcher = parser_with_context!(horizontal_rule)(context); + let keyword_matcher = parser_with_context!(keyword)(context); + let paragraph_matcher = parser_with_context!(paragraph)(context); + let latex_environment_matcher = parser_with_context!(latex_environment)(context); + + let (remaining, mut affiliated_keywords) = many0(keyword_matcher)(input)?; + let (remaining, mut element) = match alt(( + map(plain_list_matcher, 
Element::PlainList), + map(greater_block_matcher, Element::GreaterBlock), + map(dynamic_block_matcher, Element::DynamicBlock), + map(footnote_definition_matcher, Element::FootnoteDefinition), + map(comment_matcher, Element::Comment), + map(drawer_matcher, Element::Drawer), + map(table_matcher, Element::Table), + map(verse_block_matcher, Element::VerseBlock), + map(comment_block_matcher, Element::CommentBlock), + map(example_block_matcher, Element::ExampleBlock), + map(export_block_matcher, Element::ExportBlock), + map(src_block_matcher, Element::SrcBlock), + map(clock_matcher, Element::Clock), + map(diary_sexp_matcher, Element::DiarySexp), + map(fixed_width_area_matcher, Element::FixedWidthArea), + map(horizontal_rule_matcher, Element::HorizontalRule), + map(latex_environment_matcher, Element::LatexEnvironment), + ))(remaining) + { + the_ok @ Ok(_) => the_ok, + Err(_) => { + if can_be_paragraph { + match map(paragraph_matcher, Element::Paragraph)(remaining) { + the_ok @ Ok(_) => the_ok, + Err(_) => { + affiliated_keywords.clear(); + map(keyword_matcher, Element::Keyword)(input) + } + } + } else { + affiliated_keywords.clear(); + map(keyword_matcher, Element::Keyword)(input) + } + } + }?; + + let (remaining, _trailing_ws) = + maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; + + let source = get_consumed(input, remaining); + element.set_source(source); + + Ok((remaining, element)) } diff --git a/src/parser/fixed_width_area.rs b/src/parser/fixed_width_area.rs index 147ef87..293aa0d 100644 --- a/src/parser/fixed_width_area.rs +++ b/src/parser/fixed_width_area.rs @@ -16,7 +16,6 @@ use crate::error::Res; use crate::parser::parser_with_context::parser_with_context; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; - use crate::parser::util::start_of_line; use crate::parser::FixedWidthArea; diff --git a/src/parser/footnote_definition.rs b/src/parser/footnote_definition.rs index 00f1077..833360e 100644 --- 
a/src/parser/footnote_definition.rs +++ b/src/parser/footnote_definition.rs @@ -1,3 +1,15 @@ +use nom::branch::alt; +use nom::bytes::complete::tag; +use nom::bytes::complete::tag_no_case; +use nom::bytes::complete::take_while; +use nom::character::complete::digit1; +use nom::character::complete::space0; +use nom::combinator::recognize; +use nom::combinator::verify; +use nom::multi::many1; +use nom::multi::many_till; +use nom::sequence::tuple; + use super::util::WORD_CONSTITUENT_CHARACTERS; use super::Context; use crate::error::CustomError; @@ -14,19 +26,7 @@ use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; use crate::parser::util::immediate_in_section; use crate::parser::util::maybe_consume_trailing_whitespace; - use crate::parser::util::start_of_line; -use nom::branch::alt; -use nom::bytes::complete::tag; -use nom::bytes::complete::tag_no_case; -use nom::bytes::complete::take_while; -use nom::character::complete::digit1; -use nom::character::complete::space0; -use nom::combinator::recognize; -use nom::combinator::verify; -use nom::multi::many1; -use nom::multi::many_till; -use nom::sequence::tuple; #[tracing::instrument(ret, level = "debug")] pub fn footnote_definition<'r, 's>( @@ -107,13 +107,12 @@ fn footnote_definition_end<'r, 's>( #[cfg(test)] mod tests { + use super::*; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ContextTree; use crate::parser::parser_with_context::parser_with_context; use crate::parser::Source; - use super::*; - #[test] fn two_paragraphs() { let input = "[fn:1] A footnote. 
diff --git a/src/parser/greater_block.rs b/src/parser/greater_block.rs index 8ca1fe1..4adb620 100644 --- a/src/parser/greater_block.rs +++ b/src/parser/greater_block.rs @@ -1,3 +1,16 @@ +use nom::branch::alt; +use nom::bytes::complete::is_not; +use nom::bytes::complete::tag_no_case; +use nom::character::complete::line_ending; +use nom::character::complete::space0; +use nom::character::complete::space1; +use nom::combinator::eof; +use nom::combinator::not; +use nom::combinator::opt; +use nom::combinator::verify; +use nom::multi::many_till; +use nom::sequence::tuple; + use super::Context; use crate::error::CustomError; use crate::error::MyError; @@ -8,26 +21,14 @@ use crate::parser::greater_element::GreaterBlock; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ExitMatcherNode; use crate::parser::parser_with_context::parser_with_context; +use crate::parser::source::SetSource; use crate::parser::util::blank_line; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; use crate::parser::util::immediate_in_section; - use crate::parser::util::start_of_line; use crate::parser::Element; use crate::parser::Paragraph; -use nom::branch::alt; -use nom::bytes::complete::is_not; -use nom::bytes::complete::tag_no_case; -use nom::character::complete::line_ending; -use nom::character::complete::space0; -use nom::character::complete::space1; -use nom::combinator::consumed; -use nom::combinator::eof; -use nom::combinator::opt; -use nom::combinator::verify; -use nom::multi::many_till; -use nom::sequence::tuple; #[tracing::instrument(ret, level = "debug")] pub fn greater_block<'r, 's>( @@ -72,11 +73,18 @@ pub fn greater_block<'r, 's>( let element_matcher = parser_with_context!(element(true))(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); // Check for a completely empty block - let (remaining, children) = match consumed(many_till(blank_line, exit_matcher))(remaining) { - 
Ok((remaining, (whitespace, (_children, _exit_contents)))) => ( - remaining, - vec![Element::Paragraph(Paragraph::of_text(whitespace))], - ), + let (remaining, children) = match tuple(( + not(exit_matcher), + blank_line, + many_till(blank_line, exit_matcher), + ))(remaining) + { + Ok((remain, (_not_immediate_exit, first_line, (_trailing_whitespace, _exit_contents)))) => { + let mut element = Element::Paragraph(Paragraph::of_text(first_line)); + let source = get_consumed(remaining, remain); + element.set_source(source); + (remain, vec![element]) + } Err(_) => { let (remaining, (children, _exit_contents)) = many_till(element_matcher, exit_matcher)(remaining)?; diff --git a/src/parser/latex_environment.rs b/src/parser/latex_environment.rs index 78d0209..a1353f7 100644 --- a/src/parser/latex_environment.rs +++ b/src/parser/latex_environment.rs @@ -60,16 +60,25 @@ fn latex_environment_end( ) -> impl for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, &'s str> { let current_name_lower = current_name.to_lowercase(); move |context: Context, input: &str| { - start_of_line(context, input)?; - let (remaining, _leading_whitespace) = space0(input)?; - let (remaining, (_begin, _name, _close_brace, _ws, _line_ending)) = tuple(( - tag_no_case(r#"\end{"#), - tag_no_case(current_name_lower.as_str()), - tag("}"), - space0, - alt((eof, line_ending)), - ))(remaining)?; - let source = get_consumed(input, remaining); - Ok((remaining, source)) + _latex_environment_end(context, input, current_name_lower.as_str()) } } + +#[tracing::instrument(ret, level = "debug")] +fn _latex_environment_end<'r, 's, 'x>( + context: Context<'r, 's>, + input: &'s str, + current_name_lower: &'x str, +) -> Res<&'s str, &'s str> { + start_of_line(context, input)?; + let (remaining, _leading_whitespace) = space0(input)?; + let (remaining, (_begin, _name, _close_brace, _ws, _line_ending)) = tuple(( + tag_no_case(r#"\end{"#), + tag_no_case(current_name_lower), + tag("}"), + space0, + alt((eof, line_ending)), + 
))(remaining)?; + let source = get_consumed(input, remaining); + Ok((remaining, source)) +} diff --git a/src/parser/lesser_block.rs b/src/parser/lesser_block.rs index 764a556..ace7a8b 100644 --- a/src/parser/lesser_block.rs +++ b/src/parser/lesser_block.rs @@ -1,4 +1,3 @@ -use crate::error::Res; use nom::branch::alt; use nom::bytes::complete::is_not; use nom::bytes::complete::tag_no_case; @@ -7,13 +6,13 @@ use nom::character::complete::space0; use nom::character::complete::space1; use nom::combinator::consumed; use nom::combinator::eof; -use nom::combinator::map; use nom::combinator::opt; use nom::combinator::verify; use nom::multi::many_till; use nom::sequence::tuple; use super::Context; +use crate::error::Res; use crate::parser::exiting::ExitClass; use crate::parser::lesser_element::CommentBlock; use crate::parser::lesser_element::ExampleBlock; @@ -26,12 +25,11 @@ use crate::parser::object_parser::standard_set_object; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ExitMatcherNode; use crate::parser::parser_with_context::parser_with_context; -use crate::parser::plain_text::plain_text; use crate::parser::util::blank_line; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; - use crate::parser::util::start_of_line; +use crate::parser::util::text_until_exit; #[tracing::instrument(ret, level = "debug")] pub fn verse_block<'r, 's>( @@ -103,9 +101,7 @@ pub fn comment_block<'r, 's>( None => None, }; - let (remaining, contents) = map(parser_with_context!(plain_text)(&parser_context), |obj| { - obj.source - })(remaining)?; + let (remaining, contents) = parser_with_context!(text_until_exit)(&parser_context)(remaining)?; let (remaining, _end) = lesser_block_end_specialized(&parser_context, remaining)?; let source = get_consumed(input, remaining); @@ -141,9 +137,7 @@ pub fn example_block<'r, 's>( None => None, }; - let (remaining, contents) = map(parser_with_context!(plain_text)(&parser_context), |obj| { 
- obj.source - })(remaining)?; + let (remaining, contents) = parser_with_context!(text_until_exit)(&parser_context)(remaining)?; let (remaining, _end) = lesser_block_end_specialized(&parser_context, remaining)?; let source = get_consumed(input, remaining); @@ -180,9 +174,7 @@ pub fn export_block<'r, 's>( None => None, }; - let (remaining, contents) = map(parser_with_context!(plain_text)(&parser_context), |obj| { - obj.source - })(remaining)?; + let (remaining, contents) = parser_with_context!(text_until_exit)(&parser_context)(remaining)?; let (remaining, _end) = lesser_block_end_specialized(&parser_context, remaining)?; let source = get_consumed(input, remaining); @@ -216,9 +208,7 @@ pub fn src_block<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s st None => None, }; - let (remaining, contents) = map(parser_with_context!(plain_text)(&parser_context), |obj| { - obj.source - })(remaining)?; + let (remaining, contents) = parser_with_context!(text_until_exit)(&parser_context)(remaining)?; let (remaining, _end) = lesser_block_end_specialized(&parser_context, remaining)?; let source = get_consumed(input, remaining); diff --git a/src/parser/lesser_element.rs b/src/parser/lesser_element.rs index 9c9a67f..cac3adf 100644 --- a/src/parser/lesser_element.rs +++ b/src/parser/lesser_element.rs @@ -1,6 +1,6 @@ use super::object::Object; -use super::object::TextMarkup; use super::source::Source; +use super::PlainText; #[derive(Debug)] pub struct Paragraph<'s> { @@ -97,7 +97,7 @@ pub struct LatexEnvironment<'s> { impl<'s> Paragraph<'s> { pub fn of_text(input: &'s str) -> Self { let mut objects = Vec::with_capacity(1); - objects.push(Object::TextMarkup(TextMarkup { source: input })); + objects.push(Object::PlainText(PlainText { source: input })); Paragraph { source: input, children: objects, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 48c4cb6..0d5e2a6 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -29,6 +29,7 @@ mod property_drawer; pub mod sexp; 
mod source; mod table; +mod text_markup; mod util; pub use document::document; pub use document::Document; @@ -60,5 +61,14 @@ pub use lesser_element::Planning; pub use lesser_element::SrcBlock; pub use lesser_element::TableCell; pub use lesser_element::VerseBlock; +pub use object::Bold; +pub use object::Code; +pub use object::Italic; +pub use object::Object; +pub use object::PlainText; +pub use object::RegularLink; +pub use object::StrikeThrough; +pub use object::Underline; +pub use object::Verbatim; pub use source::Source; type Context<'r, 's> = &'r parser_context::ContextTree<'r, 's>; diff --git a/src/parser/object.rs b/src/parser/object.rs index db0534e..5a3ec31 100644 --- a/src/parser/object.rs +++ b/src/parser/object.rs @@ -2,8 +2,12 @@ use super::source::Source; #[derive(Debug)] pub enum Object<'s> { - #[allow(dead_code)] - TextMarkup(TextMarkup<'s>), + Bold(Bold<'s>), + Italic(Italic<'s>), + Underline(Underline<'s>), + StrikeThrough(StrikeThrough<'s>), + Code(Code<'s>), + Verbatim(Verbatim<'s>), PlainText(PlainText<'s>), @@ -12,8 +16,39 @@ pub enum Object<'s> { } #[derive(Debug)] -pub struct TextMarkup<'s> { +pub struct Bold<'s> { pub source: &'s str, + pub children: Vec>, +} + +#[derive(Debug)] +pub struct Italic<'s> { + pub source: &'s str, + pub children: Vec>, +} + +#[derive(Debug)] +pub struct Underline<'s> { + pub source: &'s str, + pub children: Vec>, +} + +#[derive(Debug)] +pub struct StrikeThrough<'s> { + pub source: &'s str, + pub children: Vec>, +} + +#[derive(Debug)] +pub struct Code<'s> { + pub source: &'s str, + pub contents: &'s str, +} + +#[derive(Debug)] +pub struct Verbatim<'s> { + pub source: &'s str, + pub contents: &'s str, } #[derive(Debug)] @@ -29,9 +64,50 @@ pub struct RegularLink<'s> { impl<'s> Source<'s> for Object<'s> { fn get_source(&'s self) -> &'s str { match self { - Object::TextMarkup(obj) => obj.source, + Object::Bold(obj) => obj.source, + Object::Italic(obj) => obj.source, + Object::Underline(obj) => obj.source, + 
Object::StrikeThrough(obj) => obj.source, + Object::Code(obj) => obj.source, + Object::Verbatim(obj) => obj.source, Object::PlainText(obj) => obj.source, Object::RegularLink(obj) => obj.source, } } } + +impl<'s> Source<'s> for Bold<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} + +impl<'s> Source<'s> for Italic<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} + +impl<'s> Source<'s> for Underline<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} + +impl<'s> Source<'s> for StrikeThrough<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} + +impl<'s> Source<'s> for Code<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} + +impl<'s> Source<'s> for Verbatim<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} diff --git a/src/parser/object_parser.rs b/src/parser/object_parser.rs index a6ce342..a4ba457 100644 --- a/src/parser/object_parser.rs +++ b/src/parser/object_parser.rs @@ -1,12 +1,13 @@ -use crate::error::Res; +use nom::branch::alt; use nom::combinator::map; use nom::combinator::not; -use crate::parser::object::Object; - use super::parser_with_context::parser_with_context; use super::plain_text::plain_text; use super::Context; +use crate::error::Res; +use crate::parser::object::Object; +use crate::parser::text_markup::text_markup; #[tracing::instrument(ret, level = "debug")] pub fn standard_set_object<'r, 's>( @@ -16,9 +17,10 @@ pub fn standard_set_object<'r, 's>( // TODO: add entities, LaTeX fragments, export snippets, footnote references, citations (NOT citation references), inline babel calls, inline source blocks, line breaks, links, macros, targets and radio targets, statistics cookies, subscript and superscript, timestamps, and text markup. 
not(|i| context.check_exit_matcher(i))(input)?; - let plain_text_matcher = parser_with_context!(plain_text)(context); - - map(plain_text_matcher, Object::PlainText)(input) + alt(( + parser_with_context!(text_markup)(context), + map(parser_with_context!(plain_text)(context), Object::PlainText), + ))(input) } #[tracing::instrument(ret, level = "debug")] @@ -29,7 +31,17 @@ pub fn minimal_set_object<'r, 's>( // TODO: add text markup, entities, LaTeX fragments, superscripts and subscripts not(|i| context.check_exit_matcher(i))(input)?; - let plain_text_matcher = parser_with_context!(plain_text)(context); - - map(plain_text_matcher, Object::PlainText)(input) + alt(( + parser_with_context!(text_markup)(context), + map(parser_with_context!(plain_text)(context), Object::PlainText), + ))(input) +} + +#[tracing::instrument(ret, level = "debug")] +pub fn any_object_except_plain_text<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, Object<'s>> { + // Used for exit matchers so this does not check exit matcher condition. 
+ alt((parser_with_context!(text_markup)(context),))(input) } diff --git a/src/parser/paragraph.rs b/src/parser/paragraph.rs index ba9c557..36ee0d8 100644 --- a/src/parser/paragraph.rs +++ b/src/parser/paragraph.rs @@ -1,5 +1,3 @@ -use crate::error::Res; -use crate::parser::element_parser::element; use nom::branch::alt; use nom::combinator::eof; use nom::combinator::recognize; @@ -8,20 +6,20 @@ use nom::multi::many1; use nom::multi::many_till; use nom::sequence::tuple; +use super::lesser_element::Paragraph; +use super::util::blank_line; +use super::util::get_consumed; +use super::Context; +use crate::error::Res; +use crate::parser::element_parser::element; use crate::parser::exiting::ExitClass; use crate::parser::object_parser::standard_set_object; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ExitMatcherNode; use crate::parser::parser_with_context::parser_with_context; use crate::parser::util::exit_matcher_parser; - use crate::parser::util::start_of_line; -use super::lesser_element::Paragraph; -use super::util::blank_line; -use super::util::get_consumed; -use super::Context; - #[tracing::instrument(ret, level = "debug")] pub fn paragraph<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Paragraph<'s>> { let parser_context = diff --git a/src/parser/parser_context.rs b/src/parser/parser_context.rs index 1943eda..232db31 100644 --- a/src/parser/parser_context.rs +++ b/src/parser/parser_context.rs @@ -1,14 +1,15 @@ -use crate::error::CustomError; -use crate::error::MyError; -use crate::error::Res; use std::rc::Rc; +use nom::combinator::eof; +use nom::IResult; + use super::list::List; use super::list::Node; use super::Context; +use crate::error::CustomError; +use crate::error::MyError; +use crate::error::Res; use crate::parser::exiting::ExitClass; -use nom::combinator::eof; -use nom::IResult; type Matcher = dyn for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, &'s str>; diff --git a/src/parser/plain_list.rs 
b/src/parser/plain_list.rs index d84b04a..b606d76 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -1,20 +1,3 @@ -use super::greater_element::PlainList; -use super::greater_element::PlainListItem; -use super::parser_with_context::parser_with_context; - -use super::util::non_whitespace_character; -use super::Context; -use crate::error::CustomError; -use crate::error::MyError; -use crate::error::Res; -use crate::parser::element_parser::element; -use crate::parser::exiting::ExitClass; -use crate::parser::parser_context::ContextElement; -use crate::parser::parser_context::ExitMatcherNode; -use crate::parser::util::blank_line; -use crate::parser::util::exit_matcher_parser; -use crate::parser::util::get_consumed; -use crate::parser::util::start_of_line; use nom::branch::alt; use nom::bytes::complete::tag; use nom::character::complete::digit1; @@ -33,6 +16,23 @@ use nom::sequence::terminated; use nom::sequence::tuple; use tracing::span; +use super::greater_element::PlainList; +use super::greater_element::PlainListItem; +use super::parser_with_context::parser_with_context; +use super::util::non_whitespace_character; +use super::Context; +use crate::error::CustomError; +use crate::error::MyError; +use crate::error::Res; +use crate::parser::element_parser::element; +use crate::parser::exiting::ExitClass; +use crate::parser::parser_context::ContextElement; +use crate::parser::parser_context::ExitMatcherNode; +use crate::parser::util::blank_line; +use crate::parser::util::exit_matcher_parser; +use crate::parser::util::get_consumed; +use crate::parser::util::start_of_line; + #[tracing::instrument(ret, level = "debug")] pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, PlainList<'s>> { let parser_context = context @@ -278,13 +278,12 @@ fn get_context_item_indent<'r, 's>(context: Context<'r, 's>) -> Option<&'r usize #[cfg(test)] mod tests { + use super::*; use crate::parser::parser_context::ContextElement; use 
crate::parser::parser_context::ContextTree; use crate::parser::parser_with_context::parser_with_context; use crate::parser::Source; - use super::*; - #[test] fn plain_list_item_empty() { let input = "1."; diff --git a/src/parser/plain_text.rs b/src/parser/plain_text.rs index 6b515aa..e65faaa 100644 --- a/src/parser/plain_text.rs +++ b/src/parser/plain_text.rs @@ -1,9 +1,16 @@ +use nom::combinator::not; +use nom::combinator::recognize; + use super::object::PlainText; use super::Context; use crate::error::CustomError; use crate::error::MyError; use crate::error::Res; -use nom::combinator::not; +use crate::parser::exiting::ExitClass; +use crate::parser::object_parser::any_object_except_plain_text; +use crate::parser::parser_context::ContextElement; +use crate::parser::parser_context::ExitMatcherNode; +use crate::parser::parser_with_context::parser_with_context; #[tracing::instrument(ret, level = "debug")] pub fn plain_text<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, PlainText<'s>> { @@ -12,12 +19,17 @@ pub fn plain_text<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s "Zero input length to plain_text.", )))); } + let parser_context = + context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Beta, + exit_matcher: &plain_text_end, + })); let mut current_input = input.char_indices(); loop { match current_input.next() { Some((offset, _char)) => { let remaining = &input[offset..]; - let exit_matcher_status = not(|i| context.check_exit_matcher(i))(remaining); + let exit_matcher_status = not(|i| parser_context.check_exit_matcher(i))(remaining); if exit_matcher_status.is_err() { if offset == 0 { // If we're at the start of the input, then nothing is plain text, so fire an error for zero-length match. 
@@ -40,18 +52,22 @@ pub fn plain_text<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s } } +#[tracing::instrument(ret, level = "debug")] +fn plain_text_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + recognize(parser_with_context!(any_object_except_plain_text)(context))(input) +} + #[cfg(test)] mod tests { use nom::combinator::map; + use super::*; use crate::parser::object::Object; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ContextTree; use crate::parser::parser_with_context::parser_with_context; use crate::parser::source::Source; - use super::*; - #[test] fn plain_text_simple() { let input = "foobarbaz"; diff --git a/src/parser/property_drawer.rs b/src/parser/property_drawer.rs index fb347f8..ff8edbb 100644 --- a/src/parser/property_drawer.rs +++ b/src/parser/property_drawer.rs @@ -1,6 +1,3 @@ -use crate::error::CustomError; -use crate::error::MyError; -use crate::error::Res; use nom::branch::alt; use nom::bytes::complete::is_not; use nom::bytes::complete::tag; @@ -16,6 +13,9 @@ use nom::multi::many_till; use nom::sequence::tuple; use super::Context; +use crate::error::CustomError; +use crate::error::MyError; +use crate::error::Res; use crate::parser::exiting::ExitClass; use crate::parser::greater_element::NodeProperty; use crate::parser::greater_element::PropertyDrawer; diff --git a/src/parser/sexp.rs b/src/parser/sexp.rs index ea4a5a6..fc97365 100644 --- a/src/parser/sexp.rs +++ b/src/parser/sexp.rs @@ -1,4 +1,3 @@ -use crate::error::Res; use std::collections::HashMap; use nom::branch::alt; @@ -17,6 +16,8 @@ use nom::sequence::delimited; use nom::sequence::preceded; use nom::sequence::tuple; +use crate::error::Res; + #[derive(Debug)] pub enum Token<'s> { Atom(&'s str), @@ -27,9 +28,48 @@ pub enum Token<'s> { #[derive(Debug)] pub struct TextWithProperties<'s> { #[allow(dead_code)] - text: &'s str, + pub text: &'s str, #[allow(dead_code)] - properties: Vec>, + pub 
properties: Vec>, +} + +impl<'s> TextWithProperties<'s> { + pub fn unquote(&self) -> Result> { + let mut out = String::with_capacity(self.text.len()); + if !self.text.starts_with(r#"""#) { + return Err("Quoted text does not start with quote.".into()); + } + if !self.text.ends_with(r#"""#) { + return Err("Quoted text does not end with quote.".into()); + } + let interior_text = &self.text[1..(self.text.len() - 1)]; + let mut state = ParseState::Normal; + for current_char in interior_text.chars().into_iter() { + state = match (state, current_char) { + (ParseState::Normal, '\\') => ParseState::Escape, + (ParseState::Normal, _) => { + out.push(current_char); + ParseState::Normal + } + (ParseState::Escape, 'n') => { + out.push('\n'); + ParseState::Normal + } + (ParseState::Escape, '\\') => { + out.push('\\'); + ParseState::Normal + } + _ => todo!(), + }; + } + + Ok(out) + } +} + +enum ParseState { + Normal, + Escape, } impl<'s> Token<'s> { @@ -47,6 +87,13 @@ impl<'s> Token<'s> { }?) } + pub fn as_text<'p>(&'p self) -> Result<&'p TextWithProperties<'s>, Box> { + Ok(match self { + Token::TextWithProperties(body) => Ok(body), + _ => Err(format!("wrong token type {:?}", self)), + }?) 
+ } + pub fn as_map<'p>( &'p self, ) -> Result>, Box> { diff --git a/src/parser/table.rs b/src/parser/table.rs index 275e754..9c2025c 100644 --- a/src/parser/table.rs +++ b/src/parser/table.rs @@ -1,4 +1,3 @@ -use crate::error::Res; use nom::branch::alt; use nom::bytes::complete::is_not; use nom::bytes::complete::tag; @@ -14,6 +13,7 @@ use nom::multi::many_till; use nom::sequence::tuple; use super::Context; +use crate::error::Res; use crate::parser::exiting::ExitClass; use crate::parser::greater_element::TableRow; use crate::parser::lesser_element::TableCell; @@ -24,7 +24,6 @@ use crate::parser::parser_context::ExitMatcherNode; use crate::parser::parser_with_context::parser_with_context; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; - use crate::parser::util::start_of_line; use crate::parser::Table; diff --git a/src/parser/text_markup.rs b/src/parser/text_markup.rs new file mode 100644 index 0000000..1c36c51 --- /dev/null +++ b/src/parser/text_markup.rs @@ -0,0 +1,223 @@ +use nom::branch::alt; +use nom::bytes::complete::tag; +use nom::character::complete::anychar; +use nom::character::complete::line_ending; +use nom::character::complete::multispace1; +use nom::character::complete::one_of; +use nom::character::complete::space0; +use nom::combinator::map; +use nom::combinator::not; +use nom::combinator::peek; +use nom::combinator::recognize; +use nom::combinator::verify; +use nom::multi::many_till; +use nom::sequence::terminated; + +use super::Context; +use crate::error::CustomError; +use crate::error::MyError; +use crate::error::Res; +use crate::parser::exiting::ExitClass; +use crate::parser::object_parser::standard_set_object; +use crate::parser::parser_context::ContextElement; +use crate::parser::parser_context::ExitMatcherNode; +use crate::parser::parser_with_context::parser_with_context; +use crate::parser::util::exit_matcher_parser; +use crate::parser::util::get_consumed; +use crate::parser::util::get_one_before; +use 
crate::parser::Bold; +use crate::parser::Code; +use crate::parser::Italic; +use crate::parser::Object; +use crate::parser::StrikeThrough; +use crate::parser::Underline; +use crate::parser::Verbatim; +use crate::parser::util::preceded_by_whitespace; + +#[tracing::instrument(ret, level = "debug")] +pub fn text_markup<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Object<'s>> { + alt(( + map(parser_with_context!(bold)(context), Object::Bold), + map(parser_with_context!(italic)(context), Object::Italic), + map(parser_with_context!(underline)(context), Object::Underline), + map( + parser_with_context!(strike_through)(context), + Object::StrikeThrough, + ), + map(parser_with_context!(verbatim)(context), Object::Verbatim), + map(parser_with_context!(code)(context), Object::Code), + ))(input) +} + +#[tracing::instrument(ret, level = "debug")] +pub fn bold<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Bold<'s>> { + let text_markup_object_specialized = text_markup_object("*"); + let (remaining, children) = text_markup_object_specialized(context, input)?; + let source = get_consumed(input, remaining); + Ok((remaining, Bold { source, children })) +} + +#[tracing::instrument(ret, level = "debug")] +pub fn italic<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Italic<'s>> { + let text_markup_object_specialized = text_markup_object("/"); + let (remaining, children) = text_markup_object_specialized(context, input)?; + let source = get_consumed(input, remaining); + Ok((remaining, Italic { source, children })) +} + +#[tracing::instrument(ret, level = "debug")] +pub fn underline<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Underline<'s>> { + let text_markup_object_specialized = text_markup_object("_"); + let (remaining, children) = text_markup_object_specialized(context, input)?; + let source = get_consumed(input, remaining); + Ok((remaining, Underline { source, children })) +} + +#[tracing::instrument(ret, 
level = "debug")] +pub fn strike_through<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, StrikeThrough<'s>> { + let text_markup_object_specialized = text_markup_object("+"); + let (remaining, children) = text_markup_object_specialized(context, input)?; + let source = get_consumed(input, remaining); + Ok((remaining, StrikeThrough { source, children })) +} + +#[tracing::instrument(ret, level = "debug")] +pub fn verbatim<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Verbatim<'s>> { + let text_markup_string_specialized = text_markup_string("="); + let (remaining, contents) = text_markup_string_specialized(context, input)?; + let source = get_consumed(input, remaining); + Ok((remaining, Verbatim { source, contents })) +} + +#[tracing::instrument(ret, level = "debug")] +pub fn code<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Code<'s>> { + let text_markup_string_specialized = text_markup_string("~"); + let (remaining, contents) = text_markup_string_specialized(context, input)?; + let source = get_consumed(input, remaining); + Ok((remaining, Code { source, contents })) +} + +fn text_markup_object( + marker_symbol: &str, +) -> impl for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, Vec>> { + let marker_symbol = marker_symbol.to_owned(); + move |context: Context, input: &str| _text_markup_object(context, input, marker_symbol.as_str()) +} + +#[tracing::instrument(ret, level = "debug")] +fn _text_markup_object<'r, 's, 'x>( + context: Context<'r, 's>, + input: &'s str, + marker_symbol: &'x str, +) -> Res<&'s str, Vec>> { + let (remaining, _) = pre(context, input)?; + let (remaining, open) = tag(marker_symbol)(remaining)?; + let (remaining, _peek_not_whitespace) = peek(not(multispace1))(remaining)?; + let text_markup_end_specialized = text_markup_end(open); + let parser_context = + context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Beta, + exit_matcher: 
&text_markup_end_specialized, + })); + + let (remaining, (children, _exit_contents)) = verify( + many_till( + parser_with_context!(standard_set_object)(&parser_context), + parser_with_context!(exit_matcher_parser)(&parser_context), + ), + |(children, _exit_contents)| !children.is_empty(), + )(remaining)?; + + // TODO: Sometimes it's plain text, not objects + let (remaining, _close) = text_markup_end_specialized(context, remaining)?; + let (remaining, _trailing_whitespace) = space0(remaining)?; + Ok((remaining, children)) +} + +fn text_markup_string( + marker_symbol: &str, +) -> impl for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, &'s str> { + let marker_symbol = marker_symbol.to_owned(); + move |context: Context, input: &str| _text_markup_string(context, input, marker_symbol.as_str()) +} + +#[tracing::instrument(ret, level = "debug")] +fn _text_markup_string<'r, 's, 'x>( + context: Context<'r, 's>, + input: &'s str, + marker_symbol: &'x str, +) -> Res<&'s str, &'s str> { + let (remaining, _) = pre(context, input)?; + let (remaining, open) = tag(marker_symbol)(remaining)?; + let (remaining, _peek_not_whitespace) = peek(not(multispace1))(remaining)?; + let text_markup_end_specialized = text_markup_end(open); + let parser_context = + context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Beta, + exit_matcher: &text_markup_end_specialized, + })); + + let (remaining, contents) = recognize(verify( + many_till( + anychar, + parser_with_context!(exit_matcher_parser)(&parser_context), + ), + |(children, _exit_contents)| !children.is_empty(), + ))(remaining)?; + + // TODO: Sometimes it's plain text, not objects + let (remaining, _close) = text_markup_end_specialized(context, remaining)?; + let (remaining, _trailing_whitespace) = space0(remaining)?; + Ok((remaining, contents)) +} + +#[tracing::instrument(ret, level = "debug")] +pub fn pre<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> { + let
document_root = context.get_document_root().unwrap(); + let preceding_character = get_one_before(document_root, input) + .map(|slice| slice.chars().next()) + .flatten(); + match preceding_character { + // If None, we are at the start of the file which is technically the beginning of a line. + None | Some('\r') | Some('\n') | Some(' ') | Some('\t') | Some('-') | Some('(') + | Some('{') | Some('\'') | Some('"') => {} + Some(_) => { + // Preceded by a character that is not a valid pre character, so text markup cannot start here + return Err(nom::Err::Error(CustomError::MyError(MyError( + "Not a valid pre character for text markup.", + )))); + } + }; + Ok((input, ())) +} + +#[tracing::instrument(ret, level = "debug")] +pub fn post<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> { + let (remaining, _) = alt((recognize(one_of(" \r\n\t-.,;:!?')}[\"")), line_ending))(input)?; + Ok((remaining, ())) +} + +fn text_markup_end( + marker_symbol: &str, +) -> impl for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, &'s str> { + let marker_symbol = marker_symbol.to_owned(); + move |context: Context, input: &str| _text_markup_end(context, input, marker_symbol.as_str()) +} + +#[tracing::instrument(ret, level = "debug")] +fn _text_markup_end<'r, 's, 'x>( + context: Context<'r, 's>, + input: &'s str, + marker_symbol: &'x str, +) -> Res<&'s str, &'s str> { + not(parser_with_context!(preceded_by_whitespace)(context))(input)?; + let (remaining, _marker) = terminated( + tag(marker_symbol), + peek(parser_with_context!(post)(context)), + )(input)?; + let source = get_consumed(input, remaining); + Ok((remaining, source)) +} diff --git a/src/parser/util.rs b/src/parser/util.rs index b1ed386..fe1169e 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -1,11 +1,5 @@ -use crate::parser::parser_with_context::parser_with_context; - -use super::parser_context::ContextElement; -use super::Context; -use crate::error::CustomError; -use crate::error::MyError; -use crate::error::Res; use nom::branch::alt; +use
nom::character::complete::anychar; use nom::character::complete::line_ending; use nom::character::complete::multispace0; use nom::character::complete::none_of; @@ -15,9 +9,18 @@ use nom::combinator::not; use nom::combinator::opt; use nom::combinator::peek; use nom::combinator::recognize; +use nom::combinator::verify; use nom::multi::many0; +use nom::multi::many_till; use nom::sequence::tuple; +use super::parser_context::ContextElement; +use super::Context; +use crate::error::CustomError; +use crate::error::MyError; +use crate::error::Res; +use crate::parser::parser_with_context::parser_with_context; + pub const WORD_CONSTITUENT_CHARACTERS: &str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; @@ -148,6 +151,28 @@ pub fn start_of_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&' Ok((input, ())) } +/// Check that the character immediately before the input is a space, tab, carriage return, or line feed. +#[tracing::instrument(ret, level = "debug")] +pub fn preceded_by_whitespace<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, ()> { + let document_root = context.get_document_root().unwrap(); + let preceding_character = get_one_before(document_root, input) + .map(|slice| slice.chars().next()) + .flatten(); + match preceding_character { + Some('\n') | Some('\r') | Some(' ') | Some('\t') => {} + // If None, we are at the start of the file which is not allowed + None | Some(_) => { + return Err(nom::Err::Error(CustomError::MyError(MyError( + "Not preceded by whitespace.", + )))); + } + }; + Ok((input, ())) +} + /// Pull one non-whitespace character. /// /// This function only operates on spaces, tabs, carriage returns, and line feeds. It does not handle fancy unicode whitespace.
@@ -203,6 +228,14 @@ pub fn whitespace_eof(input: &str) -> Res<&str, &str> { recognize(tuple((multispace0, eof)))(input) } +#[tracing::instrument(ret, level = "debug")] +pub fn text_until_exit<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + recognize(verify( + many_till(anychar, parser_with_context!(exit_matcher_parser)(context)), + |(children, _exit_contents)| !children.is_empty(), + ))(input) +} + #[cfg(test)] mod tests { use super::*; diff --git a/toy_language.txt b/toy_language.txt index 2323b99..d990b43 100644 --- a/toy_language.txt +++ b/toy_language.txt @@ -1,8 +1 @@ -#+name: foo -#+caption: bar -#+caption: baz -[[file:lorem/ipsum.png]] - -#+name: cat -#+caption: dog -[[file:lorem/ipsum.png]] +foo *bar baz * lorem* ipsum