From b0b287cd479df057e398b92e14c8529e44d01abd Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Wed, 18 Oct 2023 11:57:39 -0400 Subject: [PATCH 1/7] Handle bullshitium for :end:. --- build.rs | 2 - src/parser/bullshitium.rs | 71 ++++++++++++++++++++++++++++++++++++ src/parser/element_parser.rs | 10 +++++ src/parser/mod.rs | 1 + src/parser/org_source.rs | 10 +++++ 5 files changed, 92 insertions(+), 2 deletions(-) create mode 100644 src/parser/bullshitium.rs diff --git a/build.rs b/build.rs index 6cd4838..b64c6a9 100644 --- a/build.rs +++ b/build.rs @@ -68,8 +68,6 @@ fn write_test(test_file: &mut File, test: &walkdir::DirEntry) { #[cfg(feature = "compare")] fn is_expect_fail(name: &str) -> Option<&str> { match name { - "greater_element_drawer_drawer_with_headline_inside" => Some("Apparently lines with :end: become their own paragraph. This odd behavior needs to be investigated more."), - "element_container_priority_footnote_definition_dynamic_block" => Some("Apparently broken begin lines become their own paragraph."), _ => None, } } diff --git a/src/parser/bullshitium.rs b/src/parser/bullshitium.rs new file mode 100644 index 0000000..f6982b6 --- /dev/null +++ b/src/parser/bullshitium.rs @@ -0,0 +1,71 @@ +use nom::bytes::complete::tag_no_case; +use nom::character::complete::space0; +use nom::sequence::tuple; + +use super::paragraph::paragraph; +use super::util::get_consumed; +use super::util::org_line_ending; +use super::util::start_of_line; +use super::OrgSource; +use crate::context::RefContext; +use crate::error::Res; +use crate::types::Object; +use crate::types::Paragraph; + +#[cfg_attr( + feature = "tracing", + tracing::instrument(ret, level = "debug", skip(context)) +)] +pub(crate) fn bullshitium<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, Paragraph<'s>> { + broken_end(context, input) +} + +#[cfg_attr( + feature = "tracing", + tracing::instrument(ret, level = "debug", skip(context)) +)] +pub(crate) fn detect_bullshitium<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, ()> { + bullshitium(context, input)?; + Ok((input, ())) +} + +#[cfg_attr( + feature = "tracing", + tracing::instrument(ret, level = "debug", skip(context)) +)] +pub(crate) fn broken_end<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, Paragraph<'s>> { + start_of_line(input)?; + let (remaining, _) = space0(input)?; + let (remaining, _) = tag_no_case(":end:")(remaining)?; + let (lead_in_remaining, _) = tuple((space0, org_line_ending))(remaining)?; + if let Ok((remaining, mut paragraph)) = + paragraph(std::iter::empty(), lead_in_remaining, context, input) + { + match paragraph.children.first_mut() { + Some(Object::PlainText(plain_text)) => { + plain_text.source = input.get_until_end_of_str(plain_text.source).into(); + } + Some(obj) => { + panic!("Unhandled first object type inside bullshitium {:?}", obj); + } + None => { + unreachable!("Paragraph must have children."); + } + }; + Ok((remaining, paragraph)) + } else { + Ok(( + lead_in_remaining, + Paragraph::of_text(input.get_until(lead_in_remaining).into()), + )) + } +} diff --git a/src/parser/element_parser.rs b/src/parser/element_parser.rs index 92988db..774abd8 100644 --- a/src/parser/element_parser.rs +++ b/src/parser/element_parser.rs @@ -32,6 +32,8 @@ use crate::event_count::record_event; #[cfg(feature = "event_count")] use crate::event_count::EventType; use crate::parser::affiliated_keyword::affiliated_keywords; +use crate::parser::bullshitium::bullshitium; +use crate::parser::bullshitium::detect_bullshitium; use crate::parser::macros::ak_element; use crate::parser::macros::element; use crate::parser::table::org_mode_table; @@ -242,6 +244,9 @@ fn _element<'b, 'g, 'r, 's>( ); if can_be_paragraph { + // Fake paragraphs + element!(bullshitium, context, input, Element::Paragraph); + // Paragraph without affiliated keyword ak_element!( paragraph, @@ -319,6 +324,11 @@ fn _detect_element<'b, 'g, 'r, 's>( input ); + // Fake paragraphs + if !can_be_paragraph { + element!(detect_bullshitium, context, input); + } + if _element(context, input, can_be_paragraph).is_ok() { return Ok((input, ())); } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index dff7f19..73e323e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,6 +1,7 @@ mod affiliated_keyword; mod angle_link; mod babel_call; +mod bullshitium; mod citation; mod citation_reference; mod clock; diff --git a/src/parser/org_source.rs b/src/parser/org_source.rs index be27b48..01dfcc3 100644 --- a/src/parser/org_source.rs +++ b/src/parser/org_source.rs @@ -82,6 +82,16 @@ impl<'s> OrgSource<'s> { self.slice(..(other.end - self.start)) } + pub(crate) fn get_until_end_of_str(&self, other: &'s str) -> OrgSource<'s> { + let full_source_start = self.full_source.as_ptr() as usize; + let full_found_end = full_source_start + self.full_source.len(); + let other_start = other.as_ptr() as usize - full_source_start; + let other_end = other_start + other.len(); + debug_assert!(other_start >= self.start); + debug_assert!(other_end <= self.end); + self.slice(..(other_end - self.start)) + } + pub(crate) fn get_start_of_line(&self) -> OrgSource<'s> { let skipped_text = self.text_since_line_break(); let mut bracket_depth = self.bracket_depth; From cf5d3ed7450229c699237c9f0dc4c5c0b7b43f77 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Wed, 18 Oct 2023 11:59:55 -0400 Subject: [PATCH 2/7] Add tests for the :end: bullshitium. --- org_mode_samples/bullshitium/fake_paragraph/broken_end.org | 3 +++ .../bullshitium/fake_paragraph/broken_end_isolated.org | 2 ++ src/parser/bullshitium.rs | 1 - src/parser/org_source.rs | 1 - 4 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 org_mode_samples/bullshitium/fake_paragraph/broken_end.org create mode 100644 org_mode_samples/bullshitium/fake_paragraph/broken_end_isolated.org diff --git a/org_mode_samples/bullshitium/fake_paragraph/broken_end.org b/org_mode_samples/bullshitium/fake_paragraph/broken_end.org new file mode 100644 index 0000000..c9c354a --- /dev/null +++ b/org_mode_samples/bullshitium/fake_paragraph/broken_end.org @@ -0,0 +1,3 @@ +foo +:end: +bar diff --git a/org_mode_samples/bullshitium/fake_paragraph/broken_end_isolated.org b/org_mode_samples/bullshitium/fake_paragraph/broken_end_isolated.org new file mode 100644 index 0000000..8209301 --- /dev/null +++ b/org_mode_samples/bullshitium/fake_paragraph/broken_end_isolated.org @@ -0,0 +1,2 @@ +foo +:end: diff --git a/src/parser/bullshitium.rs b/src/parser/bullshitium.rs index f6982b6..10f5694 100644 --- a/src/parser/bullshitium.rs +++ b/src/parser/bullshitium.rs @@ -3,7 +3,6 @@ use nom::character::complete::space0; use nom::sequence::tuple; use super::paragraph::paragraph; -use super::util::get_consumed; use super::util::org_line_ending; use super::util::start_of_line; use super::OrgSource; diff --git a/src/parser/org_source.rs b/src/parser/org_source.rs index 01dfcc3..ca2d1a4 100644 --- a/src/parser/org_source.rs +++ b/src/parser/org_source.rs @@ -84,7 +84,6 @@ impl<'s> OrgSource<'s> { pub(crate) fn get_until_end_of_str(&self, other: &'s str) -> OrgSource<'s> { let full_source_start = self.full_source.as_ptr() as usize; - let full_found_end = full_source_start + self.full_source.len(); let other_start = other.as_ptr() as usize - full_source_start; let other_end = other_start + other.len(); debug_assert!(other_start >= self.start); From 94dec311309e71cd2a5e41b72c507f560837cc2a Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Wed, 18 Oct 2023 12:17:57 -0400 Subject: [PATCH 3/7] Consuming trailing whitespace for :end: bullshitium. --- src/parser/bullshitium.rs | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/parser/bullshitium.rs b/src/parser/bullshitium.rs index 10f5694..3a7afc0 100644 --- a/src/parser/bullshitium.rs +++ b/src/parser/bullshitium.rs @@ -3,13 +3,16 @@ use nom::character::complete::space0; use nom::sequence::tuple; use super::paragraph::paragraph; +use super::util::maybe_consume_trailing_whitespace_if_not_exiting; use super::util::org_line_ending; use super::util::start_of_line; use super::OrgSource; use crate::context::RefContext; use crate::error::Res; +use crate::types::AffiliatedKeywords; use crate::types::Object; use crate::types::Paragraph; +use crate::types::PlainText; #[cfg_attr( feature = "tracing", @@ -62,9 +65,18 @@ pub(crate) fn broken_end<'b, 'g, 'r, 's>( }; Ok((remaining, paragraph)) } else { + let (remaining, _trailing_ws) = + maybe_consume_trailing_whitespace_if_not_exiting(context, lead_in_remaining)?; + Ok(( - lead_in_remaining, - Paragraph::of_text(input.get_until(lead_in_remaining).into()), + remaining, + Paragraph { + source: input.get_until(remaining).into(), + affiliated_keywords: AffiliatedKeywords::default(), + children: vec![Object::PlainText(PlainText { + source: input.get_until(lead_in_remaining).into(), + })], + }, )) } } From 353ff07420020b5c464dc00419ec195afbee9ee7 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Wed, 18 Oct 2023 12:32:48 -0400 Subject: [PATCH 4/7] Handle bullshitium for broken dynamic blocks. --- src/parser/bullshitium.rs | 53 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/src/parser/bullshitium.rs b/src/parser/bullshitium.rs index 3a7afc0..ca6dda9 100644 --- a/src/parser/bullshitium.rs +++ b/src/parser/bullshitium.rs @@ -1,5 +1,8 @@ +use nom::branch::alt; use nom::bytes::complete::tag_no_case; +use nom::character::complete::anychar; use nom::character::complete::space0; +use nom::multi::many_till; use nom::sequence::tuple; use super::paragraph::paragraph; @@ -7,6 +10,7 @@ use super::util::maybe_consume_trailing_whitespace_if_not_exiting; use super::util::org_line_ending; use super::util::start_of_line; use super::OrgSource; +use crate::context::bind_context; use crate::context::RefContext; use crate::error::Res; use crate::types::AffiliatedKeywords; @@ -22,7 +26,10 @@ pub(crate) fn bullshitium<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Paragraph<'s>> { - broken_end(context, input) + alt(( + bind_context!(broken_end, context), + bind_context!(broken_dynamic_block, context), + ))(input) } #[cfg_attr( @@ -80,3 +87,47 @@ pub(crate) fn broken_end<'b, 'g, 'r, 's>( )) } } + +#[cfg_attr( + feature = "tracing", + tracing::instrument(ret, level = "debug", skip(context)) +)] +pub(crate) fn broken_dynamic_block<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, Paragraph<'s>> { + start_of_line(input)?; + let (remaining, _) = space0(input)?; + let (remaining, _) = tag_no_case("#+BEGIN:")(remaining)?; + let (lead_in_remaining, _) = many_till(anychar, org_line_ending)(remaining)?; + if let Ok((remaining, mut paragraph)) = + paragraph(std::iter::empty(), lead_in_remaining, context, input) + { + match paragraph.children.first_mut() { + Some(Object::PlainText(plain_text)) => { + plain_text.source = input.get_until_end_of_str(plain_text.source).into(); + } + Some(obj) => { + panic!("Unhandled first object type inside bullshitium {:?}", obj); + } + None => { + unreachable!("Paragraph must have children."); + } + }; + Ok((remaining, paragraph)) + } else { + let (remaining, _trailing_ws) = + maybe_consume_trailing_whitespace_if_not_exiting(context, lead_in_remaining)?; + + Ok(( + remaining, + Paragraph { + source: input.get_until(remaining).into(), + affiliated_keywords: AffiliatedKeywords::default(), + children: vec![Object::PlainText(PlainText { + source: input.get_until(lead_in_remaining).into(), + })], + }, + )) + } +} From e111b8b9b87fca73537dc961e58984cce57d3015 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Wed, 18 Oct 2023 12:36:06 -0400 Subject: [PATCH 5/7] Performance optimization. --- src/parser/bullshitium.rs | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/src/parser/bullshitium.rs b/src/parser/bullshitium.rs index ca6dda9..27151c0 100644 --- a/src/parser/bullshitium.rs +++ b/src/parser/bullshitium.rs @@ -12,7 +12,9 @@ use super::util::start_of_line; use super::OrgSource; use crate::context::bind_context; use crate::context::RefContext; +use crate::error::CustomError; use crate::error::Res; +use crate::parser::macros::element; use crate::types::AffiliatedKeywords; use crate::types::Object; use crate::types::Paragraph; @@ -40,8 +42,9 @@ pub(crate) fn detect_bullshitium<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, ()> { - bullshitium(context, input)?; - Ok((input, ())) + element!(detect_broken_end, context, input); + element!(detect_broken_dynamic_block, context, input); + Err(nom::Err::Error(CustomError::Static("No bullshitium."))) } #[cfg_attr( @@ -88,6 +91,21 @@ pub(crate) fn broken_end<'b, 'g, 'r, 's>( } } +#[cfg_attr( + feature = "tracing", + tracing::instrument(ret, level = "debug", skip(context)) +)] +pub(crate) fn detect_broken_end<'b, 'g, 'r, 's>( + _context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, ()> { + start_of_line(input)?; + let (remaining, _) = space0(input)?; + let (remaining, _) = tag_no_case(":end:")(remaining)?; + let (_remaining, _) = tuple((space0, org_line_ending))(remaining)?; + Ok((input, ())) +} + #[cfg_attr( feature = "tracing", tracing::instrument(ret, level = "debug", skip(context)) @@ -131,3 +149,17 @@ pub(crate) fn broken_dynamic_block<'b, 'g, 'r, 's>( )) } } + +#[cfg_attr( + feature = "tracing", + tracing::instrument(ret, level = "debug", skip(context)) +)] +pub(crate) fn detect_broken_dynamic_block<'b, 'g, 'r, 's>( + _context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, ()> { + start_of_line(input)?; + let (remaining, _) = space0(input)?; + let (_remaining, _) = tag_no_case("#+BEGIN:")(remaining)?; + Ok((input, ())) +} From 269e23c1b1172676fa8b593e2f76f6cc380fd2f4 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Wed, 18 Oct 2023 12:41:12 -0400 Subject: [PATCH 6/7] No more expect-fail tests! --- build.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/build.rs b/build.rs index b64c6a9..98b7bc7 100644 --- a/build.rs +++ b/build.rs @@ -66,8 +66,6 @@ fn write_test(test_file: &mut File, test: &walkdir::DirEntry) { } #[cfg(feature = "compare")] -fn is_expect_fail(name: &str) -> Option<&str> { - match name { - _ => None, - } +fn is_expect_fail(_name: &str) -> Option<&str> { + None } From 68f3f2e1592b1b5400208e4852c6596ee248676f Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Wed, 18 Oct 2023 12:42:09 -0400 Subject: [PATCH 7/7] Clippy fixes. --- src/parser/bullshitium.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parser/bullshitium.rs b/src/parser/bullshitium.rs index 27151c0..4e562d7 100644 --- a/src/parser/bullshitium.rs +++ b/src/parser/bullshitium.rs @@ -93,7 +93,7 @@ pub(crate) fn broken_end<'b, 'g, 'r, 's>( #[cfg_attr( feature = "tracing", - tracing::instrument(ret, level = "debug", skip(context)) + tracing::instrument(ret, level = "debug", skip(_context)) )] pub(crate) fn detect_broken_end<'b, 'g, 'r, 's>( _context: RefContext<'b, 'g, 'r, 's>, @@ -152,7 +152,7 @@ pub(crate) fn broken_dynamic_block<'b, 'g, 'r, 's>( #[cfg_attr( feature = "tracing", - tracing::instrument(ret, level = "debug", skip(context)) + tracing::instrument(ret, level = "debug", skip(_context)) )] pub(crate) fn detect_broken_dynamic_block<'b, 'g, 'r, 's>( _context: RefContext<'b, 'g, 'r, 's>,