diff --git a/build.rs b/build.rs
index 6cd4838..98b7bc7 100644
--- a/build.rs
+++ b/build.rs
@@ -66,10 +66,6 @@ fn write_test(test_file: &mut File, test: &walkdir::DirEntry) {
 }
 
 #[cfg(feature = "compare")]
-fn is_expect_fail(name: &str) -> Option<&str> {
-    match name {
-        "greater_element_drawer_drawer_with_headline_inside" => Some("Apparently lines with :end: become their own paragraph. This odd behavior needs to be investigated more."),
-        "element_container_priority_footnote_definition_dynamic_block" => Some("Apparently broken begin lines become their own paragraph."),
-        _ => None,
-    }
+fn is_expect_fail(_name: &str) -> Option<&str> {
+    None
 }
diff --git a/org_mode_samples/bullshitium/fake_paragraph/broken_end.org b/org_mode_samples/bullshitium/fake_paragraph/broken_end.org
new file mode 100644
index 0000000..c9c354a
--- /dev/null
+++ b/org_mode_samples/bullshitium/fake_paragraph/broken_end.org
@@ -0,0 +1,3 @@
+foo
+:end:
+bar
diff --git a/org_mode_samples/bullshitium/fake_paragraph/broken_end_isolated.org b/org_mode_samples/bullshitium/fake_paragraph/broken_end_isolated.org
new file mode 100644
index 0000000..8209301
--- /dev/null
+++ b/org_mode_samples/bullshitium/fake_paragraph/broken_end_isolated.org
@@ -0,0 +1,2 @@
+foo
+:end:
diff --git a/src/parser/bullshitium.rs b/src/parser/bullshitium.rs
new file mode 100644
index 0000000..4e562d7
--- /dev/null
+++ b/src/parser/bullshitium.rs
@@ -0,0 +1,159 @@
+//! Fake paragraphs: lines that resemble the boundary of another element (a stray `:end:` or
+//! a broken `#+BEGIN:`) but are parsed as the beginning of a paragraph.
+
+use nom::branch::alt;
+use nom::bytes::complete::tag_no_case;
+use nom::character::complete::anychar;
+use nom::character::complete::space0;
+use nom::multi::many_till;
+use nom::sequence::tuple;
+
+use super::paragraph::paragraph;
+use super::util::maybe_consume_trailing_whitespace_if_not_exiting;
+use super::util::org_line_ending;
+use super::util::start_of_line;
+use super::OrgSource;
+use crate::context::bind_context;
+use crate::context::RefContext;
+use crate::error::CustomError;
+use crate::error::Res;
+use crate::parser::macros::element;
+use crate::types::AffiliatedKeywords;
+use crate::types::Object;
+use crate::types::Paragraph;
+use crate::types::PlainText;
+
+/// Parse a fake paragraph: tries [`broken_end`] first, then [`broken_dynamic_block`].
+#[cfg_attr(
+    feature = "tracing",
+    tracing::instrument(ret, level = "debug", skip(context))
+)]
+pub(crate) fn bullshitium<'b, 'g, 'r, 's>(
+    context: RefContext<'b, 'g, 'r, 's>,
+    input: OrgSource<'s>,
+) -> Res<OrgSource<'s>, Paragraph<'s>> {
+    alt((
+        bind_context!(broken_end, context),
+        bind_context!(broken_dynamic_block, context),
+    ))(input)
+}
+
+/// Check whether `input` begins a fake paragraph, using the cheaper `detect_*` variants.
+#[cfg_attr(
+    feature = "tracing",
+    tracing::instrument(ret, level = "debug", skip(context))
+)]
+pub(crate) fn detect_bullshitium<'b, 'g, 'r, 's>(
+    context: RefContext<'b, 'g, 'r, 's>,
+    input: OrgSource<'s>,
+) -> Res<OrgSource<'s>, ()> {
+    element!(detect_broken_end, context, input);
+    element!(detect_broken_dynamic_block, context, input);
+    Err(nom::Err::Error(CustomError::Static("No bullshitium.")))
+}
+
+/// Build the paragraph for a broken line that spans `input` up to `lead_in_remaining`.
+///
+/// If a regular paragraph parses right after the broken line, the broken line is folded
+/// into it; otherwise the broken line forms a paragraph of its own.
+fn fake_paragraph<'b, 'g, 'r, 's>(
+    context: RefContext<'b, 'g, 'r, 's>,
+    input: OrgSource<'s>,
+    lead_in_remaining: OrgSource<'s>,
+) -> Res<OrgSource<'s>, Paragraph<'s>> {
+    if let Ok((remaining, mut paragraph)) =
+        paragraph(std::iter::empty(), lead_in_remaining, context, input)
+    {
+        if let Some(Object::PlainText(plain_text)) = paragraph.children.first_mut() {
+            // Stretch the leading text backwards so it also covers the broken line.
+            plain_text.source = input.get_until_end_of_str(plain_text.source).into();
+        } else {
+            // The paragraph does not open with plain text: give the broken line a text
+            // object of its own rather than aborting on otherwise valid input.
+            paragraph.children.insert(
+                0,
+                Object::PlainText(PlainText {
+                    source: input.get_until(lead_in_remaining).into(),
+                }),
+            );
+        }
+        Ok((remaining, paragraph))
+    } else {
+        let (remaining, _trailing_ws) =
+            maybe_consume_trailing_whitespace_if_not_exiting(context, lead_in_remaining)?;
+
+        Ok((
+            remaining,
+            Paragraph {
+                source: input.get_until(remaining).into(),
+                affiliated_keywords: AffiliatedKeywords::default(),
+                children: vec![Object::PlainText(PlainText {
+                    source: input.get_until(lead_in_remaining).into(),
+                })],
+            },
+        ))
+    }
+}
+
+/// Parse a line holding only `:end:` (case-insensitive) as the start of a paragraph.
+#[cfg_attr(
+    feature = "tracing",
+    tracing::instrument(ret, level = "debug", skip(context))
+)]
+pub(crate) fn broken_end<'b, 'g, 'r, 's>(
+    context: RefContext<'b, 'g, 'r, 's>,
+    input: OrgSource<'s>,
+) -> Res<OrgSource<'s>, Paragraph<'s>> {
+    start_of_line(input)?;
+    let (remaining, _) = space0(input)?;
+    let (remaining, _) = tag_no_case(":end:")(remaining)?;
+    let (lead_in_remaining, _) = tuple((space0, org_line_ending))(remaining)?;
+    fake_paragraph(context, input, lead_in_remaining)
+}
+
+/// Recognize a line holding only `:end:`; on success the returned remainder is `input`.
+#[cfg_attr(
+    feature = "tracing",
+    tracing::instrument(ret, level = "debug", skip(_context))
+)]
+pub(crate) fn detect_broken_end<'b, 'g, 'r, 's>(
+    _context: RefContext<'b, 'g, 'r, 's>,
+    input: OrgSource<'s>,
+) -> Res<OrgSource<'s>, ()> {
+    start_of_line(input)?;
+    let (remaining, _) = space0(input)?;
+    let (remaining, _) = tag_no_case(":end:")(remaining)?;
+    let (_remaining, _) = tuple((space0, org_line_ending))(remaining)?;
+    Ok((input, ()))
+}
+
+/// Parse a `#+BEGIN:` line (through its line ending) as the start of a paragraph.
+#[cfg_attr(
+    feature = "tracing",
+    tracing::instrument(ret, level = "debug", skip(context))
+)]
+pub(crate) fn broken_dynamic_block<'b, 'g, 'r, 's>(
+    context: RefContext<'b, 'g, 'r, 's>,
+    input: OrgSource<'s>,
+) -> Res<OrgSource<'s>, Paragraph<'s>> {
+    start_of_line(input)?;
+    let (remaining, _) = space0(input)?;
+    let (remaining, _) = tag_no_case("#+BEGIN:")(remaining)?;
+    let (lead_in_remaining, _) = many_till(anychar, org_line_ending)(remaining)?;
+    fake_paragraph(context, input, lead_in_remaining)
+}
+
+/// Recognize a line opening with `#+BEGIN:`; on success the returned remainder is `input`.
+#[cfg_attr(
+    feature = "tracing",
+    tracing::instrument(ret, level = "debug", skip(_context))
+)]
+pub(crate) fn detect_broken_dynamic_block<'b, 'g, 'r, 's>(
+    _context: RefContext<'b, 'g, 'r, 's>,
+    input: OrgSource<'s>,
+) -> Res<OrgSource<'s>, ()> {
+    start_of_line(input)?;
+    let (remaining, _) = space0(input)?;
+    let (_remaining, _) = tag_no_case("#+BEGIN:")(remaining)?;
+    Ok((input, ()))
+}
diff --git a/src/parser/element_parser.rs b/src/parser/element_parser.rs
index 92988db..774abd8 100644
--- a/src/parser/element_parser.rs
+++ b/src/parser/element_parser.rs
@@ -32,6 +32,8 @@ use crate::event_count::record_event;
 #[cfg(feature = "event_count")]
 use crate::event_count::EventType;
 use crate::parser::affiliated_keyword::affiliated_keywords;
+use crate::parser::bullshitium::bullshitium;
+use crate::parser::bullshitium::detect_bullshitium;
 use crate::parser::macros::ak_element;
 use crate::parser::macros::element;
 use crate::parser::table::org_mode_table;
@@ -242,6 +244,9 @@ fn _element<'b, 'g, 'r, 's>(
     );
 
     if can_be_paragraph {
+        // Fake paragraphs
+        element!(bullshitium, context, input, Element::Paragraph);
+
+        // Paragraph without affiliated keyword
         ak_element!(
             paragraph,
@@ -319,6 +324,11 @@ fn _detect_element<'b, 'g, 'r, 's>(
         input
     );
 
+    // Fake paragraphs
+    if !can_be_paragraph {
+        element!(detect_bullshitium, context, input);
+    }
+
     if _element(context, input, can_be_paragraph).is_ok() {
         return Ok((input, ()));
     }
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index dff7f19..73e323e 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -1,6 +1,7 @@
 mod affiliated_keyword;
 mod angle_link;
 mod babel_call;
+mod bullshitium;
 mod citation;
 mod citation_reference;
 mod clock;
diff --git a/src/parser/org_source.rs b/src/parser/org_source.rs
index be27b48..ca2d1a4 100644
--- a/src/parser/org_source.rs
+++ b/src/parser/org_source.rs
@@ -82,6 +82,19 @@ impl<'s> OrgSource<'s> {
         self.slice(..(other.end - self.start))
     }
 
+    /// Slice from the start of this source through the end of `other`.
+    ///
+    /// `other` must point into the same full source and lie within this slice; that is
+    /// only verified by debug assertions.
+    pub(crate) fn get_until_end_of_str(&self, other: &'s str) -> OrgSource<'s> {
+        let full_source_start = self.full_source.as_ptr() as usize;
+        let other_start = other.as_ptr() as usize - full_source_start;
+        let other_end = other_start + other.len();
+        debug_assert!(other_start >= self.start);
+        debug_assert!(other_end <= self.end);
+        self.slice(..(other_end - self.start))
+    }
+
     pub(crate) fn get_start_of_line(&self) -> OrgSource<'s> {
         let skipped_text = self.text_since_line_break();
         let mut bracket_depth = self.bracket_depth;