diff --git a/org_mode_samples/document/empty.org b/org_mode_samples/document/empty.org new file mode 100644 index 0000000..e69de29 diff --git a/org_mode_samples/document/only_line_breaks.org b/org_mode_samples/document/only_line_breaks.org new file mode 100644 index 0000000..fd40910 --- /dev/null +++ b/org_mode_samples/document/only_line_breaks.org @@ -0,0 +1,4 @@ + + + + diff --git a/org_mode_samples/document/post_blank.org b/org_mode_samples/document/post_blank.org new file mode 100644 index 0000000..a473d61 --- /dev/null +++ b/org_mode_samples/document/post_blank.org @@ -0,0 +1,5 @@ +* foo + + + + diff --git a/org_mode_samples/greater_element/property_drawer/empty.org b/org_mode_samples/greater_element/property_drawer/empty.org index d86fd5b..cf5bb6c 100644 --- a/org_mode_samples/greater_element/property_drawer/empty.org +++ b/org_mode_samples/greater_element/property_drawer/empty.org @@ -1,3 +1,32 @@ +* Empty +:PROPERTIES: +:END: +* Single new line :PROPERTIES: +:END: +* Single line with spaces +:PROPERTIES: + +:END: +* Many lines, first line without spaces +:PROPERTIES: + + + + +:END: +* Many lines, first line with spaces +:PROPERTIES: + + + + +:END: +* Many lines, first line with spaces, later line with spaces +:PROPERTIES: + + + + :END: diff --git a/org_mode_samples/lesser_element/babel_call/simple.org b/org_mode_samples/lesser_element/babel_call/simple.org index bafcb01..27d102c 100644 --- a/org_mode_samples/lesser_element/babel_call/simple.org +++ b/org_mode_samples/lesser_element/babel_call/simple.org @@ -5,3 +5,5 @@ #+call: dolar cat(dog) #+call: (bat) + +#+call: diff --git a/org_mode_samples/lesser_element/fixed_width_area/blank_line_in_middle.org b/org_mode_samples/lesser_element/fixed_width_area/blank_line_in_middle.org new file mode 100644 index 0000000..589a196 --- /dev/null +++ b/org_mode_samples/lesser_element/fixed_width_area/blank_line_in_middle.org @@ -0,0 +1,3 @@ +: foo +: +: bar diff --git a/org_mode_samples/lesser_element/fixed_width_area/list_post_blank.org b/org_mode_samples/lesser_element/fixed_width_area/list_post_blank.org new file mode 100644 index 0000000..1d5bb9d --- /dev/null +++ b/org_mode_samples/lesser_element/fixed_width_area/list_post_blank.org @@ -0,0 +1,6 @@ +1. foo + #+begin_src text + + #+end_src + +2. baz diff --git a/org_mode_samples/object/target/simple.org b/org_mode_samples/object/target/simple.org new file mode 100644 index 0000000..2a5e00b --- /dev/null +++ b/org_mode_samples/object/target/simple.org @@ -0,0 +1,3 @@ +<> bar + +[[FOO][baz]] diff --git a/org_mode_samples/sections_and_headings/heading_with_subheading_post_blank.org b/org_mode_samples/sections_and_headings/heading_with_subheading_post_blank.org new file mode 100644 index 0000000..e60807f --- /dev/null +++ b/org_mode_samples/sections_and_headings/heading_with_subheading_post_blank.org @@ -0,0 +1,5 @@ +* foo + +** bar + +* baz diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 0dbf851..6475a05 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -57,7 +57,6 @@ use crate::types::FixedWidthArea; use crate::types::FootnoteDefinition; use crate::types::FootnoteReference; use crate::types::FootnoteReferenceType; -use crate::types::GetStandardProperties; use crate::types::Heading; use crate::types::HorizontalRule; use crate::types::Hour; @@ -413,7 +412,7 @@ pub(crate) fn compare_ast_node<'b, 's>( name: rust.get_elisp_fact().get_elisp_name(), message: Some(e.to_string()), children: Vec::new(), - rust_source: rust.get_standard_properties().get_source(), + rust_source: rust.get_source(), emacs_token: emacs, } .into() @@ -1576,7 +1575,7 @@ fn compare_example_block<'b, 's>( [], ( EmacsField::Required(":value"), - |r| Some(r.get_contents()), + |r| Some(r.get_value()), compare_property_quoted_string ), ( @@ -1654,7 +1653,7 @@ fn compare_export_block<'b, 's>( ), ( EmacsField::Required(":value"), - |r| Some(r.get_contents()), + |r| Some(r.get_value()), compare_property_quoted_string ) ) { @@ -1702,7 +1701,7 @@ fn compare_src_block<'b, 's>( ), ( EmacsField::Required(":value"), - |r| Some(r.get_contents()), + |r| Some(r.get_value()), compare_property_quoted_string ), ( diff --git a/src/compare/util.rs b/src/compare/util.rs index b7c09fb..9029b24 100644 --- a/src/compare/util.rs +++ b/src/compare/util.rs @@ -15,7 +15,6 @@ use crate::compare::sexp::unquote; use crate::types::AffiliatedKeywordValue; use crate::types::AstNode; use crate::types::GetAffiliatedKeywords; -use crate::types::GetStandardProperties; use crate::types::StandardProperties; /// Check if the child string slice is a slice of the parent string slice. @@ -30,32 +29,29 @@ fn is_slice_of(parent: &str, child: &str) -> bool { /// Get the byte offset into source that the rust object exists at. /// /// These offsets are zero-based unlike the elisp ones. -fn get_rust_byte_offsets<'b, 's, S: StandardProperties<'s> + ?Sized>( - original_document: &'s str, - rust_ast_node: &'b S, -) -> (usize, usize) { - let rust_object_source = rust_ast_node.get_source(); - debug_assert!(is_slice_of(original_document, rust_object_source)); - let offset = rust_object_source.as_ptr() as usize - original_document.as_ptr() as usize; - let end = offset + rust_object_source.len(); +fn get_rust_byte_offsets(original_document: &str, subset: &str) -> (usize, usize) { + debug_assert!(is_slice_of(original_document, subset)); + let offset = subset.as_ptr() as usize - original_document.as_ptr() as usize; + let end = offset + subset.len(); (offset, end) } pub(crate) fn compare_standard_properties< 'b, 's, - S: GetStandardProperties<'s> + GetElispFact<'s> + ?Sized, + S: StandardProperties<'s> + GetElispFact<'s> + ?Sized, >( original_document: &'s str, emacs: &'b Token<'s>, rust: &'b S, ) -> Result<(), Box> { assert_name(emacs, rust.get_elisp_fact().get_elisp_name())?; - assert_bounds(original_document, emacs, rust.get_standard_properties())?; + assert_bounds(original_document, emacs, rust)?; + assert_post_blank(emacs, rust)?; Ok(()) } -pub(crate) fn assert_name>( +fn assert_name>( emacs: &Token<'_>, name: S, ) -> Result<(), Box> { @@ -78,24 +74,72 @@ pub(crate) fn assert_name>( /// Assert that the character ranges defined by upstream org-mode's :standard-properties match the slices in Organic's StandardProperties. /// /// This does **not** handle plain text because plain text is a special case. -pub(crate) fn assert_bounds<'b, 's, S: StandardProperties<'s> + ?Sized>( +fn assert_bounds<'b, 's, S: StandardProperties<'s> + ?Sized>( original_document: &'s str, emacs: &'b Token<'s>, rust: &'b S, ) -> Result<(), Box> { let standard_properties = get_emacs_standard_properties(emacs)?; // 1-based - let (begin, end) = ( - standard_properties - .begin - .ok_or("Token should have a begin.")?, - standard_properties.end.ok_or("Token should have an end.")?, - ); - let (rust_begin, rust_end) = get_rust_byte_offsets(original_document, rust); // 0-based - let rust_begin_char_offset = original_document[..rust_begin].chars().count() + 1; // 1-based - let rust_end_char_offset = - rust_begin_char_offset + original_document[rust_begin..rust_end].chars().count(); // 1-based - if rust_begin_char_offset != begin || rust_end_char_offset != end { - Err(format!("Rust bounds (in chars) ({rust_begin}, {rust_end}) do not match emacs bounds ({emacs_begin}, {emacs_end})", rust_begin = rust_begin_char_offset, rust_end = rust_end_char_offset, emacs_begin=begin, emacs_end=end))?; + + // Check begin/end + { + let (begin, end) = ( + standard_properties + .begin + .ok_or("Token should have a begin.")?, + standard_properties.end.ok_or("Token should have an end.")?, + ); + let (rust_begin, rust_end) = get_rust_byte_offsets(original_document, rust.get_source()); // 0-based + let rust_begin_char_offset = original_document[..rust_begin].chars().count() + 1; // 1-based + let rust_end_char_offset = + rust_begin_char_offset + original_document[rust_begin..rust_end].chars().count(); // 1-based + if rust_begin_char_offset != begin || rust_end_char_offset != end { + Err(format!("Rust bounds (in chars) ({rust_begin}, {rust_end}) do not match emacs bounds ({emacs_begin}, {emacs_end})", rust_begin = rust_begin_char_offset, rust_end = rust_end_char_offset, emacs_begin=begin, emacs_end=end))?; + } + } + + // Check contents-begin/contents-end + { + if let Some(rust_contents) = rust.get_contents() { + let (begin, end) = ( + standard_properties + .contents_begin + .ok_or("Token should have a contents-begin.")?, + standard_properties + .contents_end + .ok_or("Token should have an contents-end.")?, + ); + let (rust_begin, rust_end) = get_rust_byte_offsets(original_document, rust_contents); // 0-based + let rust_begin_char_offset = original_document[..rust_begin].chars().count() + 1; // 1-based + let rust_end_char_offset = + rust_begin_char_offset + original_document[rust_begin..rust_end].chars().count(); // 1-based + if rust_begin_char_offset != begin || rust_end_char_offset != end { + Err(format!("Rust contents bounds (in chars) ({rust_begin}, {rust_end}) do not match emacs contents bounds ({emacs_begin}, {emacs_end})", rust_begin = rust_begin_char_offset, rust_end = rust_end_char_offset, emacs_begin=begin, emacs_end=end))?; + } + } else if standard_properties.contents_begin.is_some() + || standard_properties.contents_end.is_some() + { + Err(format!("Rust contents is None but emacs contents bounds are ({emacs_begin:?}, {emacs_end:?})", emacs_begin=standard_properties.contents_begin, emacs_end=standard_properties.contents_end))?; + } + } + + Ok(()) +} + +/// Assert that the post blank matches between emacs and organic. +/// +/// This does **not** handle plain text because plain text is a special case. +fn assert_post_blank<'b, 's, S: StandardProperties<'s> + ?Sized>( + emacs: &'b Token<'s>, + rust: &'b S, +) -> Result<(), Box> { + let standard_properties = get_emacs_standard_properties(emacs)?; // 1-based + let rust_post_blank = rust.get_post_blank(); + let emacs_post_blank = standard_properties + .post_blank + .ok_or("Token should have a post-blank.")?; + if rust_post_blank as usize != emacs_post_blank { + Err(format!("Rust post-blank {rust_post_blank} does not match emacs post-blank ({emacs_post_blank})", rust_post_blank = rust_post_blank, emacs_post_blank = emacs_post_blank))?; } Ok(()) @@ -241,7 +285,7 @@ where pub(crate) fn compare_children<'b, 's, 'x, RC>( source: &'s str, emacs: &'b Token<'s>, - rust_children: &'x Vec, + rust_children: &'x [RC], child_status: &mut Vec>, this_status: &mut DiffStatus, message: &mut Option, diff --git a/src/event_count/database.rs b/src/event_count/database.rs index 1b763e0..c9adbcf 100644 --- a/src/event_count/database.rs +++ b/src/event_count/database.rs @@ -27,7 +27,7 @@ pub(crate) fn record_event(event_type: EventType, input: OrgSource<'_>) { pub fn report(original_document: &str) { let mut db = GLOBAL_DATA.lock().unwrap(); let db = db.get_or_insert_with(HashMap::new); - let mut results: Vec<_> = db.iter().map(|(k, v)| (k, v)).collect(); + let mut results: Vec<_> = db.iter().collect(); results.sort_by_key(|(_k, v)| *v); // This would put the most common at the top, but that is a pain when there is already a lot of output from the parser. // results.sort_by(|(_ak, av), (_bk, bv)| bv.cmp(av)); diff --git a/src/parser/angle_link.rs b/src/parser/angle_link.rs index 8cd21a7..444270b 100644 --- a/src/parser/angle_link.rs +++ b/src/parser/angle_link.rs @@ -47,7 +47,7 @@ pub(crate) fn angle_link<'b, 'g, 'r, 's>( parser_with_context!(parse_angle_link)(context), ))(remaining)?; let (remaining, _) = tag(">")(remaining)?; - let (remaining, _trailing_whitespace) = + let (remaining, post_blank) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); Ok(( @@ -59,6 +59,7 @@ pub(crate) fn angle_link<'b, 'g, 'r, 's>( raw_link: raw_link.into(), search_option: parsed_link.search_option, application: parsed_link.application, + post_blank: post_blank.map(Into::<&str>::into), }, )) } diff --git a/src/parser/babel_call.rs b/src/parser/babel_call.rs index fdfdac5..22b3925 100644 --- a/src/parser/babel_call.rs +++ b/src/parser/babel_call.rs @@ -4,7 +4,6 @@ use nom::bytes::complete::tag_no_case; use nom::character::complete::anychar; use nom::character::complete::one_of; use nom::character::complete::space0; -use nom::combinator::consumed; use nom::combinator::opt; use nom::combinator::peek; use nom::combinator::recognize; @@ -43,32 +42,10 @@ where start_of_line(remaining)?; let (remaining, _) = tuple((space0, tag("#+"), tag_no_case("call"), tag(":")))(remaining)?; - if let Ok((remaining, (_, line_break))) = tuple((space0, org_line_ending))(remaining) { - let (remaining, _trailing_ws) = - maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; - let source = get_consumed(input, remaining); - return Ok(( - remaining, - BabelCall { - source: Into::<&str>::into(source), - affiliated_keywords: parse_affiliated_keywords( - context.get_global_settings(), - affiliated_keywords, - ), - value: Into::<&str>::into(line_break.take(0)), - call: None, - inside_header: None, - arguments: None, - end_header: None, - }, - )); - } - let (remaining, _ws) = space0(remaining)?; - let (remaining, (value, babel_call_value)) = consumed(babel_call_value)(remaining)?; - let (remaining, _ws) = tuple((space0, org_line_ending))(remaining)?; + let (remaining, babel_call_value) = babel_call_value(remaining)?; - let (remaining, _trailing_ws) = + let (remaining, post_blank) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); @@ -80,17 +57,22 @@ where context.get_global_settings(), affiliated_keywords, ), - value: Into::<&str>::into(value).trim_end(), + value: Into::<&str>::into(babel_call_value.value), call: babel_call_value.call.map(Into::<&str>::into), inside_header: babel_call_value.inside_header.map(Into::<&str>::into), arguments: babel_call_value.arguments.map(Into::<&str>::into), end_header: babel_call_value.end_header.map(Into::<&str>::into), + post_blank: post_blank.map(Into::<&str>::into), }, )) } #[derive(Debug)] struct BabelCallValue<'s> { + /// The entire string to the right of "#+call: " without the trailing line break. + value: OrgSource<'s>, + + /// The function name which may contain a line break if there are no headers/arguments. call: Option>, inside_header: Option>, arguments: Option>, @@ -99,13 +81,45 @@ struct BabelCallValue<'s> { #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn babel_call_value<'s>(input: OrgSource<'s>) -> Res, BabelCallValue<'s>> { - let (remaining, call) = opt(babel_call_call)(input)?; - let (remaining, inside_header) = opt(inside_header)(remaining)?; - let (remaining, arguments) = opt(arguments)(remaining)?; - let (remaining, end_header) = opt(end_header)(remaining)?; + alt(( + babel_call_value_without_headers, + babel_call_value_with_headers, + ))(input) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn babel_call_value_without_headers<'s>( + input: OrgSource<'s>, +) -> Res, BabelCallValue<'s>> { + let (remaining, value) = babel_call_call_with_headers(input)?; + let (remaining, _ws) = tuple((space0, org_line_ending))(remaining)?; + let call = get_consumed(input, remaining); Ok(( remaining, BabelCallValue { + value, + call: Some(call), + inside_header: None, + arguments: None, + end_header: None, + }, + )) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn babel_call_value_with_headers<'s>( + input: OrgSource<'s>, +) -> Res, BabelCallValue<'s>> { + let (remaining, call) = opt(babel_call_call_with_headers)(input)?; + let (remaining, inside_header) = opt(inside_header)(remaining)?; + let (remaining, arguments) = opt(arguments)(remaining)?; + let (remaining, end_header) = opt(end_header)(remaining)?; + let value = get_consumed(input, remaining); + let (remaining, _ws) = tuple((space0, org_line_ending))(remaining)?; + Ok(( + remaining, + BabelCallValue { + value, call, inside_header, arguments: arguments.flatten(), @@ -115,14 +129,15 @@ fn babel_call_value<'s>(input: OrgSource<'s>) -> Res, BabelCallVal } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn babel_call_call<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { +fn babel_call_call_with_headers<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { + // When babel call contains no arguments or headers (for example: "#+call: lorem ipsum\n") then the trailing line break is part of the call. Otherwise, it is not. verify( recognize(many_till( anychar, - alt(( - peek(recognize(one_of("[("))), + peek(alt(( + recognize(one_of("[(")), recognize(tuple((space0, org_line_ending))), - )), + ))), )), |s| s.len() > 0, )(input) @@ -232,19 +247,3 @@ fn impl_balanced_bracket< }; Ok((remaining, contents)) } - -#[cfg(test)] -mod tests { - use nom::combinator::opt; - - use super::*; - - #[test] - fn simple_call() -> Result<(), Box> { - let input = OrgSource::new("()"); - let (remaining, call) = opt(babel_call_call)(input)?; - assert_eq!(Into::<&str>::into(remaining), "()"); - assert!(call.is_none()); - Ok(()) - } -} diff --git a/src/parser/bullshitium.rs b/src/parser/bullshitium.rs index 1757fcb..597d433 100644 --- a/src/parser/bullshitium.rs +++ b/src/parser/bullshitium.rs @@ -63,6 +63,7 @@ pub(crate) fn broken_end<'b, 'g, 'r, 's>( match paragraph.children.first_mut() { Some(Object::PlainText(plain_text)) => { plain_text.source = input.get_until_end_of_str(plain_text.source).into(); + paragraph.contents = Some(input.get_until_end_of_str(plain_text.source).into()); } Some(obj) => { panic!("Unhandled first object type inside bullshitium {:?}", obj); @@ -73,14 +74,18 @@ pub(crate) fn broken_end<'b, 'g, 'r, 's>( }; Ok((remaining, paragraph)) } else { - let (remaining, _trailing_ws) = + let (remaining, post_blank) = maybe_consume_trailing_whitespace_if_not_exiting(context, lead_in_remaining)?; + let body = Into::<&str>::into(input.get_until(lead_in_remaining)); + Ok(( remaining, Paragraph::of_text( input.get_until(remaining).into(), - input.get_until(lead_in_remaining).into(), + body, + if !body.is_empty() { Some(body) } else { None }, + post_blank.map(Into::<&str>::into), ), )) } @@ -119,6 +124,7 @@ pub(crate) fn broken_dynamic_block<'b, 'g, 'r, 's>( match paragraph.children.first_mut() { Some(Object::PlainText(plain_text)) => { plain_text.source = input.get_until_end_of_str(plain_text.source).into(); + paragraph.contents = Some(input.get_until_end_of_str(plain_text.source).into()); } Some(obj) => { panic!("Unhandled first object type inside bullshitium {:?}", obj); @@ -129,14 +135,18 @@ pub(crate) fn broken_dynamic_block<'b, 'g, 'r, 's>( }; Ok((remaining, paragraph)) } else { - let (remaining, _trailing_ws) = + let (remaining, post_blank) = maybe_consume_trailing_whitespace_if_not_exiting(context, lead_in_remaining)?; + let body = Into::<&str>::into(input.get_until(lead_in_remaining)); + Ok(( remaining, Paragraph::of_text( input.get_until(remaining).into(), - input.get_until(lead_in_remaining).into(), + body, + if !body.is_empty() { Some(body) } else { None }, + post_blank.map(Into::<&str>::into), ), )) } diff --git a/src/parser/citation.rs b/src/parser/citation.rs index e7f7afe..983048f 100644 --- a/src/parser/citation.rs +++ b/src/parser/citation.rs @@ -46,16 +46,22 @@ pub(crate) fn citation<'b, 'g, 'r, 's>( let (remaining, prefix) = must_balance_bracket(opt(parser_with_context!(global_prefix)(context)))(remaining)?; + let contents_begin = remaining; let (remaining, references) = separated_list1(tag(";"), parser_with_context!(citation_reference)(context))(remaining)?; + let contents_end = { + let (rem, _) = opt(tag(";"))(remaining)?; + rem + }; let (remaining, suffix) = must_balance_bracket(opt(map( tuple((tag(";"), parser_with_context!(global_suffix)(context))), |(_, suffix)| suffix, )))(remaining)?; let (remaining, _) = tag("]")(remaining)?; - let (remaining, _trailing_whitespace) = + let (remaining, post_blank) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); + let contents = contents_begin.get_until(contents_end); Ok(( remaining, Citation { @@ -64,6 +70,8 @@ pub(crate) fn citation<'b, 'g, 'r, 's>( prefix: prefix.unwrap_or(Vec::new()), suffix: suffix.unwrap_or(Vec::new()), children: references, + contents: Into::<&str>::into(contents), + post_blank: post_blank.map(Into::<&str>::into), }, )) } @@ -211,7 +219,6 @@ mod tests { use crate::context::List; use crate::parser::element_parser::element; use crate::types::Element; - use crate::types::GetStandardProperties; use crate::types::StandardProperties; #[test] @@ -227,10 +234,7 @@ mod tests { _ => panic!("Should be a paragraph!"), }; assert_eq!(Into::<&str>::into(remaining), ""); - assert_eq!( - first_paragraph.get_standard_properties().get_source(), - "[cite:@foo]" - ); + assert_eq!(first_paragraph.get_source(), "[cite:@foo]"); assert_eq!(first_paragraph.children.len(), 1); match first_paragraph diff --git a/src/parser/clock.rs b/src/parser/clock.rs index e25dd39..e702640 100644 --- a/src/parser/clock.rs +++ b/src/parser/clock.rs @@ -40,7 +40,7 @@ pub(crate) fn clock<'b, 'g, 'r, 's>( let (remaining, (timestamp, duration)) = clock_timestamp(context, remaining)?; let (remaining, _) = tuple((space0, org_line_ending))(remaining)?; - let (remaining, _trailing_ws) = + let (remaining, post_blank) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); Ok(( @@ -54,6 +54,7 @@ pub(crate) fn clock<'b, 'g, 'r, 's>( } else { ClockStatus::Running }, + post_blank: post_blank.map(Into::<&str>::into), }, )) } @@ -81,7 +82,7 @@ fn clock_timestamp<'b, 'g, 'r, 's>( |(timestamp, duration)| (timestamp, duration.map(Into::<&str>::into)), ), map( - parser_with_context!(inactive_timestamp)(context), + parser_with_context!(inactive_timestamp(true))(context), |timestamp| (timestamp, None), ), ))(input) diff --git a/src/parser/comment.rs b/src/parser/comment.rs index 7523037..80bf16a 100644 --- a/src/parser/comment.rs +++ b/src/parser/comment.rs @@ -46,7 +46,7 @@ pub(crate) fn comment<'b, 'g, 'r, 's>( let (remaining, mut remaining_lines) = many0(preceded(not(exit_matcher), comment_line_matcher))(remaining)?; - let (remaining, _trailing_ws) = + let (remaining, post_blank) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); let mut value = Vec::with_capacity(remaining_lines.len() + 1); @@ -67,6 +67,7 @@ pub(crate) fn comment<'b, 'g, 'r, 's>( Comment { source: source.into(), value, + post_blank: post_blank.map(Into::<&str>::into), }, )) } diff --git a/src/parser/diary_sexp.rs b/src/parser/diary_sexp.rs index c58f0d3..5230e2f 100644 --- a/src/parser/diary_sexp.rs +++ b/src/parser/diary_sexp.rs @@ -31,7 +31,7 @@ where let (remaining, value) = recognize(tuple((tag("%%("), is_not("\r\n"))))(remaining)?; let (remaining, _eol) = org_line_ending(remaining)?; - let (remaining, _trailing_ws) = + let (remaining, post_blank) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); Ok(( @@ -43,6 +43,7 @@ where affiliated_keywords, ), value: Into::<&str>::into(value), + post_blank: post_blank.map(Into::<&str>::into), }, )) } diff --git a/src/parser/document.rs b/src/parser/document.rs index 0f11d88..8c783cc 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -3,6 +3,7 @@ use std::path::Path; use nom::combinator::all_consuming; use nom::combinator::opt; use nom::multi::many0; +use nom::InputTake; use super::headline::heading; use super::in_buffer_settings::apply_in_buffer_settings; @@ -181,8 +182,10 @@ fn _document<'b, 'g, 'r, 's>( let zeroth_section_matcher = parser_with_context!(zeroth_section)(context); let heading_matcher = parser_with_context!(heading(0))(context); let (remaining, _blank_lines) = many0(blank_line)(input)?; + let contents_begin = remaining; let (remaining, zeroth_section) = opt(zeroth_section_matcher)(remaining)?; let (remaining, children) = many0(heading_matcher)(remaining)?; + let contents = get_consumed(contents_begin, remaining); let source = get_consumed(input, remaining); Ok(( remaining, @@ -192,6 +195,11 @@ fn _document<'b, 'g, 'r, 's>( path: None, zeroth_section, children, + contents: if contents.len() > 0 { + Into::<&str>::into(contents) + } else { + Into::<&str>::into(remaining.take(0)) + }, }, )) } diff --git a/src/parser/drawer.rs b/src/parser/drawer.rs index ecbc618..7afd553 100644 --- a/src/parser/drawer.rs +++ b/src/parser/drawer.rs @@ -4,6 +4,7 @@ use nom::bytes::complete::tag_no_case; use nom::bytes::complete::take_while; use nom::character::complete::line_ending; use nom::character::complete::space0; +use nom::combinator::consumed; use nom::combinator::eof; use nom::combinator::not; use nom::combinator::recognize; @@ -12,7 +13,9 @@ use nom::sequence::tuple; use super::affiliated_keyword::parse_affiliated_keywords; use super::org_source::OrgSource; +use super::paragraph::empty_paragraph; use super::util::maybe_consume_trailing_whitespace_if_not_exiting; +use crate::context::bind_context; use crate::context::parser_with_context; use crate::context::ContextElement; use crate::context::ExitClass; @@ -21,7 +24,6 @@ use crate::context::RefContext; use crate::error::CustomError; use crate::error::Res; use crate::parser::element_parser::element; -use crate::parser::util::blank_line; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; use crate::parser::util::immediate_in_section; @@ -30,7 +32,6 @@ use crate::parser::util::WORD_CONSTITUENT_CHARACTERS; use crate::types::Drawer; use crate::types::Element; use crate::types::Keyword; -use crate::types::Paragraph; #[cfg_attr( feature = "tracing", @@ -70,29 +71,12 @@ where let parser_context = context.with_additional_node(&contexts[0]); let parser_context = parser_context.with_additional_node(&contexts[1]); let parser_context = parser_context.with_additional_node(&contexts[2]); + let (remaining, (contents, children)) = + consumed(parser_with_context!(children)(&parser_context))(remaining)?; - let element_matcher = parser_with_context!(element(true))(&parser_context); - let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); - let (remaining, children) = match tuple(( - not(exit_matcher), - blank_line, - many_till(blank_line, exit_matcher), - ))(remaining) - { - Ok((remain, (_not_immediate_exit, first_line, (_trailing_whitespace, _exit_contents)))) => { - let source = get_consumed(remaining, remain); - let element = Element::Paragraph(Paragraph::of_text(source.into(), first_line.into())); - (remain, vec![element]) - } - Err(_) => { - let (remaining, (children, _exit_contents)) = - many_till(element_matcher, exit_matcher)(remaining)?; - (remaining, children) - } - }; let (remaining, _end) = drawer_end(&parser_context, remaining)?; - let (remaining, _trailing_ws) = + let (remaining, post_blank) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); @@ -106,10 +90,34 @@ where ), drawer_name: drawer_name.into(), children, + contents: Some(contents.into()), + post_blank: post_blank.map(Into::<&str>::into), }, )) } +#[cfg_attr( + feature = "tracing", + tracing::instrument(ret, level = "debug", skip(context)) +)] +fn children<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, Vec>> { + let element_matcher = parser_with_context!(element(true))(context); + let exit_matcher = parser_with_context!(exit_matcher_parser)(context); + + if let Ok((remaining, (_not_exit, empty_para))) = + tuple((not(exit_matcher), bind_context!(empty_paragraph, context)))(input) + { + return Ok((remaining, vec![Element::Paragraph(empty_para)])); + } + + let (remaining, (children, _exit_contents)) = many_till(element_matcher, exit_matcher)(input)?; + + Ok((remaining, children)) +} + #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn name<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { take_while(|c| WORD_CONSTITUENT_CHARACTERS.contains(c) || "-_".contains(c))(input) diff --git a/src/parser/dynamic_block.rs b/src/parser/dynamic_block.rs index e1eb68b..1f59f9d 100644 --- a/src/parser/dynamic_block.rs +++ b/src/parser/dynamic_block.rs @@ -6,20 +6,20 @@ use nom::character::complete::anychar; use nom::character::complete::line_ending; use nom::character::complete::space0; use nom::character::complete::space1; -use nom::combinator::consumed; use nom::combinator::eof; use nom::combinator::not; use nom::combinator::opt; use nom::combinator::peek; use nom::combinator::recognize; -use nom::multi::many0; use nom::multi::many_till; -use nom::sequence::preceded; use nom::sequence::tuple; use super::affiliated_keyword::parse_affiliated_keywords; +use super::greater_block::leading_blank_lines_end; use super::org_source::OrgSource; +use super::paragraph::empty_paragraph; use super::util::maybe_consume_trailing_whitespace_if_not_exiting; +use crate::context::bind_context; use crate::context::parser_with_context; use crate::context::ContextElement; use crate::context::ExitClass; @@ -28,7 +28,6 @@ use crate::context::RefContext; use crate::error::CustomError; use crate::error::Res; use crate::parser::element_parser::element; -use crate::parser::util::blank_line; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; use crate::parser::util::immediate_in_section; @@ -36,7 +35,6 @@ use crate::parser::util::start_of_line; use crate::types::DynamicBlock; use crate::types::Element; use crate::types::Keyword; -use crate::types::Paragraph; #[cfg_attr( feature = "tracing", @@ -81,23 +79,25 @@ where let element_matcher = parser_with_context!(element(true))(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); not(exit_matcher)(remaining)?; - let (remaining, leading_blank_lines) = opt(consumed(tuple(( - blank_line, - many0(preceded(not(exit_matcher), blank_line)), - ))))(remaining)?; - let leading_blank_lines = - leading_blank_lines.map(|(source, (first_line, _remaining_lines))| { - Element::Paragraph(Paragraph::of_text(source.into(), first_line.into())) - }); + let contents_begin = remaining; + let blank_line_context = ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Alpha, + exit_matcher: &leading_blank_lines_end, + }); + let blank_line_context = parser_context.with_additional_node(&blank_line_context); + + let (remaining, leading_blank_lines) = + opt(bind_context!(empty_paragraph, &blank_line_context))(remaining)?; let (remaining, (mut children, _exit_contents)) = many_till(element_matcher, exit_matcher)(remaining)?; if let Some(lines) = leading_blank_lines { - children.insert(0, lines); + children.insert(0, Element::Paragraph(lines)); } + let contents = get_consumed(contents_begin, remaining); let (remaining, _end) = dynamic_block_end(&parser_context, remaining)?; - let (remaining, _trailing_ws) = + let (remaining, post_blank) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); Ok(( @@ -111,6 +111,12 @@ where block_name: name.into(), parameters: parameters.map(|val| val.into()), children, + contents: if contents.len() > 0 { + Some(Into::<&str>::into(contents)) + } else { + None + }, + post_blank: post_blank.map(Into::<&str>::into), }, )) } diff --git a/src/parser/entity.rs b/src/parser/entity.rs index 84c271b..94c91a2 100644 --- a/src/parser/entity.rs +++ b/src/parser/entity.rs @@ -28,7 +28,7 @@ pub(crate) fn entity<'b, 'g, 'r, 's>( let (remaining, _) = tag("\\")(input)?; let (remaining, (entity_definition, entity_name, use_brackets)) = name(context, remaining)?; - let (remaining, _trailing_whitespace) = + let (remaining, post_blank) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); @@ -43,6 +43,7 @@ pub(crate) fn entity<'b, 'g, 'r, 's>( ascii: entity_definition.ascii, utf8: entity_definition.utf8, use_brackets, + post_blank: post_blank.map(Into::<&str>::into), }, )) } diff --git a/src/parser/export_snippet.rs b/src/parser/export_snippet.rs index c588f5f..bff0f37 100644 --- a/src/parser/export_snippet.rs +++ b/src/parser/export_snippet.rs @@ -39,7 +39,7 @@ pub(crate) fn export_snippet<'b, 'g, 'r, 's>( parser_with_context!(contents)(&parser_context), )))(remaining)?; let (remaining, _) = tag("@@")(remaining)?; - let (remaining, _trailing_whitespace) = + let (remaining, post_blank) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); Ok(( @@ -48,6 +48,7 @@ pub(crate) fn export_snippet<'b, 'g, 'r, 's>( source: source.into(), backend: backend_name.into(), contents: backend_contents.map(|(_colon, backend_contents)| backend_contents.into()), + post_blank: post_blank.map(Into::<&str>::into), }, )) } diff --git a/src/parser/fixed_width_area.rs b/src/parser/fixed_width_area.rs index 73f4d15..bbd44b0 100644 --- a/src/parser/fixed_width_area.rs +++ b/src/parser/fixed_width_area.rs @@ -2,12 +2,15 @@ use nom::branch::alt; use nom::bytes::complete::tag; use nom::character::complete::anychar; use nom::character::complete::space0; +use nom::combinator::map; use nom::combinator::not; +use nom::combinator::peek; use nom::combinator::recognize; use nom::multi::many0; use nom::multi::many_till; use nom::sequence::preceded; use nom::sequence::tuple; +use nom::InputTake; use super::affiliated_keyword::parse_affiliated_keywords; use super::org_source::OrgSource; @@ -35,28 +38,25 @@ pub(crate) fn fixed_width_area<'b, 'g, 'r, 's, AK>( where AK: IntoIterator>, { - let fixed_width_area_line_matcher = parser_with_context!(fixed_width_area_line)(context); let exit_matcher = parser_with_context!(exit_matcher_parser)(context); - let (remaining, first_line) = fixed_width_area_line_matcher(remaining)?; - let (remaining, mut remaining_lines) = - many0(preceded(not(exit_matcher), fixed_width_area_line_matcher))(remaining)?; + let (remaining, first_line) = fixed_width_area_line(remaining)?; + let (remaining, remaining_lines) = many0(preceded( + not(tuple((org_line_ending, exit_matcher))), + map( + tuple((org_line_ending, fixed_width_area_line)), + |(_line_ending, line_contents)| line_contents, + ), + ))(remaining)?; - let (remaining, _trailing_ws) = + let post_blank_begin = remaining; + let (remaining, _first_line_break) = org_line_ending(remaining)?; + let (remaining, _additional_post_blank) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; + let post_blank = get_consumed(post_blank_begin, remaining); let source = get_consumed(input, remaining); let mut value = Vec::with_capacity(remaining_lines.len() + 1); - let last_line = remaining_lines.pop(); - if let Some(last_line) = last_line { - value.push(Into::<&str>::into(first_line)); - value.extend(remaining_lines.into_iter().map(Into::<&str>::into)); - let last_line = Into::<&str>::into(last_line); - // Trim the line ending from the final line. - value.push(&last_line[..(last_line.len() - 1)]) - } else { - // Trim the line ending from the only line. - let only_line = Into::<&str>::into(first_line); - value.push(&only_line[..(only_line.len() - 1)]) - } + value.push(Into::<&str>::into(first_line)); + value.extend(remaining_lines.into_iter().map(Into::<&str>::into)); Ok(( remaining, FixedWidthArea { @@ -66,25 +66,24 @@ where affiliated_keywords, ), value, + post_blank: if post_blank.len() > 0 { + Some(Into::<&str>::into(post_blank)) + } else { + None + }, }, )) } -#[cfg_attr( - feature = "tracing", - tracing::instrument(ret, level = "debug", skip(_context)) -)] -fn fixed_width_area_line<'b, 'g, 'r, 's>( - _context: RefContext<'b, 'g, 'r, 's>, - input: OrgSource<'s>, -) -> Res, OrgSource<'s>> { +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn fixed_width_area_line<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { start_of_line(input)?; let (remaining, _) = tuple((space0, tag(":")))(input)?; - if let Ok((remaining, line_break)) = org_line_ending(remaining) { - return Ok((remaining, line_break)); + if let Ok((_remain, _line_break)) = org_line_ending(remaining) { + return Ok((remaining, remaining.take(0))); } let (remaining, _) = tag(" ")(remaining)?; - let (remaining, value) = recognize(many_till(anychar, org_line_ending))(remaining)?; + let (remaining, value) = recognize(many_till(anychar, peek(org_line_ending)))(remaining)?; Ok((remaining, value)) } diff --git a/src/parser/footnote_definition.rs b/src/parser/footnote_definition.rs index e263c17..2cf2d1e 100644 --- a/src/parser/footnote_definition.rs +++ b/src/parser/footnote_definition.rs @@ -75,6 +75,7 @@ where let parser_context = parser_context.with_additional_node(&contexts[2]); let element_matcher = parser_with_context!(element(true))(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); + let before_contents = remaining; let (mut remaining, (mut children, _exit_contents)) = many_till(include_input(element_matcher), exit_matcher)(remaining)?; @@ -90,13 +91,16 @@ where } } - let (remaining, _trailing_ws) = + let contents = get_consumed(before_contents, remaining); + let (remaining, post_blank) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); Ok(( remaining, FootnoteDefinition { source: source.into(), + contents: Some(contents.into()), + post_blank: post_blank.map(Into::<&str>::into), affiliated_keywords: parse_affiliated_keywords( context.get_global_settings(), affiliated_keywords, @@ -160,7 +164,7 @@ mod tests { use crate::context::Context; use crate::context::GlobalSettings; use crate::context::List; - use crate::types::GetStandardProperties; + use crate::types::StandardProperties; #[test] fn two_paragraphs() { @@ -181,17 +185,13 @@ line footnote.", footnote_definition_matcher(remaining).expect("Parse second footnote_definition."); assert_eq!(Into::<&str>::into(remaining), ""); assert_eq!( - first_footnote_definition - .get_standard_properties() - .get_source(), + first_footnote_definition.get_source(), "[fn:1] A footnote. " ); assert_eq!( - second_footnote_definition - .get_standard_properties() - .get_source(), + second_footnote_definition.get_source(), "[fn:2] A multi- line footnote." @@ -216,9 +216,7 @@ not in the footnote.", footnote_definition_matcher(input).expect("Parse first footnote_definition"); assert_eq!(Into::<&str>::into(remaining), "not in the footnote."); assert_eq!( - first_footnote_definition - .get_standard_properties() - .get_source(), + first_footnote_definition.get_source(), "[fn:2] A multi- line footnote. diff --git a/src/parser/footnote_reference.rs b/src/parser/footnote_reference.rs index 2823ad7..581f6a8 100644 --- a/src/parser/footnote_reference.rs +++ b/src/parser/footnote_reference.rs @@ -2,6 +2,7 @@ use nom::branch::alt; use nom::bytes::complete::tag; use nom::bytes::complete::tag_no_case; use nom::combinator::all_consuming; +use nom::combinator::consumed; use nom::combinator::map_parser; use nom::combinator::verify; use nom::multi::many1; @@ -59,7 +60,7 @@ fn anonymous_footnote<'b, 'g, 'r, 's>( let initial_context = ContextElement::document_context(); let initial_context = Context::new(context.get_global_settings(), List::new(&initial_context)); - let (remaining, children) = map_parser( + let (remaining, (contents, children)) = consumed(map_parser( verify( parser_with_context!(text_until_exit)(&parser_context), |text| text.len() > 0, @@ -69,17 +70,19 @@ fn anonymous_footnote<'b, 'g, 'r, 's>( &initial_context, )))(i) }), - )(remaining)?; + ))(remaining)?; let (remaining, _) = tag("]")(remaining)?; - let (remaining, _trailing_whitespace) = + let (remaining, post_blank) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); Ok(( remaining, FootnoteReference { source: source.into(), + contents: Some(contents.into()), + post_blank: post_blank.map(Into::<&str>::into), label: None, definition: children, }, @@ -106,7 +109,7 @@ fn inline_footnote<'b, 'g, 'r, 's>( let initial_context = ContextElement::document_context(); let initial_context = Context::new(context.get_global_settings(), List::new(&initial_context)); - let (remaining, children) = map_parser( + let (remaining, (contents, children)) = consumed(map_parser( verify( parser_with_context!(text_until_exit)(&parser_context), |text| text.len() > 0, @@ -116,17 +119,19 @@ fn inline_footnote<'b, 'g, 'r, 's>( &initial_context, )))(i) }), - )(remaining)?; + ))(remaining)?; let (remaining, _) = tag("]")(remaining)?; - let (remaining, _trailing_whitespace) = + let (remaining, post_blank) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); Ok(( remaining, FootnoteReference { source: source.into(), + contents: Some(contents.into()), + post_blank: post_blank.map(Into::<&str>::into), label: Some(label_contents.into()), definition: children, }, @@ -144,13 +149,15 @@ fn footnote_reference_only<'b, 'g, 'r, 's>( let (remaining, _) = tag_no_case("[fn:")(input)?; let (remaining, label_contents) = label(remaining)?; let (remaining, _) = tag("]")(remaining)?; - let (remaining, _trailing_whitespace) = + let (remaining, post_blank) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); Ok(( remaining, FootnoteReference { source: source.into(), + contents: None, + post_blank: post_blank.map(Into::<&str>::into), label: Some(label_contents.into()), definition: Vec::with_capacity(0), }, diff --git a/src/parser/greater_block.rs b/src/parser/greater_block.rs index c8b8d77..c1b52c9 100644 --- a/src/parser/greater_block.rs +++ b/src/parser/greater_block.rs @@ -5,22 +5,21 @@ use nom::character::complete::anychar; use nom::character::complete::line_ending; use nom::character::complete::space0; use nom::character::complete::space1; -use nom::combinator::consumed; use nom::combinator::eof; use nom::combinator::not; use nom::combinator::opt; use nom::combinator::peek; use nom::combinator::recognize; use nom::combinator::verify; -use nom::multi::many0; use nom::multi::many_till; -use nom::sequence::preceded; use nom::sequence::tuple; use super::affiliated_keyword::parse_affiliated_keywords; use super::org_source::OrgSource; +use super::paragraph::empty_paragraph; use super::util::in_section; use super::util::maybe_consume_trailing_whitespace_if_not_exiting; +use crate::context::bind_context; use crate::context::parser_with_context; use crate::context::ContextElement; use crate::context::ContextMatcher; @@ -37,7 +36,6 @@ use crate::parser::util::start_of_line; use crate::types::CenterBlock; use crate::types::Element; use crate::types::Keyword; -use crate::types::Paragraph; use crate::types::QuoteBlock; use crate::types::SpecialBlock; @@ -102,7 +100,7 @@ fn center_block<'b, 'g, 'r, 's, AK>( where AK: IntoIterator>, { - let (remaining, (source, children)) = greater_block_body( + let (remaining, body) = greater_block_body( context, input, pre_affiliated_keywords_input, @@ -112,12 +110,14 @@ where Ok(( remaining, Element::CenterBlock(CenterBlock { - source, + source: body.source, affiliated_keywords: parse_affiliated_keywords( context.get_global_settings(), affiliated_keywords, ), - children, + children: body.children, + contents: body.contents, + post_blank: body.post_blank, }), )) } @@ -135,7 +135,7 @@ fn quote_block<'b, 'g, 'r, 's, AK>( where AK: IntoIterator>, { - let (remaining, (source, children)) = greater_block_body( + let (remaining, body) = greater_block_body( context, input, pre_affiliated_keywords_input, @@ -145,12 +145,14 @@ where Ok(( remaining, Element::QuoteBlock(QuoteBlock { - source, + source: body.source, affiliated_keywords: parse_affiliated_keywords( context.get_global_settings(), affiliated_keywords, ), - children, + children: body.children, + contents: body.contents, + post_blank: body.post_blank, }), )) } @@ -196,7 +198,7 @@ where AK: IntoIterator>, { let (remaining, parameters) = opt(tuple((space1, parameters)))(input)?; - let (remaining, (source, children)) = greater_block_body( + let (remaining, body) = greater_block_body( context, remaining, pre_affiliated_keywords_input, @@ -206,18 +208,28 @@ where Ok(( remaining, Element::SpecialBlock(SpecialBlock { - source, + source: body.source, affiliated_keywords: parse_affiliated_keywords( context.get_global_settings(), affiliated_keywords, ), - children, + children: body.children, block_type: name, parameters: parameters.map(|(_, parameters)| Into::<&str>::into(parameters)), + contents: body.contents, + post_blank: body.post_blank, }), )) } +#[derive(Debug)] +struct GreaterBlockBody<'s> { + source: &'s str, + children: Vec>, + contents: Option<&'s str>, + post_blank: Option<&'s str>, +} + #[cfg_attr( feature = "tracing", tracing::instrument(ret, level = "debug", skip(context)) @@ -228,7 +240,7 @@ fn greater_block_body<'c, 'b, 'g, 'r, 's>( pre_affiliated_keywords_input: OrgSource<'s>, name: &'c str, context_name: &'c str, -) -> Res, (&'s str, Vec>)> { +) -> Res, GreaterBlockBody<'s>> { if in_section(context, context_name) { return Err(nom::Err::Error(CustomError::Static( "Cannot nest objects of the same element", @@ -250,28 +262,43 @@ fn greater_block_body<'c, 'b, 'g, 'r, 's>( let element_matcher = parser_with_context!(element(true))(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); not(exit_matcher)(remaining)?; - let (remaining, leading_blank_lines) = opt(consumed(tuple(( - blank_line, - many0(preceded(not(exit_matcher), blank_line)), - ))))(remaining)?; - let leading_blank_lines = - leading_blank_lines.map(|(source, (first_line, _remaining_lines))| { - Element::Paragraph(Paragraph::of_text(source.into(), first_line.into())) - }); + let contents_begin = remaining; + + let blank_line_context = ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Alpha, + exit_matcher: &leading_blank_lines_end, + }); + let blank_line_context = parser_context.with_additional_node(&blank_line_context); + + let (remaining, leading_blank_lines) = + opt(bind_context!(empty_paragraph, &blank_line_context))(remaining)?; let (remaining, (mut children, _exit_contents)) = many_till(element_matcher, exit_matcher)(remaining)?; if let Some(lines) = leading_blank_lines { - children.insert(0, lines); + children.insert(0, Element::Paragraph(lines)); } + let contents = get_consumed(contents_begin, remaining); let (remaining, _end) = exit_with_name(&parser_context, remaining)?; // Not checking if parent exit matcher is causing exit because the greater_block_end matcher asserts we matched a full greater block - let (remaining, _trailing_ws) = + let (remaining, post_blank) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(pre_affiliated_keywords_input, remaining); - Ok((remaining, (Into::<&str>::into(source), children))) + Ok(( + remaining, + GreaterBlockBody { + source: Into::<&str>::into(source), + children, + contents: if contents.len() > 0 { + Some(Into::<&str>::into(contents)) + } else { + None + }, + post_blank: post_blank.map(Into::<&str>::into), + }, + )) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] @@ -307,3 +334,14 @@ fn _greater_block_end<'b, 'g, 'r, 's, 'c>( let source = get_consumed(input, remaining); Ok((remaining, source)) } + +#[cfg_attr( + feature = "tracing", + tracing::instrument(ret, level = "debug", skip(_context)) +)] +pub(crate) fn leading_blank_lines_end<'b, 'g, 'r, 's, 'c>( + _context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, OrgSource<'s>> { + recognize(not(blank_line))(input) +} diff --git a/src/parser/headline.rs b/src/parser/headline.rs index aca0fa3..a836f3c 100644 --- a/src/parser/headline.rs +++ b/src/parser/headline.rs @@ -66,6 +66,8 @@ fn _heading<'b, 'g, 'r, 's>( let (remaining, pre_headline) = headline(context, input, parent_star_count)?; let section_matcher = bind_context!(section, context); let heading_matcher = bind_context!(heading(pre_headline.star_count), context); + let (contents_begin, _) = opt(many0(blank_line))(remaining)?; + let maybe_post_blank = get_consumed(remaining, contents_begin); let (remaining, maybe_section) = opt(map(section_matcher, DocumentElement::Section))(remaining)?; let (remaining, _ws) = opt(tuple((start_of_line, many0(blank_line))))(remaining)?; @@ -82,7 +84,8 @@ fn _heading<'b, 'g, 'r, 's>( } children.insert(0, section); } - let remaining = if children.is_empty() { + let has_children = !children.is_empty(); + let remaining = if !has_children { // Support empty headings let (remain, _ws) = many0(blank_line)(remaining)?; remain @@ -91,6 +94,7 @@ fn _heading<'b, 'g, 'r, 's>( }; let is_archived = pre_headline.tags.contains(&"ARCHIVE"); + let contents = get_consumed(contents_begin, remaining); let source = get_consumed(input, remaining); Ok(( remaining, @@ -112,6 +116,16 @@ fn _heading<'b, 'g, 'r, 's>( scheduled, deadline, closed, + contents: if contents.len() > 0 { + Some(Into::<&str>::into(contents)) + } else { + None + }, + post_blank: if has_children { + None + } else { + Some(Into::<&str>::into(maybe_post_blank)) + }, }, )) } diff --git a/src/parser/horizontal_rule.rs b/src/parser/horizontal_rule.rs index eb1e015..f71e416 100644 --- a/src/parser/horizontal_rule.rs +++ b/src/parser/horizontal_rule.rs @@ -38,7 +38,7 @@ where space0, alt((line_ending, eof)), )))(remaining)?; - let (remaining, _trailing_ws) = + let (remaining, post_blank) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); Ok(( @@ -49,6 +49,7 @@ where context.get_global_settings(), affiliated_keywords, ), + post_blank: post_blank.map(Into::<&str>::into), }, )) } diff --git a/src/parser/inline_babel_call.rs b/src/parser/inline_babel_call.rs index f059da0..7f67a71 100644 --- a/src/parser/inline_babel_call.rs +++ b/src/parser/inline_babel_call.rs @@ -38,7 +38,7 @@ pub(crate) fn inline_babel_call<'b, 'g, 'r, 's>( let (remaining, arguments) = argument(context, remaining)?; let (remaining, end_header) = opt(parser_with_context!(header)(context))(remaining)?; let value = get_consumed(input, remaining); - let (remaining, _trailing_whitespace) = + let (remaining, post_blank) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); Ok(( @@ -54,6 +54,7 @@ pub(crate) fn inline_babel_call<'b, 'g, 'r, 's>( None }, end_header: end_header.map(Into::<&str>::into), + post_blank: post_blank.map(Into::<&str>::into), }, )) } diff --git a/src/parser/inline_source_block.rs b/src/parser/inline_source_block.rs index f0e22a7..11ce69b 100644 --- a/src/parser/inline_source_block.rs +++ b/src/parser/inline_source_block.rs @@ -38,7 +38,7 @@ pub(crate) fn inline_source_block<'b, 'g, 'r, 's>( let (remaining, language) = lang(context, remaining)?; let (remaining, parameters) = opt(parser_with_context!(header)(context))(remaining)?; let (remaining, value) = body(context, remaining)?; - let (remaining, _trailing_whitespace) = + let (remaining, post_blank) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); Ok(( @@ -48,6 +48,7 @@ pub(crate) fn inline_source_block<'b, 'g, 'r, 's>( language: language.into(), parameters: parameters.map(Into::<&str>::into), value: value.into(), + post_blank: post_blank.map(Into::<&str>::into), }, )) } diff --git a/src/parser/keyword.rs b/src/parser/keyword.rs index 32db361..e2c67b7 100644 --- a/src/parser/keyword.rs +++ b/src/parser/keyword.rs @@ -58,6 +58,7 @@ fn _filtered_keyword<'s, F: Fn(OrgSource<'s>) -> Res, OrgSource<'s affiliated_keywords: AffiliatedKeywords::default(), // To be populated by the caller if this keyword is in a context to support affiliated keywords. key: parsed_key.into(), value: "", + post_blank: None, }, )); } @@ -71,6 +72,7 @@ fn _filtered_keyword<'s, F: Fn(OrgSource<'s>) -> Res, OrgSource<'s affiliated_keywords: AffiliatedKeywords::default(), // To be populated by the caller if this keyword is in a context to support affiliated keywords. key: parsed_key.into(), value: parsed_value.into(), + post_blank: None, }, )) } @@ -89,12 +91,13 @@ where AK: IntoIterator>, { let (remaining, mut kw) = filtered_keyword(regular_keyword_key)(remaining)?; - let (remaining, _trailing_ws) = + let (remaining, post_blank) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); kw.affiliated_keywords = parse_affiliated_keywords(context.get_global_settings(), affiliated_keywords); kw.source = Into::<&str>::into(source); + kw.post_blank = post_blank.map(Into::<&str>::into); Ok((remaining, kw)) } diff --git a/src/parser/latex_environment.rs b/src/parser/latex_environment.rs index 6503a66..cfcc344 100644 --- a/src/parser/latex_environment.rs +++ b/src/parser/latex_environment.rs @@ -57,7 +57,7 @@ where let (remaining, _end) = latex_environment_end_specialized(&parser_context, remaining)?; let value_end = remaining; - let (remaining, _trailing_ws) = + let (remaining, post_blank) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); let value = get_consumed(value_start, value_end); @@ -70,6 +70,7 @@ where affiliated_keywords, ), value: value.into(), + post_blank: post_blank.map(Into::<&str>::into), }, )) } diff --git a/src/parser/latex_fragment.rs b/src/parser/latex_fragment.rs index e4ade8a..413981d 100644 --- a/src/parser/latex_fragment.rs +++ b/src/parser/latex_fragment.rs @@ -39,7 +39,7 @@ pub(crate) fn latex_fragment<'b, 'g, 'r, 's>( parser_with_context!(bordered_dollar_fragment)(context), ))(input)?; let value = get_consumed(input, remaining); - let (remaining, _trailing_whitespace) = + let (remaining, post_blank) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); Ok(( @@ -47,6 +47,7 @@ pub(crate) fn latex_fragment<'b, 'g, 'r, 's>( LatexFragment { source: source.into(), value: value.into(), + post_blank: post_blank.map(Into::<&str>::into), }, )) } diff --git a/src/parser/lesser_block.rs b/src/parser/lesser_block.rs index 1241cf2..9f06497 100644 --- a/src/parser/lesser_block.rs +++ b/src/parser/lesser_block.rs @@ -80,22 +80,28 @@ where let object_matcher = parser_with_context!(standard_set_object)(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); // Check for a completely empty block - let (remaining, children) = match consumed(many_till(blank_line, exit_matcher))(remaining) { - Ok((remaining, (whitespace, (_children, _exit_contents)))) => ( - remaining, - vec![Object::PlainText(PlainText { - source: whitespace.into(), - })], - ), - Err(_) => { - let (remaining, (children, _exit_contents)) = - many_till(object_matcher, exit_matcher)(remaining)?; - (remaining, children) - } - }; + let (remaining, contents, children) = + match consumed(many_till(blank_line, exit_matcher))(remaining) { + Ok((remaining, (whitespace, (_children, _exit_contents)))) => ( + remaining, + whitespace, + if whitespace.len() > 0 { + vec![Object::PlainText(PlainText { + source: whitespace.into(), + })] + } else { + Vec::new() + }, + ), + Err(_) => { + let (remaining, (contents, (children, _exit_contents))) = + consumed(many_till(object_matcher, exit_matcher))(remaining)?; + (remaining, contents, children) + } + }; let (remaining, _end) = lesser_block_end_specialized(&parser_context, remaining)?; - let (remaining, _trailing_ws) = + let (remaining, post_blank) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); Ok(( @@ -108,6 +114,8 @@ where ), data: parameters.map(Into::<&str>::into), children, + contents: Into::<&str>::into(contents), + post_blank: post_blank.map(Into::<&str>::into), }, )) } @@ -144,7 +152,7 @@ where let (remaining, contents) = parser_with_context!(text_until_exit)(&parser_context)(remaining)?; let (remaining, _end) = lesser_block_end_specialized(&parser_context, remaining)?; - let (remaining, _trailing_ws) = + let (remaining, post_blank) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); Ok(( @@ -156,6 +164,7 @@ where affiliated_keywords, ), contents: contents.into(), + post_blank: post_blank.map(Into::<&str>::into), }, )) } @@ -205,7 +214,7 @@ where let (remaining, contents) = text_until_exit(&parser_context, remaining)?; let (remaining, _end) = lesser_block_end_specialized(&parser_context, remaining)?; - let (remaining, _trailing_ws) = + let (remaining, post_blank) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); let (switches, number_lines, preserve_indent, retain_labels, use_labels, label_format) = { @@ -236,7 +245,8 @@ where retain_labels, use_labels, label_format, - contents: Into::<&str>::into(contents), + value: Into::<&str>::into(contents), + post_blank: post_blank.map(Into::<&str>::into), }, )) } @@ -279,7 +289,7 @@ where let (remaining, contents) = text_until_exit(&parser_context, remaining)?; let (remaining, _end) = lesser_block_end_specialized(&parser_context, remaining)?; - let (remaining, _trailing_ws) = + let (remaining, post_blank) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); Ok(( @@ -292,7 +302,8 @@ where ), export_type: export_type.map(Into::<&str>::into), data: parameters.map(Into::<&str>::into), - contents: Into::<&str>::into(contents), + value: Into::<&str>::into(contents), + post_blank: post_blank.map(Into::<&str>::into), }, )) } @@ -334,7 +345,7 @@ where let (remaining, contents) = text_until_exit(&parser_context, remaining)?; let (remaining, _end) = lesser_block_end_specialized(&parser_context, remaining)?; - let (remaining, _trailing_ws) = + let (remaining, post_blank) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); let (switches, number_lines, preserve_indent, retain_labels, use_labels, label_format) = { @@ -371,7 +382,8 @@ where retain_labels, use_labels, label_format, - contents: Into::<&str>::into(contents), + value: Into::<&str>::into(contents), + post_blank: post_blank.map(Into::<&str>::into), }, )) } diff --git a/src/parser/org_macro.rs b/src/parser/org_macro.rs index 92bfbad..c9af1e9 100644 --- a/src/parser/org_macro.rs +++ b/src/parser/org_macro.rs @@ -32,7 +32,7 @@ pub(crate) fn org_macro<'b, 'g, 'r, 's>( let (remaining, macro_args) = opt(parser_with_context!(org_macro_args)(context))(remaining)?; let (remaining, _) = tag("}}}")(remaining)?; let macro_value = get_consumed(input, remaining); - let (remaining, _trailing_whitespace) = + let (remaining, post_blank) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); @@ -47,6 +47,7 @@ pub(crate) fn org_macro<'b, 'g, 'r, 's>( .map(|arg| arg.into()) .collect(), value: Into::<&str>::into(macro_value), + post_blank: post_blank.map(Into::<&str>::into), }, )) } diff --git a/src/parser/paragraph.rs b/src/parser/paragraph.rs index 077e54b..0223fef 100644 --- a/src/parser/paragraph.rs +++ b/src/parser/paragraph.rs @@ -1,5 +1,8 @@ use nom::branch::alt; +use nom::character::complete::space1; +use nom::combinator::consumed; use nom::combinator::eof; +use nom::combinator::opt; use nom::combinator::recognize; use nom::combinator::verify; use nom::multi::many1; @@ -12,6 +15,7 @@ use super::org_source::OrgSource; use super::util::blank_line; use super::util::get_consumed; use super::util::maybe_consume_trailing_whitespace_if_not_exiting; +use super::util::org_line_ending; use crate::context::parser_with_context; use crate::context::ContextElement; use crate::context::ExitClass; @@ -45,14 +49,14 @@ where let standard_set_object_matcher = parser_with_context!(standard_set_object)(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); - let (remaining, (children, _exit_contents)) = verify( + let (remaining, (contents, (children, _exit_contents))) = consumed(verify( many_till(standard_set_object_matcher, exit_matcher), |(children, _exit_contents)| !children.is_empty(), - )(remaining)?; + ))(remaining)?; // Not checking parent exit matcher because if there are any children matched then we have a valid paragraph. - let (remaining, _trailing_ws) = + let (remaining, post_blank) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); @@ -60,6 +64,8 @@ where remaining, Paragraph { source: source.into(), + contents: Some(contents.into()), + post_blank: post_blank.map(Into::<&str>::into), affiliated_keywords: parse_affiliated_keywords( context.get_global_settings(), affiliated_keywords, @@ -69,6 +75,57 @@ where )) } +#[cfg_attr( + feature = "tracing", + tracing::instrument(ret, level = "debug", skip(context)) +)] +pub(crate) fn empty_paragraph<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, Paragraph<'s>> { + // If it is just a single newline then source, contents, and post-blank are "\n". + // If it has multiple newlines then contents is the first "\n" and post-blank is all the new lines. + // If there are any spaces on the first line then post-blank excludes the first line. + + let exit_matcher = parser_with_context!(exit_matcher_parser)(context); + + let (remaining, first_line_with_spaces) = + opt(recognize(tuple((space1, org_line_ending))))(input)?; + + let post_blank_begin = remaining; + + if let Some(first_line_with_spaces) = first_line_with_spaces { + let (remaining, _additional_lines) = + recognize(many_till(blank_line, exit_matcher))(remaining)?; + let post_blank = get_consumed(post_blank_begin, remaining); + let source = get_consumed(input, remaining); + Ok(( + remaining, + Paragraph::of_text( + Into::<&str>::into(source), + Into::<&str>::into(first_line_with_spaces), + Some(Into::<&str>::into(first_line_with_spaces)), + Some(Into::<&str>::into(post_blank)), + ), + )) + } else { + let (remaining, first_line) = blank_line(remaining)?; + let (remaining, _additional_lines) = + recognize(many_till(blank_line, exit_matcher))(remaining)?; + let post_blank = get_consumed(post_blank_begin, remaining); + let source = get_consumed(input, remaining); + Ok(( + remaining, + Paragraph::of_text( + Into::<&str>::into(source), + Into::<&str>::into(first_line), + Some(Into::<&str>::into(first_line)), + Some(Into::<&str>::into(post_blank)), + ), + )) + } +} + #[cfg_attr( feature = "tracing", tracing::instrument(ret, level = "debug", skip(context)) @@ -96,7 +153,8 @@ mod tests { use crate::context::List; use crate::parser::element_parser::element; use crate::parser::org_source::OrgSource; - use crate::types::GetStandardProperties; + use crate::parser::paragraph::empty_paragraph; + use crate::types::StandardProperties; #[test] fn two_paragraphs() { @@ -109,13 +167,20 @@ mod tests { let (remaining, second_paragraph) = paragraph_matcher(remaining).expect("Parse second paragraph."); assert_eq!(Into::<&str>::into(remaining), ""); - assert_eq!( - first_paragraph.get_standard_properties().get_source(), - "foo bar baz\n\n" - ); - assert_eq!( - second_paragraph.get_standard_properties().get_source(), - "lorem ipsum" - ); + assert_eq!(first_paragraph.get_source(), "foo bar baz\n\n"); + assert_eq!(second_paragraph.get_source(), "lorem ipsum"); + } + + #[test] + fn paragraph_whitespace() { + let input = OrgSource::new("\n"); + let global_settings = GlobalSettings::default(); + let initial_context = ContextElement::document_context(); + let initial_context = Context::new(&global_settings, List::new(&initial_context)); + let paragraph_matcher = bind_context!(empty_paragraph, &initial_context); + let (remaining, paragraph) = paragraph_matcher(input).expect("Parse paragraph"); + assert_eq!(Into::<&str>::into(remaining), ""); + assert_eq!(paragraph.get_source(), "\n"); + assert_eq!(paragraph.get_contents(), Some("\n")); } } diff --git a/src/parser/plain_link.rs b/src/parser/plain_link.rs index cc4815d..339a094 100644 --- a/src/parser/plain_link.rs +++ b/src/parser/plain_link.rs @@ -54,7 +54,7 @@ pub(crate) fn plain_link<'b, 'g, 'r, 's>( let (remaining, _) = pre(context, input)?; let (remaining, path_plain) = parse_path_plain(context, remaining)?; peek(parser_with_context!(post)(context))(remaining)?; - let (remaining, _trailing_whitespace) = + let (remaining, post_blank) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); Ok(( @@ -66,6 +66,7 @@ pub(crate) fn plain_link<'b, 'g, 'r, 's>( raw_link: path_plain.raw_link, search_option: path_plain.search_option, application: path_plain.application, + post_blank: post_blank.map(Into::<&str>::into), }, )) } diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 91aeceb..cf1eae1 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -7,6 +7,7 @@ use nom::character::complete::multispace1; use nom::character::complete::one_of; use nom::character::complete::space0; use nom::character::complete::space1; +use nom::combinator::consumed; use nom::combinator::eof; use nom::combinator::map; use nom::combinator::not; @@ -152,6 +153,7 @@ where let mut children = Vec::new(); let mut first_item_indentation: Option = None; let mut first_item_list_type: Option = None; + let contents_begin = remaining; let mut remaining = remaining; // The final list item does not consume trailing blank lines (which instead get consumed by the list). We have three options here: @@ -195,7 +197,8 @@ where ))); } - let (remaining, _trailing_ws) = + let contents = get_consumed(contents_begin, remaining); + let (remaining, post_blank) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); Ok(( @@ -208,6 +211,8 @@ where ), list_type: first_item_list_type.expect("Plain lists require at least one element."), children: children.into_iter().map(|(_start, item)| item).collect(), + contents: Some(Into::<&str>::into(contents)), + post_blank: post_blank.map(Into::<&str>::into), }, )) } @@ -265,7 +270,7 @@ fn plain_list_item<'b, 'g, 'r, 's>( let maybe_contentless_item: Res, ()> = detect_contentless_item_contents(&parser_context, remaining); if let Ok((_rem, _ws)) = maybe_contentless_item { - let (remaining, _trailing_ws) = if tuple(( + let (remaining, post_blank) = if tuple(( blank_line, bind_context!(final_item_whitespace_cutoff, context), ))(remaining) @@ -291,6 +296,12 @@ fn plain_list_item<'b, 'g, 'r, 's>( .unwrap_or(Vec::new()), pre_blank: 0, children: Vec::new(), + contents: None, + post_blank: if post_blank.len() > 0 { + Some(Into::<&str>::into(post_blank)) + } else { + None + }, }, ), )); @@ -301,13 +312,13 @@ fn plain_list_item<'b, 'g, 'r, 's>( .filter(|b| *b == b'\n') .count(); - let (remaining, (children, _exit_contents)) = many_till( + let (remaining, (contents, (children, _exit_contents))) = consumed(many_till( include_input(bind_context!(element(true), &parser_context)), bind_context!(exit_matcher_parser, &parser_context), - )(remaining)?; + ))(remaining)?; // We have to use the parser_context here to include the whitespace cut-off - let (remaining, _trailing_ws) = + let (remaining, post_blank) = maybe_consume_trailing_whitespace_if_not_exiting(&final_whitespace_context, remaining)?; let source = get_consumed(input, remaining); @@ -329,6 +340,12 @@ fn plain_list_item<'b, 'g, 'r, 's>( pre_blank: PlainListItemPreBlank::try_from(pre_blank) .expect("pre-blank cannot be larger than 2."), children: children.into_iter().map(|(_start, item)| item).collect(), + contents: if contents.len() > 0 { + Some(contents.into()) + } else { + None + }, + post_blank: post_blank.map(Into::<&str>::into), }, ), )); @@ -629,7 +646,7 @@ mod tests { use crate::context::Context; use crate::context::GlobalSettings; use crate::context::List; - use crate::types::GetStandardProperties; + use crate::types::StandardProperties; #[test] fn plain_list_item_empty() { @@ -640,7 +657,7 @@ mod tests { let plain_list_item_matcher = bind_context!(plain_list_item, &initial_context); let (remaining, (_, result)) = plain_list_item_matcher(input).unwrap(); assert_eq!(Into::<&str>::into(remaining), ""); - assert_eq!(result.get_standard_properties().get_source(), "1."); + assert_eq!(result.get_source(), "1."); } #[test] @@ -652,7 +669,7 @@ mod tests { let plain_list_item_matcher = bind_context!(plain_list_item, &initial_context); let (remaining, (_, result)) = plain_list_item_matcher(input).unwrap(); assert_eq!(Into::<&str>::into(remaining), ""); - assert_eq!(result.get_standard_properties().get_source(), "1. foo"); + assert_eq!(result.get_source(), "1. foo"); } #[test] @@ -664,7 +681,7 @@ mod tests { let (remaining, result) = plain_list(std::iter::empty(), input, &initial_context, input).unwrap(); assert_eq!(Into::<&str>::into(remaining), ""); - assert_eq!(result.get_standard_properties().get_source(), "1."); + assert_eq!(result.get_source(), "1."); } #[test] @@ -676,7 +693,7 @@ mod tests { let (remaining, result) = plain_list(std::iter::empty(), input, &initial_context, input).unwrap(); assert_eq!(Into::<&str>::into(remaining), ""); - assert_eq!(result.get_standard_properties().get_source(), "1. foo"); + assert_eq!(result.get_source(), "1. foo"); } #[test] @@ -721,7 +738,7 @@ mod tests { plain_list_matcher(input).expect("Should parse the plain list successfully."); assert_eq!(Into::<&str>::into(remaining), " ipsum\n"); assert_eq!( - result.get_standard_properties().get_source(), + result.get_source(), r#"1. foo 2. bar baz @@ -749,7 +766,7 @@ baz"#, plain_list_matcher(input).expect("Should parse the plain list successfully."); assert_eq!(Into::<&str>::into(remaining), "baz"); assert_eq!( - result.get_standard_properties().get_source(), + result.get_source(), r#"1. foo 1. bar @@ -782,7 +799,7 @@ dolar"#, plain_list_matcher(input).expect("Should parse the plain list successfully."); assert_eq!(Into::<&str>::into(remaining), "dolar"); assert_eq!( - result.get_standard_properties().get_source(), + result.get_source(), r#"1. foo bar diff --git a/src/parser/plain_text.rs b/src/parser/plain_text.rs index 2558182..24858bd 100644 --- a/src/parser/plain_text.rs +++ b/src/parser/plain_text.rs @@ -143,7 +143,7 @@ mod tests { use crate::context::GlobalSettings; use crate::context::List; use crate::parser::object_parser::detect_standard_set_object_sans_plain_text; - use crate::types::GetStandardProperties; + use crate::types::StandardProperties; #[test] fn plain_text_simple() { @@ -160,9 +160,6 @@ mod tests { )(input) .unwrap(); assert_eq!(Into::<&str>::into(remaining), ""); - assert_eq!( - result.get_standard_properties().get_source(), - Into::<&str>::into(input) - ); + assert_eq!(result.get_source(), Into::<&str>::into(input)); } } diff --git a/src/parser/planning.rs b/src/parser/planning.rs index caaf151..804e070 100644 --- a/src/parser/planning.rs +++ b/src/parser/planning.rs @@ -33,7 +33,7 @@ pub(crate) fn planning<'b, 'g, 'r, 's>( many1(parser_with_context!(planning_parameter)(context))(remaining)?; let (remaining, _trailing_ws) = tuple((space0, org_line_ending))(remaining)?; - let (remaining, _trailing_ws) = + let (remaining, post_blank) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); @@ -62,6 +62,7 @@ pub(crate) fn planning<'b, 'g, 'r, 's>( scheduled, deadline, closed, + post_blank: post_blank.map(Into::<&str>::into), }, )) } diff --git a/src/parser/property_drawer.rs b/src/parser/property_drawer.rs index 5cf4d02..4191f60 100644 --- a/src/parser/property_drawer.rs +++ b/src/parser/property_drawer.rs @@ -6,6 +6,7 @@ use nom::character::complete::anychar; use nom::character::complete::line_ending; use nom::character::complete::space0; use nom::character::complete::space1; +use nom::combinator::consumed; use nom::combinator::eof; use nom::combinator::opt; use nom::combinator::recognize; @@ -64,14 +65,11 @@ pub(crate) fn property_drawer<'b, 'g, 'r, 's>( let parser_context = context.with_additional_node(&contexts[0]); let parser_context = parser_context.with_additional_node(&contexts[1]); let parser_context = parser_context.with_additional_node(&contexts[2]); - - let node_property_matcher = parser_with_context!(node_property)(&parser_context); - let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); - let (remaining, (children, _exit_contents)) = - many_till(node_property_matcher, exit_matcher)(remaining)?; + let (remaining, (contents, children)) = + consumed(parser_with_context!(children)(&parser_context))(remaining)?; let (remaining, _end) = property_drawer_end(&parser_context, remaining)?; - let (remaining, _trailing_ws) = + let (remaining, post_blank) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); @@ -80,10 +78,31 @@ pub(crate) fn property_drawer<'b, 'g, 'r, 's>( PropertyDrawer { source: source.into(), children, + contents: if contents.len() > 0 { + Some(contents.into()) + } else { + None + }, + post_blank: post_blank.map(Into::<&str>::into), }, )) } +#[cfg_attr( + feature = "tracing", + tracing::instrument(ret, level = "debug", skip(context)) +)] +fn children<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, Vec>> { + let node_property_matcher = parser_with_context!(node_property)(context); + let exit_matcher = parser_with_context!(exit_matcher_parser)(context); + let (remaining, (children, _exit_contents)) = + many_till(node_property_matcher, exit_matcher)(input)?; + Ok((remaining, children)) +} + #[cfg_attr( feature = "tracing", tracing::instrument(ret, level = "debug", skip(_context)) diff --git a/src/parser/radio_link.rs b/src/parser/radio_link.rs index dbe8dff..c1af7ee 100644 --- a/src/parser/radio_link.rs +++ b/src/parser/radio_link.rs @@ -39,7 +39,7 @@ pub(crate) fn radio_link<'b, 'g, 'r, 's>( let rematched_target = rematch_target(context, radio_target, input); if let Ok((remaining, rematched_target)) = rematched_target { let path = get_consumed(input, remaining); - let (remaining, _) = space0(remaining)?; + let (remaining, post_blank) = space0(remaining)?; let source = get_consumed(input, remaining); return Ok(( remaining, @@ -47,6 +47,11 @@ pub(crate) fn radio_link<'b, 'g, 'r, 's>( source: source.into(), children: rematched_target, path: path.into(), + post_blank: if post_blank.len() > 0 { + Some(Into::<&str>::into(post_blank)) + } else { + None + }, }, )); } @@ -134,7 +139,7 @@ pub(crate) fn radio_target<'b, 'g, 'r, 's>( ))(remaining)?; let (remaining, _closing) = tag(">>>")(remaining)?; - let (remaining, _trailing_whitespace) = + let (remaining, post_blank) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); Ok(( @@ -142,6 +147,7 @@ pub(crate) fn radio_target<'b, 'g, 'r, 's>( RadioTarget { source: source.into(), value: raw_value.into(), + post_blank: post_blank.map(Into::<&str>::into), children, }, )) @@ -175,8 +181,8 @@ mod tests { use crate::parser::element_parser::element; use crate::types::Bold; use crate::types::Element; - use crate::types::GetStandardProperties; use crate::types::PlainText; + use crate::types::StandardProperties; #[test] fn plain_text_radio_target() -> Result<(), Box> { @@ -195,10 +201,7 @@ mod tests { _ => panic!("Should be a paragraph!"), }; assert_eq!(Into::<&str>::into(remaining), ""); - assert_eq!( - first_paragraph.get_standard_properties().get_source(), - "foo bar baz" - ); + assert_eq!(first_paragraph.get_source(), "foo bar baz"); assert_eq!(first_paragraph.children.len(), 3); match first_paragraph .children @@ -206,7 +209,7 @@ mod tests { .expect("Len already asserted to be 3.") { Object::RadioLink(inner) => { - assert_eq!(inner.get_standard_properties().get_source(), "bar "); + assert_eq!(inner.get_source(), "bar "); assert_eq!(inner.path, "bar"); assert_eq!(inner.children.len(), 1); let child = inner @@ -214,7 +217,7 @@ mod tests { .first() .expect("Length already asserted to be 1."); assert!(matches!(child, Object::PlainText(_))); - assert_eq!(child.get_standard_properties().get_source(), "bar"); + assert_eq!(child.get_source(), "bar"); } _ => { return Err("Child should be a radio link.".into()); @@ -228,6 +231,8 @@ mod tests { let input = OrgSource::new("foo *bar* baz"); let radio_target_match = vec![Object::Bold(Bold { source: "*bar*", + contents: "bar", + post_blank: Some(" "), children: vec![Object::PlainText(PlainText { source: "bar" })], })]; let global_settings = GlobalSettings { @@ -244,10 +249,7 @@ mod tests { _ => panic!("Should be a paragraph!"), }; assert_eq!(Into::<&str>::into(remaining), ""); - assert_eq!( - first_paragraph.get_standard_properties().get_source(), - "foo *bar* baz" - ); + assert_eq!(first_paragraph.get_source(), "foo *bar* baz"); assert_eq!(first_paragraph.children.len(), 3); match first_paragraph .children @@ -255,7 +257,7 @@ mod tests { .expect("Len already asserted to be 3.") { Object::RadioLink(inner) => { - assert_eq!(inner.get_standard_properties().get_source(), "*bar* "); + assert_eq!(inner.get_source(), "*bar* "); assert_eq!(inner.path, "*bar* "); assert_eq!(inner.children.len(), 1); let child = inner @@ -263,7 +265,7 @@ mod tests { .first() .expect("Length already asserted to be 1."); assert!(matches!(child, Object::Bold(_))); - assert_eq!(child.get_standard_properties().get_source(), "*bar* "); + assert_eq!(child.get_source(), "*bar* "); } _ => { return Err("Child should be a radio link.".into()); diff --git a/src/parser/regular_link.rs b/src/parser/regular_link.rs index 0097438..75216ae 100644 --- a/src/parser/regular_link.rs +++ b/src/parser/regular_link.rs @@ -73,7 +73,7 @@ fn regular_link_without_description<'b, 'g, 'r, 's>( let (remaining, _opening_bracket) = tag("[[")(input)?; let (remaining, path) = pathreg(context, remaining)?; let (remaining, _closing_bracket) = tag("]]")(remaining)?; - let (remaining, _trailing_whitespace) = + let (remaining, post_blank) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); Ok(( @@ -84,6 +84,8 @@ fn regular_link_without_description<'b, 'g, 'r, 's>( path: path.path, raw_link: path.raw_link, search_option: path.search_option, + contents: None, + post_blank: post_blank.map(Into::<&str>::into), children: Vec::new(), application: path.application, }, @@ -101,9 +103,10 @@ fn regular_link_with_description<'b, 'g, 'r, 's>( let (remaining, _opening_bracket) = tag("[[")(input)?; let (remaining, path) = pathreg(context, remaining)?; let (remaining, _closing_bracket) = tag("][")(remaining)?; - let (remaining, description) = description(context, remaining)?; + let (remaining, (contents, description)) = + consumed(parser_with_context!(description)(context))(remaining)?; let (remaining, _closing_bracket) = tag("]]")(remaining)?; - let (remaining, _trailing_whitespace) = + let (remaining, post_blank) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); Ok(( @@ -114,6 +117,8 @@ fn regular_link_with_description<'b, 'g, 'r, 's>( path: path.path, raw_link: path.raw_link, search_option: path.search_option, + contents: Some(Into::<&str>::into(contents)), + post_blank: post_blank.map(Into::<&str>::into), children: description, application: path.application, }, diff --git a/src/parser/section.rs b/src/parser/section.rs index 4c87545..487281c 100644 --- a/src/parser/section.rs +++ b/src/parser/section.rs @@ -72,7 +72,7 @@ pub(crate) fn zeroth_section<'b, 'g, 'r, 's>( } } - let (remaining, _trailing_ws) = + let (remaining, post_blank) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); @@ -80,6 +80,7 @@ pub(crate) fn zeroth_section<'b, 'g, 'r, 's>( remaining, Section { source: source.into(), + post_blank: post_blank.map(Into::<&str>::into), children, }, )) @@ -128,7 +129,7 @@ pub(crate) fn section<'b, 'g, 'r, 's>( children.insert(0, ele) } - let (remaining, _trailing_ws) = + let (remaining, post_blank) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); @@ -136,6 +137,7 @@ pub(crate) fn section<'b, 'g, 'r, 's>( remaining, Section { source: source.into(), + post_blank: post_blank.map(Into::<&str>::into), children, }, )) diff --git a/src/parser/statistics_cookie.rs b/src/parser/statistics_cookie.rs index af919d1..3d0fd22 100644 --- a/src/parser/statistics_cookie.rs +++ b/src/parser/statistics_cookie.rs @@ -40,7 +40,7 @@ fn percent_statistics_cookie<'b, 'g, 'r, 's>( tag("%]"), )))(input)?; let value = get_consumed(input, remaining); - let (remaining, _trailing_whitespace) = + let (remaining, post_blank) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); Ok(( @@ -48,6 +48,7 @@ fn percent_statistics_cookie<'b, 'g, 'r, 's>( StatisticsCookie { source: source.into(), value: value.into(), + post_blank: post_blank.map(Into::<&str>::into), }, )) } @@ -68,7 +69,7 @@ fn fraction_statistics_cookie<'b, 'g, 'r, 's>( tag("]"), )))(input)?; let value = get_consumed(input, remaining); - let (remaining, _trailing_whitespace) = + let (remaining, post_blank) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); Ok(( @@ -76,6 +77,7 @@ fn fraction_statistics_cookie<'b, 'g, 'r, 's>( StatisticsCookie { source: source.into(), value: value.into(), + post_blank: post_blank.map(Into::<&str>::into), }, )) } diff --git a/src/parser/subscript_and_superscript.rs b/src/parser/subscript_and_superscript.rs index ac627d1..369881f 100644 --- a/src/parser/subscript_and_superscript.rs +++ b/src/parser/subscript_and_superscript.rs @@ -3,6 +3,7 @@ use nom::bytes::complete::tag; use nom::bytes::complete::take_while; use nom::character::complete::anychar; use nom::character::complete::one_of; +use nom::combinator::consumed; use nom::combinator::map; use nom::combinator::not; use nom::combinator::opt; @@ -54,17 +55,20 @@ pub(crate) fn subscript<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Subscript<'s>> { - // We check for the underscore first before checking the pre-character as a minor optimization to avoid walking up the context tree to find the document root unnecessarily. let (remaining, _) = tag("_")(input)?; pre(input)?; let (remaining, body) = script_body(context, remaining)?; - let (remaining, _trailing_whitespace) = + let (remaining, post_blank) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); - let (use_brackets, body) = match body { - ScriptBody::Braceless(text) => (false, vec![Object::PlainText(PlainText { source: text })]), - ScriptBody::WithBraces(body) => (true, body), + let (use_brackets, contents, body) = match body { + ScriptBody::Braceless(text) => ( + false, + text, + vec![Object::PlainText(PlainText { source: text })], + ), + ScriptBody::WithBraces(contents, body) => (true, contents, body), }; Ok(( @@ -72,6 +76,8 @@ pub(crate) fn subscript<'b, 'g, 'r, 's>( Subscript { source: source.into(), use_brackets, + contents, + post_blank: post_blank.map(Into::<&str>::into), children: body, }, )) @@ -89,13 +95,17 @@ pub(crate) fn superscript<'b, 'g, 'r, 's>( let (remaining, _) = tag("^")(input)?; pre(input)?; let (remaining, body) = script_body(context, remaining)?; - let (remaining, _trailing_whitespace) = + let (remaining, post_blank) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); - let (use_brackets, body) = match body { - ScriptBody::Braceless(text) => (false, vec![Object::PlainText(PlainText { source: text })]), - ScriptBody::WithBraces(body) => (true, body), + let (use_brackets, contents, body) = match body { + ScriptBody::Braceless(text) => ( + false, + text, + vec![Object::PlainText(PlainText { source: text })], + ), + ScriptBody::WithBraces(contents, body) => (true, contents, body), }; Ok(( @@ -103,6 +113,8 @@ pub(crate) fn superscript<'b, 'g, 'r, 's>( Superscript { source: source.into(), use_brackets, + contents, + post_blank: post_blank.map(Into::<&str>::into), children: body, }, )) @@ -117,7 +129,7 @@ fn pre<'s>(input: OrgSource<'s>) -> Res, ()> { #[derive(Debug)] enum ScriptBody<'s> { Braceless(&'s str), - WithBraces(Vec>), + WithBraces(&'s str, Vec>), } #[cfg_attr( @@ -135,9 +147,10 @@ fn script_body<'b, 'g, 'r, 's>( map(parser_with_context!(script_alphanum)(context), |body| { ScriptBody::Braceless(body.into()) }), - map(parser_with_context!(script_with_braces)(context), |body| { - ScriptBody::WithBraces(body) - }), + map( + parser_with_context!(script_with_braces)(context), + |(contents, body)| ScriptBody::WithBraces(Into::<&str>::into(contents), body), + ), map( parser_with_context!(script_with_parenthesis)(context), |body| ScriptBody::Braceless(body.into()), @@ -195,7 +208,7 @@ fn end_script_alphanum_character<'s>(input: OrgSource<'s>) -> Res, fn script_with_braces<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, -) -> Res, Vec>> { +) -> Res, (OrgSource<'s>, Vec>)> { let (remaining, _) = tag("{")(input)?; let exit_with_depth = script_with_braces_end(remaining.get_brace_depth()); let parser_context = ContextElement::ExitMatcherNode(ExitMatcherNode { @@ -204,13 +217,13 @@ fn script_with_braces<'b, 'g, 'r, 's>( }); let parser_context = context.with_additional_node(&parser_context); - let (remaining, (children, _exit_contents)) = many_till( + let (remaining, (contents, (children, _exit_contents))) = consumed(many_till( parser_with_context!(standard_set_object)(&parser_context), parser_with_context!(exit_matcher_parser)(&parser_context), - )(remaining)?; + ))(remaining)?; let (remaining, _) = tag("}")(remaining)?; - Ok((remaining, children)) + Ok((remaining, (contents, children))) } fn script_with_braces_end(starting_brace_depth: BracketDepth) -> impl ContextMatcher { diff --git a/src/parser/table.rs b/src/parser/table.rs index 775b4c6..0d294e7 100644 --- a/src/parser/table.rs +++ b/src/parser/table.rs @@ -3,6 +3,7 @@ use nom::bytes::complete::is_not; use nom::bytes::complete::tag; use nom::character::complete::line_ending; use nom::character::complete::space0; +use nom::combinator::consumed; use nom::combinator::not; use nom::combinator::opt; use nom::combinator::peek; @@ -67,13 +68,13 @@ where let org_mode_table_row_matcher = parser_with_context!(org_mode_table_row)(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); - let (remaining, (children, _exit_contents)) = - many_till(org_mode_table_row_matcher, exit_matcher)(remaining)?; + let (remaining, (contents, (children, _exit_contents))) = + consumed(many_till(org_mode_table_row_matcher, exit_matcher))(remaining)?; let (remaining, formulas) = many0(parser_with_context!(table_formula_keyword)(context))(remaining)?; - let (remaining, _trailing_ws) = + let (remaining, post_blank) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); @@ -87,6 +88,8 @@ where ), formulas, children, + contents: Into::<&str>::into(contents), + post_blank: post_blank.map(Into::<&str>::into), }, )) } @@ -150,6 +153,7 @@ fn org_mode_table_row_rule<'b, 'g, 'r, 's>( TableRow { source: source.into(), children: Vec::new(), + contents: None, }, )) } @@ -164,8 +168,8 @@ fn org_mode_table_row_regular<'b, 'g, 'r, 's>( ) -> Res, TableRow<'s>> { start_of_line(input)?; let (remaining, _) = tuple((space0, tag("|")))(input)?; - let (remaining, children) = - many1(parser_with_context!(org_mode_table_cell)(context))(remaining)?; + let (remaining, (contents, children)) = + consumed(many1(parser_with_context!(org_mode_table_cell)(context)))(remaining)?; let (remaining, _tail) = recognize(tuple((space0, org_line_ending)))(remaining)?; let source = get_consumed(input, remaining); Ok(( @@ -173,6 +177,11 @@ fn org_mode_table_row_regular<'b, 'g, 'r, 's>( TableRow { source: source.into(), children, + contents: if contents.len() > 0 { + Some(Into::<&str>::into(contents)) + } else { + None + }, }, )) } @@ -194,12 +203,12 @@ fn org_mode_table_cell<'b, 'g, 'r, 's>( parser_with_context!(table_cell_set_object)(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); let (remaining, _) = space0(input)?; - let (remaining, (children, _exit_contents)) = verify( + let (remaining, (contents, (children, _exit_contents))) = consumed(verify( many_till(table_cell_set_object_matcher, exit_matcher), |(children, exit_contents)| { !children.is_empty() || Into::<&str>::into(exit_contents).ends_with('|') }, - )(remaining)?; + ))(remaining)?; let (remaining, _tail) = org_mode_table_cell_end(&parser_context, remaining)?; @@ -210,6 +219,7 @@ fn org_mode_table_cell<'b, 'g, 'r, 's>( TableCell { source: source.into(), children, + contents: Into::<&str>::into(contents), }, )) } diff --git a/src/parser/target.rs b/src/parser/target.rs index d91fa2f..10dde09 100644 --- a/src/parser/target.rs +++ b/src/parser/target.rs @@ -46,7 +46,7 @@ pub(crate) fn target<'b, 'g, 'r, 's>( ))); } let (remaining, _) = tag(">>")(remaining)?; - let (remaining, _trailing_whitespace) = + let (remaining, post_blank) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); @@ -55,6 +55,7 @@ pub(crate) fn target<'b, 'g, 'r, 's>( Target { source: source.into(), value: body.into(), + post_blank: post_blank.map(Into::<&str>::into), }, )) } diff --git a/src/parser/text_markup.rs b/src/parser/text_markup.rs index cdb1763..c8195b4 100644 --- a/src/parser/text_markup.rs +++ b/src/parser/text_markup.rs @@ -3,11 +3,13 @@ use nom::bytes::complete::tag; use nom::character::complete::anychar; use nom::character::complete::multispace1; use nom::character::complete::one_of; -use nom::character::complete::space0; +use nom::character::complete::space1; use nom::combinator::all_consuming; +use nom::combinator::consumed; use nom::combinator::map; use nom::combinator::map_parser; use nom::combinator::not; +use nom::combinator::opt; use nom::combinator::peek; use nom::combinator::recognize; use nom::combinator::verify; @@ -76,12 +78,14 @@ fn bold<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Bold<'s>> { - let (remaining, children) = text_markup_object("*")(context, input)?; + let (remaining, (contents, children, post_blank)) = text_markup_object("*")(context, input)?; let source = get_consumed(input, remaining); Ok(( remaining, Bold { source: source.into(), + contents: contents.into(), + post_blank: post_blank.map(Into::<&str>::into), children, }, )) @@ -95,12 +99,14 @@ fn italic<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Italic<'s>> { - let (remaining, children) = text_markup_object("/")(context, input)?; + let (remaining, (contents, children, post_blank)) = text_markup_object("/")(context, input)?; let source = get_consumed(input, remaining); Ok(( remaining, Italic { source: source.into(), + contents: contents.into(), + post_blank: post_blank.map(Into::<&str>::into), children, }, )) @@ -114,12 +120,14 @@ fn underline<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Underline<'s>> { - let (remaining, children) = text_markup_object("_")(context, input)?; + let (remaining, (contents, children, post_blank)) = text_markup_object("_")(context, input)?; let source = get_consumed(input, remaining); Ok(( remaining, Underline { source: source.into(), + contents: contents.into(), + post_blank: post_blank.map(Into::<&str>::into), children, }, )) @@ -133,12 +141,14 @@ fn strike_through<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, StrikeThrough<'s>> { - let (remaining, children) = text_markup_object("+")(context, input)?; + let (remaining, (contents, children, post_blank)) = text_markup_object("+")(context, input)?; let source = get_consumed(input, remaining); Ok(( remaining, StrikeThrough { source: source.into(), + contents: contents.into(), + post_blank: post_blank.map(Into::<&str>::into), children, }, )) @@ -152,13 +162,14 @@ fn verbatim<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Verbatim<'s>> { - let (remaining, contents) = text_markup_string("=")(context, input)?; + let (remaining, (contents, post_blank)) = text_markup_string("=")(context, input)?; let source = get_consumed(input, remaining); Ok(( remaining, Verbatim { source: source.into(), contents: contents.into(), + post_blank: post_blank.map(Into::<&str>::into), }, )) } @@ -171,13 +182,14 @@ fn code<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Code<'s>> { - let (remaining, contents) = text_markup_string("~")(context, input)?; + let (remaining, (contents, post_blank)) = text_markup_string("~")(context, input)?; let source = get_consumed(input, remaining); Ok(( remaining, Code { source: source.into(), contents: contents.into(), + post_blank: post_blank.map(Into::<&str>::into), }, )) } @@ -187,8 +199,10 @@ fn text_markup_object( ) -> impl for<'b, 'g, 'r, 's> Fn( RefContext<'b, 'g, 'r, 's>, OrgSource<'s>, -) -> Res, Vec>> - + '_ { +) -> Res< + OrgSource<'s>, + (OrgSource<'s>, Vec>, Option>), +> + '_ { move |context, input: OrgSource<'_>| _text_markup_object(context, input, marker_symbol) } @@ -200,7 +214,7 @@ fn _text_markup_object<'b, 'g, 'r, 's, 'c>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, marker_symbol: &'c str, -) -> Res, Vec>> { +) -> Res, (OrgSource<'s>, Vec>, Option>)> { let (remaining, _) = pre(context, input)?; let (remaining, open) = tag(marker_symbol)(remaining)?; let (remaining, _peek_not_whitespace) = @@ -215,7 +229,7 @@ fn _text_markup_object<'b, 'g, 'r, 's, 'c>( let initial_context = ContextElement::document_context(); let initial_context = Context::new(context.get_global_settings(), List::new(&initial_context)); - let (remaining, children) = map_parser( + let (remaining, (contents, children)) = consumed(map_parser( verify( parser_with_context!(text_until_exit)(&parser_context), |text| text.len() > 0, @@ -225,7 +239,7 @@ fn _text_markup_object<'b, 'g, 'r, 's, 'c>( &initial_context, )))(i) }), - )(remaining)?; + ))(remaining)?; { #[cfg(feature = "tracing")] @@ -240,9 +254,9 @@ fn _text_markup_object<'b, 'g, 'r, 's, 'c>( } let (remaining, _close) = text_markup_end_specialized(context, remaining)?; - let (remaining, _trailing_whitespace) = + let (remaining, post_blank) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; - Ok((remaining, children)) + Ok((remaining, (contents, children, post_blank))) } fn text_markup_string( @@ -250,7 +264,7 @@ fn text_markup_string( ) -> impl for<'b, 'g, 'r, 's> Fn( RefContext<'b, 'g, 'r, 's>, OrgSource<'s>, -) -> Res, OrgSource<'s>> +) -> Res, (OrgSource<'s>, Option>)> + '_ { move |context, input: OrgSource<'_>| _text_markup_string(context, input, marker_symbol) } @@ -263,7 +277,7 @@ fn _text_markup_string<'b, 'g, 'r, 's, 'c>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, marker_symbol: &'c str, -) -> Res, OrgSource<'s>> { +) -> Res, (OrgSource<'s>, Option>)> { let (remaining, _) = pre(context, input)?; let (remaining, open) = tag(marker_symbol)(remaining)?; let (remaining, _peek_not_whitespace) = @@ -296,9 +310,9 @@ fn _text_markup_string<'b, 'g, 'r, 's, 'c>( } let (remaining, _close) = text_markup_end_specialized(context, remaining)?; - let (remaining, _trailing_whitespace) = + let (remaining, post_blank) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; - Ok((remaining, contents)) + Ok((remaining, (contents, post_blank))) } #[cfg_attr( @@ -382,13 +396,15 @@ impl<'x> RematchObject<'x> for Bold<'x> { _context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Object<'s>> { - let (remaining, children) = + let (remaining, (contents, children, post_blank)) = _rematch_text_markup_object(_context, input, "*", &self.children)?; let source = get_consumed(input, remaining); Ok(( remaining, Object::Bold(Bold { source: source.into(), + contents: contents.into(), + post_blank: post_blank.map(Into::<&str>::into), children, }), )) @@ -405,13 +421,15 @@ impl<'x> RematchObject<'x> for Italic<'x> { _context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Object<'s>> { - let (remaining, children) = + let (remaining, (contents, children, post_blank)) = _rematch_text_markup_object(_context, input, "/", &self.children)?; let source = get_consumed(input, remaining); Ok(( remaining, Object::Italic(Italic { source: source.into(), + contents: contents.into(), + post_blank: post_blank.map(Into::<&str>::into), children, }), )) @@ -428,13 +446,15 @@ impl<'x> RematchObject<'x> for Underline<'x> { _context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Object<'s>> { - let (remaining, children) = + let (remaining, (contents, children, post_blank)) = _rematch_text_markup_object(_context, input, "_", &self.children)?; let source = get_consumed(input, remaining); Ok(( remaining, Object::Underline(Underline { source: source.into(), + contents: contents.into(), + post_blank: post_blank.map(Into::<&str>::into), children, }), )) @@ -451,13 +471,15 @@ impl<'x> RematchObject<'x> for StrikeThrough<'x> { _context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Object<'s>> { - let (remaining, children) = + let (remaining, (contents, children, post_blank)) = _rematch_text_markup_object(_context, input, "+", &self.children)?; let source = get_consumed(input, remaining); Ok(( remaining, Object::StrikeThrough(StrikeThrough { source: source.into(), + contents: contents.into(), + post_blank: post_blank.map(Into::<&str>::into), children, }), )) @@ -473,7 +495,7 @@ fn _rematch_text_markup_object<'b, 'g, 'r, 's, 'x>( input: OrgSource<'s>, marker_symbol: &'static str, original_match_children: &'x Vec>, -) -> Res, Vec>> { +) -> Res, (OrgSource<'s>, Vec>, Option>)> { let (remaining, _) = pre(context, input)?; let (remaining, open) = tag(marker_symbol)(remaining)?; let (remaining, _peek_not_whitespace) = peek(not(multispace1))(remaining)?; @@ -484,6 +506,7 @@ fn _rematch_text_markup_object<'b, 'g, 'r, 's, 'x>( }); let parser_context = context.with_additional_node(&parser_context); + let contents_begin = remaining; let (remaining, children) = // TODO: This doesn't really check the exit matcher between each object. I think it may be possible to construct an org document that parses incorrectly with the current code. rematch_target(&parser_context, original_match_children, remaining)?; @@ -499,8 +522,10 @@ fn _rematch_text_markup_object<'b, 'g, 'r, 's, 'x>( ))); } } + let contents_end = remaining; + let contents = contents_begin.get_until(contents_end); let (remaining, _close) = text_markup_end_specialized(context, remaining)?; - let (remaining, _trailing_whitespace) = space0(remaining)?; - Ok((remaining, children)) + let (remaining, post_blank) = opt(space1)(remaining)?; + Ok((remaining, (contents, children, post_blank))) } diff --git a/src/parser/timestamp.rs b/src/parser/timestamp.rs index 926e969..1342c25 100644 --- a/src/parser/timestamp.rs +++ b/src/parser/timestamp.rs @@ -53,8 +53,8 @@ pub(crate) fn timestamp<'b, 'g, 'r, 's>( parser_with_context!(inactive_time_range_timestamp)(context), parser_with_context!(active_date_range_timestamp)(context), parser_with_context!(inactive_date_range_timestamp)(context), - parser_with_context!(active_timestamp)(context), - parser_with_context!(inactive_timestamp)(context), + parser_with_context!(active_timestamp(true))(context), + parser_with_context!(inactive_timestamp(true))(context), ))(input) } @@ -69,7 +69,7 @@ fn diary_timestamp<'b, 'g, 'r, 's>( let (remaining, _) = tag("<%%(")(input)?; let (remaining, _body) = sexp(context, remaining)?; let (remaining, _) = tag(")>")(remaining)?; - let (remaining, _trailing_whitespace) = + let (remaining, post_blank) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); @@ -85,6 +85,7 @@ fn diary_timestamp<'b, 'g, 'r, 's>( end_time: None, repeater: None, warning_delay: None, + post_blank: post_blank.map(Into::<&str>::into), }, )) } @@ -125,13 +126,23 @@ fn sexp_end<'b, 'g, 'r, 's>( alt((tag(")>"), recognize(one_of(">\n"))))(input) } +const fn active_timestamp( + allow_post_blank: bool, +) -> impl for<'b, 'g, 'r, 's> Fn( + RefContext<'b, 'g, 'r, 's>, + OrgSource<'s>, +) -> Res, Timestamp<'s>> { + move |context, input| impl_active_timestamp(context, input, allow_post_blank) +} + #[cfg_attr( feature = "tracing", tracing::instrument(ret, level = "debug", skip(context)) )] -fn active_timestamp<'b, 'g, 'r, 's>( +fn impl_active_timestamp<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, + allow_post_blank: bool, ) -> Res, Timestamp<'s>> { let (remaining, _) = tag("<")(input)?; let (remaining, start) = date(context, remaining)?; @@ -159,8 +170,11 @@ fn active_timestamp<'b, 'g, 'r, 's>( )))(remaining)?; let (remaining, _) = tag(">")(remaining)?; - let (remaining, _trailing_whitespace) = - maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; + let (remaining, post_blank) = if allow_post_blank { + maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)? + } else { + (remaining, None) + }; let source = get_consumed(input, remaining); Ok(( @@ -175,17 +189,28 @@ fn active_timestamp<'b, 'g, 'r, 's>( end_time: time.map(|(_, time)| time), repeater: repeater.map(|(_, repeater)| repeater), warning_delay: warning_delay.map(|(_, warning_delay)| warning_delay), + post_blank: post_blank.map(Into::<&str>::into), }, )) } +pub(crate) const fn inactive_timestamp( + allow_post_blank: bool, +) -> impl for<'b, 'g, 'r, 's> Fn( + RefContext<'b, 'g, 'r, 's>, + OrgSource<'s>, +) -> Res, Timestamp<'s>> { + move |context, input| impl_inactive_timestamp(context, input, allow_post_blank) +} + #[cfg_attr( feature = "tracing", tracing::instrument(ret, level = "debug", skip(context)) )] -pub(crate) fn inactive_timestamp<'b, 'g, 'r, 's>( +fn impl_inactive_timestamp<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, + allow_post_blank: bool, ) -> Res, Timestamp<'s>> { let (remaining, _) = tag("[")(input)?; let (remaining, start) = date(context, remaining)?; @@ -213,8 +238,11 @@ pub(crate) fn inactive_timestamp<'b, 'g, 'r, 's>( )))(remaining)?; let (remaining, _) = tag("]")(remaining)?; - let (remaining, _trailing_whitespace) = - maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; + let (remaining, post_blank) = if allow_post_blank { + maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)? + } else { + (remaining, None) + }; let source = get_consumed(input, remaining); Ok(( @@ -229,6 +257,7 @@ pub(crate) fn inactive_timestamp<'b, 'g, 'r, 's>( end_time: time.map(|(_, time)| time), repeater: repeater.map(|(_, repeater)| repeater), warning_delay: warning_delay.map(|(_, warning_delay)| warning_delay), + post_blank: post_blank.map(Into::<&str>::into), }, )) } @@ -241,12 +270,12 @@ fn active_date_range_timestamp<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Timestamp<'s>> { - let (remaining, first_timestamp) = active_timestamp(context, input)?; + let (remaining, first_timestamp) = impl_active_timestamp(context, input, false)?; // TODO: Does the space0 at the end of the active/inactive timestamp parsers cause this to be incorrect? I could use a look-behind to make sure the preceding character is not whitespace let (remaining, _separator) = tag("--")(remaining)?; - let (remaining, second_timestamp) = active_timestamp(context, remaining)?; + let (remaining, second_timestamp) = impl_active_timestamp(context, remaining, false)?; - let (remaining, _trailing_whitespace) = + let (remaining, post_blank) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); @@ -264,6 +293,7 @@ fn active_date_range_timestamp<'b, 'g, 'r, 's>( warning_delay: first_timestamp .warning_delay .or(second_timestamp.warning_delay), + post_blank: post_blank.map(Into::<&str>::into), }, )) } @@ -302,7 +332,7 @@ fn active_time_range_timestamp<'b, 'g, 'r, 's>( )))(remaining)?; let (remaining, _) = tag(">")(remaining)?; - let (remaining, _trailing_whitespace) = + let (remaining, post_blank) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); @@ -318,6 +348,7 @@ fn active_time_range_timestamp<'b, 'g, 'r, 's>( end_time: Some(second_time), repeater: repeater.map(|(_, repeater)| repeater), warning_delay: warning_delay.map(|(_, warning_delay)| warning_delay), + post_blank: post_blank.map(Into::<&str>::into), }, )) } @@ -330,12 +361,12 @@ pub(crate) fn inactive_date_range_timestamp<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Timestamp<'s>> { - let (remaining, first_timestamp) = inactive_timestamp(context, input)?; + let (remaining, first_timestamp) = impl_inactive_timestamp(context, input, false)?; // TODO: Does the space0 at the end of the active/inactive timestamp parsers cause this to be incorrect? I could use a look-behind to make sure the preceding character is not whitespace let (remaining, _separator) = tag("--")(remaining)?; - let (remaining, second_timestamp) = inactive_timestamp(context, remaining)?; + let (remaining, second_timestamp) = impl_inactive_timestamp(context, remaining, false)?; - let (remaining, _trailing_whitespace) = + let (remaining, post_blank) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); @@ -354,6 +385,7 @@ pub(crate) fn inactive_date_range_timestamp<'b, 'g, 'r, 's>( warning_delay: first_timestamp .warning_delay .or(second_timestamp.warning_delay), + post_blank: post_blank.map(Into::<&str>::into), }, )) } @@ -392,7 +424,7 @@ pub(crate) fn inactive_time_range_timestamp<'b, 'g, 'r, 's>( )))(remaining)?; let (remaining, _) = tag("]")(remaining)?; - let (remaining, _trailing_whitespace) = + let (remaining, post_blank) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); @@ -408,6 +440,7 @@ pub(crate) fn inactive_time_range_timestamp<'b, 'g, 'r, 's>( end_time: Some(second_time), repeater: repeater.map(|(_, repeater)| repeater), warning_delay: warning_delay.map(|(_, warning_delay)| warning_delay), + post_blank: post_blank.map(Into::<&str>::into), }, )) } diff --git a/src/parser/util.rs b/src/parser/util.rs index 82f1250..29523f2 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -81,14 +81,21 @@ pub(crate) fn maybe_consume_object_trailing_whitespace_if_not_exiting<'b, 'g, 'r input: OrgSource<'s>, ) -> Res, Option>> { // We have to check exit matcher after each character because description list tags need to end with a space unconsumed (" ::"). - let (remaining, _) = many_till( + let (remaining, post_blank) = recognize(many_till( one_of(" \t"), alt(( peek(recognize(none_of(" \t"))), parser_with_context!(exit_matcher_parser)(context), )), - )(input)?; - Ok((remaining, None)) + ))(input)?; + Ok(( + remaining, + if post_blank.len() == 0 { + None + } else { + Some(post_blank) + }, + )) } #[cfg_attr( diff --git a/src/types/ast_node.rs b/src/types/ast_node.rs index c9a1729..e282b90 100644 --- a/src/types/ast_node.rs +++ b/src/types/ast_node.rs @@ -1,7 +1,9 @@ use super::macros::to_ast_node; use super::CenterBlock; +use super::PostBlank; use super::QuoteBlock; use super::SpecialBlock; +use super::StandardProperties; use crate::types::AngleLink; use crate::types::BabelCall; use crate::types::Bold; @@ -24,7 +26,6 @@ use crate::types::ExportSnippet; use crate::types::FixedWidthArea; use crate::types::FootnoteDefinition; use crate::types::FootnoteReference; -use crate::types::GetStandardProperties; use crate::types::Heading; use crate::types::HorizontalRule; use crate::types::InlineBabelCall; @@ -259,67 +260,193 @@ to_ast_node!(&'r Superscript<'s>, AstNode::Superscript); to_ast_node!(&'r TableCell<'s>, AstNode::TableCell); to_ast_node!(&'r Timestamp<'s>, AstNode::Timestamp); -impl<'r, 's> GetStandardProperties<'s> for AstNode<'r, 's> { - fn get_standard_properties<'b>(&'b self) -> &'b dyn crate::types::StandardProperties<'s> { +impl<'r, 's> StandardProperties<'s> for AstNode<'r, 's> { + fn get_source<'b>(&'b self) -> &'s str { match self { - AstNode::Document(inner) => *inner, - AstNode::Heading(inner) => *inner, - AstNode::Section(inner) => *inner, - AstNode::Paragraph(inner) => *inner, - AstNode::PlainList(inner) => *inner, - AstNode::PlainListItem(inner) => *inner, - AstNode::CenterBlock(inner) => *inner, - AstNode::QuoteBlock(inner) => *inner, - AstNode::SpecialBlock(inner) => *inner, - AstNode::DynamicBlock(inner) => *inner, - AstNode::FootnoteDefinition(inner) => *inner, - AstNode::Comment(inner) => *inner, - AstNode::Drawer(inner) => *inner, - AstNode::PropertyDrawer(inner) => *inner, - AstNode::NodeProperty(inner) => *inner, - AstNode::Table(inner) => *inner, - AstNode::TableRow(inner) => *inner, - AstNode::VerseBlock(inner) => *inner, - AstNode::CommentBlock(inner) => *inner, - AstNode::ExampleBlock(inner) => *inner, - AstNode::ExportBlock(inner) => *inner, - AstNode::SrcBlock(inner) => *inner, - AstNode::Clock(inner) => *inner, - AstNode::DiarySexp(inner) => *inner, - AstNode::Planning(inner) => *inner, - AstNode::FixedWidthArea(inner) => *inner, - AstNode::HorizontalRule(inner) => *inner, - AstNode::Keyword(inner) => *inner, - AstNode::BabelCall(inner) => *inner, - AstNode::LatexEnvironment(inner) => *inner, - AstNode::Bold(inner) => *inner, - AstNode::Italic(inner) => *inner, - AstNode::Underline(inner) => *inner, - AstNode::StrikeThrough(inner) => *inner, - AstNode::Code(inner) => *inner, - AstNode::Verbatim(inner) => *inner, - AstNode::PlainText(inner) => *inner, - AstNode::RegularLink(inner) => *inner, - AstNode::RadioLink(inner) => *inner, - AstNode::RadioTarget(inner) => *inner, - AstNode::PlainLink(inner) => *inner, - AstNode::AngleLink(inner) => *inner, - AstNode::OrgMacro(inner) => *inner, - AstNode::Entity(inner) => *inner, - AstNode::LatexFragment(inner) => *inner, - AstNode::ExportSnippet(inner) => *inner, - AstNode::FootnoteReference(inner) => *inner, - AstNode::Citation(inner) => *inner, - AstNode::CitationReference(inner) => *inner, - AstNode::InlineBabelCall(inner) => *inner, - AstNode::InlineSourceBlock(inner) => *inner, - AstNode::LineBreak(inner) => *inner, - AstNode::Target(inner) => *inner, - AstNode::StatisticsCookie(inner) => *inner, - AstNode::Subscript(inner) => *inner, - AstNode::Superscript(inner) => *inner, - AstNode::TableCell(inner) => *inner, - AstNode::Timestamp(inner) => *inner, + AstNode::Document(inner) => inner.get_source(), + AstNode::Heading(inner) => inner.get_source(), + AstNode::Section(inner) => inner.get_source(), + AstNode::Paragraph(inner) => inner.get_source(), + AstNode::PlainList(inner) => inner.get_source(), + AstNode::PlainListItem(inner) => inner.get_source(), + AstNode::CenterBlock(inner) => inner.get_source(), + AstNode::QuoteBlock(inner) => inner.get_source(), + AstNode::SpecialBlock(inner) => inner.get_source(), + AstNode::DynamicBlock(inner) => inner.get_source(), + AstNode::FootnoteDefinition(inner) => inner.get_source(), + AstNode::Comment(inner) => inner.get_source(), + AstNode::Drawer(inner) => inner.get_source(), + AstNode::PropertyDrawer(inner) => inner.get_source(), + AstNode::NodeProperty(inner) => inner.get_source(), + AstNode::Table(inner) => inner.get_source(), + AstNode::TableRow(inner) => inner.get_source(), + AstNode::VerseBlock(inner) => inner.get_source(), + AstNode::CommentBlock(inner) => inner.get_source(), + AstNode::ExampleBlock(inner) => inner.get_source(), + AstNode::ExportBlock(inner) => inner.get_source(), + AstNode::SrcBlock(inner) => inner.get_source(), + AstNode::Clock(inner) => inner.get_source(), + AstNode::DiarySexp(inner) => inner.get_source(), + AstNode::Planning(inner) => inner.get_source(), + AstNode::FixedWidthArea(inner) => inner.get_source(), + AstNode::HorizontalRule(inner) => inner.get_source(), + AstNode::Keyword(inner) => inner.get_source(), + AstNode::BabelCall(inner) => inner.get_source(), + AstNode::LatexEnvironment(inner) => inner.get_source(), + AstNode::Bold(inner) => inner.get_source(), + AstNode::Italic(inner) => inner.get_source(), + AstNode::Underline(inner) => inner.get_source(), + AstNode::StrikeThrough(inner) => inner.get_source(), + AstNode::Code(inner) => inner.get_source(), + AstNode::Verbatim(inner) => inner.get_source(), + AstNode::PlainText(inner) => inner.get_source(), + AstNode::RegularLink(inner) => inner.get_source(), + AstNode::RadioLink(inner) => inner.get_source(), + AstNode::RadioTarget(inner) => inner.get_source(), + AstNode::PlainLink(inner) => inner.get_source(), + AstNode::AngleLink(inner) => inner.get_source(), + AstNode::OrgMacro(inner) => inner.get_source(), + AstNode::Entity(inner) => inner.get_source(), + AstNode::LatexFragment(inner) => inner.get_source(), + AstNode::ExportSnippet(inner) => inner.get_source(), + AstNode::FootnoteReference(inner) => inner.get_source(), + AstNode::Citation(inner) => inner.get_source(), + AstNode::CitationReference(inner) => inner.get_source(), + AstNode::InlineBabelCall(inner) => inner.get_source(), + AstNode::InlineSourceBlock(inner) => inner.get_source(), + AstNode::LineBreak(inner) => inner.get_source(), + AstNode::Target(inner) => inner.get_source(), + AstNode::StatisticsCookie(inner) => inner.get_source(), + AstNode::Subscript(inner) => inner.get_source(), + AstNode::Superscript(inner) => inner.get_source(), + AstNode::TableCell(inner) => inner.get_source(), + AstNode::Timestamp(inner) => inner.get_source(), + } + } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + match self { + AstNode::Document(inner) => inner.get_contents(), + AstNode::Heading(inner) => inner.get_contents(), + AstNode::Section(inner) => inner.get_contents(), + AstNode::Paragraph(inner) => inner.get_contents(), + AstNode::PlainList(inner) => inner.get_contents(), + AstNode::PlainListItem(inner) => inner.get_contents(), + AstNode::CenterBlock(inner) => inner.get_contents(), + AstNode::QuoteBlock(inner) => inner.get_contents(), + AstNode::SpecialBlock(inner) => inner.get_contents(), + AstNode::DynamicBlock(inner) => inner.get_contents(), + AstNode::FootnoteDefinition(inner) => inner.get_contents(), + AstNode::Comment(inner) => inner.get_contents(), + AstNode::Drawer(inner) => inner.get_contents(), + AstNode::PropertyDrawer(inner) => inner.get_contents(), + AstNode::NodeProperty(inner) => inner.get_contents(), + AstNode::Table(inner) => inner.get_contents(), + AstNode::TableRow(inner) => inner.get_contents(), + AstNode::VerseBlock(inner) => inner.get_contents(), + AstNode::CommentBlock(inner) => inner.get_contents(), + AstNode::ExampleBlock(inner) => inner.get_contents(), + AstNode::ExportBlock(inner) => inner.get_contents(), + AstNode::SrcBlock(inner) => inner.get_contents(), + AstNode::Clock(inner) => inner.get_contents(), + AstNode::DiarySexp(inner) => inner.get_contents(), + AstNode::Planning(inner) => inner.get_contents(), + AstNode::FixedWidthArea(inner) => inner.get_contents(), + AstNode::HorizontalRule(inner) => inner.get_contents(), + AstNode::Keyword(inner) => inner.get_contents(), + AstNode::BabelCall(inner) => inner.get_contents(), + AstNode::LatexEnvironment(inner) => inner.get_contents(), + AstNode::Bold(inner) => inner.get_contents(), + AstNode::Italic(inner) => inner.get_contents(), + AstNode::Underline(inner) => inner.get_contents(), + AstNode::StrikeThrough(inner) => inner.get_contents(), + AstNode::Code(inner) => inner.get_contents(), + AstNode::Verbatim(inner) => inner.get_contents(), + AstNode::PlainText(inner) => inner.get_contents(), + AstNode::RegularLink(inner) => inner.get_contents(), + AstNode::RadioLink(inner) => inner.get_contents(), + AstNode::RadioTarget(inner) => inner.get_contents(), + AstNode::PlainLink(inner) => inner.get_contents(), + AstNode::AngleLink(inner) => inner.get_contents(), + AstNode::OrgMacro(inner) => inner.get_contents(), + AstNode::Entity(inner) => inner.get_contents(), + AstNode::LatexFragment(inner) => inner.get_contents(), + AstNode::ExportSnippet(inner) => inner.get_contents(), + AstNode::FootnoteReference(inner) => inner.get_contents(), + AstNode::Citation(inner) => inner.get_contents(), + AstNode::CitationReference(inner) => inner.get_contents(), + AstNode::InlineBabelCall(inner) => inner.get_contents(), + AstNode::InlineSourceBlock(inner) => inner.get_contents(), + AstNode::LineBreak(inner) => inner.get_contents(), + AstNode::Target(inner) => inner.get_contents(), + AstNode::StatisticsCookie(inner) => inner.get_contents(), + AstNode::Subscript(inner) => inner.get_contents(), + AstNode::Superscript(inner) => inner.get_contents(), + AstNode::TableCell(inner) => inner.get_contents(), + AstNode::Timestamp(inner) => inner.get_contents(), + } + } + + fn get_post_blank(&self) -> PostBlank { + match self { + AstNode::Document(inner) => inner.get_post_blank(), + AstNode::Heading(inner) => inner.get_post_blank(), + AstNode::Section(inner) => inner.get_post_blank(), + AstNode::Paragraph(inner) => inner.get_post_blank(), + AstNode::PlainList(inner) => inner.get_post_blank(), + AstNode::PlainListItem(inner) => inner.get_post_blank(), + AstNode::CenterBlock(inner) => inner.get_post_blank(), + AstNode::QuoteBlock(inner) => inner.get_post_blank(), + AstNode::SpecialBlock(inner) => inner.get_post_blank(), + AstNode::DynamicBlock(inner) => inner.get_post_blank(), + AstNode::FootnoteDefinition(inner) => inner.get_post_blank(), + AstNode::Comment(inner) => inner.get_post_blank(), + AstNode::Drawer(inner) => inner.get_post_blank(), + AstNode::PropertyDrawer(inner) => inner.get_post_blank(), + AstNode::NodeProperty(inner) => inner.get_post_blank(), + AstNode::Table(inner) => inner.get_post_blank(), + AstNode::TableRow(inner) => inner.get_post_blank(), + AstNode::VerseBlock(inner) => inner.get_post_blank(), + AstNode::CommentBlock(inner) => inner.get_post_blank(), + AstNode::ExampleBlock(inner) => inner.get_post_blank(), + AstNode::ExportBlock(inner) => inner.get_post_blank(), + AstNode::SrcBlock(inner) => inner.get_post_blank(), + AstNode::Clock(inner) => inner.get_post_blank(), + AstNode::DiarySexp(inner) => inner.get_post_blank(), + AstNode::Planning(inner) => inner.get_post_blank(), + AstNode::FixedWidthArea(inner) => inner.get_post_blank(), + AstNode::HorizontalRule(inner) => inner.get_post_blank(), + AstNode::Keyword(inner) => inner.get_post_blank(), + AstNode::BabelCall(inner) => inner.get_post_blank(), + AstNode::LatexEnvironment(inner) => inner.get_post_blank(), + AstNode::Bold(inner) => inner.get_post_blank(), + AstNode::Italic(inner) => inner.get_post_blank(), + AstNode::Underline(inner) => inner.get_post_blank(), + AstNode::StrikeThrough(inner) => inner.get_post_blank(), + AstNode::Code(inner) => inner.get_post_blank(), + AstNode::Verbatim(inner) => inner.get_post_blank(), + AstNode::PlainText(inner) => inner.get_post_blank(), + AstNode::RegularLink(inner) => inner.get_post_blank(), + AstNode::RadioLink(inner) => inner.get_post_blank(), + AstNode::RadioTarget(inner) => inner.get_post_blank(), + AstNode::PlainLink(inner) => inner.get_post_blank(), + AstNode::AngleLink(inner) => inner.get_post_blank(), + AstNode::OrgMacro(inner) => inner.get_post_blank(), + AstNode::Entity(inner) => inner.get_post_blank(), + AstNode::LatexFragment(inner) => inner.get_post_blank(), + AstNode::ExportSnippet(inner) => inner.get_post_blank(), + AstNode::FootnoteReference(inner) => inner.get_post_blank(), + AstNode::Citation(inner) => inner.get_post_blank(), + AstNode::CitationReference(inner) => inner.get_post_blank(), + AstNode::InlineBabelCall(inner) => inner.get_post_blank(), + AstNode::InlineSourceBlock(inner) => inner.get_post_blank(), + AstNode::LineBreak(inner) => inner.get_post_blank(), + AstNode::Target(inner) => inner.get_post_blank(), + AstNode::StatisticsCookie(inner) => inner.get_post_blank(), + AstNode::Subscript(inner) => inner.get_post_blank(), + AstNode::Superscript(inner) => inner.get_post_blank(), + AstNode::TableCell(inner) => inner.get_post_blank(), + AstNode::Timestamp(inner) => inner.get_post_blank(), } } } diff --git a/src/types/document.rs b/src/types/document.rs index e6f128c..e926940 100644 --- a/src/types/document.rs +++ b/src/types/document.rs @@ -1,9 +1,9 @@ use std::path::PathBuf; use super::Element; -use super::GetStandardProperties; use super::NodeProperty; use super::Object; +use super::PostBlank; use super::StandardProperties; use super::Timestamp; @@ -17,6 +17,7 @@ pub struct Document<'s> { pub path: Option, pub zeroth_section: Option>, pub children: Vec>, + pub contents: &'s str, } #[derive(Debug)] @@ -34,11 +35,14 @@ pub struct Heading<'s> { pub scheduled: Option>, pub deadline: Option>, pub closed: Option>, + pub contents: Option<&'s str>, + pub post_blank: Option<&'s str>, } #[derive(Debug)] pub struct Section<'s> { pub source: &'s str, + pub post_blank: Option<&'s str>, pub children: Vec>, } @@ -54,41 +58,60 @@ pub enum TodoKeywordType { Done, } -impl<'s> GetStandardProperties<'s> for DocumentElement<'s> { - fn get_standard_properties<'b>(&'b self) -> &'b dyn StandardProperties<'s> { - match self { - DocumentElement::Heading(inner) => inner, - DocumentElement::Section(inner) => inner, - } - } -} - impl<'s> StandardProperties<'s> for Document<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + Some(self.contents) + } + + fn get_post_blank(&self) -> PostBlank { + 0 + } } impl<'s> StandardProperties<'s> for Section<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + Some(self.source) + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.lines().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for Heading<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + self.contents + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.lines().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> Heading<'s> { pub fn get_raw_value(&self) -> String { // TODO: I think this could just return a string slice instead of an owned string. - let title_source: String = self - .title - .iter() - .map(|obj| obj.get_standard_properties().get_source()) - .collect(); + let title_source: String = self.title.iter().map(|obj| obj.get_source()).collect(); title_source } @@ -132,3 +155,26 @@ impl<'s> Document<'s> { .flat_map(|property_drawer| property_drawer.children.iter()) } } + +impl<'s> StandardProperties<'s> for DocumentElement<'s> { + fn get_source<'b>(&'b self) -> &'s str { + match self { + DocumentElement::Heading(inner) => inner.get_source(), + DocumentElement::Section(inner) => inner.get_source(), + } + } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + match self { + DocumentElement::Heading(inner) => inner.get_contents(), + DocumentElement::Section(inner) => inner.get_contents(), + } + } + + fn get_post_blank(&self) -> PostBlank { + match self { + DocumentElement::Heading(inner) => inner.get_post_blank(), + DocumentElement::Section(inner) => inner.get_post_blank(), + } + } +} diff --git a/src/types/element.rs b/src/types/element.rs index 19e7ca9..b3a2ecb 100644 --- a/src/types/element.rs +++ b/src/types/element.rs @@ -20,7 +20,7 @@ use super::lesser_element::SrcBlock; use super::lesser_element::VerseBlock; use super::CenterBlock; use super::Drawer; -use super::GetStandardProperties; +use super::PostBlank; use super::QuoteBlock; use super::SpecialBlock; use super::StandardProperties; @@ -54,33 +54,91 @@ pub enum Element<'s> { LatexEnvironment(LatexEnvironment<'s>), } -impl<'s> GetStandardProperties<'s> for Element<'s> { - fn get_standard_properties<'b>(&'b self) -> &'b dyn StandardProperties<'s> { +impl<'s> StandardProperties<'s> for Element<'s> { + fn get_source<'b>(&'b self) -> &'s str { match self { - Element::Paragraph(inner) => inner, - Element::PlainList(inner) => inner, - Element::CenterBlock(inner) => inner, - Element::QuoteBlock(inner) => inner, - Element::SpecialBlock(inner) => inner, - Element::DynamicBlock(inner) => inner, - Element::FootnoteDefinition(inner) => inner, - Element::Comment(inner) => inner, - Element::Drawer(inner) => inner, - Element::PropertyDrawer(inner) => inner, - Element::Table(inner) => inner, - Element::VerseBlock(inner) => inner, - Element::CommentBlock(inner) => inner, - Element::ExampleBlock(inner) => inner, - Element::ExportBlock(inner) => inner, - Element::SrcBlock(inner) => inner, - Element::Clock(inner) => inner, - Element::DiarySexp(inner) => inner, - Element::Planning(inner) => inner, - Element::FixedWidthArea(inner) => inner, - Element::HorizontalRule(inner) => inner, - Element::Keyword(inner) => inner, - Element::BabelCall(inner) => inner, - Element::LatexEnvironment(inner) => inner, + Element::Paragraph(inner) => inner.get_source(), + Element::PlainList(inner) => inner.get_source(), + Element::CenterBlock(inner) => inner.get_source(), + Element::QuoteBlock(inner) => inner.get_source(), + Element::SpecialBlock(inner) => inner.get_source(), + Element::DynamicBlock(inner) => inner.get_source(), + Element::FootnoteDefinition(inner) => inner.get_source(), + Element::Comment(inner) => inner.get_source(), + Element::Drawer(inner) => inner.get_source(), + Element::PropertyDrawer(inner) => inner.get_source(), + Element::Table(inner) => inner.get_source(), + Element::VerseBlock(inner) => inner.get_source(), + Element::CommentBlock(inner) => inner.get_source(), + Element::ExampleBlock(inner) => inner.get_source(), + Element::ExportBlock(inner) => inner.get_source(), + Element::SrcBlock(inner) => inner.get_source(), + Element::Clock(inner) => inner.get_source(), + Element::DiarySexp(inner) => inner.get_source(), + Element::Planning(inner) => inner.get_source(), + Element::FixedWidthArea(inner) => inner.get_source(), + Element::HorizontalRule(inner) => inner.get_source(), + Element::Keyword(inner) => inner.get_source(), + Element::BabelCall(inner) => inner.get_source(), + Element::LatexEnvironment(inner) => inner.get_source(), + } + } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + match self { + Element::Paragraph(inner) => inner.get_contents(), + Element::PlainList(inner) => inner.get_contents(), + Element::CenterBlock(inner) => inner.get_contents(), + Element::QuoteBlock(inner) => inner.get_contents(), + Element::SpecialBlock(inner) => inner.get_contents(), + Element::DynamicBlock(inner) => inner.get_contents(), + Element::FootnoteDefinition(inner) => inner.get_contents(), + Element::Comment(inner) => inner.get_contents(), + Element::Drawer(inner) => inner.get_contents(), + Element::PropertyDrawer(inner) => inner.get_contents(), + Element::Table(inner) => inner.get_contents(), + Element::VerseBlock(inner) => inner.get_contents(), + Element::CommentBlock(inner) => inner.get_contents(), + Element::ExampleBlock(inner) => inner.get_contents(), + Element::ExportBlock(inner) => inner.get_contents(), + Element::SrcBlock(inner) => inner.get_contents(), + Element::Clock(inner) => inner.get_contents(), + Element::DiarySexp(inner) => inner.get_contents(), + Element::Planning(inner) => inner.get_contents(), + Element::FixedWidthArea(inner) => inner.get_contents(), + Element::HorizontalRule(inner) => inner.get_contents(), + Element::Keyword(inner) => inner.get_contents(), + Element::BabelCall(inner) => inner.get_contents(), + Element::LatexEnvironment(inner) => inner.get_contents(), + } + } + + fn get_post_blank(&self) -> PostBlank { + match self { + Element::Paragraph(inner) => inner.get_post_blank(), + Element::PlainList(inner) => inner.get_post_blank(), + Element::CenterBlock(inner) => inner.get_post_blank(), + Element::QuoteBlock(inner) => inner.get_post_blank(), + Element::SpecialBlock(inner) => inner.get_post_blank(), + Element::DynamicBlock(inner) => inner.get_post_blank(), + Element::FootnoteDefinition(inner) => inner.get_post_blank(), + Element::Comment(inner) => inner.get_post_blank(), + Element::Drawer(inner) => inner.get_post_blank(), + Element::PropertyDrawer(inner) => inner.get_post_blank(), + Element::Table(inner) => inner.get_post_blank(), + Element::VerseBlock(inner) => inner.get_post_blank(), + Element::CommentBlock(inner) => inner.get_post_blank(), + Element::ExampleBlock(inner) => inner.get_post_blank(), + Element::ExportBlock(inner) => inner.get_post_blank(), + Element::SrcBlock(inner) => inner.get_post_blank(), + Element::Clock(inner) => inner.get_post_blank(), + Element::DiarySexp(inner) => inner.get_post_blank(), + Element::Planning(inner) => inner.get_post_blank(), + Element::FixedWidthArea(inner) => inner.get_post_blank(), + Element::HorizontalRule(inner) => inner.get_post_blank(), + Element::Keyword(inner) => inner.get_post_blank(), + Element::BabelCall(inner) => inner.get_post_blank(), + Element::LatexEnvironment(inner) => inner.get_post_blank(), } } } diff --git a/src/types/get_standard_properties.rs b/src/types/get_standard_properties.rs deleted file mode 100644 index ae6464b..0000000 --- a/src/types/get_standard_properties.rs +++ /dev/null @@ -1,12 +0,0 @@ -use super::StandardProperties; - -pub trait GetStandardProperties<'s> { - // TODO: Can I eliminate this dynamic dispatch, perhaps using nominal generic structs? Low prioritiy since this is not used during parsing. - fn get_standard_properties<'b>(&'b self) -> &'b dyn StandardProperties<'s>; -} - -impl<'s, I: StandardProperties<'s>> GetStandardProperties<'s> for I { - fn get_standard_properties<'b>(&'b self) -> &'b dyn StandardProperties<'s> { - self - } -} diff --git a/src/types/greater_element.rs b/src/types/greater_element.rs index e058b42..7b5b8bf 100644 --- a/src/types/greater_element.rs +++ b/src/types/greater_element.rs @@ -4,6 +4,7 @@ use super::lesser_element::TableCell; use super::AffiliatedKeywords; use super::Keyword; use super::Object; +use super::PostBlank; use super::StandardProperties; #[derive(Debug)] @@ -12,6 +13,8 @@ pub struct PlainList<'s> { pub affiliated_keywords: AffiliatedKeywords<'s>, pub list_type: PlainListType, pub children: Vec>, + pub contents: Option<&'s str>, // TODO: Can contents ever be None? + pub post_blank: Option<&'s str>, } #[derive(Debug, Copy, Clone)] @@ -34,6 +37,8 @@ pub struct PlainListItem<'s> { pub tag: Vec>, pub pre_blank: PlainListItemPreBlank, pub children: Vec>, + pub contents: Option<&'s str>, + pub post_blank: Option<&'s str>, } pub type PlainListItemCounter = u16; @@ -51,6 +56,8 @@ pub struct CenterBlock<'s> { pub source: &'s str, pub affiliated_keywords: AffiliatedKeywords<'s>, pub children: Vec>, + pub contents: Option<&'s str>, + pub post_blank: Option<&'s str>, } #[derive(Debug)] @@ -58,6 +65,8 @@ pub struct QuoteBlock<'s> { pub source: &'s str, pub affiliated_keywords: AffiliatedKeywords<'s>, pub children: Vec>, + pub contents: Option<&'s str>, + pub post_blank: Option<&'s str>, } #[derive(Debug)] @@ -67,6 +76,8 @@ pub struct SpecialBlock<'s> { pub block_type: &'s str, pub parameters: Option<&'s str>, pub children: Vec>, + pub contents: Option<&'s str>, + pub post_blank: Option<&'s str>, } #[derive(Debug)] @@ -76,11 +87,15 @@ pub struct DynamicBlock<'s> { pub block_name: &'s str, pub parameters: Option<&'s str>, pub children: Vec>, + pub contents: Option<&'s str>, + pub post_blank: Option<&'s str>, } #[derive(Debug)] pub struct FootnoteDefinition<'s> { pub source: &'s str, + pub contents: Option<&'s str>, + pub post_blank: Option<&'s str>, pub affiliated_keywords: AffiliatedKeywords<'s>, pub label: &'s str, pub children: Vec>, @@ -92,12 +107,16 @@ pub struct Drawer<'s> { pub affiliated_keywords: AffiliatedKeywords<'s>, pub drawer_name: &'s str, pub children: Vec>, + pub contents: Option<&'s str>, + pub post_blank: Option<&'s str>, } #[derive(Debug)] pub struct PropertyDrawer<'s> { pub source: &'s str, pub children: Vec>, + pub contents: Option<&'s str>, + pub post_blank: Option<&'s str>, } #[derive(Debug)] @@ -113,12 +132,15 @@ pub struct Table<'s> { pub affiliated_keywords: AffiliatedKeywords<'s>, pub formulas: Vec>, pub children: Vec>, + pub contents: &'s str, + pub post_blank: Option<&'s str>, } #[derive(Debug)] pub struct TableRow<'s> { pub source: &'s str, pub children: Vec>, + pub contents: Option<&'s str>, } #[derive(Debug)] @@ -131,72 +153,208 @@ impl<'s> StandardProperties<'s> for PlainList<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + self.contents + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.lines().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for PlainListItem<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + self.contents + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.lines().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for CenterBlock<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + self.contents + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.lines().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for QuoteBlock<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + self.contents + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.lines().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for SpecialBlock<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + self.contents + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.lines().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for DynamicBlock<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + self.contents + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.lines().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for FootnoteDefinition<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + self.contents + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.lines().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for Drawer<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + self.contents + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.lines().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for PropertyDrawer<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + self.contents + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.lines().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for NodeProperty<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + None + } + + fn get_post_blank(&self) -> PostBlank { + 0 + } } impl<'s> StandardProperties<'s> for Table<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + Some(self.contents) + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.lines().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for TableRow<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + self.contents + } + + fn get_post_blank(&self) -> PostBlank { + 0 + } } impl<'s> PlainListItem<'s> { diff --git a/src/types/lesser_element.rs b/src/types/lesser_element.rs index 41097e4..660eb43 100644 --- a/src/types/lesser_element.rs +++ b/src/types/lesser_element.rs @@ -16,6 +16,7 @@ use super::object::Object; use super::AffiliatedKeywords; use super::GetAffiliatedKeywords; use super::PlainText; +use super::PostBlank; use super::StandardProperties; use super::Timestamp; use crate::error::CustomError; @@ -24,6 +25,8 @@ use crate::error::Res; #[derive(Debug)] pub struct Paragraph<'s> { pub source: &'s str, + pub contents: Option<&'s str>, + pub post_blank: Option<&'s str>, pub affiliated_keywords: AffiliatedKeywords<'s>, pub children: Vec>, } @@ -32,12 +35,14 @@ pub struct Paragraph<'s> { pub struct Comment<'s> { pub source: &'s str, pub value: Vec<&'s str>, + pub post_blank: Option<&'s str>, } #[derive(Debug)] pub struct TableCell<'s> { pub source: &'s str, pub children: Vec>, + pub contents: &'s str, } #[derive(Debug)] @@ -46,6 +51,8 @@ pub struct VerseBlock<'s> { pub affiliated_keywords: AffiliatedKeywords<'s>, pub data: Option<&'s str>, pub children: Vec>, + pub contents: &'s str, + pub post_blank: Option<&'s str>, } #[derive(Debug)] @@ -53,6 +60,7 @@ pub struct CommentBlock<'s> { pub source: &'s str, pub affiliated_keywords: AffiliatedKeywords<'s>, pub contents: &'s str, + pub post_blank: Option<&'s str>, } pub type CharOffsetInLine = u16; @@ -75,7 +83,8 @@ pub struct ExampleBlock<'s> { pub retain_labels: RetainLabels, pub use_labels: bool, pub label_format: Option<&'s str>, - pub contents: &'s str, + pub value: &'s str, + pub post_blank: Option<&'s str>, } #[derive(Debug)] @@ -84,7 +93,8 @@ pub struct ExportBlock<'s> { pub affiliated_keywords: AffiliatedKeywords<'s>, pub export_type: Option<&'s str>, pub data: Option<&'s str>, - pub contents: &'s str, + pub value: &'s str, + pub post_blank: Option<&'s str>, } #[derive(Debug)] @@ -99,7 +109,8 @@ pub struct SrcBlock<'s> { pub retain_labels: RetainLabels, pub use_labels: bool, pub label_format: Option<&'s str>, - pub contents: &'s str, + pub value: &'s str, + pub post_blank: Option<&'s str>, } #[derive(Debug)] @@ -114,6 +125,7 @@ pub struct Clock<'s> { pub timestamp: Timestamp<'s>, pub duration: Option<&'s str>, pub status: ClockStatus, + pub post_blank: Option<&'s str>, } #[derive(Debug)] @@ -121,6 +133,7 @@ pub struct DiarySexp<'s> { pub source: &'s str, pub affiliated_keywords: AffiliatedKeywords<'s>, pub value: &'s str, + pub post_blank: Option<&'s str>, } #[derive(Debug)] @@ -129,6 +142,7 @@ pub struct Planning<'s> { pub scheduled: Option>, pub deadline: Option>, pub closed: Option>, + pub post_blank: Option<&'s str>, } #[derive(Debug)] @@ -136,12 +150,14 @@ pub struct FixedWidthArea<'s> { pub source: &'s str, pub affiliated_keywords: AffiliatedKeywords<'s>, pub value: Vec<&'s str>, + pub post_blank: Option<&'s str>, } #[derive(Debug)] pub struct HorizontalRule<'s> { pub source: &'s str, pub affiliated_keywords: AffiliatedKeywords<'s>, + pub post_blank: Option<&'s str>, } #[derive(Debug)] @@ -150,6 +166,7 @@ pub struct Keyword<'s> { pub affiliated_keywords: AffiliatedKeywords<'s>, pub key: &'s str, pub value: &'s str, + pub post_blank: Option<&'s str>, } #[derive(Debug)] @@ -161,6 +178,7 @@ pub struct BabelCall<'s> { pub inside_header: Option<&'s str>, pub arguments: Option<&'s str>, pub end_header: Option<&'s str>, + pub post_blank: Option<&'s str>, } #[derive(Debug)] @@ -168,6 +186,7 @@ pub struct LatexEnvironment<'s> { pub source: &'s str, pub affiliated_keywords: AffiliatedKeywords<'s>, pub value: &'s str, + pub post_blank: Option<&'s str>, } /// A line number used in switches to lesser blocks. @@ -185,9 +204,16 @@ impl<'s> Paragraph<'s> { /// Generate a paragraph of the passed in text with no additional properties. /// /// This is used for elements that support an "empty" content like greater blocks. - pub(crate) fn of_text(source: &'s str, body: &'s str) -> Self { + pub(crate) fn of_text( + source: &'s str, + body: &'s str, + contents: Option<&'s str>, + post_blank: Option<&'s str>, + ) -> Self { Paragraph { source, + contents, + post_blank, affiliated_keywords: AffiliatedKeywords::default(), children: vec![Object::PlainText(PlainText { source: body })], } @@ -198,92 +224,280 @@ impl<'s> StandardProperties<'s> for Paragraph<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + self.contents + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.lines().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for TableCell<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + Some(self.contents) + } + + fn get_post_blank(&self) -> PostBlank { + 0 + } } impl<'s> StandardProperties<'s> for Comment<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + None + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.lines().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for VerseBlock<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + Some(self.contents) + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.lines().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for CommentBlock<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + None + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.lines().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for ExampleBlock<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + None + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.lines().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for ExportBlock<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + None + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.lines().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for SrcBlock<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + None + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.lines().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for Clock<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + None + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.lines().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for DiarySexp<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + None + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.lines().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for Planning<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + None + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.lines().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for FixedWidthArea<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + None + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.lines().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for HorizontalRule<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + None + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.lines().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for Keyword<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + None + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.lines().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for BabelCall<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + None + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.lines().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for LatexEnvironment<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + None + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.lines().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> Comment<'s> { @@ -300,20 +514,14 @@ impl<'s> Comment<'s> { impl<'s> FixedWidthArea<'s> { pub fn get_value(&self) -> String { - let final_size = self.value.iter().map(|line| line.len()).sum(); - let mut ret = String::with_capacity(final_size); - for line in &self.value { - ret.push_str(line); - } - - ret + self.value.join("\n") } } impl<'s> ExampleBlock<'s> { /// Gets the contents of the lesser block, handling the escaping of lines with leading commas. - pub fn get_contents(&self) -> Cow<'s, str> { - lesser_block_content(self.contents).expect("This parser should never fail.") + pub fn get_value(&self) -> Cow<'s, str> { + lesser_block_content(self.value).expect("This parser should never fail.") } } @@ -326,15 +534,15 @@ impl<'s> ExportBlock<'s> { } /// Gets the contents of the lesser block, handling the escaping of lines with leading commas. - pub fn get_contents(&self) -> Cow<'s, str> { - lesser_block_content(self.contents).expect("This parser should never fail.") + pub fn get_value(&self) -> Cow<'s, str> { + lesser_block_content(self.value).expect("This parser should never fail.") } } impl<'s> SrcBlock<'s> { /// Gets the contents of the lesser block, handling the escaping of lines with leading commas. - pub fn get_contents(&self) -> Cow<'s, str> { - lesser_block_content(self.contents).expect("This parser should never fail.") + pub fn get_value(&self) -> Cow<'s, str> { + lesser_block_content(self.value).expect("This parser should never fail.") } } @@ -473,7 +681,7 @@ fn content_line<'s>(input: &'s str) -> Res<&'s str, (Option<&'s str>, &'s str)> #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] /// Check if the child string slice is a slice of the parent string slice. -fn is_slice_of(parent: &str, child: &str) -> bool { +pub(crate) fn is_slice_of(parent: &str, child: &str) -> bool { let parent_start = parent.as_ptr() as usize; let parent_end = parent_start + parent.len(); let child_start = child.as_ptr() as usize; diff --git a/src/types/mod.rs b/src/types/mod.rs index 334dd2f..2653394 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -2,11 +2,11 @@ mod affiliated_keyword; mod ast_node; mod document; mod element; -mod get_standard_properties; mod greater_element; mod lesser_element; mod macros; mod object; +mod remove_trailing; mod standard_properties; mod util; pub use affiliated_keyword::AffiliatedKeyword; @@ -22,7 +22,6 @@ pub use document::PriorityCookie; pub use document::Section; pub use document::TodoKeywordType; pub use element::Element; -pub use get_standard_properties::GetStandardProperties; pub use greater_element::CenterBlock; pub use greater_element::CheckboxType; pub use greater_element::Drawer; @@ -112,4 +111,5 @@ pub use object::WarningDelay; pub use object::WarningDelayType; pub use object::Year; pub use object::YearInner; +pub use standard_properties::PostBlank; pub use standard_properties::StandardProperties; diff --git a/src/types/object.rs b/src/types/object.rs index f900ccb..b2f5f33 100644 --- a/src/types/object.rs +++ b/src/types/object.rs @@ -6,7 +6,7 @@ use super::util::coalesce_whitespace_if_line_break; use super::util::remove_line_break; use super::util::remove_whitespace_if_line_break; use super::util::to_lowercase; -use super::GetStandardProperties; +use super::PostBlank; use super::StandardProperties; #[derive(Debug)] @@ -43,24 +43,32 @@ pub enum Object<'s> { #[derive(Debug)] pub struct Bold<'s> { pub source: &'s str, + pub contents: &'s str, + pub post_blank: Option<&'s str>, pub children: Vec>, } #[derive(Debug)] pub struct Italic<'s> { pub source: &'s str, + pub contents: &'s str, + pub post_blank: Option<&'s str>, pub children: Vec>, } #[derive(Debug)] pub struct Underline<'s> { pub source: &'s str, + pub contents: &'s str, + pub post_blank: Option<&'s str>, pub children: Vec>, } #[derive(Debug)] pub struct StrikeThrough<'s> { pub source: &'s str, + pub contents: &'s str, + pub post_blank: Option<&'s str>, pub children: Vec>, } @@ -68,12 +76,14 @@ pub struct StrikeThrough<'s> { pub struct Code<'s> { pub source: &'s str, pub contents: &'s str, + pub post_blank: Option<&'s str>, } #[derive(Debug)] pub struct Verbatim<'s> { pub source: &'s str, pub contents: &'s str, + pub post_blank: Option<&'s str>, } #[derive(Debug)] @@ -100,6 +110,8 @@ pub struct RegularLink<'s> { /// This does not take into account the post-processing that you would get from the upstream emacs org-mode AST. Use `get_search_option` for an equivalent value. pub search_option: Option>, + pub contents: Option<&'s str>, + pub post_blank: Option<&'s str>, pub children: Vec>, pub application: Option>, } @@ -108,6 +120,7 @@ pub struct RegularLink<'s> { pub struct RadioTarget<'s> { pub source: &'s str, pub value: &'s str, + pub post_blank: Option<&'s str>, pub children: Vec>, } @@ -115,6 +128,7 @@ pub struct RadioTarget<'s> { pub struct RadioLink<'s> { pub source: &'s str, pub path: &'s str, + pub post_blank: Option<&'s str>, pub children: Vec>, } @@ -126,6 +140,7 @@ pub struct PlainLink<'s> { pub raw_link: &'s str, pub search_option: Option<&'s str>, pub application: Option<&'s str>, + pub post_blank: Option<&'s str>, } #[derive(Debug)] @@ -144,6 +159,7 @@ pub struct AngleLink<'s> { /// This does not take into account the post-processing that you would get from the upstream emacs org-mode AST. Use `get_search_option` for an equivalent value. pub search_option: Option<&'s str>, pub application: Option<&'s str>, + pub post_blank: Option<&'s str>, } #[derive(Debug)] @@ -161,6 +177,7 @@ pub struct OrgMacro<'s> { pub args: Vec<&'s str>, pub value: &'s str, + pub post_blank: Option<&'s str>, } #[derive(Debug)] @@ -174,12 +191,14 @@ pub struct Entity<'s> { // Skipping latin1 because it is detrimental to the future. If anyone out there is using latin1, take a long look in the mirror and change your ways. pub utf8: &'s str, pub use_brackets: bool, + pub post_blank: Option<&'s str>, } #[derive(Debug)] pub struct LatexFragment<'s> { pub source: &'s str, pub value: &'s str, + pub post_blank: Option<&'s str>, } #[derive(Debug)] @@ -187,11 +206,14 @@ pub struct ExportSnippet<'s> { pub source: &'s str, pub backend: &'s str, pub contents: Option<&'s str>, + pub post_blank: Option<&'s str>, } #[derive(Debug)] pub struct FootnoteReference<'s> { pub source: &'s str, + pub contents: Option<&'s str>, + pub post_blank: Option<&'s str>, pub label: Option<&'s str>, pub definition: Vec>, } @@ -203,6 +225,8 @@ pub struct Citation<'s> { pub prefix: Vec>, pub suffix: Vec>, pub children: Vec>, + pub contents: &'s str, + pub post_blank: Option<&'s str>, } #[derive(Debug)] @@ -221,6 +245,7 @@ pub struct InlineBabelCall<'s> { pub inside_header: Option<&'s str>, pub arguments: Option<&'s str>, pub end_header: Option<&'s str>, + pub post_blank: Option<&'s str>, } #[derive(Debug)] @@ -229,6 +254,7 @@ pub struct InlineSourceBlock<'s> { pub language: &'s str, pub parameters: Option<&'s str>, pub value: &'s str, + pub post_blank: Option<&'s str>, } #[derive(Debug)] @@ -240,18 +266,22 @@ pub struct LineBreak<'s> { pub struct Target<'s> { pub source: &'s str, pub value: &'s str, + pub post_blank: Option<&'s str>, } #[derive(Debug)] pub struct StatisticsCookie<'s> { pub source: &'s str, pub value: &'s str, + pub post_blank: Option<&'s str>, } #[derive(Debug)] pub struct Subscript<'s> { pub source: &'s str, pub use_brackets: bool, + pub contents: &'s str, + pub post_blank: Option<&'s str>, pub children: Vec>, } @@ -259,6 +289,8 @@ pub struct Subscript<'s> { pub struct Superscript<'s> { pub source: &'s str, pub use_brackets: bool, + pub contents: &'s str, + pub post_blank: Option<&'s str>, pub children: Vec>, } @@ -274,6 +306,7 @@ pub struct Timestamp<'s> { pub end_time: Option>, pub repeater: Option, pub warning_delay: Option, + pub post_blank: Option<&'s str>, } #[derive(Debug, Clone)] @@ -515,200 +548,480 @@ pub struct WarningDelay { pub unit: TimeUnit, } -impl<'s> GetStandardProperties<'s> for Object<'s> { - fn get_standard_properties<'b>(&'b self) -> &'b dyn StandardProperties<'s> { - match self { - Object::Bold(inner) => inner, - Object::Italic(inner) => inner, - Object::Underline(inner) => inner, - Object::StrikeThrough(inner) => inner, - Object::Code(inner) => inner, - Object::Verbatim(inner) => inner, - Object::PlainText(inner) => inner, - Object::RegularLink(inner) => inner, - Object::RadioLink(inner) => inner, - Object::RadioTarget(inner) => inner, - Object::PlainLink(inner) => inner, - Object::AngleLink(inner) => inner, - Object::OrgMacro(inner) => inner, - Object::Entity(inner) => inner, - Object::LatexFragment(inner) => inner, - Object::ExportSnippet(inner) => inner, - Object::FootnoteReference(inner) => inner, - Object::Citation(inner) => inner, - Object::CitationReference(inner) => inner, - Object::InlineBabelCall(inner) => inner, - Object::InlineSourceBlock(inner) => inner, - Object::LineBreak(inner) => inner, - Object::Target(inner) => inner, - Object::StatisticsCookie(inner) => inner, - Object::Subscript(inner) => inner, - Object::Superscript(inner) => inner, - Object::Timestamp(inner) => inner, - } - } -} - impl<'s> StandardProperties<'s> for Bold<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + Some(self.contents) + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|post_blank| post_blank.chars().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for Italic<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + Some(self.contents) + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|post_blank| post_blank.chars().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for Underline<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + Some(self.contents) + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|post_blank| post_blank.chars().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for StrikeThrough<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + Some(self.contents) + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|post_blank| post_blank.chars().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for Code<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + None + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|post_blank| post_blank.chars().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for Verbatim<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + None + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|post_blank| post_blank.chars().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for RegularLink<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + self.contents + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|post_blank| post_blank.chars().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for RadioLink<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + Some(self.path) + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|post_blank| post_blank.chars().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for RadioTarget<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + Some(self.value) + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|post_blank| post_blank.chars().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for PlainLink<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + None + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|post_blank| post_blank.chars().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for AngleLink<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + None + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|post_blank| post_blank.chars().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for OrgMacro<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + None + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.chars().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for Entity<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + None + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|post_blank| post_blank.chars().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for LatexFragment<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + None + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|post_blank| post_blank.chars().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for ExportSnippet<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + None + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.chars().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for FootnoteReference<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + self.contents + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.chars().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for Citation<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + Some(self.contents) + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.chars().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for CitationReference<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + None + } + + fn get_post_blank(&self) -> PostBlank { + 0 + } } impl<'s> StandardProperties<'s> for InlineBabelCall<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + None + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.chars().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for InlineSourceBlock<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + None + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.chars().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for LineBreak<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + None + } + + fn get_post_blank(&self) -> PostBlank { + 0 + } } impl<'s> StandardProperties<'s> for Target<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + None + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|post_blank| post_blank.chars().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for StatisticsCookie<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + None + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|post_blank| post_blank.chars().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for Subscript<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + Some(self.contents) + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.chars().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for Superscript<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + Some(self.contents) + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.chars().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for Timestamp<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + None + } + + fn get_post_blank(&self) -> PostBlank { + self.post_blank + .map(|text| text.chars().count()) + .unwrap_or(0) + .try_into() + .expect("Too much post-blank to fit into a PostBlank.") + } } impl<'s> StandardProperties<'s> for PlainText<'s> { fn get_source<'b>(&'b self) -> &'s str { self.source } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + // This field does not actually exist in emacs for plaintext + Some(self.source) + } + + fn get_post_blank(&self) -> PostBlank { + // This field does not actually exist in emacs for plaintext + 0 + } } impl<'s> Timestamp<'s> { @@ -801,3 +1114,101 @@ impl<'s> FootnoteReference<'s> { } } } + +impl<'s> StandardProperties<'s> for Object<'s> { + fn get_source<'b>(&'b self) -> &'s str { + match self { + Object::Bold(inner) => inner.get_source(), + Object::Italic(inner) => inner.get_source(), + Object::Underline(inner) => inner.get_source(), + Object::StrikeThrough(inner) => inner.get_source(), + Object::Code(inner) => inner.get_source(), + Object::Verbatim(inner) => inner.get_source(), + Object::PlainText(inner) => inner.get_source(), + Object::RegularLink(inner) => inner.get_source(), + Object::RadioLink(inner) => inner.get_source(), + Object::RadioTarget(inner) => inner.get_source(), + Object::PlainLink(inner) => inner.get_source(), + Object::AngleLink(inner) => inner.get_source(), + Object::OrgMacro(inner) => inner.get_source(), + Object::Entity(inner) => inner.get_source(), + Object::LatexFragment(inner) => inner.get_source(), + Object::ExportSnippet(inner) => inner.get_source(), + Object::FootnoteReference(inner) => inner.get_source(), + Object::Citation(inner) => inner.get_source(), + Object::CitationReference(inner) => inner.get_source(), + Object::InlineBabelCall(inner) => inner.get_source(), + Object::InlineSourceBlock(inner) => inner.get_source(), + Object::LineBreak(inner) => inner.get_source(), + Object::Target(inner) => inner.get_source(), + Object::StatisticsCookie(inner) => inner.get_source(), + Object::Subscript(inner) => inner.get_source(), + Object::Superscript(inner) => inner.get_source(), + Object::Timestamp(inner) => inner.get_source(), + } + } + + fn get_contents<'b>(&'b self) -> Option<&'s str> { + match self { + Object::Bold(inner) => inner.get_contents(), + Object::Italic(inner) => inner.get_contents(), + Object::Underline(inner) => inner.get_contents(), + Object::StrikeThrough(inner) => inner.get_contents(), + Object::Code(inner) => inner.get_contents(), + Object::Verbatim(inner) => inner.get_contents(), + Object::PlainText(inner) => inner.get_contents(), + Object::RegularLink(inner) => inner.get_contents(), + Object::RadioLink(inner) => inner.get_contents(), + Object::RadioTarget(inner) => inner.get_contents(), + Object::PlainLink(inner) => inner.get_contents(), + Object::AngleLink(inner) => inner.get_contents(), + Object::OrgMacro(inner) => inner.get_contents(), + Object::Entity(inner) => inner.get_contents(), + Object::LatexFragment(inner) => inner.get_contents(), + Object::ExportSnippet(inner) => inner.get_contents(), + Object::FootnoteReference(inner) => inner.get_contents(), + Object::Citation(inner) => inner.get_contents(), + Object::CitationReference(inner) => inner.get_contents(), + Object::InlineBabelCall(inner) => inner.get_contents(), + Object::InlineSourceBlock(inner) => inner.get_contents(), + Object::LineBreak(inner) => inner.get_contents(), + Object::Target(inner) => inner.get_contents(), + Object::StatisticsCookie(inner) => inner.get_contents(), + Object::Subscript(inner) => inner.get_contents(), + Object::Superscript(inner) => inner.get_contents(), + Object::Timestamp(inner) => inner.get_contents(), + } + } + + fn get_post_blank(&self) -> PostBlank { + match self { + Object::Bold(inner) => inner.get_post_blank(), + Object::Italic(inner) => inner.get_post_blank(), + Object::Underline(inner) => inner.get_post_blank(), + Object::StrikeThrough(inner) => inner.get_post_blank(), + Object::Code(inner) => inner.get_post_blank(), + Object::Verbatim(inner) => inner.get_post_blank(), + Object::PlainText(inner) => inner.get_post_blank(), + Object::RegularLink(inner) => inner.get_post_blank(), + Object::RadioLink(inner) => inner.get_post_blank(), + Object::RadioTarget(inner) => inner.get_post_blank(), + Object::PlainLink(inner) => inner.get_post_blank(), + Object::AngleLink(inner) => inner.get_post_blank(), + Object::OrgMacro(inner) => inner.get_post_blank(), + Object::Entity(inner) => inner.get_post_blank(), + Object::LatexFragment(inner) => inner.get_post_blank(), + Object::ExportSnippet(inner) => inner.get_post_blank(), + Object::FootnoteReference(inner) => inner.get_post_blank(), + Object::Citation(inner) => inner.get_post_blank(), + Object::CitationReference(inner) => inner.get_post_blank(), + Object::InlineBabelCall(inner) => inner.get_post_blank(), + Object::InlineSourceBlock(inner) => inner.get_post_blank(), + Object::LineBreak(inner) => inner.get_post_blank(), + Object::Target(inner) => inner.get_post_blank(), + Object::StatisticsCookie(inner) => inner.get_post_blank(), + Object::Subscript(inner) => inner.get_post_blank(), + Object::Superscript(inner) => inner.get_post_blank(), + Object::Timestamp(inner) => inner.get_post_blank(), + } + } +} diff --git a/src/types/remove_trailing.rs b/src/types/remove_trailing.rs new file mode 100644 index 0000000..822c64c --- /dev/null +++ b/src/types/remove_trailing.rs @@ -0,0 +1,56 @@ +pub(crate) trait RemoveTrailing: Iterator + Sized { + fn remove_trailing>(self, amount_to_remove: R) -> RemoveTrailingIter; +} + +impl RemoveTrailing for I +where + I: Iterator, +{ + fn remove_trailing>(self, amount_to_remove: R) -> RemoveTrailingIter { + RemoveTrailingIter { + inner: self, + buffer: Vec::new(), + next_to_pop: 0, + amount_to_remove: amount_to_remove.into(), + } + } +} + +pub(crate) struct RemoveTrailingIter { + inner: I, + buffer: Vec, + next_to_pop: usize, + amount_to_remove: usize, +} + +impl Iterator for RemoveTrailingIter { + type Item = I::Item; + + fn next(&mut self) -> Option { + if self.buffer.len() < self.amount_to_remove { + self.buffer.reserve_exact(self.amount_to_remove); + } + while self.buffer.len() < self.amount_to_remove { + if let Some(elem) = self.inner.next() { + self.buffer.push(elem); + } else { + // The inner was smaller than amount_to_remove, so never return anything. + return None; + } + } + + let new_value = self.inner.next(); + if self.amount_to_remove == 0 { + return new_value; + } + + if let Some(new_value) = new_value { + let ret = std::mem::replace(&mut self.buffer[self.next_to_pop], new_value); + self.next_to_pop = (self.next_to_pop + 1) % self.amount_to_remove; + Some(ret) + } else { + // We have exactly the amount in the buffer than we wanted to cut off, so stop returning values. + None + } + } +} diff --git a/src/types/standard_properties.rs b/src/types/standard_properties.rs index aa57962..426e0f5 100644 --- a/src/types/standard_properties.rs +++ b/src/types/standard_properties.rs @@ -5,10 +5,15 @@ pub trait StandardProperties<'s> { /// This corresponds to :begin to :end in upstream org-mode's standard properties. fn get_source<'b>(&'b self) -> &'s str; - // Get the slice of the AST node's contents. - // - // This corresponds to :contents-begin to :contents-end - // fn get_contents(&'s self) -> &'s str; + /// Get the slice of the AST node's contents. + /// + /// This corresponds to :contents-begin to :contents-end + fn get_contents<'b>(&'b self) -> Option<&'s str>; + + /// Get the ast node's post-blank. + /// + /// For objects this is a count of the characters of whitespace after the object. For elements this is a count of the line breaks following an element. + fn get_post_blank(&self) -> PostBlank; } // TODO: Write some debugging code to alert when any of the unknown fields below are non-nil in our test data so we can see what these fields represent. @@ -56,3 +61,5 @@ pub trait StandardProperties<'s> { // X :parent - Some weird numeric reference to the containing object. Since we output a tree structure, I do not see any value in including this, especially considering the back-references would be a nightmare in rust. // Special case: Plain text. Plain text counts :begin and :end from the start of the text (so :begin is always 0 AFAICT) and instead of including the full set of standard properties, it only includes :begin, :end, and :parent. + +pub type PostBlank = u8;