Update getting the previous character and previous line.

This can be done a lot more efficiently now that we are keeping track of this information in the wrapped input type instead of having to fetch to the original document out of the context tree.
This commit is contained in:
Tom Alexander 2023-08-24 16:55:56 -04:00
parent dab598e5e7
commit 720afa5d32
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
9 changed files with 59 additions and 114 deletions

View File

@ -21,7 +21,6 @@ use crate::error::Res;
use crate::parser::parser_with_context::parser_with_context;
use crate::parser::util::exit_matcher_parser;
use crate::parser::util::get_consumed;
use crate::parser::util::get_one_before;
use crate::parser::LatexFragment;
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
@ -175,10 +174,7 @@ fn dollar_char_fragment<'r, 's>(
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn pre<'r, 's>(context: Context<'r, 's>, input: OrgSource<'s>) -> Res<OrgSource<'s>, ()> {
let document_root = context.get_document_root().unwrap();
let preceding_character = get_one_before(document_root, input)
.map(|slice| slice.chars().next())
.flatten();
let preceding_character = input.get_preceding_character();
if let Some('$') = preceding_character {
return Err(nom::Err::Error(CustomError::MyError(MyError(
"Not a valid pre character for dollar char fragment.".into(),
@ -239,10 +235,7 @@ pub fn close_border<'r, 's>(
context: Context<'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, ()> {
let document_root = context.get_document_root().unwrap();
let preceding_character = get_one_before(document_root, input)
.map(|slice| slice.chars().next())
.flatten();
let preceding_character = input.get_preceding_character();
match preceding_character {
Some(c) if !c.is_whitespace() && !".,;$".contains(c) => Ok((input, ())),
_ => {

View File

@ -10,8 +10,6 @@ use crate::error::CustomError;
use crate::error::MyError;
use crate::error::Res;
use crate::parser::util::get_consumed;
use crate::parser::util::get_current_line_before_position;
use crate::parser::util::get_one_before;
use crate::parser::LineBreak;
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
@ -34,10 +32,7 @@ pub fn line_break<'r, 's>(
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn pre<'r, 's>(context: Context<'r, 's>, input: OrgSource<'s>) -> Res<OrgSource<'s>, ()> {
let document_root = context.get_document_root().unwrap();
let preceding_character = get_one_before(document_root, input)
.map(|slice| slice.chars().next())
.flatten();
let preceding_character = input.get_preceding_character();
match preceding_character {
// If None, we are at the start of the file
None | Some('\\') => {
@ -47,21 +42,13 @@ fn pre<'r, 's>(context: Context<'r, 's>, input: OrgSource<'s>) -> Res<OrgSource<
}
_ => {}
};
let current_line = get_current_line_before_position(document_root, input);
match current_line {
Some(line) => {
let is_non_empty_line = Into::<&str>::into(line).chars().any(|c| !c.is_whitespace());
if !is_non_empty_line {
return Err(nom::Err::Error(CustomError::MyError(MyError(
"Not a valid pre line for line break.".into(),
))));
}
}
None => {
return Err(nom::Err::Error(CustomError::MyError(MyError(
"No preceding line for line break.".into(),
))));
}
let current_line = input.text_since_line_break();
let is_non_empty_line = current_line.chars().any(|c| !c.is_whitespace());
if !is_non_empty_line {
return Err(nom::Err::Error(CustomError::MyError(MyError(
"Not a valid pre line for line break.".into(),
))));
}
Ok((input, ()))

View File

@ -17,6 +17,7 @@ pub struct OrgSource<'s> {
start: usize,
end: usize, // exclusive
preceding_line_break: Option<usize>,
preceding_character: Option<char>,
}
impl<'s> OrgSource<'s> {
@ -29,6 +30,7 @@ impl<'s> OrgSource<'s> {
start: 0,
end: input.len(),
preceding_line_break: None,
preceding_character: None,
}
}
@ -41,6 +43,17 @@ impl<'s> OrgSource<'s> {
pub fn len(&self) -> usize {
self.end - self.start
}
pub fn get_preceding_character(&self) -> Option<char> {
todo!()
}
pub fn is_at_start_of_line(&self) -> bool {
self.start == 0 || match self.preceding_line_break {
Some(offset) => offset == self.start - 1,
None => false,
}
}
}
impl<'s> InputTake for OrgSource<'s> {
@ -114,6 +127,7 @@ where
start: new_start,
end: new_end,
preceding_line_break: last_line_feed,
preceding_character: skipped_text.chars().last()
}
}
}
@ -325,4 +339,22 @@ mod tests {
assert_eq!(input.slice(6..).text_since_line_break(), "");
assert_eq!(input.slice(6..).slice(10..).text_since_line_break(), "ba");
}
#[test]
fn preceding_character() {
let input = OrgSource::new("lorem\nfoo\nbar\nbaz\nipsum");
assert_eq!(input.get_preceding_character(), None);
assert_eq!(input.slice(5..).get_preceding_character(), Some('m'));
assert_eq!(input.slice(6..).get_preceding_character(), Some('\n'));
assert_eq!(input.slice(6..).slice(10..).get_preceding_character(), Some('a'));
}
#[test]
fn is_at_start_of_line() {
let input = OrgSource::new("lorem\nfoo\nbar\nbaz\nipsum");
assert_eq!(input.is_at_start_of_line(), true);
assert_eq!(input.slice(5..).is_at_start_of_line(), false);
assert_eq!(input.slice(6..).is_at_start_of_line(), true);
assert_eq!(input.slice(6..).slice(10..).is_at_start_of_line(), false);
}
}

View File

@ -21,7 +21,6 @@ use crate::parser::parser_context::ExitMatcherNode;
use crate::parser::parser_with_context::parser_with_context;
use crate::parser::util::exit_matcher_parser;
use crate::parser::util::get_consumed;
use crate::parser::util::get_one_before;
use crate::parser::util::WORD_CONSTITUENT_CHARACTERS;
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
@ -47,10 +46,7 @@ pub fn plain_link<'r, 's>(
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn pre<'r, 's>(context: Context<'r, 's>, input: OrgSource<'s>) -> Res<OrgSource<'s>, ()> {
let document_root = context.get_document_root().unwrap();
let preceding_character = get_one_before(document_root, input)
.map(|slice| slice.chars().next())
.flatten();
let preceding_character = input.get_preceding_character();
match preceding_character {
// If None, we are at the start of the file which is fine
None => {}

View File

@ -156,7 +156,7 @@ mod tests {
crate::parser::Element::Paragraph(paragraph) => paragraph,
_ => panic!("Should be a paragraph!"),
};
assert_eq!(remaining, "");
assert_eq!(Into::<&str>::into(remaining), "");
assert_eq!(first_paragraph.get_source(), "foo bar baz");
assert_eq!(first_paragraph.children.len(), 3);
assert_eq!(
@ -183,12 +183,13 @@ mod tests {
.with_additional_node(ContextElement::DocumentRoot(input))
.with_additional_node(ContextElement::RadioTarget(vec![&radio_target_match]));
let paragraph_matcher = parser_with_context!(element(true))(&document_context);
let (remaining, first_paragraph) = paragraph_matcher(input).expect("Parse first paragraph");
let (remaining, first_paragraph) =
paragraph_matcher(input.into()).expect("Parse first paragraph");
let first_paragraph = match first_paragraph {
crate::parser::Element::Paragraph(paragraph) => paragraph,
_ => panic!("Should be a paragraph!"),
};
assert_eq!(remaining, "");
assert_eq!(Into::<&str>::into(remaining), "");
assert_eq!(first_paragraph.get_source(), "foo *bar* baz");
assert_eq!(first_paragraph.children.len(), 3);
assert_eq!(

View File

@ -25,7 +25,6 @@ use crate::parser::parser_context::SubscriptSuperscriptBrace;
use crate::parser::parser_with_context::parser_with_context;
use crate::parser::util::exit_matcher_parser;
use crate::parser::util::get_consumed;
use crate::parser::util::get_one_before;
use crate::parser::Subscript;
use crate::parser::Superscript;
@ -69,10 +68,7 @@ pub fn superscript<'r, 's>(
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn pre<'r, 's>(context: Context<'r, 's>, input: OrgSource<'s>) -> Res<OrgSource<'s>, ()> {
let document_root = context.get_document_root().unwrap();
let preceding_character = get_one_before(document_root, input)
.map(|slice| slice.chars().next())
.flatten();
let preceding_character = input.get_preceding_character();
match preceding_character {
Some(c) if !c.is_whitespace() => {}
_ => {

View File

@ -18,7 +18,6 @@ use crate::parser::parser_context::ExitMatcherNode;
use crate::parser::parser_with_context::parser_with_context;
use crate::parser::util::exit_matcher_parser;
use crate::parser::util::get_consumed;
use crate::parser::util::get_one_before;
use crate::parser::Target;
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
@ -41,10 +40,8 @@ pub fn target<'r, 's>(
parser_with_context!(exit_matcher_parser)(&parser_context),
))(remaining)?;
let document_root = context.get_document_root().unwrap();
let preceding_character = get_one_before(document_root, remaining)
.map(|slice| slice.chars().next())
.flatten()
let preceding_character = remaining
.get_preceding_character()
.expect("We cannot be at the start of the file because we are inside a target.");
if preceding_character.is_whitespace() {
return Err(nom::Err::Error(CustomError::MyError(MyError(

View File

@ -29,7 +29,6 @@ use crate::parser::parser_with_context::parser_with_context;
use crate::parser::radio_link::rematch_target;
use crate::parser::util::exit_matcher_parser;
use crate::parser::util::get_consumed;
use crate::parser::util::get_one_before;
use crate::parser::util::preceded_by_whitespace;
use crate::parser::Bold;
use crate::parser::Code;
@ -261,10 +260,7 @@ fn _text_markup_string<'r, 's, 'x>(
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn pre<'r, 's>(context: Context<'r, 's>, input: OrgSource<'s>) -> Res<OrgSource<'s>, ()> {
let document_root = context.get_document_root().unwrap();
let preceding_character = get_one_before(document_root, input)
.map(|slice| slice.chars().next())
.flatten();
let preceding_character = input.get_preceding_character();
match preceding_character {
// If None, we are at the start of the file which is technically the beginning of a line.
None | Some('\r') | Some('\n') | Some(' ') | Some('\t') | Some('-') | Some('(')

View File

@ -49,47 +49,6 @@ pub fn immediate_in_section<'r, 's, 'x>(context: Context<'r, 's>, section_name:
false
}
/// Get one character from before the current position.
pub fn get_one_before<'s>(document: &'s str, current_position: OrgSource<'s>) -> Option<&'s str> {
// TODO: This should be replaced with new logic now that we are wrapping the input type.
let current_position = Into::<&str>::into(&current_position);
assert!(is_slice_of(document, current_position));
if document.as_ptr() as usize == current_position.as_ptr() as usize {
return None;
}
let offset = current_position.as_ptr() as usize - document.as_ptr() as usize;
let previous_character_offset = document.floor_char_boundary(offset - 1);
Some(&document[previous_character_offset..offset])
}
/// Get the line current_position is on up until current_position
pub fn get_current_line_before_position<'s>(
document: &'s str,
current_position: OrgSource<'s>,
) -> Option<OrgSource<'s>> {
// TODO: This should be replaced with new logic now that we are wrapping the input type.
let current_position = Into::<&str>::into(&current_position);
assert!(is_slice_of(document, current_position));
if document.as_ptr() as usize == current_position.as_ptr() as usize {
return None;
}
let offset = current_position.as_ptr() as usize - document.as_ptr() as usize;
let mut previous_character_offset = offset;
loop {
let new_offset = document.floor_char_boundary(previous_character_offset - 1);
let new_line = &document[new_offset..offset];
let leading_char = new_line
.chars()
.next()
.expect("Impossible to not have at least 1 character to read.");
if "\r\n".contains(leading_char) || new_offset == 0 {
break;
}
previous_character_offset = new_offset;
}
Some((&document[previous_character_offset..offset]).into())
}
/// Check if the child string slice is a slice of the parent string slice.
fn is_slice_of(parent: &str, child: &str) -> bool {
let parent_start = parent.as_ptr() as usize;
@ -170,22 +129,13 @@ pub fn start_of_line<'r, 's>(
context: Context<'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, ()> {
let document_root = context.get_document_root().unwrap();
let preceding_character = get_one_before(document_root, input)
.map(|slice| slice.chars().next())
.flatten();
match preceding_character {
Some('\n') => {}
Some(_) => {
// Not at start of line, cannot be a heading
return Err(nom::Err::Error(CustomError::MyError(MyError(
"Not at start of line".into(),
))));
}
// If None, we are at the start of the file which allows for headings
None => {}
};
Ok((input, ()))
if input.is_at_start_of_line() {
Ok((input, ()))
} else {
Err(nom::Err::Error(CustomError::MyError(MyError(
"Not at start of line".into(),
))))
}
}
/// Check that we are at the start of a line
@ -194,10 +144,7 @@ pub fn preceded_by_whitespace<'r, 's>(
context: Context<'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, ()> {
let document_root = context.get_document_root().unwrap();
let preceding_character = get_one_before(document_root, input)
.map(|slice| slice.chars().next())
.flatten();
let preceding_character = input.get_preceding_character();
match preceding_character {
Some('\n') | Some('\r') | Some(' ') | Some('\t') => {}
// If None, we are at the start of the file which is not allowed