use std::ops::RangeBounds; use nom::Compare; use nom::InputIter; use nom::InputLength; use nom::InputTake; use nom::InputTakeAtPosition; use nom::Offset; use nom::Slice; use crate::error::CustomError; use crate::error::MyError; #[derive(Debug, Copy, Clone)] pub struct OrgSource<'s> { full_source: &'s str, start: usize, end: usize, // exclusive start_of_line: usize, preceding_character: Option<char>, line_number: usize, } impl<'s> OrgSource<'s> { /// Returns a wrapped string that keeps track of values we need for parsing org-mode. /// /// Only call this on the full original string. Calling this on a substring can result in invalid values. pub fn new(input: &'s str) -> Self { OrgSource { full_source: input, start: 0, end: input.len(), start_of_line: 0, preceding_character: None, line_number: 1, } } /// Get the text since the line break preceding the start of this WrappedInput. pub fn text_since_line_break(&self) -> &'s str { &self.full_source[self.start_of_line..self.start] } pub fn len(&self) -> usize { self.end - self.start } pub fn get_preceding_character(&self) -> Option<char> { self.preceding_character } pub fn is_at_start_of_line(&self) -> bool { self.start == self.start_of_line } pub fn get_until(&self, other: OrgSource<'s>) -> OrgSource<'s> { assert!(other.start >= self.start); assert!(other.end <= self.end); self.slice(..(other.start - self.start)) } pub fn get_line_number(&self) -> usize { self.line_number } } impl<'s> InputTake for OrgSource<'s> { fn take(&self, count: usize) -> Self { self.slice(..count) } fn take_split(&self, count: usize) -> (Self, Self) { (self.slice(count..), self.slice(..count)) } } impl<'s, 'o, O: Into<&'o str>> Compare<O> for OrgSource<'s> { fn compare(&self, t: O) -> nom::CompareResult { (&self.full_source[self.start..self.end]).compare(t.into()) } fn compare_no_case(&self, t: O) -> nom::CompareResult { (&self.full_source[self.start..self.end]).compare_no_case(t.into()) } } impl<'s> From<&'s str> for OrgSource<'s> { fn from(value: &'s str) -> Self { OrgSource::new(value) } } impl<'s> From<&OrgSource<'s>> for &'s str { fn from(value: &OrgSource<'s>) -> Self { &value.full_source[value.start..value.end] } } impl<'s> From<OrgSource<'s>> for &'s str { fn from(value: OrgSource<'s>) -> Self { &value.full_source[value.start..value.end] } } impl<'s, R> Slice<R> for OrgSource<'s> where R: RangeBounds<usize>, { fn slice(&self, range: R) -> Self { let new_start = match range.start_bound() { std::ops::Bound::Included(idx) => self.start + idx, std::ops::Bound::Excluded(idx) => self.start + idx - 1, std::ops::Bound::Unbounded => self.start, }; let new_end = match range.end_bound() { std::ops::Bound::Included(idx) => self.start + idx + 1, std::ops::Bound::Excluded(idx) => self.start + idx, std::ops::Bound::Unbounded => self.end, }; if new_start < self.start { panic!("Attempted to extend before the start of the WrappedInput.") } if new_end > self.end { panic!("Attempted to extend past the end of the WrappedInput.") } let skipped_text = &self.full_source[self.start..new_start]; let mut start_of_line = self.start_of_line; let mut line_number = self.line_number; for (offset, character) in skipped_text.char_indices() { if character == '\n' { start_of_line = self.start + offset + 1; line_number += 1; } } OrgSource { full_source: self.full_source, start: new_start, end: new_end, start_of_line, preceding_character: skipped_text.chars().last(), line_number, } } } impl<'s> std::fmt::Display for OrgSource<'s> { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { Into::<&str>::into(self).fmt(f) } } impl<'s> InputLength for OrgSource<'s> { fn input_len(&self) -> usize { self.end - self.start } } impl<'s> InputIter for OrgSource<'s> { type Item = <&'s str as InputIter>::Item; type Iter = <&'s str as InputIter>::Iter; type IterElem = <&'s str as InputIter>::IterElem; fn iter_indices(&self) -> Self::Iter { Into::<&str>::into(self).char_indices() } fn iter_elements(&self) -> Self::IterElem { Into::<&str>::into(self).iter_elements() } fn position<P>(&self, predicate: P) -> Option<usize> where P: Fn(Self::Item) -> bool, { Into::<&str>::into(self).position(predicate) } fn slice_index(&self, count: usize) -> Result<usize, nom::Needed> { Into::<&str>::into(self).slice_index(count) } } impl<'s> Offset for OrgSource<'s> { fn offset(&self, second: &Self) -> usize { second.start - self.start } } impl<'s> InputTakeAtPosition for OrgSource<'s> { type Item = <&'s str as InputTakeAtPosition>::Item; fn split_at_position<P, E: nom::error::ParseError<Self>>( &self, predicate: P, ) -> nom::IResult<Self, Self, E> where P: Fn(Self::Item) -> bool, { match Into::<&str>::into(self).position(predicate) { Some(idx) => Ok(self.take_split(idx)), None => Err(nom::Err::Incomplete(nom::Needed::new(1))), } } fn split_at_position1<P, E: nom::error::ParseError<Self>>( &self, predicate: P, e: nom::error::ErrorKind, ) -> nom::IResult<Self, Self, E> where P: Fn(Self::Item) -> bool, { match Into::<&str>::into(self).position(predicate) { Some(0) => Err(nom::Err::Error(E::from_error_kind(self.clone(), e))), Some(idx) => Ok(self.take_split(idx)), None => Err(nom::Err::Incomplete(nom::Needed::new(1))), } } fn split_at_position_complete<P, E: nom::error::ParseError<Self>>( &self, predicate: P, ) -> nom::IResult<Self, Self, E> where P: Fn(Self::Item) -> bool, { match self.split_at_position(predicate) { Err(nom::Err::Incomplete(_)) => Ok(self.take_split(self.input_len())), res => res, } } fn split_at_position1_complete<P, E: nom::error::ParseError<Self>>( &self, predicate: P, e: nom::error::ErrorKind, ) -> nom::IResult<Self, Self, E> where P: Fn(Self::Item) -> bool, { let window = Into::<&str>::into(self); match window.position(predicate) { Some(0) => Err(nom::Err::Error(E::from_error_kind(self.clone(), e))), Some(n) => Ok(self.take_split(n)), None => { if window.input_len() == 0 { Err(nom::Err::Error(E::from_error_kind(self.clone(), e))) } else { Ok(self.take_split(self.input_len())) } } } } } pub fn convert_error(err: nom::Err<CustomError<OrgSource<'_>>>) -> nom::Err<CustomError<&str>> { match err { nom::Err::Incomplete(needed) => nom::Err::Incomplete(needed), nom::Err::Error(err) => nom::Err::Error(err.into()), nom::Err::Failure(err) => nom::Err::Failure(err.into()), } } impl<'s> From<CustomError<OrgSource<'s>>> for CustomError<&'s str> { fn from(value: CustomError<OrgSource<'s>>) -> Self { match value { CustomError::MyError(err) => CustomError::MyError(err.into()), CustomError::Nom(input, error_kind) => CustomError::Nom(input.into(), error_kind), } } } impl<'s> From<MyError<OrgSource<'s>>> for MyError<&'s str> { fn from(value: MyError<OrgSource<'s>>) -> Self { MyError(value.0.into()) } } #[cfg(test)] mod tests { use super::*; #[test] fn range() { let input = OrgSource::new("foo bar baz"); let output = input.slice(4..7); assert_eq!(output.to_string(), "bar"); } #[test] fn range_to() { let input = OrgSource::new("foo bar baz"); let output = input.slice(..7); assert_eq!(output.to_string(), "foo bar"); } #[test] fn range_from() { let input = OrgSource::new("foo bar baz"); let output = input.slice(4..); assert_eq!(output.to_string(), "bar baz"); } #[test] fn full_range() { let input = OrgSource::new("foo bar baz"); let output = input.slice(..); assert_eq!(output.to_string(), "foo bar baz"); } #[test] fn nested_range() { let input = OrgSource::new("lorem foo bar baz ipsum"); let first_cut = input.slice(6..17); let output = first_cut.slice(4..7); assert_eq!(first_cut.to_string(), "foo bar baz"); assert_eq!(output.to_string(), "bar"); } #[test] #[should_panic] fn out_of_bounds() { let input = OrgSource::new("lorem foo bar baz ipsum"); input.slice(6..30); } #[test] #[should_panic] fn out_of_nested_bounds() { let input = OrgSource::new("lorem foo bar baz ipsum"); let first_cut = input.slice(6..17); first_cut.slice(4..14); } #[test] fn line_break() { let input = OrgSource::new("lorem\nfoo\nbar\nbaz\nipsum"); assert_eq!(input.slice(5..).start_of_line, 0); assert_eq!(input.slice(6..).start_of_line, 6); assert_eq!(input.slice(6..).slice(10..).start_of_line, 14); } #[test] fn text_since_line_break() { let input = OrgSource::new("lorem\nfoo\nbar\nbaz\nipsum"); assert_eq!(input.text_since_line_break(), ""); assert_eq!(input.slice(5..).text_since_line_break(), "lorem"); assert_eq!(input.slice(6..).text_since_line_break(), ""); assert_eq!(input.slice(6..).slice(10..).text_since_line_break(), "ba"); } #[test] fn preceding_character() { let input = OrgSource::new("lorem\nfoo\nbar\nbaz\nipsum"); assert_eq!(input.get_preceding_character(), None); assert_eq!(input.slice(5..).get_preceding_character(), Some('m')); assert_eq!(input.slice(6..).get_preceding_character(), Some('\n')); assert_eq!( input.slice(6..).slice(10..).get_preceding_character(), Some('a') ); } #[test] fn is_at_start_of_line() { let input = OrgSource::new("lorem\nfoo\nbar\nbaz\nipsum"); assert_eq!(input.is_at_start_of_line(), true); assert_eq!(input.slice(5..).is_at_start_of_line(), false); assert_eq!(input.slice(6..).is_at_start_of_line(), true); assert_eq!(input.slice(6..).slice(10..).is_at_start_of_line(), false); } #[test] fn preceding_character_unicode() { let input = OrgSource::new("๐งก๐๐๐๐"); assert_eq!(input.get_preceding_character(), None); assert_eq!(input.slice(8..).get_preceding_character(), Some('๐')); } #[test] fn line_number() { let input = OrgSource::new("lorem\nfoo\nbar\nbaz\nipsum"); assert_eq!(input.get_line_number(), 1); assert_eq!(input.slice(5..).get_line_number(), 1); assert_eq!(input.slice(6..).get_line_number(), 2); assert_eq!(input.slice(6..).slice(10..).get_line_number(), 4); } }