Add my implementation of a take_until_parser_matches parser.

The author of nom is too busy to review the PR, and crates.io does not allow git dependencies, so I am copying my implementation into this code base. That lets me depend on upstream nom and publish to crates.io. Although this code has also been submitted upstream to nom, which is under the MIT license, I am its author, so I believe I have the full right to release it in this project under the 0BSD license as well.
This commit is contained in:
Tom Alexander 2021-02-06 16:23:54 -05:00
parent b4dd4cebfd
commit 0302ed216f
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
4 changed files with 102 additions and 2 deletions

View File

@ -24,6 +24,8 @@ path = "src/bin.rs"
required-features = ["json-integration"]
[dependencies]
nom = { git = "https://github.com/tomalexander/nom.git", branch = "take_until_parser_matches" }
nom = "6.1.0"
# The author of nom is too busy to review the PR, and crates.io does not allow git dependencies, so I am copying my implementation into this code base. That lets me depend on upstream nom and publish to crates.io.
# nom = { git = "https://github.com/tomalexander/nom.git", branch = "take_until_parser_matches" }
serde = { version = "1.0.106", optional = true }
serde_json = { version = "1.0.51", optional = true }

View File

@ -1,6 +1,7 @@
//! This module contains a rust implementation of LinkedIn Dust
mod parser;
mod take_until_parser_matches;
pub use parser::template;
pub use parser::Body;

View File

@ -1,8 +1,9 @@
use super::take_until_parser_matches::take_until_parser_matches;
use nom::branch::alt;
use nom::bytes::complete::escaped_transform;
use nom::bytes::complete::is_a;
use nom::bytes::complete::is_not;
use nom::bytes::complete::{tag, take_until, take_until_parser_matches};
use nom::bytes::complete::{tag, take_until};
use nom::character::complete::line_ending;
use nom::character::complete::multispace0;
use nom::character::complete::one_of;

View File

@ -0,0 +1,96 @@
use nom::{
error::ErrorKind, error::ParseError, IResult, InputIter, InputLength, InputTake, Parser,
};
/// Returns the shortest input slice till it matches the parser.
///
/// The inner parser is used purely as a look-ahead: the input it would have
/// consumed is left in the remaining input. On success the result is
/// `Ok((remaining, taken))`, where `remaining` starts at the first position
/// at which the inner parser succeeded and `taken` is everything before it.
///
/// Positions are tried in the order reported by `iter_indices()`, followed by
/// one final attempt at the very end of the input so that zero-length parsers
/// (for example an eof combinator) can match.
///
/// # Errors
///
/// Returns `Err(Err::Error(_))` built with `ErrorKind::TakeUntil` if the inner
/// parser did not match at any position. (Upstream this would be a dedicated
/// `ErrorKind::TakeUntilParserMatches`, but `ErrorKind` cannot be extended
/// from this project.)
///
/// Every error produced by the inner parser while probing (including
/// `Err::Failure` and `Err::Incomplete`) is discarded and treated as
/// "no match at this position".
///
/// # Performance
///
/// The performance of this parser depends HEAVILY on the inner parser
/// failing early. For each step on the input, this will run the inner
/// parser against the remaining input, so if the inner parser does
/// not fail fast then you will end up re-parsing the remaining input
/// repeatedly.
///
/// If you are looking to match until a string
/// (`take_until_parser_matches(tag("foo"))`) it would be faster to
/// use `take_until("foo")`.
///
/// # Simple Example
/// ```ignore
/// # #[macro_use] extern crate nom;
/// # use nom::{Err, error::ErrorKind, IResult};
/// use nom::bytes::complete::tag;
/// // `take_until_parser_matches` is the function defined in this module.
///
/// fn until_eof(s: &str) -> IResult<&str, &str> {
///     take_until_parser_matches(tag("eof"))(s)
/// }
///
/// assert_eq!(until_eof("hello, worldeof"), Ok(("eof", "hello, world")));
/// assert_eq!(until_eof("hello, world"), Err(Err::Error(error_position!("hello, world", ErrorKind::TakeUntil))));
/// assert_eq!(until_eof(""), Err(Err::Error(error_position!("", ErrorKind::TakeUntil))));
/// ```
///
/// # Powerful Example
/// To show the power of this parser we will parse a line containing
/// a set of flags at the end surrounded by brackets. Example:
/// "Submit a PR [inprogress]"
/// ```ignore
/// # #[macro_use] extern crate nom;
/// # use nom::{Err, error::ErrorKind, IResult};
/// use nom::bytes::complete::{is_not, tag};
/// use nom::sequence::{delimited, tuple};
/// use nom::multi::separated_list1;
/// // `take_until_parser_matches` is the function defined in this module.
///
/// fn flag(i: &str) -> IResult<&str, &str> {
///     delimited(tag("["), is_not("]\r\n"), tag("]"))(i)
/// }
///
/// fn line_ending_with_flags(i: &str) -> IResult<&str, (&str, std::vec::Vec<&str>)> {
///     tuple((
///         take_until_parser_matches(flag),
///         separated_list1(tag(" "), flag),
///     ))(i)
/// }
///
/// assert_eq!(line_ending_with_flags("Parsing Seminar [important] [presentation]"), Ok(("", ("Parsing Seminar ", vec!["important", "presentation"]))));
/// ```
pub fn take_until_parser_matches<F, Input, O, Error>(
    mut f: F,
) -> impl FnMut(Input) -> IResult<Input, Input, Error>
where
    // `Clone` is no longer needed by the body; it is kept so the public
    // signature stays identical for existing callers.
    Input: InputTake + InputIter + InputLength + Clone,
    F: Parser<Input, O, Error>,
    Error: ParseError<Input>,
{
    move |input: Input| {
        // Candidate split points: every index reported by the input, then the
        // end of the input. The trailing candidate allows 0-length
        // combinators to be used (for example, an eof combinator).
        let end_of_input = std::iter::once(input.input_len());
        let split_points = input
            .iter_indices()
            .map(|(ind, _)| ind)
            .chain(end_of_input);

        for split_at in split_points {
            // Probe the inner parser on the input that would remain after
            // splitting here. Any error simply means "try the next position".
            let (remaining, _taken) = input.take_split(split_at);
            if f.parse(remaining).is_ok() {
                // `remaining` was consumed by the probe, so split again to
                // build the result without requiring a clone.
                return Ok(input.take_split(split_at));
            }
        }

        Err(nom::Err::Error(Error::from_error_kind(
            input,
            // Normally this would be `ErrorKind::TakeUntilParserMatches` but I cannot extend ErrorKind in this project.
            ErrorKind::TakeUntil,
        )))
    }
}