Add my implementation of a take_until_parser_matches parser.
The author of nom is too busy to review the PR, and crates.io does not allow publishing crates that have git dependencies, so I am copying my implementation into this code base. That lets me depend on upstream nom and publish this crate to crates.io. While this code has been submitted upstream to nom, which is under the MIT license, I am the author of this code, so I believe I have the full right to also release it in this project under the 0BSD license.
This commit is contained in:
parent
b4dd4cebfd
commit
0302ed216f
@ -24,6 +24,8 @@ path = "src/bin.rs"
|
||||
required-features = ["json-integration"]
|
||||
|
||||
[dependencies]
|
||||
nom = { git = "https://github.com/tomalexander/nom.git", branch = "take_until_parser_matches" }
|
||||
nom = "6.1.0"
|
||||
# The author of nom is too busy to review the PR, and cargo does not allow for git dependencies, so I am going to copy my implementation into this code base so I can use upstream nom so I can push to cargo.
|
||||
# nom = { git = "https://github.com/tomalexander/nom.git", branch = "take_until_parser_matches" }
|
||||
serde = { version = "1.0.106", optional = true }
|
||||
serde_json = { version = "1.0.51", optional = true }
|
||||
|
@ -1,6 +1,7 @@
|
||||
//! This module contains a rust implementation of LinkedIn Dust
|
||||
|
||||
mod parser;
|
||||
mod take_until_parser_matches;
|
||||
|
||||
pub use parser::template;
|
||||
pub use parser::Body;
|
||||
|
@ -1,8 +1,9 @@
|
||||
use super::take_until_parser_matches::take_until_parser_matches;
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::escaped_transform;
|
||||
use nom::bytes::complete::is_a;
|
||||
use nom::bytes::complete::is_not;
|
||||
use nom::bytes::complete::{tag, take_until, take_until_parser_matches};
|
||||
use nom::bytes::complete::{tag, take_until};
|
||||
use nom::character::complete::line_ending;
|
||||
use nom::character::complete::multispace0;
|
||||
use nom::character::complete::one_of;
|
||||
|
96
src/parser/take_until_parser_matches.rs
Normal file
96
src/parser/take_until_parser_matches.rs
Normal file
@ -0,0 +1,96 @@
|
||||
use nom::{
|
||||
error::ErrorKind, error::ParseError, IResult, InputIter, InputLength, InputTake, Parser,
|
||||
};
|
||||
|
||||
/// Returns the shortest input slice till it matches the parser.
|
||||
///
|
||||
/// It doesn't consume the input to the parser. It will return `Err(Err::Error((_, ErrorKind::TakeUntilParserMatches)))`
|
||||
/// if the pattern wasn't met
|
||||
///
|
||||
/// The performance of this parser depends HEAVILY on the inner parser
|
||||
/// failing early. For each step on the input, this will run the inner
|
||||
/// parser against the remaining input, so if the inner parser does
|
||||
/// not fail fast then you will end up re-parsing the remaining input
|
||||
/// repeatedly.
|
||||
///
|
||||
/// If you are looking to match until a string
|
||||
/// (`take_until_parser_matches(tag("foo"))`) it would be faster to
|
||||
/// use `take_until("foo")`.
|
||||
///
|
||||
/// # Simple Example
|
||||
/// ```ignore
|
||||
/// # #[macro_use] extern crate nom;
|
||||
/// # use nom::{Err, error::ErrorKind, IResult};
|
||||
/// use nom::bytes::complete::{take_until_parser_matches, tag};
|
||||
///
|
||||
/// fn until_eof(s: &str) -> IResult<&str, &str> {
|
||||
/// take_until_parser_matches(tag("eof"))(s)
|
||||
/// }
|
||||
///
|
||||
/// assert_eq!(until_eof("hello, worldeof"), Ok(("eof", "hello, world")));
|
||||
/// assert_eq!(until_eof("hello, world"), Err(Err::Error(error_position!("hello, world", ErrorKind::TakeUntilParserMatches))));
|
||||
/// assert_eq!(until_eof(""), Err(Err::Error(error_position!("", ErrorKind::TakeUntilParserMatches))));
|
||||
/// ```
|
||||
///
|
||||
/// # Powerful Example
|
||||
/// To show the power of this parser we will parse a line containing
|
||||
/// a set of flags at the end surrounded by brackets. Example:
|
||||
/// "Submit a PR [inprogress]"
|
||||
/// ```ignore
|
||||
/// # #[macro_use] extern crate nom;
|
||||
/// # use nom::{Err, error::ErrorKind, IResult};
|
||||
/// use nom::bytes::complete::{is_not, take_until_parser_matches, tag};
|
||||
/// use nom::sequence::{delimited, tuple};
|
||||
/// use nom::multi::separated_list1;
|
||||
///
|
||||
/// fn flag(i: &str) -> IResult<&str, &str> {
|
||||
/// delimited(tag("["), is_not("]\r\n"), tag("]"))(i)
|
||||
/// }
|
||||
///
|
||||
/// fn line_ending_with_flags(i: &str) -> IResult<&str, (&str, std::vec::Vec<&str>)> {
|
||||
/// tuple((
|
||||
/// take_until_parser_matches(flag),
|
||||
/// separated_list1(tag(" "), flag),
|
||||
/// ))(i)
|
||||
/// }
|
||||
///
|
||||
/// assert_eq!(line_ending_with_flags("Parsing Seminar [important] [presentation]"), Ok(("", ("Parsing Seminar ", vec!["important", "presentation"]))));
|
||||
/// ```
|
||||
pub fn take_until_parser_matches<F, Input, O, Error>(
|
||||
mut f: F,
|
||||
) -> impl FnMut(Input) -> IResult<Input, Input, Error>
|
||||
where
|
||||
Input: InputTake + InputIter + InputLength + Clone,
|
||||
F: Parser<Input, O, Error>,
|
||||
Error: ParseError<Input>,
|
||||
{
|
||||
move |input: Input| {
|
||||
let i = input.clone();
|
||||
for (ind, _) in i.iter_indices() {
|
||||
let (remaining, _taken) = i.take_split(ind);
|
||||
match f.parse(remaining) {
|
||||
Err(_) => (),
|
||||
Ok(_) => {
|
||||
let res: IResult<Input, Input, Error> = Ok(i.take_split(ind));
|
||||
return res;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Attempt to match one last time past the end of the input. This
|
||||
// allows for 0-length combinators to be used (for example, an eof
|
||||
// combinator).
|
||||
let (remaining, _taken) = i.take_split(i.input_len());
|
||||
match f.parse(remaining) {
|
||||
Err(_) => (),
|
||||
Ok(_) => {
|
||||
let res: IResult<Input, Input, Error> = Ok(i.take_split(i.input_len()));
|
||||
return res;
|
||||
}
|
||||
}
|
||||
Err(nom::Err::Error(Error::from_error_kind(
|
||||
i,
|
||||
// Normally this would be `ErrorKind::TakeUntilParserMatches` but I cannot extend ErrorKind in this project.
|
||||
ErrorKind::TakeUntil,
|
||||
)))
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user