From 0302ed216f166b32f2891602ecb2c853abb28e73 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 6 Feb 2021 16:23:54 -0500 Subject: [PATCH] Add my implementation of a take_until_parser_matches parser. The author of nom is too busy to review the PR, and cargo does not allow for git dependencies, so I am going to copy my implementation into this code base so I can use upstream nom so I can push to cargo. While this code has been submitted upstream to nom which is under the MIT license, I am the author of this code so I believe I have the full right to also release it in this project under the 0BSD license. --- Cargo.toml | 4 +- src/parser/mod.rs | 1 + src/parser/parser.rs | 3 +- src/parser/take_until_parser_matches.rs | 96 +++++++++++++++++++++++++ 4 files changed, 102 insertions(+), 2 deletions(-) create mode 100644 src/parser/take_until_parser_matches.rs diff --git a/Cargo.toml b/Cargo.toml index e47393b..81e5f3f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,8 @@ path = "src/bin.rs" required-features = ["json-integration"] [dependencies] -nom = { git = "https://github.com/tomalexander/nom.git", branch = "take_until_parser_matches" } +nom = "6.1.0" +# The author of nom is too busy to review the PR, and cargo does not allow for git dependencies, so I am going to copy my implementation into this code base so I can use upstream nom so I can push to cargo. +# nom = { git = "https://github.com/tomalexander/nom.git", branch = "take_until_parser_matches" } serde = { version = "1.0.106", optional = true } serde_json = { version = "1.0.51", optional = true } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6efae74..c4ba2ad 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,6 +1,7 @@ //! 
This module contains a rust implementation of LinkedIn Dust mod parser; +mod take_until_parser_matches; pub use parser::template; pub use parser::Body; diff --git a/src/parser/parser.rs b/src/parser/parser.rs index 880f722..d807008 100644 --- a/src/parser/parser.rs +++ b/src/parser/parser.rs @@ -1,8 +1,9 @@ +use super::take_until_parser_matches::take_until_parser_matches; use nom::branch::alt; use nom::bytes::complete::escaped_transform; use nom::bytes::complete::is_a; use nom::bytes::complete::is_not; -use nom::bytes::complete::{tag, take_until, take_until_parser_matches}; +use nom::bytes::complete::{tag, take_until}; use nom::character::complete::line_ending; use nom::character::complete::multispace0; use nom::character::complete::one_of; diff --git a/src/parser/take_until_parser_matches.rs b/src/parser/take_until_parser_matches.rs new file mode 100644 index 0000000..200b395 --- /dev/null +++ b/src/parser/take_until_parser_matches.rs @@ -0,0 +1,96 @@ +use nom::{ + error::ErrorKind, error::ParseError, IResult, InputIter, InputLength, InputTake, Parser, +}; + +/// Returns the shortest input slice till it matches the parser. +/// +/// It doesn't consume the input to the parser. It will return `Err(Err::Error((_, ErrorKind::TakeUntilParserMatches)))` +/// if the pattern wasn't met +/// +/// The performance of this parser depends HEAVILY on the inner parser +/// failing early. For each step on the input, this will run the inner +/// parser against the remaining input, so if the inner parser does +/// not fail fast then you will end up re-parsing the remaining input +/// repeatedly. +/// +/// If you are looking to match until a string +/// (`take_until_parser_matches(tag("foo"))`) it would be faster to +/// use `take_until("foo")`. 
+///
+/// # Simple Example
+/// ```ignore
+/// # #[macro_use] extern crate nom;
+/// # use nom::{Err, error::ErrorKind, IResult};
+/// use nom::bytes::complete::{take_until_parser_matches, tag};
+///
+/// fn until_eof(s: &str) -> IResult<&str, &str> {
+///   take_until_parser_matches(tag("eof"))(s)
+/// }
+///
+/// assert_eq!(until_eof("hello, worldeof"), Ok(("eof", "hello, world")));
+/// assert_eq!(until_eof("hello, world"), Err(Err::Error(error_position!("hello, world", ErrorKind::TakeUntilParserMatches))));
+/// assert_eq!(until_eof(""), Err(Err::Error(error_position!("", ErrorKind::TakeUntilParserMatches))));
+/// ```
+///
+/// # Powerful Example
+/// To show the power of this parser we will parse a line containing
+/// a set of flags at the end surrounded by brackets. Example:
+/// "Submit a PR [inprogress]"
+/// ```ignore
+/// # #[macro_use] extern crate nom;
+/// # use nom::{Err, error::ErrorKind, IResult};
+/// use nom::bytes::complete::{is_not, take_until_parser_matches, tag};
+/// use nom::sequence::{delimited, tuple};
+/// use nom::multi::separated_list1;
+///
+/// fn flag(i: &str) -> IResult<&str, &str> {
+///   delimited(tag("["), is_not("]\r\n"), tag("]"))(i)
+/// }
+///
+/// fn line_ending_with_flags(i: &str) -> IResult<&str, (&str, std::vec::Vec<&str>)> {
+///   tuple((
+///     take_until_parser_matches(flag),
+///     separated_list1(tag(" "), flag),
+///   ))(i)
+/// }
+///
+/// assert_eq!(line_ending_with_flags("Parsing Seminar [important] [presentation]"), Ok(("", ("Parsing Seminar ", vec!["important", "presentation"]))));
+/// ```
+pub fn take_until_parser_matches<F, Input, O, Error>(
+    mut f: F,
+) -> impl FnMut(Input) -> IResult<Input, Input, Error>
+where
+    Input: InputTake + InputIter + InputLength + Clone,
+    F: Parser<Input, O, Error>,
+    Error: ParseError<Input>,
+{
+    move |input: Input| {
+        let i = input.clone();
+        for (ind, _) in i.iter_indices() {
+            let (remaining, _taken) = i.take_split(ind);
+            match f.parse(remaining) {
+                Err(_) => (),
+                Ok(_) => {
+                    let res: IResult<Input, Input, Error> = Ok(i.take_split(ind));
+                    return res;
+                }
+            }
+        }
+        // Attempt to match one last time past the end of the input. This
+        // allows for 0-length combinators to be used (for example, an eof
+        // combinator).
+        let (remaining, _taken) = i.take_split(i.input_len());
+        match f.parse(remaining) {
+            Err(_) => (),
+            Ok(_) => {
+                let res: IResult<Input, Input, Error> = Ok(i.take_split(i.input_len()));
+                return res;
+            }
+        }
+        Err(nom::Err::Error(Error::from_error_kind(
+            i,
+            // Normally this would be `ErrorKind::TakeUntilParserMatches` but I cannot extend ErrorKind in this project.
+            ErrorKind::TakeUntil,
+        )))
+    }
+}