Add my implementation of a take_until_parser_matches parser.

The author of nom is too busy to review the PR, and crates published to crates.io cannot have git dependencies, so I am copying my implementation into this code base. That lets this project depend on upstream nom and be published to crates.io. Although this code has also been submitted upstream to nom, which is under the MIT license, I am the author of the code, so I believe I have the full right to also release it in this project under the 0BSD license.
2021-02-06 16:23:54 -05:00 · 2021-02-06 16:23:54 -05:00 · 0302ed216f
commit 0302ed216f
parent b4dd4cebfd
4 changed files with 102 additions and 2 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@ -24,6 +24,8 @@ path = "src/bin.rs"
 required-features = ["json-integration"]

 [dependencies]
-nom = { git = "https://github.com/tomalexander/nom.git", branch = "take_until_parser_matches" }
+nom = "6.1.0"
+# The author of nom is too busy to review the PR, and crates published to crates.io cannot have git dependencies, so my implementation is copied into this code base. That lets this crate depend on upstream nom and be published to crates.io.
+# nom = { git = "https://github.com/tomalexander/nom.git", branch = "take_until_parser_matches" }
 serde = { version = "1.0.106", optional = true }
 serde_json = { version = "1.0.51", optional = true }
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@ -1,6 +1,7 @@
 //! This module contains a rust implementation of LinkedIn Dust

 mod parser;
+mod take_until_parser_matches;

 pub use parser::template;
 pub use parser::Body;
--- a/src/parser/parser.rs
+++ b/src/parser/parser.rs
@ -1,8 +1,9 @@
+use super::take_until_parser_matches::take_until_parser_matches;
 use nom::branch::alt;
 use nom::bytes::complete::escaped_transform;
 use nom::bytes::complete::is_a;
 use nom::bytes::complete::is_not;
-use nom::bytes::complete::{tag, take_until, take_until_parser_matches};
+use nom::bytes::complete::{tag, take_until};
 use nom::character::complete::line_ending;
 use nom::character::complete::multispace0;
 use nom::character::complete::one_of;
--- a/src/parser/take_until_parser_matches.rs
+++ b/src/parser/take_until_parser_matches.rs
@ -0,0 +1,96 @@
+use nom::{
+    error::ErrorKind, error::ParseError, IResult, InputIter, InputLength, InputTake, Parser,
+};
+
+/// Returns the shortest prefix of the input after which the parser `f` matches.
+///
+/// `f` is only used as a lookahead: the input it would match is left in the
+/// remaining input rather than consumed, and its output is discarded.
+///
+/// # Errors
+///
+/// Returns `Err(Err::Error(_))` built with `ErrorKind::TakeUntil` at the start
+/// of the input if `f` does not match at any position, including the end of
+/// the input. Upstream this would be `ErrorKind::TakeUntilParserMatches`, but
+/// `ErrorKind` cannot be extended from this crate.
+///
+/// Every error returned by `f` while scanning is treated as "no match here"
+/// and is dropped; it is never propagated to the caller.
+///
+/// # Performance
+///
+/// The performance of this parser depends HEAVILY on the inner parser
+/// failing early. For each step on the input, this will run the inner
+/// parser against the remaining input, so if the inner parser does
+/// not fail fast then you will end up re-parsing the remaining input
+/// repeatedly.
+///
+/// If you are looking to match until a string
+/// (`take_until_parser_matches(tag("foo"))`) it would be faster to
+/// use `take_until("foo")`.
+///
+/// # Simple Example
+/// ```ignore
+/// # #[macro_use] extern crate nom;
+/// # use nom::{Err, error::ErrorKind, IResult};
+/// // `take_until_parser_matches` is this module's function, not part of nom.
+/// use nom::bytes::complete::tag;
+///
+/// fn until_eof(s: &str) -> IResult<&str, &str> {
+///   take_until_parser_matches(tag("eof"))(s)
+/// }
+///
+/// assert_eq!(until_eof("hello, worldeof"), Ok(("eof", "hello, world")));
+/// assert_eq!(until_eof("hello, world"), Err(Err::Error(error_position!("hello, world", ErrorKind::TakeUntil))));
+/// assert_eq!(until_eof(""), Err(Err::Error(error_position!("", ErrorKind::TakeUntil))));
+/// ```
+///
+/// # Powerful Example
+/// To show the power of this parser we will parse a line containing
+/// a set of flags at the end surrounded by brackets. Example:
+/// "Submit a PR [inprogress]"
+/// ```ignore
+/// # #[macro_use] extern crate nom;
+/// # use nom::{Err, error::ErrorKind, IResult};
+/// // `take_until_parser_matches` is this module's function, not part of nom.
+/// use nom::bytes::complete::{is_not, tag};
+/// use nom::sequence::{delimited, tuple};
+/// use nom::multi::separated_list1;
+///
+/// fn flag(i: &str) -> IResult<&str, &str> {
+///   delimited(tag("["), is_not("]\r\n"), tag("]"))(i)
+/// }
+///
+/// fn line_ending_with_flags(i: &str) -> IResult<&str, (&str, std::vec::Vec<&str>)> {
+///   tuple((
+///     take_until_parser_matches(flag),
+///     separated_list1(tag(" "), flag),
+///   ))(i)
+/// }
+///
+/// assert_eq!(line_ending_with_flags("Parsing Seminar [important] [presentation]"), Ok(("", ("Parsing Seminar ", vec!["important", "presentation"]))));
+/// ```
+pub fn take_until_parser_matches<F, Input, O, Error>(
+    mut f: F,
+) -> impl FnMut(Input) -> IResult<Input, Input, Error>
+where
+    Input: InputTake + InputIter + InputLength + Clone,
+    F: Parser<Input, O, Error>,
+    Error: ParseError<Input>,
+{
+    move |input: Input| {
+        // Candidate split points: every index yielded by `iter_indices`, then
+        // the end of the input, so that 0-length combinators (for example, an
+        // eof combinator) can still match after the last item.
+        let split_at = input
+            .iter_indices()
+            .map(|(ind, _)| ind)
+            .chain(std::iter::once(input.input_len()))
+            .find(|&ind| f.parse(input.take_split(ind).0).is_ok());
+        match split_at {
+            Some(ind) => Ok(input.take_split(ind)),
+            // `ErrorKind` cannot be extended here, so reuse `TakeUntil`.
+            None => Err(nom::Err::Error(Error::from_error_kind(input, ErrorKind::TakeUntil))),
+        }
+    }
+}