Compare commits

...

11 Commits

Author SHA1 Message Date
Tom Alexander
129228c5c5
Require either eof or whitespace to line ending for valueless items.
Some checks failed
rust-test Build rust-test has succeeded
rust-build Build rust-build has succeeded
rust-foreign-document-test Build rust-foreign-document-test has failed
2023-09-21 22:06:30 -04:00
Tom Alexander
f0a7493a89
Support blank lines for descriptive list with empty value before final list item. 2023-09-21 22:03:21 -04:00
Tom Alexander
dc5695ec9f
Update description list test to ensure we match blank values properly for both final and non-final items. 2023-09-21 21:47:42 -04:00
Tom Alexander
4ff62fbfae
Support backslash as a post character for text markup. 2023-09-21 21:25:33 -04:00
Tom Alexander
c892d406c3
Do not parse the tag for a plain list item if it is an ordered plain list item. 2023-09-21 20:58:03 -04:00
Tom Alexander
1a41cfc6c7
Support detecting line indentation when checking for contentless plain list items. 2023-09-21 20:08:04 -04:00
Tom Alexander
4f34ab9089
Support subscript/superscript wrapped in parenthesis. 2023-09-21 19:21:47 -04:00
Tom Alexander
9b2348c0ef
Allow matched parenthesis inside plain links. 2023-09-21 18:51:11 -04:00
Tom Alexander
5716cbccea
Remove unnecessary peek. 2023-09-21 16:34:24 -04:00
Tom Alexander
124cd50243
Add more test cases. 2023-09-21 15:36:55 -04:00
Tom Alexander
bac5d6e1d9
Add a test for parenthesis in regular links for good measure.
We are properly handling this currently, but it is good to have more test coverage.
2023-09-21 14:34:51 -04:00
11 changed files with 263 additions and 38 deletions

View File

@ -0,0 +1,6 @@
- foo ::
- bar ::
baz

View File

@ -0,0 +1,3 @@
1. foo
- bar
- lorem :: ipsum

View File

@ -0,0 +1,2 @@
# Since this is an ordered list, the text before the " :: " is NOT parsed as a tag.
1. foo :: bar

View File

@ -1 +1,11 @@
# Should be a link:
https://en.wikipedia.org/wiki/Shebang_(Unix) https://en.wikipedia.org/wiki/Shebang_(Unix)
# No closing parenthesis, so link ends at underscore.
https://en.wikipedia.org/wiki/Shebang_(Unix
# Parenthesis only allowed to depth of 2 so link ends at underscore.
https://en.wikipedia.org/wiki/Shebang_(((Unix)))
# Even though they eventually become balanced, we hit negative parenthesis depth so link ends at )
https://en.wikipedia.org/wiki/Shebang)Unix(

View File

@ -0,0 +1 @@
[[https://en.wikipedia.org/wiki/Shebang_(Unix)]]

View File

@ -0,0 +1,13 @@
foo_(bar)
foo_(b(ar)
foo_(b{ar)
foo_{b(ar}
foo_(b(a)r)
foo_b(a)r
foo_(b+ar)

View File

@ -732,6 +732,10 @@ fn compare_plain_list<'s>(
Ok(_) => {} Ok(_) => {}
}; };
// TODO compare :type
//
// :type is an unquoted atom of either descriptive, ordered, or unordered
for (emacs_child, rust_child) in children.iter().skip(2).zip(rust.children.iter()) { for (emacs_child, rust_child) in children.iter().skip(2).zip(rust.children.iter()) {
child_status.push(compare_plain_list_item(source, emacs_child, rust_child)?); child_status.push(compare_plain_list_item(source, emacs_child, rust_child)?);
} }

View File

@ -5,17 +5,24 @@ use nom::character::complete::anychar;
use nom::character::complete::none_of; use nom::character::complete::none_of;
use nom::character::complete::one_of; use nom::character::complete::one_of;
use nom::combinator::eof; use nom::combinator::eof;
use nom::combinator::not;
use nom::combinator::peek; use nom::combinator::peek;
use nom::combinator::recognize; use nom::combinator::recognize;
use nom::combinator::verify; use nom::combinator::verify;
use nom::multi::many0;
use nom::multi::many1;
use nom::multi::many_till; use nom::multi::many_till;
use nom::sequence::tuple;
use super::org_source::BracketDepth;
use super::org_source::OrgSource; use super::org_source::OrgSource;
use super::util::maybe_consume_object_trailing_whitespace_if_not_exiting; use super::util::maybe_consume_object_trailing_whitespace_if_not_exiting;
use crate::context::parser_with_context; use crate::context::parser_with_context;
use crate::context::ContextElement; use crate::context::ContextElement;
use crate::context::ContextMatcher;
use crate::context::ExitClass; use crate::context::ExitClass;
use crate::context::ExitMatcherNode; use crate::context::ExitMatcherNode;
use crate::context::Matcher;
use crate::context::RefContext; use crate::context::RefContext;
use crate::error::CustomError; use crate::error::CustomError;
use crate::error::MyError; use crate::error::MyError;
@ -130,17 +137,77 @@ fn path_plain<'b, 'g, 'r, 's>(
context: RefContext<'b, 'g, 'r, 's>, context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>, input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> { ) -> Res<OrgSource<'s>, OrgSource<'s>> {
// TODO: "optionally containing parenthesis-wrapped non-whitespace non-bracket substrings up to a depth of two. The string must end with either a non-punctation non-whitespace character, a forwards slash, or a parenthesis-wrapped substring" let path_plain_end = path_plain_end(input.get_parenthesis_depth());
let parser_context = ContextElement::ExitMatcherNode(ExitMatcherNode { let parser_context = ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Gamma, class: ExitClass::Gamma,
exit_matcher: &path_plain_end, exit_matcher: &path_plain_end,
}); });
let parser_context = context.with_additional_node(&parser_context); let parser_context = context.with_additional_node(&parser_context);
let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); let (remaining, _components) = many1(alt((
parser_with_context!(path_plain_no_parenthesis)(&parser_context),
parser_with_context!(path_plain_parenthesis)(&parser_context),
)))(input)?;
let source = get_consumed(input, remaining);
Ok((remaining, source))
}
/// Build the exit matcher used while parsing the path of a plain link.
///
/// The returned closure captures `starting_parenthesis_depth` and passes it, together with the
/// context and input it is invoked with, straight to `_path_plain_end`.
fn path_plain_end(starting_parenthesis_depth: BracketDepth) -> impl ContextMatcher {
    move |ctx, source: OrgSource<'_>| {
        _path_plain_end(ctx, source, starting_parenthesis_depth)
    }
}
/// Decide whether the path of a plain link stops at `input`.
///
/// A run of ASCII punctuation (excluding `(`, `)`, `[`, `]`, `<`, `>` and `/`) is skipped first.
/// The path ends when what follows that run is:
/// - a space, tab, carriage return, line feed, `[`, `]`, `<` or `>`; or
/// - while the parenthesis depth equals `starting_parenthesis_depth`, either a `)` or a `(`
///   that `path_plain_parenthesis` fails to parse.
///
/// Returns the successful match, or the "No path plain end" error when none of the above apply.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn _path_plain_end<'b, 'g, 'r, 's>(
    context: RefContext<'b, 'g, 'r, 's>,
    input: OrgSource<'s>,
    starting_parenthesis_depth: BracketDepth,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
    // Look past any punctuation sitting directly in front of a potential terminator.
    let (after_punctuation, _skipped) = many0(verify(anychar, |c| {
        c.is_ascii_punctuation() && !" \t\r\n[]<>()/".contains(*c)
    }))(input)?;

    // Whitespace and square/angle brackets always terminate the path.
    if let terminator @ Ok(_) = recognize(one_of(" \t\r\n[]<>"))(after_punctuation) {
        return terminator;
    }

    // Parentheses only terminate the path when we are not nested inside a wrapped section.
    let relative_depth = after_punctuation.get_parenthesis_depth() - starting_parenthesis_depth;
    if relative_depth == 0 {
        if let closing @ Ok(_) =
            tag::<&str, OrgSource<'_>, CustomError<OrgSource<'_>>>(")")(after_punctuation)
        {
            return closing;
        }

        // An opening parenthesis that never forms a valid wrapped section also ends the path.
        if let unmatched_open @ Ok(_) = recognize(tuple((
            peek(tag("(")),
            not(parser_with_context!(path_plain_parenthesis)(context)),
        )))(after_punctuation)
        {
            return unmatched_open;
        }
    }

    Err(nom::Err::Error(CustomError::MyError(MyError(
        "No path plain end".into(),
    ))))
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn path_plain_no_parenthesis<'b, 'g, 'r, 's>(
context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
let (remaining, path) = recognize(verify( let (remaining, path) = recognize(verify(
many_till(anychar, peek(exit_matcher)), many_till(
anychar,
alt((
peek(path_plain_no_parenthesis_disallowed_character),
parser_with_context!(exit_matcher_parser)(context),
)),
),
|(children, _exit_contents)| !children.is_empty(), |(children, _exit_contents)| !children.is_empty(),
))(input)?; ))(input)?;
@ -148,14 +215,65 @@ fn path_plain<'b, 'g, 'r, 's>(
} }
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn path_plain_end<'b, 'g, 'r, 's>( fn path_plain_no_parenthesis_disallowed_character<'s>(
_context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>, input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> { ) -> Res<OrgSource<'s>, OrgSource<'s>> {
recognize(many_till( recognize(verify(anychar, |c| {
verify(anychar, |c| { c.is_whitespace() || "()[]<>".contains(*c)
*c != '/' && (c.is_ascii_punctuation() || c.is_whitespace()) }))(input)
}), }
one_of(" \t\r\n()[]<>"),
))(input) #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
/// Parse one parenthesis-wrapped section of a plain link path.
///
/// Consumes a `(`, then at least one character up to the point where either
/// `path_plain_parenthesis_end` (given the depth measured just after the `(`) or
/// `exit_matcher_parser` for the current context succeeds, and finally a `)`.
/// Returns everything consumed, both parentheses included.
fn path_plain_parenthesis<'b, 'g, 'r, 's>(
    context: RefContext<'b, 'g, 'r, 's>,
    input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
    let (inside, _open) = tag("(")(input)?;
    let depth_inside = inside.get_parenthesis_depth();

    // `verify` rejects a match in which no character was consumed before the end condition.
    let (before_close, _body) = recognize(verify(
        many_till(
            anychar,
            alt((
                peek(path_plain_parenthesis_end(depth_inside)),
                parser_with_context!(exit_matcher_parser)(context),
            )),
        ),
        |(consumed, _exit_contents)| !consumed.is_empty(),
    ))(inside)?;
    let (after_close, _close) = tag(")")(before_close)?;

    Ok((after_close, get_consumed(input, after_close)))
}
/// Build the end-of-section matcher for a parenthesis-wrapped part of a plain link path.
///
/// The returned closure captures `starting_parenthesis_depth` and hands it, along with the
/// input it is invoked on, to `_path_plain_parenthesis_end`.
fn path_plain_parenthesis_end(starting_parenthesis_depth: BracketDepth) -> impl Matcher {
    move |source: OrgSource<'_>| {
        _path_plain_parenthesis_end(source, starting_parenthesis_depth)
    }
}
/// End check for a parenthesis-wrapped section of a plain link path.
///
/// `starting_parenthesis_depth` is the depth captured by the caller; the depth reported by
/// `input` is compared against it. Succeeds (returning the matched parenthesis) when:
/// - the relative depth is 0 and `input` starts with `)`, or
/// - the relative depth is 1 and `input` starts with `(`.
/// Otherwise returns the "No closing parenthesis" error.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn _path_plain_parenthesis_end<'s>(
input: OrgSource<'s>,
starting_parenthesis_depth: BracketDepth,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
// Depth relative to where the wrapped section started.
let current_depth = input.get_parenthesis_depth() - starting_parenthesis_depth;
if current_depth < 0 {
// This shouldn't be possible because if depth is 0 then a closing parenthesis should end the link.
unreachable!("Exceeded plain link parenthesis depth.")
}
if current_depth == 0 {
// Back at the starting depth: a `)` here is the one that closes this section.
let close_parenthesis = tag::<&str, OrgSource<'_>, CustomError<OrgSource<'_>>>(")")(input);
if close_parenthesis.is_ok() {
return close_parenthesis;
}
}
if current_depth == 1 {
// Already one level deeper than the start: a further `(` stops the section instead of
// being consumed (the plain link fixture notes parentheses are only allowed to a depth of 2).
let open_parenthesis = tag::<&str, OrgSource<'_>, CustomError<OrgSource<'_>>>("(")(input);
if open_parenthesis.is_ok() {
return open_parenthesis;
}
}
Err(nom::Err::Error(CustomError::MyError(MyError(
"No closing parenthesis".into(),
))))
} }

View File

@ -57,7 +57,7 @@ pub(crate) fn detect_plain_list<'b, 'g, 'r, 's>(
parser_with_context!(bullet)(context), parser_with_context!(bullet)(context),
alt((space1, line_ending, eof)), alt((space1, line_ending, eof)),
)), )),
|(_start, indent, bull, _after_whitespace)| { |(_start, indent, (_bullet_type, bull), _after_whitespace)| {
Into::<&str>::into(bull) != "*" || indent.len() > 0 Into::<&str>::into(bull) != "*" || indent.len() > 0
}, },
)(input) )(input)
@ -151,9 +151,9 @@ fn plain_list_item<'b, 'g, 'r, 's>(
) -> Res<OrgSource<'s>, PlainListItem<'s>> { ) -> Res<OrgSource<'s>, PlainListItem<'s>> {
start_of_line(input)?; start_of_line(input)?;
let (remaining, (indent_level, _leading_whitespace)) = indentation_level(context, input)?; let (remaining, (indent_level, _leading_whitespace)) = indentation_level(context, input)?;
let (remaining, bull) = verify( let (remaining, (bullet_type, bull)) = verify(
parser_with_context!(bullet)(context), parser_with_context!(bullet)(context),
|bull: &OrgSource<'_>| Into::<&str>::into(bull) != "*" || indent_level > 0, |(_bullet_type, bull)| Into::<&str>::into(bull) != "*" || indent_level > 0,
)(remaining)?; )(remaining)?;
let (remaining, _maybe_counter_set) = opt(tuple(( let (remaining, _maybe_counter_set) = opt(tuple((
@ -165,15 +165,33 @@ fn plain_list_item<'b, 'g, 'r, 's>(
let (remaining, maybe_checkbox) = opt(tuple((space1, item_checkbox)))(remaining)?; let (remaining, maybe_checkbox) = opt(tuple((space1, item_checkbox)))(remaining)?;
let (remaining, maybe_tag) = let (remaining, maybe_tag) = if let BulletType::Unordered = bullet_type {
opt(tuple((space1, parser_with_context!(item_tag)(context))))(remaining)?; opt(tuple((space1, parser_with_context!(item_tag)(context))))(remaining)?
} else {
(remaining, None)
};
let exit_matcher = plain_list_item_end(indent_level);
let contexts = [
ContextElement::ConsumeTrailingWhitespace(true),
ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
exit_matcher: &exit_matcher,
}),
];
let parser_context = context.with_additional_node(&contexts[0]);
let parser_context = parser_context.with_additional_node(&contexts[1]);
let maybe_contentless_item: Res<OrgSource<'_>, ()> = peek(parser_with_context!( let maybe_contentless_item: Res<OrgSource<'_>, ()> = peek(parser_with_context!(
detect_contentless_item_contents detect_contentless_item_contents
)(context))(remaining); )(&parser_context))(remaining);
match maybe_contentless_item { match maybe_contentless_item {
Ok((_rem, _ws)) => { Ok((_rem, _ws)) => {
let (remaining, _trailing_ws) = opt(blank_line)(remaining)?; let (remaining, _trailing_ws) = if context.should_consume_trailing_whitespace() {
recognize(alt((recognize(many1(blank_line)), eof)))(remaining)?
} else {
recognize(alt((blank_line, eof)))(remaining)?
};
let source = get_consumed(input, remaining); let source = get_consumed(input, remaining);
return Ok(( return Ok((
remaining, remaining,
@ -191,17 +209,7 @@ fn plain_list_item<'b, 'g, 'r, 's>(
} }
Err(_) => {} Err(_) => {}
}; };
let (remaining, _ws) = item_tag_post_gap(context, remaining)?; let (remaining, _ws) = item_tag_post_gap(&parser_context, remaining)?;
let exit_matcher = plain_list_item_end(indent_level);
let contexts = [
ContextElement::ConsumeTrailingWhitespace(true),
ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
exit_matcher: &exit_matcher,
}),
];
let parser_context = context.with_additional_node(&contexts[0]);
let parser_context = parser_context.with_additional_node(&contexts[1]);
let (mut remaining, (mut children, _exit_contents)) = many_till( let (mut remaining, (mut children, _exit_contents)) = many_till(
include_input(parser_with_context!(element(true))(&parser_context)), include_input(parser_with_context!(element(true))(&parser_context)),
@ -241,19 +249,28 @@ fn plain_list_item<'b, 'g, 'r, 's>(
)); ));
} }
/// Kind of bullet that starts a plain list item, as reported by `bullet`.
#[derive(Debug)]
enum BulletType {
/// A counter followed by `.` or `)`.
Ordered,
/// One of `*`, `-` or `+`.
Unordered,
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn bullet<'b, 'g, 'r, 's>( fn bullet<'b, 'g, 'r, 's>(
context: RefContext<'b, 'g, 'r, 's>, context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>, input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> { ) -> Res<OrgSource<'s>, (BulletType, OrgSource<'s>)> {
alt(( alt((
tag("*"), map(tag("*"), |bull| (BulletType::Unordered, bull)),
tag("-"), map(tag("-"), |bull| (BulletType::Unordered, bull)),
tag("+"), map(tag("+"), |bull| (BulletType::Unordered, bull)),
map(
recognize(tuple(( recognize(tuple((
parser_with_context!(counter)(context), parser_with_context!(counter)(context),
alt((tag("."), tag(")"))), alt((tag("."), tag(")"))),
))), ))),
|bull| (BulletType::Ordered, bull),
),
))(input) ))(input)
} }

View File

@ -23,6 +23,7 @@ use crate::context::ContextElement;
use crate::context::ContextMatcher; use crate::context::ContextMatcher;
use crate::context::ExitClass; use crate::context::ExitClass;
use crate::context::ExitMatcherNode; use crate::context::ExitMatcherNode;
use crate::context::Matcher;
use crate::context::RefContext; use crate::context::RefContext;
use crate::error::CustomError; use crate::error::CustomError;
use crate::error::MyError; use crate::error::MyError;
@ -112,6 +113,10 @@ fn script_body<'b, 'g, 'r, 's>(
map(parser_with_context!(script_with_braces)(context), |body| { map(parser_with_context!(script_with_braces)(context), |body| {
ScriptBody::WithBraces(body.into()) ScriptBody::WithBraces(body.into())
}), }),
map(
parser_with_context!(script_with_parenthesis)(context),
|body| ScriptBody::Braceless(body.into()),
),
))(input) ))(input)
} }
@ -199,3 +204,49 @@ fn _script_with_braces_end<'b, 'g, 'r, 's>(
} }
tag("}")(input) tag("}")(input)
} }
/// Parse a parenthesis-wrapped subscript/superscript body such as `(bar)`.
///
/// Consumes a `(`, then characters up to the point where either `script_with_parenthesis_end`
/// (given the depth measured just after the `(`) or `exit_matcher_parser` for the current
/// context succeeds, and finally a `)`. Returns everything consumed, both parentheses included.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn script_with_parenthesis<'b, 'g, 'r, 's>(
    context: RefContext<'b, 'g, 'r, 's>,
    input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
    let (inside, _) = tag("(")(input)?;

    let (before_close, _) = many_till(
        anychar,
        alt((
            peek(script_with_parenthesis_end(inside.get_parenthesis_depth())),
            parser_with_context!(exit_matcher_parser)(context),
        )),
    )(inside)?;

    let (after_close, _) = tag(")")(before_close)?;
    Ok((after_close, get_consumed(input, after_close)))
}
/// Build the end matcher for a parenthesis-wrapped subscript/superscript body.
///
/// The returned closure captures `starting_parenthesis_depth` and hands it, along with the
/// input it is invoked on, to `_script_with_parenthesis_end`.
fn script_with_parenthesis_end(starting_parenthesis_depth: BracketDepth) -> impl Matcher {
    move |source: OrgSource<'_>| {
        _script_with_parenthesis_end(source, starting_parenthesis_depth)
    }
}
/// End check for a parenthesis-wrapped subscript/superscript body.
///
/// `starting_parenthesis_depth` is the depth captured by the caller; the depth reported by
/// `input` is compared against it. Succeeds, returning the matched `)`, only when the relative
/// depth is 0 and `input` starts with `)`. Otherwise returns the "No script parenthesis end."
/// error.
///
/// # Panics
///
/// Panics if the relative depth is negative, which would mean a closing parenthesis at depth 0
/// failed to end the script body.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn _script_with_parenthesis_end<'s>(
    input: OrgSource<'s>,
    starting_parenthesis_depth: BracketDepth,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
    // Depth relative to where the wrapped body started.
    let current_depth = input.get_parenthesis_depth() - starting_parenthesis_depth;
    if current_depth < 0 {
        // This shouldn't be possible because if depth is 0 then a closing parenthesis should end the script body.
        unreachable!("Exceeded script parenthesis depth.")
    }
    if current_depth == 0 {
        // Back at the starting depth: a `)` here is the one that closes the body.
        let close_parenthesis = tag::<&str, OrgSource<'_>, CustomError<OrgSource<'_>>>(")")(input);
        if close_parenthesis.is_ok() {
            return close_parenthesis;
        }
    }
    Err(nom::Err::Error(CustomError::MyError(MyError(
        "No script parenthesis end.".into(),
    ))))
}

View File

@ -312,7 +312,7 @@ fn post<'b, 'g, 'r, 's>(
_context: RefContext<'b, 'g, 'r, 's>, _context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>, input: OrgSource<'s>,
) -> Res<OrgSource<'s>, ()> { ) -> Res<OrgSource<'s>, ()> {
let (remaining, _) = alt((recognize(one_of(" \r\n\t-.,;:!?')}[\"")), line_ending))(input)?; let (remaining, _) = alt((recognize(one_of(" \r\n\t-.,;:!?')}[\"\\")), line_ending))(input)?;
Ok((remaining, ())) Ok((remaining, ()))
} }