Merge branch 'accuracy_fixes_from_feeding_large_documents'
This commit is contained in:
commit
f4ea1b7303
1
build.rs
1
build.rs
@ -75,7 +75,6 @@ fn is_expect_fail(name: &str) -> Option<&str> {
|
||||
"autogen_greater_element_drawer_drawer_with_headline_inside" => Some("Apparently lines with :end: become their own paragraph. This odd behavior needs to be investigated more."),
|
||||
"autogen_element_container_priority_footnote_definition_dynamic_block" => Some("Apparently broken begin lines become their own paragraph."),
|
||||
"autogen_lesser_element_paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."),
|
||||
"autogen_sections_and_headings_empty_section" => Some("We are not yet handling empty sections properly."),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
3
elisp_snippets/dump_org_element_affiliated_keywords.el
Normal file
3
elisp_snippets/dump_org_element_affiliated_keywords.el
Normal file
@ -0,0 +1,3 @@
|
||||
(dolist (var org-element-affiliated-keywords)
|
||||
(message "\"%s\"," (downcase var))
|
||||
)
|
@ -0,0 +1,15 @@
|
||||
#+name: foo
|
||||
#+caption: bar
|
||||
#+caption: baz
|
||||
|
||||
[[file:lorem/ipsum.png]]
|
||||
|
||||
#+name: cat
|
||||
#+foo: dog
|
||||
[[file:lorem/ipsum.png]]
|
||||
|
||||
#+name: cat
|
||||
#+foo: dog
|
||||
|
||||
|
||||
foo
|
52
org_mode_samples/object/plain_link/empty_links.org
Normal file
52
org_mode_samples/object/plain_link/empty_links.org
Normal file
@ -0,0 +1,52 @@
|
||||
non-link text
|
||||
eww://
|
||||
rmail://
|
||||
mhe://
|
||||
irc://
|
||||
info://
|
||||
gnus://
|
||||
docview://
|
||||
bibtex://
|
||||
bbdb://
|
||||
w3m://
|
||||
doi://
|
||||
file+sys://
|
||||
file+emacs://
|
||||
shell://
|
||||
news://
|
||||
mailto://
|
||||
https://
|
||||
http://
|
||||
ftp://
|
||||
help://
|
||||
file://
|
||||
elisp://
|
||||
randomfakeprotocl://
|
||||
non-link text
|
||||
|
||||
|
||||
non-link text
|
||||
eww:
|
||||
rmail:
|
||||
mhe:
|
||||
irc:
|
||||
info:
|
||||
gnus:
|
||||
docview:
|
||||
bibtex:
|
||||
bbdb:
|
||||
w3m:
|
||||
doi:
|
||||
file+sys:
|
||||
file+emacs:
|
||||
shell:
|
||||
news:
|
||||
mailto:
|
||||
https:
|
||||
http:
|
||||
ftp:
|
||||
help:
|
||||
file:
|
||||
elisp:
|
||||
randomfakeprotocl:
|
||||
non-link text
|
@ -0,0 +1,3 @@
|
||||
mailto:foo@bar.baz.
|
||||
|
||||
mailto:foo@bar.baz....
|
@ -120,7 +120,7 @@ impl<'s> DiffResult<'s> {
|
||||
indentation = " ".repeat(indentation),
|
||||
status_text = status_text,
|
||||
name = self.name,
|
||||
char_offset = rust_offset,
|
||||
char_offset = rust_offset + 1,
|
||||
message = self.message.as_ref().map(|m| m.as_str()).unwrap_or("")
|
||||
);
|
||||
for child in self.children.iter() {
|
||||
|
@ -283,10 +283,21 @@ fn _heading<'r, 's>(
|
||||
headline(context, input, parent_stars)?;
|
||||
let section_matcher = parser_with_context!(section)(context);
|
||||
let heading_matcher = parser_with_context!(heading(star_count))(context);
|
||||
let (remaining, children) = many0(alt((
|
||||
map(heading_matcher, DocumentElement::Heading),
|
||||
map(section_matcher, DocumentElement::Section),
|
||||
)))(remaining)?;
|
||||
let (remaining, maybe_section) =
|
||||
opt(map(section_matcher, DocumentElement::Section))(remaining)?;
|
||||
let (remaining, mut children) =
|
||||
many0(map(heading_matcher, DocumentElement::Heading))(remaining)?;
|
||||
if let Some(section) = maybe_section {
|
||||
children.insert(0, section);
|
||||
}
|
||||
let remaining = if children.is_empty() {
|
||||
// Support empty headings
|
||||
let (remain, _ws) = many0(blank_line)(remaining)?;
|
||||
remain
|
||||
} else {
|
||||
remaining
|
||||
};
|
||||
|
||||
let source = get_consumed(input, remaining);
|
||||
Ok((
|
||||
remaining,
|
||||
|
@ -12,6 +12,7 @@ use super::fixed_width_area::fixed_width_area;
|
||||
use super::footnote_definition::footnote_definition;
|
||||
use super::greater_block::greater_block;
|
||||
use super::horizontal_rule::horizontal_rule;
|
||||
use super::keyword::affiliated_keyword;
|
||||
use super::keyword::keyword;
|
||||
use super::latex_environment::latex_environment;
|
||||
use super::lesser_block::comment_block;
|
||||
@ -62,10 +63,12 @@ fn _element<'r, 's>(
|
||||
let fixed_width_area_matcher = parser_with_context!(fixed_width_area)(context);
|
||||
let horizontal_rule_matcher = parser_with_context!(horizontal_rule)(context);
|
||||
let keyword_matcher = parser_with_context!(keyword)(context);
|
||||
let affiliated_keyword_matcher = parser_with_context!(affiliated_keyword)(context);
|
||||
let paragraph_matcher = parser_with_context!(paragraph)(context);
|
||||
let latex_environment_matcher = parser_with_context!(latex_environment)(context);
|
||||
|
||||
let (remaining, mut affiliated_keywords) = many0(keyword_matcher)(input)?;
|
||||
// TODO: Affiliated keywords cannot be on comments, clocks, headings, inlinetasks, items, node properties, planning, property drawers, sections, and table rows
|
||||
let (remaining, mut affiliated_keywords) = many0(affiliated_keyword_matcher)(input)?;
|
||||
let (remaining, mut element) = match alt((
|
||||
map(plain_list_matcher, Element::PlainList),
|
||||
map(greater_block_matcher, Element::GreaterBlock),
|
||||
@ -84,6 +87,7 @@ fn _element<'r, 's>(
|
||||
map(fixed_width_area_matcher, Element::FixedWidthArea),
|
||||
map(horizontal_rule_matcher, Element::HorizontalRule),
|
||||
map(latex_environment_matcher, Element::LatexEnvironment),
|
||||
map(keyword_matcher, Element::Keyword),
|
||||
))(remaining)
|
||||
{
|
||||
the_ok @ Ok(_) => the_ok,
|
||||
@ -93,12 +97,12 @@ fn _element<'r, 's>(
|
||||
the_ok @ Ok(_) => the_ok,
|
||||
Err(_) => {
|
||||
affiliated_keywords.clear();
|
||||
map(keyword_matcher, Element::Keyword)(input)
|
||||
map(affiliated_keyword_matcher, Element::Keyword)(input)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
affiliated_keywords.clear();
|
||||
map(keyword_matcher, Element::Keyword)(input)
|
||||
map(affiliated_keyword_matcher, Element::Keyword)(input)
|
||||
}
|
||||
}
|
||||
}?;
|
||||
|
@ -15,7 +15,8 @@ use crate::error::Res;
|
||||
use crate::parser::object::Entity;
|
||||
use crate::parser::util::get_consumed;
|
||||
|
||||
const ENTITIES: [&'static str; 413] = [
|
||||
// TODO: Make this a user-provided variable corresponding to elisp's org-entities
|
||||
const ORG_ENTITIES: [&'static str; 413] = [
|
||||
"Agrave",
|
||||
"agrave",
|
||||
"Aacute",
|
||||
@ -457,8 +458,7 @@ fn name<'r, 's>(
|
||||
input: OrgSource<'s>,
|
||||
) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
||||
// TODO: This should be defined by org-entities and optionally org-entities-user
|
||||
for entity in ENTITIES {
|
||||
// foo
|
||||
for entity in ORG_ENTITIES {
|
||||
let result = tag_no_case::<_, _, CustomError<_>>(entity)(input);
|
||||
match result {
|
||||
Ok((remaining, ent)) => {
|
||||
|
@ -2,6 +2,8 @@ use nom::branch::alt;
|
||||
use nom::bytes::complete::is_not;
|
||||
use nom::bytes::complete::tag;
|
||||
use nom::bytes::complete::tag_no_case;
|
||||
use nom::bytes::complete::take_while1;
|
||||
use nom::character::complete::anychar;
|
||||
use nom::character::complete::line_ending;
|
||||
use nom::character::complete::space0;
|
||||
use nom::character::complete::space1;
|
||||
@ -9,14 +11,24 @@ use nom::combinator::eof;
|
||||
use nom::combinator::not;
|
||||
use nom::combinator::peek;
|
||||
use nom::combinator::recognize;
|
||||
use nom::multi::many_till;
|
||||
use nom::sequence::tuple;
|
||||
|
||||
use super::org_source::BracketDepth;
|
||||
use super::org_source::OrgSource;
|
||||
use super::Context;
|
||||
use crate::error::CustomError;
|
||||
use crate::error::MyError;
|
||||
use crate::error::Res;
|
||||
use crate::parser::util::start_of_line;
|
||||
use crate::parser::Keyword;
|
||||
|
||||
const ORG_ELEMENT_AFFILIATED_KEYWORDS: [&'static str; 13] = [
|
||||
"caption", "data", "header", "headers", "label", "name", "plot", "resname", "result",
|
||||
"results", "source", "srcname", "tblname",
|
||||
];
|
||||
const ORG_ELEMENT_DUAL_KEYWORDS: [&'static str; 2] = ["caption", "results"];
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
pub fn keyword<'r, 's>(
|
||||
_context: Context<'r, 's>,
|
||||
@ -41,3 +53,112 @@ pub fn keyword<'r, 's>(
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
pub fn affiliated_keyword<'r, 's>(
|
||||
_context: Context<'r, 's>,
|
||||
input: OrgSource<'s>,
|
||||
) -> Res<OrgSource<'s>, Keyword<'s>> {
|
||||
start_of_line(input)?;
|
||||
|
||||
// TODO: When key is a member of org-element-parsed-keywords, value can contain the standard set objects, excluding footnote references.
|
||||
let (remaining, rule) = recognize(tuple((
|
||||
space0,
|
||||
tag("#+"),
|
||||
affiliated_key,
|
||||
tag(":"),
|
||||
alt((recognize(tuple((space1, is_not("\r\n")))), space0)),
|
||||
alt((line_ending, eof)),
|
||||
)))(input)?;
|
||||
Ok((
|
||||
remaining,
|
||||
Keyword {
|
||||
source: rule.into(),
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
fn affiliated_key<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
||||
alt((
|
||||
recognize(tuple((dual_affiliated_key, tag("["), optval, tag("]")))),
|
||||
plain_affiliated_key,
|
||||
export_keyword,
|
||||
))(input)
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
fn plain_affiliated_key<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
||||
for keyword in ORG_ELEMENT_AFFILIATED_KEYWORDS {
|
||||
let result = tag_no_case::<_, _, CustomError<_>>(keyword)(input);
|
||||
match result {
|
||||
Ok((remaining, ent)) => {
|
||||
return Ok((remaining, ent));
|
||||
}
|
||||
Err(_) => {}
|
||||
}
|
||||
}
|
||||
|
||||
Err(nom::Err::Error(CustomError::MyError(MyError(
|
||||
"NoKeywordKey".into(),
|
||||
))))
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
fn dual_affiliated_key<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
||||
for keyword in ORG_ELEMENT_DUAL_KEYWORDS {
|
||||
let result = tag_no_case::<_, _, CustomError<_>>(keyword)(input);
|
||||
match result {
|
||||
Ok((remaining, ent)) => {
|
||||
return Ok((remaining, ent));
|
||||
}
|
||||
Err(_) => {}
|
||||
}
|
||||
}
|
||||
|
||||
Err(nom::Err::Error(CustomError::MyError(MyError(
|
||||
"NoKeywordKey".into(),
|
||||
))))
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
fn optval<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
||||
recognize(many_till(
|
||||
anychar,
|
||||
peek(optval_end(input.get_bracket_depth())),
|
||||
))(input)
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
const fn optval_end(
|
||||
starting_bracket_depth: BracketDepth,
|
||||
) -> impl for<'s> Fn(OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
||||
move |input: OrgSource<'_>| _optval_end(input, starting_bracket_depth)
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
fn _optval_end<'s>(
|
||||
input: OrgSource<'s>,
|
||||
starting_bracket_depth: BracketDepth,
|
||||
) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
||||
let current_depth = input.get_bracket_depth() - starting_bracket_depth;
|
||||
if current_depth < 0 {
|
||||
// This shouldn't be possible because if depth is 0 then a closing bracket should end the opval.
|
||||
unreachable!("Exceeded optval bracket depth.")
|
||||
}
|
||||
if current_depth == 0 {
|
||||
let close_bracket = tag::<_, _, CustomError<_>>("]")(input);
|
||||
if close_bracket.is_ok() {
|
||||
return close_bracket;
|
||||
}
|
||||
}
|
||||
tag("\n")(input)
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
fn export_keyword<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
||||
recognize(tuple((
|
||||
tag_no_case("attr_"),
|
||||
take_while1(|c: char| c.is_alphanumeric() || "-_".contains(c)),
|
||||
)))(input)
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
use nom::bytes::complete::tag;
|
||||
use nom::character::complete::anychar;
|
||||
use nom::character::complete::space0;
|
||||
use nom::combinator::not;
|
||||
use nom::combinator::opt;
|
||||
use nom::combinator::peek;
|
||||
@ -24,6 +25,7 @@ pub fn org_macro<'r, 's>(
|
||||
let (remaining, macro_name) = org_macro_name(context, remaining)?;
|
||||
let (remaining, macro_args) = opt(parser_with_context!(org_macro_args)(context))(remaining)?;
|
||||
let (remaining, _) = tag("}}}")(remaining)?;
|
||||
let (remaining, _trailing_whitespace) = space0(remaining)?;
|
||||
|
||||
let source = get_consumed(input, remaining);
|
||||
Ok((
|
||||
|
@ -7,6 +7,7 @@ use nom::character::complete::one_of;
|
||||
use nom::combinator::eof;
|
||||
use nom::combinator::peek;
|
||||
use nom::combinator::recognize;
|
||||
use nom::combinator::verify;
|
||||
use nom::multi::many_till;
|
||||
|
||||
use super::org_source::OrgSource;
|
||||
@ -23,6 +24,33 @@ use crate::parser::util::exit_matcher_parser;
|
||||
use crate::parser::util::get_consumed;
|
||||
use crate::parser::util::WORD_CONSTITUENT_CHARACTERS;
|
||||
|
||||
// TODO: Make this a user-provided variable corresponding to elisp's org-link-parameters
|
||||
const ORG_LINK_PARAMETERS: [&'static str; 23] = [
|
||||
"id",
|
||||
"eww",
|
||||
"rmail",
|
||||
"mhe",
|
||||
"irc",
|
||||
"info",
|
||||
"gnus",
|
||||
"docview",
|
||||
"bibtex",
|
||||
"bbdb",
|
||||
"w3m",
|
||||
"doi",
|
||||
"file+sys",
|
||||
"file+emacs",
|
||||
"shell",
|
||||
"news",
|
||||
"mailto",
|
||||
"https",
|
||||
"http",
|
||||
"ftp",
|
||||
"help",
|
||||
"file",
|
||||
"elisp",
|
||||
];
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
pub fn plain_link<'r, 's>(
|
||||
context: Context<'r, 's>,
|
||||
@ -73,36 +101,19 @@ pub fn protocol<'r, 's>(
|
||||
input: OrgSource<'s>,
|
||||
) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
||||
// TODO: This should be defined by org-link-parameters
|
||||
let (remaining, proto) = alt((
|
||||
alt((
|
||||
tag_no_case("id"),
|
||||
tag_no_case("eww"),
|
||||
tag_no_case("rmail"),
|
||||
tag_no_case("mhe"),
|
||||
tag_no_case("irc"),
|
||||
tag_no_case("info"),
|
||||
tag_no_case("gnus"),
|
||||
tag_no_case("docview"),
|
||||
tag_no_case("bibtex"),
|
||||
tag_no_case("bbdb"),
|
||||
tag_no_case("w3m"),
|
||||
)),
|
||||
alt((
|
||||
tag_no_case("doi"),
|
||||
tag_no_case("file+sys"),
|
||||
tag_no_case("file+emacs"),
|
||||
tag_no_case("shell"),
|
||||
tag_no_case("news"),
|
||||
tag_no_case("mailto"),
|
||||
tag_no_case("https"),
|
||||
tag_no_case("http"),
|
||||
tag_no_case("ftp"),
|
||||
tag_no_case("help"),
|
||||
tag_no_case("file"),
|
||||
tag_no_case("elisp"),
|
||||
)),
|
||||
))(input)?;
|
||||
Ok((remaining, proto))
|
||||
for link_parameter in ORG_LINK_PARAMETERS {
|
||||
let result = tag_no_case::<_, _, CustomError<_>>(link_parameter)(input);
|
||||
match result {
|
||||
Ok((remaining, ent)) => {
|
||||
return Ok((remaining, ent));
|
||||
}
|
||||
Err(_) => {}
|
||||
}
|
||||
}
|
||||
|
||||
Err(nom::Err::Error(CustomError::MyError(MyError(
|
||||
"NoLinkProtocol".into(),
|
||||
))))
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
@ -119,7 +130,11 @@ fn path_plain<'r, 's>(
|
||||
|
||||
let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context);
|
||||
|
||||
let (remaining, path) = recognize(many_till(anychar, peek(exit_matcher)))(input)?;
|
||||
let (remaining, path) = recognize(verify(
|
||||
many_till(anychar, peek(exit_matcher)),
|
||||
|(children, _exit_contents)| !children.is_empty(),
|
||||
))(input)?;
|
||||
|
||||
Ok((remaining, path))
|
||||
}
|
||||
|
||||
@ -128,5 +143,10 @@ fn path_plain_end<'r, 's>(
|
||||
_context: Context<'r, 's>,
|
||||
input: OrgSource<'s>,
|
||||
) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
||||
recognize(one_of(" \t\r\n()[]<>"))(input)
|
||||
recognize(many_till(
|
||||
verify(anychar, |c| {
|
||||
*c != '/' && (c.is_ascii_punctuation() || c.is_whitespace())
|
||||
}),
|
||||
one_of(" \t\r\n()[]<>"),
|
||||
))(input)
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user