31 Commits

Author SHA1 Message Date
Tom Alexander
28aca041f7 Publish version 0.1.3.
Some checks failed
rustfmt Build rustfmt has failed
rust-test Build rust-test has failed
rust-build Build rust-build has succeeded
2023-08-29 23:05:36 -04:00
Tom Alexander
d82def2a70 Merge branch 'compare_improvements'
All checks were successful
rustfmt Build rustfmt has succeeded
rust-test Build rust-test has succeeded
rust-build Build rust-build has succeeded
2023-08-29 22:58:42 -04:00
Tom Alexander
d471f7178b Wrap the components of headlines in diff layers. 2023-08-29 22:57:08 -04:00
Tom Alexander
2c5c26c55f Allow diff layers that are not associated with tokens. 2023-08-29 22:47:40 -04:00
Tom Alexander
7944659802 Compare headline level. 2023-08-29 22:11:56 -04:00
Tom Alexander
58aca53144 Move get_property into util. 2023-08-29 22:07:23 -04:00
Tom Alexander
6f2d90162b Do not use the plain text parser for property drawers.
This additional exit condition was causing property drawers to parse incorrectly.
2023-08-29 22:03:20 -04:00
Tom Alexander
f170a557ed Use character offsets in diff. 2023-08-29 21:49:16 -04:00
Tom Alexander
eaa38ce772 Include an error message for failed bounds checking.
All checks were successful
rustfmt Build rustfmt has succeeded
rust-test Build rust-test has succeeded
rust-build Build rust-build has succeeded
2023-08-29 21:27:54 -04:00
Tom Alexander
a6d742a536 Fix handling line breaks after divider in description lists. 2023-08-29 21:27:54 -04:00
Tom Alexander
45be9e7bde Merge branch 'description_list'
All checks were successful
rustfmt Build rustfmt has succeeded
rust-test Build rust-test has succeeded
rust-build Build rust-build has succeeded
2023-08-29 19:36:45 -04:00
Tom Alexander
f6c895319f Add support for diffing description lists. 2023-08-29 19:35:54 -04:00
Tom Alexander
2682779534 Add support for parsing description lists. 2023-08-29 18:13:55 -04:00
Tom Alexander
b48d472546 Fix build.
All checks were successful
rustfmt Build rustfmt has succeeded
rust-test Build rust-test has succeeded
rust-build Build rust-build has succeeded
2023-08-29 18:07:35 -04:00
Tom Alexander
ea6faf728c Add test for description list. 2023-08-29 18:07:35 -04:00
Tom Alexander
f4ea1b7303 Merge branch 'accuracy_fixes_from_feeding_large_documents'
Some checks failed
rustfmt Build rustfmt has succeeded
rust-test Build rust-test has succeeded
rust-build Build rust-build has failed
2023-08-29 17:49:19 -04:00
Tom Alexander
80b55fdd45 Consume trailing whitespace for macro. 2023-08-29 17:42:58 -04:00
Tom Alexander
f426e32798 Do not include trailing punctuation or whitespace in plain links. 2023-08-29 17:35:04 -04:00
Tom Alexander
66037356c5 Add test showing plain links ending in punctuation currently do not parse correctly. 2023-08-29 17:24:44 -04:00
Tom Alexander
1bcd1895c0 Allow affiliating keywords with regular keywords. 2023-08-29 17:19:13 -04:00
Tom Alexander
e3d38cfbe2 Add a test for floating affiliated keywords. 2023-08-29 17:05:19 -04:00
Tom Alexander
2ba0dc49be Support export affiliated keywords. 2023-08-29 17:01:35 -04:00
Tom Alexander
9df40fb13f Only allow specific keywords for affiliated keywords. 2023-08-29 16:56:07 -04:00
Tom Alexander
cc671925db Support empty sections under headings. 2023-08-29 16:07:43 -04:00
Tom Alexander
950baa9d5d Only allow a single section under a heading. 2023-08-29 16:03:13 -04:00
Tom Alexander
56865c68fc Do not allow plain links without a path. 2023-08-29 15:44:04 -04:00
Tom Alexander
f592b73ae7 Merge branch 'reduce_context_usage_in_exit_matchers'
All checks were successful
rustfmt Build rustfmt has succeeded
rust-test Build rust-test has succeeded
rust-build Build rust-build has succeeded
2023-08-29 15:17:16 -04:00
Tom Alexander
3206027b96 Add all entities. 2023-08-29 15:16:22 -04:00
Tom Alexander
3e6df7ba78 Print character offset from rust's parse perspective during compare. 2023-08-29 14:40:58 -04:00
Tom Alexander
ac313d093e Improve error handling in compare. 2023-08-29 14:20:53 -04:00
Tom Alexander
f376f1cf8e Add a test for empty sections. 2023-08-29 14:10:26 -04:00
27 changed files with 1827 additions and 465 deletions

View File

@@ -1,6 +1,6 @@
[package]
name = "organic"
version = "0.1.2"
version = "0.1.3"
authors = ["Tom Alexander <tom@fizz.buzz>"]
description = "An org-mode parser."
edition = "2021"
@@ -14,7 +14,8 @@ include = [
"LICENSE",
"**/*.rs",
"Cargo.toml",
"tests/*"
"tests/*",
"org_mode_samples/"
]
[lib]
@@ -40,7 +41,7 @@ tracing-subscriber = { version = "0.3.17", optional = true, features = ["env-fil
walkdir = "2.3.3"
[features]
default = ["compare"]
default = []
compare = []
tracing = ["dep:opentelemetry", "dep:opentelemetry-otlp", "dep:opentelemetry-semantic-conventions", "dep:tokio", "dep:tracing", "dep:tracing-opentelemetry", "dep:tracing-subscriber"]

View File

@@ -75,6 +75,7 @@ fn is_expect_fail(name: &str) -> Option<&str> {
"autogen_greater_element_drawer_drawer_with_headline_inside" => Some("Apparently lines with :end: become their own paragraph. This odd behavior needs to be investigated more."),
"autogen_element_container_priority_footnote_definition_dynamic_block" => Some("Apparently broken begin lines become their own paragraph."),
"autogen_lesser_element_paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."),
"autogen_unicode_hearts" => Some("Unicode is coming out of emacs strange."),
_ => None,
}
}

1
elisp_snippets/README.md Normal file
View File

@@ -0,0 +1 @@
This folder is for snippets of elisp that are useful for development.

View File

@@ -0,0 +1,3 @@
(dolist (var org-element-affiliated-keywords)
(message "\"%s\"," (downcase var))
)

View File

@@ -0,0 +1,5 @@
(dolist (var org-entities)
(when (listp var)
(message "\"%s\"," (nth 0 var))
)
)

View File

@@ -0,0 +1,2 @@
- foo ::

View File

@@ -0,0 +1,11 @@
- foo :: bar
- cat ::
dog
- lorem
:: ipsum
-
lorem :: ipsum
- dolar *bold* foo :: ipsum
- big gap ::
stuff

View File

@@ -0,0 +1,7 @@
** foo
:PROPERTIES:
:DESCRIPTION: lorem
:ALT_TITLE: ipsum
:END:
bar

View File

@@ -0,0 +1,15 @@
#+name: foo
#+caption: bar
#+caption: baz
[[file:lorem/ipsum.png]]
#+name: cat
#+foo: dog
[[file:lorem/ipsum.png]]
#+name: cat
#+foo: dog
foo

View File

@@ -0,0 +1,52 @@
non-link text
eww://
rmail://
mhe://
irc://
info://
gnus://
docview://
bibtex://
bbdb://
w3m://
doi://
file+sys://
file+emacs://
shell://
news://
mailto://
https://
http://
ftp://
help://
file://
elisp://
randomfakeprotocl://
non-link text
non-link text
eww:
rmail:
mhe:
irc:
info:
gnus:
docview:
bibtex:
bbdb:
w3m:
doi:
file+sys:
file+emacs:
shell:
news:
mailto:
https:
http:
ftp:
help:
file:
elisp:
randomfakeprotocl:
non-link text

View File

@@ -0,0 +1,3 @@
mailto:foo@bar.baz.
mailto:foo@bar.baz....

View File

@@ -0,0 +1,9 @@
* Foo
* Bar
* Baz

View File

@@ -0,0 +1 @@
🧡💛💚💙💜

File diff suppressed because it is too large Load Diff

View File

@@ -51,7 +51,7 @@ pub fn assert_bounds<'s, S: Source<'s>>(
);
let (rust_begin, rust_end) = get_offsets(source, rust);
if (rust_begin + 1) != begin || (rust_end + 1) != end {
Err(format!("Rust bounds ({rust_begin}, {rust_end}) do not match emacs bounds ({emacs_begin}, {emacs_end})", rust_begin = rust_begin + 1, rust_end = rust_end + 1, emacs_begin=begin, emacs_end=end))?;
Err(format!("Rust bounds (in bytes) ({rust_begin}, {rust_end}) do not match emacs bounds ({emacs_begin}, {emacs_end})", rust_begin = rust_begin + 1, rust_end = rust_end + 1, emacs_begin=begin, emacs_end=end))?;
}
Ok(())
@@ -137,3 +137,23 @@ fn maybe_token_to_usize(
.flatten() // Outer option is whether or not the param exists, inner option is whether or not it is nil
.map_or(Ok(None), |r| r.map(Some))?)
}
pub fn get_property<'s, 'x>(
emacs: &'s Token<'s>,
key: &'x str,
) -> Result<Option<&'s Token<'s>>, Box<dyn std::error::Error>> {
let children = emacs.as_list()?;
let attributes_child = children
.iter()
.nth(1)
.ok_or("Should have an attributes child.")?;
let attributes_map = attributes_child.as_map()?;
let prop = attributes_map
.get(key)
.ok_or(format!("Missing {} attribute.", key))?;
match prop.as_atom() {
Ok("nil") => return Ok(None),
_ => {}
};
Ok(Some(*prop))
}

View File

@@ -55,20 +55,21 @@ fn read_stdin_to_string() -> Result<String, Box<dyn std::error::Error>> {
fn run_compare<P: AsRef<str>>(org_contents: P) -> Result<(), Box<dyn std::error::Error>> {
let emacs_version = get_emacs_version()?;
let org_mode_version = get_org_mode_version()?;
let org_contents = org_contents.as_ref();
eprintln!("Using emacs version: {}", emacs_version.trim());
eprintln!("Using org-mode version: {}", org_mode_version.trim());
let (remaining, rust_parsed) = document(org_contents.as_ref()).map_err(|e| e.to_string())?;
let org_sexp = emacs_parse_org_document(org_contents.as_ref())?;
let (remaining, rust_parsed) = document(org_contents).map_err(|e| e.to_string())?;
let org_sexp = emacs_parse_org_document(org_contents)?;
let (_remaining, parsed_sexp) =
sexp_with_padding(org_sexp.as_str()).map_err(|e| e.to_string())?;
println!("{}\n\n\n", org_contents.as_ref());
println!("{}\n\n\n", org_contents);
println!("{}", org_sexp);
println!("{:#?}", rust_parsed);
// We do the diffing after printing out both parsed forms in case the diffing panics
let diff_result = compare_document(&parsed_sexp, &rust_parsed)?;
diff_result.print()?;
diff_result.print(org_contents)?;
if diff_result.is_bad() {
Err("Diff results do not match.")?;
@@ -85,7 +86,7 @@ fn run_compare<P: AsRef<str>>(org_contents: P) -> Result<(), Box<dyn std::error:
eprintln!(
"This program was built with compare disabled. Only parsing with organic, not comparing."
);
let (remaining, rust_parsed) = document(org_contents.as_ref()).map_err(|e| e.to_string())?;
let (_remaining, rust_parsed) = document(org_contents.as_ref()).map_err(|e| e.to_string())?;
println!("{:#?}", rust_parsed);
Ok(())
}

View File

@@ -283,10 +283,21 @@ fn _heading<'r, 's>(
headline(context, input, parent_stars)?;
let section_matcher = parser_with_context!(section)(context);
let heading_matcher = parser_with_context!(heading(star_count))(context);
let (remaining, children) = many0(alt((
map(heading_matcher, DocumentElement::Heading),
map(section_matcher, DocumentElement::Section),
)))(remaining)?;
let (remaining, maybe_section) =
opt(map(section_matcher, DocumentElement::Section))(remaining)?;
let (remaining, mut children) =
many0(map(heading_matcher, DocumentElement::Heading))(remaining)?;
if let Some(section) = maybe_section {
children.insert(0, section);
}
let remaining = if children.is_empty() {
// Support empty headings
let (remain, _ws) = many0(blank_line)(remaining)?;
remain
} else {
remaining
};
let source = get_consumed(input, remaining);
Ok((
remaining,

View File

@@ -12,6 +12,7 @@ use super::fixed_width_area::fixed_width_area;
use super::footnote_definition::footnote_definition;
use super::greater_block::greater_block;
use super::horizontal_rule::horizontal_rule;
use super::keyword::affiliated_keyword;
use super::keyword::keyword;
use super::latex_environment::latex_environment;
use super::lesser_block::comment_block;
@@ -62,10 +63,12 @@ fn _element<'r, 's>(
let fixed_width_area_matcher = parser_with_context!(fixed_width_area)(context);
let horizontal_rule_matcher = parser_with_context!(horizontal_rule)(context);
let keyword_matcher = parser_with_context!(keyword)(context);
let affiliated_keyword_matcher = parser_with_context!(affiliated_keyword)(context);
let paragraph_matcher = parser_with_context!(paragraph)(context);
let latex_environment_matcher = parser_with_context!(latex_environment)(context);
let (remaining, mut affiliated_keywords) = many0(keyword_matcher)(input)?;
// TODO: Affiliated keywords cannot be on comments, clocks, headings, inlinetasks, items, node properties, planning, property drawers, sections, and table rows
let (remaining, mut affiliated_keywords) = many0(affiliated_keyword_matcher)(input)?;
let (remaining, mut element) = match alt((
map(plain_list_matcher, Element::PlainList),
map(greater_block_matcher, Element::GreaterBlock),
@@ -84,6 +87,7 @@ fn _element<'r, 's>(
map(fixed_width_area_matcher, Element::FixedWidthArea),
map(horizontal_rule_matcher, Element::HorizontalRule),
map(latex_environment_matcher, Element::LatexEnvironment),
map(keyword_matcher, Element::Keyword),
))(remaining)
{
the_ok @ Ok(_) => the_ok,
@@ -93,12 +97,12 @@ fn _element<'r, 's>(
the_ok @ Ok(_) => the_ok,
Err(_) => {
affiliated_keywords.clear();
map(keyword_matcher, Element::Keyword)(input)
map(affiliated_keyword_matcher, Element::Keyword)(input)
}
}
} else {
affiliated_keywords.clear();
map(keyword_matcher, Element::Keyword)(input)
map(affiliated_keyword_matcher, Element::Keyword)(input)
}
}
}?;

View File

@@ -9,11 +9,429 @@ use nom::combinator::recognize;
use super::org_source::OrgSource;
use super::Context;
use crate::error::CustomError;
use crate::error::MyError;
use crate::error::Res;
use crate::parser::object::Entity;
use crate::parser::parser_with_context::parser_with_context;
use crate::parser::util::get_consumed;
// TODO: Make this a user-provided variable corresponding to elisp's org-entities
const ORG_ENTITIES: [&'static str; 413] = [
"Agrave",
"agrave",
"Aacute",
"aacute",
"Acirc",
"acirc",
"Amacr",
"amacr",
"Atilde",
"atilde",
"Auml",
"auml",
"Aring",
"AA",
"aring",
"AElig",
"aelig",
"Ccedil",
"ccedil",
"Egrave",
"egrave",
"Eacute",
"eacute",
"Ecirc",
"ecirc",
"Euml",
"euml",
"Igrave",
"igrave",
"Iacute",
"iacute",
"Idot",
"inodot",
"Icirc",
"icirc",
"Iuml",
"iuml",
"Ntilde",
"ntilde",
"Ograve",
"ograve",
"Oacute",
"oacute",
"Ocirc",
"ocirc",
"Otilde",
"otilde",
"Ouml",
"ouml",
"Oslash",
"oslash",
"OElig",
"oelig",
"Scaron",
"scaron",
"szlig",
"Ugrave",
"ugrave",
"Uacute",
"uacute",
"Ucirc",
"ucirc",
"Uuml",
"uuml",
"Yacute",
"yacute",
"Yuml",
"yuml",
"fnof",
"real",
"image",
"weierp",
"ell",
"imath",
"jmath",
"Alpha",
"alpha",
"Beta",
"beta",
"Gamma",
"gamma",
"Delta",
"delta",
"Epsilon",
"epsilon",
"varepsilon",
"Zeta",
"zeta",
"Eta",
"eta",
"Theta",
"theta",
"thetasym",
"vartheta",
"Iota",
"iota",
"Kappa",
"kappa",
"Lambda",
"lambda",
"Mu",
"mu",
"nu",
"Nu",
"Xi",
"xi",
"Omicron",
"omicron",
"Pi",
"pi",
"Rho",
"rho",
"Sigma",
"sigma",
"sigmaf",
"varsigma",
"Tau",
"Upsilon",
"upsih",
"upsilon",
"Phi",
"phi",
"varphi",
"Chi",
"chi",
"acutex",
"Psi",
"psi",
"tau",
"Omega",
"omega",
"piv",
"varpi",
"partial",
"alefsym",
"aleph",
"gimel",
"beth",
"dalet",
"ETH",
"eth",
"THORN",
"thorn",
"dots",
"cdots",
"hellip",
"middot",
"iexcl",
"iquest",
"shy",
"ndash",
"mdash",
"quot",
"acute",
"ldquo",
"rdquo",
"bdquo",
"lsquo",
"rsquo",
"sbquo",
"laquo",
"raquo",
"lsaquo",
"rsaquo",
"circ",
"vert",
"vbar",
"brvbar",
"S",
"sect",
"amp",
"lt",
"gt",
"tilde",
"slash",
"plus",
"under",
"equal",
"asciicirc",
"dagger",
"dag",
"Dagger",
"ddag",
"nbsp",
"ensp",
"emsp",
"thinsp",
"curren",
"cent",
"pound",
"yen",
"euro",
"EUR",
"dollar",
"USD",
"copy",
"reg",
"trade",
"minus",
"pm",
"plusmn",
"times",
"frasl",
"colon",
"div",
"frac12",
"frac14",
"frac34",
"permil",
"sup1",
"sup2",
"sup3",
"radic",
"sum",
"prod",
"micro",
"macr",
"deg",
"prime",
"Prime",
"infin",
"infty",
"prop",
"propto",
"not",
"neg",
"land",
"wedge",
"lor",
"vee",
"cap",
"cup",
"smile",
"frown",
"int",
"therefore",
"there4",
"because",
"sim",
"cong",
"simeq",
"asymp",
"approx",
"ne",
"neq",
"equiv",
"triangleq",
"le",
"leq",
"ge",
"geq",
"lessgtr",
"lesseqgtr",
"ll",
"Ll",
"lll",
"gg",
"Gg",
"ggg",
"prec",
"preceq",
"preccurlyeq",
"succ",
"succeq",
"succcurlyeq",
"sub",
"subset",
"sup",
"supset",
"nsub",
"sube",
"nsup",
"supe",
"setminus",
"forall",
"exist",
"exists",
"nexist",
"nexists",
"empty",
"emptyset",
"isin",
"in",
"notin",
"ni",
"nabla",
"ang",
"angle",
"perp",
"parallel",
"sdot",
"cdot",
"lceil",
"rceil",
"lfloor",
"rfloor",
"lang",
"rang",
"langle",
"rangle",
"hbar",
"mho",
"larr",
"leftarrow",
"gets",
"lArr",
"Leftarrow",
"uarr",
"uparrow",
"uArr",
"Uparrow",
"rarr",
"to",
"rightarrow",
"rArr",
"Rightarrow",
"darr",
"downarrow",
"dArr",
"Downarrow",
"harr",
"leftrightarrow",
"hArr",
"Leftrightarrow",
"crarr",
"hookleftarrow",
"arccos",
"arcsin",
"arctan",
"arg",
"cos",
"cosh",
"cot",
"coth",
"csc",
"deg",
"det",
"dim",
"exp",
"gcd",
"hom",
"inf",
"ker",
"lg",
"lim",
"liminf",
"limsup",
"ln",
"log",
"max",
"min",
"Pr",
"sec",
"sin",
"sinh",
"sup",
"tan",
"tanh",
"bull",
"bullet",
"star",
"lowast",
"ast",
"odot",
"oplus",
"otimes",
"check",
"checkmark",
"para",
"ordf",
"ordm",
"cedil",
"oline",
"uml",
"zwnj",
"zwj",
"lrm",
"rlm",
"smiley",
"blacksmile",
"sad",
"frowny",
"clubs",
"clubsuit",
"spades",
"spadesuit",
"hearts",
"heartsuit",
"diams",
"diamondsuit",
"diamond",
"Diamond",
"loz",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
];
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn entity<'r, 's>(
context: Context<'r, 's>,
@@ -21,10 +439,7 @@ pub fn entity<'r, 's>(
) -> Res<OrgSource<'s>, Entity<'s>> {
let (remaining, _) = tag("\\")(input)?;
let (remaining, entity_name) = name(context, remaining)?;
let (remaining, _) = alt((
tag("{}"),
peek(recognize(parser_with_context!(entity_end)(context))),
))(remaining)?;
let (remaining, _) = alt((tag("{}"), peek(recognize(entity_end))))(remaining)?;
let (remaining, _) = space0(remaining)?;
let source = get_consumed(input, remaining);
@@ -43,20 +458,23 @@ fn name<'r, 's>(
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
// TODO: This should be defined by org-entities and optionally org-entities-user
for entity in ORG_ENTITIES {
let result = tag_no_case::<_, _, CustomError<_>>(entity)(input);
match result {
Ok((remaining, ent)) => {
return Ok((remaining, ent));
}
Err(_) => {}
}
}
// TODO: Add the rest of the entities, this is a very incomplete list
let (remaining, proto) = alt((alt((
tag_no_case("delta"),
tag_no_case("pi"),
tag_no_case("ast"),
tag_no_case("lt"),
tag_no_case("gt"),
)),))(input)?;
Ok((remaining, proto))
Err(nom::Err::Error(CustomError::MyError(MyError(
"NoEntity".into(),
))))
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn entity_end<'r, 's>(_context: Context<'r, 's>, input: OrgSource<'s>) -> Res<OrgSource<'s>, ()> {
fn entity_end<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, ()> {
let (remaining, _) = alt((eof, recognize(satisfy(|c| !c.is_alphabetic()))))(input)?;
Ok((remaining, ()))

View File

@@ -1,6 +1,7 @@
use super::element::Element;
use super::lesser_element::TableCell;
use super::source::Source;
use super::Object;
#[derive(Debug)]
pub struct PlainList<'s> {
@@ -13,6 +14,7 @@ pub struct PlainListItem<'s> {
pub source: &'s str,
pub indentation: usize,
pub bullet: &'s str,
pub tag: Vec<Object<'s>>,
pub children: Vec<Element<'s>>,
}

View File

@@ -2,6 +2,8 @@ use nom::branch::alt;
use nom::bytes::complete::is_not;
use nom::bytes::complete::tag;
use nom::bytes::complete::tag_no_case;
use nom::bytes::complete::take_while1;
use nom::character::complete::anychar;
use nom::character::complete::line_ending;
use nom::character::complete::space0;
use nom::character::complete::space1;
@@ -9,14 +11,24 @@ use nom::combinator::eof;
use nom::combinator::not;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::multi::many_till;
use nom::sequence::tuple;
use super::org_source::BracketDepth;
use super::org_source::OrgSource;
use super::Context;
use crate::error::CustomError;
use crate::error::MyError;
use crate::error::Res;
use crate::parser::util::start_of_line;
use crate::parser::Keyword;
const ORG_ELEMENT_AFFILIATED_KEYWORDS: [&'static str; 13] = [
"caption", "data", "header", "headers", "label", "name", "plot", "resname", "result",
"results", "source", "srcname", "tblname",
];
const ORG_ELEMENT_DUAL_KEYWORDS: [&'static str; 2] = ["caption", "results"];
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn keyword<'r, 's>(
_context: Context<'r, 's>,
@@ -41,3 +53,111 @@ pub fn keyword<'r, 's>(
},
))
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn affiliated_keyword<'r, 's>(
_context: Context<'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Keyword<'s>> {
start_of_line(input)?;
// TODO: When key is a member of org-element-parsed-keywords, value can contain the standard set objects, excluding footnote references.
let (remaining, rule) = recognize(tuple((
space0,
tag("#+"),
affiliated_key,
tag(":"),
alt((recognize(tuple((space1, is_not("\r\n")))), space0)),
alt((line_ending, eof)),
)))(input)?;
Ok((
remaining,
Keyword {
source: rule.into(),
},
))
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn affiliated_key<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
alt((
recognize(tuple((dual_affiliated_key, tag("["), optval, tag("]")))),
plain_affiliated_key,
export_keyword,
))(input)
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn plain_affiliated_key<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
for keyword in ORG_ELEMENT_AFFILIATED_KEYWORDS {
let result = tag_no_case::<_, _, CustomError<_>>(keyword)(input);
match result {
Ok((remaining, ent)) => {
return Ok((remaining, ent));
}
Err(_) => {}
}
}
Err(nom::Err::Error(CustomError::MyError(MyError(
"NoKeywordKey".into(),
))))
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn dual_affiliated_key<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
for keyword in ORG_ELEMENT_DUAL_KEYWORDS {
let result = tag_no_case::<_, _, CustomError<_>>(keyword)(input);
match result {
Ok((remaining, ent)) => {
return Ok((remaining, ent));
}
Err(_) => {}
}
}
Err(nom::Err::Error(CustomError::MyError(MyError(
"NoKeywordKey".into(),
))))
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn optval<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
recognize(many_till(
anychar,
peek(optval_end(input.get_bracket_depth())),
))(input)
}
const fn optval_end(
starting_bracket_depth: BracketDepth,
) -> impl for<'s> Fn(OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
move |input: OrgSource<'_>| _optval_end(input, starting_bracket_depth)
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn _optval_end<'s>(
input: OrgSource<'s>,
starting_bracket_depth: BracketDepth,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
let current_depth = input.get_bracket_depth() - starting_bracket_depth;
if current_depth < 0 {
// This shouldn't be possible because if depth is 0 then a closing bracket should end the opval.
unreachable!("Exceeded optval bracket depth.")
}
if current_depth == 0 {
let close_bracket = tag::<_, _, CustomError<_>>("]")(input);
if close_bracket.is_ok() {
return close_bracket;
}
}
tag("\n")(input)
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn export_keyword<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
recognize(tuple((
tag_no_case("attr_"),
take_while1(|c: char| c.is_alphanumeric() || "-_".contains(c)),
)))(input)
}

View File

@@ -374,3 +374,9 @@ impl<'s> Source<'s> for Timestamp<'s> {
self.source
}
}
impl<'s> Source<'s> for PlainText<'s> {
fn get_source(&'s self) -> &'s str {
self.source
}
}

View File

@@ -1,5 +1,6 @@
use nom::bytes::complete::tag;
use nom::character::complete::anychar;
use nom::character::complete::space0;
use nom::combinator::not;
use nom::combinator::opt;
use nom::combinator::peek;
@@ -24,6 +25,7 @@ pub fn org_macro<'r, 's>(
let (remaining, macro_name) = org_macro_name(context, remaining)?;
let (remaining, macro_args) = opt(parser_with_context!(org_macro_args)(context))(remaining)?;
let (remaining, _) = tag("}}}")(remaining)?;
let (remaining, _trailing_whitespace) = space0(remaining)?;
let source = get_consumed(input, remaining);
Ok((

View File

@@ -7,6 +7,7 @@ use nom::character::complete::one_of;
use nom::combinator::eof;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::combinator::verify;
use nom::multi::many_till;
use super::org_source::OrgSource;
@@ -23,6 +24,33 @@ use crate::parser::util::exit_matcher_parser;
use crate::parser::util::get_consumed;
use crate::parser::util::WORD_CONSTITUENT_CHARACTERS;
// TODO: Make this a user-provided variable corresponding to elisp's org-link-parameters
const ORG_LINK_PARAMETERS: [&'static str; 23] = [
"id",
"eww",
"rmail",
"mhe",
"irc",
"info",
"gnus",
"docview",
"bibtex",
"bbdb",
"w3m",
"doi",
"file+sys",
"file+emacs",
"shell",
"news",
"mailto",
"https",
"http",
"ftp",
"help",
"file",
"elisp",
];
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn plain_link<'r, 's>(
context: Context<'r, 's>,
@@ -73,36 +101,19 @@ pub fn protocol<'r, 's>(
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
// TODO: This should be defined by org-link-parameters
let (remaining, proto) = alt((
alt((
tag_no_case("id"),
tag_no_case("eww"),
tag_no_case("rmail"),
tag_no_case("mhe"),
tag_no_case("irc"),
tag_no_case("info"),
tag_no_case("gnus"),
tag_no_case("docview"),
tag_no_case("bibtex"),
tag_no_case("bbdb"),
tag_no_case("w3m"),
)),
alt((
tag_no_case("doi"),
tag_no_case("file+sys"),
tag_no_case("file+emacs"),
tag_no_case("shell"),
tag_no_case("news"),
tag_no_case("mailto"),
tag_no_case("https"),
tag_no_case("http"),
tag_no_case("ftp"),
tag_no_case("help"),
tag_no_case("file"),
tag_no_case("elisp"),
)),
))(input)?;
Ok((remaining, proto))
for link_parameter in ORG_LINK_PARAMETERS {
let result = tag_no_case::<_, _, CustomError<_>>(link_parameter)(input);
match result {
Ok((remaining, ent)) => {
return Ok((remaining, ent));
}
Err(_) => {}
}
}
Err(nom::Err::Error(CustomError::MyError(MyError(
"NoLinkProtocol".into(),
))))
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
@@ -119,7 +130,11 @@ fn path_plain<'r, 's>(
let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context);
let (remaining, path) = recognize(many_till(anychar, peek(exit_matcher)))(input)?;
let (remaining, path) = recognize(verify(
many_till(anychar, peek(exit_matcher)),
|(children, _exit_contents)| !children.is_empty(),
))(input)?;
Ok((remaining, path))
}
@@ -128,5 +143,10 @@ fn path_plain_end<'r, 's>(
_context: Context<'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
recognize(one_of(" \t\r\n()[]<>"))(input)
recognize(many_till(
verify(anychar, |c| {
*c != '/' && (c.is_ascii_punctuation() || c.is_whitespace())
}),
one_of(" \t\r\n()[]<>"),
))(input)
}

View File

@@ -6,19 +6,24 @@ use nom::character::complete::one_of;
use nom::character::complete::space0;
use nom::character::complete::space1;
use nom::combinator::eof;
use nom::combinator::not;
use nom::combinator::opt;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::combinator::verify;
use nom::multi::many0;
use nom::multi::many1;
use nom::multi::many_till;
use nom::sequence::tuple;
use super::greater_element::PlainList;
use super::greater_element::PlainListItem;
use super::object_parser::standard_set_object;
use super::org_source::OrgSource;
use super::parser_with_context::parser_with_context;
use super::util::non_whitespace_character;
use super::Context;
use super::Object;
use crate::error::CustomError;
use crate::error::MyError;
use crate::error::Res;
@@ -138,7 +143,6 @@ pub fn plain_list_item<'r, 's>(
Into::<&str>::into(bull) != "*" || indent_level > 0
})(remaining)?;
// TODO: This isn't taking into account items that immediately line break and then have contents
let maybe_contentless_item: Res<OrgSource<'_>, OrgSource<'_>> = eof(remaining);
match maybe_contentless_item {
Ok((rem, _ws)) => {
@@ -149,6 +153,7 @@ pub fn plain_list_item<'r, 's>(
source: source.into(),
indentation: indent_level,
bullet: bull.into(),
tag: Vec::new(),
children: Vec::new(),
},
));
@@ -156,7 +161,12 @@ pub fn plain_list_item<'r, 's>(
Err(_) => {}
};
let (remaining, _ws) = alt((space1, line_ending))(remaining)?;
let (remaining, maybe_tag) = opt(tuple((
space1,
parser_with_context!(item_tag)(context),
tag(" ::"),
)))(remaining)?;
let (remaining, _ws) = item_tag_post_gap(context, remaining)?;
let exit_matcher = plain_list_item_end(indent_level);
let parser_context = context
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
@@ -167,7 +177,10 @@ pub fn plain_list_item<'r, 's>(
let (remaining, (children, _exit_contents)) = many_till(
parser_with_context!(element(true))(&parser_context),
alt((
peek(recognize(tuple((start_of_line, many0(blank_line), eof)))),
parser_with_context!(exit_matcher_parser)(&parser_context),
)),
)(remaining)?;
let (remaining, _trailing_ws) =
@@ -180,6 +193,9 @@ pub fn plain_list_item<'r, 's>(
source: source.into(),
indentation: indent_level,
bullet: bull.into(),
tag: maybe_tag
.map(|(_ws, item_tag, _divider)| item_tag)
.unwrap_or(Vec::new()),
children,
},
));
@@ -262,6 +278,60 @@ fn _line_indented_lte<'r, 's>(
Ok(matched)
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn item_tag<'r, 's>(
context: Context<'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Vec<Object<'s>>> {
let parser_context =
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Gamma,
exit_matcher: &item_tag_end,
}));
let (remaining, (children, _exit_contents)) = verify(
many_till(
// TODO: Should this be using a different set like the minimal set?
parser_with_context!(standard_set_object)(&parser_context),
parser_with_context!(exit_matcher_parser)(&parser_context),
),
|(children, _exit_contents)| !children.is_empty(),
)(input)?;
Ok((remaining, children))
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn item_tag_end<'r, 's>(
_context: Context<'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
recognize(alt((
line_ending,
tag(" :: "),
recognize(tuple((tag(" ::"), alt((line_ending, eof))))),
)))(input)
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn item_tag_post_gap<'r, 's>(
context: Context<'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
verify(
recognize(tuple((
alt((blank_line, space0)),
many_till(
blank_line,
alt((
peek(recognize(not(blank_line))),
peek(recognize(tuple((many0(blank_line), eof)))),
parser_with_context!(exit_matcher_parser)(context),
)),
),
))),
|gap| gap.len() > 0,
)(input)
}
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -2,13 +2,14 @@ use nom::branch::alt;
use nom::bytes::complete::is_not;
use nom::bytes::complete::tag;
use nom::bytes::complete::tag_no_case;
use nom::character::complete::anychar;
use nom::character::complete::line_ending;
use nom::character::complete::space0;
use nom::character::complete::space1;
use nom::combinator::eof;
use nom::combinator::map;
use nom::combinator::opt;
use nom::combinator::recognize;
use nom::combinator::verify;
use nom::multi::many_till;
use nom::sequence::tuple;
@@ -23,7 +24,6 @@ use crate::parser::greater_element::PropertyDrawer;
use crate::parser::parser_context::ContextElement;
use crate::parser::parser_context::ExitMatcherNode;
use crate::parser::parser_with_context::parser_with_context;
use crate::parser::plain_text::plain_text;
use crate::parser::util::exit_matcher_parser;
use crate::parser::util::get_consumed;
use crate::parser::util::immediate_in_section;
@@ -147,11 +147,16 @@ fn node_property_name<'r, 's>(
}));
let (remaining, name) = recognize(tuple((
map(parser_with_context!(plain_text)(&parser_context), |pt| {
pt.source
}),
verify(
many_till(
anychar,
parser_with_context!(exit_matcher_parser)(&parser_context),
),
|(children, _exit_contents)| !children.is_empty(),
),
opt(tag("+")),
)))(input)?;
Ok((remaining, name))
}

View File

@@ -11,7 +11,7 @@ fn {name}() {{
let diff_result =
compare_document(&parsed_sexp, &rust_parsed).expect("Compare parsed documents.");
diff_result
.print()
.print(org_contents.as_str())
.expect("Print document parse tree diff.");
assert!(!diff_result.is_bad());
assert_eq!(remaining, "");