Add all entities.

This commit is contained in:
Tom Alexander 2023-08-29 15:10:27 -04:00
parent 3e6df7ba78
commit 3206027b96
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
5 changed files with 441 additions and 16 deletions

View File

@ -75,6 +75,7 @@ fn is_expect_fail(name: &str) -> Option<&str> {
"autogen_greater_element_drawer_drawer_with_headline_inside" => Some("Apparently lines with :end: become their own paragraph. This odd behavior needs to be investigated more."),
"autogen_element_container_priority_footnote_definition_dynamic_block" => Some("Apparently broken begin lines become their own paragraph."),
"autogen_lesser_element_paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."),
"autogen_sections_and_headings_empty_section" => Some("We are not yet handling empty sections properly."),
_ => None,
}
}

1
elisp_snippets/README.md Normal file
View File

@ -0,0 +1 @@
This folder is for snippets of elisp that are useful for development.

View File

@ -0,0 +1,5 @@
;; Emit the first element of every list-valued member of `org-entities'
;; through `message', wrapped in double quotes and followed by a comma.
;; Members that are not lists are skipped.
(dolist (entry org-entities)
  (and (listp entry)
       (message "\"%s\"," (car entry))))

View File

@ -9,11 +9,428 @@ use nom::combinator::recognize;
use super::org_source::OrgSource;
use super::Context;
use crate::error::CustomError;
use crate::error::MyError;
use crate::error::Res;
use crate::parser::object::Entity;
use crate::parser::parser_with_context::parser_with_context;
use crate::parser::util::get_consumed;
/// Entity names recognised after a leading backslash.
///
/// The `name` parser walks this table front to back and returns the first
/// entry that matches, so the order of the entries is significant.
///
/// NOTE(review): because the lookup is first-match, an entry that is a prefix
/// of a later entry (e.g. "le" before "leq", "pi" before "piv") looks like it
/// will shadow the longer name — confirm whether a longest-match ordering is
/// needed.
///
/// NOTE(review): the final 20 entries are textually identical (`"_ "`) here;
/// confirm whether they were meant to differ (for example in the amount of
/// whitespace after the underscore).
const ENTITIES: [&str; 413] = [
    "Agrave",
    "agrave",
    "Aacute",
    "aacute",
    "Acirc",
    "acirc",
    "Amacr",
    "amacr",
    "Atilde",
    "atilde",
    "Auml",
    "auml",
    "Aring",
    "AA",
    "aring",
    "AElig",
    "aelig",
    "Ccedil",
    "ccedil",
    "Egrave",
    "egrave",
    "Eacute",
    "eacute",
    "Ecirc",
    "ecirc",
    "Euml",
    "euml",
    "Igrave",
    "igrave",
    "Iacute",
    "iacute",
    "Idot",
    "inodot",
    "Icirc",
    "icirc",
    "Iuml",
    "iuml",
    "Ntilde",
    "ntilde",
    "Ograve",
    "ograve",
    "Oacute",
    "oacute",
    "Ocirc",
    "ocirc",
    "Otilde",
    "otilde",
    "Ouml",
    "ouml",
    "Oslash",
    "oslash",
    "OElig",
    "oelig",
    "Scaron",
    "scaron",
    "szlig",
    "Ugrave",
    "ugrave",
    "Uacute",
    "uacute",
    "Ucirc",
    "ucirc",
    "Uuml",
    "uuml",
    "Yacute",
    "yacute",
    "Yuml",
    "yuml",
    "fnof",
    "real",
    "image",
    "weierp",
    "ell",
    "imath",
    "jmath",
    "Alpha",
    "alpha",
    "Beta",
    "beta",
    "Gamma",
    "gamma",
    "Delta",
    "delta",
    "Epsilon",
    "epsilon",
    "varepsilon",
    "Zeta",
    "zeta",
    "Eta",
    "eta",
    "Theta",
    "theta",
    "thetasym",
    "vartheta",
    "Iota",
    "iota",
    "Kappa",
    "kappa",
    "Lambda",
    "lambda",
    "Mu",
    "mu",
    "nu",
    "Nu",
    "Xi",
    "xi",
    "Omicron",
    "omicron",
    "Pi",
    "pi",
    "Rho",
    "rho",
    "Sigma",
    "sigma",
    "sigmaf",
    "varsigma",
    "Tau",
    "Upsilon",
    "upsih",
    "upsilon",
    "Phi",
    "phi",
    "varphi",
    "Chi",
    "chi",
    "acutex",
    "Psi",
    "psi",
    "tau",
    "Omega",
    "omega",
    "piv",
    "varpi",
    "partial",
    "alefsym",
    "aleph",
    "gimel",
    "beth",
    "dalet",
    "ETH",
    "eth",
    "THORN",
    "thorn",
    "dots",
    "cdots",
    "hellip",
    "middot",
    "iexcl",
    "iquest",
    "shy",
    "ndash",
    "mdash",
    "quot",
    "acute",
    "ldquo",
    "rdquo",
    "bdquo",
    "lsquo",
    "rsquo",
    "sbquo",
    "laquo",
    "raquo",
    "lsaquo",
    "rsaquo",
    "circ",
    "vert",
    "vbar",
    "brvbar",
    "S",
    "sect",
    "amp",
    "lt",
    "gt",
    "tilde",
    "slash",
    "plus",
    "under",
    "equal",
    "asciicirc",
    "dagger",
    "dag",
    "Dagger",
    "ddag",
    "nbsp",
    "ensp",
    "emsp",
    "thinsp",
    "curren",
    "cent",
    "pound",
    "yen",
    "euro",
    "EUR",
    "dollar",
    "USD",
    "copy",
    "reg",
    "trade",
    "minus",
    "pm",
    "plusmn",
    "times",
    "frasl",
    "colon",
    "div",
    "frac12",
    "frac14",
    "frac34",
    "permil",
    "sup1",
    "sup2",
    "sup3",
    "radic",
    "sum",
    "prod",
    "micro",
    "macr",
    "deg",
    "prime",
    "Prime",
    "infin",
    "infty",
    "prop",
    "propto",
    "not",
    "neg",
    "land",
    "wedge",
    "lor",
    "vee",
    "cap",
    "cup",
    "smile",
    "frown",
    "int",
    "therefore",
    "there4",
    "because",
    "sim",
    "cong",
    "simeq",
    "asymp",
    "approx",
    "ne",
    "neq",
    "equiv",
    "triangleq",
    "le",
    "leq",
    "ge",
    "geq",
    "lessgtr",
    "lesseqgtr",
    "ll",
    "Ll",
    "lll",
    "gg",
    "Gg",
    "ggg",
    "prec",
    "preceq",
    "preccurlyeq",
    "succ",
    "succeq",
    "succcurlyeq",
    "sub",
    "subset",
    "sup",
    "supset",
    "nsub",
    "sube",
    "nsup",
    "supe",
    "setminus",
    "forall",
    "exist",
    "exists",
    "nexist",
    "nexists",
    "empty",
    "emptyset",
    "isin",
    "in",
    "notin",
    "ni",
    "nabla",
    "ang",
    "angle",
    "perp",
    "parallel",
    "sdot",
    "cdot",
    "lceil",
    "rceil",
    "lfloor",
    "rfloor",
    "lang",
    "rang",
    "langle",
    "rangle",
    "hbar",
    "mho",
    "larr",
    "leftarrow",
    "gets",
    "lArr",
    "Leftarrow",
    "uarr",
    "uparrow",
    "uArr",
    "Uparrow",
    "rarr",
    "to",
    "rightarrow",
    "rArr",
    "Rightarrow",
    "darr",
    "downarrow",
    "dArr",
    "Downarrow",
    "harr",
    "leftrightarrow",
    "hArr",
    "Leftrightarrow",
    "crarr",
    "hookleftarrow",
    "arccos",
    "arcsin",
    "arctan",
    "arg",
    "cos",
    "cosh",
    "cot",
    "coth",
    "csc",
    "deg",
    "det",
    "dim",
    "exp",
    "gcd",
    "hom",
    "inf",
    "ker",
    "lg",
    "lim",
    "liminf",
    "limsup",
    "ln",
    "log",
    "max",
    "min",
    "Pr",
    "sec",
    "sin",
    "sinh",
    "sup",
    "tan",
    "tanh",
    "bull",
    "bullet",
    "star",
    "lowast",
    "ast",
    "odot",
    "oplus",
    "otimes",
    "check",
    "checkmark",
    "para",
    "ordf",
    "ordm",
    "cedil",
    "oline",
    "uml",
    "zwnj",
    "zwj",
    "lrm",
    "rlm",
    "smiley",
    "blacksmile",
    "sad",
    "frowny",
    "clubs",
    "clubsuit",
    "spades",
    "spadesuit",
    "hearts",
    "heartsuit",
    "diams",
    "diamondsuit",
    "diamond",
    "Diamond",
    "loz",
    "_ ",
    "_ ",
    "_ ",
    "_ ",
    "_ ",
    "_ ",
    "_ ",
    "_ ",
    "_ ",
    "_ ",
    "_ ",
    "_ ",
    "_ ",
    "_ ",
    "_ ",
    "_ ",
    "_ ",
    "_ ",
    "_ ",
    "_ ",
];
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn entity<'r, 's>(
context: Context<'r, 's>,
@ -21,10 +438,7 @@ pub fn entity<'r, 's>(
) -> Res<OrgSource<'s>, Entity<'s>> {
let (remaining, _) = tag("\\")(input)?;
let (remaining, entity_name) = name(context, remaining)?;
let (remaining, _) = alt((
tag("{}"),
peek(recognize(parser_with_context!(entity_end)(context))),
))(remaining)?;
let (remaining, _) = alt((tag("{}"), peek(recognize(entity_end))))(remaining)?;
let (remaining, _) = space0(remaining)?;
let source = get_consumed(input, remaining);
@ -43,20 +457,24 @@ fn name<'r, 's>(
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
// TODO: This should be defined by org-entities and optionally org-entities-user
for entity in ENTITIES {
// foo
let result = tag_no_case::<_, _, CustomError<_>>(entity)(input);
match result {
Ok((remaining, ent)) => {
return Ok((remaining, ent));
}
Err(_) => {}
}
}
// TODO: Add the rest of the entities, this is a very incomplete list
let (remaining, proto) = alt((alt((
tag_no_case("delta"),
tag_no_case("pi"),
tag_no_case("ast"),
tag_no_case("lt"),
tag_no_case("gt"),
)),))(input)?;
Ok((remaining, proto))
Err(nom::Err::Error(CustomError::MyError(MyError(
"NoEntity".into(),
))))
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn entity_end<'r, 's>(_context: Context<'r, 's>, input: OrgSource<'s>) -> Res<OrgSource<'s>, ()> {
fn entity_end<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, ()> {
let (remaining, _) = alt((eof, recognize(satisfy(|c| !c.is_alphabetic()))))(input)?;
Ok((remaining, ()))

View File

@ -11,7 +11,7 @@ fn {name}() {{
let diff_result =
compare_document(&parsed_sexp, &rust_parsed).expect("Compare parsed documents.");
diff_result
.print()
.print(org_contents.as_str())
.expect("Print document parse tree diff.");
assert!(!diff_result.is_bad());
assert_eq!(remaining, "");