organic/src/parser/entity.rs
Tom Alexander 3d86e75059
Some checks failed
rust-test Build rust-test has succeeded
rust-build Build rust-build has succeeded
rust-foreign-document-test Build rust-foreign-document-test has failed
Always match the entire entity name.
2023-09-14 04:29:50 -04:00

486 lines
7.5 KiB
Rust

use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::character::complete::satisfy;
use nom::combinator::eof;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::sequence::tuple;
use super::org_source::OrgSource;
use super::util::maybe_consume_object_trailing_whitespace_if_not_exiting;
use crate::context::RefContext;
use crate::error::CustomError;
use crate::error::MyError;
use crate::error::Res;
use crate::parser::util::get_consumed;
use crate::types::Entity;
// TODO: Make this a user-provided variable corresponding to elisp's org-entities
const ORG_ENTITIES: [&'static str; 413] = [
"Agrave",
"agrave",
"Aacute",
"aacute",
"Acirc",
"acirc",
"Amacr",
"amacr",
"Atilde",
"atilde",
"Auml",
"auml",
"Aring",
"AA",
"aring",
"AElig",
"aelig",
"Ccedil",
"ccedil",
"Egrave",
"egrave",
"Eacute",
"eacute",
"Ecirc",
"ecirc",
"Euml",
"euml",
"Igrave",
"igrave",
"Iacute",
"iacute",
"Idot",
"inodot",
"Icirc",
"icirc",
"Iuml",
"iuml",
"Ntilde",
"ntilde",
"Ograve",
"ograve",
"Oacute",
"oacute",
"Ocirc",
"ocirc",
"Otilde",
"otilde",
"Ouml",
"ouml",
"Oslash",
"oslash",
"OElig",
"oelig",
"Scaron",
"scaron",
"szlig",
"Ugrave",
"ugrave",
"Uacute",
"uacute",
"Ucirc",
"ucirc",
"Uuml",
"uuml",
"Yacute",
"yacute",
"Yuml",
"yuml",
"fnof",
"real",
"image",
"weierp",
"ell",
"imath",
"jmath",
"Alpha",
"alpha",
"Beta",
"beta",
"Gamma",
"gamma",
"Delta",
"delta",
"Epsilon",
"epsilon",
"varepsilon",
"Zeta",
"zeta",
"Eta",
"eta",
"Theta",
"theta",
"thetasym",
"vartheta",
"Iota",
"iota",
"Kappa",
"kappa",
"Lambda",
"lambda",
"Mu",
"mu",
"nu",
"Nu",
"Xi",
"xi",
"Omicron",
"omicron",
"Pi",
"pi",
"Rho",
"rho",
"Sigma",
"sigma",
"sigmaf",
"varsigma",
"Tau",
"Upsilon",
"upsih",
"upsilon",
"Phi",
"phi",
"varphi",
"Chi",
"chi",
"acutex",
"Psi",
"psi",
"tau",
"Omega",
"omega",
"piv",
"varpi",
"partial",
"alefsym",
"aleph",
"gimel",
"beth",
"dalet",
"ETH",
"eth",
"THORN",
"thorn",
"dots",
"cdots",
"hellip",
"middot",
"iexcl",
"iquest",
"shy",
"ndash",
"mdash",
"quot",
"acute",
"ldquo",
"rdquo",
"bdquo",
"lsquo",
"rsquo",
"sbquo",
"laquo",
"raquo",
"lsaquo",
"rsaquo",
"circ",
"vert",
"vbar",
"brvbar",
"S",
"sect",
"amp",
"lt",
"gt",
"tilde",
"slash",
"plus",
"under",
"equal",
"asciicirc",
"dagger",
"dag",
"Dagger",
"ddag",
"nbsp",
"ensp",
"emsp",
"thinsp",
"curren",
"cent",
"pound",
"yen",
"euro",
"EUR",
"dollar",
"USD",
"copy",
"reg",
"trade",
"minus",
"pm",
"plusmn",
"times",
"frasl",
"colon",
"div",
"frac12",
"frac14",
"frac34",
"permil",
"sup1",
"sup2",
"sup3",
"radic",
"sum",
"prod",
"micro",
"macr",
"deg",
"prime",
"Prime",
"infin",
"infty",
"prop",
"propto",
"not",
"neg",
"land",
"wedge",
"lor",
"vee",
"cap",
"cup",
"smile",
"frown",
"int",
"therefore",
"there4",
"because",
"sim",
"cong",
"simeq",
"asymp",
"approx",
"ne",
"neq",
"equiv",
"triangleq",
"le",
"leq",
"ge",
"geq",
"lessgtr",
"lesseqgtr",
"ll",
"Ll",
"lll",
"gg",
"Gg",
"ggg",
"prec",
"preceq",
"preccurlyeq",
"succ",
"succeq",
"succcurlyeq",
"sub",
"subset",
"sup",
"supset",
"nsub",
"sube",
"nsup",
"supe",
"setminus",
"forall",
"exist",
"exists",
"nexist",
"nexists",
"empty",
"emptyset",
"isin",
"in",
"notin",
"ni",
"nabla",
"ang",
"angle",
"perp",
"parallel",
"sdot",
"cdot",
"lceil",
"rceil",
"lfloor",
"rfloor",
"lang",
"rang",
"langle",
"rangle",
"hbar",
"mho",
"larr",
"leftarrow",
"gets",
"lArr",
"Leftarrow",
"uarr",
"uparrow",
"uArr",
"Uparrow",
"rarr",
"to",
"rightarrow",
"rArr",
"Rightarrow",
"darr",
"downarrow",
"dArr",
"Downarrow",
"harr",
"leftrightarrow",
"hArr",
"Leftrightarrow",
"crarr",
"hookleftarrow",
"arccos",
"arcsin",
"arctan",
"arg",
"cos",
"cosh",
"cot",
"coth",
"csc",
"deg",
"det",
"dim",
"exp",
"gcd",
"hom",
"inf",
"ker",
"lg",
"lim",
"liminf",
"limsup",
"ln",
"log",
"max",
"min",
"Pr",
"sec",
"sin",
"sinh",
"sup",
"tan",
"tanh",
"bull",
"bullet",
"star",
"lowast",
"ast",
"odot",
"oplus",
"otimes",
"check",
"checkmark",
"para",
"ordf",
"ordm",
"cedil",
"oline",
"uml",
"zwnj",
"zwj",
"lrm",
"rlm",
"smiley",
"blacksmile",
"sad",
"frowny",
"clubs",
"clubsuit",
"spades",
"spadesuit",
"hearts",
"heartsuit",
"diams",
"diamondsuit",
"diamond",
"Diamond",
"loz",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
"_ ",
];
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub(crate) fn entity<'b, 'g, 'r, 's>(
context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Entity<'s>> {
let (remaining, _) = tag("\\")(input)?;
let (remaining, entity_name) = name(context, remaining)?;
let (remaining, _trailing_whitespace) =
maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?;
let source = get_consumed(input, remaining);
Ok((
remaining,
Entity {
source: source.into(),
entity_name: entity_name.into(),
},
))
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn name<'b, 'g, 'r, 's>(
_context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
// TODO: This should be defined by org-entities and optionally org-entities-user
for entity in ORG_ENTITIES {
let result = tuple((
tag::<_, _, CustomError<_>>(entity),
alt((tag("{}"), peek(recognize(entity_end)))),
))(input);
match result {
Ok((remaining, (ent, _))) => {
return Ok((remaining, ent));
}
Err(_) => {}
}
}
Err(nom::Err::Error(CustomError::MyError(MyError(
"NoEntity".into(),
))))
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn entity_end<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, ()> {
let (remaining, _) = alt((eof, recognize(satisfy(|c| !c.is_alphabetic()))))(input)?;
Ok((remaining, ()))
}