From 3206027b965df347d9a752318187d645cdabdce3 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 29 Aug 2023 15:10:27 -0400 Subject: [PATCH] Add all entities. --- build.rs | 1 + elisp_snippets/README.md | 1 + elisp_snippets/dump_org_entities.el | 5 + src/parser/entity.rs | 448 +++++++++++++++++++++++++++- tests/test_template | 2 +- 5 files changed, 441 insertions(+), 16 deletions(-) create mode 100644 elisp_snippets/README.md create mode 100644 elisp_snippets/dump_org_entities.el diff --git a/build.rs b/build.rs index c8b3c1dd..284d86df 100644 --- a/build.rs +++ b/build.rs @@ -75,6 +75,7 @@ fn is_expect_fail(name: &str) -> Option<&str> { "autogen_greater_element_drawer_drawer_with_headline_inside" => Some("Apparently lines with :end: become their own paragraph. This odd behavior needs to be investigated more."), "autogen_element_container_priority_footnote_definition_dynamic_block" => Some("Apparently broken begin lines become their own paragraph."), "autogen_lesser_element_paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."), + "autogen_sections_and_headings_empty_section" => Some("We are not yet handling empty sections properly."), _ => None, } } diff --git a/elisp_snippets/README.md b/elisp_snippets/README.md new file mode 100644 index 00000000..e21bab12 --- /dev/null +++ b/elisp_snippets/README.md @@ -0,0 +1 @@ +This folder is for snippets of elisp that are useful for development. diff --git a/elisp_snippets/dump_org_entities.el b/elisp_snippets/dump_org_entities.el new file mode 100644 index 00000000..530d6c3b --- /dev/null +++ b/elisp_snippets/dump_org_entities.el @@ -0,0 +1,5 @@ +(dolist (var org-entities) + (when (listp var) + (message "\"%s\"," (nth 0 var)) + ) + ) diff --git a/src/parser/entity.rs b/src/parser/entity.rs index 1774d58b..8bd7186e 100644 --- a/src/parser/entity.rs +++ b/src/parser/entity.rs @@ -9,11 +9,428 @@ use nom::combinator::recognize; use super::org_source::OrgSource; use super::Context; +use crate::error::CustomError; +use crate::error::MyError; use crate::error::Res; use crate::parser::object::Entity; -use crate::parser::parser_with_context::parser_with_context; use crate::parser::util::get_consumed; +const ENTITIES: [&'static str; 413] = [ + "Agrave", + "agrave", + "Aacute", + "aacute", + "Acirc", + "acirc", + "Amacr", + "amacr", + "Atilde", + "atilde", + "Auml", + "auml", + "Aring", + "AA", + "aring", + "AElig", + "aelig", + "Ccedil", + "ccedil", + "Egrave", + "egrave", + "Eacute", + "eacute", + "Ecirc", + "ecirc", + "Euml", + "euml", + "Igrave", + "igrave", + "Iacute", + "iacute", + "Idot", + "inodot", + "Icirc", + "icirc", + "Iuml", + "iuml", + "Ntilde", + "ntilde", + "Ograve", + "ograve", + "Oacute", + "oacute", + "Ocirc", + "ocirc", + "Otilde", + "otilde", + "Ouml", + "ouml", + "Oslash", + "oslash", + "OElig", + "oelig", + "Scaron", + "scaron", + "szlig", + "Ugrave", + "ugrave", + "Uacute", + "uacute", + "Ucirc", + "ucirc", + "Uuml", + "uuml", + "Yacute", + "yacute", + "Yuml", + "yuml", + "fnof", + "real", + "image", + "weierp", + "ell", + "imath", + "jmath", + "Alpha", + "alpha", + "Beta", + "beta", + "Gamma", + "gamma", + "Delta", + "delta", + "Epsilon", + "epsilon", + "varepsilon", + "Zeta", + "zeta", + "Eta", + "eta", + "Theta", + "theta", + "thetasym", + "vartheta", + "Iota", + "iota", + "Kappa", + "kappa", + "Lambda", + "lambda", + "Mu", + "mu", + "nu", + "Nu", + "Xi", + "xi", + "Omicron", + "omicron", + "Pi", + "pi", + "Rho", + "rho", + "Sigma", + "sigma", + "sigmaf", + "varsigma", + "Tau", + "Upsilon", + "upsih", + "upsilon", + "Phi", + "phi", + "varphi", + "Chi", + "chi", + "acutex", + "Psi", + "psi", + "tau", + "Omega", + "omega", + "piv", + "varpi", + "partial", + "alefsym", + "aleph", + "gimel", + "beth", + "dalet", + "ETH", + "eth", + "THORN", + "thorn", + "dots", + "cdots", + "hellip", + "middot", + "iexcl", + "iquest", + "shy", + "ndash", + "mdash", + "quot", + "acute", + "ldquo", + "rdquo", + "bdquo", + "lsquo", + "rsquo", + "sbquo", + "laquo", + "raquo", + "lsaquo", + "rsaquo", + "circ", + "vert", + "vbar", + "brvbar", + "S", + "sect", + "amp", + "lt", + "gt", + "tilde", + "slash", + "plus", + "under", + "equal", + "asciicirc", + "dagger", + "dag", + "Dagger", + "ddag", + "nbsp", + "ensp", + "emsp", + "thinsp", + "curren", + "cent", + "pound", + "yen", + "euro", + "EUR", + "dollar", + "USD", + "copy", + "reg", + "trade", + "minus", + "pm", + "plusmn", + "times", + "frasl", + "colon", + "div", + "frac12", + "frac14", + "frac34", + "permil", + "sup1", + "sup2", + "sup3", + "radic", + "sum", + "prod", + "micro", + "macr", + "deg", + "prime", + "Prime", + "infin", + "infty", + "prop", + "propto", + "not", + "neg", + "land", + "wedge", + "lor", + "vee", + "cap", + "cup", + "smile", + "frown", + "int", + "therefore", + "there4", + "because", + "sim", + "cong", + "simeq", + "asymp", + "approx", + "ne", + "neq", + "equiv", + "triangleq", + "le", + "leq", + "ge", + "geq", + "lessgtr", + "lesseqgtr", + "ll", + "Ll", + "lll", + "gg", + "Gg", + "ggg", + "prec", + "preceq", + "preccurlyeq", + "succ", + "succeq", + "succcurlyeq", + "sub", + "subset", + "sup", + "supset", + "nsub", + "sube", + "nsup", + "supe", + "setminus", + "forall", + "exist", + "exists", + "nexist", + "nexists", + "empty", + "emptyset", + "isin", + "in", + "notin", + "ni", + "nabla", + "ang", + "angle", + "perp", + "parallel", + "sdot", + "cdot", + "lceil", + "rceil", + "lfloor", + "rfloor", + "lang", + "rang", + "langle", + "rangle", + "hbar", + "mho", + "larr", + "leftarrow", + "gets", + "lArr", + "Leftarrow", + "uarr", + "uparrow", + "uArr", + "Uparrow", + "rarr", + "to", + "rightarrow", + "rArr", + "Rightarrow", + "darr", + "downarrow", + "dArr", + "Downarrow", + "harr", + "leftrightarrow", + "hArr", + "Leftrightarrow", + "crarr", + "hookleftarrow", + "arccos", + "arcsin", + "arctan", + "arg", + "cos", + "cosh", + "cot", + "coth", + "csc", + "deg", + "det", + "dim", + "exp", + "gcd", + "hom", + "inf", + "ker", + "lg", + "lim", + "liminf", + "limsup", + "ln", + "log", + "max", + "min", + "Pr", + "sec", + "sin", + "sinh", + "sup", + "tan", + "tanh", + "bull", + "bullet", + "star", + "lowast", + "ast", + "odot", + "oplus", + "otimes", + "check", + "checkmark", + "para", + "ordf", + "ordm", + "cedil", + "oline", + "uml", + "zwnj", + "zwj", + "lrm", + "rlm", + "smiley", + "blacksmile", + "sad", + "frowny", + "clubs", + "clubsuit", + "spades", + "spadesuit", + "hearts", + "heartsuit", + "diams", + "diamondsuit", + "diamond", + "Diamond", + "loz", + "_ ", + "_ ", + "_ ", + "_ ", + "_ ", + "_ ", + "_ ", + "_ ", + "_ ", + "_ ", + "_ ", + "_ ", + "_ ", + "_ ", + "_ ", + "_ ", + "_ ", + "_ ", + "_ ", + "_ ", +]; + #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] pub fn entity<'r, 's>( context: Context<'r, 's>, @@ -21,10 +438,7 @@ pub fn entity<'r, 's>( ) -> Res, Entity<'s>> { let (remaining, _) = tag("\\")(input)?; let (remaining, entity_name) = name(context, remaining)?; - let (remaining, _) = alt(( - tag("{}"), - peek(recognize(parser_with_context!(entity_end)(context))), - ))(remaining)?; + let (remaining, _) = alt((tag("{}"), peek(recognize(entity_end))))(remaining)?; let (remaining, _) = space0(remaining)?; let source = get_consumed(input, remaining); @@ -43,20 +457,24 @@ fn name<'r, 's>( input: OrgSource<'s>, ) -> Res, OrgSource<'s>> { // TODO: This should be defined by org-entities and optionally org-entities-user + for entity in ENTITIES { + // foo + let result = tag_no_case::<_, _, CustomError<_>>(entity)(input); + match result { + Ok((remaining, ent)) => { + return Ok((remaining, ent)); + } + Err(_) => {} + } + } - // TODO: Add the rest of the entities, this is a very incomplete list - let (remaining, proto) = alt((alt(( - tag_no_case("delta"), - tag_no_case("pi"), - tag_no_case("ast"), - tag_no_case("lt"), - tag_no_case("gt"), - )),))(input)?; - Ok((remaining, proto)) + Err(nom::Err::Error(CustomError::MyError(MyError( + "NoEntity".into(), + )))) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn entity_end<'r, 's>(_context: Context<'r, 's>, input: OrgSource<'s>) -> Res, ()> { +fn entity_end<'s>(input: OrgSource<'s>) -> Res, ()> { let (remaining, _) = alt((eof, recognize(satisfy(|c| !c.is_alphabetic()))))(input)?; Ok((remaining, ())) diff --git a/tests/test_template b/tests/test_template index 442db660..a8b80318 100644 --- a/tests/test_template +++ b/tests/test_template @@ -11,7 +11,7 @@ fn {name}() {{ let diff_result = compare_document(&parsed_sexp, &rust_parsed).expect("Compare parsed documents."); diff_result - .print() + .print(org_contents.as_str()) .expect("Print document parse tree diff."); assert!(!diff_result.is_bad()); assert_eq!(remaining, "");