From f79d07a7c8f03c4c325ada7d23a89efb87ae8fe7 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Wed, 6 Sep 2023 19:49:04 -0400 Subject: [PATCH 01/45] Compare howard abrams dotfiles. --- docker/organic_test/Dockerfile | 4 ++++ docker/organic_test/foreign_document_test_entrypoint.sh | 1 + 2 files changed, 5 insertions(+) diff --git a/docker/organic_test/Dockerfile b/docker/organic_test/Dockerfile index 246f1677..a9c39f0f 100644 --- a/docker/organic_test/Dockerfile +++ b/docker/organic_test/Dockerfile @@ -36,7 +36,10 @@ ENTRYPOINT ["cargo", "test"] FROM build as foreign-document-gather +ARG HOWARD_ABRAMS_DOT_FILES_VERSION=1b54fe75d74670dc7bcbb6b01ea560c45528c628 +ARG HOWARD_ABRAMS_DOT_FILES_PATH=/foreign_documents/howardabrams/dot-files RUN mkdir /foreign_documents +RUN mkdir -p $HOWARD_ABRAMS_DOT_FILES_PATH && git -C $HOWARD_ABRAMS_DOT_FILES_PATH init --initial-branch=main && git -C $HOWARD_ABRAMS_DOT_FILES_PATH remote add origin https://github.com/howardabrams/dot-files.git && git -C $HOWARD_ABRAMS_DOT_FILES_PATH fetch origin $HOWARD_ABRAMS_DOT_FILES_VERSION && git -C $HOWARD_ABRAMS_DOT_FILES_PATH checkout FETCH_HEAD FROM tester as foreign-document-test @@ -44,6 +47,7 @@ RUN apk add --no-cache bash coreutils RUN mkdir /foreign_documents COPY --from=build-org-mode /root/org-mode /foreign_documents/org-mode COPY --from=build-emacs /root/emacs /foreign_documents/emacs +COPY --from=foreign-document-gather /foreign_documents/howardabrams/dot-files /foreign_documents/howardabrams/dot-files COPY foreign_document_test_entrypoint.sh /entrypoint.sh RUN chmod +x /entrypoint.sh ENTRYPOINT ["/entrypoint.sh"] diff --git a/docker/organic_test/foreign_document_test_entrypoint.sh b/docker/organic_test/foreign_document_test_entrypoint.sh index f722c4da..b6ba9b12 100644 --- a/docker/organic_test/foreign_document_test_entrypoint.sh +++ b/docker/organic_test/foreign_document_test_entrypoint.sh @@ -27,6 +27,7 @@ function main { run_compare_function "org-mode" compare_all_org_document 
"/foreign_documents/org-mode" run_compare_function "emacs" compare_all_org_document "/foreign_documents/emacs" + run_compare_function "howard_abrams_dot_files" compare_all_org_document "/foreign_documents/howardabrams/dot-files" } function green_text { From dda2b1e69f016b8b23a199eda28906d15c75d619 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Wed, 6 Sep 2023 20:56:36 -0400 Subject: [PATCH 02/45] Compare howard abrams hamacs. --- docker/organic_test/Dockerfile | 9 +++- .../foreign_document_test_entrypoint.sh | 44 ++++++++++++++++--- 2 files changed, 45 insertions(+), 8 deletions(-) diff --git a/docker/organic_test/Dockerfile b/docker/organic_test/Dockerfile index a9c39f0f..11541588 100644 --- a/docker/organic_test/Dockerfile +++ b/docker/organic_test/Dockerfile @@ -38,8 +38,13 @@ ENTRYPOINT ["cargo", "test"] FROM build as foreign-document-gather ARG HOWARD_ABRAMS_DOT_FILES_VERSION=1b54fe75d74670dc7bcbb6b01ea560c45528c628 ARG HOWARD_ABRAMS_DOT_FILES_PATH=/foreign_documents/howardabrams/dot-files +ARG HOWARD_ABRAMS_DOT_FILES_REPO=https://github.com/howardabrams/dot-files.git RUN mkdir /foreign_documents -RUN mkdir -p $HOWARD_ABRAMS_DOT_FILES_PATH && git -C $HOWARD_ABRAMS_DOT_FILES_PATH init --initial-branch=main && git -C $HOWARD_ABRAMS_DOT_FILES_PATH remote add origin https://github.com/howardabrams/dot-files.git && git -C $HOWARD_ABRAMS_DOT_FILES_PATH fetch origin $HOWARD_ABRAMS_DOT_FILES_VERSION && git -C $HOWARD_ABRAMS_DOT_FILES_PATH checkout FETCH_HEAD +RUN mkdir -p $HOWARD_ABRAMS_DOT_FILES_PATH && git -C $HOWARD_ABRAMS_DOT_FILES_PATH init --initial-branch=main && git -C $HOWARD_ABRAMS_DOT_FILES_PATH remote add origin $HOWARD_ABRAMS_DOT_FILES_REPO && git -C $HOWARD_ABRAMS_DOT_FILES_PATH fetch origin $HOWARD_ABRAMS_DOT_FILES_VERSION && git -C $HOWARD_ABRAMS_DOT_FILES_PATH checkout FETCH_HEAD +ARG HOWARD_ABRAMS_HAMACS_VERSION=da51188cc195d41882175d412fe40a8bc5730c5c +ARG HOWARD_ABRAMS_HAMACS_PATH=/foreign_documents/howardabrams/hamacs +ARG 
HOWARD_ABRAMS_HAMACS_REPO=https://github.com/howardabrams/hamacs.git +RUN mkdir -p $HOWARD_ABRAMS_HAMACS_PATH && git -C $HOWARD_ABRAMS_HAMACS_PATH init --initial-branch=main && git -C $HOWARD_ABRAMS_HAMACS_PATH remote add origin $HOWARD_ABRAMS_HAMACS_REPO && git -C $HOWARD_ABRAMS_HAMACS_PATH fetch origin $HOWARD_ABRAMS_HAMACS_VERSION && git -C $HOWARD_ABRAMS_HAMACS_PATH checkout FETCH_HEAD FROM tester as foreign-document-test @@ -47,7 +52,7 @@ RUN apk add --no-cache bash coreutils RUN mkdir /foreign_documents COPY --from=build-org-mode /root/org-mode /foreign_documents/org-mode COPY --from=build-emacs /root/emacs /foreign_documents/emacs -COPY --from=foreign-document-gather /foreign_documents/howardabrams/dot-files /foreign_documents/howardabrams/dot-files +COPY --from=foreign-document-gather /foreign_documents/howardabrams /foreign_documents/howardabrams COPY foreign_document_test_entrypoint.sh /entrypoint.sh RUN chmod +x /entrypoint.sh ENTRYPOINT ["/entrypoint.sh"] diff --git a/docker/organic_test/foreign_document_test_entrypoint.sh b/docker/organic_test/foreign_document_test_entrypoint.sh index b6ba9b12..8e561973 100644 --- a/docker/organic_test/foreign_document_test_entrypoint.sh +++ b/docker/organic_test/foreign_document_test_entrypoint.sh @@ -25,9 +25,23 @@ function main { fi PARSE="${CARGO_TARGET_DIR}/release-lto/parse" - run_compare_function "org-mode" compare_all_org_document "/foreign_documents/org-mode" - run_compare_function "emacs" compare_all_org_document "/foreign_documents/emacs" - run_compare_function "howard_abrams_dot_files" compare_all_org_document "/foreign_documents/howardabrams/dot-files" + local all_status=0 + set +e + + (run_compare_function "org-mode" compare_all_org_document "/foreign_documents/org-mode") + if [ "$?" -ne 0 ]; then all_status=1; fi + (run_compare_function "emacs" compare_all_org_document "/foreign_documents/emacs") + if [ "$?" 
-ne 0 ]; then all_status=1; fi + (run_compare_function "howard_abrams" compare_howard_abrams) + if [ "$?" -ne 0 ]; then all_status=1; fi + + set -e + if [ "$all_status" -ne 0 ]; then + echo "$(red_text "Some tests failed.")" + else + echo "$(green_text "All tests passed.")" + fi + return "$all_status" } function green_text { @@ -73,17 +87,22 @@ function run_compare_function { function compare_all_org_document { local root_dir="$1" local target_document - find "$root_dir" -type f -iname '*.org' | while read target_document; do + local all_status=0 + while read target_document; do local relative_path=$($REALPATH --relative-to "$root_dir" "$target_document") + set +e (run_compare "$relative_path" "$target_document") - done + if [ "$?" -ne 0 ]; then all_status=1; fi + set -e + done<<<$(find "$root_dir" -type f -iname '*.org') + return "$all_status" } function run_compare { local name="$1" local target_document="$2" set +e - $PARSE "$target_document" &> /dev/null + ($PARSE "$target_document" &> /dev/null) local status=$? set -e if [ "$status" -eq 0 ]; then @@ -94,4 +113,17 @@ function run_compare { fi } +function compare_howard_abrams { + local all_status=0 + set +e + + (run_compare_function "dot_files" compare_all_org_document "/foreign_documents/howardabrams/dot-files") + if [ "$?" -ne 0 ]; then all_status=1; fi + (run_compare_function "hamacs" compare_all_org_document "/foreign_documents/howardabrams/hamacs") + if [ "$?" -ne 0 ]; then all_status=1; fi + + set -e + return "$all_status" +} + main "${@}" From fcea7e5a4bcf1ebf637413907f3d90e5ee94045a Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Wed, 6 Sep 2023 21:11:46 -0400 Subject: [PATCH 03/45] Add howard abrams demo-it and the upstream doomemacs repo to compare. 
--- docker/organic_test/Dockerfile | 13 +++++++++++++ .../foreign_document_test_entrypoint.sh | 6 +++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/docker/organic_test/Dockerfile b/docker/organic_test/Dockerfile index 11541588..b62517e7 100644 --- a/docker/organic_test/Dockerfile +++ b/docker/organic_test/Dockerfile @@ -36,16 +36,28 @@ ENTRYPOINT ["cargo", "test"] FROM build as foreign-document-gather + ARG HOWARD_ABRAMS_DOT_FILES_VERSION=1b54fe75d74670dc7bcbb6b01ea560c45528c628 ARG HOWARD_ABRAMS_DOT_FILES_PATH=/foreign_documents/howardabrams/dot-files ARG HOWARD_ABRAMS_DOT_FILES_REPO=https://github.com/howardabrams/dot-files.git RUN mkdir /foreign_documents RUN mkdir -p $HOWARD_ABRAMS_DOT_FILES_PATH && git -C $HOWARD_ABRAMS_DOT_FILES_PATH init --initial-branch=main && git -C $HOWARD_ABRAMS_DOT_FILES_PATH remote add origin $HOWARD_ABRAMS_DOT_FILES_REPO && git -C $HOWARD_ABRAMS_DOT_FILES_PATH fetch origin $HOWARD_ABRAMS_DOT_FILES_VERSION && git -C $HOWARD_ABRAMS_DOT_FILES_PATH checkout FETCH_HEAD + ARG HOWARD_ABRAMS_HAMACS_VERSION=da51188cc195d41882175d412fe40a8bc5730c5c ARG HOWARD_ABRAMS_HAMACS_PATH=/foreign_documents/howardabrams/hamacs ARG HOWARD_ABRAMS_HAMACS_REPO=https://github.com/howardabrams/hamacs.git RUN mkdir -p $HOWARD_ABRAMS_HAMACS_PATH && git -C $HOWARD_ABRAMS_HAMACS_PATH init --initial-branch=main && git -C $HOWARD_ABRAMS_HAMACS_PATH remote add origin $HOWARD_ABRAMS_HAMACS_REPO && git -C $HOWARD_ABRAMS_HAMACS_PATH fetch origin $HOWARD_ABRAMS_HAMACS_VERSION && git -C $HOWARD_ABRAMS_HAMACS_PATH checkout FETCH_HEAD +ARG HOWARD_ABRAMS_DEMO_IT_VERSION=e399fd7ceb73caeae7cb50b247359bafcaee2a3f +ARG HOWARD_ABRAMS_DEMO_IT_PATH=/foreign_documents/howardabrams/demo-it +ARG HOWARD_ABRAMS_DEMO_IT_REPO=https://github.com/howardabrams/demo-it.git +RUN mkdir -p $HOWARD_ABRAMS_DEMO_IT_PATH && git -C $HOWARD_ABRAMS_DEMO_IT_PATH init --initial-branch=main && git -C $HOWARD_ABRAMS_DEMO_IT_PATH remote add origin $HOWARD_ABRAMS_DEMO_IT_REPO && git -C 
$HOWARD_ABRAMS_DEMO_IT_PATH fetch origin $HOWARD_ABRAMS_DEMO_IT_VERSION && git -C $HOWARD_ABRAMS_DEMO_IT_PATH checkout FETCH_HEAD + +ARG DOOMEMACS_VERSION=42d5fd83504f8aa80f3248036006fbcd49222943 +ARG DOOMEMACS_PATH=/foreign_documents/doomemacs +ARG DOOMEMACS_REPO=https://github.com/doomemacs/doomemacs.git +RUN mkdir -p $DOOMEMACS_PATH && git -C $DOOMEMACS_PATH init --initial-branch=main && git -C $DOOMEMACS_PATH remote add origin $DOOMEMACS_REPO && git -C $DOOMEMACS_PATH fetch origin $DOOMEMACS_VERSION && git -C $DOOMEMACS_PATH checkout FETCH_HEAD + FROM tester as foreign-document-test RUN apk add --no-cache bash coreutils @@ -53,6 +65,7 @@ RUN mkdir /foreign_documents COPY --from=build-org-mode /root/org-mode /foreign_documents/org-mode COPY --from=build-emacs /root/emacs /foreign_documents/emacs COPY --from=foreign-document-gather /foreign_documents/howardabrams /foreign_documents/howardabrams +COPY --from=foreign-document-gather /foreign_documents/doomemacs /foreign_documents/doomemacs COPY foreign_document_test_entrypoint.sh /entrypoint.sh RUN chmod +x /entrypoint.sh ENTRYPOINT ["/entrypoint.sh"] diff --git a/docker/organic_test/foreign_document_test_entrypoint.sh b/docker/organic_test/foreign_document_test_entrypoint.sh index 8e561973..1fcde1f8 100644 --- a/docker/organic_test/foreign_document_test_entrypoint.sh +++ b/docker/organic_test/foreign_document_test_entrypoint.sh @@ -34,6 +34,8 @@ function main { if [ "$?" -ne 0 ]; then all_status=1; fi (run_compare_function "howard_abrams" compare_howard_abrams) if [ "$?" -ne 0 ]; then all_status=1; fi + (run_compare_function "doomemacs" compare_all_org_document "/foreign_documents/doomemacs") + if [ "$?" 
-ne 0 ]; then all_status=1; fi set -e if [ "$all_status" -ne 0 ]; then @@ -117,10 +119,12 @@ function compare_howard_abrams { local all_status=0 set +e - (run_compare_function "dot_files" compare_all_org_document "/foreign_documents/howardabrams/dot-files") + (run_compare_function "dot-files" compare_all_org_document "/foreign_documents/howardabrams/dot-files") if [ "$?" -ne 0 ]; then all_status=1; fi (run_compare_function "hamacs" compare_all_org_document "/foreign_documents/howardabrams/hamacs") if [ "$?" -ne 0 ]; then all_status=1; fi + (run_compare_function "demo-it" compare_all_org_document "/foreign_documents/howardabrams/demo-it") + if [ "$?" -ne 0 ]; then all_status=1; fi set -e return "$all_status" From 827f3e1c98230e6f2196ca4f3ce831f963538177 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Wed, 6 Sep 2023 21:37:09 -0400 Subject: [PATCH 04/45] Add the rest of the relevant howard abrams repos. --- docker/organic_test/Dockerfile | 30 +++++++++++++++++++ .../foreign_document_test_entrypoint.sh | 12 ++++++++ 2 files changed, 42 insertions(+) diff --git a/docker/organic_test/Dockerfile b/docker/organic_test/Dockerfile index b62517e7..d3ab62c0 100644 --- a/docker/organic_test/Dockerfile +++ b/docker/organic_test/Dockerfile @@ -53,6 +53,36 @@ ARG HOWARD_ABRAMS_DEMO_IT_PATH=/foreign_documents/howardabrams/demo-it ARG HOWARD_ABRAMS_DEMO_IT_REPO=https://github.com/howardabrams/demo-it.git RUN mkdir -p $HOWARD_ABRAMS_DEMO_IT_PATH && git -C $HOWARD_ABRAMS_DEMO_IT_PATH init --initial-branch=main && git -C $HOWARD_ABRAMS_DEMO_IT_PATH remote add origin $HOWARD_ABRAMS_DEMO_IT_REPO && git -C $HOWARD_ABRAMS_DEMO_IT_PATH fetch origin $HOWARD_ABRAMS_DEMO_IT_VERSION && git -C $HOWARD_ABRAMS_DEMO_IT_PATH checkout FETCH_HEAD +ARG HOWARD_ABRAMS_MAGIT_DEMO_VERSION=59e82f6bc7c18f550478d86a8f680c3f2da66985 +ARG HOWARD_ABRAMS_MAGIT_DEMO_PATH=/foreign_documents/howardabrams/magit-demo +ARG HOWARD_ABRAMS_MAGIT_DEMO_REPO=https://github.com/howardabrams/magit-demo.git +RUN mkdir -p 
$HOWARD_ABRAMS_MAGIT_DEMO_PATH && git -C $HOWARD_ABRAMS_MAGIT_DEMO_PATH init --initial-branch=main && git -C $HOWARD_ABRAMS_MAGIT_DEMO_PATH remote add origin $HOWARD_ABRAMS_MAGIT_DEMO_REPO && git -C $HOWARD_ABRAMS_MAGIT_DEMO_PATH fetch origin $HOWARD_ABRAMS_MAGIT_DEMO_VERSION && git -C $HOWARD_ABRAMS_MAGIT_DEMO_PATH checkout FETCH_HEAD + +ARG HOWARD_ABRAMS_PDX_EMACS_HACKERS_VERSION=bfb7bd640fdf0ce3def21f9fc591ed35d776b26d +ARG HOWARD_ABRAMS_PDX_EMACS_HACKERS_PATH=/foreign_documents/howardabrams/pdx-emacs-hackers +ARG HOWARD_ABRAMS_PDX_EMACS_HACKERS_REPO=https://github.com/howardabrams/pdx-emacs-hackers.git +RUN mkdir -p $HOWARD_ABRAMS_PDX_EMACS_HACKERS_PATH && git -C $HOWARD_ABRAMS_PDX_EMACS_HACKERS_PATH init --initial-branch=main && git -C $HOWARD_ABRAMS_PDX_EMACS_HACKERS_PATH remote add origin $HOWARD_ABRAMS_PDX_EMACS_HACKERS_REPO && git -C $HOWARD_ABRAMS_PDX_EMACS_HACKERS_PATH fetch origin $HOWARD_ABRAMS_PDX_EMACS_HACKERS_VERSION && git -C $HOWARD_ABRAMS_PDX_EMACS_HACKERS_PATH checkout FETCH_HEAD + +ARG HOWARD_ABRAMS_FLORA_SIMULATOR_VERSION=50de13068722b9e3878f8598b749b7ccd14e7f8e +ARG HOWARD_ABRAMS_FLORA_SIMULATOR_PATH=/foreign_documents/howardabrams/flora-simulator +ARG HOWARD_ABRAMS_FLORA_SIMULATOR_REPO=https://github.com/howardabrams/flora-simulator.git +RUN mkdir -p $HOWARD_ABRAMS_FLORA_SIMULATOR_PATH && git -C $HOWARD_ABRAMS_FLORA_SIMULATOR_PATH init --initial-branch=main && git -C $HOWARD_ABRAMS_FLORA_SIMULATOR_PATH remote add origin $HOWARD_ABRAMS_FLORA_SIMULATOR_REPO && git -C $HOWARD_ABRAMS_FLORA_SIMULATOR_PATH fetch origin $HOWARD_ABRAMS_FLORA_SIMULATOR_VERSION && git -C $HOWARD_ABRAMS_FLORA_SIMULATOR_PATH checkout FETCH_HEAD + +ARG HOWARD_ABRAMS_LITERATE_DEVOPS_DEMO_VERSION=2d7a5e41001a1adf7ec24aeb6acc8525a72d7892 +ARG HOWARD_ABRAMS_LITERATE_DEVOPS_DEMO_PATH=/foreign_documents/howardabrams/literate-devops-demo +ARG HOWARD_ABRAMS_LITERATE_DEVOPS_DEMO_REPO=https://github.com/howardabrams/literate-devops-demo.git +RUN mkdir -p 
$HOWARD_ABRAMS_LITERATE_DEVOPS_DEMO_PATH && git -C $HOWARD_ABRAMS_LITERATE_DEVOPS_DEMO_PATH init --initial-branch=main && git -C $HOWARD_ABRAMS_LITERATE_DEVOPS_DEMO_PATH remote add origin $HOWARD_ABRAMS_LITERATE_DEVOPS_DEMO_REPO && git -C $HOWARD_ABRAMS_LITERATE_DEVOPS_DEMO_PATH fetch origin $HOWARD_ABRAMS_LITERATE_DEVOPS_DEMO_VERSION && git -C $HOWARD_ABRAMS_LITERATE_DEVOPS_DEMO_PATH checkout FETCH_HEAD + +ARG HOWARD_ABRAMS_CLOJURE_YESQL_XP_VERSION=b651c7f8b47b2710e99fce9652980902bbc1c6c9 +ARG HOWARD_ABRAMS_CLOJURE_YESQL_XP_PATH=/foreign_documents/howardabrams/clojure-yesql-xp +ARG HOWARD_ABRAMS_CLOJURE_YESQL_XP_REPO=https://github.com/howardabrams/clojure-yesql-xp.git +RUN mkdir -p $HOWARD_ABRAMS_CLOJURE_YESQL_XP_PATH && git -C $HOWARD_ABRAMS_CLOJURE_YESQL_XP_PATH init --initial-branch=main && git -C $HOWARD_ABRAMS_CLOJURE_YESQL_XP_PATH remote add origin $HOWARD_ABRAMS_CLOJURE_YESQL_XP_REPO && git -C $HOWARD_ABRAMS_CLOJURE_YESQL_XP_PATH fetch origin $HOWARD_ABRAMS_CLOJURE_YESQL_XP_VERSION && git -C $HOWARD_ABRAMS_CLOJURE_YESQL_XP_PATH checkout FETCH_HEAD + +ARG HOWARD_ABRAMS_VEEP_VERSION=e37fcf63a5c4a526255735ee34955528b3b280ae +ARG HOWARD_ABRAMS_VEEP_PATH=/foreign_documents/howardabrams/veep +ARG HOWARD_ABRAMS_VEEP_REPO=https://github.com/howardabrams/veep.git +RUN mkdir -p $HOWARD_ABRAMS_VEEP_PATH && git -C $HOWARD_ABRAMS_VEEP_PATH init --initial-branch=main && git -C $HOWARD_ABRAMS_VEEP_PATH remote add origin $HOWARD_ABRAMS_VEEP_REPO && git -C $HOWARD_ABRAMS_VEEP_PATH fetch origin $HOWARD_ABRAMS_VEEP_VERSION && git -C $HOWARD_ABRAMS_VEEP_PATH checkout FETCH_HEAD + ARG DOOMEMACS_VERSION=42d5fd83504f8aa80f3248036006fbcd49222943 ARG DOOMEMACS_PATH=/foreign_documents/doomemacs ARG DOOMEMACS_REPO=https://github.com/doomemacs/doomemacs.git diff --git a/docker/organic_test/foreign_document_test_entrypoint.sh b/docker/organic_test/foreign_document_test_entrypoint.sh index 1fcde1f8..b1755d3a 100644 --- a/docker/organic_test/foreign_document_test_entrypoint.sh +++ 
b/docker/organic_test/foreign_document_test_entrypoint.sh @@ -125,6 +125,18 @@ function compare_howard_abrams { if [ "$?" -ne 0 ]; then all_status=1; fi (run_compare_function "demo-it" compare_all_org_document "/foreign_documents/howardabrams/demo-it") if [ "$?" -ne 0 ]; then all_status=1; fi + (run_compare_function "magit-demo" compare_all_org_document "/foreign_documents/howardabrams/magit-demo") + if [ "$?" -ne 0 ]; then all_status=1; fi + (run_compare_function "pdx-emacs-hackers" compare_all_org_document "/foreign_documents/howardabrams/pdx-emacs-hackers") + if [ "$?" -ne 0 ]; then all_status=1; fi + (run_compare_function "flora-simulator" compare_all_org_document "/foreign_documents/howardabrams/flora-simulator") + if [ "$?" -ne 0 ]; then all_status=1; fi + (run_compare_function "literate-devops-demo" compare_all_org_document "/foreign_documents/howardabrams/literate-devops-demo") + if [ "$?" -ne 0 ]; then all_status=1; fi + (run_compare_function "clojure-yesql-xp" compare_all_org_document "/foreign_documents/howardabrams/clojure-yesql-xp") + if [ "$?" -ne 0 ]; then all_status=1; fi + (run_compare_function "veep" compare_all_org_document "/foreign_documents/howardabrams/veep") + if [ "$?" 
-ne 0 ]; then all_status=1; fi set -e return "$all_status" From facbe716e9c8ec6f47d30df9aa4cc7271bc7e4f8 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 7 Sep 2023 01:23:15 -0400 Subject: [PATCH 05/45] Cleanup --- src/parser/text_markup.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parser/text_markup.rs b/src/parser/text_markup.rs index 48f6eea9..922d3f6a 100644 --- a/src/parser/text_markup.rs +++ b/src/parser/text_markup.rs @@ -179,7 +179,8 @@ fn _text_markup_object<'b, 'g, 'r, 's, 'c>( ) -> Res, Vec>> { let (remaining, _) = pre(context, input)?; let (remaining, open) = tag(marker_symbol)(remaining)?; - let (remaining, _peek_not_whitespace) = peek(not(multispace1))(remaining)?; + let (remaining, _peek_not_whitespace) = + peek(verify(anychar, |c| !c.is_whitespace() && *c != '\u{200B}'))(remaining)?; let text_markup_end_specialized = text_markup_end(open.into()); let parser_context = ContextElement::ExitMatcherNode(ExitMatcherNode { class: ExitClass::Gamma, @@ -277,7 +278,6 @@ pub fn pre<'b, 'g, 'r, 's>( None | Some('\r') | Some('\n') | Some(' ') | Some('\t') | Some('-') | Some('(') | Some('{') | Some('\'') | Some('"') | Some('<') => {} Some(_) => { - // Not at start of line, cannot be a heading return Err(nom::Err::Error(CustomError::MyError(MyError( "Not a valid pre character for text markup.".into(), )))); From 6676012eb15c77438bed71b9b7715d28279da101 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 7 Sep 2023 01:45:02 -0400 Subject: [PATCH 06/45] Change footnote reference class to Gamma. 
--- .../object/footnote_reference/nested_footnote_references.org | 3 +++ src/parser/footnote_reference.rs | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 org_mode_samples/object/footnote_reference/nested_footnote_references.org diff --git a/org_mode_samples/object/footnote_reference/nested_footnote_references.org b/org_mode_samples/object/footnote_reference/nested_footnote_references.org new file mode 100644 index 00000000..310f174b --- /dev/null +++ b/org_mode_samples/object/footnote_reference/nested_footnote_references.org @@ -0,0 +1,3 @@ +*[fn:: /abcdef[fn::ghijklmnopqrstuvw]xyz/ r]* + +*[fn:: /abcdef[fn::ghijk *lmnopq* rstuvw]xyz/ r]* diff --git a/src/parser/footnote_reference.rs b/src/parser/footnote_reference.rs index 0f3738c2..28f8b527 100644 --- a/src/parser/footnote_reference.rs +++ b/src/parser/footnote_reference.rs @@ -42,7 +42,7 @@ fn anonymous_footnote<'b, 'g, 'r, 's>( let (remaining, _) = tag_no_case("[fn::")(input)?; let exit_with_depth = footnote_definition_end(remaining.get_bracket_depth()); let parser_context = ContextElement::ExitMatcherNode(ExitMatcherNode { - class: ExitClass::Beta, + class: ExitClass::Gamma, exit_matcher: &exit_with_depth, }); let parser_context = context.with_additional_node(&parser_context); @@ -78,7 +78,7 @@ fn inline_footnote<'b, 'g, 'r, 's>( let (remaining, _) = tag(":")(remaining)?; let exit_with_depth = footnote_definition_end(remaining.get_bracket_depth()); let parser_context = ContextElement::ExitMatcherNode(ExitMatcherNode { - class: ExitClass::Beta, + class: ExitClass::Gamma, exit_matcher: &exit_with_depth, }); let parser_context = context.with_additional_node(&parser_context); From 6b82b46e09602e3551e3c6390d718434024f06cb Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 7 Sep 2023 02:01:34 -0400 Subject: [PATCH 07/45] Prevent nesting of text markup of the same type. This greatly reduces the amount of detect element calls that are occurring. 
--- src/context/context.rs | 3 +++ src/parser/text_markup.rs | 40 +++++++++++++++++++++++++++++---------- src/parser/util.rs | 15 ++++++++++++++- 3 files changed, 47 insertions(+), 11 deletions(-) diff --git a/src/context/context.rs b/src/context/context.rs index 0f41963b..0baa2e8b 100644 --- a/src/context/context.rs +++ b/src/context/context.rs @@ -21,6 +21,9 @@ pub enum ContextElement<'r, 's> { /// Stores the name of the current element to prevent directly nesting elements of the same type. Context(&'r str), + /// Stores the name of the current object to prevent directly nesting elements of the same type. + ContextObject(&'r str), + /// Indicates if elements should consume the whitespace after them. ConsumeTrailingWhitespace(bool), diff --git a/src/parser/text_markup.rs b/src/parser/text_markup.rs index 922d3f6a..d1313709 100644 --- a/src/parser/text_markup.rs +++ b/src/parser/text_markup.rs @@ -18,6 +18,7 @@ use tracing::span; use super::object_parser::standard_set_object; use super::org_source::OrgSource; use super::radio_link::RematchObject; +use super::util::in_object_section; use super::util::maybe_consume_object_trailing_whitespace_if_not_exiting; use crate::context::parser_with_context; use crate::context::ContextElement; @@ -177,16 +178,26 @@ fn _text_markup_object<'b, 'g, 'r, 's, 'c>( input: OrgSource<'s>, marker_symbol: &'c str, ) -> Res, Vec>> { + if in_object_section(context, marker_symbol) { + return Err(nom::Err::Error(CustomError::MyError(MyError( + "Cannot nest objects of the same type".into(), + )))); + } + let (remaining, _) = pre(context, input)?; let (remaining, open) = tag(marker_symbol)(remaining)?; let (remaining, _peek_not_whitespace) = peek(verify(anychar, |c| !c.is_whitespace() && *c != '\u{200B}'))(remaining)?; let text_markup_end_specialized = text_markup_end(open.into()); - let parser_context = ContextElement::ExitMatcherNode(ExitMatcherNode { - class: ExitClass::Gamma, - exit_matcher: &text_markup_end_specialized, - }); - let 
parser_context = context.with_additional_node(&parser_context); + let contexts = [ + ContextElement::ContextObject(marker_symbol), + ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Gamma, + exit_matcher: &text_markup_end_specialized, + }), + ]; + let parser_context = context.with_additional_node(&contexts[0]); + let parser_context = parser_context.with_additional_node(&contexts[1]); let (remaining, (children, _exit_contents)) = verify( many_till( @@ -230,16 +241,25 @@ fn _text_markup_string<'b, 'g, 'r, 's, 'c>( input: OrgSource<'s>, marker_symbol: &'c str, ) -> Res, OrgSource<'s>> { + if in_object_section(context, marker_symbol) { + return Err(nom::Err::Error(CustomError::MyError(MyError( + "Cannot nest objects of the same type".into(), + )))); + } let (remaining, _) = pre(context, input)?; let (remaining, open) = tag(marker_symbol)(remaining)?; let (remaining, _peek_not_whitespace) = peek(verify(anychar, |c| !c.is_whitespace() && *c != '\u{200B}'))(remaining)?; let text_markup_end_specialized = text_markup_end(open.into()); - let parser_context = ContextElement::ExitMatcherNode(ExitMatcherNode { - class: ExitClass::Gamma, - exit_matcher: &text_markup_end_specialized, - }); - let parser_context = context.with_additional_node(&parser_context); + let contexts = [ + ContextElement::ContextObject(marker_symbol), + ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Gamma, + exit_matcher: &text_markup_end_specialized, + }), + ]; + let parser_context = context.with_additional_node(&contexts[0]); + let parser_context = parser_context.with_additional_node(&contexts[1]); let (remaining, contents) = recognize(verify( many_till( diff --git a/src/parser/util.rs b/src/parser/util.rs index 32576ad2..c715a505 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -24,7 +24,6 @@ pub const WORD_CONSTITUENT_CHARACTERS: &str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; /// Check if we are below a section of the given 
section type regardless of depth -#[allow(dead_code)] pub fn in_section<'b, 'g, 'r, 's, 'x>( context: RefContext<'b, 'g, 'r, 's>, section_name: &'x str, @@ -53,6 +52,20 @@ pub fn immediate_in_section<'b, 'g, 'r, 's, 'x>( false } +/// Check if we are below a section of the given section type regardless of depth +pub fn in_object_section<'b, 'g, 'r, 's, 'x>( + context: RefContext<'b, 'g, 'r, 's>, + section_name: &'x str, +) -> bool { + for thing in context.iter() { + match thing { + ContextElement::ContextObject(name) if *name == section_name => return true, + _ => {} + } + } + false +} + /// Get a slice of the string that was consumed in a parser using the original input to the parser and the remaining input after the parser. pub fn get_consumed<'s>(input: OrgSource<'s>, remaining: OrgSource<'s>) -> OrgSource<'s> { input.get_until(remaining) From ba291c677697b7ff48a6103f5411a6c0b1880c02 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 7 Sep 2023 02:27:55 -0400 Subject: [PATCH 08/45] Unify two places checking if text was preceded by whitespace. 
--- src/parser/subscript_and_superscript.rs | 42 +++++++------------------ src/parser/text_markup.rs | 2 +- src/parser/util.rs | 15 +++++++-- 3 files changed, 24 insertions(+), 35 deletions(-) diff --git a/src/parser/subscript_and_superscript.rs b/src/parser/subscript_and_superscript.rs index 9ad89069..00a76cdb 100644 --- a/src/parser/subscript_and_superscript.rs +++ b/src/parser/subscript_and_superscript.rs @@ -15,6 +15,7 @@ use super::org_source::BracketDepth; use super::org_source::OrgSource; use super::util::exit_matcher_parser; use super::util::maybe_consume_object_trailing_whitespace_if_not_exiting; +use super::util::preceded_by_whitespace; use crate::context::parser_with_context; use crate::context::ContextElement; use crate::context::ContextMatcher; @@ -36,7 +37,7 @@ pub fn subscript<'b, 'g, 'r, 's>( ) -> Res, Subscript<'s>> { // We check for the underscore first before checking the pre-character as a minor optimization to avoid walking up the context tree to find the document root unnecessarily. let (remaining, _) = tag("_")(input)?; - pre(context, input)?; + pre(input)?; let (remaining, _body) = script_body(context, remaining)?; let (remaining, _trailing_whitespace) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; @@ -56,7 +57,7 @@ pub fn superscript<'b, 'g, 'r, 's>( ) -> Res, Superscript<'s>> { // We check for the circumflex first before checking the pre-character as a minor optimization to avoid walking up the context tree to find the document root unnecessarily. 
let (remaining, _) = tag("^")(input)?; - pre(context, input)?; + pre(input)?; let (remaining, _body) = script_body(context, remaining)?; let (remaining, _trailing_whitespace) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; @@ -70,19 +71,8 @@ pub fn superscript<'b, 'g, 'r, 's>( } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn pre<'b, 'g, 'r, 's>( - _context: RefContext<'b, 'g, 'r, 's>, - input: OrgSource<'s>, -) -> Res, ()> { - let preceding_character = input.get_preceding_character(); - match preceding_character { - Some(c) if !c.is_whitespace() => {} - _ => { - return Err(nom::Err::Error(CustomError::MyError(MyError( - "Must be preceded by a non-whitespace character.".into(), - )))); - } - }; +fn pre<'s>(input: OrgSource<'s>) -> Res, ()> { + not(preceded_by_whitespace(true))(input)?; Ok((input, ())) } @@ -120,37 +110,27 @@ fn script_asterisk<'b, 'g, 'r, 's>( #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn script_alphanum<'b, 'g, 'r, 's>( - context: RefContext<'b, 'g, 'r, 's>, + _context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, OrgSource<'s>> { let (remaining, _sign) = opt(recognize(one_of("+-")))(input)?; - let (remaining, _script) = many_till( - parser_with_context!(script_alphanum_character)(context), - parser_with_context!(end_script_alphanum_character)(context), - )(remaining)?; + let (remaining, _script) = + many_till(script_alphanum_character, end_script_alphanum_character)(remaining)?; let source = get_consumed(input, remaining); Ok((remaining, source)) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn script_alphanum_character<'b, 'g, 'r, 's>( - _context: RefContext<'b, 'g, 'r, 's>, - input: OrgSource<'s>, -) -> Res, OrgSource<'s>> { +fn script_alphanum_character<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { recognize(verify(anychar, |c| { c.is_alphanumeric() || r#",.\"#.contains(*c) }))(input) } 
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn end_script_alphanum_character<'b, 'g, 'r, 's>( - context: RefContext<'b, 'g, 'r, 's>, - input: OrgSource<'s>, -) -> Res, OrgSource<'s>> { +fn end_script_alphanum_character<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { let (remaining, final_char) = recognize(verify(anychar, |c| c.is_alphanumeric()))(input)?; - peek(not(parser_with_context!(script_alphanum_character)( - context, - )))(remaining)?; + peek(not(script_alphanum_character))(remaining)?; Ok((remaining, final_char)) } diff --git a/src/parser/text_markup.rs b/src/parser/text_markup.rs index d1313709..e36d1794 100644 --- a/src/parser/text_markup.rs +++ b/src/parser/text_markup.rs @@ -325,7 +325,7 @@ fn _text_markup_end<'b, 'g, 'r, 's, 'c>( input: OrgSource<'s>, marker_symbol: &'c str, ) -> Res, OrgSource<'s>> { - not(preceded_by_whitespace)(input)?; + not(preceded_by_whitespace(false))(input)?; let (remaining, _marker) = terminated( tag(marker_symbol), peek(parser_with_context!(post)(context)), diff --git a/src/parser/util.rs b/src/parser/util.rs index c715a505..6464fe87 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -135,16 +135,25 @@ pub fn start_of_line<'s>(input: OrgSource<'s>) -> Res, ()> { } } +pub fn preceded_by_whitespace( + allow_start_of_file: bool, +) -> impl for<'s> Fn(OrgSource<'s>) -> Res, ()> { + move |input| _preceded_by_whitespace(allow_start_of_file, input) +} + /// Check that we are at the start of a line #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -pub fn preceded_by_whitespace<'s>(input: OrgSource<'s>) -> Res, ()> { +fn _preceded_by_whitespace<'s>( + allow_start_of_file: bool, + input: OrgSource<'s>, +) -> Res, ()> { let preceding_character = input.get_preceding_character(); if !preceding_character .map(|c| c.is_whitespace() || c == '\u{200B}') // 200B = Zero-width space - .unwrap_or(false) + .unwrap_or(allow_start_of_file) { return 
Err(nom::Err::Error(CustomError::MyError(MyError( - "Not preceded by whitespace.".into(), + "Must be preceded by a non-whitespace character.".into(), )))); } Ok((input, ())) From 76a81b73ac4de8772da2d7e305e1e9cc3246e0c4 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 7 Sep 2023 02:59:08 -0400 Subject: [PATCH 09/45] Add a detect object function similar to the detect element function. --- src/parser/object_parser.rs | 20 ++++++++++++++++++++ src/parser/plain_text.rs | 6 ++++-- src/parser/subscript_and_superscript.rs | 13 +++++++++++++ 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/src/parser/object_parser.rs b/src/parser/object_parser.rs index e3db5531..b65f525d 100644 --- a/src/parser/object_parser.rs +++ b/src/parser/object_parser.rs @@ -4,8 +4,11 @@ use nom::combinator::map; use super::org_source::OrgSource; use super::plain_text::plain_text; use super::regular_link::regular_link; +use super::subscript_and_superscript::detect_subscript_or_superscript; use crate::context::parser_with_context; use crate::context::RefContext; +use crate::error::CustomError; +use crate::error::MyError; use crate::error::Res; use crate::parser::angle_link::angle_link; use crate::parser::citation::citation; @@ -165,6 +168,23 @@ pub fn any_object_except_plain_text<'b, 'g, 'r, 's>( Ok((remaining, object)) } +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +pub fn detect_any_object_except_plain_text<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, ()> { + if detect_subscript_or_superscript(input).is_ok() { + return Ok((input, ())); + } + if any_object_except_plain_text(context, input).is_ok() { + return Ok((input, ())); + } + + return Err(nom::Err::Error(CustomError::MyError(MyError( + "No object detected.".into(), + )))); +} + #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] pub fn regular_link_description_object_set<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, 
diff --git a/src/parser/plain_text.rs b/src/parser/plain_text.rs index 1dbc2956..e57f4d1a 100644 --- a/src/parser/plain_text.rs +++ b/src/parser/plain_text.rs @@ -7,7 +7,7 @@ use nom::combinator::recognize; use nom::combinator::verify; use nom::multi::many_till; -use super::object_parser::any_object_except_plain_text; +use super::object_parser::detect_any_object_except_plain_text; use super::org_source::OrgSource; use super::radio_link::RematchObject; use super::util::exit_matcher_parser; @@ -46,7 +46,9 @@ fn plain_text_end<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, OrgSource<'s>> { - recognize(parser_with_context!(any_object_except_plain_text)(context))(input) + recognize(parser_with_context!(detect_any_object_except_plain_text)( + context, + ))(input) } impl<'x> RematchObject<'x> for PlainText<'x> { diff --git a/src/parser/subscript_and_superscript.rs b/src/parser/subscript_and_superscript.rs index 00a76cdb..e2025e79 100644 --- a/src/parser/subscript_and_superscript.rs +++ b/src/parser/subscript_and_superscript.rs @@ -30,6 +30,19 @@ use crate::types::Object; use crate::types::Subscript; use crate::types::Superscript; +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +pub fn detect_subscript_or_superscript<'s>(input: OrgSource<'s>) -> Res, ()> { + // This does not have to detect all valid subscript/superscript but all that it detects must be valid. 
+ let (remaining, _) = one_of("_^")(input)?; + pre(input)?; + if tag::<_, _, CustomError<_>>("*")(remaining).is_ok() { + return Ok((input, ())); + } + let (remaining, _) = opt(one_of("+-"))(remaining)?; + let (_remaining, _) = verify(anychar, |c| c.is_alphanumeric())(remaining)?; + Ok((input, ())) +} + #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] pub fn subscript<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, From 69512f559a2734780232adfa31542c052566b66c Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 7 Sep 2023 03:40:14 -0400 Subject: [PATCH 10/45] Fix end conditions for subscript and superscript. --- src/parser/subscript_and_superscript.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/parser/subscript_and_superscript.rs b/src/parser/subscript_and_superscript.rs index e2025e79..c026c6a4 100644 --- a/src/parser/subscript_and_superscript.rs +++ b/src/parser/subscript_and_superscript.rs @@ -1,5 +1,6 @@ use nom::branch::alt; use nom::bytes::complete::tag; +use nom::bytes::complete::take_while; use nom::character::complete::anychar; use nom::character::complete::one_of; use nom::combinator::map; @@ -9,6 +10,7 @@ use nom::combinator::peek; use nom::combinator::recognize; use nom::combinator::verify; use nom::multi::many_till; +use nom::sequence::tuple; use super::object_parser::standard_set_object; use super::org_source::BracketDepth; @@ -143,7 +145,10 @@ fn script_alphanum_character<'s>(input: OrgSource<'s>) -> Res, Org #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn end_script_alphanum_character<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { let (remaining, final_char) = recognize(verify(anychar, |c| c.is_alphanumeric()))(input)?; - peek(not(script_alphanum_character))(remaining)?; + peek(tuple(( + take_while(|c| r#",.\"#.contains(c)), + not(script_alphanum_character), + )))(remaining)?; Ok((remaining, final_char)) } From 
b0930df7882cc559d54acb15f455dbc4d4bde049 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 7 Sep 2023 04:15:17 -0400 Subject: [PATCH 11/45] Support zero skipped text in OrgSource slicing. --- src/parser/org_source.rs | 5 ++++- src/parser/util.rs | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/parser/org_source.rs b/src/parser/org_source.rs index 87f93da6..820c01c1 100644 --- a/src/parser/org_source.rs +++ b/src/parser/org_source.rs @@ -145,6 +145,9 @@ where if new_end > self.end { panic!("Attempted to extend past the end of the WrappedInput.") } + if new_start == self.start && new_end == self.end { + return self.clone(); + } let skipped_text = &self.full_source[self.start..new_start]; let mut start_of_line = self.start_of_line; @@ -183,7 +186,7 @@ where start: new_start, end: new_end, start_of_line, - preceding_character: skipped_text.chars().last(), + preceding_character: skipped_text.chars().last().or(self.preceding_character), bracket_depth, brace_depth, parenthesis_depth, diff --git a/src/parser/util.rs b/src/parser/util.rs index 6464fe87..6625b865 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -153,7 +153,7 @@ fn _preceded_by_whitespace<'s>( .unwrap_or(allow_start_of_file) { return Err(nom::Err::Error(CustomError::MyError(MyError( - "Must be preceded by a non-whitespace character.".into(), + "Must be preceded by a whitespace character.".into(), )))); } Ok((input, ())) From c2eb1f51c8b641637963d4ff8242dcbaad27b1ae Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 12:41:48 -0400 Subject: [PATCH 12/45] Support blank lines between nested headlines. 
--- src/parser/document.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/parser/document.rs b/src/parser/document.rs index 8c50dbd2..1c036d58 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -328,6 +328,7 @@ fn _heading<'b, 'g, 'r, 's>( let heading_matcher = parser_with_context!(heading(star_count))(context); let (remaining, maybe_section) = opt(map(section_matcher, DocumentElement::Section))(remaining)?; + let (remaining, _ws) = opt(tuple((start_of_line, many0(blank_line))))(remaining)?; let (remaining, mut children) = many0(map(heading_matcher, DocumentElement::Heading))(remaining)?; if let Some(section) = maybe_section { From 57c2922e4a149c11102831f9ebd385ea3015793b Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 12:50:51 -0400 Subject: [PATCH 13/45] Add test showing problem is description list parser. --- .../plain_list/description_list_with_double_colon_in_tag.org | 1 + 1 file changed, 1 insertion(+) create mode 100644 org_mode_samples/greater_element/plain_list/description_list_with_double_colon_in_tag.org diff --git a/org_mode_samples/greater_element/plain_list/description_list_with_double_colon_in_tag.org b/org_mode_samples/greater_element/plain_list/description_list_with_double_colon_in_tag.org new file mode 100644 index 00000000..4ba3143e --- /dev/null +++ b/org_mode_samples/greater_element/plain_list/description_list_with_double_colon_in_tag.org @@ -0,0 +1 @@ +- =foo :: bar= :: baz From ab612f293f28f729217551d6579b380541105649 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 13:11:58 -0400 Subject: [PATCH 14/45] Update org-mode version. 
--- docker/organic_test/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/organic_test/Dockerfile b/docker/organic_test/Dockerfile index d3ab62c0..f0c31d70 100644 --- a/docker/organic_test/Dockerfile +++ b/docker/organic_test/Dockerfile @@ -14,7 +14,7 @@ RUN make DESTDIR="/root/dist" install FROM build AS build-org-mode -ARG ORG_VERSION=7bdec435ff5d86220d13c431e799c5ed44a57da1 +ARG ORG_VERSION=163bafb43dcc2bc94a2c7ccaa77d3d1dd488f1af COPY --from=build-emacs /root/dist/ / RUN mkdir /root/dist # Savannah does not allow fetching specific revisions, so we're going to have to put unnecessary load on their server by cloning main and then checking out the revision we want. From 40f22034da08e0b57df4f765c74cba297c1be70f Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 14:02:15 -0400 Subject: [PATCH 15/45] Make the item tag exit matcher a lower class than all all others. This is to allow for " :: " inside a description list item's tag if it is nested inside another object. --- src/context/context.rs | 2 +- src/context/exiting.rs | 14 ++++---------- src/parser/plain_list.rs | 38 +++++++++++++++++++++++++------------- 3 files changed, 30 insertions(+), 24 deletions(-) diff --git a/src/context/context.rs b/src/context/context.rs index 0baa2e8b..a485cdf7 100644 --- a/src/context/context.rs +++ b/src/context/context.rs @@ -108,7 +108,7 @@ impl<'g, 'r, 's> Context<'g, 'r, 's> { &'r self, i: OrgSource<'s>, ) -> IResult, OrgSource<'s>, CustomError>> { - let mut current_class_filter = ExitClass::Gamma; + let mut current_class_filter = ExitClass::Delta; for current_node in self.iter_context() { let context_element = current_node.get_data(); match context_element { diff --git a/src/context/exiting.rs b/src/context/exiting.rs index 6f8c359d..c989a335 100644 --- a/src/context/exiting.rs +++ b/src/context/exiting.rs @@ -1,16 +1,10 @@ #[derive(Debug, Copy, Clone)] pub enum ExitClass { - /// Headlines and sections. 
Document = 1, - - /// Elements who take priority over beta elements when matching. - Alpha = 20, - - /// Elements who cede priority to alpha elements when matching. - Beta = 300, - - /// Elements who cede priority to alpha and beta elements when matching. - Gamma = 4000, + Alpha = 2, + Beta = 3, + Gamma = 4, + Delta = 5, } impl std::fmt::Display for ExitClass { diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 9dbc117a..f74de757 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -152,11 +152,8 @@ pub fn plain_list_item<'b, 'g, 'r, 's>( // TODO: parse checkbox - let (remaining, maybe_tag) = opt(tuple(( - space1, - parser_with_context!(item_tag)(context), - tag(" ::"), - )))(remaining)?; + let (remaining, maybe_tag) = + opt(tuple((space1, parser_with_context!(item_tag)(context))))(remaining)?; let maybe_contentless_item: Res, OrgSource<'_>> = peek(recognize(tuple((many0(blank_line), eof))))(remaining); match maybe_contentless_item { @@ -170,7 +167,7 @@ pub fn plain_list_item<'b, 'g, 'r, 's>( indentation: indent_level, bullet: bull.into(), tag: maybe_tag - .map(|(_ws, item_tag, _divider)| item_tag) + .map(|(_ws, item_tag)| item_tag) .unwrap_or(Vec::new()), children: Vec::new(), }, @@ -219,7 +216,7 @@ pub fn plain_list_item<'b, 'g, 'r, 's>( indentation: indent_level, bullet: bull.into(), tag: maybe_tag - .map(|(_ws, item_tag, _divider)| item_tag) + .map(|(_ws, item_tag)| item_tag) .unwrap_or(Vec::new()), children: children.into_iter().map(|(_start, item)| item).collect(), }, @@ -313,11 +310,18 @@ fn item_tag<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Vec>> { - let parser_context = ContextElement::ExitMatcherNode(ExitMatcherNode { - class: ExitClass::Gamma, - exit_matcher: &item_tag_end, - }); - let parser_context = context.with_additional_node(&parser_context); + let contexts = [ + ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Gamma, + exit_matcher: 
&item_tag_line_ending_end, + }), + ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Delta, + exit_matcher: &item_tag_end, + }), + ]; + let parser_context = context.with_additional_node(&contexts[0]); + let parser_context = parser_context.with_additional_node(&contexts[1]); let (remaining, (children, _exit_contents)) = verify( many_till( // TODO: Should this be using a different set like the minimal set? @@ -326,6 +330,7 @@ fn item_tag<'b, 'g, 'r, 's>( ), |(children, _exit_contents)| !children.is_empty(), )(input)?; + let (remaining, _) = tag(" ::")(remaining)?; Ok((remaining, children)) } @@ -335,12 +340,19 @@ fn item_tag_end<'b, 'g, 'r, 's>( input: OrgSource<'s>, ) -> Res, OrgSource<'s>> { recognize(alt(( - line_ending, tag(" :: "), recognize(tuple((tag(" ::"), alt((line_ending, eof))))), )))(input) } +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn item_tag_line_ending_end<'b, 'g, 'r, 's>( + _context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, OrgSource<'s>> { + line_ending(input) +} + #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn item_tag_post_gap<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, From 3cc22943879da4c882fbb67bf410d98d1e68ee9d Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 15:05:42 -0400 Subject: [PATCH 16/45] Move headlines into their own file. 
--- src/parser/document.rs | 207 +------------------------------------- src/parser/headline.rs | 222 +++++++++++++++++++++++++++++++++++++++++ src/parser/mod.rs | 1 + 3 files changed, 226 insertions(+), 204 deletions(-) create mode 100644 src/parser/headline.rs diff --git a/src/parser/document.rs b/src/parser/document.rs index 1c036d58..44aca17f 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -1,23 +1,13 @@ -use nom::branch::alt; -use nom::bytes::complete::tag; -use nom::character::complete::anychar; -use nom::character::complete::line_ending; -use nom::character::complete::space0; -use nom::character::complete::space1; use nom::combinator::all_consuming; -use nom::combinator::eof; -use nom::combinator::map; -use nom::combinator::not; use nom::combinator::opt; use nom::combinator::recognize; use nom::combinator::verify; use nom::multi::many0; -use nom::multi::many1; -use nom::multi::many1_count; use nom::multi::many_till; -use nom::multi::separated_list1; use nom::sequence::tuple; +use super::headline::detect_headline; +use super::headline::heading; use super::in_buffer_settings::apply_in_buffer_settings; use super::in_buffer_settings::scan_for_in_buffer_settings; use super::org_source::OrgSource; @@ -25,7 +15,6 @@ use super::token::AllTokensIterator; use super::token::Token; use super::util::exit_matcher_parser; use super::util::get_consumed; -use super::util::start_of_line; use crate::context::parser_with_context; use crate::context::Context; use crate::context::ContextElement; @@ -39,19 +28,15 @@ use crate::error::MyError; use crate::error::Res; use crate::parser::comment::comment; use crate::parser::element_parser::element; -use crate::parser::object_parser::standard_set_object; use crate::parser::org_source::convert_error; use crate::parser::planning::planning; use crate::parser::property_drawer::property_drawer; use crate::parser::util::blank_line; use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting; use 
crate::types::Document; -use crate::types::DocumentElement; use crate::types::Element; -use crate::types::Heading; use crate::types::Object; use crate::types::Section; -use crate::types::TodoKeywordType; /// Parse a full org-mode document. /// @@ -245,7 +230,7 @@ fn zeroth_section<'b, 'g, 'r, 's>( } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn section<'b, 'g, 'r, 's>( +pub fn section<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, mut input: OrgSource<'s>, ) -> Res, Section<'s>> { @@ -306,192 +291,6 @@ fn section_end<'b, 'g, 'r, 's>( recognize(detect_headline)(input) } -const fn heading( - parent_stars: usize, -) -> impl for<'b, 'g, 'r, 's> Fn( - RefContext<'b, 'g, 'r, 's>, - OrgSource<'s>, -) -> Res, Heading<'s>> { - move |context, input: OrgSource<'_>| _heading(context, input, parent_stars) -} - -#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn _heading<'b, 'g, 'r, 's>( - context: RefContext<'b, 'g, 'r, 's>, - input: OrgSource<'s>, - parent_stars: usize, -) -> Res, Heading<'s>> { - not(|i| context.check_exit_matcher(i))(input)?; - let (remaining, (star_count, _ws, maybe_todo_keyword, title, heading_tags)) = - headline(context, input, parent_stars)?; - let section_matcher = parser_with_context!(section)(context); - let heading_matcher = parser_with_context!(heading(star_count))(context); - let (remaining, maybe_section) = - opt(map(section_matcher, DocumentElement::Section))(remaining)?; - let (remaining, _ws) = opt(tuple((start_of_line, many0(blank_line))))(remaining)?; - let (remaining, mut children) = - many0(map(heading_matcher, DocumentElement::Heading))(remaining)?; - if let Some(section) = maybe_section { - children.insert(0, section); - } - let remaining = if children.is_empty() { - // Support empty headings - let (remain, _ws) = many0(blank_line)(remaining)?; - remain - } else { - remaining - }; - - let source = get_consumed(input, remaining); - Ok(( - remaining, - Heading { - source: 
source.into(), - stars: star_count, - todo_keyword: maybe_todo_keyword.map(|((todo_keyword_type, todo_keyword), _ws)| { - (todo_keyword_type, Into::<&str>::into(todo_keyword)) - }), - title, - tags: heading_tags, - children, - }, - )) -} - -#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn detect_headline<'s>(input: OrgSource<'s>) -> Res, ()> { - tuple((start_of_line, many1(tag("*")), space1))(input)?; - Ok((input, ())) -} - -#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn headline<'b, 'g, 'r, 's>( - context: RefContext<'b, 'g, 'r, 's>, - input: OrgSource<'s>, - parent_stars: usize, -) -> Res< - OrgSource<'s>, - ( - usize, - OrgSource<'s>, - Option<((TodoKeywordType, OrgSource<'s>), OrgSource<'s>)>, - Vec>, - Vec<&'s str>, - ), -> { - let parser_context = ContextElement::ExitMatcherNode(ExitMatcherNode { - class: ExitClass::Document, - exit_matcher: &headline_title_end, - }); - let parser_context = context.with_additional_node(&parser_context); - - let ( - remaining, - (_sol, star_count, ws, maybe_todo_keyword, title, maybe_tags, _ws, _line_ending), - ) = tuple(( - start_of_line, - verify(many1_count(tag("*")), |star_count| { - *star_count > parent_stars - }), - space1, - opt(tuple(( - parser_with_context!(heading_keyword)(&parser_context), - space1, - ))), - many1(parser_with_context!(standard_set_object)(&parser_context)), - opt(tuple((space0, tags))), - space0, - alt((line_ending, eof)), - ))(input)?; - Ok(( - remaining, - ( - star_count, - ws, - maybe_todo_keyword, - title, - maybe_tags - .map(|(_ws, tags)| { - tags.into_iter() - .map(|single_tag| Into::<&str>::into(single_tag)) - .collect() - }) - .unwrap_or(Vec::new()), - ), - )) -} - -#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn headline_title_end<'b, 'g, 'r, 's>( - _context: RefContext<'b, 'g, 'r, 's>, - input: OrgSource<'s>, -) -> Res, OrgSource<'s>> { - recognize(tuple(( - opt(tuple((space0, tags, space0))), - 
alt((line_ending, eof)), - )))(input) -} - -#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn tags<'s>(input: OrgSource<'s>) -> Res, Vec>> { - let (remaining, (_open, tags, _close)) = - tuple((tag(":"), separated_list1(tag(":"), single_tag), tag(":")))(input)?; - Ok((remaining, tags)) -} - -#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn single_tag<'r, 's>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { - recognize(many1(verify(anychar, |c| { - c.is_alphanumeric() || "_@#%".contains(*c) - })))(input) -} - -#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn heading_keyword<'b, 'g, 'r, 's>( - context: RefContext<'b, 'g, 'r, 's>, - input: OrgSource<'s>, -) -> Res, (TodoKeywordType, OrgSource<'s>)> { - let global_settings = context.get_global_settings(); - if global_settings.in_progress_todo_keywords.is_empty() - && global_settings.complete_todo_keywords.is_empty() - { - alt(( - map(tag("TODO"), |capture| (TodoKeywordType::Todo, capture)), - map(tag("DONE"), |capture| (TodoKeywordType::Done, capture)), - ))(input) - } else { - for todo_keyword in global_settings - .in_progress_todo_keywords - .iter() - .map(String::as_str) - { - let result = tag::<_, _, CustomError<_>>(todo_keyword)(input); - match result { - Ok((remaining, ent)) => { - return Ok((remaining, (TodoKeywordType::Todo, ent))); - } - Err(_) => {} - } - } - for todo_keyword in global_settings - .complete_todo_keywords - .iter() - .map(String::as_str) - { - let result = tag::<_, _, CustomError<_>>(todo_keyword)(input); - match result { - Ok((remaining, ent)) => { - return Ok((remaining, (TodoKeywordType::Done, ent))); - } - Err(_) => {} - } - } - Err(nom::Err::Error(CustomError::MyError(MyError( - "NoTodoKeyword".into(), - )))) - } -} - impl<'s> Document<'s> { pub fn iter_tokens<'r>(&'r self) -> impl Iterator> { AllTokensIterator::new(Token::Document(self)) diff --git a/src/parser/headline.rs b/src/parser/headline.rs new 
file mode 100644 index 00000000..0146badc --- /dev/null +++ b/src/parser/headline.rs @@ -0,0 +1,222 @@ +use nom::branch::alt; +use nom::bytes::complete::tag; +use nom::character::complete::anychar; +use nom::character::complete::line_ending; +use nom::character::complete::space0; +use nom::character::complete::space1; +use nom::combinator::eof; +use nom::combinator::map; +use nom::combinator::not; +use nom::combinator::opt; +use nom::combinator::recognize; +use nom::combinator::verify; +use nom::multi::many0; +use nom::multi::many1; +use nom::multi::many1_count; +use nom::multi::separated_list1; +use nom::sequence::tuple; + +use super::document::section; +use super::org_source::OrgSource; +use super::util::get_consumed; +use super::util::start_of_line; +use crate::context::parser_with_context; +use crate::context::ContextElement; +use crate::context::ExitClass; +use crate::context::ExitMatcherNode; +use crate::context::RefContext; +use crate::error::CustomError; +use crate::error::MyError; +use crate::error::Res; +use crate::parser::object_parser::standard_set_object; +use crate::parser::util::blank_line; +use crate::types::DocumentElement; +use crate::types::Heading; +use crate::types::Object; +use crate::types::TodoKeywordType; + +pub const fn heading( + parent_stars: usize, +) -> impl for<'b, 'g, 'r, 's> Fn( + RefContext<'b, 'g, 'r, 's>, + OrgSource<'s>, +) -> Res, Heading<'s>> { + move |context, input: OrgSource<'_>| _heading(context, input, parent_stars) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn _heading<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, + parent_stars: usize, +) -> Res, Heading<'s>> { + not(|i| context.check_exit_matcher(i))(input)?; + let (remaining, (star_count, _ws, maybe_todo_keyword, title, heading_tags)) = + headline(context, input, parent_stars)?; + let section_matcher = parser_with_context!(section)(context); + let heading_matcher = 
parser_with_context!(heading(star_count))(context); + let (remaining, maybe_section) = + opt(map(section_matcher, DocumentElement::Section))(remaining)?; + let (remaining, _ws) = opt(tuple((start_of_line, many0(blank_line))))(remaining)?; + let (remaining, mut children) = + many0(map(heading_matcher, DocumentElement::Heading))(remaining)?; + if let Some(section) = maybe_section { + children.insert(0, section); + } + let remaining = if children.is_empty() { + // Support empty headings + let (remain, _ws) = many0(blank_line)(remaining)?; + remain + } else { + remaining + }; + + let source = get_consumed(input, remaining); + Ok(( + remaining, + Heading { + source: source.into(), + stars: star_count, + todo_keyword: maybe_todo_keyword.map(|((todo_keyword_type, todo_keyword), _ws)| { + (todo_keyword_type, Into::<&str>::into(todo_keyword)) + }), + title, + tags: heading_tags, + children, + }, + )) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +pub fn detect_headline<'s>(input: OrgSource<'s>) -> Res, ()> { + tuple((start_of_line, many1(tag("*")), space1))(input)?; + Ok((input, ())) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn headline<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, + parent_stars: usize, +) -> Res< + OrgSource<'s>, + ( + usize, + OrgSource<'s>, + Option<((TodoKeywordType, OrgSource<'s>), OrgSource<'s>)>, + Vec>, + Vec<&'s str>, + ), +> { + let parser_context = ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Document, + exit_matcher: &headline_title_end, + }); + let parser_context = context.with_additional_node(&parser_context); + + let ( + remaining, + (_sol, star_count, ws, maybe_todo_keyword, title, maybe_tags, _ws, _line_ending), + ) = tuple(( + start_of_line, + verify(many1_count(tag("*")), |star_count| { + *star_count > parent_stars + }), + space1, + opt(tuple(( + parser_with_context!(heading_keyword)(&parser_context), + 
space1, + ))), + many1(parser_with_context!(standard_set_object)(&parser_context)), + opt(tuple((space0, tags))), + space0, + alt((line_ending, eof)), + ))(input)?; + Ok(( + remaining, + ( + star_count, + ws, + maybe_todo_keyword, + title, + maybe_tags + .map(|(_ws, tags)| { + tags.into_iter() + .map(|single_tag| Into::<&str>::into(single_tag)) + .collect() + }) + .unwrap_or(Vec::new()), + ), + )) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn headline_title_end<'b, 'g, 'r, 's>( + _context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, OrgSource<'s>> { + recognize(tuple(( + opt(tuple((space0, tags, space0))), + alt((line_ending, eof)), + )))(input) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn tags<'s>(input: OrgSource<'s>) -> Res, Vec>> { + let (remaining, (_open, tags, _close)) = + tuple((tag(":"), separated_list1(tag(":"), single_tag), tag(":")))(input)?; + Ok((remaining, tags)) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn single_tag<'r, 's>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { + recognize(many1(verify(anychar, |c| { + c.is_alphanumeric() || "_@#%".contains(*c) + })))(input) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn heading_keyword<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, (TodoKeywordType, OrgSource<'s>)> { + let global_settings = context.get_global_settings(); + if global_settings.in_progress_todo_keywords.is_empty() + && global_settings.complete_todo_keywords.is_empty() + { + alt(( + map(tag("TODO"), |capture| (TodoKeywordType::Todo, capture)), + map(tag("DONE"), |capture| (TodoKeywordType::Done, capture)), + ))(input) + } else { + for todo_keyword in global_settings + .in_progress_todo_keywords + .iter() + .map(String::as_str) + { + let result = tag::<_, _, CustomError<_>>(todo_keyword)(input); + match result { + 
Ok((remaining, ent)) => { + return Ok((remaining, (TodoKeywordType::Todo, ent))); + } + Err(_) => {} + } + } + for todo_keyword in global_settings + .complete_todo_keywords + .iter() + .map(String::as_str) + { + let result = tag::<_, _, CustomError<_>>(todo_keyword)(input); + match result { + Ok((remaining, ent)) => { + return Ok((remaining, (TodoKeywordType::Done, ent))); + } + Err(_) => {} + } + } + Err(nom::Err::Error(CustomError::MyError(MyError( + "NoTodoKeyword".into(), + )))) + } +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 0b959743..f402d2aa 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -14,6 +14,7 @@ mod fixed_width_area; mod footnote_definition; mod footnote_reference; mod greater_block; +mod headline; mod horizontal_rule; mod in_buffer_settings; mod inline_babel_call; From 2e6e6fdd2bdc33bad48fa98e2a63b0e9cc3a04cf Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 15:08:16 -0400 Subject: [PATCH 17/45] Move sections to their own source file. 
--- src/parser/document.rs | 137 +------------------------------------- src/parser/headline.rs | 2 +- src/parser/mod.rs | 1 + src/parser/section.rs | 146 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 149 insertions(+), 137 deletions(-) create mode 100644 src/parser/section.rs diff --git a/src/parser/document.rs b/src/parser/document.rs index 44aca17f..027ba70b 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -1,42 +1,28 @@ use nom::combinator::all_consuming; use nom::combinator::opt; -use nom::combinator::recognize; -use nom::combinator::verify; use nom::multi::many0; -use nom::multi::many_till; -use nom::sequence::tuple; -use super::headline::detect_headline; use super::headline::heading; use super::in_buffer_settings::apply_in_buffer_settings; use super::in_buffer_settings::scan_for_in_buffer_settings; use super::org_source::OrgSource; +use super::section::zeroth_section; use super::token::AllTokensIterator; use super::token::Token; -use super::util::exit_matcher_parser; use super::util::get_consumed; use crate::context::parser_with_context; use crate::context::Context; use crate::context::ContextElement; -use crate::context::ExitClass; -use crate::context::ExitMatcherNode; use crate::context::GlobalSettings; use crate::context::List; use crate::context::RefContext; use crate::error::CustomError; use crate::error::MyError; use crate::error::Res; -use crate::parser::comment::comment; -use crate::parser::element_parser::element; use crate::parser::org_source::convert_error; -use crate::parser::planning::planning; -use crate::parser::property_drawer::property_drawer; use crate::parser::util::blank_line; -use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting; use crate::types::Document; -use crate::types::Element; use crate::types::Object; -use crate::types::Section; /// Parse a full org-mode document. 
/// @@ -170,127 +156,6 @@ fn _document<'b, 'g, 'r, 's>( )) } -#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn zeroth_section<'b, 'g, 'r, 's>( - context: RefContext<'b, 'g, 'r, 's>, - input: OrgSource<'s>, -) -> Res<OrgSource<'s>, Section<'s>> { - // TODO: The zeroth section is specialized so it probably needs its own parser - let contexts = [ - ContextElement::ConsumeTrailingWhitespace(true), - ContextElement::Context("section"), - ContextElement::ExitMatcherNode(ExitMatcherNode { - class: ExitClass::Document, - exit_matcher: &section_end, - }), - ]; - let parser_context = context.with_additional_node(&contexts[0]); - let parser_context = parser_context.with_additional_node(&contexts[1]); - let parser_context = parser_context.with_additional_node(&contexts[2]); - let without_consuming_whitespace_context = ContextElement::ConsumeTrailingWhitespace(false); - let without_consuming_whitespace_context = - parser_context.with_additional_node(&without_consuming_whitespace_context); - - let element_matcher = parser_with_context!(element(true))(&parser_context); - let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); - - let (remaining, comment_and_property_drawer_element) = opt(tuple(( - opt(parser_with_context!(comment)( - &without_consuming_whitespace_context, - )), - parser_with_context!(property_drawer)(context), - many0(blank_line), - )))(input)?; - - let (remaining, (mut children, _exit_contents)) = verify( - many_till(element_matcher, exit_matcher), - |(children, _exit_contents)| { - !children.is_empty() || comment_and_property_drawer_element.is_some() - }, - )(remaining)?; - - comment_and_property_drawer_element.map(|(comment, property_drawer, _ws)| { - children.insert(0, Element::PropertyDrawer(property_drawer)); - comment - .map(Element::Comment) - .map(|ele| children.insert(0, ele)); - }); - - let (remaining, _trailing_ws) = - maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; - - let source =
get_consumed(input, remaining); - Ok(( - remaining, - Section { - source: source.into(), - children, - }, - )) -} - -#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -pub fn section<'b, 'g, 'r, 's>( - context: RefContext<'b, 'g, 'r, 's>, - mut input: OrgSource<'s>, -) -> Res<OrgSource<'s>, Section<'s>> { - // TODO: The zeroth section is specialized so it probably needs its own parser - let contexts = [ - ContextElement::ConsumeTrailingWhitespace(true), - ContextElement::Context("section"), - ContextElement::ExitMatcherNode(ExitMatcherNode { - class: ExitClass::Document, - exit_matcher: &section_end, - }), - ]; - let parser_context = context.with_additional_node(&contexts[0]); - let parser_context = parser_context.with_additional_node(&contexts[1]); - let parser_context = parser_context.with_additional_node(&contexts[2]); - let element_matcher = parser_with_context!(element(true))(&parser_context); - let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); - let (mut remaining, (planning_element, property_drawer_element)) = tuple(( - opt(parser_with_context!(planning)(&parser_context)), - opt(parser_with_context!(property_drawer)(&parser_context)), - ))(input)?; - if planning_element.is_none() && property_drawer_element.is_none() { - let (remain, _ws) = many0(blank_line)(remaining)?; - remaining = remain; - input = remain; - } - let (remaining, (mut children, _exit_contents)) = verify( - many_till(element_matcher, exit_matcher), - |(children, _exit_contents)| { - !children.is_empty() || property_drawer_element.is_some() || planning_element.is_some() - }, - )(remaining)?; - property_drawer_element - .map(Element::PropertyDrawer) - .map(|ele| children.insert(0, ele)); - planning_element - .map(Element::Planning) - .map(|ele| children.insert(0, ele)); - - let (remaining, _trailing_ws) = - maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; - - let source = get_consumed(input, remaining); - Ok(( - remaining, - Section {
- source: source.into(), - children, - }, - )) -} - -#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn section_end<'b, 'g, 'r, 's>( - _context: RefContext<'b, 'g, 'r, 's>, - input: OrgSource<'s>, -) -> Res<OrgSource<'s>, OrgSource<'s>> { - recognize(detect_headline)(input) -} - impl<'s> Document<'s> { pub fn iter_tokens<'r>(&'r self) -> impl Iterator<Item = Token<'r, 's>> { AllTokensIterator::new(Token::Document(self)) diff --git a/src/parser/headline.rs b/src/parser/headline.rs index 0146badc..c0b67025 100644 --- a/src/parser/headline.rs +++ b/src/parser/headline.rs @@ -16,8 +16,8 @@ use nom::multi::many1_count; use nom::multi::separated_list1; use nom::sequence::tuple; -use super::document::section; use super::org_source::OrgSource; +use super::section::section; use super::util::get_consumed; use super::util::start_of_line; use crate::context::parser_with_context; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f402d2aa..30b685a0 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -36,6 +36,7 @@ mod planning; mod property_drawer; mod radio_link; mod regular_link; +mod section; pub mod sexp; mod statistics_cookie; mod subscript_and_superscript; diff --git a/src/parser/section.rs b/src/parser/section.rs new file mode 100644 index 00000000..33b16854 --- /dev/null +++ b/src/parser/section.rs @@ -0,0 +1,146 @@ +use nom::combinator::opt; +use nom::combinator::recognize; +use nom::combinator::verify; +use nom::multi::many0; +use nom::multi::many_till; +use nom::sequence::tuple; + +use super::headline::detect_headline; +use super::org_source::OrgSource; +use super::util::exit_matcher_parser; +use super::util::get_consumed; +use crate::context::parser_with_context; +use crate::context::ContextElement; +use crate::context::ExitClass; +use crate::context::ExitMatcherNode; +use crate::context::RefContext; +use crate::error::Res; +use crate::parser::comment::comment; +use crate::parser::element_parser::element; +use crate::parser::planning::planning; +use
crate::parser::property_drawer::property_drawer; +use crate::parser::util::blank_line; +use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting; +use crate::types::Element; +use crate::types::Section; + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +pub fn zeroth_section<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res<OrgSource<'s>, Section<'s>> { + // TODO: The zeroth section is specialized so it probably needs its own parser + let contexts = [ + ContextElement::ConsumeTrailingWhitespace(true), + ContextElement::Context("section"), + ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Document, + exit_matcher: &section_end, + }), + ]; + let parser_context = context.with_additional_node(&contexts[0]); + let parser_context = parser_context.with_additional_node(&contexts[1]); + let parser_context = parser_context.with_additional_node(&contexts[2]); + let without_consuming_whitespace_context = ContextElement::ConsumeTrailingWhitespace(false); + let without_consuming_whitespace_context = + parser_context.with_additional_node(&without_consuming_whitespace_context); + + let element_matcher = parser_with_context!(element(true))(&parser_context); + let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); + + let (remaining, comment_and_property_drawer_element) = opt(tuple(( + opt(parser_with_context!(comment)( + &without_consuming_whitespace_context, + )), + parser_with_context!(property_drawer)(context), + many0(blank_line), + )))(input)?; + + let (remaining, (mut children, _exit_contents)) = verify( + many_till(element_matcher, exit_matcher), + |(children, _exit_contents)| { + !children.is_empty() || comment_and_property_drawer_element.is_some() + }, + )(remaining)?; + + comment_and_property_drawer_element.map(|(comment, property_drawer, _ws)| { + children.insert(0, Element::PropertyDrawer(property_drawer)); + comment + .map(Element::Comment) + .map(|ele|
children.insert(0, ele)); + }); + + let (remaining, _trailing_ws) = + maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; + + let source = get_consumed(input, remaining); + Ok(( + remaining, + Section { + source: source.into(), + children, + }, + )) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +pub fn section<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + mut input: OrgSource<'s>, +) -> Res<OrgSource<'s>, Section<'s>> { + // TODO: The zeroth section is specialized so it probably needs its own parser + let contexts = [ + ContextElement::ConsumeTrailingWhitespace(true), + ContextElement::Context("section"), + ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Document, + exit_matcher: &section_end, + }), + ]; + let parser_context = context.with_additional_node(&contexts[0]); + let parser_context = parser_context.with_additional_node(&contexts[1]); + let parser_context = parser_context.with_additional_node(&contexts[2]); + let element_matcher = parser_with_context!(element(true))(&parser_context); + let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); + let (mut remaining, (planning_element, property_drawer_element)) = tuple(( + opt(parser_with_context!(planning)(&parser_context)), + opt(parser_with_context!(property_drawer)(&parser_context)), + ))(input)?; + if planning_element.is_none() && property_drawer_element.is_none() { + let (remain, _ws) = many0(blank_line)(remaining)?; + remaining = remain; + input = remain; + } + let (remaining, (mut children, _exit_contents)) = verify( + many_till(element_matcher, exit_matcher), + |(children, _exit_contents)| { + !children.is_empty() || property_drawer_element.is_some() || planning_element.is_some() + }, + )(remaining)?; + property_drawer_element + .map(Element::PropertyDrawer) + .map(|ele| children.insert(0, ele)); + planning_element + .map(Element::Planning) + .map(|ele| children.insert(0, ele)); + + let (remaining,
_trailing_ws) = + maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; + + let source = get_consumed(input, remaining); + Ok(( + remaining, + Section { + source: source.into(), + children, + }, + )) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn section_end<'b, 'g, 'r, 's>( + _context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res<OrgSource<'s>, OrgSource<'s>> { + recognize(detect_headline)(input) +} From b32c21eb1d64cfbec79597636cb6be893c5b5847 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 15:59:56 -0400 Subject: [PATCH 18/45] Add a test for a comment heading. --- org_mode_samples/sections_and_headings/comment_heading.org | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 org_mode_samples/sections_and_headings/comment_heading.org diff --git a/org_mode_samples/sections_and_headings/comment_heading.org b/org_mode_samples/sections_and_headings/comment_heading.org new file mode 100644 index 00000000..76a4ce9d --- /dev/null +++ b/org_mode_samples/sections_and_headings/comment_heading.org @@ -0,0 +1,2 @@ +* TODO [#A] COMMENT foo bar +baz From c7c0deed74613c648ae34e3ff42ecdd5148dc936 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 15:43:13 -0400 Subject: [PATCH 19/45] Parse priority cookie and COMMENT from headlines.
--- Cargo.toml | 2 +- src/parser/headline.rs | 46 +++++++++++++++++++++++++++++++++++++----- src/types/document.rs | 6 +++++- src/types/mod.rs | 1 + 4 files changed, 48 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b2337923..e8981ec2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,7 +39,7 @@ tracing-subscriber = { version = "0.3.17", optional = true, features = ["env-fil walkdir = "2.3.3" [features] -default = [] +default = ["compare"] compare = [] tracing = ["dep:opentelemetry", "dep:opentelemetry-otlp", "dep:opentelemetry-semantic-conventions", "dep:tokio", "dep:tracing", "dep:tracing-opentelemetry", "dep:tracing-subscriber"] diff --git a/src/parser/headline.rs b/src/parser/headline.rs index c0b67025..6be91599 100644 --- a/src/parser/headline.rs +++ b/src/parser/headline.rs @@ -33,6 +33,7 @@ use crate::parser::util::blank_line; use crate::types::DocumentElement; use crate::types::Heading; use crate::types::Object; +use crate::types::PriorityCookie; use crate::types::TodoKeywordType; pub const fn heading( @@ -51,8 +52,10 @@ fn _heading<'b, 'g, 'r, 's>( parent_stars: usize, ) -> Res, Heading<'s>> { not(|i| context.check_exit_matcher(i))(input)?; - let (remaining, (star_count, _ws, maybe_todo_keyword, title, heading_tags)) = - headline(context, input, parent_stars)?; + let ( + remaining, + (star_count, maybe_todo_keyword, maybe_priority, maybe_comment, title, heading_tags), + ) = headline(context, input, parent_stars)?; let section_matcher = parser_with_context!(section)(context); let heading_matcher = parser_with_context!(heading(star_count))(context); let (remaining, maybe_section) = @@ -70,6 +73,7 @@ fn _heading<'b, 'g, 'r, 's>( } else { remaining }; + let is_archived = heading_tags.contains(&"ARCHIVE"); let source = get_consumed(input, remaining); Ok(( @@ -80,9 +84,12 @@ fn _heading<'b, 'g, 'r, 's>( todo_keyword: maybe_todo_keyword.map(|((todo_keyword_type, todo_keyword), _ws)| { (todo_keyword_type, Into::<&str>::into(todo_keyword)) 
}), + priority_cookie: maybe_priority.map(|(priority, _)| priority), title, tags: heading_tags, children, + is_comment: maybe_comment.is_some(), + is_archived, }, )) } @@ -102,8 +109,9 @@ fn headline<'b, 'g, 'r, 's>( OrgSource<'s>, ( usize, - OrgSource<'s>, Option<((TodoKeywordType, OrgSource<'s>), OrgSource<'s>)>, + Option<(PriorityCookie, OrgSource<'s>)>, + Option<(OrgSource<'s>, OrgSource<'s>)>, Vec>, Vec<&'s str>, ), @@ -116,7 +124,18 @@ fn headline<'b, 'g, 'r, 's>( let ( remaining, - (_sol, star_count, ws, maybe_todo_keyword, title, maybe_tags, _ws, _line_ending), + ( + _, + star_count, + _, + maybe_todo_keyword, + maybe_priority, + maybe_comment, + title, + maybe_tags, + _, + _, + ), ) = tuple(( start_of_line, verify(many1_count(tag("*")), |star_count| { @@ -127,6 +146,8 @@ fn headline<'b, 'g, 'r, 's>( parser_with_context!(heading_keyword)(&parser_context), space1, ))), + opt(tuple((priority_cookie, space1))), + opt(tuple((tag("COMMENT"), space1))), many1(parser_with_context!(standard_set_object)(&parser_context)), opt(tuple((space0, tags))), space0, @@ -136,8 +157,9 @@ fn headline<'b, 'g, 'r, 's>( remaining, ( star_count, - ws, maybe_todo_keyword, + maybe_priority, + maybe_comment, title, maybe_tags .map(|(_ws, tags)| { @@ -220,3 +242,17 @@ fn heading_keyword<'b, 'g, 'r, 's>( )))) } } + +fn priority_cookie<'s>(input: OrgSource<'s>) -> Res, PriorityCookie> { + let (remaining, (_, priority_character, _)) = tuple(( + tag("[#"), + verify(anychar, |c| c.is_alphanumeric()), + tag("]"), + ))(input)?; + let cookie = PriorityCookie::try_from(priority_character).map_err(|_| { + nom::Err::Error(CustomError::MyError(MyError( + "Failed to cast priority cookie to number.".into(), + ))) + })?; + Ok((remaining, cookie)) +} diff --git a/src/types/document.rs b/src/types/document.rs index 654377a9..142762da 100644 --- a/src/types/document.rs +++ b/src/types/document.rs @@ -2,6 +2,8 @@ use super::Element; use super::Object; use super::Source; +pub type PriorityCookie = u8; + 
#[derive(Debug)] pub struct Document<'s> { pub source: &'s str, @@ -14,10 +16,12 @@ pub struct Heading<'s> { pub source: &'s str, pub stars: usize, pub todo_keyword: Option<(TodoKeywordType, &'s str)>, - // TODO: add todo-type enum + pub priority_cookie: Option, pub title: Vec>, pub tags: Vec<&'s str>, pub children: Vec>, + pub is_comment: bool, + pub is_archived: bool, } #[derive(Debug)] diff --git a/src/types/mod.rs b/src/types/mod.rs index efd1b047..9cf5b596 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -7,6 +7,7 @@ mod source; pub use document::Document; pub use document::DocumentElement; pub use document::Heading; +pub use document::PriorityCookie; pub use document::Section; pub use document::TodoKeywordType; pub use element::Element; From 93d3d9471fa2e8e2bf3ff3d6b7d814b6b97ac77c Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 15:57:24 -0400 Subject: [PATCH 20/45] Compare priority, archived, and commented in headlines. --- src/compare/diff.rs | 53 ++++++++++++++++++++++++++++++++++++++++++++- src/compare/util.rs | 5 +++++ 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 0c881a59..bc812d18 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -45,6 +45,7 @@ use crate::types::PlainList; use crate::types::PlainListItem; use crate::types::PlainText; use crate::types::Planning; +use crate::types::PriorityCookie; use crate::types::PropertyDrawer; use crate::types::RadioLink; use crate::types::RadioTarget; @@ -553,7 +554,57 @@ fn compare_heading<'s>( .collect::, _>>()?; child_status.push(artificial_diff_scope("title".to_owned(), title_status)?); - // TODO: Compare priority, :footnote-section-p, :archivedp, :commentedp + // Compare priority + let priority = get_property(emacs, ":priority")?; + match (priority, rust.priority_cookie) { + (None, None) => {} + (None, Some(_)) | (Some(_), None) => { + this_status = DiffStatus::Bad; + message = Some(format!( + "Priority cookie 
mismatch (emacs != rust) {:?} != {:?}", + priority, rust.priority_cookie + )); + } + (Some(emacs_priority_cookie), Some(rust_priority_cookie)) => { + let emacs_priority_cookie = + emacs_priority_cookie.as_atom()?.parse::<PriorityCookie>()?; + if emacs_priority_cookie != rust_priority_cookie { + this_status = DiffStatus::Bad; + message = Some(format!( + "Priority cookie mismatch (emacs != rust) {:?} != {:?}", + emacs_priority_cookie, rust_priority_cookie + )); + } + } + } + + // Compare archived + let archived = get_property(emacs, ":archivedp")?; + match (archived, rust.is_archived) { + (None, true) | (Some(_), false) => { + this_status = DiffStatus::Bad; + message = Some(format!( + "archived mismatch (emacs != rust) {:?} != {:?}", + archived, rust.is_archived + )); + } + (None, false) | (Some(_), true) => {} + } + + // Compare commented + let commented = get_property(emacs, ":commentedp")?; + match (commented, rust.is_comment) { + (None, true) | (Some(_), false) => { + this_status = DiffStatus::Bad; + message = Some(format!( + "commented mismatch (emacs != rust) {:?} != {:?}", + commented, rust.is_comment + )); + } + (None, false) | (Some(_), true) => {} + } + + // TODO: Compare :footnote-section-p // Compare section let section_status = children diff --git a/src/compare/util.rs b/src/compare/util.rs index 6367bf98..173ecd02 100644 --- a/src/compare/util.rs +++ b/src/compare/util.rs @@ -141,6 +141,11 @@ fn maybe_token_to_usize( .map_or(Ok(None), |r| r.map(Some))?) } +/// Get a named property from the emacs token. +/// +/// Returns Ok(None) if value is nil. +/// +/// Returns error if the attribute is not specified on the token at all. pub fn get_property<'s, 'x>( emacs: &'s Token<'s>, key: &'x str, From dc8b8d08abd6f1394d65903bc17f8fe38e1ae311 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 16:25:18 -0400 Subject: [PATCH 21/45] Add test showing we break on empty sections that contain a planning.
--- .../section_with_planning_and_whitespace.org | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 org_mode_samples/sections_and_headings/section_with_planning_and_whitespace.org diff --git a/org_mode_samples/sections_and_headings/section_with_planning_and_whitespace.org b/org_mode_samples/sections_and_headings/section_with_planning_and_whitespace.org new file mode 100644 index 00000000..24ba56dc --- /dev/null +++ b/org_mode_samples/sections_and_headings/section_with_planning_and_whitespace.org @@ -0,0 +1,4 @@ +* DONE foo + DEADLINE: <2023-09-08 Fri> + +* DONE bar From 8780976c15d356408ffb83ff250d4a410b80a858 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 16:30:40 -0400 Subject: [PATCH 22/45] Consume trailing whitespace after planning. --- src/parser/planning.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/parser/planning.rs b/src/parser/planning.rs index 77e70c1e..10864abd 100644 --- a/src/parser/planning.rs +++ b/src/parser/planning.rs @@ -10,6 +10,7 @@ use nom::multi::separated_list1; use nom::sequence::tuple; use super::org_source::OrgSource; +use super::util::maybe_consume_trailing_whitespace_if_not_exiting; use crate::context::RefContext; use crate::error::Res; use crate::parser::util::get_consumed; @@ -18,7 +19,7 @@ use crate::types::Planning; #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] pub fn planning<'b, 'g, 'r, 's>( - _context: RefContext<'b, 'g, 'r, 's>, + context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res<OrgSource<'s>, Planning<'s>> { start_of_line(input)?; @@ -26,6 +27,8 @@ pub fn planning<'b, 'g, 'r, 's>( let (remaining, _planning_parameters) = separated_list1(space1, planning_parameter)(remaining)?; let (remaining, _trailing_ws) = tuple((space0, alt((line_ending, eof))))(remaining)?; + let (remaining, _trailing_ws) = + maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); Ok(( From
0056657b65b50e1372dfddf1440b522ac9675f37 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 17:27:02 -0400 Subject: [PATCH 23/45] Add a test showing the plain text parser is not handling subsets of objects like inside a table cell. --- .../greater_element/table/cells_with_objects.org | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 org_mode_samples/greater_element/table/cells_with_objects.org diff --git a/org_mode_samples/greater_element/table/cells_with_objects.org b/org_mode_samples/greater_element/table/cells_with_objects.org new file mode 100644 index 00000000..ffc814bc --- /dev/null +++ b/org_mode_samples/greater_element/table/cells_with_objects.org @@ -0,0 +1,6 @@ +src_elisp{(bar)} +*src_elisp{(bar)}* + +| foo *bar* | +| foo src_elisp{(bar)} | +| foo *src_elisp{(bar)}* | From 669da4073ea94746582d2bd254cb24faf50190b0 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 17:45:49 -0400 Subject: [PATCH 24/45] Accept the end condition as a parameter to the plain text parser so it can adapt to the context. 
--- src/parser/object_parser.rs | 179 +++++++++++++++++++++++------------- src/parser/plain_text.rs | 49 ++++++---- src/parser/regular_link.rs | 4 +- 3 files changed, 152 insertions(+), 80 deletions(-) diff --git a/src/parser/object_parser.rs b/src/parser/object_parser.rs index b65f525d..5542618b 100644 --- a/src/parser/object_parser.rs +++ b/src/parser/object_parser.rs @@ -37,54 +37,11 @@ pub fn standard_set_object<'b, 'g, 'r, 's>( input: OrgSource<'s>, ) -> Res, Object<'s>> { let (remaining, object) = alt(( - map(parser_with_context!(timestamp)(context), Object::Timestamp), - map(parser_with_context!(subscript)(context), Object::Subscript), + parser_with_context!(standard_set_object_sans_plain_text)(context), map( - parser_with_context!(superscript)(context), - Object::Superscript, + parser_with_context!(plain_text(detect_standard_set_object_sans_plain_text))(context), + Object::PlainText, ), - map( - parser_with_context!(statistics_cookie)(context), - Object::StatisticsCookie, - ), - map(parser_with_context!(target)(context), Object::Target), - map(parser_with_context!(line_break)(context), Object::LineBreak), - map( - parser_with_context!(inline_source_block)(context), - Object::InlineSourceBlock, - ), - map( - parser_with_context!(inline_babel_call)(context), - Object::InlineBabelCall, - ), - map(parser_with_context!(citation)(context), Object::Citation), - map( - parser_with_context!(footnote_reference)(context), - Object::FootnoteReference, - ), - map( - parser_with_context!(export_snippet)(context), - Object::ExportSnippet, - ), - map(parser_with_context!(entity)(context), Object::Entity), - map( - parser_with_context!(latex_fragment)(context), - Object::LatexFragment, - ), - map(parser_with_context!(radio_link)(context), Object::RadioLink), - map( - parser_with_context!(radio_target)(context), - Object::RadioTarget, - ), - parser_with_context!(text_markup)(context), - map( - parser_with_context!(regular_link)(context), - Object::RegularLink, - ), - 
map(parser_with_context!(plain_link)(context), Object::PlainLink), - map(parser_with_context!(angle_link)(context), Object::AngleLink), - map(parser_with_context!(org_macro)(context), Object::OrgMacro), - map(parser_with_context!(plain_text)(context), Object::PlainText), ))(input)?; Ok((remaining, object)) } @@ -95,24 +52,17 @@ pub fn minimal_set_object<'b, 'g, 'r, 's>( input: OrgSource<'s>, ) -> Res, Object<'s>> { let (remaining, object) = alt(( - map(parser_with_context!(subscript)(context), Object::Subscript), + parser_with_context!(minimal_set_object_sans_plain_text)(context), map( - parser_with_context!(superscript)(context), - Object::Superscript, + parser_with_context!(plain_text(detect_minimal_set_object_sans_plain_text))(context), + Object::PlainText, ), - map(parser_with_context!(entity)(context), Object::Entity), - map( - parser_with_context!(latex_fragment)(context), - Object::LatexFragment, - ), - parser_with_context!(text_markup)(context), - map(parser_with_context!(plain_text)(context), Object::PlainText), ))(input)?; Ok((remaining, object)) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -pub fn any_object_except_plain_text<'b, 'g, 'r, 's>( +fn standard_set_object_sans_plain_text<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Object<'s>> { @@ -169,14 +119,35 @@ pub fn any_object_except_plain_text<'b, 'g, 'r, 's>( } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -pub fn detect_any_object_except_plain_text<'b, 'g, 'r, 's>( +fn minimal_set_object_sans_plain_text<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, Object<'s>> { + let (remaining, object) = alt(( + map(parser_with_context!(subscript)(context), Object::Subscript), + map( + parser_with_context!(superscript)(context), + Object::Superscript, + ), + map(parser_with_context!(entity)(context), Object::Entity), + map( + 
parser_with_context!(latex_fragment)(context), + Object::LatexFragment, + ), + parser_with_context!(text_markup)(context), + ))(input)?; + Ok((remaining, object)) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +pub fn detect_standard_set_object_sans_plain_text<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, ()> { if detect_subscript_or_superscript(input).is_ok() { return Ok((input, ())); } - if any_object_except_plain_text(context, input).is_ok() { + if standard_set_object_sans_plain_text(context, input).is_ok() { return Ok((input, ())); } @@ -186,7 +157,42 @@ pub fn detect_any_object_except_plain_text<'b, 'g, 'r, 's>( } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -pub fn regular_link_description_object_set<'b, 'g, 'r, 's>( +fn detect_minimal_set_object_sans_plain_text<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, ()> { + if detect_subscript_or_superscript(input).is_ok() { + return Ok((input, ())); + } + if minimal_set_object_sans_plain_text(context, input).is_ok() { + return Ok((input, ())); + } + + return Err(nom::Err::Error(CustomError::MyError(MyError( + "No object detected.".into(), + )))); +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +pub fn regular_link_description_set_object<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, Object<'s>> { + // TODO: It can also contain another link, but only when it is a plain or angle link. 
It can contain square brackets, but not ]] + let (remaining, object) = alt(( + parser_with_context!(regular_link_description_set_object_sans_plain_text)(context), + map( + parser_with_context!(plain_text( + detect_regular_link_description_set_object_sans_plain_text + ))(context), + Object::PlainText, + ), + ))(input)?; + Ok((remaining, object)) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn regular_link_description_set_object_sans_plain_text<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Object<'s>> { @@ -209,15 +215,47 @@ pub fn regular_link_description_object_set<'b, 'g, 'r, 's>( Object::InlineBabelCall, ), map(parser_with_context!(org_macro)(context), Object::OrgMacro), - parser_with_context!(minimal_set_object)(context), + parser_with_context!(minimal_set_object_sans_plain_text)(context), ))(input)?; Ok((remaining, object)) } +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +pub fn detect_regular_link_description_set_object_sans_plain_text<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, ()> { + if detect_subscript_or_superscript(input).is_ok() { + return Ok((input, ())); + } + if regular_link_description_set_object_sans_plain_text(context, input).is_ok() { + return Ok((input, ())); + } + + return Err(nom::Err::Error(CustomError::MyError(MyError( + "No object detected.".into(), + )))); +} + #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] pub fn table_cell_set_object<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, +) -> Res, Object<'s>> { + let (remaining, object) = alt(( + parser_with_context!(table_cell_set_object_sans_plain_text)(context), + map( + parser_with_context!(plain_text(detect_table_cell_set_object_sans_plain_text))(context), + Object::PlainText, + ), + ))(input)?; + Ok((remaining, object)) +} + +#[cfg_attr(feature = "tracing", 
tracing::instrument(ret, level = "debug"))] +pub fn table_cell_set_object_sans_plain_text<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, ) -> Res, Object<'s>> { let (remaining, object) = alt(( map(parser_with_context!(citation)(context), Object::Citation), @@ -243,7 +281,24 @@ pub fn table_cell_set_object<'b, 'g, 'r, 's>( ), map(parser_with_context!(target)(context), Object::Target), map(parser_with_context!(timestamp)(context), Object::Timestamp), - parser_with_context!(minimal_set_object)(context), + parser_with_context!(minimal_set_object_sans_plain_text)(context), ))(input)?; Ok((remaining, object)) } + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +pub fn detect_table_cell_set_object_sans_plain_text<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, ()> { + if detect_subscript_or_superscript(input).is_ok() { + return Ok((input, ())); + } + if table_cell_set_object_sans_plain_text(context, input).is_ok() { + return Ok((input, ())); + } + + return Err(nom::Err::Error(CustomError::MyError(MyError( + "No object detected.".into(), + )))); +} diff --git a/src/parser/plain_text.rs b/src/parser/plain_text.rs index e57f4d1a..b50bd139 100644 --- a/src/parser/plain_text.rs +++ b/src/parser/plain_text.rs @@ -7,7 +7,6 @@ use nom::combinator::recognize; use nom::combinator::verify; use nom::multi::many_till; -use super::object_parser::detect_any_object_except_plain_text; use super::org_source::OrgSource; use super::radio_link::RematchObject; use super::util::exit_matcher_parser; @@ -17,17 +16,42 @@ use crate::error::Res; use crate::types::Object; use crate::types::PlainText; -#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -pub fn plain_text<'b, 'g, 'r, 's>( +pub fn plain_text( + end_condition: F, +) -> impl for<'b, 'g, 'r, 's> Fn( + RefContext<'b, 'g, 'r, 's>, + OrgSource<'s>, +) -> Res, PlainText<'s>> +where + F: for<'bb, 'gg, 'rr, 'ss> Fn( + 
RefContext<'bb, 'gg, 'rr, 'ss>, + OrgSource<'ss>, + ) -> Res, ()>, +{ + move |context, input| _plain_text(&end_condition, context, input) +} + +#[cfg_attr( + feature = "tracing", + tracing::instrument(ret, level = "debug", skip(end_condition)) +)] +fn _plain_text<'b, 'g, 'r, 's, F>( + end_condition: F, context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, -) -> Res, PlainText<'s>> { +) -> Res, PlainText<'s>> +where + F: for<'bb, 'gg, 'rr, 'ss> Fn( + RefContext<'bb, 'gg, 'rr, 'ss>, + OrgSource<'ss>, + ) -> Res, ()>, +{ let (remaining, source) = recognize(verify( many_till( anychar, peek(alt(( parser_with_context!(exit_matcher_parser)(context), - parser_with_context!(plain_text_end)(context), + recognize(parser_with_context!(end_condition)(context)), ))), ), |(children, _exit_contents)| !children.is_empty(), @@ -41,16 +65,6 @@ pub fn plain_text<'b, 'g, 'r, 's>( )) } -#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn plain_text_end<'b, 'g, 'r, 's>( - context: RefContext<'b, 'g, 'r, 's>, - input: OrgSource<'s>, -) -> Res, OrgSource<'s>> { - recognize(parser_with_context!(detect_any_object_except_plain_text)( - context, - ))(input) -} - impl<'x> RematchObject<'x> for PlainText<'x> { #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn rematch_object<'b, 'g, 'r, 's>( @@ -75,6 +89,7 @@ mod tests { use crate::context::ContextElement; use crate::context::GlobalSettings; use crate::context::List; + use crate::parser::object_parser::detect_standard_set_object_sans_plain_text; use crate::types::Source; #[test] @@ -83,7 +98,9 @@ mod tests { let global_settings = GlobalSettings::default(); let initial_context = ContextElement::document_context(); let initial_context = Context::new(&global_settings, List::new(&initial_context)); - let plain_text_matcher = parser_with_context!(plain_text)(&initial_context); + let plain_text_matcher = parser_with_context!(plain_text( + detect_standard_set_object_sans_plain_text + 
))(&initial_context); let (remaining, result) = map(plain_text_matcher, Object::PlainText)(input).unwrap(); assert_eq!(Into::<&str>::into(remaining), ""); assert_eq!(result.get_source(), Into::<&str>::into(input)); diff --git a/src/parser/regular_link.rs b/src/parser/regular_link.rs index 615e77f5..e959a28f 100644 --- a/src/parser/regular_link.rs +++ b/src/parser/regular_link.rs @@ -6,7 +6,7 @@ use nom::character::complete::one_of; use nom::combinator::verify; use nom::multi::many_till; -use super::object_parser::regular_link_description_object_set; +use super::object_parser::regular_link_description_set_object; use super::org_source::OrgSource; use super::util::exit_matcher_parser; use super::util::get_consumed; @@ -99,7 +99,7 @@ pub fn description<'b, 'g, 'r, 's>( let parser_context = context.with_additional_node(&parser_context); let (remaining, (children, _exit_contents)) = verify( many_till( - parser_with_context!(regular_link_description_object_set)(&parser_context), + parser_with_context!(regular_link_description_set_object)(&parser_context), parser_with_context!(exit_matcher_parser)(&parser_context), ), |(children, _exit_contents)| !children.is_empty(), From f82d2aada13591bcceed9d2d89add9e0741b1b7e Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 18:03:50 -0400 Subject: [PATCH 25/45] Fix run_docker_compare with relative paths. 
--- scripts/run_docker_compare.bash | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/scripts/run_docker_compare.bash b/scripts/run_docker_compare.bash index 7ac76e64..bfd25e15 100755 --- a/scripts/run_docker_compare.bash +++ b/scripts/run_docker_compare.bash @@ -9,7 +9,6 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" : ${BACKTRACE:="NO"} # or YES to print a rust backtrace when panicking : ${NO_COLOR:=""} # Set to anything to disable color output -cd "$DIR/../" REALPATH=$(command -v uu-realpath || command -v realpath) MAKE=$(command -v gmake || command -v make) @@ -56,10 +55,10 @@ function launch_container { local full_path=$($REALPATH "$path") local containing_folder=$(dirname "$full_path") local file_name=$(basename "$full_path") - docker run "${additional_flags[@]}" --init --rm -i --mount type=tmpfs,destination=/tmp -v "${containing_folder}:/input:ro" -v "$($REALPATH ./):/source:ro" --mount source=cargo-cache,target=/usr/local/cargo/registry --mount source=rust-cache,target=/target --env CARGO_TARGET_DIR=/target -w /source --entrypoint "" organic-test "${additional_args[@]}" -- "/input/$file_name" + docker run "${additional_flags[@]}" --init --rm -i --mount type=tmpfs,destination=/tmp -v "${containing_folder}:/input:ro" -v "$($REALPATH "$DIR/../"):/source:ro" --mount source=cargo-cache,target=/usr/local/cargo/registry --mount source=rust-cache,target=/target --env CARGO_TARGET_DIR=/target -w /source --entrypoint "" organic-test "${additional_args[@]}" -- "/input/$file_name" done else - docker run "${additional_flags[@]}" --init --rm -i --mount type=tmpfs,destination=/tmp -v "$($REALPATH ./):/source:ro" --mount source=cargo-cache,target=/usr/local/cargo/registry --mount source=rust-cache,target=/target --env CARGO_TARGET_DIR=/target -w /source --entrypoint "" organic-test "${additional_args[@]}" + docker run "${additional_flags[@]}" --init --rm -i --mount type=tmpfs,destination=/tmp -v "$($REALPATH "$DIR/../"):/source:ro" --mount 
source=cargo-cache,target=/usr/local/cargo/registry --mount source=rust-cache,target=/target --env CARGO_TARGET_DIR=/target -w /source --entrypoint "" organic-test "${additional_args[@]}" fi } From 352c20d1d86f7132409ce5e7f82ced7ce9ea32ac Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 18:05:10 -0400 Subject: [PATCH 26/45] Fix run_docker_compare_bisect with relative paths. --- scripts/run_docker_compare_bisect.bash | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/run_docker_compare_bisect.bash b/scripts/run_docker_compare_bisect.bash index 0fa011a1..fd93ba49 100755 --- a/scripts/run_docker_compare_bisect.bash +++ b/scripts/run_docker_compare_bisect.bash @@ -5,7 +5,6 @@ set -euo pipefail IFS=$'\n\t' DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -cd "$DIR/../" REALPATH=$(command -v uu-realpath || command -v realpath) ############## Setup ######################### From 0d7a15bfeb0a799c5a0cde7755dd374eb2971903 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 18:35:33 -0400 Subject: [PATCH 27/45] Handle spaces after statistics cookies. 
--- .../sections_and_headings/statistics_cookie_with_space.org | 1 + src/parser/statistics_cookie.rs | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) create mode 100644 org_mode_samples/sections_and_headings/statistics_cookie_with_space.org diff --git a/org_mode_samples/sections_and_headings/statistics_cookie_with_space.org b/org_mode_samples/sections_and_headings/statistics_cookie_with_space.org new file mode 100644 index 00000000..6e000eff --- /dev/null +++ b/org_mode_samples/sections_and_headings/statistics_cookie_with_space.org @@ -0,0 +1 @@ +* [0/4] foo diff --git a/src/parser/statistics_cookie.rs b/src/parser/statistics_cookie.rs index 22ec3f5a..21f52bf3 100644 --- a/src/parser/statistics_cookie.rs +++ b/src/parser/statistics_cookie.rs @@ -4,6 +4,7 @@ use nom::combinator::recognize; use nom::sequence::tuple; use super::org_source::OrgSource; +use super::util::get_consumed; use super::util::maybe_consume_object_trailing_whitespace_if_not_exiting; use crate::context::parser_with_context; use crate::context::RefContext; @@ -26,10 +27,11 @@ pub fn percent_statistics_cookie<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, StatisticsCookie<'s>> { - let (remaining, source) = + let (remaining, _) = recognize(tuple((tag("["), nom::character::complete::u64, tag("%]"))))(input)?; let (remaining, _trailing_whitespace) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; + let source = get_consumed(input, remaining); Ok(( remaining, StatisticsCookie { @@ -43,7 +45,7 @@ pub fn fraction_statistics_cookie<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, StatisticsCookie<'s>> { - let (remaining, source) = recognize(tuple(( + let (remaining, _) = recognize(tuple(( tag("["), nom::character::complete::u64, tag("/"), @@ -52,6 +54,7 @@ pub fn fraction_statistics_cookie<'b, 'g, 'r, 's>( )))(input)?; let (remaining, _trailing_whitespace) = 
maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; + let source = get_consumed(input, remaining); Ok(( remaining, StatisticsCookie { From 0110d233878264033ff6a229393f9373f59d85b3 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 18:41:57 -0400 Subject: [PATCH 28/45] Update empty list test to show that we're not handling trailing whitespace for empty list items properly. --- org_mode_samples/greater_element/plain_list/empty_list_item.org | 2 ++ 1 file changed, 2 insertions(+) diff --git a/org_mode_samples/greater_element/plain_list/empty_list_item.org b/org_mode_samples/greater_element/plain_list/empty_list_item.org index 0fe3a9ab..397d6b8b 100644 --- a/org_mode_samples/greater_element/plain_list/empty_list_item.org +++ b/org_mode_samples/greater_element/plain_list/empty_list_item.org @@ -1,3 +1,5 @@ 1. 2. 3. + +* headline From 494fe5ccebffd74c02832fa9052f3cfbee726bbc Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 19:01:46 -0400 Subject: [PATCH 29/45] Handle contentless list items mid-document. 
--- src/parser/plain_list.rs | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index f74de757..8c02bffb 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -154,8 +154,10 @@ pub fn plain_list_item<'b, 'g, 'r, 's>( let (remaining, maybe_tag) = opt(tuple((space1, parser_with_context!(item_tag)(context))))(remaining)?; - let maybe_contentless_item: Res, OrgSource<'_>> = - peek(recognize(tuple((many0(blank_line), eof))))(remaining); + + let maybe_contentless_item: Res, ()> = peek(parser_with_context!( + detect_contentless_item_contents + )(context))(remaining); match maybe_contentless_item { Ok((_rem, _ws)) => { let (remaining, _trailing_ws) = opt(blank_line)(remaining)?; @@ -374,6 +376,18 @@ fn item_tag_post_gap<'b, 'g, 'r, 's>( )(input) } +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn detect_contentless_item_contents<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, ()> { + let (remaining, _) = recognize(many_till( + blank_line, + parser_with_context!(exit_matcher_parser)(context), + ))(input)?; + Ok((remaining, ())) +} + #[cfg(test)] mod tests { use super::*; From b04341882c89c6950714d74c5e3727e3e52438b7 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 19:24:03 -0400 Subject: [PATCH 30/45] Add test showing that we are not handling trailing spaces in description list tags correctly. 
--- .../greater_element/plain_list/description_list_object_key.org | 1 + 1 file changed, 1 insertion(+) diff --git a/org_mode_samples/greater_element/plain_list/description_list_object_key.org b/org_mode_samples/greater_element/plain_list/description_list_object_key.org index fcf945a1..737aede0 100644 --- a/org_mode_samples/greater_element/plain_list/description_list_object_key.org +++ b/org_mode_samples/greater_element/plain_list/description_list_object_key.org @@ -1 +1,2 @@ - {{{foo(bar)}}} :: baz +- =foo= :: bar From ceb722e47616b0f731da3a4b8ca259b6ea4e05ad Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 19:29:31 -0400 Subject: [PATCH 31/45] Check exit matcher after each space consumed for object trailing whitespace. Since description list tags need to end with a space unconsumed for " ::", we need to check the exit matcher after each space consumed. --- src/parser/util.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/parser/util.rs b/src/parser/util.rs index 6625b865..c9314c1d 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -8,6 +8,7 @@ use nom::combinator::not; use nom::combinator::opt; use nom::combinator::peek; use nom::combinator::recognize; +use nom::combinator::verify; use nom::multi::many0; use nom::multi::many_till; use nom::sequence::tuple; @@ -91,11 +92,15 @@ pub fn maybe_consume_object_trailing_whitespace_if_not_exiting<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Option>> { - if exit_matcher_parser(context, input).is_err() { - opt(space0)(input) - } else { - Ok((input, None)) - } + // We have to check exit matcher after each character because description list tags need to end with a space unconsumed (" ::"). 
+ let (remaining, _) = many_till( + verify(anychar, |c| *c == ' '), + alt(( + peek(recognize(verify(anychar, |c| *c != ' '))), + parser_with_context!(exit_matcher_parser)(context), + )), + )(input)?; + Ok((remaining, None)) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] From 344ef0445322949265b877f911812cd90305a64f Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 19:53:58 -0400 Subject: [PATCH 32/45] Add tests showing we are not handling tabs appropriately for description list tags. --- .../greater_element/plain_list/description_list_tabs.org | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 org_mode_samples/greater_element/plain_list/description_list_tabs.org diff --git a/org_mode_samples/greater_element/plain_list/description_list_tabs.org b/org_mode_samples/greater_element/plain_list/description_list_tabs.org new file mode 100644 index 00000000..2d05493d --- /dev/null +++ b/org_mode_samples/greater_element/plain_list/description_list_tabs.org @@ -0,0 +1,3 @@ +- foo :: bar +- foo :: bar +- foo :: bar From a8fbf011243ee78c13679123242479c8385e5285 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 20:02:49 -0400 Subject: [PATCH 33/45] Handle tabs for plain list descriptions. This bug probably exists in hundreds of places across the code base. I am going to have to write a "fuzzer" that replaces random whitespace with tabs to find them all. 
--- src/parser/plain_list.rs | 9 +++++---- src/parser/util.rs | 6 +++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 8c02bffb..bc3a4fec 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -332,7 +332,7 @@ fn item_tag<'b, 'g, 'r, 's>( ), |(children, _exit_contents)| !children.is_empty(), )(input)?; - let (remaining, _) = tag(" ::")(remaining)?; + let (remaining, _) = tuple((one_of(" \t"), tag("::")))(remaining)?; Ok((remaining, children)) } @@ -341,9 +341,10 @@ fn item_tag_end<'b, 'g, 'r, 's>( _context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, OrgSource<'s>> { - recognize(alt(( - tag(" :: "), - recognize(tuple((tag(" ::"), alt((line_ending, eof))))), + recognize(tuple(( + one_of(" \t"), + tag("::"), + alt((recognize(one_of(" \t")), line_ending, eof)), )))(input) } diff --git a/src/parser/util.rs b/src/parser/util.rs index c9314c1d..e521079c 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -2,13 +2,13 @@ use nom::branch::alt; use nom::character::complete::anychar; use nom::character::complete::line_ending; use nom::character::complete::none_of; +use nom::character::complete::one_of; use nom::character::complete::space0; use nom::combinator::eof; use nom::combinator::not; use nom::combinator::opt; use nom::combinator::peek; use nom::combinator::recognize; -use nom::combinator::verify; use nom::multi::many0; use nom::multi::many_till; use nom::sequence::tuple; @@ -94,9 +94,9 @@ pub fn maybe_consume_object_trailing_whitespace_if_not_exiting<'b, 'g, 'r, 's>( ) -> Res, Option>> { // We have to check exit matcher after each character because description list tags need to end with a space unconsumed (" ::"). 
let (remaining, _) = many_till( - verify(anychar, |c| *c == ' '), + one_of(" \t"), alt(( - peek(recognize(verify(anychar, |c| *c != ' '))), + peek(recognize(none_of(" \t"))), parser_with_context!(exit_matcher_parser)(context), )), )(input)?; From 5d20d3e99b580fab8f6a75bd5d73c13fae7e175b Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 20:28:21 -0400 Subject: [PATCH 34/45] Add a test showing we are not handling empty space for footnote definitions correctly. --- .../empty_space_before_and_after_content.org | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 org_mode_samples/greater_element/footnote_definition/empty_space_before_and_after_content.org diff --git a/org_mode_samples/greater_element/footnote_definition/empty_space_before_and_after_content.org b/org_mode_samples/greater_element/footnote_definition/empty_space_before_and_after_content.org new file mode 100644 index 00000000..1d2113ea --- /dev/null +++ b/org_mode_samples/greater_element/footnote_definition/empty_space_before_and_after_content.org @@ -0,0 +1,9 @@ +* Footnotes + +[fn:1] + +#+BEGIN_EXAMPLE +baz +#+END_EXAMPLE + + From 6a1bdd5feed112b80d8c12340a642b1227d921b3 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 21:11:47 -0400 Subject: [PATCH 35/45] Support blank lines before content in footnote definitions. 
--- .../empty_space_before_and_after_content.org | 1 - src/parser/footnote_definition.rs | 13 +++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/org_mode_samples/greater_element/footnote_definition/empty_space_before_and_after_content.org b/org_mode_samples/greater_element/footnote_definition/empty_space_before_and_after_content.org index 1d2113ea..68622132 100644 --- a/org_mode_samples/greater_element/footnote_definition/empty_space_before_and_after_content.org +++ b/org_mode_samples/greater_element/footnote_definition/empty_space_before_and_after_content.org @@ -6,4 +6,3 @@ baz #+END_EXAMPLE - diff --git a/src/parser/footnote_definition.rs b/src/parser/footnote_definition.rs index e413d9d6..be8b0e3f 100644 --- a/src/parser/footnote_definition.rs +++ b/src/parser/footnote_definition.rs @@ -4,8 +4,10 @@ use nom::bytes::complete::tag_no_case; use nom::bytes::complete::take_while; use nom::character::complete::digit1; use nom::character::complete::space0; +use nom::combinator::opt; use nom::combinator::recognize; use nom::combinator::verify; +use nom::multi::many0; use nom::multi::many1; use nom::multi::many_till; use nom::sequence::tuple; @@ -41,8 +43,15 @@ pub fn footnote_definition<'b, 'g, 'r, 's>( } start_of_line(input)?; // Cannot be indented. - let (remaining, (_lead_in, lbl, _lead_out, _ws)) = - tuple((tag_no_case("[fn:"), label, tag("]"), space0))(input)?; + let (remaining, (_, lbl, _, _, _)) = tuple(( + tag_no_case("[fn:"), + label, + tag("]"), + space0, + opt(verify(many0(blank_line), |lines: &Vec>| { + lines.len() <= 2 + })), + ))(input)?; let contexts = [ ContextElement::ConsumeTrailingWhitespace(true), ContextElement::Context("footnote definition"), From 21c60d10369dcc15d7b9f967a357e5767e9f9cbb Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 21:30:03 -0400 Subject: [PATCH 36/45] Do not consume trailing whitespace on the footnote definition's final element. 
--- src/parser/footnote_definition.rs | 20 ++++++++++++++++---- src/parser/plain_list.rs | 13 +------------ src/parser/util.rs | 12 ++++++++++++ 3 files changed, 29 insertions(+), 16 deletions(-) diff --git a/src/parser/footnote_definition.rs b/src/parser/footnote_definition.rs index be8b0e3f..9d5d79a6 100644 --- a/src/parser/footnote_definition.rs +++ b/src/parser/footnote_definition.rs @@ -13,6 +13,7 @@ use nom::multi::many_till; use nom::sequence::tuple; use super::org_source::OrgSource; +use super::util::include_input; use super::util::WORD_CONSTITUENT_CHARACTERS; use crate::context::parser_with_context; use crate::context::ContextElement; @@ -63,11 +64,22 @@ pub fn footnote_definition<'b, 'g, 'r, 's>( let parser_context = context.with_additional_node(&contexts[0]); let parser_context = parser_context.with_additional_node(&contexts[1]); let parser_context = parser_context.with_additional_node(&contexts[2]); - // TODO: The problem is we are not accounting for trailing whitespace like we do in section. Maybe it would be easier if we passed down whether or not to parse trailing whitespace into the element matcher similar to how tag takes in parameters. let element_matcher = parser_with_context!(element(true))(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); - let (remaining, (children, _exit_contents)) = - many_till(element_matcher, exit_matcher)(remaining)?; + let (mut remaining, (mut children, _exit_contents)) = + many_till(include_input(element_matcher), exit_matcher)(remaining)?; + + // Re-parse the last element of the footnote definition with consume trailing whitespace off because the trailing whitespace needs to belong to the footnote definition, not the contents. 
+ if context.should_consume_trailing_whitespace() { + if let Some((final_item_input, _)) = children.pop() { + let final_item_context = ContextElement::ConsumeTrailingWhitespace(false); + let final_item_context = parser_context.with_additional_node(&final_item_context); + let (remain, reparsed_final_item) = + parser_with_context!(element(true))(&final_item_context)(final_item_input)?; + children.push((final_item_input, reparsed_final_item)); + remaining = remain; + } + } let source = get_consumed(input, remaining); Ok(( @@ -75,7 +87,7 @@ pub fn footnote_definition<'b, 'g, 'r, 's>( FootnoteDefinition { source: source.into(), label: lbl.into(), - children, + children: children.into_iter().map(|(_, item)| item).collect(), }, )) } diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index bc3a4fec..4f4bc498 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -19,6 +19,7 @@ use nom::sequence::tuple; use super::element_parser::element; use super::object_parser::standard_set_object; use super::org_source::OrgSource; +use super::util::include_input; use super::util::non_whitespace_character; use crate::context::parser_with_context; use crate::context::ContextElement; @@ -225,18 +226,6 @@ pub fn plain_list_item<'b, 'g, 'r, 's>( )); } -fn include_input<'s, F, O>( - mut inner: F, -) -> impl FnMut(OrgSource<'s>) -> Res, (OrgSource<'s>, O)> -where - F: FnMut(OrgSource<'s>) -> Res, O>, -{ - move |input: OrgSource<'_>| { - let (remaining, output) = inner(input)?; - Ok((remaining, (input, output))) - } -} - #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn bullet<'s>(i: OrgSource<'s>) -> Res, OrgSource<'s>> { alt(( diff --git a/src/parser/util.rs b/src/parser/util.rs index e521079c..53deb7c2 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -211,3 +211,15 @@ pub fn text_until_eol<'r, 's>( .map(|(_remaining, line)| Into::<&str>::into(line))?; Ok(line.trim()) } + +pub fn include_input<'s, F, O>( + mut inner: F, +) 
-> impl FnMut(OrgSource<'s>) -> Res, (OrgSource<'s>, O)> +where + F: FnMut(OrgSource<'s>) -> Res, O>, +{ + move |input: OrgSource<'_>| { + let (remaining, output) = inner(input)?; + Ok((remaining, (input, output))) + } +} From d1fe2f6b09ab86d2e6d17c042ba52b383cd547ae Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 21:50:32 -0400 Subject: [PATCH 37/45] Update the rest of the scripts to work with relative paths. --- scripts/callgrind.bash | 8 ++++---- scripts/perf.bash | 8 +++----- scripts/run_docker_integration_test.bash | 3 +-- scripts/run_integration_test.bash | 3 +-- scripts/time_parse.bash | 6 ++---- 5 files changed, 11 insertions(+), 17 deletions(-) diff --git a/scripts/callgrind.bash b/scripts/callgrind.bash index 3bc909c4..9f13c7b2 100755 --- a/scripts/callgrind.bash +++ b/scripts/callgrind.bash @@ -4,10 +4,10 @@ set -euo pipefail IFS=$'\n\t' DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -cd "$DIR/../" -RUSTFLAGS="-C opt-level=0" cargo build --no-default-features -valgrind --tool=callgrind --callgrind-out-file=callgrind.out target/debug/parse "${@}" + +(cd "$DIR/../" && RUSTFLAGS="-C opt-level=0" cargo build --no-default-features) +valgrind --tool=callgrind --callgrind-out-file="$DIR/../callgrind.out" "$DIR/../target/debug/parse" "${@}" echo "You probably want to run:" -echo "callgrind_annotate --auto=yes callgrind.out" +echo "callgrind_annotate --auto=yes '$DIR/../callgrind.out'" diff --git a/scripts/perf.bash b/scripts/perf.bash index f40e58be..aa7ae329 100755 --- a/scripts/perf.bash +++ b/scripts/perf.bash @@ -6,8 +6,6 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" : ${PROFILE:="perf"} -cd "$DIR/../" - function main { local additional_flags=() if [ "$PROFILE" = "dev" ] || [ "$PROFILE" = "debug" ]; then @@ -15,12 +13,12 @@ function main { else additional_flags+=(--profile "$PROFILE") fi - cargo build --no-default-features "${additional_flags[@]}" - perf record --freq=2000 --call-graph dwarf --output=perf.data 
target/${PROFILE}/parse "${@}" + (cd "$DIR/../" && cargo build --no-default-features "${additional_flags[@]}") + perf record --freq=2000 --call-graph dwarf --output="$DIR/../perf.data" "$DIR/../target/${PROFILE}/parse" "${@}" # Convert to a format firefox will read # flags to consider --show-info - perf script -F +pid --input perf.data > perf.firefox + perf script -F +pid --input "$DIR/../perf.data" > "$DIR/../perf.firefox" echo "You probably want to go to https://profiler.firefox.com/" echo "Either that or run hotspot" diff --git a/scripts/run_docker_integration_test.bash b/scripts/run_docker_integration_test.bash index 5ba5a373..bcf2646d 100755 --- a/scripts/run_docker_integration_test.bash +++ b/scripts/run_docker_integration_test.bash @@ -6,7 +6,6 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" : ${NO_COLOR:=""} # Set to anything to disable color output -cd "$DIR/../" REALPATH=$(command -v uu-realpath || command -v realpath) MAKE=$(command -v gmake || command -v make) @@ -56,7 +55,7 @@ cargo test --no-default-features --features compare --no-fail-fast --lib --test EOF ) - docker run "${additional_flags[@]}" --init --rm --read-only --mount type=tmpfs,destination=/tmp -v "$($REALPATH ./):/source:ro" --mount source=cargo-cache,target=/usr/local/cargo/registry --mount source=rust-cache,target=/target --env CARGO_TARGET_DIR=/target -w /source --entrypoint "" organic-test sh -c "$init_script" + docker run "${additional_flags[@]}" --init --rm --read-only --mount type=tmpfs,destination=/tmp -v "$($REALPATH "$DIR/../"):/source:ro" --mount source=cargo-cache,target=/usr/local/cargo/registry --mount source=rust-cache,target=/target --env CARGO_TARGET_DIR=/target -w /source --entrypoint "" organic-test sh -c "$init_script" } diff --git a/scripts/run_integration_test.bash b/scripts/run_integration_test.bash index 32da81da..095bee99 100755 --- a/scripts/run_integration_test.bash +++ b/scripts/run_integration_test.bash @@ -4,7 +4,6 @@ set -euo pipefail IFS=$'\n\t' 
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -cd "$DIR/../" REALPATH=$(command -v uu-realpath || command -v realpath) function main { @@ -12,7 +11,7 @@ function main { local test while read test; do - cargo test --no-default-features --features compare --no-fail-fast --test test_loader "$test" -- --show-output + (cd "$DIR/../" && cargo test --no-default-features --features compare --no-fail-fast --test test_loader "$test" -- --show-output) done<<<"$test_names" } diff --git a/scripts/time_parse.bash b/scripts/time_parse.bash index 817bada2..6409f3f3 100755 --- a/scripts/time_parse.bash +++ b/scripts/time_parse.bash @@ -7,8 +7,6 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" : ${PROFILE:="release-lto"} -cd "$DIR/../" - function main { local additional_flags=() if [ "$PROFILE" = "dev" ] || [ "$PROFILE" = "debug" ]; then @@ -16,8 +14,8 @@ function main { else additional_flags+=(--profile "$PROFILE") fi - cargo build --no-default-features "${additional_flags[@]}" - time ./target/${PROFILE}/parse "${@}" + (cd "$DIR/../" && cargo build --no-default-features "${additional_flags[@]}") + time "$DIR/../target/${PROFILE}/parse" "${@}" } main "${@}" From f30069efe765b80e20760c97fe4428500bbcad3a Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 21:59:02 -0400 Subject: [PATCH 38/45] Add a test showing we're not handling colons in keyword keys correctly. 
--- .../lesser_element/keyword/keyword_with_colon_in_key.org | 1 + 1 file changed, 1 insertion(+) create mode 100644 org_mode_samples/lesser_element/keyword/keyword_with_colon_in_key.org diff --git a/org_mode_samples/lesser_element/keyword/keyword_with_colon_in_key.org b/org_mode_samples/lesser_element/keyword/keyword_with_colon_in_key.org new file mode 100644 index 00000000..997a5d72 --- /dev/null +++ b/org_mode_samples/lesser_element/keyword/keyword_with_colon_in_key.org @@ -0,0 +1 @@ +#+title:foo:bar: baz: lorem: ipsum From 7545fb7e1af0cd3a39ed7cd24c863266da9e2543 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 22:17:10 -0400 Subject: [PATCH 39/45] Support keywords with colons in the key and without a space between the colon and value. --- src/parser/keyword.rs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/parser/keyword.rs b/src/parser/keyword.rs index 200670a2..321118ee 100644 --- a/src/parser/keyword.rs +++ b/src/parser/keyword.rs @@ -5,8 +5,8 @@ use nom::bytes::complete::tag_no_case; use nom::bytes::complete::take_while1; use nom::character::complete::anychar; use nom::character::complete::line_ending; +use nom::character::complete::one_of; use nom::character::complete::space0; -use nom::character::complete::space1; use nom::combinator::consumed; use nom::combinator::eof; use nom::combinator::not; @@ -66,7 +66,7 @@ fn _filtered_keyword<'s, F: Matcher>( } Err(_) => {} }; - let (remaining, _ws) = space1(remaining)?; + let (remaining, _ws) = space0(remaining)?; let (remaining, parsed_value) = recognize(many_till( anychar, peek(tuple((space0, alt((line_ending, eof))))), @@ -113,11 +113,15 @@ fn babel_call_key<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn regular_keyword_key<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { - recognize(tuple(( - not(peek(tag_no_case("call"))), - not(peek(tag_no_case("begin"))), - 
is_not(" \t\r\n:"), - )))(input) + not(peek(alt((tag_no_case("call"), tag_no_case("begin")))))(input)?; + recognize(many_till( + anychar, + peek(alt(( + recognize(one_of(" \t\r\n")), // Give up if we hit whitespace + recognize(tuple((tag(":"), one_of(" \t\r\n")))), // Stop if we see a colon followed by whitespace + recognize(tuple((tag(":"), is_not(" \t\r\n:"), not(tag(":"))))), // Stop if we see a colon that is the last colon before whitespace. This is for keywords like "#+foo:bar:baz: lorem: ipsum" which would have the key "foo:bar:baz". + ))), + ))(input) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] From d79035e14d4713d8a2b792f124e6b9aaf0f38035 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 22:21:19 -0400 Subject: [PATCH 40/45] Add a test showing we are not handling empty statistics cookies. --- org_mode_samples/object/statistics_cookie/empty.org | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 org_mode_samples/object/statistics_cookie/empty.org diff --git a/org_mode_samples/object/statistics_cookie/empty.org b/org_mode_samples/object/statistics_cookie/empty.org new file mode 100644 index 00000000..f0168ae6 --- /dev/null +++ b/org_mode_samples/object/statistics_cookie/empty.org @@ -0,0 +1,4 @@ +[/] +[/2] +[3/] +[%] From 0105b49d0d425658b802120b9d90a18c991730c8 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 22:21:49 -0400 Subject: [PATCH 41/45] Handle empty statistics cookies. 
--- src/parser/statistics_cookie.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/parser/statistics_cookie.rs b/src/parser/statistics_cookie.rs index 21f52bf3..d83d0ead 100644 --- a/src/parser/statistics_cookie.rs +++ b/src/parser/statistics_cookie.rs @@ -1,5 +1,6 @@ use nom::branch::alt; use nom::bytes::complete::tag; +use nom::combinator::opt; use nom::combinator::recognize; use nom::sequence::tuple; @@ -27,8 +28,11 @@ pub fn percent_statistics_cookie<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, StatisticsCookie<'s>> { - let (remaining, _) = - recognize(tuple((tag("["), nom::character::complete::u64, tag("%]"))))(input)?; + let (remaining, _) = recognize(tuple(( + tag("["), + opt(nom::character::complete::u64), + tag("%]"), + )))(input)?; let (remaining, _trailing_whitespace) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); @@ -47,9 +51,9 @@ pub fn fraction_statistics_cookie<'b, 'g, 'r, 's>( ) -> Res, StatisticsCookie<'s>> { let (remaining, _) = recognize(tuple(( tag("["), - nom::character::complete::u64, + opt(nom::character::complete::u64), tag("/"), - nom::character::complete::u64, + opt(nom::character::complete::u64), tag("]"), )))(input)?; let (remaining, _trailing_whitespace) = From cc56b79683435b7db8f6bb7b8e47bc60c429f665 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 22:42:24 -0400 Subject: [PATCH 42/45] Add a test showing we're not handling table formulas. 
--- org_mode_samples/greater_element/table/with_formulas.org | 8 ++++++++ src/compare/diff.rs | 8 ++++++++ 2 files changed, 16 insertions(+) create mode 100644 org_mode_samples/greater_element/table/with_formulas.org diff --git a/org_mode_samples/greater_element/table/with_formulas.org b/org_mode_samples/greater_element/table/with_formulas.org new file mode 100644 index 00000000..a48f48a4 --- /dev/null +++ b/org_mode_samples/greater_element/table/with_formulas.org @@ -0,0 +1,8 @@ +| Name | Price | Quantity | Total | +|------+-------+----------+-------| +| foo | 7 | 4 | 28 | +| bar | 3.5 | 3 | 10.5 | +|------+-------+----------+-------| +| | | 7 | 38.5 | +#+tblfm: $4=$2*$3::@>$4=vsum(@2..@-1) +#+tblfm: @>$3=vsum(@2..@-1) diff --git a/src/compare/diff.rs b/src/compare/diff.rs index bc812d18..7e59baf0 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -1075,6 +1075,10 @@ fn compare_table<'s>( Ok(_) => {} }; + // TODO: Compare :type :tblfm :value + // + // :tblfm is a list () filled with quoted strings containing the value for any tblfm keywords at the end of the table. + for (emacs_child, rust_child) in children.iter().skip(2).zip(rust.children.iter()) { child_status.push(compare_table_row(source, emacs_child, rust_child)?); } @@ -1112,6 +1116,10 @@ fn compare_table_row<'s>( Ok(_) => {} }; + // TODO: Compare :type + // + // :type is an unquoted atom of either standard or rule + for (emacs_child, rust_child) in children.iter().skip(2).zip(rust.children.iter()) { child_status.push(compare_table_cell(source, emacs_child, rust_child)?); } From 84d2babda9ab6ed8b08746a0df39de7d70716c6d Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 22:47:07 -0400 Subject: [PATCH 43/45] Parse table formulas. 
--- src/parser/keyword.rs | 13 +++++++++++++ src/parser/table.rs | 5 +++++ 2 files changed, 18 insertions(+) diff --git a/src/parser/keyword.rs b/src/parser/keyword.rs index 321118ee..c5cc4763 100644 --- a/src/parser/keyword.rs +++ b/src/parser/keyword.rs @@ -111,6 +111,19 @@ fn babel_call_key<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> tag_no_case("call")(input) } +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +pub fn table_formula_keyword<'b, 'g, 'r, 's>( + _context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, Keyword<'s>> { + filtered_keyword(table_formula_key)(input) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn table_formula_key<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { + tag_no_case("tblfm")(input) +} + #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn regular_keyword_key<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { not(peek(alt((tag_no_case("call"), tag_no_case("begin")))))(input)?; diff --git a/src/parser/table.rs b/src/parser/table.rs index 2dbe014d..69c829e9 100644 --- a/src/parser/table.rs +++ b/src/parser/table.rs @@ -8,10 +8,12 @@ use nom::combinator::not; use nom::combinator::peek; use nom::combinator::recognize; use nom::combinator::verify; +use nom::multi::many0; use nom::multi::many1; use nom::multi::many_till; use nom::sequence::tuple; +use super::keyword::table_formula_keyword; use super::object_parser::table_cell_set_object; use super::org_source::OrgSource; use super::util::exit_matcher_parser; @@ -56,6 +58,9 @@ pub fn org_mode_table<'b, 'g, 'r, 's>( let (remaining, (children, _exit_contents)) = many_till(org_mode_table_row_matcher, exit_matcher)(input)?; + let (remaining, _formulas) = + many0(parser_with_context!(table_formula_keyword)(context))(remaining)?; + // TODO: Consume trailing formulas let source = get_consumed(input, remaining); From 80f7098f9b42f8ea6c2d1f5bf649d0cc5eee673f Mon Sep 17 00:00:00 2001 
From: Tom Alexander Date: Fri, 8 Sep 2023 23:05:04 -0400 Subject: [PATCH 44/45] Compare table formulas. --- src/compare/diff.rs | 41 +++++++++++++++++++++++++++++++++--- src/parser/table.rs | 3 ++- src/types/greater_element.rs | 2 ++ 3 files changed, 42 insertions(+), 4 deletions(-) diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 7e59baf0..c7289d1f 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -1,3 +1,4 @@ +use std::collections::BTreeSet; use std::collections::HashSet; use super::util::assert_bounds; @@ -491,7 +492,7 @@ fn compare_heading<'s>( if rust.stars.to_string() != level { this_status = DiffStatus::Bad; message = Some(format!( - "Headline level do not much (emacs != rust): {} != {}", + "Headline level do not match (emacs != rust): {} != {}", level, rust.stars )) } @@ -1075,9 +1076,43 @@ fn compare_table<'s>( Ok(_) => {} }; - // TODO: Compare :type :tblfm :value + // Compare formulas // - // :tblfm is a list () filled with quoted strings containing the value for any tblfm keywords at the end of the table. + // :tblfm is either nil or a list () filled with quoted strings containing the value for any tblfm keywords at the end of the table. 
+ let emacs_formulas = get_property(emacs, ":tblfm")?; + if let Some(emacs_formulas) = emacs_formulas { + let emacs_formulas = emacs_formulas.as_list()?; + if emacs_formulas.len() != rust.formulas.len() { + this_status = DiffStatus::Bad; + message = Some(format!( + "Formulas do not match (emacs != rust): {:?} != {:?}", + emacs_formulas, rust.formulas + )) + } else { + let atoms = emacs_formulas + .into_iter() + .map(Token::as_atom) + .collect::, _>>()?; + let unquoted = atoms + .into_iter() + .map(unquote) + .collect::, _>>()?; + for kw in &rust.formulas { + if !unquoted.contains(kw.value) { + this_status = DiffStatus::Bad; + message = Some(format!("Could not find formula in emacs: {}", kw.value)) + } + } + } + } else { + if !rust.formulas.is_empty() { + this_status = DiffStatus::Bad; + message = Some(format!( + "Formulas do not match (emacs != rust): {:?} != {:?}", + emacs_formulas, rust.formulas + )) + } + } for (emacs_child, rust_child) in children.iter().skip(2).zip(rust.children.iter()) { child_status.push(compare_table_row(source, emacs_child, rust_child)?); diff --git a/src/parser/table.rs b/src/parser/table.rs index 69c829e9..e5fd7ea5 100644 --- a/src/parser/table.rs +++ b/src/parser/table.rs @@ -58,7 +58,7 @@ pub fn org_mode_table<'b, 'g, 'r, 's>( let (remaining, (children, _exit_contents)) = many_till(org_mode_table_row_matcher, exit_matcher)(input)?; - let (remaining, _formulas) = + let (remaining, formulas) = many0(parser_with_context!(table_formula_keyword)(context))(remaining)?; // TODO: Consume trailing formulas @@ -68,6 +68,7 @@ pub fn org_mode_table<'b, 'g, 'r, 's>( remaining, Table { source: source.into(), + formulas, children, }, )) diff --git a/src/types/greater_element.rs b/src/types/greater_element.rs index 9a129006..e897945f 100644 --- a/src/types/greater_element.rs +++ b/src/types/greater_element.rs @@ -1,5 +1,6 @@ use super::element::Element; use super::lesser_element::TableCell; +use super::Keyword; use super::Object; use super::Source; @@ 
-63,6 +64,7 @@ pub struct NodeProperty<'s> { #[derive(Debug)] pub struct Table<'s> { pub source: &'s str, + pub formulas: Vec>, pub children: Vec>, } From 5587e19f1671b1849752572c2ea9cb726278f2ae Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 23:12:15 -0400 Subject: [PATCH 45/45] Cleanup. --- Cargo.toml | 2 +- src/parser/section.rs | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e8981ec2..b2337923 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,7 +39,7 @@ tracing-subscriber = { version = "0.3.17", optional = true, features = ["env-fil walkdir = "2.3.3" [features] -default = ["compare"] +default = [] compare = [] tracing = ["dep:opentelemetry", "dep:opentelemetry-otlp", "dep:opentelemetry-semantic-conventions", "dep:tokio", "dep:tracing", "dep:tracing-opentelemetry", "dep:tracing-subscriber"] diff --git a/src/parser/section.rs b/src/parser/section.rs index 33b16854..bcc4f36c 100644 --- a/src/parser/section.rs +++ b/src/parser/section.rs @@ -29,7 +29,6 @@ pub fn zeroth_section<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Section<'s>> { - // TODO: The zeroth section is specialized so it probably needs its own parser let contexts = [ ContextElement::ConsumeTrailingWhitespace(true), ContextElement::Context("section"), @@ -88,7 +87,6 @@ pub fn section<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, mut input: OrgSource<'s>, ) -> Res, Section<'s>> { - // TODO: The zeroth section is specialized so it probably needs its own parser let contexts = [ ContextElement::ConsumeTrailingWhitespace(true), ContextElement::Context("section"),