From f79d07a7c8f03c4c325ada7d23a89efb87ae8fe7 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Wed, 6 Sep 2023 19:49:04 -0400 Subject: [PATCH 01/45] Compare howard abrams dotfiles. --- docker/organic_test/Dockerfile | 4 ++++ docker/organic_test/foreign_document_test_entrypoint.sh | 1 + 2 files changed, 5 insertions(+) diff --git a/docker/organic_test/Dockerfile b/docker/organic_test/Dockerfile index 246f167..a9c39f0 100644 --- a/docker/organic_test/Dockerfile +++ b/docker/organic_test/Dockerfile @@ -36,7 +36,10 @@ ENTRYPOINT ["cargo", "test"] FROM build as foreign-document-gather +ARG HOWARD_ABRAMS_DOT_FILES_VERSION=1b54fe75d74670dc7bcbb6b01ea560c45528c628 +ARG HOWARD_ABRAMS_DOT_FILES_PATH=/foreign_documents/howardabrams/dot-files RUN mkdir /foreign_documents +RUN mkdir -p $HOWARD_ABRAMS_DOT_FILES_PATH && git -C $HOWARD_ABRAMS_DOT_FILES_PATH init --initial-branch=main && git -C $HOWARD_ABRAMS_DOT_FILES_PATH remote add origin https://github.com/howardabrams/dot-files.git && git -C $HOWARD_ABRAMS_DOT_FILES_PATH fetch origin $HOWARD_ABRAMS_DOT_FILES_VERSION && git -C $HOWARD_ABRAMS_DOT_FILES_PATH checkout FETCH_HEAD FROM tester as foreign-document-test @@ -44,6 +47,7 @@ RUN apk add --no-cache bash coreutils RUN mkdir /foreign_documents COPY --from=build-org-mode /root/org-mode /foreign_documents/org-mode COPY --from=build-emacs /root/emacs /foreign_documents/emacs +COPY --from=foreign-document-gather /foreign_documents/howardabrams/dot-files /foreign_documents/howardabrams/dot-files COPY foreign_document_test_entrypoint.sh /entrypoint.sh RUN chmod +x /entrypoint.sh ENTRYPOINT ["/entrypoint.sh"] diff --git a/docker/organic_test/foreign_document_test_entrypoint.sh b/docker/organic_test/foreign_document_test_entrypoint.sh index f722c4d..b6ba9b1 100644 --- a/docker/organic_test/foreign_document_test_entrypoint.sh +++ b/docker/organic_test/foreign_document_test_entrypoint.sh @@ -27,6 +27,7 @@ function main { run_compare_function "org-mode" compare_all_org_document 
"/foreign_documents/org-mode" run_compare_function "emacs" compare_all_org_document "/foreign_documents/emacs" + run_compare_function "howard_abrams_dot_files" compare_all_org_document "/foreign_documents/howardabrams/dot-files" } function green_text { From dda2b1e69f016b8b23a199eda28906d15c75d619 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Wed, 6 Sep 2023 20:56:36 -0400 Subject: [PATCH 02/45] Compare howard abrams hamacs. --- docker/organic_test/Dockerfile | 9 +++- .../foreign_document_test_entrypoint.sh | 44 ++++++++++++++++--- 2 files changed, 45 insertions(+), 8 deletions(-) diff --git a/docker/organic_test/Dockerfile b/docker/organic_test/Dockerfile index a9c39f0..1154158 100644 --- a/docker/organic_test/Dockerfile +++ b/docker/organic_test/Dockerfile @@ -38,8 +38,13 @@ ENTRYPOINT ["cargo", "test"] FROM build as foreign-document-gather ARG HOWARD_ABRAMS_DOT_FILES_VERSION=1b54fe75d74670dc7bcbb6b01ea560c45528c628 ARG HOWARD_ABRAMS_DOT_FILES_PATH=/foreign_documents/howardabrams/dot-files +ARG HOWARD_ABRAMS_DOT_FILES_REPO=https://github.com/howardabrams/dot-files.git RUN mkdir /foreign_documents -RUN mkdir -p $HOWARD_ABRAMS_DOT_FILES_PATH && git -C $HOWARD_ABRAMS_DOT_FILES_PATH init --initial-branch=main && git -C $HOWARD_ABRAMS_DOT_FILES_PATH remote add origin https://github.com/howardabrams/dot-files.git && git -C $HOWARD_ABRAMS_DOT_FILES_PATH fetch origin $HOWARD_ABRAMS_DOT_FILES_VERSION && git -C $HOWARD_ABRAMS_DOT_FILES_PATH checkout FETCH_HEAD +RUN mkdir -p $HOWARD_ABRAMS_DOT_FILES_PATH && git -C $HOWARD_ABRAMS_DOT_FILES_PATH init --initial-branch=main && git -C $HOWARD_ABRAMS_DOT_FILES_PATH remote add origin $HOWARD_ABRAMS_DOT_FILES_REPO && git -C $HOWARD_ABRAMS_DOT_FILES_PATH fetch origin $HOWARD_ABRAMS_DOT_FILES_VERSION && git -C $HOWARD_ABRAMS_DOT_FILES_PATH checkout FETCH_HEAD +ARG HOWARD_ABRAMS_HAMACS_VERSION=da51188cc195d41882175d412fe40a8bc5730c5c +ARG HOWARD_ABRAMS_HAMACS_PATH=/foreign_documents/howardabrams/hamacs +ARG 
HOWARD_ABRAMS_HAMACS_REPO=https://github.com/howardabrams/hamacs.git +RUN mkdir -p $HOWARD_ABRAMS_HAMACS_PATH && git -C $HOWARD_ABRAMS_HAMACS_PATH init --initial-branch=main && git -C $HOWARD_ABRAMS_HAMACS_PATH remote add origin $HOWARD_ABRAMS_HAMACS_REPO && git -C $HOWARD_ABRAMS_HAMACS_PATH fetch origin $HOWARD_ABRAMS_HAMACS_VERSION && git -C $HOWARD_ABRAMS_HAMACS_PATH checkout FETCH_HEAD FROM tester as foreign-document-test @@ -47,7 +52,7 @@ RUN apk add --no-cache bash coreutils RUN mkdir /foreign_documents COPY --from=build-org-mode /root/org-mode /foreign_documents/org-mode COPY --from=build-emacs /root/emacs /foreign_documents/emacs -COPY --from=foreign-document-gather /foreign_documents/howardabrams/dot-files /foreign_documents/howardabrams/dot-files +COPY --from=foreign-document-gather /foreign_documents/howardabrams /foreign_documents/howardabrams COPY foreign_document_test_entrypoint.sh /entrypoint.sh RUN chmod +x /entrypoint.sh ENTRYPOINT ["/entrypoint.sh"] diff --git a/docker/organic_test/foreign_document_test_entrypoint.sh b/docker/organic_test/foreign_document_test_entrypoint.sh index b6ba9b1..8e56197 100644 --- a/docker/organic_test/foreign_document_test_entrypoint.sh +++ b/docker/organic_test/foreign_document_test_entrypoint.sh @@ -25,9 +25,23 @@ function main { fi PARSE="${CARGO_TARGET_DIR}/release-lto/parse" - run_compare_function "org-mode" compare_all_org_document "/foreign_documents/org-mode" - run_compare_function "emacs" compare_all_org_document "/foreign_documents/emacs" - run_compare_function "howard_abrams_dot_files" compare_all_org_document "/foreign_documents/howardabrams/dot-files" + local all_status=0 + set +e + + (run_compare_function "org-mode" compare_all_org_document "/foreign_documents/org-mode") + if [ "$?" -ne 0 ]; then all_status=1; fi + (run_compare_function "emacs" compare_all_org_document "/foreign_documents/emacs") + if [ "$?" 
-ne 0 ]; then all_status=1; fi + (run_compare_function "howard_abrams" compare_howard_abrams) + if [ "$?" -ne 0 ]; then all_status=1; fi + + set -e + if [ "$all_status" -ne 0 ]; then + echo "$(red_text "Some tests failed.")" + else + echo "$(green_text "All tests passed.")" + fi + return "$all_status" } function green_text { @@ -73,17 +87,22 @@ function run_compare_function { function compare_all_org_document { local root_dir="$1" local target_document - find "$root_dir" -type f -iname '*.org' | while read target_document; do + local all_status=0 + while read target_document; do local relative_path=$($REALPATH --relative-to "$root_dir" "$target_document") + set +e (run_compare "$relative_path" "$target_document") - done + if [ "$?" -ne 0 ]; then all_status=1; fi + set -e + done<<<$(find "$root_dir" -type f -iname '*.org') + return "$all_status" } function run_compare { local name="$1" local target_document="$2" set +e - $PARSE "$target_document" &> /dev/null + ($PARSE "$target_document" &> /dev/null) local status=$? set -e if [ "$status" -eq 0 ]; then @@ -94,4 +113,17 @@ function run_compare { fi } +function compare_howard_abrams { + local all_status=0 + set +e + + (run_compare_function "dot_files" compare_all_org_document "/foreign_documents/howardabrams/dot-files") + if [ "$?" -ne 0 ]; then all_status=1; fi + (run_compare_function "hamacs" compare_all_org_document "/foreign_documents/howardabrams/hamacs") + if [ "$?" -ne 0 ]; then all_status=1; fi + + set -e + return "$all_status" +} + main "${@}" From fcea7e5a4bcf1ebf637413907f3d90e5ee94045a Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Wed, 6 Sep 2023 21:11:46 -0400 Subject: [PATCH 03/45] Add howard abrams demo-it and the upstream doomemacs repo to compare. 
--- docker/organic_test/Dockerfile | 13 +++++++++++++ .../foreign_document_test_entrypoint.sh | 6 +++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/docker/organic_test/Dockerfile b/docker/organic_test/Dockerfile index 1154158..b62517e 100644 --- a/docker/organic_test/Dockerfile +++ b/docker/organic_test/Dockerfile @@ -36,16 +36,28 @@ ENTRYPOINT ["cargo", "test"] FROM build as foreign-document-gather + ARG HOWARD_ABRAMS_DOT_FILES_VERSION=1b54fe75d74670dc7bcbb6b01ea560c45528c628 ARG HOWARD_ABRAMS_DOT_FILES_PATH=/foreign_documents/howardabrams/dot-files ARG HOWARD_ABRAMS_DOT_FILES_REPO=https://github.com/howardabrams/dot-files.git RUN mkdir /foreign_documents RUN mkdir -p $HOWARD_ABRAMS_DOT_FILES_PATH && git -C $HOWARD_ABRAMS_DOT_FILES_PATH init --initial-branch=main && git -C $HOWARD_ABRAMS_DOT_FILES_PATH remote add origin $HOWARD_ABRAMS_DOT_FILES_REPO && git -C $HOWARD_ABRAMS_DOT_FILES_PATH fetch origin $HOWARD_ABRAMS_DOT_FILES_VERSION && git -C $HOWARD_ABRAMS_DOT_FILES_PATH checkout FETCH_HEAD + ARG HOWARD_ABRAMS_HAMACS_VERSION=da51188cc195d41882175d412fe40a8bc5730c5c ARG HOWARD_ABRAMS_HAMACS_PATH=/foreign_documents/howardabrams/hamacs ARG HOWARD_ABRAMS_HAMACS_REPO=https://github.com/howardabrams/hamacs.git RUN mkdir -p $HOWARD_ABRAMS_HAMACS_PATH && git -C $HOWARD_ABRAMS_HAMACS_PATH init --initial-branch=main && git -C $HOWARD_ABRAMS_HAMACS_PATH remote add origin $HOWARD_ABRAMS_HAMACS_REPO && git -C $HOWARD_ABRAMS_HAMACS_PATH fetch origin $HOWARD_ABRAMS_HAMACS_VERSION && git -C $HOWARD_ABRAMS_HAMACS_PATH checkout FETCH_HEAD +ARG HOWARD_ABRAMS_DEMO_IT_VERSION=e399fd7ceb73caeae7cb50b247359bafcaee2a3f +ARG HOWARD_ABRAMS_DEMO_IT_PATH=/foreign_documents/howardabrams/demo-it +ARG HOWARD_ABRAMS_DEMO_IT_REPO=https://github.com/howardabrams/demo-it.git +RUN mkdir -p $HOWARD_ABRAMS_DEMO_IT_PATH && git -C $HOWARD_ABRAMS_DEMO_IT_PATH init --initial-branch=main && git -C $HOWARD_ABRAMS_DEMO_IT_PATH remote add origin $HOWARD_ABRAMS_DEMO_IT_REPO && git -C 
$HOWARD_ABRAMS_DEMO_IT_PATH fetch origin $HOWARD_ABRAMS_DEMO_IT_VERSION && git -C $HOWARD_ABRAMS_DEMO_IT_PATH checkout FETCH_HEAD + +ARG DOOMEMACS_VERSION=42d5fd83504f8aa80f3248036006fbcd49222943 +ARG DOOMEMACS_PATH=/foreign_documents/doomemacs +ARG DOOMEMACS_REPO=https://github.com/doomemacs/doomemacs.git +RUN mkdir -p $DOOMEMACS_PATH && git -C $DOOMEMACS_PATH init --initial-branch=main && git -C $DOOMEMACS_PATH remote add origin $DOOMEMACS_REPO && git -C $DOOMEMACS_PATH fetch origin $DOOMEMACS_VERSION && git -C $DOOMEMACS_PATH checkout FETCH_HEAD + FROM tester as foreign-document-test RUN apk add --no-cache bash coreutils @@ -53,6 +65,7 @@ RUN mkdir /foreign_documents COPY --from=build-org-mode /root/org-mode /foreign_documents/org-mode COPY --from=build-emacs /root/emacs /foreign_documents/emacs COPY --from=foreign-document-gather /foreign_documents/howardabrams /foreign_documents/howardabrams +COPY --from=foreign-document-gather /foreign_documents/doomemacs /foreign_documents/doomemacs COPY foreign_document_test_entrypoint.sh /entrypoint.sh RUN chmod +x /entrypoint.sh ENTRYPOINT ["/entrypoint.sh"] diff --git a/docker/organic_test/foreign_document_test_entrypoint.sh b/docker/organic_test/foreign_document_test_entrypoint.sh index 8e56197..1fcde1f 100644 --- a/docker/organic_test/foreign_document_test_entrypoint.sh +++ b/docker/organic_test/foreign_document_test_entrypoint.sh @@ -34,6 +34,8 @@ function main { if [ "$?" -ne 0 ]; then all_status=1; fi (run_compare_function "howard_abrams" compare_howard_abrams) if [ "$?" -ne 0 ]; then all_status=1; fi + (run_compare_function "doomemacs" compare_all_org_document "/foreign_documents/doomemacs") + if [ "$?" 
-ne 0 ]; then all_status=1; fi set -e if [ "$all_status" -ne 0 ]; then @@ -117,10 +119,12 @@ function compare_howard_abrams { local all_status=0 set +e - (run_compare_function "dot_files" compare_all_org_document "/foreign_documents/howardabrams/dot-files") + (run_compare_function "dot-files" compare_all_org_document "/foreign_documents/howardabrams/dot-files") if [ "$?" -ne 0 ]; then all_status=1; fi (run_compare_function "hamacs" compare_all_org_document "/foreign_documents/howardabrams/hamacs") if [ "$?" -ne 0 ]; then all_status=1; fi + (run_compare_function "demo-it" compare_all_org_document "/foreign_documents/howardabrams/demo-it") + if [ "$?" -ne 0 ]; then all_status=1; fi set -e return "$all_status" From 827f3e1c98230e6f2196ca4f3ce831f963538177 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Wed, 6 Sep 2023 21:37:09 -0400 Subject: [PATCH 04/45] Add the rest of the relevant howard abrams repos. --- docker/organic_test/Dockerfile | 30 +++++++++++++++++++ .../foreign_document_test_entrypoint.sh | 12 ++++++++ 2 files changed, 42 insertions(+) diff --git a/docker/organic_test/Dockerfile b/docker/organic_test/Dockerfile index b62517e..d3ab62c 100644 --- a/docker/organic_test/Dockerfile +++ b/docker/organic_test/Dockerfile @@ -53,6 +53,36 @@ ARG HOWARD_ABRAMS_DEMO_IT_PATH=/foreign_documents/howardabrams/demo-it ARG HOWARD_ABRAMS_DEMO_IT_REPO=https://github.com/howardabrams/demo-it.git RUN mkdir -p $HOWARD_ABRAMS_DEMO_IT_PATH && git -C $HOWARD_ABRAMS_DEMO_IT_PATH init --initial-branch=main && git -C $HOWARD_ABRAMS_DEMO_IT_PATH remote add origin $HOWARD_ABRAMS_DEMO_IT_REPO && git -C $HOWARD_ABRAMS_DEMO_IT_PATH fetch origin $HOWARD_ABRAMS_DEMO_IT_VERSION && git -C $HOWARD_ABRAMS_DEMO_IT_PATH checkout FETCH_HEAD +ARG HOWARD_ABRAMS_MAGIT_DEMO_VERSION=59e82f6bc7c18f550478d86a8f680c3f2da66985 +ARG HOWARD_ABRAMS_MAGIT_DEMO_PATH=/foreign_documents/howardabrams/magit-demo +ARG HOWARD_ABRAMS_MAGIT_DEMO_REPO=https://github.com/howardabrams/magit-demo.git +RUN mkdir -p 
$HOWARD_ABRAMS_MAGIT_DEMO_PATH && git -C $HOWARD_ABRAMS_MAGIT_DEMO_PATH init --initial-branch=main && git -C $HOWARD_ABRAMS_MAGIT_DEMO_PATH remote add origin $HOWARD_ABRAMS_MAGIT_DEMO_REPO && git -C $HOWARD_ABRAMS_MAGIT_DEMO_PATH fetch origin $HOWARD_ABRAMS_MAGIT_DEMO_VERSION && git -C $HOWARD_ABRAMS_MAGIT_DEMO_PATH checkout FETCH_HEAD + +ARG HOWARD_ABRAMS_PDX_EMACS_HACKERS_VERSION=bfb7bd640fdf0ce3def21f9fc591ed35d776b26d +ARG HOWARD_ABRAMS_PDX_EMACS_HACKERS_PATH=/foreign_documents/howardabrams/pdx-emacs-hackers +ARG HOWARD_ABRAMS_PDX_EMACS_HACKERS_REPO=https://github.com/howardabrams/pdx-emacs-hackers.git +RUN mkdir -p $HOWARD_ABRAMS_PDX_EMACS_HACKERS_PATH && git -C $HOWARD_ABRAMS_PDX_EMACS_HACKERS_PATH init --initial-branch=main && git -C $HOWARD_ABRAMS_PDX_EMACS_HACKERS_PATH remote add origin $HOWARD_ABRAMS_PDX_EMACS_HACKERS_REPO && git -C $HOWARD_ABRAMS_PDX_EMACS_HACKERS_PATH fetch origin $HOWARD_ABRAMS_PDX_EMACS_HACKERS_VERSION && git -C $HOWARD_ABRAMS_PDX_EMACS_HACKERS_PATH checkout FETCH_HEAD + +ARG HOWARD_ABRAMS_FLORA_SIMULATOR_VERSION=50de13068722b9e3878f8598b749b7ccd14e7f8e +ARG HOWARD_ABRAMS_FLORA_SIMULATOR_PATH=/foreign_documents/howardabrams/flora-simulator +ARG HOWARD_ABRAMS_FLORA_SIMULATOR_REPO=https://github.com/howardabrams/flora-simulator.git +RUN mkdir -p $HOWARD_ABRAMS_FLORA_SIMULATOR_PATH && git -C $HOWARD_ABRAMS_FLORA_SIMULATOR_PATH init --initial-branch=main && git -C $HOWARD_ABRAMS_FLORA_SIMULATOR_PATH remote add origin $HOWARD_ABRAMS_FLORA_SIMULATOR_REPO && git -C $HOWARD_ABRAMS_FLORA_SIMULATOR_PATH fetch origin $HOWARD_ABRAMS_FLORA_SIMULATOR_VERSION && git -C $HOWARD_ABRAMS_FLORA_SIMULATOR_PATH checkout FETCH_HEAD + +ARG HOWARD_ABRAMS_LITERATE_DEVOPS_DEMO_VERSION=2d7a5e41001a1adf7ec24aeb6acc8525a72d7892 +ARG HOWARD_ABRAMS_LITERATE_DEVOPS_DEMO_PATH=/foreign_documents/howardabrams/literate-devops-demo +ARG HOWARD_ABRAMS_LITERATE_DEVOPS_DEMO_REPO=https://github.com/howardabrams/literate-devops-demo.git +RUN mkdir -p 
$HOWARD_ABRAMS_LITERATE_DEVOPS_DEMO_PATH && git -C $HOWARD_ABRAMS_LITERATE_DEVOPS_DEMO_PATH init --initial-branch=main && git -C $HOWARD_ABRAMS_LITERATE_DEVOPS_DEMO_PATH remote add origin $HOWARD_ABRAMS_LITERATE_DEVOPS_DEMO_REPO && git -C $HOWARD_ABRAMS_LITERATE_DEVOPS_DEMO_PATH fetch origin $HOWARD_ABRAMS_LITERATE_DEVOPS_DEMO_VERSION && git -C $HOWARD_ABRAMS_LITERATE_DEVOPS_DEMO_PATH checkout FETCH_HEAD + +ARG HOWARD_ABRAMS_CLOJURE_YESQL_XP_VERSION=b651c7f8b47b2710e99fce9652980902bbc1c6c9 +ARG HOWARD_ABRAMS_CLOJURE_YESQL_XP_PATH=/foreign_documents/howardabrams/clojure-yesql-xp +ARG HOWARD_ABRAMS_CLOJURE_YESQL_XP_REPO=https://github.com/howardabrams/clojure-yesql-xp.git +RUN mkdir -p $HOWARD_ABRAMS_CLOJURE_YESQL_XP_PATH && git -C $HOWARD_ABRAMS_CLOJURE_YESQL_XP_PATH init --initial-branch=main && git -C $HOWARD_ABRAMS_CLOJURE_YESQL_XP_PATH remote add origin $HOWARD_ABRAMS_CLOJURE_YESQL_XP_REPO && git -C $HOWARD_ABRAMS_CLOJURE_YESQL_XP_PATH fetch origin $HOWARD_ABRAMS_CLOJURE_YESQL_XP_VERSION && git -C $HOWARD_ABRAMS_CLOJURE_YESQL_XP_PATH checkout FETCH_HEAD + +ARG HOWARD_ABRAMS_VEEP_VERSION=e37fcf63a5c4a526255735ee34955528b3b280ae +ARG HOWARD_ABRAMS_VEEP_PATH=/foreign_documents/howardabrams/veep +ARG HOWARD_ABRAMS_VEEP_REPO=https://github.com/howardabrams/veep.git +RUN mkdir -p $HOWARD_ABRAMS_VEEP_PATH && git -C $HOWARD_ABRAMS_VEEP_PATH init --initial-branch=main && git -C $HOWARD_ABRAMS_VEEP_PATH remote add origin $HOWARD_ABRAMS_VEEP_REPO && git -C $HOWARD_ABRAMS_VEEP_PATH fetch origin $HOWARD_ABRAMS_VEEP_VERSION && git -C $HOWARD_ABRAMS_VEEP_PATH checkout FETCH_HEAD + ARG DOOMEMACS_VERSION=42d5fd83504f8aa80f3248036006fbcd49222943 ARG DOOMEMACS_PATH=/foreign_documents/doomemacs ARG DOOMEMACS_REPO=https://github.com/doomemacs/doomemacs.git diff --git a/docker/organic_test/foreign_document_test_entrypoint.sh b/docker/organic_test/foreign_document_test_entrypoint.sh index 1fcde1f..b1755d3 100644 --- a/docker/organic_test/foreign_document_test_entrypoint.sh +++ 
b/docker/organic_test/foreign_document_test_entrypoint.sh @@ -125,6 +125,18 @@ function compare_howard_abrams { if [ "$?" -ne 0 ]; then all_status=1; fi (run_compare_function "demo-it" compare_all_org_document "/foreign_documents/howardabrams/demo-it") if [ "$?" -ne 0 ]; then all_status=1; fi + (run_compare_function "magit-demo" compare_all_org_document "/foreign_documents/howardabrams/magit-demo") + if [ "$?" -ne 0 ]; then all_status=1; fi + (run_compare_function "pdx-emacs-hackers" compare_all_org_document "/foreign_documents/howardabrams/pdx-emacs-hackers") + if [ "$?" -ne 0 ]; then all_status=1; fi + (run_compare_function "flora-simulator" compare_all_org_document "/foreign_documents/howardabrams/flora-simulator") + if [ "$?" -ne 0 ]; then all_status=1; fi + (run_compare_function "literate-devops-demo" compare_all_org_document "/foreign_documents/howardabrams/literate-devops-demo") + if [ "$?" -ne 0 ]; then all_status=1; fi + (run_compare_function "clojure-yesql-xp" compare_all_org_document "/foreign_documents/howardabrams/clojure-yesql-xp") + if [ "$?" -ne 0 ]; then all_status=1; fi + (run_compare_function "veep" compare_all_org_document "/foreign_documents/howardabrams/veep") + if [ "$?" 
-ne 0 ]; then all_status=1; fi set -e return "$all_status" From facbe716e9c8ec6f47d30df9aa4cc7271bc7e4f8 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 7 Sep 2023 01:23:15 -0400 Subject: [PATCH 05/45] Cleanup --- src/parser/text_markup.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parser/text_markup.rs b/src/parser/text_markup.rs index 48f6eea..922d3f6 100644 --- a/src/parser/text_markup.rs +++ b/src/parser/text_markup.rs @@ -179,7 +179,8 @@ fn _text_markup_object<'b, 'g, 'r, 's, 'c>( ) -> Res, Vec>> { let (remaining, _) = pre(context, input)?; let (remaining, open) = tag(marker_symbol)(remaining)?; - let (remaining, _peek_not_whitespace) = peek(not(multispace1))(remaining)?; + let (remaining, _peek_not_whitespace) = + peek(verify(anychar, |c| !c.is_whitespace() && *c != '\u{200B}'))(remaining)?; let text_markup_end_specialized = text_markup_end(open.into()); let parser_context = ContextElement::ExitMatcherNode(ExitMatcherNode { class: ExitClass::Gamma, @@ -277,7 +278,6 @@ pub fn pre<'b, 'g, 'r, 's>( None | Some('\r') | Some('\n') | Some(' ') | Some('\t') | Some('-') | Some('(') | Some('{') | Some('\'') | Some('"') | Some('<') => {} Some(_) => { - // Not at start of line, cannot be a heading return Err(nom::Err::Error(CustomError::MyError(MyError( "Not a valid pre character for text markup.".into(), )))); From 6676012eb15c77438bed71b9b7715d28279da101 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 7 Sep 2023 01:45:02 -0400 Subject: [PATCH 06/45] Change footnote reference class to Gamma. 
--- .../object/footnote_reference/nested_footnote_references.org | 3 +++ src/parser/footnote_reference.rs | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 org_mode_samples/object/footnote_reference/nested_footnote_references.org diff --git a/org_mode_samples/object/footnote_reference/nested_footnote_references.org b/org_mode_samples/object/footnote_reference/nested_footnote_references.org new file mode 100644 index 0000000..310f174 --- /dev/null +++ b/org_mode_samples/object/footnote_reference/nested_footnote_references.org @@ -0,0 +1,3 @@ +*[fn:: /abcdef[fn::ghijklmnopqrstuvw]xyz/ r]* + +*[fn:: /abcdef[fn::ghijk *lmnopq* rstuvw]xyz/ r]* diff --git a/src/parser/footnote_reference.rs b/src/parser/footnote_reference.rs index 0f3738c..28f8b52 100644 --- a/src/parser/footnote_reference.rs +++ b/src/parser/footnote_reference.rs @@ -42,7 +42,7 @@ fn anonymous_footnote<'b, 'g, 'r, 's>( let (remaining, _) = tag_no_case("[fn::")(input)?; let exit_with_depth = footnote_definition_end(remaining.get_bracket_depth()); let parser_context = ContextElement::ExitMatcherNode(ExitMatcherNode { - class: ExitClass::Beta, + class: ExitClass::Gamma, exit_matcher: &exit_with_depth, }); let parser_context = context.with_additional_node(&parser_context); @@ -78,7 +78,7 @@ fn inline_footnote<'b, 'g, 'r, 's>( let (remaining, _) = tag(":")(remaining)?; let exit_with_depth = footnote_definition_end(remaining.get_bracket_depth()); let parser_context = ContextElement::ExitMatcherNode(ExitMatcherNode { - class: ExitClass::Beta, + class: ExitClass::Gamma, exit_matcher: &exit_with_depth, }); let parser_context = context.with_additional_node(&parser_context); From 6b82b46e09602e3551e3c6390d718434024f06cb Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 7 Sep 2023 02:01:34 -0400 Subject: [PATCH 07/45] Prevent nesting of text markup of the same type. This greatly reduces the amount of detect element calls that are occurring. 
--- src/context/context.rs | 3 +++ src/parser/text_markup.rs | 40 +++++++++++++++++++++++++++++---------- src/parser/util.rs | 15 ++++++++++++++- 3 files changed, 47 insertions(+), 11 deletions(-) diff --git a/src/context/context.rs b/src/context/context.rs index 0f41963..0baa2e8 100644 --- a/src/context/context.rs +++ b/src/context/context.rs @@ -21,6 +21,9 @@ pub enum ContextElement<'r, 's> { /// Stores the name of the current element to prevent directly nesting elements of the same type. Context(&'r str), + /// Stores the name of the current object to prevent directly nesting elements of the same type. + ContextObject(&'r str), + /// Indicates if elements should consume the whitespace after them. ConsumeTrailingWhitespace(bool), diff --git a/src/parser/text_markup.rs b/src/parser/text_markup.rs index 922d3f6..d131370 100644 --- a/src/parser/text_markup.rs +++ b/src/parser/text_markup.rs @@ -18,6 +18,7 @@ use tracing::span; use super::object_parser::standard_set_object; use super::org_source::OrgSource; use super::radio_link::RematchObject; +use super::util::in_object_section; use super::util::maybe_consume_object_trailing_whitespace_if_not_exiting; use crate::context::parser_with_context; use crate::context::ContextElement; @@ -177,16 +178,26 @@ fn _text_markup_object<'b, 'g, 'r, 's, 'c>( input: OrgSource<'s>, marker_symbol: &'c str, ) -> Res, Vec>> { + if in_object_section(context, marker_symbol) { + return Err(nom::Err::Error(CustomError::MyError(MyError( + "Cannot nest objects of the same type".into(), + )))); + } + let (remaining, _) = pre(context, input)?; let (remaining, open) = tag(marker_symbol)(remaining)?; let (remaining, _peek_not_whitespace) = peek(verify(anychar, |c| !c.is_whitespace() && *c != '\u{200B}'))(remaining)?; let text_markup_end_specialized = text_markup_end(open.into()); - let parser_context = ContextElement::ExitMatcherNode(ExitMatcherNode { - class: ExitClass::Gamma, - exit_matcher: &text_markup_end_specialized, - }); - let 
parser_context = context.with_additional_node(&parser_context); + let contexts = [ + ContextElement::ContextObject(marker_symbol), + ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Gamma, + exit_matcher: &text_markup_end_specialized, + }), + ]; + let parser_context = context.with_additional_node(&contexts[0]); + let parser_context = parser_context.with_additional_node(&contexts[1]); let (remaining, (children, _exit_contents)) = verify( many_till( @@ -230,16 +241,25 @@ fn _text_markup_string<'b, 'g, 'r, 's, 'c>( input: OrgSource<'s>, marker_symbol: &'c str, ) -> Res, OrgSource<'s>> { + if in_object_section(context, marker_symbol) { + return Err(nom::Err::Error(CustomError::MyError(MyError( + "Cannot nest objects of the same type".into(), + )))); + } let (remaining, _) = pre(context, input)?; let (remaining, open) = tag(marker_symbol)(remaining)?; let (remaining, _peek_not_whitespace) = peek(verify(anychar, |c| !c.is_whitespace() && *c != '\u{200B}'))(remaining)?; let text_markup_end_specialized = text_markup_end(open.into()); - let parser_context = ContextElement::ExitMatcherNode(ExitMatcherNode { - class: ExitClass::Gamma, - exit_matcher: &text_markup_end_specialized, - }); - let parser_context = context.with_additional_node(&parser_context); + let contexts = [ + ContextElement::ContextObject(marker_symbol), + ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Gamma, + exit_matcher: &text_markup_end_specialized, + }), + ]; + let parser_context = context.with_additional_node(&contexts[0]); + let parser_context = parser_context.with_additional_node(&contexts[1]); let (remaining, contents) = recognize(verify( many_till( diff --git a/src/parser/util.rs b/src/parser/util.rs index 32576ad..c715a50 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -24,7 +24,6 @@ pub const WORD_CONSTITUENT_CHARACTERS: &str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; /// Check if we are below a section of the given 
section type regardless of depth -#[allow(dead_code)] pub fn in_section<'b, 'g, 'r, 's, 'x>( context: RefContext<'b, 'g, 'r, 's>, section_name: &'x str, @@ -53,6 +52,20 @@ pub fn immediate_in_section<'b, 'g, 'r, 's, 'x>( false } +/// Check if we are below a section of the given section type regardless of depth +pub fn in_object_section<'b, 'g, 'r, 's, 'x>( + context: RefContext<'b, 'g, 'r, 's>, + section_name: &'x str, +) -> bool { + for thing in context.iter() { + match thing { + ContextElement::ContextObject(name) if *name == section_name => return true, + _ => {} + } + } + false +} + /// Get a slice of the string that was consumed in a parser using the original input to the parser and the remaining input after the parser. pub fn get_consumed<'s>(input: OrgSource<'s>, remaining: OrgSource<'s>) -> OrgSource<'s> { input.get_until(remaining) From ba291c677697b7ff48a6103f5411a6c0b1880c02 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 7 Sep 2023 02:27:55 -0400 Subject: [PATCH 08/45] Unify two places checking if text was preceded by whitespace. 
--- src/parser/subscript_and_superscript.rs | 42 +++++++------------------ src/parser/text_markup.rs | 2 +- src/parser/util.rs | 15 +++++++-- 3 files changed, 24 insertions(+), 35 deletions(-) diff --git a/src/parser/subscript_and_superscript.rs b/src/parser/subscript_and_superscript.rs index 9ad8906..00a76cd 100644 --- a/src/parser/subscript_and_superscript.rs +++ b/src/parser/subscript_and_superscript.rs @@ -15,6 +15,7 @@ use super::org_source::BracketDepth; use super::org_source::OrgSource; use super::util::exit_matcher_parser; use super::util::maybe_consume_object_trailing_whitespace_if_not_exiting; +use super::util::preceded_by_whitespace; use crate::context::parser_with_context; use crate::context::ContextElement; use crate::context::ContextMatcher; @@ -36,7 +37,7 @@ pub fn subscript<'b, 'g, 'r, 's>( ) -> Res, Subscript<'s>> { // We check for the underscore first before checking the pre-character as a minor optimization to avoid walking up the context tree to find the document root unnecessarily. let (remaining, _) = tag("_")(input)?; - pre(context, input)?; + pre(input)?; let (remaining, _body) = script_body(context, remaining)?; let (remaining, _trailing_whitespace) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; @@ -56,7 +57,7 @@ pub fn superscript<'b, 'g, 'r, 's>( ) -> Res, Superscript<'s>> { // We check for the circumflex first before checking the pre-character as a minor optimization to avoid walking up the context tree to find the document root unnecessarily. 
let (remaining, _) = tag("^")(input)?; - pre(context, input)?; + pre(input)?; let (remaining, _body) = script_body(context, remaining)?; let (remaining, _trailing_whitespace) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; @@ -70,19 +71,8 @@ pub fn superscript<'b, 'g, 'r, 's>( } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn pre<'b, 'g, 'r, 's>( - _context: RefContext<'b, 'g, 'r, 's>, - input: OrgSource<'s>, -) -> Res, ()> { - let preceding_character = input.get_preceding_character(); - match preceding_character { - Some(c) if !c.is_whitespace() => {} - _ => { - return Err(nom::Err::Error(CustomError::MyError(MyError( - "Must be preceded by a non-whitespace character.".into(), - )))); - } - }; +fn pre<'s>(input: OrgSource<'s>) -> Res, ()> { + not(preceded_by_whitespace(true))(input)?; Ok((input, ())) } @@ -120,37 +110,27 @@ fn script_asterisk<'b, 'g, 'r, 's>( #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn script_alphanum<'b, 'g, 'r, 's>( - context: RefContext<'b, 'g, 'r, 's>, + _context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, OrgSource<'s>> { let (remaining, _sign) = opt(recognize(one_of("+-")))(input)?; - let (remaining, _script) = many_till( - parser_with_context!(script_alphanum_character)(context), - parser_with_context!(end_script_alphanum_character)(context), - )(remaining)?; + let (remaining, _script) = + many_till(script_alphanum_character, end_script_alphanum_character)(remaining)?; let source = get_consumed(input, remaining); Ok((remaining, source)) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn script_alphanum_character<'b, 'g, 'r, 's>( - _context: RefContext<'b, 'g, 'r, 's>, - input: OrgSource<'s>, -) -> Res, OrgSource<'s>> { +fn script_alphanum_character<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { recognize(verify(anychar, |c| { c.is_alphanumeric() || r#",.\"#.contains(*c) }))(input) } 
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn end_script_alphanum_character<'b, 'g, 'r, 's>( - context: RefContext<'b, 'g, 'r, 's>, - input: OrgSource<'s>, -) -> Res, OrgSource<'s>> { +fn end_script_alphanum_character<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { let (remaining, final_char) = recognize(verify(anychar, |c| c.is_alphanumeric()))(input)?; - peek(not(parser_with_context!(script_alphanum_character)( - context, - )))(remaining)?; + peek(not(script_alphanum_character))(remaining)?; Ok((remaining, final_char)) } diff --git a/src/parser/text_markup.rs b/src/parser/text_markup.rs index d131370..e36d179 100644 --- a/src/parser/text_markup.rs +++ b/src/parser/text_markup.rs @@ -325,7 +325,7 @@ fn _text_markup_end<'b, 'g, 'r, 's, 'c>( input: OrgSource<'s>, marker_symbol: &'c str, ) -> Res, OrgSource<'s>> { - not(preceded_by_whitespace)(input)?; + not(preceded_by_whitespace(false))(input)?; let (remaining, _marker) = terminated( tag(marker_symbol), peek(parser_with_context!(post)(context)), diff --git a/src/parser/util.rs b/src/parser/util.rs index c715a50..6464fe8 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -135,16 +135,25 @@ pub fn start_of_line<'s>(input: OrgSource<'s>) -> Res, ()> { } } +pub fn preceded_by_whitespace( + allow_start_of_file: bool, +) -> impl for<'s> Fn(OrgSource<'s>) -> Res, ()> { + move |input| _preceded_by_whitespace(allow_start_of_file, input) +} + /// Check that we are at the start of a line #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -pub fn preceded_by_whitespace<'s>(input: OrgSource<'s>) -> Res, ()> { +fn _preceded_by_whitespace<'s>( + allow_start_of_file: bool, + input: OrgSource<'s>, +) -> Res, ()> { let preceding_character = input.get_preceding_character(); if !preceding_character .map(|c| c.is_whitespace() || c == '\u{200B}') // 200B = Zero-width space - .unwrap_or(false) + .unwrap_or(allow_start_of_file) { return 
Err(nom::Err::Error(CustomError::MyError(MyError( - "Not preceded by whitespace.".into(), + "Must be preceded by a non-whitespace character.".into(), )))); } Ok((input, ())) From 76a81b73ac4de8772da2d7e305e1e9cc3246e0c4 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 7 Sep 2023 02:59:08 -0400 Subject: [PATCH 09/45] Add a detect object function similar to the detect element function. --- src/parser/object_parser.rs | 20 ++++++++++++++++++++ src/parser/plain_text.rs | 6 ++++-- src/parser/subscript_and_superscript.rs | 13 +++++++++++++ 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/src/parser/object_parser.rs b/src/parser/object_parser.rs index e3db553..b65f525 100644 --- a/src/parser/object_parser.rs +++ b/src/parser/object_parser.rs @@ -4,8 +4,11 @@ use nom::combinator::map; use super::org_source::OrgSource; use super::plain_text::plain_text; use super::regular_link::regular_link; +use super::subscript_and_superscript::detect_subscript_or_superscript; use crate::context::parser_with_context; use crate::context::RefContext; +use crate::error::CustomError; +use crate::error::MyError; use crate::error::Res; use crate::parser::angle_link::angle_link; use crate::parser::citation::citation; @@ -165,6 +168,23 @@ pub fn any_object_except_plain_text<'b, 'g, 'r, 's>( Ok((remaining, object)) } +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +pub fn detect_any_object_except_plain_text<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, ()> { + if detect_subscript_or_superscript(input).is_ok() { + return Ok((input, ())); + } + if any_object_except_plain_text(context, input).is_ok() { + return Ok((input, ())); + } + + return Err(nom::Err::Error(CustomError::MyError(MyError( + "No object detected.".into(), + )))); +} + #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] pub fn regular_link_description_object_set<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, 
diff --git a/src/parser/plain_text.rs b/src/parser/plain_text.rs index 1dbc295..e57f4d1 100644 --- a/src/parser/plain_text.rs +++ b/src/parser/plain_text.rs @@ -7,7 +7,7 @@ use nom::combinator::recognize; use nom::combinator::verify; use nom::multi::many_till; -use super::object_parser::any_object_except_plain_text; +use super::object_parser::detect_any_object_except_plain_text; use super::org_source::OrgSource; use super::radio_link::RematchObject; use super::util::exit_matcher_parser; @@ -46,7 +46,9 @@ fn plain_text_end<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, OrgSource<'s>> { - recognize(parser_with_context!(any_object_except_plain_text)(context))(input) + recognize(parser_with_context!(detect_any_object_except_plain_text)( + context, + ))(input) } impl<'x> RematchObject<'x> for PlainText<'x> { diff --git a/src/parser/subscript_and_superscript.rs b/src/parser/subscript_and_superscript.rs index 00a76cd..e2025e7 100644 --- a/src/parser/subscript_and_superscript.rs +++ b/src/parser/subscript_and_superscript.rs @@ -30,6 +30,19 @@ use crate::types::Object; use crate::types::Subscript; use crate::types::Superscript; +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +pub fn detect_subscript_or_superscript<'s>(input: OrgSource<'s>) -> Res, ()> { + // This does not have to detect all valid subscript/superscript but all that it detects must be valid. 
+ let (remaining, _) = one_of("_^")(input)?; + pre(input)?; + if tag::<_, _, CustomError<_>>("*")(remaining).is_ok() { + return Ok((input, ())); + } + let (remaining, _) = opt(one_of("+-"))(remaining)?; + let (_remaining, _) = verify(anychar, |c| c.is_alphanumeric())(remaining)?; + Ok((input, ())) +} + #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] pub fn subscript<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, From 69512f559a2734780232adfa31542c052566b66c Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 7 Sep 2023 03:40:14 -0400 Subject: [PATCH 10/45] Fix end conditions for subscript and superscript. --- src/parser/subscript_and_superscript.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/parser/subscript_and_superscript.rs b/src/parser/subscript_and_superscript.rs index e2025e7..c026c6a 100644 --- a/src/parser/subscript_and_superscript.rs +++ b/src/parser/subscript_and_superscript.rs @@ -1,5 +1,6 @@ use nom::branch::alt; use nom::bytes::complete::tag; +use nom::bytes::complete::take_while; use nom::character::complete::anychar; use nom::character::complete::one_of; use nom::combinator::map; @@ -9,6 +10,7 @@ use nom::combinator::peek; use nom::combinator::recognize; use nom::combinator::verify; use nom::multi::many_till; +use nom::sequence::tuple; use super::object_parser::standard_set_object; use super::org_source::BracketDepth; @@ -143,7 +145,10 @@ fn script_alphanum_character<'s>(input: OrgSource<'s>) -> Res, Org #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn end_script_alphanum_character<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { let (remaining, final_char) = recognize(verify(anychar, |c| c.is_alphanumeric()))(input)?; - peek(not(script_alphanum_character))(remaining)?; + peek(tuple(( + take_while(|c| r#",.\"#.contains(c)), + not(script_alphanum_character), + )))(remaining)?; Ok((remaining, final_char)) } From 
b0930df7882cc559d54acb15f455dbc4d4bde049 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 7 Sep 2023 04:15:17 -0400 Subject: [PATCH 11/45] Support zero skipped text in OrgSource slicing. --- src/parser/org_source.rs | 5 ++++- src/parser/util.rs | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/parser/org_source.rs b/src/parser/org_source.rs index 87f93da..820c01c 100644 --- a/src/parser/org_source.rs +++ b/src/parser/org_source.rs @@ -145,6 +145,9 @@ where if new_end > self.end { panic!("Attempted to extend past the end of the WrappedInput.") } + if new_start == self.start && new_end == self.end { + return self.clone(); + } let skipped_text = &self.full_source[self.start..new_start]; let mut start_of_line = self.start_of_line; @@ -183,7 +186,7 @@ where start: new_start, end: new_end, start_of_line, - preceding_character: skipped_text.chars().last(), + preceding_character: skipped_text.chars().last().or(self.preceding_character), bracket_depth, brace_depth, parenthesis_depth, diff --git a/src/parser/util.rs b/src/parser/util.rs index 6464fe8..6625b86 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -153,7 +153,7 @@ fn _preceded_by_whitespace<'s>( .unwrap_or(allow_start_of_file) { return Err(nom::Err::Error(CustomError::MyError(MyError( - "Must be preceded by a non-whitespace character.".into(), + "Must be preceded by a whitespace character.".into(), )))); } Ok((input, ())) From c2eb1f51c8b641637963d4ff8242dcbaad27b1ae Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 12:41:48 -0400 Subject: [PATCH 12/45] Support blank lines between nested headlines. 
--- src/parser/document.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/parser/document.rs b/src/parser/document.rs index 8c50dbd..1c036d5 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -328,6 +328,7 @@ fn _heading<'b, 'g, 'r, 's>( let heading_matcher = parser_with_context!(heading(star_count))(context); let (remaining, maybe_section) = opt(map(section_matcher, DocumentElement::Section))(remaining)?; + let (remaining, _ws) = opt(tuple((start_of_line, many0(blank_line))))(remaining)?; let (remaining, mut children) = many0(map(heading_matcher, DocumentElement::Heading))(remaining)?; if let Some(section) = maybe_section { From 57c2922e4a149c11102831f9ebd385ea3015793b Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 12:50:51 -0400 Subject: [PATCH 13/45] Add test showing problem is description list parser. --- .../plain_list/description_list_with_double_colon_in_tag.org | 1 + 1 file changed, 1 insertion(+) create mode 100644 org_mode_samples/greater_element/plain_list/description_list_with_double_colon_in_tag.org diff --git a/org_mode_samples/greater_element/plain_list/description_list_with_double_colon_in_tag.org b/org_mode_samples/greater_element/plain_list/description_list_with_double_colon_in_tag.org new file mode 100644 index 0000000..4ba3143 --- /dev/null +++ b/org_mode_samples/greater_element/plain_list/description_list_with_double_colon_in_tag.org @@ -0,0 +1 @@ +- =foo :: bar= :: baz From ab612f293f28f729217551d6579b380541105649 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 13:11:58 -0400 Subject: [PATCH 14/45] Update org-mode version. 
--- docker/organic_test/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/organic_test/Dockerfile b/docker/organic_test/Dockerfile index d3ab62c..f0c31d7 100644 --- a/docker/organic_test/Dockerfile +++ b/docker/organic_test/Dockerfile @@ -14,7 +14,7 @@ RUN make DESTDIR="/root/dist" install FROM build AS build-org-mode -ARG ORG_VERSION=7bdec435ff5d86220d13c431e799c5ed44a57da1 +ARG ORG_VERSION=163bafb43dcc2bc94a2c7ccaa77d3d1dd488f1af COPY --from=build-emacs /root/dist/ / RUN mkdir /root/dist # Savannah does not allow fetching specific revisions, so we're going to have to put unnecessary load on their server by cloning main and then checking out the revision we want. From 40f22034da08e0b57df4f765c74cba297c1be70f Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 14:02:15 -0400 Subject: [PATCH 15/45] Make the item tag exit matcher a lower class than all others. This is to allow for " :: " inside a description list item's tag if it is nested inside another object. --- src/context/context.rs | 2 +- src/context/exiting.rs | 14 ++++---------- src/parser/plain_list.rs | 38 +++++++++++++++++++++++++------------- 3 files changed, 30 insertions(+), 24 deletions(-) diff --git a/src/context/context.rs b/src/context/context.rs index 0baa2e8..a485cdf 100644 --- a/src/context/context.rs +++ b/src/context/context.rs @@ -108,7 +108,7 @@ impl<'g, 'r, 's> Context<'g, 'r, 's> { &'r self, i: OrgSource<'s>, ) -> IResult, OrgSource<'s>, CustomError>> { - let mut current_class_filter = ExitClass::Gamma; + let mut current_class_filter = ExitClass::Delta; for current_node in self.iter_context() { let context_element = current_node.get_data(); match context_element { diff --git a/src/context/exiting.rs b/src/context/exiting.rs index 6f8c359..c989a33 100644 --- a/src/context/exiting.rs +++ b/src/context/exiting.rs @@ -1,16 +1,10 @@ #[derive(Debug, Copy, Clone)] pub enum ExitClass { - /// Headlines and sections.
Document = 1, - - /// Elements who take priority over beta elements when matching. - Alpha = 20, - - /// Elements who cede priority to alpha elements when matching. - Beta = 300, - - /// Elements who cede priority to alpha and beta elements when matching. - Gamma = 4000, + Alpha = 2, + Beta = 3, + Gamma = 4, + Delta = 5, } impl std::fmt::Display for ExitClass { diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 9dbc117..f74de75 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -152,11 +152,8 @@ pub fn plain_list_item<'b, 'g, 'r, 's>( // TODO: parse checkbox - let (remaining, maybe_tag) = opt(tuple(( - space1, - parser_with_context!(item_tag)(context), - tag(" ::"), - )))(remaining)?; + let (remaining, maybe_tag) = + opt(tuple((space1, parser_with_context!(item_tag)(context))))(remaining)?; let maybe_contentless_item: Res, OrgSource<'_>> = peek(recognize(tuple((many0(blank_line), eof))))(remaining); match maybe_contentless_item { @@ -170,7 +167,7 @@ pub fn plain_list_item<'b, 'g, 'r, 's>( indentation: indent_level, bullet: bull.into(), tag: maybe_tag - .map(|(_ws, item_tag, _divider)| item_tag) + .map(|(_ws, item_tag)| item_tag) .unwrap_or(Vec::new()), children: Vec::new(), }, @@ -219,7 +216,7 @@ pub fn plain_list_item<'b, 'g, 'r, 's>( indentation: indent_level, bullet: bull.into(), tag: maybe_tag - .map(|(_ws, item_tag, _divider)| item_tag) + .map(|(_ws, item_tag)| item_tag) .unwrap_or(Vec::new()), children: children.into_iter().map(|(_start, item)| item).collect(), }, @@ -313,11 +310,18 @@ fn item_tag<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Vec>> { - let parser_context = ContextElement::ExitMatcherNode(ExitMatcherNode { - class: ExitClass::Gamma, - exit_matcher: &item_tag_end, - }); - let parser_context = context.with_additional_node(&parser_context); + let contexts = [ + ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Gamma, + exit_matcher: 
&item_tag_line_ending_end, + }), + ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Delta, + exit_matcher: &item_tag_end, + }), + ]; + let parser_context = context.with_additional_node(&contexts[0]); + let parser_context = parser_context.with_additional_node(&contexts[1]); let (remaining, (children, _exit_contents)) = verify( many_till( // TODO: Should this be using a different set like the minimal set? @@ -326,6 +330,7 @@ fn item_tag<'b, 'g, 'r, 's>( ), |(children, _exit_contents)| !children.is_empty(), )(input)?; + let (remaining, _) = tag(" ::")(remaining)?; Ok((remaining, children)) } @@ -335,12 +340,19 @@ fn item_tag_end<'b, 'g, 'r, 's>( input: OrgSource<'s>, ) -> Res, OrgSource<'s>> { recognize(alt(( - line_ending, tag(" :: "), recognize(tuple((tag(" ::"), alt((line_ending, eof))))), )))(input) } +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn item_tag_line_ending_end<'b, 'g, 'r, 's>( + _context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, OrgSource<'s>> { + line_ending(input) +} + #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn item_tag_post_gap<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, From 3cc22943879da4c882fbb67bf410d98d1e68ee9d Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 15:05:42 -0400 Subject: [PATCH 16/45] Move headlines into their own file. 
--- src/parser/document.rs | 207 +------------------------------------- src/parser/headline.rs | 222 +++++++++++++++++++++++++++++++++++++++++ src/parser/mod.rs | 1 + 3 files changed, 226 insertions(+), 204 deletions(-) create mode 100644 src/parser/headline.rs diff --git a/src/parser/document.rs b/src/parser/document.rs index 1c036d5..44aca17 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -1,23 +1,13 @@ -use nom::branch::alt; -use nom::bytes::complete::tag; -use nom::character::complete::anychar; -use nom::character::complete::line_ending; -use nom::character::complete::space0; -use nom::character::complete::space1; use nom::combinator::all_consuming; -use nom::combinator::eof; -use nom::combinator::map; -use nom::combinator::not; use nom::combinator::opt; use nom::combinator::recognize; use nom::combinator::verify; use nom::multi::many0; -use nom::multi::many1; -use nom::multi::many1_count; use nom::multi::many_till; -use nom::multi::separated_list1; use nom::sequence::tuple; +use super::headline::detect_headline; +use super::headline::heading; use super::in_buffer_settings::apply_in_buffer_settings; use super::in_buffer_settings::scan_for_in_buffer_settings; use super::org_source::OrgSource; @@ -25,7 +15,6 @@ use super::token::AllTokensIterator; use super::token::Token; use super::util::exit_matcher_parser; use super::util::get_consumed; -use super::util::start_of_line; use crate::context::parser_with_context; use crate::context::Context; use crate::context::ContextElement; @@ -39,19 +28,15 @@ use crate::error::MyError; use crate::error::Res; use crate::parser::comment::comment; use crate::parser::element_parser::element; -use crate::parser::object_parser::standard_set_object; use crate::parser::org_source::convert_error; use crate::parser::planning::planning; use crate::parser::property_drawer::property_drawer; use crate::parser::util::blank_line; use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting; use 
crate::types::Document; -use crate::types::DocumentElement; use crate::types::Element; -use crate::types::Heading; use crate::types::Object; use crate::types::Section; -use crate::types::TodoKeywordType; /// Parse a full org-mode document. /// @@ -245,7 +230,7 @@ fn zeroth_section<'b, 'g, 'r, 's>( } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn section<'b, 'g, 'r, 's>( +pub fn section<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, mut input: OrgSource<'s>, ) -> Res, Section<'s>> { @@ -306,192 +291,6 @@ fn section_end<'b, 'g, 'r, 's>( recognize(detect_headline)(input) } -const fn heading( - parent_stars: usize, -) -> impl for<'b, 'g, 'r, 's> Fn( - RefContext<'b, 'g, 'r, 's>, - OrgSource<'s>, -) -> Res, Heading<'s>> { - move |context, input: OrgSource<'_>| _heading(context, input, parent_stars) -} - -#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn _heading<'b, 'g, 'r, 's>( - context: RefContext<'b, 'g, 'r, 's>, - input: OrgSource<'s>, - parent_stars: usize, -) -> Res, Heading<'s>> { - not(|i| context.check_exit_matcher(i))(input)?; - let (remaining, (star_count, _ws, maybe_todo_keyword, title, heading_tags)) = - headline(context, input, parent_stars)?; - let section_matcher = parser_with_context!(section)(context); - let heading_matcher = parser_with_context!(heading(star_count))(context); - let (remaining, maybe_section) = - opt(map(section_matcher, DocumentElement::Section))(remaining)?; - let (remaining, _ws) = opt(tuple((start_of_line, many0(blank_line))))(remaining)?; - let (remaining, mut children) = - many0(map(heading_matcher, DocumentElement::Heading))(remaining)?; - if let Some(section) = maybe_section { - children.insert(0, section); - } - let remaining = if children.is_empty() { - // Support empty headings - let (remain, _ws) = many0(blank_line)(remaining)?; - remain - } else { - remaining - }; - - let source = get_consumed(input, remaining); - Ok(( - remaining, - Heading { - source: 
source.into(), - stars: star_count, - todo_keyword: maybe_todo_keyword.map(|((todo_keyword_type, todo_keyword), _ws)| { - (todo_keyword_type, Into::<&str>::into(todo_keyword)) - }), - title, - tags: heading_tags, - children, - }, - )) -} - -#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn detect_headline<'s>(input: OrgSource<'s>) -> Res, ()> { - tuple((start_of_line, many1(tag("*")), space1))(input)?; - Ok((input, ())) -} - -#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn headline<'b, 'g, 'r, 's>( - context: RefContext<'b, 'g, 'r, 's>, - input: OrgSource<'s>, - parent_stars: usize, -) -> Res< - OrgSource<'s>, - ( - usize, - OrgSource<'s>, - Option<((TodoKeywordType, OrgSource<'s>), OrgSource<'s>)>, - Vec>, - Vec<&'s str>, - ), -> { - let parser_context = ContextElement::ExitMatcherNode(ExitMatcherNode { - class: ExitClass::Document, - exit_matcher: &headline_title_end, - }); - let parser_context = context.with_additional_node(&parser_context); - - let ( - remaining, - (_sol, star_count, ws, maybe_todo_keyword, title, maybe_tags, _ws, _line_ending), - ) = tuple(( - start_of_line, - verify(many1_count(tag("*")), |star_count| { - *star_count > parent_stars - }), - space1, - opt(tuple(( - parser_with_context!(heading_keyword)(&parser_context), - space1, - ))), - many1(parser_with_context!(standard_set_object)(&parser_context)), - opt(tuple((space0, tags))), - space0, - alt((line_ending, eof)), - ))(input)?; - Ok(( - remaining, - ( - star_count, - ws, - maybe_todo_keyword, - title, - maybe_tags - .map(|(_ws, tags)| { - tags.into_iter() - .map(|single_tag| Into::<&str>::into(single_tag)) - .collect() - }) - .unwrap_or(Vec::new()), - ), - )) -} - -#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn headline_title_end<'b, 'g, 'r, 's>( - _context: RefContext<'b, 'g, 'r, 's>, - input: OrgSource<'s>, -) -> Res, OrgSource<'s>> { - recognize(tuple(( - opt(tuple((space0, tags, space0))), - 
alt((line_ending, eof)), - )))(input) -} - -#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn tags<'s>(input: OrgSource<'s>) -> Res, Vec>> { - let (remaining, (_open, tags, _close)) = - tuple((tag(":"), separated_list1(tag(":"), single_tag), tag(":")))(input)?; - Ok((remaining, tags)) -} - -#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn single_tag<'r, 's>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { - recognize(many1(verify(anychar, |c| { - c.is_alphanumeric() || "_@#%".contains(*c) - })))(input) -} - -#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn heading_keyword<'b, 'g, 'r, 's>( - context: RefContext<'b, 'g, 'r, 's>, - input: OrgSource<'s>, -) -> Res, (TodoKeywordType, OrgSource<'s>)> { - let global_settings = context.get_global_settings(); - if global_settings.in_progress_todo_keywords.is_empty() - && global_settings.complete_todo_keywords.is_empty() - { - alt(( - map(tag("TODO"), |capture| (TodoKeywordType::Todo, capture)), - map(tag("DONE"), |capture| (TodoKeywordType::Done, capture)), - ))(input) - } else { - for todo_keyword in global_settings - .in_progress_todo_keywords - .iter() - .map(String::as_str) - { - let result = tag::<_, _, CustomError<_>>(todo_keyword)(input); - match result { - Ok((remaining, ent)) => { - return Ok((remaining, (TodoKeywordType::Todo, ent))); - } - Err(_) => {} - } - } - for todo_keyword in global_settings - .complete_todo_keywords - .iter() - .map(String::as_str) - { - let result = tag::<_, _, CustomError<_>>(todo_keyword)(input); - match result { - Ok((remaining, ent)) => { - return Ok((remaining, (TodoKeywordType::Done, ent))); - } - Err(_) => {} - } - } - Err(nom::Err::Error(CustomError::MyError(MyError( - "NoTodoKeyword".into(), - )))) - } -} - impl<'s> Document<'s> { pub fn iter_tokens<'r>(&'r self) -> impl Iterator> { AllTokensIterator::new(Token::Document(self)) diff --git a/src/parser/headline.rs b/src/parser/headline.rs new 
file mode 100644 index 0000000..0146bad --- /dev/null +++ b/src/parser/headline.rs @@ -0,0 +1,222 @@ +use nom::branch::alt; +use nom::bytes::complete::tag; +use nom::character::complete::anychar; +use nom::character::complete::line_ending; +use nom::character::complete::space0; +use nom::character::complete::space1; +use nom::combinator::eof; +use nom::combinator::map; +use nom::combinator::not; +use nom::combinator::opt; +use nom::combinator::recognize; +use nom::combinator::verify; +use nom::multi::many0; +use nom::multi::many1; +use nom::multi::many1_count; +use nom::multi::separated_list1; +use nom::sequence::tuple; + +use super::document::section; +use super::org_source::OrgSource; +use super::util::get_consumed; +use super::util::start_of_line; +use crate::context::parser_with_context; +use crate::context::ContextElement; +use crate::context::ExitClass; +use crate::context::ExitMatcherNode; +use crate::context::RefContext; +use crate::error::CustomError; +use crate::error::MyError; +use crate::error::Res; +use crate::parser::object_parser::standard_set_object; +use crate::parser::util::blank_line; +use crate::types::DocumentElement; +use crate::types::Heading; +use crate::types::Object; +use crate::types::TodoKeywordType; + +pub const fn heading( + parent_stars: usize, +) -> impl for<'b, 'g, 'r, 's> Fn( + RefContext<'b, 'g, 'r, 's>, + OrgSource<'s>, +) -> Res, Heading<'s>> { + move |context, input: OrgSource<'_>| _heading(context, input, parent_stars) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn _heading<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, + parent_stars: usize, +) -> Res, Heading<'s>> { + not(|i| context.check_exit_matcher(i))(input)?; + let (remaining, (star_count, _ws, maybe_todo_keyword, title, heading_tags)) = + headline(context, input, parent_stars)?; + let section_matcher = parser_with_context!(section)(context); + let heading_matcher = 
parser_with_context!(heading(star_count))(context); + let (remaining, maybe_section) = + opt(map(section_matcher, DocumentElement::Section))(remaining)?; + let (remaining, _ws) = opt(tuple((start_of_line, many0(blank_line))))(remaining)?; + let (remaining, mut children) = + many0(map(heading_matcher, DocumentElement::Heading))(remaining)?; + if let Some(section) = maybe_section { + children.insert(0, section); + } + let remaining = if children.is_empty() { + // Support empty headings + let (remain, _ws) = many0(blank_line)(remaining)?; + remain + } else { + remaining + }; + + let source = get_consumed(input, remaining); + Ok(( + remaining, + Heading { + source: source.into(), + stars: star_count, + todo_keyword: maybe_todo_keyword.map(|((todo_keyword_type, todo_keyword), _ws)| { + (todo_keyword_type, Into::<&str>::into(todo_keyword)) + }), + title, + tags: heading_tags, + children, + }, + )) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +pub fn detect_headline<'s>(input: OrgSource<'s>) -> Res, ()> { + tuple((start_of_line, many1(tag("*")), space1))(input)?; + Ok((input, ())) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn headline<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, + parent_stars: usize, +) -> Res< + OrgSource<'s>, + ( + usize, + OrgSource<'s>, + Option<((TodoKeywordType, OrgSource<'s>), OrgSource<'s>)>, + Vec>, + Vec<&'s str>, + ), +> { + let parser_context = ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Document, + exit_matcher: &headline_title_end, + }); + let parser_context = context.with_additional_node(&parser_context); + + let ( + remaining, + (_sol, star_count, ws, maybe_todo_keyword, title, maybe_tags, _ws, _line_ending), + ) = tuple(( + start_of_line, + verify(many1_count(tag("*")), |star_count| { + *star_count > parent_stars + }), + space1, + opt(tuple(( + parser_with_context!(heading_keyword)(&parser_context), + 
space1, + ))), + many1(parser_with_context!(standard_set_object)(&parser_context)), + opt(tuple((space0, tags))), + space0, + alt((line_ending, eof)), + ))(input)?; + Ok(( + remaining, + ( + star_count, + ws, + maybe_todo_keyword, + title, + maybe_tags + .map(|(_ws, tags)| { + tags.into_iter() + .map(|single_tag| Into::<&str>::into(single_tag)) + .collect() + }) + .unwrap_or(Vec::new()), + ), + )) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn headline_title_end<'b, 'g, 'r, 's>( + _context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, OrgSource<'s>> { + recognize(tuple(( + opt(tuple((space0, tags, space0))), + alt((line_ending, eof)), + )))(input) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn tags<'s>(input: OrgSource<'s>) -> Res, Vec>> { + let (remaining, (_open, tags, _close)) = + tuple((tag(":"), separated_list1(tag(":"), single_tag), tag(":")))(input)?; + Ok((remaining, tags)) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn single_tag<'r, 's>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { + recognize(many1(verify(anychar, |c| { + c.is_alphanumeric() || "_@#%".contains(*c) + })))(input) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn heading_keyword<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, (TodoKeywordType, OrgSource<'s>)> { + let global_settings = context.get_global_settings(); + if global_settings.in_progress_todo_keywords.is_empty() + && global_settings.complete_todo_keywords.is_empty() + { + alt(( + map(tag("TODO"), |capture| (TodoKeywordType::Todo, capture)), + map(tag("DONE"), |capture| (TodoKeywordType::Done, capture)), + ))(input) + } else { + for todo_keyword in global_settings + .in_progress_todo_keywords + .iter() + .map(String::as_str) + { + let result = tag::<_, _, CustomError<_>>(todo_keyword)(input); + match result { + 
Ok((remaining, ent)) => { + return Ok((remaining, (TodoKeywordType::Todo, ent))); + } + Err(_) => {} + } + } + for todo_keyword in global_settings + .complete_todo_keywords + .iter() + .map(String::as_str) + { + let result = tag::<_, _, CustomError<_>>(todo_keyword)(input); + match result { + Ok((remaining, ent)) => { + return Ok((remaining, (TodoKeywordType::Done, ent))); + } + Err(_) => {} + } + } + Err(nom::Err::Error(CustomError::MyError(MyError( + "NoTodoKeyword".into(), + )))) + } +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 0b95974..f402d2a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -14,6 +14,7 @@ mod fixed_width_area; mod footnote_definition; mod footnote_reference; mod greater_block; +mod headline; mod horizontal_rule; mod in_buffer_settings; mod inline_babel_call; From 2e6e6fdd2bdc33bad48fa98e2a63b0e9cc3a04cf Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 15:08:16 -0400 Subject: [PATCH 17/45] Move sections to their own source file. 
--- src/parser/document.rs | 137 +------------------------------------- src/parser/headline.rs | 2 +- src/parser/mod.rs | 1 + src/parser/section.rs | 146 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 149 insertions(+), 137 deletions(-) create mode 100644 src/parser/section.rs diff --git a/src/parser/document.rs b/src/parser/document.rs index 44aca17..027ba70 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -1,42 +1,28 @@ use nom::combinator::all_consuming; use nom::combinator::opt; -use nom::combinator::recognize; -use nom::combinator::verify; use nom::multi::many0; -use nom::multi::many_till; -use nom::sequence::tuple; -use super::headline::detect_headline; use super::headline::heading; use super::in_buffer_settings::apply_in_buffer_settings; use super::in_buffer_settings::scan_for_in_buffer_settings; use super::org_source::OrgSource; +use super::section::zeroth_section; use super::token::AllTokensIterator; use super::token::Token; -use super::util::exit_matcher_parser; use super::util::get_consumed; use crate::context::parser_with_context; use crate::context::Context; use crate::context::ContextElement; -use crate::context::ExitClass; -use crate::context::ExitMatcherNode; use crate::context::GlobalSettings; use crate::context::List; use crate::context::RefContext; use crate::error::CustomError; use crate::error::MyError; use crate::error::Res; -use crate::parser::comment::comment; -use crate::parser::element_parser::element; use crate::parser::org_source::convert_error; -use crate::parser::planning::planning; -use crate::parser::property_drawer::property_drawer; use crate::parser::util::blank_line; -use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting; use crate::types::Document; -use crate::types::Element; use crate::types::Object; -use crate::types::Section; /// Parse a full org-mode document. 
/// @@ -170,127 +156,6 @@ fn _document<'b, 'g, 'r, 's>( )) } -#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn zeroth_section<'b, 'g, 'r, 's>( - context: RefContext<'b, 'g, 'r, 's>, - input: OrgSource<'s>, -) -> Res<OrgSource<'s>, Section<'s>> { - // TODO: The zeroth section is specialized so it probably needs its own parser - let contexts = [ - ContextElement::ConsumeTrailingWhitespace(true), - ContextElement::Context("section"), - ContextElement::ExitMatcherNode(ExitMatcherNode { - class: ExitClass::Document, - exit_matcher: &section_end, - }), - ]; - let parser_context = context.with_additional_node(&contexts[0]); - let parser_context = parser_context.with_additional_node(&contexts[1]); - let parser_context = parser_context.with_additional_node(&contexts[2]); - let without_consuming_whitespace_context = ContextElement::ConsumeTrailingWhitespace(false); - let without_consuming_whitespace_context = - parser_context.with_additional_node(&without_consuming_whitespace_context); - - let element_matcher = parser_with_context!(element(true))(&parser_context); - let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); - - let (remaining, comment_and_property_drawer_element) = opt(tuple(( - opt(parser_with_context!(comment)( - &without_consuming_whitespace_context, - )), - parser_with_context!(property_drawer)(context), - many0(blank_line), - )))(input)?; - - let (remaining, (mut children, _exit_contents)) = verify( - many_till(element_matcher, exit_matcher), - |(children, _exit_contents)| { - !children.is_empty() || comment_and_property_drawer_element.is_some() - }, - )(remaining)?; - - comment_and_property_drawer_element.map(|(comment, property_drawer, _ws)| { - children.insert(0, Element::PropertyDrawer(property_drawer)); - comment - .map(Element::Comment) - .map(|ele| children.insert(0, ele)); - }); - - let (remaining, _trailing_ws) = - maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; - - let source =
get_consumed(input, remaining); - Ok(( - remaining, - Section { - source: source.into(), - children, - }, - )) -} - -#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -pub fn section<'b, 'g, 'r, 's>( - context: RefContext<'b, 'g, 'r, 's>, - mut input: OrgSource<'s>, -) -> Res<OrgSource<'s>, Section<'s>> { - // TODO: The zeroth section is specialized so it probably needs its own parser - let contexts = [ - ContextElement::ConsumeTrailingWhitespace(true), - ContextElement::Context("section"), - ContextElement::ExitMatcherNode(ExitMatcherNode { - class: ExitClass::Document, - exit_matcher: &section_end, - }), - ]; - let parser_context = context.with_additional_node(&contexts[0]); - let parser_context = parser_context.with_additional_node(&contexts[1]); - let parser_context = parser_context.with_additional_node(&contexts[2]); - let element_matcher = parser_with_context!(element(true))(&parser_context); - let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); - let (mut remaining, (planning_element, property_drawer_element)) = tuple(( - opt(parser_with_context!(planning)(&parser_context)), - opt(parser_with_context!(property_drawer)(&parser_context)), - ))(input)?; - if planning_element.is_none() && property_drawer_element.is_none() { - let (remain, _ws) = many0(blank_line)(remaining)?; - remaining = remain; - input = remain; - } - let (remaining, (mut children, _exit_contents)) = verify( - many_till(element_matcher, exit_matcher), - |(children, _exit_contents)| { - !children.is_empty() || property_drawer_element.is_some() || planning_element.is_some() - }, - )(remaining)?; - property_drawer_element - .map(Element::PropertyDrawer) - .map(|ele| children.insert(0, ele)); - planning_element - .map(Element::Planning) - .map(|ele| children.insert(0, ele)); - - let (remaining, _trailing_ws) = - maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; - - let source = get_consumed(input, remaining); - Ok(( - remaining, - Section {
- source: source.into(), - children, - }, - )) -} - -#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn section_end<'b, 'g, 'r, 's>( - _context: RefContext<'b, 'g, 'r, 's>, - input: OrgSource<'s>, -) -> Res<OrgSource<'s>, OrgSource<'s>> { - recognize(detect_headline)(input) -} - impl<'s> Document<'s> { pub fn iter_tokens<'r>(&'r self) -> impl Iterator<Item = Token<'r, 's>> { AllTokensIterator::new(Token::Document(self)) diff --git a/src/parser/headline.rs b/src/parser/headline.rs index 0146bad..c0b6702 100644 --- a/src/parser/headline.rs +++ b/src/parser/headline.rs @@ -16,8 +16,8 @@ use nom::multi::many1_count; use nom::multi::separated_list1; use nom::sequence::tuple; -use super::document::section; use super::org_source::OrgSource; +use super::section::section; use super::util::get_consumed; use super::util::start_of_line; use crate::context::parser_with_context; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f402d2a..30b685a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -36,6 +36,7 @@ mod planning; mod property_drawer; mod radio_link; mod regular_link; +mod section; pub mod sexp; mod statistics_cookie; mod subscript_and_superscript; diff --git a/src/parser/section.rs b/src/parser/section.rs new file mode 100644 index 0000000..33b1685 --- /dev/null +++ b/src/parser/section.rs @@ -0,0 +1,146 @@ +use nom::combinator::opt; +use nom::combinator::recognize; +use nom::combinator::verify; +use nom::multi::many0; +use nom::multi::many_till; +use nom::sequence::tuple; + +use super::headline::detect_headline; +use super::org_source::OrgSource; +use super::util::exit_matcher_parser; +use super::util::get_consumed; +use crate::context::parser_with_context; +use crate::context::ContextElement; +use crate::context::ExitClass; +use crate::context::ExitMatcherNode; +use crate::context::RefContext; +use crate::error::Res; +use crate::parser::comment::comment; +use crate::parser::element_parser::element; +use crate::parser::planning::planning; +use
crate::parser::property_drawer::property_drawer; +use crate::parser::util::blank_line; +use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting; +use crate::types::Element; +use crate::types::Section; + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +pub fn zeroth_section<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res<OrgSource<'s>, Section<'s>> { + // TODO: The zeroth section is specialized so it probably needs its own parser + let contexts = [ + ContextElement::ConsumeTrailingWhitespace(true), + ContextElement::Context("section"), + ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Document, + exit_matcher: &section_end, + }), + ]; + let parser_context = context.with_additional_node(&contexts[0]); + let parser_context = parser_context.with_additional_node(&contexts[1]); + let parser_context = parser_context.with_additional_node(&contexts[2]); + let without_consuming_whitespace_context = ContextElement::ConsumeTrailingWhitespace(false); + let without_consuming_whitespace_context = + parser_context.with_additional_node(&without_consuming_whitespace_context); + + let element_matcher = parser_with_context!(element(true))(&parser_context); + let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); + + let (remaining, comment_and_property_drawer_element) = opt(tuple(( + opt(parser_with_context!(comment)( + &without_consuming_whitespace_context, + )), + parser_with_context!(property_drawer)(context), + many0(blank_line), + )))(input)?; + + let (remaining, (mut children, _exit_contents)) = verify( + many_till(element_matcher, exit_matcher), + |(children, _exit_contents)| { + !children.is_empty() || comment_and_property_drawer_element.is_some() + }, + )(remaining)?; + + comment_and_property_drawer_element.map(|(comment, property_drawer, _ws)| { + children.insert(0, Element::PropertyDrawer(property_drawer)); + comment + .map(Element::Comment) + .map(|ele|
children.insert(0, ele)); + }); + + let (remaining, _trailing_ws) = + maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; + + let source = get_consumed(input, remaining); + Ok(( + remaining, + Section { + source: source.into(), + children, + }, + )) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +pub fn section<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + mut input: OrgSource<'s>, +) -> Res<OrgSource<'s>, Section<'s>> { + // TODO: The zeroth section is specialized so it probably needs its own parser + let contexts = [ + ContextElement::ConsumeTrailingWhitespace(true), + ContextElement::Context("section"), + ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Document, + exit_matcher: &section_end, + }), + ]; + let parser_context = context.with_additional_node(&contexts[0]); + let parser_context = parser_context.with_additional_node(&contexts[1]); + let parser_context = parser_context.with_additional_node(&contexts[2]); + let element_matcher = parser_with_context!(element(true))(&parser_context); + let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); + let (mut remaining, (planning_element, property_drawer_element)) = tuple(( + opt(parser_with_context!(planning)(&parser_context)), + opt(parser_with_context!(property_drawer)(&parser_context)), + ))(input)?; + if planning_element.is_none() && property_drawer_element.is_none() { + let (remain, _ws) = many0(blank_line)(remaining)?; + remaining = remain; + input = remain; + } + let (remaining, (mut children, _exit_contents)) = verify( + many_till(element_matcher, exit_matcher), + |(children, _exit_contents)| { + !children.is_empty() || property_drawer_element.is_some() || planning_element.is_some() + }, + )(remaining)?; + property_drawer_element + .map(Element::PropertyDrawer) + .map(|ele| children.insert(0, ele)); + planning_element + .map(Element::Planning) + .map(|ele| children.insert(0, ele)); + + let (remaining,
_trailing_ws) = + maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; + + let source = get_consumed(input, remaining); + Ok(( + remaining, + Section { + source: source.into(), + children, + }, + )) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn section_end<'b, 'g, 'r, 's>( + _context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res<OrgSource<'s>, OrgSource<'s>> { + recognize(detect_headline)(input) +} From b32c21eb1d64cfbec79597636cb6be893c5b5847 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 15:59:56 -0400 Subject: [PATCH 18/45] Add a test for a comment heading. --- org_mode_samples/sections_and_headings/comment_heading.org | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 org_mode_samples/sections_and_headings/comment_heading.org diff --git a/org_mode_samples/sections_and_headings/comment_heading.org b/org_mode_samples/sections_and_headings/comment_heading.org new file mode 100644 index 0000000..76a4ce9 --- /dev/null +++ b/org_mode_samples/sections_and_headings/comment_heading.org @@ -0,0 +1,2 @@ +* TODO [#A] COMMENT foo bar +baz From c7c0deed74613c648ae34e3ff42ecdd5148dc936 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 15:43:13 -0400 Subject: [PATCH 19/45] Parse priority cookie and COMMENT from headlines.
--- Cargo.toml | 2 +- src/parser/headline.rs | 46 +++++++++++++++++++++++++++++++++++++----- src/types/document.rs | 6 +++++- src/types/mod.rs | 1 + 4 files changed, 48 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b233792..e8981ec 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,7 +39,7 @@ tracing-subscriber = { version = "0.3.17", optional = true, features = ["env-fil walkdir = "2.3.3" [features] -default = [] +default = ["compare"] compare = [] tracing = ["dep:opentelemetry", "dep:opentelemetry-otlp", "dep:opentelemetry-semantic-conventions", "dep:tokio", "dep:tracing", "dep:tracing-opentelemetry", "dep:tracing-subscriber"] diff --git a/src/parser/headline.rs b/src/parser/headline.rs index c0b6702..6be9159 100644 --- a/src/parser/headline.rs +++ b/src/parser/headline.rs @@ -33,6 +33,7 @@ use crate::parser::util::blank_line; use crate::types::DocumentElement; use crate::types::Heading; use crate::types::Object; +use crate::types::PriorityCookie; use crate::types::TodoKeywordType; pub const fn heading( @@ -51,8 +52,10 @@ fn _heading<'b, 'g, 'r, 's>( parent_stars: usize, ) -> Res, Heading<'s>> { not(|i| context.check_exit_matcher(i))(input)?; - let (remaining, (star_count, _ws, maybe_todo_keyword, title, heading_tags)) = - headline(context, input, parent_stars)?; + let ( + remaining, + (star_count, maybe_todo_keyword, maybe_priority, maybe_comment, title, heading_tags), + ) = headline(context, input, parent_stars)?; let section_matcher = parser_with_context!(section)(context); let heading_matcher = parser_with_context!(heading(star_count))(context); let (remaining, maybe_section) = @@ -70,6 +73,7 @@ fn _heading<'b, 'g, 'r, 's>( } else { remaining }; + let is_archived = heading_tags.contains(&"ARCHIVE"); let source = get_consumed(input, remaining); Ok(( @@ -80,9 +84,12 @@ fn _heading<'b, 'g, 'r, 's>( todo_keyword: maybe_todo_keyword.map(|((todo_keyword_type, todo_keyword), _ws)| { (todo_keyword_type, Into::<&str>::into(todo_keyword)) }), + 
priority_cookie: maybe_priority.map(|(priority, _)| priority), title, tags: heading_tags, children, + is_comment: maybe_comment.is_some(), + is_archived, }, )) } @@ -102,8 +109,9 @@ fn headline<'b, 'g, 'r, 's>( OrgSource<'s>, ( usize, - OrgSource<'s>, Option<((TodoKeywordType, OrgSource<'s>), OrgSource<'s>)>, + Option<(PriorityCookie, OrgSource<'s>)>, + Option<(OrgSource<'s>, OrgSource<'s>)>, Vec<Object<'s>>, Vec<&'s str>, ), @@ -116,7 +124,18 @@ fn headline<'b, 'g, 'r, 's>( let ( remaining, - (_sol, star_count, ws, maybe_todo_keyword, title, maybe_tags, _ws, _line_ending), + ( + _, + star_count, + _, + maybe_todo_keyword, + maybe_priority, + maybe_comment, + title, + maybe_tags, + _, + _, + ), ) = tuple(( start_of_line, verify(many1_count(tag("*")), |star_count| { @@ -127,6 +146,8 @@ fn headline<'b, 'g, 'r, 's>( parser_with_context!(heading_keyword)(&parser_context), space1, ))), + opt(tuple((priority_cookie, space1))), + opt(tuple((tag("COMMENT"), space1))), many1(parser_with_context!(standard_set_object)(&parser_context)), opt(tuple((space0, tags))), space0, @@ -136,8 +157,9 @@ fn headline<'b, 'g, 'r, 's>( remaining, ( star_count, - ws, maybe_todo_keyword, + maybe_priority, + maybe_comment, title, maybe_tags .map(|(_ws, tags)| { @@ -220,3 +242,17 @@ fn heading_keyword<'b, 'g, 'r, 's>( )))) } } + +fn priority_cookie<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, PriorityCookie> { + let (remaining, (_, priority_character, _)) = tuple(( + tag("[#"), + verify(anychar, |c| c.is_alphanumeric()), + tag("]"), + ))(input)?; + let cookie = PriorityCookie::try_from(priority_character).map_err(|_| { + nom::Err::Error(CustomError::MyError(MyError( + "Failed to cast priority cookie to number.".into(), + ))) + })?; + Ok((remaining, cookie)) +} diff --git a/src/types/document.rs b/src/types/document.rs index 654377a..142762d 100644 --- a/src/types/document.rs +++ b/src/types/document.rs @@ -2,6 +2,8 @@ use super::Element; use super::Object; use super::Source; +pub type PriorityCookie = u8; +
#[derive(Debug)] pub struct Document<'s> { pub source: &'s str, @@ -14,10 +16,12 @@ pub struct Heading<'s> { pub source: &'s str, pub stars: usize, pub todo_keyword: Option<(TodoKeywordType, &'s str)>, - // TODO: add todo-type enum + pub priority_cookie: Option, pub title: Vec>, pub tags: Vec<&'s str>, pub children: Vec>, + pub is_comment: bool, + pub is_archived: bool, } #[derive(Debug)] diff --git a/src/types/mod.rs b/src/types/mod.rs index efd1b04..9cf5b59 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -7,6 +7,7 @@ mod source; pub use document::Document; pub use document::DocumentElement; pub use document::Heading; +pub use document::PriorityCookie; pub use document::Section; pub use document::TodoKeywordType; pub use element::Element; From 93d3d9471fa2e8e2bf3ff3d6b7d814b6b97ac77c Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 15:57:24 -0400 Subject: [PATCH 20/45] Compare priority, archived, and commented in headlines. --- src/compare/diff.rs | 53 ++++++++++++++++++++++++++++++++++++++++++++- src/compare/util.rs | 5 +++++ 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 0c881a5..bc812d1 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -45,6 +45,7 @@ use crate::types::PlainList; use crate::types::PlainListItem; use crate::types::PlainText; use crate::types::Planning; +use crate::types::PriorityCookie; use crate::types::PropertyDrawer; use crate::types::RadioLink; use crate::types::RadioTarget; @@ -553,7 +554,57 @@ fn compare_heading<'s>( .collect::, _>>()?; child_status.push(artificial_diff_scope("title".to_owned(), title_status)?); - // TODO: Compare priority, :footnote-section-p, :archivedp, :commentedp + // Compare priority + let priority = get_property(emacs, ":priority")?; + match (priority, rust.priority_cookie) { + (None, None) => {} + (None, Some(_)) | (Some(_), None) => { + this_status = DiffStatus::Bad; + message = Some(format!( + "Priority cookie 
mismatch (emacs != rust) {:?} != {:?}", + priority, rust.priority_cookie + )); + } + (Some(emacs_priority_cookie), Some(rust_priority_cookie)) => { + let emacs_priority_cookie = + emacs_priority_cookie.as_atom()?.parse::<PriorityCookie>()?; + if emacs_priority_cookie != rust_priority_cookie { + this_status = DiffStatus::Bad; + message = Some(format!( + "Priority cookie mismatch (emacs != rust) {:?} != {:?}", + emacs_priority_cookie, rust_priority_cookie + )); + } + } + } + + // Compare archived + let archived = get_property(emacs, ":archivedp")?; + match (archived, rust.is_archived) { + (None, true) | (Some(_), false) => { + this_status = DiffStatus::Bad; + message = Some(format!( + "archived mismatch (emacs != rust) {:?} != {:?}", + archived, rust.is_archived + )); + } + (None, false) | (Some(_), true) => {} + } + + // Compare commented + let commented = get_property(emacs, ":commentedp")?; + match (commented, rust.is_comment) { + (None, true) | (Some(_), false) => { + this_status = DiffStatus::Bad; + message = Some(format!( + "commented mismatch (emacs != rust) {:?} != {:?}", + commented, rust.is_comment + )); + } + (None, false) | (Some(_), true) => {} + } + + // TODO: Compare :footnote-section-p // Compare section let section_status = children diff --git a/src/compare/util.rs b/src/compare/util.rs index 6367bf9..173ecd0 100644 --- a/src/compare/util.rs +++ b/src/compare/util.rs @@ -141,6 +141,11 @@ fn maybe_token_to_usize( .map_or(Ok(None), |r| r.map(Some))?) } +/// Get a named property from the emacs token. +/// +/// Returns Ok(None) if value is nil. +/// +/// Returns error if the attribute is not specified on the token at all. pub fn get_property<'s, 'x>( emacs: &'s Token<'s>, key: &'x str, From dc8b8d08abd6f1394d65903bc17f8fe38e1ae311 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 16:25:18 -0400 Subject: [PATCH 21/45] Add test showing we break on empty sections that contain a planning.
--- .../section_with_planning_and_whitespace.org | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 org_mode_samples/sections_and_headings/section_with_planning_and_whitespace.org diff --git a/org_mode_samples/sections_and_headings/section_with_planning_and_whitespace.org b/org_mode_samples/sections_and_headings/section_with_planning_and_whitespace.org new file mode 100644 index 0000000..24ba56d --- /dev/null +++ b/org_mode_samples/sections_and_headings/section_with_planning_and_whitespace.org @@ -0,0 +1,4 @@ +* DONE foo + DEADLINE: <2023-09-08 Fri> + +* DONE bar From 8780976c15d356408ffb83ff250d4a410b80a858 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 16:30:40 -0400 Subject: [PATCH 22/45] Consume trailing whitespace after planning. --- src/parser/planning.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/parser/planning.rs b/src/parser/planning.rs index 77e70c1..10864ab 100644 --- a/src/parser/planning.rs +++ b/src/parser/planning.rs @@ -10,6 +10,7 @@ use nom::multi::separated_list1; use nom::sequence::tuple; use super::org_source::OrgSource; +use super::util::maybe_consume_trailing_whitespace_if_not_exiting; use crate::context::RefContext; use crate::error::Res; use crate::parser::util::get_consumed; @@ -18,7 +19,7 @@ use crate::types::Planning; #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] pub fn planning<'b, 'g, 'r, 's>( - _context: RefContext<'b, 'g, 'r, 's>, + context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Planning<'s>> { start_of_line(input)?; @@ -26,6 +27,8 @@ pub fn planning<'b, 'g, 'r, 's>( let (remaining, _planning_parameters) = separated_list1(space1, planning_parameter)(remaining)?; let (remaining, _trailing_ws) = tuple((space0, alt((line_ending, eof))))(remaining)?; + let (remaining, _trailing_ws) = + maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); Ok(( From 
0056657b65b50e1372dfddf1440b522ac9675f37 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 17:27:02 -0400 Subject: [PATCH 23/45] Add a test showing the plain text parser is not handling subsets of objects like inside a table cell. --- .../greater_element/table/cells_with_objects.org | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 org_mode_samples/greater_element/table/cells_with_objects.org diff --git a/org_mode_samples/greater_element/table/cells_with_objects.org b/org_mode_samples/greater_element/table/cells_with_objects.org new file mode 100644 index 0000000..ffc814b --- /dev/null +++ b/org_mode_samples/greater_element/table/cells_with_objects.org @@ -0,0 +1,6 @@ +src_elisp{(bar)} +*src_elisp{(bar)}* + +| foo *bar* | +| foo src_elisp{(bar)} | +| foo *src_elisp{(bar)}* | From 669da4073ea94746582d2bd254cb24faf50190b0 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 17:45:49 -0400 Subject: [PATCH 24/45] Accept the end condition as a parameter to the plain text parser so it can adapt to the context. 
--- src/parser/object_parser.rs | 179 +++++++++++++++++++++++------------- src/parser/plain_text.rs | 49 ++++++---- src/parser/regular_link.rs | 4 +- 3 files changed, 152 insertions(+), 80 deletions(-) diff --git a/src/parser/object_parser.rs b/src/parser/object_parser.rs index b65f525..5542618 100644 --- a/src/parser/object_parser.rs +++ b/src/parser/object_parser.rs @@ -37,54 +37,11 @@ pub fn standard_set_object<'b, 'g, 'r, 's>( input: OrgSource<'s>, ) -> Res, Object<'s>> { let (remaining, object) = alt(( - map(parser_with_context!(timestamp)(context), Object::Timestamp), - map(parser_with_context!(subscript)(context), Object::Subscript), + parser_with_context!(standard_set_object_sans_plain_text)(context), map( - parser_with_context!(superscript)(context), - Object::Superscript, + parser_with_context!(plain_text(detect_standard_set_object_sans_plain_text))(context), + Object::PlainText, ), - map( - parser_with_context!(statistics_cookie)(context), - Object::StatisticsCookie, - ), - map(parser_with_context!(target)(context), Object::Target), - map(parser_with_context!(line_break)(context), Object::LineBreak), - map( - parser_with_context!(inline_source_block)(context), - Object::InlineSourceBlock, - ), - map( - parser_with_context!(inline_babel_call)(context), - Object::InlineBabelCall, - ), - map(parser_with_context!(citation)(context), Object::Citation), - map( - parser_with_context!(footnote_reference)(context), - Object::FootnoteReference, - ), - map( - parser_with_context!(export_snippet)(context), - Object::ExportSnippet, - ), - map(parser_with_context!(entity)(context), Object::Entity), - map( - parser_with_context!(latex_fragment)(context), - Object::LatexFragment, - ), - map(parser_with_context!(radio_link)(context), Object::RadioLink), - map( - parser_with_context!(radio_target)(context), - Object::RadioTarget, - ), - parser_with_context!(text_markup)(context), - map( - parser_with_context!(regular_link)(context), - Object::RegularLink, - ), - 
map(parser_with_context!(plain_link)(context), Object::PlainLink), - map(parser_with_context!(angle_link)(context), Object::AngleLink), - map(parser_with_context!(org_macro)(context), Object::OrgMacro), - map(parser_with_context!(plain_text)(context), Object::PlainText), ))(input)?; Ok((remaining, object)) } @@ -95,24 +52,17 @@ pub fn minimal_set_object<'b, 'g, 'r, 's>( input: OrgSource<'s>, ) -> Res, Object<'s>> { let (remaining, object) = alt(( - map(parser_with_context!(subscript)(context), Object::Subscript), + parser_with_context!(minimal_set_object_sans_plain_text)(context), map( - parser_with_context!(superscript)(context), - Object::Superscript, + parser_with_context!(plain_text(detect_minimal_set_object_sans_plain_text))(context), + Object::PlainText, ), - map(parser_with_context!(entity)(context), Object::Entity), - map( - parser_with_context!(latex_fragment)(context), - Object::LatexFragment, - ), - parser_with_context!(text_markup)(context), - map(parser_with_context!(plain_text)(context), Object::PlainText), ))(input)?; Ok((remaining, object)) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -pub fn any_object_except_plain_text<'b, 'g, 'r, 's>( +fn standard_set_object_sans_plain_text<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Object<'s>> { @@ -169,14 +119,35 @@ pub fn any_object_except_plain_text<'b, 'g, 'r, 's>( } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -pub fn detect_any_object_except_plain_text<'b, 'g, 'r, 's>( +fn minimal_set_object_sans_plain_text<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, Object<'s>> { + let (remaining, object) = alt(( + map(parser_with_context!(subscript)(context), Object::Subscript), + map( + parser_with_context!(superscript)(context), + Object::Superscript, + ), + map(parser_with_context!(entity)(context), Object::Entity), + map( + 
parser_with_context!(latex_fragment)(context), + Object::LatexFragment, + ), + parser_with_context!(text_markup)(context), + ))(input)?; + Ok((remaining, object)) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +pub fn detect_standard_set_object_sans_plain_text<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, ()> { if detect_subscript_or_superscript(input).is_ok() { return Ok((input, ())); } - if any_object_except_plain_text(context, input).is_ok() { + if standard_set_object_sans_plain_text(context, input).is_ok() { return Ok((input, ())); } @@ -186,7 +157,42 @@ pub fn detect_any_object_except_plain_text<'b, 'g, 'r, 's>( } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -pub fn regular_link_description_object_set<'b, 'g, 'r, 's>( +fn detect_minimal_set_object_sans_plain_text<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, ()> { + if detect_subscript_or_superscript(input).is_ok() { + return Ok((input, ())); + } + if minimal_set_object_sans_plain_text(context, input).is_ok() { + return Ok((input, ())); + } + + return Err(nom::Err::Error(CustomError::MyError(MyError( + "No object detected.".into(), + )))); +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +pub fn regular_link_description_set_object<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, Object<'s>> { + // TODO: It can also contain another link, but only when it is a plain or angle link. 
It can contain square brackets, but not ]] + let (remaining, object) = alt(( + parser_with_context!(regular_link_description_set_object_sans_plain_text)(context), + map( + parser_with_context!(plain_text( + detect_regular_link_description_set_object_sans_plain_text + ))(context), + Object::PlainText, + ), + ))(input)?; + Ok((remaining, object)) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn regular_link_description_set_object_sans_plain_text<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Object<'s>> { @@ -209,15 +215,47 @@ pub fn regular_link_description_object_set<'b, 'g, 'r, 's>( Object::InlineBabelCall, ), map(parser_with_context!(org_macro)(context), Object::OrgMacro), - parser_with_context!(minimal_set_object)(context), + parser_with_context!(minimal_set_object_sans_plain_text)(context), ))(input)?; Ok((remaining, object)) } +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +pub fn detect_regular_link_description_set_object_sans_plain_text<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, ()> { + if detect_subscript_or_superscript(input).is_ok() { + return Ok((input, ())); + } + if regular_link_description_set_object_sans_plain_text(context, input).is_ok() { + return Ok((input, ())); + } + + return Err(nom::Err::Error(CustomError::MyError(MyError( + "No object detected.".into(), + )))); +} + #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] pub fn table_cell_set_object<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, +) -> Res, Object<'s>> { + let (remaining, object) = alt(( + parser_with_context!(table_cell_set_object_sans_plain_text)(context), + map( + parser_with_context!(plain_text(detect_table_cell_set_object_sans_plain_text))(context), + Object::PlainText, + ), + ))(input)?; + Ok((remaining, object)) +} + +#[cfg_attr(feature = "tracing", 
tracing::instrument(ret, level = "debug"))] +pub fn table_cell_set_object_sans_plain_text<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, ) -> Res, Object<'s>> { let (remaining, object) = alt(( map(parser_with_context!(citation)(context), Object::Citation), @@ -243,7 +281,24 @@ pub fn table_cell_set_object<'b, 'g, 'r, 's>( ), map(parser_with_context!(target)(context), Object::Target), map(parser_with_context!(timestamp)(context), Object::Timestamp), - parser_with_context!(minimal_set_object)(context), + parser_with_context!(minimal_set_object_sans_plain_text)(context), ))(input)?; Ok((remaining, object)) } + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +pub fn detect_table_cell_set_object_sans_plain_text<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, ()> { + if detect_subscript_or_superscript(input).is_ok() { + return Ok((input, ())); + } + if table_cell_set_object_sans_plain_text(context, input).is_ok() { + return Ok((input, ())); + } + + return Err(nom::Err::Error(CustomError::MyError(MyError( + "No object detected.".into(), + )))); +} diff --git a/src/parser/plain_text.rs b/src/parser/plain_text.rs index e57f4d1..b50bd13 100644 --- a/src/parser/plain_text.rs +++ b/src/parser/plain_text.rs @@ -7,7 +7,6 @@ use nom::combinator::recognize; use nom::combinator::verify; use nom::multi::many_till; -use super::object_parser::detect_any_object_except_plain_text; use super::org_source::OrgSource; use super::radio_link::RematchObject; use super::util::exit_matcher_parser; @@ -17,17 +16,42 @@ use crate::error::Res; use crate::types::Object; use crate::types::PlainText; -#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -pub fn plain_text<'b, 'g, 'r, 's>( +pub fn plain_text( + end_condition: F, +) -> impl for<'b, 'g, 'r, 's> Fn( + RefContext<'b, 'g, 'r, 's>, + OrgSource<'s>, +) -> Res, PlainText<'s>> +where + F: for<'bb, 'gg, 'rr, 'ss> Fn( + 
RefContext<'bb, 'gg, 'rr, 'ss>, + OrgSource<'ss>, + ) -> Res, ()>, +{ + move |context, input| _plain_text(&end_condition, context, input) +} + +#[cfg_attr( + feature = "tracing", + tracing::instrument(ret, level = "debug", skip(end_condition)) +)] +fn _plain_text<'b, 'g, 'r, 's, F>( + end_condition: F, context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, -) -> Res, PlainText<'s>> { +) -> Res, PlainText<'s>> +where + F: for<'bb, 'gg, 'rr, 'ss> Fn( + RefContext<'bb, 'gg, 'rr, 'ss>, + OrgSource<'ss>, + ) -> Res, ()>, +{ let (remaining, source) = recognize(verify( many_till( anychar, peek(alt(( parser_with_context!(exit_matcher_parser)(context), - parser_with_context!(plain_text_end)(context), + recognize(parser_with_context!(end_condition)(context)), ))), ), |(children, _exit_contents)| !children.is_empty(), @@ -41,16 +65,6 @@ pub fn plain_text<'b, 'g, 'r, 's>( )) } -#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn plain_text_end<'b, 'g, 'r, 's>( - context: RefContext<'b, 'g, 'r, 's>, - input: OrgSource<'s>, -) -> Res, OrgSource<'s>> { - recognize(parser_with_context!(detect_any_object_except_plain_text)( - context, - ))(input) -} - impl<'x> RematchObject<'x> for PlainText<'x> { #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn rematch_object<'b, 'g, 'r, 's>( @@ -75,6 +89,7 @@ mod tests { use crate::context::ContextElement; use crate::context::GlobalSettings; use crate::context::List; + use crate::parser::object_parser::detect_standard_set_object_sans_plain_text; use crate::types::Source; #[test] @@ -83,7 +98,9 @@ mod tests { let global_settings = GlobalSettings::default(); let initial_context = ContextElement::document_context(); let initial_context = Context::new(&global_settings, List::new(&initial_context)); - let plain_text_matcher = parser_with_context!(plain_text)(&initial_context); + let plain_text_matcher = parser_with_context!(plain_text( + detect_standard_set_object_sans_plain_text + 
))(&initial_context); let (remaining, result) = map(plain_text_matcher, Object::PlainText)(input).unwrap(); assert_eq!(Into::<&str>::into(remaining), ""); assert_eq!(result.get_source(), Into::<&str>::into(input)); diff --git a/src/parser/regular_link.rs b/src/parser/regular_link.rs index 615e77f..e959a28 100644 --- a/src/parser/regular_link.rs +++ b/src/parser/regular_link.rs @@ -6,7 +6,7 @@ use nom::character::complete::one_of; use nom::combinator::verify; use nom::multi::many_till; -use super::object_parser::regular_link_description_object_set; +use super::object_parser::regular_link_description_set_object; use super::org_source::OrgSource; use super::util::exit_matcher_parser; use super::util::get_consumed; @@ -99,7 +99,7 @@ pub fn description<'b, 'g, 'r, 's>( let parser_context = context.with_additional_node(&parser_context); let (remaining, (children, _exit_contents)) = verify( many_till( - parser_with_context!(regular_link_description_object_set)(&parser_context), + parser_with_context!(regular_link_description_set_object)(&parser_context), parser_with_context!(exit_matcher_parser)(&parser_context), ), |(children, _exit_contents)| !children.is_empty(), From f82d2aada13591bcceed9d2d89add9e0741b1b7e Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 18:03:50 -0400 Subject: [PATCH 25/45] Fix run_docker_compare with relative paths. 
--- scripts/run_docker_compare.bash | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/scripts/run_docker_compare.bash b/scripts/run_docker_compare.bash index 7ac76e6..bfd25e1 100755 --- a/scripts/run_docker_compare.bash +++ b/scripts/run_docker_compare.bash @@ -9,7 +9,6 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" : ${BACKTRACE:="NO"} # or YES to print a rust backtrace when panicking : ${NO_COLOR:=""} # Set to anything to disable color output -cd "$DIR/../" REALPATH=$(command -v uu-realpath || command -v realpath) MAKE=$(command -v gmake || command -v make) @@ -56,10 +55,10 @@ function launch_container { local full_path=$($REALPATH "$path") local containing_folder=$(dirname "$full_path") local file_name=$(basename "$full_path") - docker run "${additional_flags[@]}" --init --rm -i --mount type=tmpfs,destination=/tmp -v "${containing_folder}:/input:ro" -v "$($REALPATH ./):/source:ro" --mount source=cargo-cache,target=/usr/local/cargo/registry --mount source=rust-cache,target=/target --env CARGO_TARGET_DIR=/target -w /source --entrypoint "" organic-test "${additional_args[@]}" -- "/input/$file_name" + docker run "${additional_flags[@]}" --init --rm -i --mount type=tmpfs,destination=/tmp -v "${containing_folder}:/input:ro" -v "$($REALPATH "$DIR/../"):/source:ro" --mount source=cargo-cache,target=/usr/local/cargo/registry --mount source=rust-cache,target=/target --env CARGO_TARGET_DIR=/target -w /source --entrypoint "" organic-test "${additional_args[@]}" -- "/input/$file_name" done else - docker run "${additional_flags[@]}" --init --rm -i --mount type=tmpfs,destination=/tmp -v "$($REALPATH ./):/source:ro" --mount source=cargo-cache,target=/usr/local/cargo/registry --mount source=rust-cache,target=/target --env CARGO_TARGET_DIR=/target -w /source --entrypoint "" organic-test "${additional_args[@]}" + docker run "${additional_flags[@]}" --init --rm -i --mount type=tmpfs,destination=/tmp -v "$($REALPATH "$DIR/../"):/source:ro" --mount 
source=cargo-cache,target=/usr/local/cargo/registry --mount source=rust-cache,target=/target --env CARGO_TARGET_DIR=/target -w /source --entrypoint "" organic-test "${additional_args[@]}" fi } From 352c20d1d86f7132409ce5e7f82ced7ce9ea32ac Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 18:05:10 -0400 Subject: [PATCH 26/45] Fix run_docker_compare_bisect with relative paths. --- scripts/run_docker_compare_bisect.bash | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/run_docker_compare_bisect.bash b/scripts/run_docker_compare_bisect.bash index 0fa011a..fd93ba4 100755 --- a/scripts/run_docker_compare_bisect.bash +++ b/scripts/run_docker_compare_bisect.bash @@ -5,7 +5,6 @@ set -euo pipefail IFS=$'\n\t' DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -cd "$DIR/../" REALPATH=$(command -v uu-realpath || command -v realpath) ############## Setup ######################### From 0d7a15bfeb0a799c5a0cde7755dd374eb2971903 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 18:35:33 -0400 Subject: [PATCH 27/45] Handle spaces after statistics cookies. 
--- .../sections_and_headings/statistics_cookie_with_space.org | 1 + src/parser/statistics_cookie.rs | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) create mode 100644 org_mode_samples/sections_and_headings/statistics_cookie_with_space.org diff --git a/org_mode_samples/sections_and_headings/statistics_cookie_with_space.org b/org_mode_samples/sections_and_headings/statistics_cookie_with_space.org new file mode 100644 index 0000000..6e000ef --- /dev/null +++ b/org_mode_samples/sections_and_headings/statistics_cookie_with_space.org @@ -0,0 +1 @@ +* [0/4] foo diff --git a/src/parser/statistics_cookie.rs b/src/parser/statistics_cookie.rs index 22ec3f5..21f52bf 100644 --- a/src/parser/statistics_cookie.rs +++ b/src/parser/statistics_cookie.rs @@ -4,6 +4,7 @@ use nom::combinator::recognize; use nom::sequence::tuple; use super::org_source::OrgSource; +use super::util::get_consumed; use super::util::maybe_consume_object_trailing_whitespace_if_not_exiting; use crate::context::parser_with_context; use crate::context::RefContext; @@ -26,10 +27,11 @@ pub fn percent_statistics_cookie<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, StatisticsCookie<'s>> { - let (remaining, source) = + let (remaining, _) = recognize(tuple((tag("["), nom::character::complete::u64, tag("%]"))))(input)?; let (remaining, _trailing_whitespace) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; + let source = get_consumed(input, remaining); Ok(( remaining, StatisticsCookie { @@ -43,7 +45,7 @@ pub fn fraction_statistics_cookie<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, StatisticsCookie<'s>> { - let (remaining, source) = recognize(tuple(( + let (remaining, _) = recognize(tuple(( tag("["), nom::character::complete::u64, tag("/"), @@ -52,6 +54,7 @@ pub fn fraction_statistics_cookie<'b, 'g, 'r, 's>( )))(input)?; let (remaining, _trailing_whitespace) = 
maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; + let source = get_consumed(input, remaining); Ok(( remaining, StatisticsCookie { From 0110d233878264033ff6a229393f9373f59d85b3 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 18:41:57 -0400 Subject: [PATCH 28/45] Update empty list test to show that we're not handling trailing whitespace for empty list items properly. --- org_mode_samples/greater_element/plain_list/empty_list_item.org | 2 ++ 1 file changed, 2 insertions(+) diff --git a/org_mode_samples/greater_element/plain_list/empty_list_item.org b/org_mode_samples/greater_element/plain_list/empty_list_item.org index 0fe3a9a..397d6b8 100644 --- a/org_mode_samples/greater_element/plain_list/empty_list_item.org +++ b/org_mode_samples/greater_element/plain_list/empty_list_item.org @@ -1,3 +1,5 @@ 1. 2. 3. + +* headline From 494fe5ccebffd74c02832fa9052f3cfbee726bbc Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 19:01:46 -0400 Subject: [PATCH 29/45] Handle contentless list items mid-document. 
--- src/parser/plain_list.rs | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index f74de75..8c02bff 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -154,8 +154,10 @@ pub fn plain_list_item<'b, 'g, 'r, 's>( let (remaining, maybe_tag) = opt(tuple((space1, parser_with_context!(item_tag)(context))))(remaining)?; - let maybe_contentless_item: Res, OrgSource<'_>> = - peek(recognize(tuple((many0(blank_line), eof))))(remaining); + + let maybe_contentless_item: Res, ()> = peek(parser_with_context!( + detect_contentless_item_contents + )(context))(remaining); match maybe_contentless_item { Ok((_rem, _ws)) => { let (remaining, _trailing_ws) = opt(blank_line)(remaining)?; @@ -374,6 +376,18 @@ fn item_tag_post_gap<'b, 'g, 'r, 's>( )(input) } +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn detect_contentless_item_contents<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, ()> { + let (remaining, _) = recognize(many_till( + blank_line, + parser_with_context!(exit_matcher_parser)(context), + ))(input)?; + Ok((remaining, ())) +} + #[cfg(test)] mod tests { use super::*; From b04341882c89c6950714d74c5e3727e3e52438b7 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 19:24:03 -0400 Subject: [PATCH 30/45] Add test showing that we are not handling trailing spaces in description list tags correctly. 
--- .../greater_element/plain_list/description_list_object_key.org | 1 + 1 file changed, 1 insertion(+) diff --git a/org_mode_samples/greater_element/plain_list/description_list_object_key.org b/org_mode_samples/greater_element/plain_list/description_list_object_key.org index fcf945a..737aede 100644 --- a/org_mode_samples/greater_element/plain_list/description_list_object_key.org +++ b/org_mode_samples/greater_element/plain_list/description_list_object_key.org @@ -1 +1,2 @@ - {{{foo(bar)}}} :: baz +- =foo= :: bar From ceb722e47616b0f731da3a4b8ca259b6ea4e05ad Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 19:29:31 -0400 Subject: [PATCH 31/45] Check exit matcher after each space consumed for object trailing whitespace. Since description list tags need to end with a space unconsumed for " ::", we need to check the exit matcher after each space consumed. --- src/parser/util.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/parser/util.rs b/src/parser/util.rs index 6625b86..c9314c1 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -8,6 +8,7 @@ use nom::combinator::not; use nom::combinator::opt; use nom::combinator::peek; use nom::combinator::recognize; +use nom::combinator::verify; use nom::multi::many0; use nom::multi::many_till; use nom::sequence::tuple; @@ -91,11 +92,15 @@ pub fn maybe_consume_object_trailing_whitespace_if_not_exiting<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Option>> { - if exit_matcher_parser(context, input).is_err() { - opt(space0)(input) - } else { - Ok((input, None)) - } + // We have to check exit matcher after each character because description list tags need to end with a space unconsumed (" ::"). 
+ let (remaining, _) = many_till( + verify(anychar, |c| *c == ' '), + alt(( + peek(recognize(verify(anychar, |c| *c != ' '))), + parser_with_context!(exit_matcher_parser)(context), + )), + )(input)?; + Ok((remaining, None)) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] From 344ef0445322949265b877f911812cd90305a64f Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 19:53:58 -0400 Subject: [PATCH 32/45] Add tests showing we are not handling tabs appropriately for description list tags. --- .../greater_element/plain_list/description_list_tabs.org | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 org_mode_samples/greater_element/plain_list/description_list_tabs.org diff --git a/org_mode_samples/greater_element/plain_list/description_list_tabs.org b/org_mode_samples/greater_element/plain_list/description_list_tabs.org new file mode 100644 index 0000000..2d05493 --- /dev/null +++ b/org_mode_samples/greater_element/plain_list/description_list_tabs.org @@ -0,0 +1,3 @@ +- foo :: bar +- foo :: bar +- foo :: bar From a8fbf011243ee78c13679123242479c8385e5285 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 20:02:49 -0400 Subject: [PATCH 33/45] Handle tabs for plain list descriptions. This bug probably exists in hundreds of places across the code base. I am going to have to write a "fuzzer" that replaces random whitespace with tabs to find them all. 
--- src/parser/plain_list.rs | 9 +++++---- src/parser/util.rs | 6 +++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 8c02bff..bc3a4fe 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -332,7 +332,7 @@ fn item_tag<'b, 'g, 'r, 's>( ), |(children, _exit_contents)| !children.is_empty(), )(input)?; - let (remaining, _) = tag(" ::")(remaining)?; + let (remaining, _) = tuple((one_of(" \t"), tag("::")))(remaining)?; Ok((remaining, children)) } @@ -341,9 +341,10 @@ fn item_tag_end<'b, 'g, 'r, 's>( _context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, OrgSource<'s>> { - recognize(alt(( - tag(" :: "), - recognize(tuple((tag(" ::"), alt((line_ending, eof))))), + recognize(tuple(( + one_of(" \t"), + tag("::"), + alt((recognize(one_of(" \t")), line_ending, eof)), )))(input) } diff --git a/src/parser/util.rs b/src/parser/util.rs index c9314c1..e521079 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -2,13 +2,13 @@ use nom::branch::alt; use nom::character::complete::anychar; use nom::character::complete::line_ending; use nom::character::complete::none_of; +use nom::character::complete::one_of; use nom::character::complete::space0; use nom::combinator::eof; use nom::combinator::not; use nom::combinator::opt; use nom::combinator::peek; use nom::combinator::recognize; -use nom::combinator::verify; use nom::multi::many0; use nom::multi::many_till; use nom::sequence::tuple; @@ -94,9 +94,9 @@ pub fn maybe_consume_object_trailing_whitespace_if_not_exiting<'b, 'g, 'r, 's>( ) -> Res, Option>> { // We have to check exit matcher after each character because description list tags need to end with a space unconsumed (" ::"). 
let (remaining, _) = many_till( - verify(anychar, |c| *c == ' '), + one_of(" \t"), alt(( - peek(recognize(verify(anychar, |c| *c != ' '))), + peek(recognize(none_of(" \t"))), parser_with_context!(exit_matcher_parser)(context), )), )(input)?; From 5d20d3e99b580fab8f6a75bd5d73c13fae7e175b Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 20:28:21 -0400 Subject: [PATCH 34/45] Add a test showing we are not handling empty space for footnote definitions correctly. --- .../empty_space_before_and_after_content.org | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 org_mode_samples/greater_element/footnote_definition/empty_space_before_and_after_content.org diff --git a/org_mode_samples/greater_element/footnote_definition/empty_space_before_and_after_content.org b/org_mode_samples/greater_element/footnote_definition/empty_space_before_and_after_content.org new file mode 100644 index 0000000..1d2113e --- /dev/null +++ b/org_mode_samples/greater_element/footnote_definition/empty_space_before_and_after_content.org @@ -0,0 +1,9 @@ +* Footnotes + +[fn:1] + +#+BEGIN_EXAMPLE +baz +#+END_EXAMPLE + + From 6a1bdd5feed112b80d8c12340a642b1227d921b3 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 21:11:47 -0400 Subject: [PATCH 35/45] Support blank lines before content in footnote definitions. 
--- .../empty_space_before_and_after_content.org | 1 - src/parser/footnote_definition.rs | 13 +++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/org_mode_samples/greater_element/footnote_definition/empty_space_before_and_after_content.org b/org_mode_samples/greater_element/footnote_definition/empty_space_before_and_after_content.org index 1d2113e..6862213 100644 --- a/org_mode_samples/greater_element/footnote_definition/empty_space_before_and_after_content.org +++ b/org_mode_samples/greater_element/footnote_definition/empty_space_before_and_after_content.org @@ -6,4 +6,3 @@ baz #+END_EXAMPLE - diff --git a/src/parser/footnote_definition.rs b/src/parser/footnote_definition.rs index e413d9d..be8b0e3 100644 --- a/src/parser/footnote_definition.rs +++ b/src/parser/footnote_definition.rs @@ -4,8 +4,10 @@ use nom::bytes::complete::tag_no_case; use nom::bytes::complete::take_while; use nom::character::complete::digit1; use nom::character::complete::space0; +use nom::combinator::opt; use nom::combinator::recognize; use nom::combinator::verify; +use nom::multi::many0; use nom::multi::many1; use nom::multi::many_till; use nom::sequence::tuple; @@ -41,8 +43,15 @@ pub fn footnote_definition<'b, 'g, 'r, 's>( } start_of_line(input)?; // Cannot be indented. - let (remaining, (_lead_in, lbl, _lead_out, _ws)) = - tuple((tag_no_case("[fn:"), label, tag("]"), space0))(input)?; + let (remaining, (_, lbl, _, _, _)) = tuple(( + tag_no_case("[fn:"), + label, + tag("]"), + space0, + opt(verify(many0(blank_line), |lines: &Vec>| { + lines.len() <= 2 + })), + ))(input)?; let contexts = [ ContextElement::ConsumeTrailingWhitespace(true), ContextElement::Context("footnote definition"), From 21c60d10369dcc15d7b9f967a357e5767e9f9cbb Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 21:30:03 -0400 Subject: [PATCH 36/45] Do not consume trailing whitespace on the footnote definition's final element. 
--- src/parser/footnote_definition.rs | 20 ++++++++++++++++---- src/parser/plain_list.rs | 13 +------------ src/parser/util.rs | 12 ++++++++++++ 3 files changed, 29 insertions(+), 16 deletions(-) diff --git a/src/parser/footnote_definition.rs b/src/parser/footnote_definition.rs index be8b0e3..9d5d79a 100644 --- a/src/parser/footnote_definition.rs +++ b/src/parser/footnote_definition.rs @@ -13,6 +13,7 @@ use nom::multi::many_till; use nom::sequence::tuple; use super::org_source::OrgSource; +use super::util::include_input; use super::util::WORD_CONSTITUENT_CHARACTERS; use crate::context::parser_with_context; use crate::context::ContextElement; @@ -63,11 +64,22 @@ pub fn footnote_definition<'b, 'g, 'r, 's>( let parser_context = context.with_additional_node(&contexts[0]); let parser_context = parser_context.with_additional_node(&contexts[1]); let parser_context = parser_context.with_additional_node(&contexts[2]); - // TODO: The problem is we are not accounting for trailing whitespace like we do in section. Maybe it would be easier if we passed down whether or not to parse trailing whitespace into the element matcher similar to how tag takes in parameters. let element_matcher = parser_with_context!(element(true))(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); - let (remaining, (children, _exit_contents)) = - many_till(element_matcher, exit_matcher)(remaining)?; + let (mut remaining, (mut children, _exit_contents)) = + many_till(include_input(element_matcher), exit_matcher)(remaining)?; + + // Re-parse the last element of the footnote definition with consume trailing whitespace off because the trailing whitespace needs to belong to the footnote definition, not the contents. 
+ if context.should_consume_trailing_whitespace() { + if let Some((final_item_input, _)) = children.pop() { + let final_item_context = ContextElement::ConsumeTrailingWhitespace(false); + let final_item_context = parser_context.with_additional_node(&final_item_context); + let (remain, reparsed_final_item) = + parser_with_context!(element(true))(&final_item_context)(final_item_input)?; + children.push((final_item_input, reparsed_final_item)); + remaining = remain; + } + } let source = get_consumed(input, remaining); Ok(( @@ -75,7 +87,7 @@ pub fn footnote_definition<'b, 'g, 'r, 's>( FootnoteDefinition { source: source.into(), label: lbl.into(), - children, + children: children.into_iter().map(|(_, item)| item).collect(), }, )) } diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index bc3a4fe..4f4bc49 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -19,6 +19,7 @@ use nom::sequence::tuple; use super::element_parser::element; use super::object_parser::standard_set_object; use super::org_source::OrgSource; +use super::util::include_input; use super::util::non_whitespace_character; use crate::context::parser_with_context; use crate::context::ContextElement; @@ -225,18 +226,6 @@ pub fn plain_list_item<'b, 'g, 'r, 's>( )); } -fn include_input<'s, F, O>( - mut inner: F, -) -> impl FnMut(OrgSource<'s>) -> Res, (OrgSource<'s>, O)> -where - F: FnMut(OrgSource<'s>) -> Res, O>, -{ - move |input: OrgSource<'_>| { - let (remaining, output) = inner(input)?; - Ok((remaining, (input, output))) - } -} - #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn bullet<'s>(i: OrgSource<'s>) -> Res, OrgSource<'s>> { alt(( diff --git a/src/parser/util.rs b/src/parser/util.rs index e521079..53deb7c 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -211,3 +211,15 @@ pub fn text_until_eol<'r, 's>( .map(|(_remaining, line)| Into::<&str>::into(line))?; Ok(line.trim()) } + +pub fn include_input<'s, F, O>( + mut inner: F, +) -> 
impl FnMut(OrgSource<'s>) -> Res, (OrgSource<'s>, O)> +where + F: FnMut(OrgSource<'s>) -> Res, O>, +{ + move |input: OrgSource<'_>| { + let (remaining, output) = inner(input)?; + Ok((remaining, (input, output))) + } +} From d1fe2f6b09ab86d2e6d17c042ba52b383cd547ae Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 21:50:32 -0400 Subject: [PATCH 37/45] Update the rest of the scripts to work with relative paths. --- scripts/callgrind.bash | 8 ++++---- scripts/perf.bash | 8 +++----- scripts/run_docker_integration_test.bash | 3 +-- scripts/run_integration_test.bash | 3 +-- scripts/time_parse.bash | 6 ++---- 5 files changed, 11 insertions(+), 17 deletions(-) diff --git a/scripts/callgrind.bash b/scripts/callgrind.bash index 3bc909c..9f13c7b 100755 --- a/scripts/callgrind.bash +++ b/scripts/callgrind.bash @@ -4,10 +4,10 @@ set -euo pipefail IFS=$'\n\t' DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -cd "$DIR/../" -RUSTFLAGS="-C opt-level=0" cargo build --no-default-features -valgrind --tool=callgrind --callgrind-out-file=callgrind.out target/debug/parse "${@}" + +(cd "$DIR/../" && RUSTFLAGS="-C opt-level=0" cargo build --no-default-features) +valgrind --tool=callgrind --callgrind-out-file="$DIR/../callgrind.out" "$DIR/../target/debug/parse" "${@}" echo "You probably want to run:" -echo "callgrind_annotate --auto=yes callgrind.out" +echo "callgrind_annotate --auto=yes '$DIR/../callgrind.out'" diff --git a/scripts/perf.bash b/scripts/perf.bash index f40e58b..aa7ae32 100755 --- a/scripts/perf.bash +++ b/scripts/perf.bash @@ -6,8 +6,6 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" : ${PROFILE:="perf"} -cd "$DIR/../" - function main { local additional_flags=() if [ "$PROFILE" = "dev" ] || [ "$PROFILE" = "debug" ]; then @@ -15,12 +13,12 @@ function main { else additional_flags+=(--profile "$PROFILE") fi - cargo build --no-default-features "${additional_flags[@]}" - perf record --freq=2000 --call-graph dwarf --output=perf.data 
target/${PROFILE}/parse "${@}" + (cd "$DIR/../" && cargo build --no-default-features "${additional_flags[@]}") + perf record --freq=2000 --call-graph dwarf --output="$DIR/../perf.data" "$DIR/../target/${PROFILE}/parse" "${@}" # Convert to a format firefox will read # flags to consider --show-info - perf script -F +pid --input perf.data > perf.firefox + perf script -F +pid --input "$DIR/../perf.data" > "$DIR/../perf.firefox" echo "You probably want to go to https://profiler.firefox.com/" echo "Either that or run hotspot" diff --git a/scripts/run_docker_integration_test.bash b/scripts/run_docker_integration_test.bash index 5ba5a37..bcf2646 100755 --- a/scripts/run_docker_integration_test.bash +++ b/scripts/run_docker_integration_test.bash @@ -6,7 +6,6 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" : ${NO_COLOR:=""} # Set to anything to disable color output -cd "$DIR/../" REALPATH=$(command -v uu-realpath || command -v realpath) MAKE=$(command -v gmake || command -v make) @@ -56,7 +55,7 @@ cargo test --no-default-features --features compare --no-fail-fast --lib --test EOF ) - docker run "${additional_flags[@]}" --init --rm --read-only --mount type=tmpfs,destination=/tmp -v "$($REALPATH ./):/source:ro" --mount source=cargo-cache,target=/usr/local/cargo/registry --mount source=rust-cache,target=/target --env CARGO_TARGET_DIR=/target -w /source --entrypoint "" organic-test sh -c "$init_script" + docker run "${additional_flags[@]}" --init --rm --read-only --mount type=tmpfs,destination=/tmp -v "$($REALPATH "$DIR/../"):/source:ro" --mount source=cargo-cache,target=/usr/local/cargo/registry --mount source=rust-cache,target=/target --env CARGO_TARGET_DIR=/target -w /source --entrypoint "" organic-test sh -c "$init_script" } diff --git a/scripts/run_integration_test.bash b/scripts/run_integration_test.bash index 32da81d..095bee9 100755 --- a/scripts/run_integration_test.bash +++ b/scripts/run_integration_test.bash @@ -4,7 +4,6 @@ set -euo pipefail IFS=$'\n\t' 
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -cd "$DIR/../" REALPATH=$(command -v uu-realpath || command -v realpath) function main { @@ -12,7 +11,7 @@ function main { local test while read test; do - cargo test --no-default-features --features compare --no-fail-fast --test test_loader "$test" -- --show-output + (cd "$DIR/../" && cargo test --no-default-features --features compare --no-fail-fast --test test_loader "$test" -- --show-output) done<<<"$test_names" } diff --git a/scripts/time_parse.bash b/scripts/time_parse.bash index 817bada..6409f3f 100755 --- a/scripts/time_parse.bash +++ b/scripts/time_parse.bash @@ -7,8 +7,6 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" : ${PROFILE:="release-lto"} -cd "$DIR/../" - function main { local additional_flags=() if [ "$PROFILE" = "dev" ] || [ "$PROFILE" = "debug" ]; then @@ -16,8 +14,8 @@ function main { else additional_flags+=(--profile "$PROFILE") fi - cargo build --no-default-features "${additional_flags[@]}" - time ./target/${PROFILE}/parse "${@}" + (cd "$DIR/../" && cargo build --no-default-features "${additional_flags[@]}") + time "$DIR/../target/${PROFILE}/parse" "${@}" } main "${@}" From f30069efe765b80e20760c97fe4428500bbcad3a Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 21:59:02 -0400 Subject: [PATCH 38/45] Add a test showing we're not handling colons in keyword keys correctly. 
--- .../lesser_element/keyword/keyword_with_colon_in_key.org | 1 + 1 file changed, 1 insertion(+) create mode 100644 org_mode_samples/lesser_element/keyword/keyword_with_colon_in_key.org diff --git a/org_mode_samples/lesser_element/keyword/keyword_with_colon_in_key.org b/org_mode_samples/lesser_element/keyword/keyword_with_colon_in_key.org new file mode 100644 index 0000000..997a5d7 --- /dev/null +++ b/org_mode_samples/lesser_element/keyword/keyword_with_colon_in_key.org @@ -0,0 +1 @@ +#+title:foo:bar: baz: lorem: ipsum From 7545fb7e1af0cd3a39ed7cd24c863266da9e2543 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 22:17:10 -0400 Subject: [PATCH 39/45] Support keywords with colons in the key and without a space between the colon and value. --- src/parser/keyword.rs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/parser/keyword.rs b/src/parser/keyword.rs index 200670a..321118e 100644 --- a/src/parser/keyword.rs +++ b/src/parser/keyword.rs @@ -5,8 +5,8 @@ use nom::bytes::complete::tag_no_case; use nom::bytes::complete::take_while1; use nom::character::complete::anychar; use nom::character::complete::line_ending; +use nom::character::complete::one_of; use nom::character::complete::space0; -use nom::character::complete::space1; use nom::combinator::consumed; use nom::combinator::eof; use nom::combinator::not; @@ -66,7 +66,7 @@ fn _filtered_keyword<'s, F: Matcher>( } Err(_) => {} }; - let (remaining, _ws) = space1(remaining)?; + let (remaining, _ws) = space0(remaining)?; let (remaining, parsed_value) = recognize(many_till( anychar, peek(tuple((space0, alt((line_ending, eof))))), @@ -113,11 +113,15 @@ fn babel_call_key<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn regular_keyword_key<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { - recognize(tuple(( - not(peek(tag_no_case("call"))), - not(peek(tag_no_case("begin"))), - is_not(" 
\t\r\n:"), - )))(input) + not(peek(alt((tag_no_case("call"), tag_no_case("begin")))))(input)?; + recognize(many_till( + anychar, + peek(alt(( + recognize(one_of(" \t\r\n")), // Give up if we hit whitespace + recognize(tuple((tag(":"), one_of(" \t\r\n")))), // Stop if we see a colon followed by whitespace + recognize(tuple((tag(":"), is_not(" \t\r\n:"), not(tag(":"))))), // Stop if we see a colon that is the last colon before whitespace. This is for keywords like "#+foo:bar:baz: lorem: ipsum" which would have the key "foo:bar:baz". + ))), + ))(input) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] From d79035e14d4713d8a2b792f124e6b9aaf0f38035 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 22:21:19 -0400 Subject: [PATCH 40/45] Add a test showing we are not handling empty statistics cookies. --- org_mode_samples/object/statistics_cookie/empty.org | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 org_mode_samples/object/statistics_cookie/empty.org diff --git a/org_mode_samples/object/statistics_cookie/empty.org b/org_mode_samples/object/statistics_cookie/empty.org new file mode 100644 index 0000000..f0168ae --- /dev/null +++ b/org_mode_samples/object/statistics_cookie/empty.org @@ -0,0 +1,4 @@ +[/] +[/2] +[3/] +[%] From 0105b49d0d425658b802120b9d90a18c991730c8 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 22:21:49 -0400 Subject: [PATCH 41/45] Handle empty statistics cookies. 
--- src/parser/statistics_cookie.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/parser/statistics_cookie.rs b/src/parser/statistics_cookie.rs index 21f52bf..d83d0ea 100644 --- a/src/parser/statistics_cookie.rs +++ b/src/parser/statistics_cookie.rs @@ -1,5 +1,6 @@ use nom::branch::alt; use nom::bytes::complete::tag; +use nom::combinator::opt; use nom::combinator::recognize; use nom::sequence::tuple; @@ -27,8 +28,11 @@ pub fn percent_statistics_cookie<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, StatisticsCookie<'s>> { - let (remaining, _) = - recognize(tuple((tag("["), nom::character::complete::u64, tag("%]"))))(input)?; + let (remaining, _) = recognize(tuple(( + tag("["), + opt(nom::character::complete::u64), + tag("%]"), + )))(input)?; let (remaining, _trailing_whitespace) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); @@ -47,9 +51,9 @@ pub fn fraction_statistics_cookie<'b, 'g, 'r, 's>( ) -> Res, StatisticsCookie<'s>> { let (remaining, _) = recognize(tuple(( tag("["), - nom::character::complete::u64, + opt(nom::character::complete::u64), tag("/"), - nom::character::complete::u64, + opt(nom::character::complete::u64), tag("]"), )))(input)?; let (remaining, _trailing_whitespace) = From cc56b79683435b7db8f6bb7b8e47bc60c429f665 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 22:42:24 -0400 Subject: [PATCH 42/45] Add a test showing we're not handling table formulas. 
--- org_mode_samples/greater_element/table/with_formulas.org | 8 ++++++++ src/compare/diff.rs | 8 ++++++++ 2 files changed, 16 insertions(+) create mode 100644 org_mode_samples/greater_element/table/with_formulas.org diff --git a/org_mode_samples/greater_element/table/with_formulas.org b/org_mode_samples/greater_element/table/with_formulas.org new file mode 100644 index 0000000..a48f48a --- /dev/null +++ b/org_mode_samples/greater_element/table/with_formulas.org @@ -0,0 +1,8 @@ +| Name | Price | Quantity | Total | +|------+-------+----------+-------| +| foo | 7 | 4 | 28 | +| bar | 3.5 | 3 | 10.5 | +|------+-------+----------+-------| +| | | 7 | 38.5 | +#+tblfm: $4=$2*$3::@>$4=vsum(@2..@-1) +#+tblfm: @>$3=vsum(@2..@-1) diff --git a/src/compare/diff.rs b/src/compare/diff.rs index bc812d1..7e59baf 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -1075,6 +1075,10 @@ fn compare_table<'s>( Ok(_) => {} }; + // TODO: Compare :type :tblfm :value + // + // :tblfm is a list () filled with quoted strings containing the value for any tblfm keywords at the end of the table. + for (emacs_child, rust_child) in children.iter().skip(2).zip(rust.children.iter()) { child_status.push(compare_table_row(source, emacs_child, rust_child)?); } @@ -1112,6 +1116,10 @@ fn compare_table_row<'s>( Ok(_) => {} }; + // TODO: Compare :type + // + // :type is an unquoted atom of either standard or rule + for (emacs_child, rust_child) in children.iter().skip(2).zip(rust.children.iter()) { child_status.push(compare_table_cell(source, emacs_child, rust_child)?); } From 84d2babda9ab6ed8b08746a0df39de7d70716c6d Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 22:47:07 -0400 Subject: [PATCH 43/45] Parse table formulas. 
--- src/parser/keyword.rs | 13 +++++++++++++ src/parser/table.rs | 5 +++++ 2 files changed, 18 insertions(+) diff --git a/src/parser/keyword.rs b/src/parser/keyword.rs index 321118e..c5cc476 100644 --- a/src/parser/keyword.rs +++ b/src/parser/keyword.rs @@ -111,6 +111,19 @@ fn babel_call_key<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> tag_no_case("call")(input) } +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +pub fn table_formula_keyword<'b, 'g, 'r, 's>( + _context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res<OrgSource<'s>, Keyword<'s>> { + filtered_keyword(table_formula_key)(input) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn table_formula_key<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> { + tag_no_case("tblfm")(input) +} + #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn regular_keyword_key<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> { not(peek(alt((tag_no_case("call"), tag_no_case("begin")))))(input)?; diff --git a/src/parser/table.rs b/src/parser/table.rs index 2dbe014..69c829e 100644 --- a/src/parser/table.rs +++ b/src/parser/table.rs @@ -8,10 +8,12 @@ use nom::combinator::not; use nom::combinator::peek; use nom::combinator::recognize; use nom::combinator::verify; +use nom::multi::many0; use nom::multi::many1; use nom::multi::many_till; use nom::sequence::tuple; +use super::keyword::table_formula_keyword; use super::object_parser::table_cell_set_object; use super::org_source::OrgSource; use super::util::exit_matcher_parser; @@ -56,6 +58,9 @@ pub fn org_mode_table<'b, 'g, 'r, 's>( let (remaining, (children, _exit_contents)) = many_till(org_mode_table_row_matcher, exit_matcher)(input)?; + let (remaining, _formulas) = + many0(parser_with_context!(table_formula_keyword)(context))(remaining)?; + // TODO: Consume trailing formulas let source = get_consumed(input, remaining); From 80f7098f9b42f8ea6c2d1f5bf649d0cc5eee673f Mon Sep 17 00:00:00 2001
From: Tom Alexander Date: Fri, 8 Sep 2023 23:05:04 -0400 Subject: [PATCH 44/45] Compare table formulas. --- src/compare/diff.rs | 41 +++++++++++++++++++++++++++++++++--- src/parser/table.rs | 3 ++- src/types/greater_element.rs | 2 ++ 3 files changed, 42 insertions(+), 4 deletions(-) diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 7e59baf..c7289d1 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -1,3 +1,4 @@ +use std::collections::BTreeSet; use std::collections::HashSet; use super::util::assert_bounds; @@ -491,7 +492,7 @@ fn compare_heading<'s>( if rust.stars.to_string() != level { this_status = DiffStatus::Bad; message = Some(format!( - "Headline level do not much (emacs != rust): {} != {}", + "Headline level do not match (emacs != rust): {} != {}", level, rust.stars )) } @@ -1075,9 +1076,43 @@ fn compare_table<'s>( Ok(_) => {} }; - // TODO: Compare :type :tblfm :value + // Compare formulas // - // :tblfm is a list () filled with quoted strings containing the value for any tblfm keywords at the end of the table. + // :tblfm is either nil or a list () filled with quoted strings containing the value for any tblfm keywords at the end of the table. 
+ let emacs_formulas = get_property(emacs, ":tblfm")?; + if let Some(emacs_formulas) = emacs_formulas { + let emacs_formulas = emacs_formulas.as_list()?; + if emacs_formulas.len() != rust.formulas.len() { + this_status = DiffStatus::Bad; + message = Some(format!( + "Formulas do not match (emacs != rust): {:?} != {:?}", + emacs_formulas, rust.formulas + )) + } else { + let atoms = emacs_formulas + .into_iter() + .map(Token::as_atom) + .collect::<Result<Vec<_>, _>>()?; + let unquoted = atoms + .into_iter() + .map(unquote) + .collect::<Result<BTreeSet<_>, _>>()?; + for kw in &rust.formulas { + if !unquoted.contains(kw.value) { + this_status = DiffStatus::Bad; + message = Some(format!("Could not find formula in emacs: {}", kw.value)) + } + } + } + } else { + if !rust.formulas.is_empty() { + this_status = DiffStatus::Bad; + message = Some(format!( + "Formulas do not match (emacs != rust): {:?} != {:?}", + emacs_formulas, rust.formulas + )) + } + } for (emacs_child, rust_child) in children.iter().skip(2).zip(rust.children.iter()) { child_status.push(compare_table_row(source, emacs_child, rust_child)?); diff --git a/src/parser/table.rs b/src/parser/table.rs index 69c829e..e5fd7ea 100644 --- a/src/parser/table.rs +++ b/src/parser/table.rs @@ -58,7 +58,7 @@ pub fn org_mode_table<'b, 'g, 'r, 's>( let (remaining, (children, _exit_contents)) = many_till(org_mode_table_row_matcher, exit_matcher)(input)?; - let (remaining, _formulas) = + let (remaining, formulas) = many0(parser_with_context!(table_formula_keyword)(context))(remaining)?; // TODO: Consume trailing formulas @@ -68,6 +68,7 @@ pub fn org_mode_table<'b, 'g, 'r, 's>( remaining, Table { source: source.into(), + formulas, children, }, )) diff --git a/src/types/greater_element.rs b/src/types/greater_element.rs index 9a12900..e897945 100644 --- a/src/types/greater_element.rs +++ b/src/types/greater_element.rs @@ -1,5 +1,6 @@ use super::element::Element; use super::lesser_element::TableCell; +use super::Keyword; use super::Object; use super::Source; @@
-63,6 +64,7 @@ pub struct NodeProperty<'s> { #[derive(Debug)] pub struct Table<'s> { pub source: &'s str, + pub formulas: Vec<Keyword<'s>>, pub children: Vec<TableRow<'s>>, } From 5587e19f1671b1849752572c2ea9cb726278f2ae Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 8 Sep 2023 23:12:15 -0400 Subject: [PATCH 45/45] Cleanup. --- Cargo.toml | 2 +- src/parser/section.rs | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e8981ec..b233792 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,7 +39,7 @@ tracing-subscriber = { version = "0.3.17", optional = true, features = ["env-fil walkdir = "2.3.3" [features] -default = ["compare"] +default = [] compare = [] tracing = ["dep:opentelemetry", "dep:opentelemetry-otlp", "dep:opentelemetry-semantic-conventions", "dep:tokio", "dep:tracing", "dep:tracing-opentelemetry", "dep:tracing-subscriber"] diff --git a/src/parser/section.rs b/src/parser/section.rs index 33b1685..bcc4f36 100644 --- a/src/parser/section.rs +++ b/src/parser/section.rs @@ -29,7 +29,6 @@ pub fn zeroth_section<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res<OrgSource<'s>, Section<'s>> { - // TODO: The zeroth section is specialized so it probably needs its own parser let contexts = [ ContextElement::ConsumeTrailingWhitespace(true), ContextElement::Context("section"), @@ -88,7 +87,6 @@ pub fn section<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, mut input: OrgSource<'s>, ) -> Res<OrgSource<'s>, Section<'s>> { - // TODO: The zeroth section is specialized so it probably needs its own parser let contexts = [ ContextElement::ConsumeTrailingWhitespace(true), ContextElement::Context("section"),