50 Commits

Author SHA1 Message Date
Tom Alexander
d8c3285e3c Add --init flag to docker run.
All checks were successful
rustfmt Build rustfmt has succeeded
rust-build Build rust-build has succeeded
rust-test Build rust-test has succeeded
I noticed in a separate project that ctrl+c was not being honored unless --init was passed, so I'm adding it in here.
2023-08-19 02:51:00 -04:00
Tom Alexander
5db6cd617e Improve test cases for plain list ownership. 2023-08-19 02:30:31 -04:00
Tom Alexander
4cd3697fb0 Update org-mode version in dockerfile. 2023-08-18 23:20:29 -04:00
Tom Alexander
2cd6f736c2 Fix building without compare feature.
All checks were successful
rustfmt Build rustfmt has succeeded
rust-test Build rust-test has succeeded
rust-build Build rust-build has succeeded
2023-08-17 00:13:25 -04:00
fluxcdbot
5686256039 CI: autofix rust code.
Some checks failed
rust-build Build rust-build has failed
rustfmt Build rustfmt has succeeded
rust-test Build rust-test has succeeded
2023-08-17 04:05:48 +00:00
Tom Alexander
7cf1b2d2b8 Disable the failing plain list whitespace ownership test.
Some checks failed
rust-build Build rust-build has failed
rustfmt Build rustfmt has succeeded
rust-test Build rust-test has succeeded
2023-08-17 00:03:05 -04:00
Tom Alexander
b848d7be73 Merge branch 'no_files' 2023-08-16 23:57:58 -04:00
Tom Alexander
74f4aa8d33 Remove dependency on files for running compare.
The tests still use files since they get the test name from a file but compare does the same action via stdin so it can operate on any org source.
2023-08-16 23:56:05 -04:00
Tom Alexander
4776898894 Merge branch 'fix_plain_list'
Some checks failed
rustfmt Build rustfmt has succeeded
rust-test Build rust-test has failed
rust-build Build rust-build has succeeded
2023-08-16 21:06:54 -04:00
Tom Alexander
8e95ce6368 Add notes about plain list trailing blank line ownership investigation. 2023-08-16 20:24:00 -04:00
Tom Alexander
6c9c304f37 Re-enable disabled test. 2023-08-16 17:39:10 -04:00
Tom Alexander
7fafbfb6bb Do not consume whitespace in the final plain list item. 2023-08-16 17:37:19 -04:00
Tom Alexander
56281633f3 Support blank line in plain_list_item_end, move exit matcher to end of loop in plain_list, and maybe consume trailing whitespace in plain_list_item. 2023-08-16 17:09:06 -04:00
Tom Alexander
823c33ef8e Reduce use of expect in main.rs 2023-08-16 16:37:14 -04:00
Tom Alexander
e5e5120a10 Move telemetry handling to the tracing-specific main function.
All checks were successful
rustfmt Build rustfmt has succeeded
rust-test Build rust-test has succeeded
rust-build Build rust-build has succeeded
This is so main_body can exit with an error at any time without missing the shutdown_telemetry function. This does not catch panics.
2023-08-16 16:05:24 -04:00
Tom Alexander
7df393f31d Make a new naive implementation of plain_list_item.
Still need to update plain_list_item_end and handle the whitespace ownership issues, but starting from a simplified state will help.
2023-08-16 16:05:24 -04:00
Tom Alexander
72d5f8f35c Make a new naive implementation of plain_list. 2023-08-16 16:05:24 -04:00
Tom Alexander
dae46adc12 Feature-gate tracing import.
All checks were successful
rustfmt Build rustfmt has succeeded
rust-test Build rust-test has succeeded
rust-build Build rust-build has succeeded
2023-08-14 23:33:38 -04:00
Tom Alexander
d0dc737c79 Merge branch 'plain_list_whitespace_ownership_issue'
Some checks failed
rust-build Build rust-build has failed
rustfmt Build rustfmt has succeeded
rust-test Build rust-test has succeeded
2023-08-14 23:20:37 -04:00
Tom Alexander
1c9877015d Disable the test showing my plain list implementation is broken. 2023-08-14 23:20:28 -04:00
Tom Alexander
2938d5809a Use the rust cache for make dockertest. 2023-08-14 23:17:38 -04:00
Tom Alexander
f7ec89858d Add notes about optimization ideas. 2023-08-14 23:16:23 -04:00
Tom Alexander
67b4dfdce6 Merge branch 'tracing_fixes'
Some checks failed
rust-build Build rust-build has failed
rustfmt Build rustfmt has succeeded
rust-test Build rust-test has succeeded
2023-08-14 22:12:05 -04:00
Tom Alexander
63d092c83d Group the two traces per compare into one trace. 2023-08-14 22:10:58 -04:00
Tom Alexander
a7b298eeec Fix lesser block exit priority.
The paragraph end was matching text inside lesser blocks.
2023-08-14 17:32:10 -04:00
Tom Alexander
1bbfbc3164 Add additional tracing to lesser block. 2023-08-14 17:32:09 -04:00
Tom Alexander
2bcc3f0599 Fix reporting of jaeger traces when diff does not match.
The early exit was causing some traces to not be reported.
2023-08-14 17:32:09 -04:00
Tom Alexander
b93a12c32c Add support for escaped double quotes in sexp. 2023-08-14 16:55:04 -04:00
Tom Alexander
df3045e424 Merge branch 'script_improvement'
All checks were successful
rustfmt Build rustfmt has succeeded
rust-test Build rust-test has succeeded
rust-build Build rust-build has succeeded
2023-08-14 16:13:30 -04:00
Tom Alexander
72b8fec1be Add support for tracing in run_docker_compare.bash. 2023-08-14 16:12:31 -04:00
Tom Alexander
ab17904b1c Clean up run_integration_test.bash. 2023-08-14 15:53:17 -04:00
Tom Alexander
306878c95d Clean up run_docker_integration_test.bash 2023-08-14 15:50:05 -04:00
Tom Alexander
5768c8acda Add a script to run compare using the docker image. 2023-08-14 15:30:13 -04:00
Tom Alexander
e28290ed79 Merge branch 'source_based_tests'
All checks were successful
rustfmt Build rustfmt has succeeded
rust-build Build rust-build has succeeded
rust-test Build rust-test has succeeded
2023-08-14 14:14:26 -04:00
Tom Alexander
fbabf60559 Add ignore to test export_snippet_paragraph_break_precedence. 2023-08-14 14:01:00 -04:00
Tom Alexander
92abac37e2 s/precedent/precedence/
I used the wrong word. This is referring to the priority between paragraphs ending vs export snippets ending, not a reference to something occurring in the past.
2023-08-14 13:57:01 -04:00
Tom Alexander
899073e54f Update to the latest org-mode. 2023-08-14 13:33:05 -04:00
Tom Alexander
eb379af78d Switch export snippet to use exit matchers. 2023-08-14 13:13:32 -04:00
Tom Alexander
422804d846 Add script for running specific tests inside docker.
Some checks failed
rust-build Build rust-build has failed
rust-test Build rust-test has failed
2023-08-14 12:21:15 -04:00
Tom Alexander
cc83431d62 Consume trailing whitespace for property drawers.
Some checks failed
rust-build Build rust-build has failed
rust-test Build rust-test has failed
This is a change between the org-mode in emacs 29.1 and the org-mode currently in main.
2023-08-14 11:57:12 -04:00
Tom Alexander
00354ccc20 Add a volume for cargo cache.
This is to be a good citizen by not downloading all the rust dependencies every time I run the tests locally. Unfortunately, it will still compile all the dependencies each time, but that is a local operation.
2023-08-14 10:57:48 -04:00
Tom Alexander
b75eed6b1e Enable tests that were disabled before.
Some checks are pending
rust-test Build rust-test has started
rust-build Build rust-build has succeeded
2023-08-13 02:21:02 -04:00
Tom Alexander
e33ec4a02c Add support for reading begin/end bounds in the new standard-properties format. 2023-08-13 02:21:02 -04:00
Tom Alexander
f7afcec824 Add support for hash notation in the elisp parser. 2023-08-13 02:21:02 -04:00
Tom Alexander
cf0991fdff Add support for parsing vectors in the elisp parser. 2023-08-13 02:21:02 -04:00
Tom Alexander
d1e0ee831c Switch to installing emacs and org-mode from source in test container.
This is to integrate fixes that have been committed to org-mode but have not made it into emacs, while also getting the latest emacs on alpine.
2023-08-13 02:21:01 -04:00
Tom Alexander
34985c9045 Add makefile target for running the tests inside the docker container.
Some checks failed
rust-build Build rust-build has failed
rust-test Build rust-test has failed
rustfmt Build rustfmt has succeeded
2023-08-13 02:20:16 -04:00
Tom Alexander
7da09fea74 Switch to specifying timeouts instead of timeout in tekton pipelinerun. 2023-08-13 02:20:16 -04:00
Tom Alexander
fc28e3b514 Add a test for trailing blank lines after paragraphs.
Some checks failed
rust-test Build rust-test has failed
rustfmt Build rustfmt has succeeded
rust-build Build rust-build has succeeded
The behavior in emacs does not match the description in the org-mode documentation. I have sent an email to the org-mode mailing list and I am awaiting their response so I can adjust (or not adjust) my parser accordingly.
2023-08-11 01:37:04 -04:00
Tom Alexander
df5ee5af16 Explicitly list which files to include in the cargo package.
Some checks failed
rust-build Build rust-build has failed
rust-test Build rust-test has failed
rustfmt Build rustfmt has succeeded
We are including a bunch of files that are not needed for running the rust code. This excludes them to be a better citizen to both crates.io and all users of this package.
2023-08-11 00:11:54 -04:00
23 changed files with 641 additions and 239 deletions

View File

@@ -4,6 +4,10 @@ metadata:
name: rust-test name: rust-test
spec: spec:
pipelineSpec: pipelineSpec:
timeouts:
pipeline: "2h0m0s"
tasks: "1h0m40s"
finally: "0h30m0s"
params: params:
- name: image-name - name: image-name
description: The name for the built image description: The name for the built image
@@ -201,7 +205,6 @@ spec:
secret: secret:
secretName: harbor-plain secretName: harbor-plain
serviceAccountName: build-bot serviceAccountName: build-bot
timeout: 240h0m0s
params: params:
- name: image-name - name: image-name
value: "harbor.fizz.buzz/private/organic-test" value: "harbor.fizz.buzz/private/organic-test"

View File

@@ -10,6 +10,12 @@ readme = "README.md"
keywords = ["emacs", "org-mode"] keywords = ["emacs", "org-mode"]
categories = ["parsing"] categories = ["parsing"]
resolver = "2" resolver = "2"
include = [
"LICENSE",
"**/*.rs",
"Cargo.toml",
"tests/*"
]
[lib] [lib]
name = "organic" name = "organic"

View File

@@ -35,7 +35,17 @@ clean:
.PHONY: test .PHONY: test
test: test:
> cargo test --lib --test test_loader -- --test-threads $(TESTJOBS) > cargo test --no-fail-fast --lib --test test_loader -- --test-threads $(TESTJOBS)
.PHONY: dockertest
dockertest:
> $(MAKE) -C docker/organic_test
> docker run --init --rm -i -t -v "$$(readlink -f ./):/source:ro" --mount source=cargo-cache,target=/usr/local/cargo/registry --mount source=rust-cache,target=/target --env CARGO_TARGET_DIR=/target -w /source organic-test cargo test --no-fail-fast --lib --test test_loader -- --test-threads $(TESTJOBS)
.PHONY: dockerclean
dockerclean:
# Delete volumes created for running the tests in docker. This does not touch anything related to the jaeger docker container.
> docker volume rm cargo-cache rust-cache
.PHONY: integrationtest .PHONY: integrationtest
integrationtest: integrationtest:
@@ -49,8 +59,8 @@ unittest:
jaeger: jaeger:
# 4317 for OTLP gRPC, 4318 for OTLP HTTP. We currently use gRPC but I forward both ports regardless. # 4317 for OTLP gRPC, 4318 for OTLP HTTP. We currently use gRPC but I forward both ports regardless.
# #
# These flags didn't help even though they seem like they would: --collector.otlp.grpc.max-message-size=10000000 --collector.queue-size=20000 --collector.num-workers=100 # These flags didn't help even though they seem like they would: --collector.queue-size=20000 --collector.num-workers=100
> docker run -d --rm --name organicdocker -p 4317:4317 -p 4318:4318 -p 16686:16686 -e COLLECTOR_OTLP_ENABLED=true jaegertracing/all-in-one:1.47 --collector.grpc-server.max-message-size=10000000 > docker run -d --rm --name organicdocker -p 4317:4317 -p 4318:4318 -p 16686:16686 -e COLLECTOR_OTLP_ENABLED=true jaegertracing/all-in-one:1.47 --collector.grpc-server.max-message-size=20000000 --collector.otlp.grpc.max-message-size=20000000
.PHONY: jaegerweb .PHONY: jaegerweb
jaegerweb: jaegerweb:

View File

@@ -74,7 +74,8 @@ fn is_expect_fail(name: &str) -> Option<&str> {
"drawer_drawer_with_headline_inside" => Some("Apparently lines with :end: become their own paragraph. This odd behavior needs to be investigated more."), "drawer_drawer_with_headline_inside" => Some("Apparently lines with :end: become their own paragraph. This odd behavior needs to be investigated more."),
"element_container_priority_footnote_definition_dynamic_block" => Some("Apparently broken begin lines become their own paragraph."), "element_container_priority_footnote_definition_dynamic_block" => Some("Apparently broken begin lines become their own paragraph."),
"paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."), "paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."),
"export_snippet_paragraph_break_precedent" => Some("Emacs 28 has broken behavior so the tests in the CI fail."), "export_snippet_paragraph_break_precedence" => Some("The latest code for org-mode is matching the export snippet without the closing @@."), // https://list.orgmode.org/orgmode/fb61ea28-f004-4c25-adf7-69fc55683ed4@app.fastmail.com/T/#u
"plain_lists_trailing_whitespace_ownership" => Some("Seeing odd behavior about whitespace ownership."),
_ => None, _ => None,
} }
} }

View File

@@ -1,4 +1,30 @@
FROM rustlang/rust:nightly-alpine3.17 FROM alpine:3.17 AS build
RUN apk add --no-cache musl-dev emacs RUN apk add --no-cache build-base musl-dev git autoconf make texinfo gnutls-dev ncurses-dev gawk
FROM build AS build-emacs
RUN git clone --depth 1 --branch emacs-29.1 https://git.savannah.gnu.org/git/emacs.git /root/emacs
WORKDIR /root/emacs
RUN mkdir /root/dist
RUN ./autogen.sh
RUN ./configure --prefix /usr --without-x --without-sound
RUN make
RUN make DESTDIR="/root/dist" install
FROM build AS build-org-mode
COPY --from=build-emacs /root/dist/ /
RUN mkdir /root/dist
RUN mkdir /root/org-mode && git -C /root/org-mode init --initial-branch=main && git -C /root/org-mode remote add origin https://git.savannah.gnu.org/git/emacs/org-mode.git && git -C /root/org-mode fetch origin 3cbd9f423385bf725dc964a5cff573bba17db3ff && git -C /root/org-mode checkout FETCH_HEAD
WORKDIR /root/org-mode
RUN make compile
RUN make DESTDIR="/root/dist" install
FROM rustlang/rust:nightly-alpine3.17
RUN apk add --no-cache musl-dev ncurses gnutls
RUN cargo install --locked --no-default-features --features ci-autoclean cargo-cache RUN cargo install --locked --no-default-features --features ci-autoclean cargo-cache
COPY --from=build-emacs /root/dist/ /
COPY --from=build-org-mode /root/dist/ /

View File

@@ -0,0 +1,27 @@
* Analysis
** Parse start per character
It might help analysis to record how often we start a specific type of parse for each character. For example, at the start of a plain list, if we had a count of how often each character was the start of a parse of a list we could use that to see how often that list is getting re-parsed.
* Optimizations
** Edit whitespace for list items
Whether or not a list item owns the trailing whitespace depends on if it is the last list item in that list. Since we do not know ahead of time if an item is the last item in the list, we have to either re-parse the list item or modify it after parsing.
*** For
We already are modifying the source of some elements after-the-fact with src_rust{set_source()} so this would be more of the same.
*** Against
I'd like to phase out such modifications because they seem hacky and fragile.
** Make detect element function
Some exit matchers are based on when the next element is found. Some elements do not need to be fully parsed to identify that they are a valid element. For example, src_org{1. foo} can already be identified as the start of a plain list (in the right context) without needing to parse the entire element.
*** For
Avoiding parsing the entire element for an exit matcher would reduce redundant parses.
*** Against
This adds code complexity and introduces the potential for bugs.
How many elements can be reasonably early-detected? For example, src_org{#+begin_src foo} is not enough to detect the start of a source block because without the src_org{#+end_src} it is just plain text.
** Grab multiple characters in plaintext parser before checking exit matcher
Currently we check the exit matcher after each character inside the plain text parser (and many others). Are there character sequences we can assume no exit matcher will trigger between? For example, a contiguous string of latin-alphabet letters?
*** For
This could significantly reduce our calls to exit matchers.
*** Against
I think targets would break this.
The exit matchers are already implicitly building this behavior since they should all exit very early when the starting character is wrong. Putting this logic in a centralized place, far away from where those characters are actually going to be used, is unfortunate for readability.

View File

@@ -0,0 +1,130 @@
* Test 1
** Source
#+begin_src org
1. foo
1. bar
2. baz
2. lorem
ipsum
#+end_src
** Ownership
This table is just showing ownership for the plain list items, not the containing plain list nor the elements inside each item.
| Plain List *Item* | Owns trailing blank lines |
|------------------------+---------------------------|
| foo (includes bar baz) | Yes |
| bar | Yes |
| baz | Yes |
| lorem | No |
** Analysis
In this test case, we see that the only list item that doesn't own its trailing blank lines is "lorem", the final list item of the outer-most list.
* Test 2
We add "cat" as a paragraph at the end of foo which makes "baz" lose its trailing blank lines.
** Source
#+begin_src org
1. foo
1. bar
2. baz
cat
2. lorem
ipsum
#+end_src
** Ownership
| Plain List *Item* | Owns trailing blank lines |
|-------------------------------+---------------------------|
| foo -> cat (includes bar baz) | Yes |
| bar | Yes |
| baz | No |
| lorem | No |
** Analysis
In isolation, this implies that the final plain list item does not own its trailing blank lines, which conflicts with "baz" from test 1.
New theory: List items own their trailing blank lines unless they are both the final list item and not the final element of a list item.
| Plain List *Item* | Owns trailing blank lines | Why |
|-------------------------------+---------------------------+-----------------------------------------------------------|
| foo -> cat (includes bar baz) | Yes | Not the final list item |
| bar | Yes | Not the final list item |
| baz | No | Final item of bar->baz and not the final element of "foo" |
| lorem | No | Final item of foo->lorem and not contained in a list item |
* Test 3
So if that theory is true, taking the entire (foo -> lorem) list from test 1 and nesting it inside a list should coerce "lorem" to own its trailing blank lines since it would then be a final list item (of foo -> lorem) and the final element of the new list.
** Source
#+begin_src org
1. cat
1. foo
1. bar
2. baz
2. lorem
ipsum
#+end_src
** Ownership
| Plain List *Item* | Owns trailing blank lines |
|-----------------------------+---------------------------|
| cat (includes foo -> lorem) | No |
| foo (includes bar baz) | Yes |
| bar | Yes |
| baz | Yes |
| lorem | No |
** Analysis
Against expectations, we did not coerce lorem to consume its trailing blank lines. What is different between "baz" and "lorem"? Well, "baz" is contained within "foo" which has a "lorem" after it, whereas "lorem" is contained within "cat" which does not have any list items after it.
New theory: List items own their trailing blank lines unless they are both the final list item and not the final element of a non-final list item.
| Plain List *Item* | Owns trailing blank lines | Why |
|-----------------------------+---------------------------+------------------------------------------------------|
| cat (includes foo -> lorem) | No | Final list item and not contained in a list item |
| foo (includes bar baz) | Yes | Not the final list item |
| bar | Yes | Not the final list item |
| baz | Yes | Final element of non-final list item |
| lorem | No | Final list item and final element of final list item |
* Test 4
So if that theory is true, then we should be able to coerce lorem to consume its trailing blank lines by adding a second item to the cat list.
** Source
#+begin_src org
1. cat
1. foo
1. bar
2. baz
2. lorem
2. dog
ipsum
#+end_src
** Ownership
| Plain List *Item* | Owns trailing blank lines |
|-----------------------------+---------------------------|
| cat (includes foo -> lorem) | Yes |
| foo (includes bar baz) | Yes |
| bar | Yes |
| baz | Yes |
| lorem | Yes |
| dog | No |
** Analysis
For the first time our expectations were met!
Enduring theory: List items own their trailing blank lines unless they are both the final list item and not the final element of a non-final list item.
| Plain List *Item* | Owns trailing blank lines | Why |
|-----------------------------+---------------------------+--------------------------------------------------|
| cat (includes foo -> lorem) | Yes | Not the final list item |
| foo (includes bar baz) | Yes | Not the final list item |
| bar | Yes | Not the final list item |
| baz | Yes | Final element of non-final list item |
| lorem | Yes | Final element of non-final list item |
| dog | No | Final list item and not contained in a list item |

View File

@@ -0,0 +1,11 @@
1. foo
1. bar
2. baz
2. lorem
ipsum

View File

@@ -0,0 +1,18 @@
foo bar.
* Lorem
baz
* Ipsum
alpha
beta

46
scripts/run_docker_compare.bash Executable file
View File

@@ -0,0 +1,46 @@
#!/usr/bin/env bash
#
# Launch the organic-test docker image with the repository mounted read-only at
# /source, either running `cargo run` inside it or leaving the image's default
# command in place (see the SHELL toggle below).
#
# Abort on errors, unset variables and failed pipeline stages.
set -euo pipefail
# Split words only on newlines and tabs so paths containing spaces stay intact.
IFS=$'\n\t'
# Absolute path of the directory containing this script.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# Behaviour toggles. Each one is only assigned its default when unset or empty.
# NOTE(review): SHELL is normally already set by the login environment, so the
# "NO" default is presumably never applied; only an explicit SHELL=YES changes
# behaviour because the script compares against "YES" only.
: ${SHELL:="NO"} # or YES to skip `cargo run` and allocate a TTY for the image's default command
: ${TRACE:="NO"} # or YES to send traces to jaeger
: ${BACKTRACE:="NO"} # or YES to print a rust backtrace when panicking
# Work from the parent directory of the script directory so relative paths resolve there.
cd "$DIR/../"
# Prefer the uu-/g-prefixed tools when they are installed, otherwise use the plain names.
REALPATH=$(command -v uu-realpath || command -v realpath)
MAKE=$(command -v gmake || command -v make)
# Entry point: run the image build step first, then start the container.
main() {
    build_container
    launch_container
}
# Run the default make target in docker/organic_test (presumably this builds
# the organic-test image that the container launch relies on).
build_container() {
    local image_dir="$DIR/../docker/organic_test"
    $MAKE -C "$image_dir"
}
# Start the organic-test container with the repository mounted read-only at
# /source and the cargo registry / target directories backed by named volumes.
#
# Behaviour is driven by the SHELL, TRACE and BACKTRACE environment toggles:
#   SHELL=YES      allocate a TTY and run the image's default command instead of `cargo run`
#   TRACE=YES      use the host network and enable debug logging
#   BACKTRACE=YES  ask rust for a full backtrace on panic
function launch_container {
    local additional_flags=()
    local additional_args=()

    if [ "$SHELL" != "YES" ]; then
        additional_args+=(cargo run)
    else
        additional_flags+=(-t)
    fi

    if [ "$TRACE" = "YES" ]; then
        # We use the host network so it can talk to jaeger hosted at 127.0.0.1
        additional_flags+=(--network=host --env RUST_LOG=debug)
    fi

    if [ "$BACKTRACE" = "YES" ]; then
        additional_flags+=(--env RUST_BACKTRACE=full)
    fi

    # With `set -u`, bash releases older than 4.4 treat "${array[@]}" on an empty
    # array as an unbound variable and abort. Both arrays can legitimately be
    # empty (additional_flags is empty with the default settings), so expand
    # them through the ${array[@]+...} guard, which yields nothing when empty.
    docker run ${additional_flags[@]+"${additional_flags[@]}"} --init --rm -i -v "$($REALPATH ./):/source:ro" --mount source=cargo-cache,target=/usr/local/cargo/registry --mount source=rust-cache,target=/target --env CARGO_TARGET_DIR=/target -w /source organic-test ${additional_args[@]+"${additional_args[@]}"}
}
main "${@}"

View File

@@ -0,0 +1,57 @@
#!/usr/bin/env bash
#
# Run individual test_loader tests inside the organic-test docker image. Each
# argument is either a path to a sample file or a test name.
#
# Abort on errors, unset variables and failed pipeline stages.
set -euo pipefail
# Split words only on newlines and tabs so paths containing spaces stay intact.
IFS=$'\n\t'
# Absolute path of the directory containing this script.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# Work from the parent directory of the script directory so relative paths resolve there.
cd "$DIR/../"
# Prefer the uu-/g-prefixed tools when they are installed, otherwise use the plain names.
REALPATH=$(command -v uu-realpath || command -v realpath)
MAKE=$(command -v gmake || command -v make)
# Entry point: translate the arguments into test names, run the image build
# step once, then launch one container per test name.
function main {
    # Declare and assign separately: `local name=$(cmd)` always reports the
    # status of `local`, hiding a failure of get_test_names from `set -e`.
    local test_names
    test_names=$(get_test_names "${@}")
    build_container
    local test
    # -r keeps any backslash in a name literal instead of treating it as an escape.
    while read -r test; do
        launch_container "$test"
    done<<<"$test_names"
}
# Run the default make target in docker/organic_test (presumably this builds
# the organic-test image that the container launch relies on).
build_container() {
    local image_dir="$DIR/../docker/organic_test"
    $MAKE -C "$image_dir"
}
# Print one lower-cased test name per argument.
#
# An argument that names an existing file is converted to its path relative to
# org_mode_samples, with the ".org" suffix dropped and "/" turned into "_".
# Any other argument is assumed to already be a test name and is only lower-cased.
get_test_names() {
    local candidate
    local samples_root=$($REALPATH "$DIR/../org_mode_samples")
    for candidate in "$@"; do
        if [ ! -e "$candidate" ]; then
            echo "$candidate" | tr '[:upper:]' '[:lower:]'
        else
            local absolute_path=$($REALPATH "$candidate")
            local sample_relative=$($REALPATH --relative-to "$samples_root" "$absolute_path")
            local stem="${sample_relative%.org}"
            # NOTE(review): this substitution replaces only the first "/", so a
            # sample nested more than one directory deep keeps its later slashes.
            # Confirm that matches how the test names are generated.
            echo "${stem/\//_}" | tr '[:upper:]' '[:lower:]'
        fi
    done
}
# Run a single test_loader test (selected by the name in $1) inside the
# organic-test container, with the repository mounted read-only at /source and
# the cargo registry / target directories backed by named volumes.
function launch_container {
    local test="$1"
    # Declare and assign separately so `local` does not mask the status of the
    # command substitution.
    local init_script
    # The heredoc is unquoted on purpose: "$test" is expanded here, on the host,
    # while \$ keeps the IFS assignment literal for the shell inside the container.
    # NOTE(review): the test name is pasted into the script text, so a name
    # containing a double quote would break the generated command.
    init_script=$(cat <<EOF
set -euo pipefail
IFS=\$'\n\t'
cargo test --no-fail-fast --lib --test test_loader "$test" -- --show-output
EOF
)

    docker run --init --rm -v "$($REALPATH ./):/source:ro" --mount source=cargo-cache,target=/usr/local/cargo/registry --mount source=rust-cache,target=/target --env CARGO_TARGET_DIR=/target -w /source organic-test sh -c "$init_script"
}
main "${@}"

View File

@@ -4,17 +4,27 @@ set -euo pipefail
IFS=$'\n\t' IFS=$'\n\t'
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
cd "$DIR/../"
REALPATH=$(command -v uu-realpath || command -v realpath) REALPATH=$(command -v uu-realpath || command -v realpath)
samples_dir=$(readlink -f "$DIR/../org_mode_samples") function main {
local test_names=$(get_test_names "${@}")
local test
while read test; do
cargo test --no-fail-fast --test test_loader "$test" -- --show-output
done<<<"$test_names"
}
function get_test_names { function get_test_names {
local test_file
local samples_dir=$($REALPATH "$DIR/../org_mode_samples")
for test_file in "$@" for test_file in "$@"
do do
if [ -e "$test_file" ]; then if [ -e "$test_file" ]; then
test_file_full_path=$(readlink -f "$test_file") local test_file_full_path=$($REALPATH "$test_file")
relative_to_samples=$($REALPATH --relative-to "$samples_dir" "$test_file_full_path") local relative_to_samples=$($REALPATH --relative-to "$samples_dir" "$test_file_full_path")
without_extension="${relative_to_samples%.org}" local without_extension="${relative_to_samples%.org}"
echo "${without_extension/\//_}" | tr '[:upper:]' '[:lower:]' echo "${without_extension/\//_}" | tr '[:upper:]' '[:lower:]'
else else
echo "$test_file" | tr '[:upper:]' '[:lower:]' echo "$test_file" | tr '[:upper:]' '[:lower:]'
@@ -22,6 +32,4 @@ function get_test_names {
done done
} }
get_test_names "$@" | while read test; do main "${@}"
(cd "$DIR/../" && cargo test --no-fail-fast --test test_loader "$test" -- --show-output)
done

View File

@@ -1,22 +1,25 @@
use std::path::Path;
use std::process::Command; use std::process::Command;
pub fn emacs_parse_org_document<'a, C>(file_path: C) -> Result<String, Box<dyn std::error::Error>> pub fn emacs_parse_org_document<C>(file_contents: C) -> Result<String, Box<dyn std::error::Error>>
where where
C: AsRef<Path>, C: AsRef<str>,
{ {
let elisp_script = r#"(progn let escaped_file_contents = escape_elisp_string(file_contents);
let elisp_script = format!(
r#"(progn
(erase-buffer)
(insert "{escaped_file_contents}")
(org-mode) (org-mode)
(message "%s" (pp-to-string (org-element-parse-buffer))) (message "%s" (pp-to-string (org-element-parse-buffer)))
)"#; )"#,
escaped_file_contents = escaped_file_contents
);
let mut cmd = Command::new("emacs"); let mut cmd = Command::new("emacs");
let proc = cmd let proc = cmd
.arg("-q") .arg("-q")
.arg("--no-site-file") .arg("--no-site-file")
.arg("--no-splash") .arg("--no-splash")
.arg("--batch") .arg("--batch")
.arg("--insert")
.arg(file_path.as_ref().as_os_str())
.arg("--eval") .arg("--eval")
.arg(elisp_script); .arg(elisp_script);
let out = proc.output()?; let out = proc.output()?;
@@ -24,3 +27,25 @@ where
let org_sexp = out.stderr; let org_sexp = out.stderr;
Ok(String::from_utf8(org_sexp)?) Ok(String::from_utf8(org_sexp)?)
} }
/// Escape text so it can be placed between the double quotes of an elisp string literal.
///
/// Every `"` and `\` in the input is prefixed with a backslash; all other
/// characters are copied through unchanged.
fn escape_elisp_string<C>(file_contents: C) -> String
where
    C: AsRef<str>,
{
    let raw = file_contents.as_ref();
    // Reserve about 10% of headroom up front so the added escape characters
    // rarely force the buffer to grow while we fill it.
    let mut escaped = String::with_capacity(raw.len() + (raw.len() / 10));
    for ch in raw.chars() {
        if matches!(ch, '"' | '\\') {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}

View File

@@ -48,6 +48,21 @@ pub fn assert_bounds<'s, S: Source<'s>>(
.nth(1) .nth(1)
.ok_or("Should have an attributes child.")?; .ok_or("Should have an attributes child.")?;
let attributes_map = attributes_child.as_map()?; let attributes_map = attributes_child.as_map()?;
let standard_properties = attributes_map.get(":standard-properties");
let (begin, end) = if standard_properties.is_some() {
let std_props = standard_properties
.expect("if statement proves its Some")
.as_vector()?;
let begin = std_props
.get(0)
.ok_or("Missing first element in standard properties")?
.as_atom()?;
let end = std_props
.get(1)
.ok_or("Missing first element in standard properties")?
.as_atom()?;
(begin, end)
} else {
let begin = attributes_map let begin = attributes_map
.get(":begin") .get(":begin")
.ok_or("Missing :begin attribute.")? .ok_or("Missing :begin attribute.")?
@@ -56,6 +71,8 @@ pub fn assert_bounds<'s, S: Source<'s>>(
.get(":end") .get(":end")
.ok_or("Missing :end attribute.")? .ok_or("Missing :end attribute.")?
.as_atom()?; .as_atom()?;
(begin, end)
};
let (rust_begin, rust_end) = get_offsets(source, rust); let (rust_begin, rust_end) = get_offsets(source, rust);
if (rust_begin + 1).to_string() != begin || (rust_end + 1).to_string() != end { if (rust_begin + 1).to_string() != begin || (rust_end + 1).to_string() != end {
Err(format!("Rust bounds ({rust_begin}, {rust_end}) do not match emacs bounds ({emacs_begin}, {emacs_end})", rust_begin = rust_begin + 1, rust_end = rust_end + 1, emacs_begin=begin, emacs_end=end))?; Err(format!("Rust bounds ({rust_begin}, {rust_end}) do not match emacs bounds ({emacs_begin}, {emacs_end})", rust_begin = rust_begin + 1, rust_end = rust_end + 1, emacs_begin=begin, emacs_end=end))?;

View File

@@ -15,7 +15,8 @@ pub fn init_telemetry() -> Result<(), Box<dyn std::error::Error>> {
// TODO: I think the endpoint can be controlled by the OTEL_EXPORTER_OTLP_TRACES_ENDPOINT env variable instead of hard-coded into this code base. Regardless, I am the only developer right now so I am not too concerned. // TODO: I think the endpoint can be controlled by the OTEL_EXPORTER_OTLP_TRACES_ENDPOINT env variable instead of hard-coded into this code base. Regardless, I am the only developer right now so I am not too concerned.
let exporter = opentelemetry_otlp::new_exporter() let exporter = opentelemetry_otlp::new_exporter()
.tonic() .tonic()
.with_endpoint("http://localhost:4317/v1/traces"); // Using "localhost" is broken inside the docker container when tracing
.with_endpoint("http://127.0.0.1:4317/v1/traces");
let tracer = opentelemetry_otlp::new_pipeline() let tracer = opentelemetry_otlp::new_pipeline()
.tracing() .tracing()

View File

@@ -1,5 +1,6 @@
#![feature(round_char_boundary)] #![feature(round_char_boundary)]
use std::path::Path; #[cfg(feature = "compare")]
use std::io::Read;
#[cfg(feature = "compare")] #[cfg(feature = "compare")]
use ::organic::parser::document; use ::organic::parser::document;
@@ -10,8 +11,11 @@ use organic::emacs_parse_org_document;
#[cfg(feature = "compare")] #[cfg(feature = "compare")]
use organic::parser::sexp::sexp_with_padding; use organic::parser::sexp::sexp_with_padding;
#[cfg(feature = "tracing")]
use crate::init_tracing::init_telemetry; use crate::init_tracing::init_telemetry;
#[cfg(feature = "tracing")]
use crate::init_tracing::shutdown_telemetry; use crate::init_tracing::shutdown_telemetry;
#[cfg(feature = "tracing")]
mod init_tracing; mod init_tracing;
#[cfg(not(feature = "tracing"))] #[cfg(not(feature = "tracing"))]
@@ -22,40 +26,47 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
#[cfg(feature = "tracing")] #[cfg(feature = "tracing")]
fn main() -> Result<(), Box<dyn std::error::Error>> { fn main() -> Result<(), Box<dyn std::error::Error>> {
let rt = tokio::runtime::Runtime::new()?; let rt = tokio::runtime::Runtime::new()?;
let result = rt.block_on(async { main_body() }); let result = rt.block_on(async {
init_telemetry()?;
let main_body_result = main_body();
shutdown_telemetry()?;
main_body_result
});
result result
} }
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn main_body() -> Result<(), Box<dyn std::error::Error>> { fn main_body() -> Result<(), Box<dyn std::error::Error>> {
init_telemetry()?; #[cfg(not(feature = "compare"))]
run_compare( let org_contents = "";
std::env::args() #[cfg(feature = "compare")]
.nth(1) let org_contents = read_stdin_to_string()?;
.expect("Pass a single file into this script."), run_compare(org_contents)
)?;
shutdown_telemetry()?;
Ok(())
} }
#[cfg(feature = "compare")] #[cfg(feature = "compare")]
fn run_compare<P: AsRef<Path>>(todo_org_path: P) -> Result<(), Box<dyn std::error::Error>> { fn read_stdin_to_string() -> Result<String, Box<dyn std::error::Error>> {
let org_contents = std::fs::read_to_string(todo_org_path.as_ref()).expect("Read org file."); let mut stdin_contents = String::new();
let (remaining, rust_parsed) = document(org_contents.as_str()).expect("Org Parse failure"); std::io::stdin()
let org_sexp = .lock()
emacs_parse_org_document(todo_org_path.as_ref()).expect("Use emacs to parse org file."); .read_to_string(&mut stdin_contents)?;
Ok(stdin_contents)
}
#[cfg(feature = "compare")]
fn run_compare<P: AsRef<str>>(org_contents: P) -> Result<(), Box<dyn std::error::Error>> {
let (remaining, rust_parsed) = document(org_contents.as_ref()).expect("Org Parse failure");
let org_sexp = emacs_parse_org_document(org_contents.as_ref())?;
let (_remaining, parsed_sexp) = let (_remaining, parsed_sexp) =
sexp_with_padding(org_sexp.as_str()).expect("Sexp Parse failure"); sexp_with_padding(org_sexp.as_str()).expect("Sexp Parse failure");
println!("{}\n\n\n", org_contents.as_str()); println!("{}\n\n\n", org_contents.as_ref());
println!("{}", org_sexp); println!("{}", org_sexp);
println!("{:#?}", rust_parsed); println!("{:#?}", rust_parsed);
// We do the diffing after printing out both parsed forms in case the diffing panics // We do the diffing after printing out both parsed forms in case the diffing panics
let diff_result = let diff_result = compare_document(&parsed_sexp, &rust_parsed)?;
compare_document(&parsed_sexp, &rust_parsed).expect("Compare parsed documents."); diff_result.print()?;
diff_result
.print()
.expect("Print document parse tree diff.");
if diff_result.is_bad() { if diff_result.is_bad() {
Err("Diff results do not match.")?; Err("Diff results do not match.")?;
@@ -68,7 +79,7 @@ fn run_compare<P: AsRef<Path>>(todo_org_path: P) -> Result<(), Box<dyn std::erro
} }
#[cfg(not(feature = "compare"))] #[cfg(not(feature = "compare"))]
fn run_compare<P: AsRef<Path>>(_todo_org_path: P) -> Result<(), Box<dyn std::error::Error>> { fn run_compare<P: AsRef<str>>(_org_contents: P) -> Result<(), Box<dyn std::error::Error>> {
println!("This program was built with compare disabled. Doing nothing."); println!("This program was built with compare disabled. Doing nothing.");
Ok(()) Ok(())
} }

View File

@@ -160,7 +160,7 @@ fn zeroth_section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s
opt(parser_with_context!(comment)( opt(parser_with_context!(comment)(
&without_consuming_whitespace_context, &without_consuming_whitespace_context,
)), )),
parser_with_context!(property_drawer)(&without_consuming_whitespace_context), parser_with_context!(property_drawer)(context),
many0(blank_line), many0(blank_line),
)))(input)?; )))(input)?;

View File

@@ -1,8 +1,6 @@
use nom::branch::alt;
use nom::bytes::complete::tag; use nom::bytes::complete::tag;
use nom::character::complete::anychar; use nom::character::complete::anychar;
use nom::combinator::opt; use nom::combinator::opt;
use nom::combinator::peek;
use nom::combinator::recognize; use nom::combinator::recognize;
use nom::combinator::verify; use nom::combinator::verify;
use nom::multi::many1; use nom::multi::many1;
@@ -11,6 +9,9 @@ use nom::sequence::tuple;
use super::Context; use super::Context;
use crate::error::Res; use crate::error::Res;
use crate::parser::exiting::ExitClass;
use crate::parser::parser_context::ContextElement;
use crate::parser::parser_context::ExitMatcherNode;
use crate::parser::parser_with_context::parser_with_context; use crate::parser::parser_with_context::parser_with_context;
use crate::parser::util::exit_matcher_parser; use crate::parser::util::exit_matcher_parser;
use crate::parser::util::get_consumed; use crate::parser::util::get_consumed;
@@ -23,8 +24,15 @@ pub fn export_snippet<'r, 's>(
) -> Res<&'s str, ExportSnippet<'s>> { ) -> Res<&'s str, ExportSnippet<'s>> {
let (remaining, _) = tag("@@")(input)?; let (remaining, _) = tag("@@")(input)?;
let (remaining, backend_name) = backend(context, remaining)?; let (remaining, backend_name) = backend(context, remaining)?;
let (remaining, backend_contents) = let parser_context =
opt(tuple((tag(":"), parser_with_context!(contents)(context))))(remaining)?; context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
exit_matcher: &export_snippet_end,
}));
let (remaining, backend_contents) = opt(tuple((
tag(":"),
parser_with_context!(contents)(&parser_context),
)))(remaining)?;
let (remaining, _) = tag("@@")(remaining)?; let (remaining, _) = tag("@@")(remaining)?;
let source = get_consumed(input, remaining); let source = get_consumed(input, remaining);
Ok(( Ok((
@@ -48,14 +56,13 @@ fn backend<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn contents<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { fn contents<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
let (remaining, source) = recognize(verify( let (remaining, source) = recognize(verify(
many_till( many_till(anychar, parser_with_context!(exit_matcher_parser)(context)),
anychar,
peek(alt((
parser_with_context!(exit_matcher_parser)(context),
tag("@@"),
))),
),
|(children, _exit_contents)| !children.is_empty(), |(children, _exit_contents)| !children.is_empty(),
))(input)?; ))(input)?;
Ok((remaining, source)) Ok((remaining, source))
} }
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn export_snippet_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
tag("@@")(input)
}

View File

@@ -200,7 +200,7 @@ pub fn src_block<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s st
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true)) .with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
.with_additional_node(ContextElement::Context("lesser block")) .with_additional_node(ContextElement::Context("lesser block"))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta, class: ExitClass::Alpha,
exit_matcher: &lesser_block_end_specialized, exit_matcher: &lesser_block_end_specialized,
})); }));
let parameters = match parameters { let parameters = match parameters {
@@ -238,23 +238,42 @@ fn lesser_block_end(
) -> impl for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, &'s str> { ) -> impl for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, &'s str> {
let current_name_lower = current_name.to_lowercase(); let current_name_lower = current_name.to_lowercase();
move |context: Context, input: &str| { move |context: Context, input: &str| {
_lesser_block_end(context, input, current_name_lower.as_str())
}
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn _lesser_block_end<'r, 's, 'x>(
context: Context<'r, 's>,
input: &'s str,
current_name_lower: &'x str,
) -> Res<&'s str, &'s str> {
start_of_line(context, input)?; start_of_line(context, input)?;
let (remaining, _leading_whitespace) = space0(input)?; let (remaining, _leading_whitespace) = space0(input)?;
let (remaining, (_begin, _name, _ws)) = tuple(( let (remaining, (_begin, _name, _ws)) = tuple((
tag_no_case("#+end_"), tag_no_case("#+end_"),
tag_no_case(current_name_lower.as_str()), tag_no_case(current_name_lower),
alt((eof, line_ending)), alt((eof, line_ending)),
))(remaining)?; ))(remaining)?;
let source = get_consumed(input, remaining); let source = get_consumed(input, remaining);
Ok((remaining, source)) Ok((remaining, source))
} }
}
fn lesser_block_begin( fn lesser_block_begin(
current_name: &str, current_name: &str,
) -> impl for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, &'s str> { ) -> impl for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, &'s str> {
let current_name_lower = current_name.to_lowercase(); let current_name_lower = current_name.to_lowercase();
move |context: Context, input: &str| { move |context: Context, input: &str| {
_lesser_block_begin(context, input, current_name_lower.as_str())
}
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn _lesser_block_begin<'r, 's, 'x>(
context: Context<'r, 's>,
input: &'s str,
current_name_lower: &'x str,
) -> Res<&'s str, &'s str> {
start_of_line(context, input)?; start_of_line(context, input)?;
let (remaining, _leading_whitespace) = space0(input)?; let (remaining, _leading_whitespace) = space0(input)?;
let (remaining, (_begin, name)) = tuple(( let (remaining, (_begin, name)) = tuple((
@@ -265,4 +284,3 @@ fn lesser_block_begin(
))(remaining)?; ))(remaining)?;
Ok((remaining, name)) Ok((remaining, name))
} }
}

View File

@@ -6,16 +6,12 @@ use nom::character::complete::one_of;
use nom::character::complete::space0; use nom::character::complete::space0;
use nom::character::complete::space1; use nom::character::complete::space1;
use nom::combinator::eof; use nom::combinator::eof;
use nom::combinator::peek; use nom::combinator::opt;
use nom::combinator::recognize; use nom::combinator::recognize;
use nom::combinator::verify; use nom::combinator::verify;
use nom::multi::many1; use nom::multi::many1;
use nom::multi::many_till; use nom::multi::many_till;
use nom::sequence::preceded;
use nom::sequence::terminated;
use nom::sequence::tuple; use nom::sequence::tuple;
#[cfg(feature = "tracing")]
use tracing::span;
use super::greater_element::PlainList; use super::greater_element::PlainList;
use super::greater_element::PlainListItem; use super::greater_element::PlainListItem;
@@ -32,6 +28,7 @@ use crate::parser::parser_context::ExitMatcherNode;
use crate::parser::util::blank_line; use crate::parser::util::blank_line;
use crate::parser::util::exit_matcher_parser; use crate::parser::util::exit_matcher_parser;
use crate::parser::util::get_consumed; use crate::parser::util::get_consumed;
use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting;
use crate::parser::util::start_of_line; use crate::parser::util::start_of_line;
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
@@ -42,107 +39,64 @@ pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s
class: ExitClass::Beta, class: ExitClass::Beta,
exit_matcher: &plain_list_end, exit_matcher: &plain_list_end,
})); }));
let without_consume_context = // children stores tuple of (input string, parsed object) so we can re-parse the final item
parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(false));
let with_consume_context =
parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true));
let without_consume_matcher = parser_with_context!(plain_list_item)(&without_consume_context);
let with_consume_matcher = parser_with_context!(plain_list_item)(&with_consume_context);
let exit_matcher = parser_with_context!(exit_matcher_parser)(&with_consume_context);
let mut children = Vec::new(); let mut children = Vec::new();
let mut first_item_indentation: Option<usize> = None; let mut first_item_indentation: Option<usize> = None;
let mut remaining = input; let mut remaining = input;
// The final list item does not consume trailing blank lines (which instead get consumed by the list). We have three options here:
//
// 1. Parse all items while consuming trailing whitespace, then edit the final item to remove trailing whitespace.
// 2. Parse all items without consuming trailing whitespace, then edit all but the final one to add in the trailing whitespace.
// 3. Re-parse the final item with consume trailing whitespace disabled.
//
// While #3 is the most slow, it also seems to cleanest and involves the least manual mutation of already-parsed objects so I am going with #3 for now, but we should revisit #1 or #2 when the parser is more developed.
loop { loop {
/* let list_item = parser_with_context!(plain_list_item)(&parser_context)(remaining);
Trailing whitespace belongs to the plain list, not the plain list item match list_item {
Possible outcomes:
Don't consume, yes exit matcher
Don't consume, no additional item
Consume, additional item
*/
{
// Don't consume, yes exit matcher
#[cfg(feature = "tracing")]
let span = span!(tracing::Level::DEBUG, "first");
#[cfg(feature = "tracing")]
let _enter = span.enter();
let last_item_then_exit = tuple((without_consume_matcher, exit_matcher))(remaining);
match last_item_then_exit {
Ok((remain, (item, _exit)))
if item.indentation
== *first_item_indentation.get_or_insert(item.indentation) =>
{
remaining = remain;
children.push(item);
break;
}
Ok(_) | Err(_) => {}
};
}
{
// Consume, additional item
#[cfg(feature = "tracing")]
let span = span!(tracing::Level::DEBUG, "second");
#[cfg(feature = "tracing")]
let _enter = span.enter();
let not_last_item =
tuple((with_consume_matcher, peek(without_consume_matcher)))(remaining);
match not_last_item {
Ok((remain, (item, future_item)))
if item.indentation
== *first_item_indentation.get_or_insert(item.indentation)
&& future_item.indentation
== *first_item_indentation.get_or_insert(item.indentation) =>
{
remaining = remain;
children.push(item);
continue;
}
Ok(_) | Err(_) => {}
};
}
{
// Don't consume, no additional item
#[cfg(feature = "tracing")]
let span = span!(tracing::Level::DEBUG, "third");
#[cfg(feature = "tracing")]
let _enter = span.enter();
let last_item_then_exit = without_consume_matcher(remaining);
match last_item_then_exit {
Ok((remain, item)) Ok((remain, item))
if item.indentation if item.indentation == *first_item_indentation.get_or_insert(item.indentation) =>
== *first_item_indentation.get_or_insert(item.indentation) =>
{ {
children.push((remaining, item));
remaining = remain; remaining = remain;
children.push(item);
break;
} }
Ok(_) | Err(_) => { Ok(_) | Err(_) => {
// TODO: Maybe this is reachable when there are no items at all. break;
return Err(nom::Err::Error(CustomError::MyError(MyError(
"Should be unreachable.",
))));
// unreachable!();
} }
}; };
let maybe_exit = parser_with_context!(exit_matcher_parser)(&parser_context)(remaining);
if maybe_exit.is_ok() {
break;
} }
} }
if children.is_empty() { let (final_child_start, _final_item_first_parse) = match children.pop() {
Some(final_child) => final_child,
None => {
return Err(nom::Err::Error(CustomError::MyError(MyError( return Err(nom::Err::Error(CustomError::MyError(MyError(
"Plain lists require at least one element.", "Plain lists require at least one element.",
)))); ))));
} }
};
let final_item_context =
parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(false));
let (remaining, reparsed_final_item) =
parser_with_context!(plain_list_item)(&final_item_context)(final_child_start)?;
children.push((final_child_start, reparsed_final_item));
let (remaining, _trailing_ws) =
maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
let source = get_consumed(input, remaining); let source = get_consumed(input, remaining);
Ok((remaining, PlainList { source, children })) Ok((
remaining,
PlainList {
source,
children: children.into_iter().map(|(_start, item)| item).collect(),
},
))
} }
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
@@ -154,29 +108,12 @@ pub fn plain_list_item<'r, 's>(
let (remaining, leading_whitespace) = space0(input)?; let (remaining, leading_whitespace) = space0(input)?;
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09) // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
let indent_level = leading_whitespace.len(); let indent_level = leading_whitespace.len();
let with_consume_context = context
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
.with_additional_node(ContextElement::ListItem(indent_level))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
exit_matcher: &plain_list_item_end,
}));
let without_consume_context = context
.with_additional_node(ContextElement::ListItem(indent_level))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
exit_matcher: &plain_list_item_end,
}));
let with_consume_matcher = parser_with_context!(element(true))(&with_consume_context);
let without_consume_matcher = parser_with_context!(element(true))(&without_consume_context);
let exit_matcher = parser_with_context!(exit_matcher_parser)(&with_consume_context);
let (remaining, bull) = let (remaining, bull) =
verify(bullet, |bull: &str| bull != "*" || indent_level > 0)(remaining)?; verify(bullet, |bull: &str| bull != "*" || indent_level > 0)(remaining)?;
let maybe_contentless_item: Res<&str, &str> = alt((eof, line_ending))(remaining); let maybe_contentless_item: Res<&str, &str> = alt((eof, line_ending))(remaining);
match maybe_contentless_item { match maybe_contentless_item {
Ok((rem, _ws)) => { Ok((rem, _ws)) => {
// TODO: do we need to consume if this isn't the last item?
let source = get_consumed(input, rem); let source = get_consumed(input, rem);
return Ok(( return Ok((
rem, rem,
@@ -188,19 +125,29 @@ pub fn plain_list_item<'r, 's>(
}, },
)); ));
} }
Err(_) => { Err(_) => {}
};
let (remaining, _ws) = space1(remaining)?; let (remaining, _ws) = space1(remaining)?;
let (remaining, (mut contents, final_element)) = many_till( let parser_context = context
&with_consume_matcher, .with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
alt(( .with_additional_node(ContextElement::ListItem(indent_level))
terminated(&without_consume_matcher, exit_matcher), .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
preceded( class: ExitClass::Beta,
peek(tuple((&with_consume_matcher, exit_matcher))), exit_matcher: &plain_list_item_end,
&without_consume_matcher, }));
let (remaining, (children, _exit_contents)) = verify(
many_till(
parser_with_context!(element(true))(&parser_context),
parser_with_context!(exit_matcher_parser)(&parser_context),
), ),
)), |(children, _exit_contents)| !children.is_empty(),
)(remaining)?; )(remaining)?;
contents.push(final_element);
let (remaining, _trailing_ws) =
maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
let source = get_consumed(input, remaining); let source = get_consumed(input, remaining);
return Ok(( return Ok((
remaining, remaining,
@@ -208,12 +155,10 @@ pub fn plain_list_item<'r, 's>(
source, source,
indentation: indent_level, indentation: indent_level,
bullet: bull, bullet: bull,
children: contents, children,
}, },
)); ));
} }
};
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn bullet<'s>(i: &'s str) -> Res<&'s str, &'s str> { fn bullet<'s>(i: &'s str) -> Res<&'s str, &'s str> {
@@ -241,18 +186,11 @@ fn plain_list_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn plain_list_item_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { fn plain_list_item_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
let current_item_indent_level: &usize = start_of_line(context, input)?;
get_context_item_indent(context).ok_or(nom::Err::Error(CustomError::MyError(MyError( recognize(tuple((
"Not inside a plain list item", opt(blank_line),
))))?; parser_with_context!(line_indented_lte)(context),
let plain_list_item_matcher = parser_with_context!(plain_list_item)(context); )))(input)
let line_indented_lte_matcher = parser_with_context!(line_indented_lte)(context);
alt((
recognize(verify(plain_list_item_matcher, |pli| {
pli.indentation <= *current_item_indent_level
})),
recognize(line_indented_lte_matcher),
))(input)
} }
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
@@ -262,8 +200,6 @@ fn line_indented_lte<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'
"Not inside a plain list item", "Not inside a plain list item",
))))?; ))))?;
start_of_line(context, input)?;
let matched = recognize(verify( let matched = recognize(verify(
tuple((space0::<&str, _>, non_whitespace_character)), tuple((space0::<&str, _>, non_whitespace_character)),
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09) // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)

View File

@@ -23,6 +23,7 @@ pub enum Token<'s> {
Atom(&'s str), Atom(&'s str),
List(Vec<Token<'s>>), List(Vec<Token<'s>>),
TextWithProperties(TextWithProperties<'s>), TextWithProperties(TextWithProperties<'s>),
Vector(Vec<Token<'s>>),
} }
#[derive(Debug)] #[derive(Debug)]
@@ -59,6 +60,10 @@ impl<'s> TextWithProperties<'s> {
out.push('\\'); out.push('\\');
ParseState::Normal ParseState::Normal
} }
(ParseState::Escape, '"') => {
out.push('"');
ParseState::Normal
}
_ => todo!(), _ => todo!(),
}; };
} }
@@ -73,6 +78,13 @@ enum ParseState {
} }
impl<'s> Token<'s> { impl<'s> Token<'s> {
pub fn as_vector<'p>(&'p self) -> Result<&'p Vec<Token<'s>>, Box<dyn std::error::Error>> {
Ok(match self {
Token::Vector(children) => Ok(children),
_ => Err(format!("wrong token type {:?}", self)),
}?)
}
pub fn as_list<'p>(&'p self) -> Result<&'p Vec<Token<'s>>, Box<dyn std::error::Error>> { pub fn as_list<'p>(&'p self) -> Result<&'p Vec<Token<'s>>, Box<dyn std::error::Error>> {
Ok(match self { Ok(match self {
Token::List(children) => Ok(children), Token::List(children) => Ok(children),
@@ -136,7 +148,7 @@ pub fn sexp<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn token<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { fn token<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
alt((list, atom))(input) alt((list, vector, atom))(input)
} }
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
@@ -151,16 +163,33 @@ fn list<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
Ok((remaining, Token::List(children))) Ok((remaining, Token::List(children)))
} }
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn vector<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
let (remaining, _) = tag("[")(input)?;
let (remaining, children) = delimited(
multispace0,
separated_list1(multispace1, token),
multispace0,
)(remaining)?;
let (remaining, _) = tag("]")(remaining)?;
Ok((remaining, Token::Vector(children)))
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { fn atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
not(peek(tag(")")))(input)?; not(peek(one_of(")]")))(input)?;
alt((text_with_properties, quoted_atom, unquoted_atom))(input) alt((
text_with_properties,
hash_notation,
quoted_atom,
unquoted_atom,
))(input)
} }
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn unquoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { fn unquoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
let (remaining, body) = take_till1(|c| match c { let (remaining, body) = take_till1(|c| match c {
' ' | '\t' | '\r' | '\n' | ')' => true, ' ' | '\t' | '\r' | '\n' | ')' | ']' => true,
_ => false, _ => false,
})(input)?; })(input)?;
Ok((remaining, Token::Atom(body))) Ok((remaining, Token::Atom(body)))
@@ -182,6 +211,18 @@ fn quoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
Ok((remaining, Token::Atom(source))) Ok((remaining, Token::Atom(source)))
} }
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn hash_notation<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
let (remaining, _) = tag("#<")(input)?;
let (remaining, _body) = take_till1(|c| match c {
'>' => true,
_ => false,
})(remaining)?;
let (remaining, _) = tag(">")(remaining)?;
let source = get_consumed(input, remaining);
Ok((remaining, Token::Atom(source)))
}
fn text_with_properties<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { fn text_with_properties<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
let (remaining, _) = tag("#(")(input)?; let (remaining, _) = tag("#(")(input)?;
let (remaining, (text, props)) = delimited( let (remaining, (text, props)) = delimited(
@@ -237,6 +278,7 @@ mod tests {
Token::Atom(_) => false, Token::Atom(_) => false,
Token::List(_) => true, Token::List(_) => true,
Token::TextWithProperties(_) => false, Token::TextWithProperties(_) => false,
Token::Vector(_) => false,
}); });
} }
@@ -249,6 +291,7 @@ mod tests {
Token::Atom(_) => false, Token::Atom(_) => false,
Token::List(_) => true, Token::List(_) => true,
Token::TextWithProperties(_) => false, Token::TextWithProperties(_) => false,
Token::Vector(_) => false,
}); });
let children = match parsed { let children = match parsed {
Token::List(children) => children, Token::List(children) => children,
@@ -308,6 +351,7 @@ mod tests {
Token::Atom(_) => false, Token::Atom(_) => false,
Token::List(_) => true, Token::List(_) => true,
Token::TextWithProperties(_) => false, Token::TextWithProperties(_) => false,
Token::Vector(_) => false,
}); });
let children = match parsed { let children = match parsed {
Token::List(children) => children, Token::List(children) => children,

View File

@@ -3,7 +3,7 @@ fn {name}() {{
let todo_org_path = "{path}"; let todo_org_path = "{path}";
let org_contents = std::fs::read_to_string(todo_org_path).expect("Read org file."); let org_contents = std::fs::read_to_string(todo_org_path).expect("Read org file.");
println!("{{}}", org_contents); println!("{{}}", org_contents);
let org_sexp = emacs_parse_org_document(todo_org_path).expect("Use emacs to parse org file."); let org_sexp = emacs_parse_org_document(org_contents.as_str()).expect("Use emacs to parse org file.");
println!("{{}}", org_sexp); println!("{{}}", org_sexp);
let (_remaining, parsed_sexp) = sexp_with_padding(org_sexp.as_str()).expect("Sexp Parse failure"); let (_remaining, parsed_sexp) = sexp_with_padding(org_sexp.as_str()).expect("Sexp Parse failure");
let (remaining, rust_parsed) = document(org_contents.as_str()).expect("Org Parse failure"); let (remaining, rust_parsed) = document(org_contents.as_str()).expect("Org Parse failure");