Compare commits
47 Commits
v0.1.2
...
2cd6f736c2
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2cd6f736c2 | ||
|
|
5686256039 | ||
|
|
7cf1b2d2b8 | ||
|
|
b848d7be73 | ||
|
|
74f4aa8d33 | ||
|
|
4776898894 | ||
|
|
8e95ce6368 | ||
|
|
6c9c304f37 | ||
|
|
7fafbfb6bb | ||
|
|
56281633f3 | ||
|
|
823c33ef8e | ||
|
|
e5e5120a10 | ||
|
|
7df393f31d | ||
|
|
72d5f8f35c | ||
|
|
dae46adc12 | ||
|
|
d0dc737c79 | ||
|
|
1c9877015d | ||
|
|
2938d5809a | ||
|
|
f7ec89858d | ||
|
|
67b4dfdce6 | ||
|
|
63d092c83d | ||
|
|
a7b298eeec | ||
|
|
1bbfbc3164 | ||
|
|
2bcc3f0599 | ||
|
|
b93a12c32c | ||
|
|
df3045e424 | ||
|
|
72b8fec1be | ||
|
|
ab17904b1c | ||
|
|
306878c95d | ||
|
|
5768c8acda | ||
|
|
e28290ed79 | ||
|
|
fbabf60559 | ||
|
|
92abac37e2 | ||
|
|
899073e54f | ||
|
|
eb379af78d | ||
|
|
422804d846 | ||
|
|
cc83431d62 | ||
|
|
00354ccc20 | ||
|
|
b75eed6b1e | ||
|
|
e33ec4a02c | ||
|
|
f7afcec824 | ||
|
|
cf0991fdff | ||
|
|
d1e0ee831c | ||
|
|
34985c9045 | ||
|
|
7da09fea74 | ||
|
|
fc28e3b514 | ||
|
|
df5ee5af16 |
@@ -4,6 +4,10 @@ metadata:
|
||||
name: rust-test
|
||||
spec:
|
||||
pipelineSpec:
|
||||
timeouts:
|
||||
pipeline: "2h0m0s"
|
||||
tasks: "1h0m40s"
|
||||
finally: "0h30m0s"
|
||||
params:
|
||||
- name: image-name
|
||||
description: The name for the built image
|
||||
@@ -201,7 +205,6 @@ spec:
|
||||
secret:
|
||||
secretName: harbor-plain
|
||||
serviceAccountName: build-bot
|
||||
timeout: 240h0m0s
|
||||
params:
|
||||
- name: image-name
|
||||
value: "harbor.fizz.buzz/private/organic-test"
|
||||
|
||||
@@ -10,6 +10,12 @@ readme = "README.md"
|
||||
keywords = ["emacs", "org-mode"]
|
||||
categories = ["parsing"]
|
||||
resolver = "2"
|
||||
include = [
|
||||
"LICENSE",
|
||||
"**/*.rs",
|
||||
"Cargo.toml",
|
||||
"tests/*"
|
||||
]
|
||||
|
||||
[lib]
|
||||
name = "organic"
|
||||
|
||||
16
Makefile
16
Makefile
@@ -35,7 +35,17 @@ clean:
|
||||
|
||||
.PHONY: test
|
||||
test:
|
||||
> cargo test --lib --test test_loader -- --test-threads $(TESTJOBS)
|
||||
> cargo test --no-fail-fast --lib --test test_loader -- --test-threads $(TESTJOBS)
|
||||
|
||||
.PHONY: dockertest
|
||||
dockertest:
|
||||
> $(MAKE) -C docker/organic_test
|
||||
> docker run --rm -i -t -v "$$(readlink -f ./):/source:ro" --mount source=cargo-cache,target=/usr/local/cargo/registry --mount source=rust-cache,target=/target --env CARGO_TARGET_DIR=/target -w /source organic-test cargo test --no-fail-fast --lib --test test_loader -- --test-threads $(TESTJOBS)
|
||||
|
||||
.PHONY: dockerclean
|
||||
dockerclean:
|
||||
# Delete volumes created for running the tests in docker. This does not touch anything related to the jaeger docker container.
|
||||
> docker volume rm cargo-cache rust-cache
|
||||
|
||||
.PHONY: integrationtest
|
||||
integrationtest:
|
||||
@@ -49,8 +59,8 @@ unittest:
|
||||
jaeger:
|
||||
# 4317 for OTLP gRPC, 4318 for OTLP HTTP. We currently use gRPC but I forward both ports regardless.
|
||||
#
|
||||
# These flags didn't help even though they seem like they would: --collector.otlp.grpc.max-message-size=10000000 --collector.queue-size=20000 --collector.num-workers=100
|
||||
> docker run -d --rm --name organicdocker -p 4317:4317 -p 4318:4318 -p 16686:16686 -e COLLECTOR_OTLP_ENABLED=true jaegertracing/all-in-one:1.47 --collector.grpc-server.max-message-size=10000000
|
||||
# These flags didn't help even though they seem like they would: --collector.queue-size=20000 --collector.num-workers=100
|
||||
> docker run -d --rm --name organicdocker -p 4317:4317 -p 4318:4318 -p 16686:16686 -e COLLECTOR_OTLP_ENABLED=true jaegertracing/all-in-one:1.47 --collector.grpc-server.max-message-size=20000000 --collector.otlp.grpc.max-message-size=20000000
|
||||
|
||||
.PHONY: jaegerweb
|
||||
jaegerweb:
|
||||
|
||||
3
build.rs
3
build.rs
@@ -74,7 +74,8 @@ fn is_expect_fail(name: &str) -> Option<&str> {
|
||||
"drawer_drawer_with_headline_inside" => Some("Apparently lines with :end: become their own paragraph. This odd behavior needs to be investigated more."),
|
||||
"element_container_priority_footnote_definition_dynamic_block" => Some("Apparently broken begin lines become their own paragraph."),
|
||||
"paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."),
|
||||
"export_snippet_paragraph_break_precedent" => Some("Emacs 28 has broken behavior so the tests in the CI fail."),
|
||||
"export_snippet_paragraph_break_precedence" => Some("The latest code for org-mode is matching the export snippet without the closing @@."), // https://list.orgmode.org/orgmode/fb61ea28-f004-4c25-adf7-69fc55683ed4@app.fastmail.com/T/#u
|
||||
"plain_lists_trailing_whitespace_ownership" => Some("Seeing odd behavior about whitespace ownership."),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,30 @@
|
||||
FROM rustlang/rust:nightly-alpine3.17
|
||||
FROM alpine:3.17 AS build
|
||||
|
||||
RUN apk add --no-cache musl-dev emacs
|
||||
RUN apk add --no-cache build-base musl-dev git autoconf make texinfo gnutls-dev ncurses-dev gawk
|
||||
|
||||
|
||||
FROM build AS build-emacs
|
||||
|
||||
RUN git clone --depth 1 --branch emacs-29.1 https://git.savannah.gnu.org/git/emacs.git /root/emacs
|
||||
WORKDIR /root/emacs
|
||||
RUN mkdir /root/dist
|
||||
RUN ./autogen.sh
|
||||
RUN ./configure --prefix /usr --without-x --without-sound
|
||||
RUN make
|
||||
RUN make DESTDIR="/root/dist" install
|
||||
|
||||
|
||||
FROM build AS build-org-mode
|
||||
COPY --from=build-emacs /root/dist/ /
|
||||
RUN mkdir /root/dist
|
||||
RUN mkdir /root/org-mode && git -C /root/org-mode init && git -C /root/org-mode remote add origin https://git.savannah.gnu.org/git/emacs/org-mode.git && git -C /root/org-mode fetch origin b89bc55867d7cb809c379d371d12d409db785154 && git -C /root/org-mode checkout FETCH_HEAD
|
||||
WORKDIR /root/org-mode
|
||||
RUN make compile
|
||||
RUN make DESTDIR="/root/dist" install
|
||||
|
||||
|
||||
FROM rustlang/rust:nightly-alpine3.17
|
||||
RUN apk add --no-cache musl-dev ncurses gnutls
|
||||
RUN cargo install --locked --no-default-features --features ci-autoclean cargo-cache
|
||||
COPY --from=build-emacs /root/dist/ /
|
||||
COPY --from=build-org-mode /root/dist/ /
|
||||
|
||||
27
notes/optimization_ideas.org
Normal file
27
notes/optimization_ideas.org
Normal file
@@ -0,0 +1,27 @@
|
||||
* Analysis
|
||||
** Parse start per character
|
||||
It might help analysis to record how often we start a specific type of parse for each character. For example, at the start of a plain list, if we had a count of how often each character was the start of a parse of a list we could use that to see how often that list is getting re-parsed.
|
||||
* Optimizations
|
||||
** Edit whitespace for list items
|
||||
Whether or not a list item owns the trailing whitespace depends on whether it is the last list item in that list. Since we do not know ahead of time whether an item is the last item in the list, we have to either re-parse the list item or modify it after parsing.
|
||||
|
||||
*** For
|
||||
We are already modifying the source of some elements after the fact with src_rust{set_source()}, so this would be more of the same.
|
||||
*** Against
|
||||
I'd like to phase out such modifications because they seem hacky and fragile.
|
||||
** Make detect element function
|
||||
Some exit matchers are based on when the next element is found. Some elements do not need to be fully parsed to identify that they are a valid element. For example, src_org{1. foo} can already be identified as the start of a plain list (in the right context) without needing to parse the entire element.
|
||||
*** For
|
||||
Avoiding parsing the entire element for an exit matcher would reduce redundant parses.
|
||||
*** Against
|
||||
This adds code complexity and introduces the potential for bugs.
|
||||
|
||||
How many elements can be reasonably early-detected? For example, src_org{#+begin_src foo} is not enough to detect the start of a source block because without the src_org{#+end_src} it is just plain text.
|
||||
** Grab multiple characters in plaintext parser before checking exit matcher
|
||||
Currently we check the exit matcher after each character inside the plain text parser (and many others). Are there character sequences inside which we can assume no exit matcher will trigger? For example, a contiguous run of Latin-alphabet letters?
|
||||
*** For
|
||||
This could significantly reduce our calls to exit matchers.
|
||||
*** Against
|
||||
I think targets would break this.
|
||||
|
||||
The exit matchers are already implicitly building this behavior since they should all exit very early when the starting character is wrong. Putting this logic in a centralized place, far away from where those characters are actually going to be used, is unfortunate for readability.
|
||||
128
notes/plain_list_ownership_notes.org
Normal file
128
notes/plain_list_ownership_notes.org
Normal file
@@ -0,0 +1,128 @@
|
||||
* Test 1
|
||||
** Source
|
||||
#+begin_src org
|
||||
1. foo
|
||||
|
||||
1. bar
|
||||
|
||||
2. baz
|
||||
|
||||
2. lorem
|
||||
|
||||
ipsum
|
||||
#+end_src
|
||||
** Ownership
|
||||
This table only shows ownership for the plain list items, not for the containing plain list or the elements inside each item.
|
||||
|
||||
| Plain List *Item* | Owns trailing blank lines |
|
||||
|------------------------+---------------------------|
|
||||
| foo (includes bar baz) | Yes |
|
||||
| bar | Yes |
|
||||
| baz | Yes |
|
||||
| lorem | No |
|
||||
** Analysis
|
||||
This seems to imply that plain list items own their trailing blank lines except for the final plain list item in the top-most plain list which does not own its trailing blank lines.
|
||||
* Test 2
|
||||
** Source
|
||||
#+begin_src org
|
||||
1. foo
|
||||
|
||||
bar
|
||||
|
||||
1. baz
|
||||
|
||||
lorem
|
||||
|
||||
ipsum
|
||||
|
||||
|
||||
dolar
|
||||
#+end_src
|
||||
** Ownership
|
||||
This table only shows ownership for the plain list items, not for the containing plain list or the elements inside each item.
|
||||
|
||||
| Plain List *Item* | Owns trailing blank lines |
|
||||
|--------------------------+---------------------------|
|
||||
| foo -> ipsum (inclusive) | No |
|
||||
| baz lorem | No |
|
||||
** Analysis
|
||||
This shows that the final plain list item in a nested plain list (baz lorem) does not own its trailing blank lines which conflicts with "baz" from Test 1.
|
||||
* Test 3
|
||||
** Source
|
||||
#+begin_src org
|
||||
1. foo
|
||||
|
||||
1. bar
|
||||
|
||||
baz
|
||||
|
||||
2. lorem
|
||||
|
||||
ipsum
|
||||
#+end_src
|
||||
** Ownership
|
||||
| Plain List *Item* | Owns trailing blank lines |
|
||||
|------------------------+---------------------------|
|
||||
| foo (includes bar baz) | Yes |
|
||||
| bar baz | Yes |
|
||||
| lorem | No |
|
||||
** Analysis
|
||||
This was to test if having an extra paragraph in the final list item in the nested list changes the behavior. The behavior is consistent with Test 1, so the extra paragraph is not the cause of the discrepancy.
|
||||
* Test 4
|
||||
** Source
|
||||
#+begin_src org
|
||||
1. foo
|
||||
|
||||
1. bar
|
||||
|
||||
2. baz
|
||||
|
||||
candy
|
||||
|
||||
2. lorem
|
||||
|
||||
ipsum
|
||||
#+end_src
|
||||
** Ownership
|
||||
| Plain List *Item* | Owns trailing blank lines |
|
||||
|----------------------------------+---------------------------|
|
||||
| foo (includes bar baz and candy) | Yes |
|
||||
| bar | Yes |
|
||||
| baz | No |
|
||||
| lorem | No |
|
||||
** Analysis
|
||||
This was to test if putting a non-plain-list element at the end of foo changes the ownership of blank lines. baz changed to no longer owning its trailing whitespace.
|
||||
|
||||
This seems to imply that list items own their trailing whitespace, except for the final item in a list, which only owns it when its list is nested at the end of another list item.
|
||||
* Test 5
|
||||
** Source
|
||||
#+begin_src org
|
||||
1. foo
|
||||
|
||||
1. bar
|
||||
|
||||
2. baz
|
||||
|
||||
candy
|
||||
|
||||
2. lorem
|
||||
|
||||
1. cat
|
||||
|
||||
2. dog
|
||||
|
||||
ipsum
|
||||
#+end_src
|
||||
** Ownership
|
||||
| Plain List *Item* | Owns trailing blank lines |
|
||||
|----------------------------------+---------------------------|
|
||||
| foo (includes bar baz and candy) | Yes |
|
||||
| bar | Yes |
|
||||
| baz | No |
|
||||
| lorem (includes cat and dog) | No |
|
||||
| cat | Yes |
|
||||
| dog | No |
|
||||
** Analysis
|
||||
This breaks the theory that the final list item nested at the end of a list item gets to own its trailing blank lines since dog does not own its blank lines despite Test 1's baz owning its blank lines.
|
||||
|
||||
New Theory: final list items only own their blank lines if they are nested at the end of a non-final list item.
|
||||
@@ -0,0 +1,11 @@
|
||||
1. foo
|
||||
|
||||
1. bar
|
||||
|
||||
2. baz
|
||||
|
||||
2. lorem
|
||||
|
||||
|
||||
|
||||
ipsum
|
||||
@@ -0,0 +1,18 @@
|
||||
foo bar.
|
||||
|
||||
|
||||
|
||||
|
||||
* Lorem
|
||||
baz
|
||||
|
||||
|
||||
|
||||
|
||||
* Ipsum
|
||||
alpha
|
||||
|
||||
|
||||
|
||||
|
||||
beta
|
||||
46
scripts/run_docker_compare.bash
Executable file
46
scripts/run_docker_compare.bash
Executable file
@@ -0,0 +1,46 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
set -euo pipefail
|
||||
IFS=$'\n\t'
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
||||
|
||||
: ${SHELL:="NO"} # or YES to launch a shell instead of running the test
|
||||
: ${TRACE:="NO"} # or YES to send traces to jaeger
|
||||
: ${BACKTRACE:="NO"} # or YES to print a rust backtrace when panicking
|
||||
|
||||
cd "$DIR/../"
|
||||
REALPATH=$(command -v uu-realpath || command -v realpath)
|
||||
MAKE=$(command -v gmake || command -v make)
|
||||
|
||||
# Entry point: build the test image first, then start a container from it.
main() {
    build_container
    launch_container
}
|
||||
|
||||
# Build the docker image by running make in docker/organic_test.
function build_container {
    # Quoted so the resolved make path is never subject to word splitting or globbing.
    "$MAKE" -C "$DIR/../docker/organic_test"
}
|
||||
|
||||
# Run the organic-test image with the repository mounted read-only at /source.
#
# Behaviour is controlled by the globals set at the top of the script:
#   SHELL=YES      allocate a TTY and pass no command to the container;
#                  any other value runs `cargo run`.
#   TRACE=YES      use host networking and set RUST_LOG=debug.
#   BACKTRACE=YES  set RUST_BACKTRACE=full.
function launch_container {
    local additional_flags=()
    local additional_args=()

    if [ "$SHELL" != "YES" ]; then
        additional_args+=(cargo run)
    else
        additional_flags+=(-t)
    fi

    if [ "$TRACE" = "YES" ]; then
        # We use the host network so it can talk to jaeger hosted at 127.0.0.1
        additional_flags+=(--network=host --env RUST_LOG=debug)
    fi

    if [ "$BACKTRACE" = "YES" ]; then
        additional_flags+=(--env RUST_BACKTRACE=full)
    fi

    # One of the two arrays is always empty on the default path. With `set -u`,
    # bash older than 4.4 treats a bare "${arr[@]}" on an empty array as an
    # unbound variable, so expand each array only when it has elements.
    docker run ${additional_flags[@]+"${additional_flags[@]}"} --rm -i -v "$("$REALPATH" ./):/source:ro" --mount source=cargo-cache,target=/usr/local/cargo/registry --mount source=rust-cache,target=/target --env CARGO_TARGET_DIR=/target -w /source organic-test ${additional_args[@]+"${additional_args[@]}"}
}
|
||||
|
||||
main "${@}"
|
||||
57
scripts/run_docker_integration_test.bash
Executable file
57
scripts/run_docker_integration_test.bash
Executable file
@@ -0,0 +1,57 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
set -euo pipefail
|
||||
IFS=$'\n\t'
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
||||
|
||||
cd "$DIR/../"
|
||||
REALPATH=$(command -v uu-realpath || command -v realpath)
|
||||
MAKE=$(command -v gmake || command -v make)
|
||||
|
||||
# Entry point: turn the arguments into test names, build the image, then run
# one container per test name.
function main {
    # Declare and assign separately so a failure in get_test_names is not
    # masked by the exit status of `local`.
    local test_names
    test_names=$(get_test_names "${@}")
    build_container

    local test
    # -r keeps backslashes in a name literal. When no names were produced the
    # here-string still supplies one empty line, so the container is launched
    # once with an empty filter.
    while read -r test; do
        launch_container "$test"
    done<<<"$test_names"
}
|
||||
|
||||
# Build the docker image by running make in docker/organic_test.
function build_container {
    # Quoted so the resolved make path is never subject to word splitting or globbing.
    "$MAKE" -C "$DIR/../docker/organic_test"
}
|
||||
|
||||
# Print one lower-cased test name per argument.
#
# An argument that names an existing file is converted to its path relative to
# org_mode_samples, with the ".org" suffix removed and every "/" turned into
# "_". Any other argument is passed through, lower-cased only.
function get_test_names {
    local test_file
    local samples_dir
    local test_file_full_path
    local relative_to_samples
    local without_extension

    samples_dir=$("$REALPATH" "$DIR/../org_mode_samples")
    for test_file in "$@"
    do
        if [ -e "$test_file" ]; then
            test_file_full_path=$("$REALPATH" "$test_file")
            relative_to_samples=$("$REALPATH" --relative-to "$samples_dir" "$test_file_full_path")
            without_extension="${relative_to_samples%.org}"
            # "//" replaces every slash; a single "/" would only replace the
            # first one and leave a path separator in the name for samples
            # nested more than one directory deep.
            # NOTE(review): assumes the generated test names flatten all
            # directory levels with "_" - confirm against build.rs.
            echo "${without_extension//\//_}" | tr '[:upper:]' '[:lower:]'
        else
            echo "$test_file" | tr '[:upper:]' '[:lower:]'
        fi
    done
}
|
||||
|
||||
# Run `cargo test` for a single test filter inside the organic-test image,
# with the repository mounted read-only at /source.
#
# $1: test name filter handed to cargo test (may be empty).
function launch_container {
    local test="$1"
    local init_script

    # The heredoc is quoted so nothing is expanded on the host. The test filter
    # is handed to the inner shell as its first positional parameter instead of
    # being spliced into the script text, so names containing quotes, "$" or
    # backticks cannot alter the script.
    init_script=$(cat <<'EOF'
set -euo pipefail
IFS=$'\n\t'

cargo test --no-fail-fast --lib --test test_loader "$1" -- --show-output
EOF
)

    docker run --rm -v "$("$REALPATH" ./):/source:ro" --mount source=cargo-cache,target=/usr/local/cargo/registry --mount source=rust-cache,target=/target --env CARGO_TARGET_DIR=/target -w /source organic-test sh -c "$init_script" sh "$test"
}
|
||||
|
||||
|
||||
main "${@}"
|
||||
@@ -4,17 +4,27 @@ set -euo pipefail
|
||||
IFS=$'\n\t'
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
||||
|
||||
cd "$DIR/../"
|
||||
REALPATH=$(command -v uu-realpath || command -v realpath)
|
||||
|
||||
samples_dir=$(readlink -f "$DIR/../org_mode_samples")
|
||||
function main {
|
||||
local test_names=$(get_test_names "${@}")
|
||||
|
||||
local test
|
||||
while read test; do
|
||||
cargo test --no-fail-fast --test test_loader "$test" -- --show-output
|
||||
done<<<"$test_names"
|
||||
}
|
||||
|
||||
function get_test_names {
|
||||
local test_file
|
||||
local samples_dir=$($REALPATH "$DIR/../org_mode_samples")
|
||||
for test_file in "$@"
|
||||
do
|
||||
if [ -e "$test_file" ]; then
|
||||
test_file_full_path=$(readlink -f "$test_file")
|
||||
relative_to_samples=$($REALPATH --relative-to "$samples_dir" "$test_file_full_path")
|
||||
without_extension="${relative_to_samples%.org}"
|
||||
local test_file_full_path=$($REALPATH "$test_file")
|
||||
local relative_to_samples=$($REALPATH --relative-to "$samples_dir" "$test_file_full_path")
|
||||
local without_extension="${relative_to_samples%.org}"
|
||||
echo "${without_extension/\//_}" | tr '[:upper:]' '[:lower:]'
|
||||
else
|
||||
echo "$test_file" | tr '[:upper:]' '[:lower:]'
|
||||
@@ -22,6 +32,4 @@ function get_test_names {
|
||||
done
|
||||
}
|
||||
|
||||
get_test_names "$@" | while read test; do
|
||||
(cd "$DIR/../" && cargo test --no-fail-fast --test test_loader "$test" -- --show-output)
|
||||
done
|
||||
main "${@}"
|
||||
|
||||
@@ -1,22 +1,25 @@
|
||||
use std::path::Path;
|
||||
use std::process::Command;
|
||||
|
||||
pub fn emacs_parse_org_document<'a, C>(file_path: C) -> Result<String, Box<dyn std::error::Error>>
|
||||
pub fn emacs_parse_org_document<C>(file_contents: C) -> Result<String, Box<dyn std::error::Error>>
|
||||
where
|
||||
C: AsRef<Path>,
|
||||
C: AsRef<str>,
|
||||
{
|
||||
let elisp_script = r#"(progn
|
||||
let escaped_file_contents = escape_elisp_string(file_contents);
|
||||
let elisp_script = format!(
|
||||
r#"(progn
|
||||
(erase-buffer)
|
||||
(insert "{escaped_file_contents}")
|
||||
(org-mode)
|
||||
(message "%s" (pp-to-string (org-element-parse-buffer)))
|
||||
)"#;
|
||||
)"#,
|
||||
escaped_file_contents = escaped_file_contents
|
||||
);
|
||||
let mut cmd = Command::new("emacs");
|
||||
let proc = cmd
|
||||
.arg("-q")
|
||||
.arg("--no-site-file")
|
||||
.arg("--no-splash")
|
||||
.arg("--batch")
|
||||
.arg("--insert")
|
||||
.arg(file_path.as_ref().as_os_str())
|
||||
.arg("--eval")
|
||||
.arg(elisp_script);
|
||||
let out = proc.output()?;
|
||||
@@ -24,3 +27,25 @@ where
|
||||
let org_sexp = out.stderr;
|
||||
Ok(String::from_utf8(org_sexp)?)
|
||||
}
|
||||
|
||||
/// Escape `file_contents` for embedding between double quotes in an elisp
/// string literal.
///
/// A backslash is inserted before every `"` and `\`; all other characters are
/// copied through unchanged.
fn escape_elisp_string<C>(file_contents: C) -> String
where
    C: AsRef<str>,
{
    let raw = file_contents.as_ref();
    // Reserve roughly 10% headroom for the inserted backslashes so typical
    // inputs do not force repeated reallocations while escaping.
    let mut escaped = String::with_capacity(raw.len() + (raw.len() / 10));
    for ch in raw.chars() {
        if matches!(ch, '"' | '\\') {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
|
||||
|
||||
@@ -48,14 +48,31 @@ pub fn assert_bounds<'s, S: Source<'s>>(
|
||||
.nth(1)
|
||||
.ok_or("Should have an attributes child.")?;
|
||||
let attributes_map = attributes_child.as_map()?;
|
||||
let begin = attributes_map
|
||||
.get(":begin")
|
||||
.ok_or("Missing :begin attribute.")?
|
||||
.as_atom()?;
|
||||
let end = attributes_map
|
||||
.get(":end")
|
||||
.ok_or("Missing :end attribute.")?
|
||||
.as_atom()?;
|
||||
let standard_properties = attributes_map.get(":standard-properties");
|
||||
let (begin, end) = if standard_properties.is_some() {
|
||||
let std_props = standard_properties
|
||||
.expect("if statement proves its Some")
|
||||
.as_vector()?;
|
||||
let begin = std_props
|
||||
.get(0)
|
||||
.ok_or("Missing first element in standard properties")?
|
||||
.as_atom()?;
|
||||
let end = std_props
|
||||
.get(1)
|
||||
.ok_or("Missing first element in standard properties")?
|
||||
.as_atom()?;
|
||||
(begin, end)
|
||||
} else {
|
||||
let begin = attributes_map
|
||||
.get(":begin")
|
||||
.ok_or("Missing :begin attribute.")?
|
||||
.as_atom()?;
|
||||
let end = attributes_map
|
||||
.get(":end")
|
||||
.ok_or("Missing :end attribute.")?
|
||||
.as_atom()?;
|
||||
(begin, end)
|
||||
};
|
||||
let (rust_begin, rust_end) = get_offsets(source, rust);
|
||||
if (rust_begin + 1).to_string() != begin || (rust_end + 1).to_string() != end {
|
||||
Err(format!("Rust bounds ({rust_begin}, {rust_end}) do not match emacs bounds ({emacs_begin}, {emacs_end})", rust_begin = rust_begin + 1, rust_end = rust_end + 1, emacs_begin=begin, emacs_end=end))?;
|
||||
|
||||
@@ -15,7 +15,8 @@ pub fn init_telemetry() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// TODO: I think the endpoint can be controlled by the OTEL_EXPORTER_OTLP_TRACES_ENDPOINT env variable instead of hard-coded into this code base. Regardless, I am the only developer right now so I am not too concerned.
|
||||
let exporter = opentelemetry_otlp::new_exporter()
|
||||
.tonic()
|
||||
.with_endpoint("http://localhost:4317/v1/traces");
|
||||
// Using "localhost" is broken inside the docker container when tracing
|
||||
.with_endpoint("http://127.0.0.1:4317/v1/traces");
|
||||
|
||||
let tracer = opentelemetry_otlp::new_pipeline()
|
||||
.tracing()
|
||||
|
||||
55
src/main.rs
55
src/main.rs
@@ -1,5 +1,6 @@
|
||||
#![feature(round_char_boundary)]
|
||||
use std::path::Path;
|
||||
#[cfg(feature = "compare")]
|
||||
use std::io::Read;
|
||||
|
||||
#[cfg(feature = "compare")]
|
||||
use ::organic::parser::document;
|
||||
@@ -10,8 +11,11 @@ use organic::emacs_parse_org_document;
|
||||
#[cfg(feature = "compare")]
|
||||
use organic::parser::sexp::sexp_with_padding;
|
||||
|
||||
#[cfg(feature = "tracing")]
|
||||
use crate::init_tracing::init_telemetry;
|
||||
#[cfg(feature = "tracing")]
|
||||
use crate::init_tracing::shutdown_telemetry;
|
||||
#[cfg(feature = "tracing")]
|
||||
mod init_tracing;
|
||||
|
||||
#[cfg(not(feature = "tracing"))]
|
||||
@@ -22,40 +26,47 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
#[cfg(feature = "tracing")]
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let rt = tokio::runtime::Runtime::new()?;
|
||||
let result = rt.block_on(async { main_body() });
|
||||
let result = rt.block_on(async {
|
||||
init_telemetry()?;
|
||||
let main_body_result = main_body();
|
||||
shutdown_telemetry()?;
|
||||
main_body_result
|
||||
});
|
||||
result
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
fn main_body() -> Result<(), Box<dyn std::error::Error>> {
|
||||
init_telemetry()?;
|
||||
run_compare(
|
||||
std::env::args()
|
||||
.nth(1)
|
||||
.expect("Pass a single file into this script."),
|
||||
)?;
|
||||
shutdown_telemetry()?;
|
||||
Ok(())
|
||||
#[cfg(not(feature = "compare"))]
|
||||
let org_contents = "";
|
||||
#[cfg(feature = "compare")]
|
||||
let org_contents = read_stdin_to_string()?;
|
||||
run_compare(org_contents)
|
||||
}
|
||||
|
||||
#[cfg(feature = "compare")]
|
||||
fn run_compare<P: AsRef<Path>>(todo_org_path: P) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let org_contents = std::fs::read_to_string(todo_org_path.as_ref()).expect("Read org file.");
|
||||
let (remaining, rust_parsed) = document(org_contents.as_str()).expect("Org Parse failure");
|
||||
let org_sexp =
|
||||
emacs_parse_org_document(todo_org_path.as_ref()).expect("Use emacs to parse org file.");
|
||||
/// Read all of standard input into a `String`.
///
/// Returns an error if reading fails, including when the input is not valid
/// UTF-8.
fn read_stdin_to_string() -> Result<String, Box<dyn std::error::Error>> {
    let stdin = std::io::stdin();
    let mut handle = stdin.lock();
    let mut buffer = String::new();
    handle.read_to_string(&mut buffer)?;
    Ok(buffer)
}
|
||||
|
||||
#[cfg(feature = "compare")]
|
||||
fn run_compare<P: AsRef<str>>(org_contents: P) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let (remaining, rust_parsed) = document(org_contents.as_ref()).expect("Org Parse failure");
|
||||
let org_sexp = emacs_parse_org_document(org_contents.as_ref())?;
|
||||
let (_remaining, parsed_sexp) =
|
||||
sexp_with_padding(org_sexp.as_str()).expect("Sexp Parse failure");
|
||||
|
||||
println!("{}\n\n\n", org_contents.as_str());
|
||||
println!("{}\n\n\n", org_contents.as_ref());
|
||||
println!("{}", org_sexp);
|
||||
println!("{:#?}", rust_parsed);
|
||||
|
||||
// We do the diffing after printing out both parsed forms in case the diffing panics
|
||||
let diff_result =
|
||||
compare_document(&parsed_sexp, &rust_parsed).expect("Compare parsed documents.");
|
||||
diff_result
|
||||
.print()
|
||||
.expect("Print document parse tree diff.");
|
||||
let diff_result = compare_document(&parsed_sexp, &rust_parsed)?;
|
||||
diff_result.print()?;
|
||||
|
||||
if diff_result.is_bad() {
|
||||
Err("Diff results do not match.")?;
|
||||
@@ -68,7 +79,7 @@ fn run_compare<P: AsRef<Path>>(todo_org_path: P) -> Result<(), Box<dyn std::erro
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "compare"))]
|
||||
fn run_compare<P: AsRef<Path>>(_todo_org_path: P) -> Result<(), Box<dyn std::error::Error>> {
|
||||
fn run_compare<P: AsRef<str>>(_org_contents: P) -> Result<(), Box<dyn std::error::Error>> {
|
||||
println!("This program was built with compare disabled. Doing nothing.");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -160,7 +160,7 @@ fn zeroth_section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s
|
||||
opt(parser_with_context!(comment)(
|
||||
&without_consuming_whitespace_context,
|
||||
)),
|
||||
parser_with_context!(property_drawer)(&without_consuming_whitespace_context),
|
||||
parser_with_context!(property_drawer)(context),
|
||||
many0(blank_line),
|
||||
)))(input)?;
|
||||
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::tag;
|
||||
use nom::character::complete::anychar;
|
||||
use nom::combinator::opt;
|
||||
use nom::combinator::peek;
|
||||
use nom::combinator::recognize;
|
||||
use nom::combinator::verify;
|
||||
use nom::multi::many1;
|
||||
@@ -11,6 +9,9 @@ use nom::sequence::tuple;
|
||||
|
||||
use super::Context;
|
||||
use crate::error::Res;
|
||||
use crate::parser::exiting::ExitClass;
|
||||
use crate::parser::parser_context::ContextElement;
|
||||
use crate::parser::parser_context::ExitMatcherNode;
|
||||
use crate::parser::parser_with_context::parser_with_context;
|
||||
use crate::parser::util::exit_matcher_parser;
|
||||
use crate::parser::util::get_consumed;
|
||||
@@ -23,8 +24,15 @@ pub fn export_snippet<'r, 's>(
|
||||
) -> Res<&'s str, ExportSnippet<'s>> {
|
||||
let (remaining, _) = tag("@@")(input)?;
|
||||
let (remaining, backend_name) = backend(context, remaining)?;
|
||||
let (remaining, backend_contents) =
|
||||
opt(tuple((tag(":"), parser_with_context!(contents)(context))))(remaining)?;
|
||||
let parser_context =
|
||||
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
|
||||
class: ExitClass::Beta,
|
||||
exit_matcher: &export_snippet_end,
|
||||
}));
|
||||
let (remaining, backend_contents) = opt(tuple((
|
||||
tag(":"),
|
||||
parser_with_context!(contents)(&parser_context),
|
||||
)))(remaining)?;
|
||||
let (remaining, _) = tag("@@")(remaining)?;
|
||||
let source = get_consumed(input, remaining);
|
||||
Ok((
|
||||
@@ -48,14 +56,13 @@ fn backend<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
fn contents<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
|
||||
let (remaining, source) = recognize(verify(
|
||||
many_till(
|
||||
anychar,
|
||||
peek(alt((
|
||||
parser_with_context!(exit_matcher_parser)(context),
|
||||
tag("@@"),
|
||||
))),
|
||||
),
|
||||
many_till(anychar, parser_with_context!(exit_matcher_parser)(context)),
|
||||
|(children, _exit_contents)| !children.is_empty(),
|
||||
))(input)?;
|
||||
Ok((remaining, source))
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
fn export_snippet_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
|
||||
tag("@@")(input)
|
||||
}
|
||||
|
||||
@@ -200,7 +200,7 @@ pub fn src_block<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s st
|
||||
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
|
||||
.with_additional_node(ContextElement::Context("lesser block"))
|
||||
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
|
||||
class: ExitClass::Beta,
|
||||
class: ExitClass::Alpha,
|
||||
exit_matcher: &lesser_block_end_specialized,
|
||||
}));
|
||||
let parameters = match parameters {
|
||||
@@ -238,31 +238,49 @@ fn lesser_block_end(
|
||||
) -> impl for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, &'s str> {
|
||||
let current_name_lower = current_name.to_lowercase();
|
||||
move |context: Context, input: &str| {
|
||||
start_of_line(context, input)?;
|
||||
let (remaining, _leading_whitespace) = space0(input)?;
|
||||
let (remaining, (_begin, _name, _ws)) = tuple((
|
||||
tag_no_case("#+end_"),
|
||||
tag_no_case(current_name_lower.as_str()),
|
||||
alt((eof, line_ending)),
|
||||
))(remaining)?;
|
||||
let source = get_consumed(input, remaining);
|
||||
Ok((remaining, source))
|
||||
_lesser_block_end(context, input, current_name_lower.as_str())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
fn _lesser_block_end<'r, 's, 'x>(
|
||||
context: Context<'r, 's>,
|
||||
input: &'s str,
|
||||
current_name_lower: &'x str,
|
||||
) -> Res<&'s str, &'s str> {
|
||||
start_of_line(context, input)?;
|
||||
let (remaining, _leading_whitespace) = space0(input)?;
|
||||
let (remaining, (_begin, _name, _ws)) = tuple((
|
||||
tag_no_case("#+end_"),
|
||||
tag_no_case(current_name_lower),
|
||||
alt((eof, line_ending)),
|
||||
))(remaining)?;
|
||||
let source = get_consumed(input, remaining);
|
||||
Ok((remaining, source))
|
||||
}
|
||||
|
||||
fn lesser_block_begin(
|
||||
current_name: &str,
|
||||
) -> impl for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, &'s str> {
|
||||
let current_name_lower = current_name.to_lowercase();
|
||||
move |context: Context, input: &str| {
|
||||
start_of_line(context, input)?;
|
||||
let (remaining, _leading_whitespace) = space0(input)?;
|
||||
let (remaining, (_begin, name)) = tuple((
|
||||
tag_no_case("#+begin_"),
|
||||
verify(name, |name: &str| {
|
||||
name.to_lowercase().as_str() == current_name_lower
|
||||
}),
|
||||
))(remaining)?;
|
||||
Ok((remaining, name))
|
||||
_lesser_block_begin(context, input, current_name_lower.as_str())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
fn _lesser_block_begin<'r, 's, 'x>(
|
||||
context: Context<'r, 's>,
|
||||
input: &'s str,
|
||||
current_name_lower: &'x str,
|
||||
) -> Res<&'s str, &'s str> {
|
||||
start_of_line(context, input)?;
|
||||
let (remaining, _leading_whitespace) = space0(input)?;
|
||||
let (remaining, (_begin, name)) = tuple((
|
||||
tag_no_case("#+begin_"),
|
||||
verify(name, |name: &str| {
|
||||
name.to_lowercase().as_str() == current_name_lower
|
||||
}),
|
||||
))(remaining)?;
|
||||
Ok((remaining, name))
|
||||
}
|
||||
|
||||
@@ -6,16 +6,12 @@ use nom::character::complete::one_of;
|
||||
use nom::character::complete::space0;
|
||||
use nom::character::complete::space1;
|
||||
use nom::combinator::eof;
|
||||
use nom::combinator::peek;
|
||||
use nom::combinator::opt;
|
||||
use nom::combinator::recognize;
|
||||
use nom::combinator::verify;
|
||||
use nom::multi::many1;
|
||||
use nom::multi::many_till;
|
||||
use nom::sequence::preceded;
|
||||
use nom::sequence::terminated;
|
||||
use nom::sequence::tuple;
|
||||
#[cfg(feature = "tracing")]
|
||||
use tracing::span;
|
||||
|
||||
use super::greater_element::PlainList;
|
||||
use super::greater_element::PlainListItem;
|
||||
@@ -32,6 +28,7 @@ use crate::parser::parser_context::ExitMatcherNode;
|
||||
use crate::parser::util::blank_line;
|
||||
use crate::parser::util::exit_matcher_parser;
|
||||
use crate::parser::util::get_consumed;
|
||||
use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting;
|
||||
use crate::parser::util::start_of_line;
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
@@ -42,107 +39,64 @@ pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s
|
||||
class: ExitClass::Beta,
|
||||
exit_matcher: &plain_list_end,
|
||||
}));
|
||||
let without_consume_context =
|
||||
parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(false));
|
||||
let with_consume_context =
|
||||
parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true));
|
||||
let without_consume_matcher = parser_with_context!(plain_list_item)(&without_consume_context);
|
||||
let with_consume_matcher = parser_with_context!(plain_list_item)(&with_consume_context);
|
||||
let exit_matcher = parser_with_context!(exit_matcher_parser)(&with_consume_context);
|
||||
// children stores tuple of (input string, parsed object) so we can re-parse the final item
|
||||
let mut children = Vec::new();
|
||||
let mut first_item_indentation: Option<usize> = None;
|
||||
let mut remaining = input;
|
||||
|
||||
// The final list item does not consume trailing blank lines (which instead get consumed by the list). We have three options here:
|
||||
//
|
||||
// 1. Parse all items while consuming trailing whitespace, then edit the final item to remove trailing whitespace.
|
||||
// 2. Parse all items without consuming trailing whitespace, then edit all but the final one to add in the trailing whitespace.
|
||||
// 3. Re-parse the final item with consume trailing whitespace disabled.
|
||||
//
|
||||
// While #3 is the slowest, it also seems to be the cleanest and involves the least manual mutation of already-parsed objects, so I am going with #3 for now, but we should revisit #1 or #2 when the parser is more developed.
|
||||
|
||||
loop {
|
||||
/*
|
||||
Trailing whitespace belongs to the plain list, not the plain list item
|
||||
let list_item = parser_with_context!(plain_list_item)(&parser_context)(remaining);
|
||||
match list_item {
|
||||
Ok((remain, item))
|
||||
if item.indentation == *first_item_indentation.get_or_insert(item.indentation) =>
|
||||
{
|
||||
children.push((remaining, item));
|
||||
remaining = remain;
|
||||
}
|
||||
Ok(_) | Err(_) => {
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
Possible outcomes:
|
||||
Don't consume, yes exit matcher
|
||||
Don't consume, no additional item
|
||||
Consume, additional item
|
||||
*/
|
||||
{
|
||||
// Don't consume, yes exit matcher
|
||||
#[cfg(feature = "tracing")]
|
||||
let span = span!(tracing::Level::DEBUG, "first");
|
||||
#[cfg(feature = "tracing")]
|
||||
let _enter = span.enter();
|
||||
|
||||
let last_item_then_exit = tuple((without_consume_matcher, exit_matcher))(remaining);
|
||||
match last_item_then_exit {
|
||||
Ok((remain, (item, _exit)))
|
||||
if item.indentation
|
||||
== *first_item_indentation.get_or_insert(item.indentation) =>
|
||||
{
|
||||
remaining = remain;
|
||||
children.push(item);
|
||||
break;
|
||||
}
|
||||
Ok(_) | Err(_) => {}
|
||||
};
|
||||
}
|
||||
|
||||
{
|
||||
// Consume, additional item
|
||||
#[cfg(feature = "tracing")]
|
||||
let span = span!(tracing::Level::DEBUG, "second");
|
||||
#[cfg(feature = "tracing")]
|
||||
let _enter = span.enter();
|
||||
|
||||
let not_last_item =
|
||||
tuple((with_consume_matcher, peek(without_consume_matcher)))(remaining);
|
||||
match not_last_item {
|
||||
Ok((remain, (item, future_item)))
|
||||
if item.indentation
|
||||
== *first_item_indentation.get_or_insert(item.indentation)
|
||||
&& future_item.indentation
|
||||
== *first_item_indentation.get_or_insert(item.indentation) =>
|
||||
{
|
||||
remaining = remain;
|
||||
children.push(item);
|
||||
continue;
|
||||
}
|
||||
Ok(_) | Err(_) => {}
|
||||
};
|
||||
}
|
||||
|
||||
{
|
||||
// Don't consume, no additional item
|
||||
#[cfg(feature = "tracing")]
|
||||
let span = span!(tracing::Level::DEBUG, "third");
|
||||
#[cfg(feature = "tracing")]
|
||||
let _enter = span.enter();
|
||||
|
||||
let last_item_then_exit = without_consume_matcher(remaining);
|
||||
match last_item_then_exit {
|
||||
Ok((remain, item))
|
||||
if item.indentation
|
||||
== *first_item_indentation.get_or_insert(item.indentation) =>
|
||||
{
|
||||
remaining = remain;
|
||||
children.push(item);
|
||||
break;
|
||||
}
|
||||
Ok(_) | Err(_) => {
|
||||
// TODO: Maybe this is reachable when there are no items at all.
|
||||
return Err(nom::Err::Error(CustomError::MyError(MyError(
|
||||
"Should be unreachable.",
|
||||
))));
|
||||
// unreachable!();
|
||||
}
|
||||
};
|
||||
let maybe_exit = parser_with_context!(exit_matcher_parser)(&parser_context)(remaining);
|
||||
if maybe_exit.is_ok() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if children.is_empty() {
|
||||
return Err(nom::Err::Error(CustomError::MyError(MyError(
|
||||
"Plain lists require at least one element.",
|
||||
))));
|
||||
}
|
||||
let (final_child_start, _final_item_first_parse) = match children.pop() {
|
||||
Some(final_child) => final_child,
|
||||
None => {
|
||||
return Err(nom::Err::Error(CustomError::MyError(MyError(
|
||||
"Plain lists require at least one element.",
|
||||
))));
|
||||
}
|
||||
};
|
||||
let final_item_context =
|
||||
parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(false));
|
||||
let (remaining, reparsed_final_item) =
|
||||
parser_with_context!(plain_list_item)(&final_item_context)(final_child_start)?;
|
||||
children.push((final_child_start, reparsed_final_item));
|
||||
|
||||
let (remaining, _trailing_ws) =
|
||||
maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
|
||||
|
||||
let source = get_consumed(input, remaining);
|
||||
Ok((remaining, PlainList { source, children }))
|
||||
Ok((
|
||||
remaining,
|
||||
PlainList {
|
||||
source,
|
||||
children: children.into_iter().map(|(_start, item)| item).collect(),
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
@@ -154,29 +108,12 @@ pub fn plain_list_item<'r, 's>(
|
||||
let (remaining, leading_whitespace) = space0(input)?;
|
||||
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
|
||||
let indent_level = leading_whitespace.len();
|
||||
let with_consume_context = context
|
||||
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
|
||||
.with_additional_node(ContextElement::ListItem(indent_level))
|
||||
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
|
||||
class: ExitClass::Beta,
|
||||
exit_matcher: &plain_list_item_end,
|
||||
}));
|
||||
let without_consume_context = context
|
||||
.with_additional_node(ContextElement::ListItem(indent_level))
|
||||
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
|
||||
class: ExitClass::Beta,
|
||||
exit_matcher: &plain_list_item_end,
|
||||
}));
|
||||
|
||||
let with_consume_matcher = parser_with_context!(element(true))(&with_consume_context);
|
||||
let without_consume_matcher = parser_with_context!(element(true))(&without_consume_context);
|
||||
let exit_matcher = parser_with_context!(exit_matcher_parser)(&with_consume_context);
|
||||
let (remaining, bull) =
|
||||
verify(bullet, |bull: &str| bull != "*" || indent_level > 0)(remaining)?;
|
||||
|
||||
let maybe_contentless_item: Res<&str, &str> = alt((eof, line_ending))(remaining);
|
||||
match maybe_contentless_item {
|
||||
Ok((rem, _ws)) => {
|
||||
// TODO: do we need to consume if this isn't the last item?
|
||||
let source = get_consumed(input, rem);
|
||||
return Ok((
|
||||
rem,
|
||||
@@ -188,31 +125,39 @@ pub fn plain_list_item<'r, 's>(
|
||||
},
|
||||
));
|
||||
}
|
||||
Err(_) => {
|
||||
let (remaining, _ws) = space1(remaining)?;
|
||||
let (remaining, (mut contents, final_element)) = many_till(
|
||||
&with_consume_matcher,
|
||||
alt((
|
||||
terminated(&without_consume_matcher, exit_matcher),
|
||||
preceded(
|
||||
peek(tuple((&with_consume_matcher, exit_matcher))),
|
||||
&without_consume_matcher,
|
||||
),
|
||||
)),
|
||||
)(remaining)?;
|
||||
contents.push(final_element);
|
||||
let source = get_consumed(input, remaining);
|
||||
return Ok((
|
||||
remaining,
|
||||
PlainListItem {
|
||||
source,
|
||||
indentation: indent_level,
|
||||
bullet: bull,
|
||||
children: contents,
|
||||
},
|
||||
));
|
||||
}
|
||||
Err(_) => {}
|
||||
};
|
||||
|
||||
let (remaining, _ws) = space1(remaining)?;
|
||||
let parser_context = context
|
||||
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
|
||||
.with_additional_node(ContextElement::ListItem(indent_level))
|
||||
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
|
||||
class: ExitClass::Beta,
|
||||
exit_matcher: &plain_list_item_end,
|
||||
}));
|
||||
|
||||
let (remaining, (children, _exit_contents)) = verify(
|
||||
many_till(
|
||||
parser_with_context!(element(true))(&parser_context),
|
||||
parser_with_context!(exit_matcher_parser)(&parser_context),
|
||||
),
|
||||
|(children, _exit_contents)| !children.is_empty(),
|
||||
)(remaining)?;
|
||||
|
||||
let (remaining, _trailing_ws) =
|
||||
maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
|
||||
|
||||
let source = get_consumed(input, remaining);
|
||||
return Ok((
|
||||
remaining,
|
||||
PlainListItem {
|
||||
source,
|
||||
indentation: indent_level,
|
||||
bullet: bull,
|
||||
children,
|
||||
},
|
||||
));
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
@@ -241,18 +186,11 @@ fn plain_list_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
fn plain_list_item_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
|
||||
let current_item_indent_level: &usize =
|
||||
get_context_item_indent(context).ok_or(nom::Err::Error(CustomError::MyError(MyError(
|
||||
"Not inside a plain list item",
|
||||
))))?;
|
||||
let plain_list_item_matcher = parser_with_context!(plain_list_item)(context);
|
||||
let line_indented_lte_matcher = parser_with_context!(line_indented_lte)(context);
|
||||
alt((
|
||||
recognize(verify(plain_list_item_matcher, |pli| {
|
||||
pli.indentation <= *current_item_indent_level
|
||||
})),
|
||||
recognize(line_indented_lte_matcher),
|
||||
))(input)
|
||||
start_of_line(context, input)?;
|
||||
recognize(tuple((
|
||||
opt(blank_line),
|
||||
parser_with_context!(line_indented_lte)(context),
|
||||
)))(input)
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
@@ -262,8 +200,6 @@ fn line_indented_lte<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'
|
||||
"Not inside a plain list item",
|
||||
))))?;
|
||||
|
||||
start_of_line(context, input)?;
|
||||
|
||||
let matched = recognize(verify(
|
||||
tuple((space0::<&str, _>, non_whitespace_character)),
|
||||
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
|
||||
|
||||
@@ -23,6 +23,7 @@ pub enum Token<'s> {
|
||||
Atom(&'s str),
|
||||
List(Vec<Token<'s>>),
|
||||
TextWithProperties(TextWithProperties<'s>),
|
||||
Vector(Vec<Token<'s>>),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -59,6 +60,10 @@ impl<'s> TextWithProperties<'s> {
|
||||
out.push('\\');
|
||||
ParseState::Normal
|
||||
}
|
||||
(ParseState::Escape, '"') => {
|
||||
out.push('"');
|
||||
ParseState::Normal
|
||||
}
|
||||
_ => todo!(),
|
||||
};
|
||||
}
|
||||
@@ -73,6 +78,13 @@ enum ParseState {
|
||||
}
|
||||
|
||||
impl<'s> Token<'s> {
|
||||
pub fn as_vector<'p>(&'p self) -> Result<&'p Vec<Token<'s>>, Box<dyn std::error::Error>> {
|
||||
Ok(match self {
|
||||
Token::Vector(children) => Ok(children),
|
||||
_ => Err(format!("wrong token type {:?}", self)),
|
||||
}?)
|
||||
}
|
||||
|
||||
pub fn as_list<'p>(&'p self) -> Result<&'p Vec<Token<'s>>, Box<dyn std::error::Error>> {
|
||||
Ok(match self {
|
||||
Token::List(children) => Ok(children),
|
||||
@@ -136,7 +148,7 @@ pub fn sexp<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
fn token<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
|
||||
alt((list, atom))(input)
|
||||
alt((list, vector, atom))(input)
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
@@ -151,16 +163,33 @@ fn list<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
|
||||
Ok((remaining, Token::List(children)))
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
fn vector<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
|
||||
let (remaining, _) = tag("[")(input)?;
|
||||
let (remaining, children) = delimited(
|
||||
multispace0,
|
||||
separated_list1(multispace1, token),
|
||||
multispace0,
|
||||
)(remaining)?;
|
||||
let (remaining, _) = tag("]")(remaining)?;
|
||||
Ok((remaining, Token::Vector(children)))
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
fn atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
|
||||
not(peek(tag(")")))(input)?;
|
||||
alt((text_with_properties, quoted_atom, unquoted_atom))(input)
|
||||
not(peek(one_of(")]")))(input)?;
|
||||
alt((
|
||||
text_with_properties,
|
||||
hash_notation,
|
||||
quoted_atom,
|
||||
unquoted_atom,
|
||||
))(input)
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
fn unquoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
|
||||
let (remaining, body) = take_till1(|c| match c {
|
||||
' ' | '\t' | '\r' | '\n' | ')' => true,
|
||||
' ' | '\t' | '\r' | '\n' | ')' | ']' => true,
|
||||
_ => false,
|
||||
})(input)?;
|
||||
Ok((remaining, Token::Atom(body)))
|
||||
@@ -182,6 +211,18 @@ fn quoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
|
||||
Ok((remaining, Token::Atom(source)))
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
fn hash_notation<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
|
||||
let (remaining, _) = tag("#<")(input)?;
|
||||
let (remaining, _body) = take_till1(|c| match c {
|
||||
'>' => true,
|
||||
_ => false,
|
||||
})(remaining)?;
|
||||
let (remaining, _) = tag(">")(remaining)?;
|
||||
let source = get_consumed(input, remaining);
|
||||
Ok((remaining, Token::Atom(source)))
|
||||
}
|
||||
|
||||
fn text_with_properties<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
|
||||
let (remaining, _) = tag("#(")(input)?;
|
||||
let (remaining, (text, props)) = delimited(
|
||||
@@ -237,6 +278,7 @@ mod tests {
|
||||
Token::Atom(_) => false,
|
||||
Token::List(_) => true,
|
||||
Token::TextWithProperties(_) => false,
|
||||
Token::Vector(_) => false,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -249,6 +291,7 @@ mod tests {
|
||||
Token::Atom(_) => false,
|
||||
Token::List(_) => true,
|
||||
Token::TextWithProperties(_) => false,
|
||||
Token::Vector(_) => false,
|
||||
});
|
||||
let children = match parsed {
|
||||
Token::List(children) => children,
|
||||
@@ -308,6 +351,7 @@ mod tests {
|
||||
Token::Atom(_) => false,
|
||||
Token::List(_) => true,
|
||||
Token::TextWithProperties(_) => false,
|
||||
Token::Vector(_) => false,
|
||||
});
|
||||
let children = match parsed {
|
||||
Token::List(children) => children,
|
||||
|
||||
@@ -3,7 +3,7 @@ fn {name}() {{
|
||||
let todo_org_path = "{path}";
|
||||
let org_contents = std::fs::read_to_string(todo_org_path).expect("Read org file.");
|
||||
println!("{{}}", org_contents);
|
||||
let org_sexp = emacs_parse_org_document(todo_org_path).expect("Use emacs to parse org file.");
|
||||
let org_sexp = emacs_parse_org_document(org_contents.as_str()).expect("Use emacs to parse org file.");
|
||||
println!("{{}}", org_sexp);
|
||||
let (_remaining, parsed_sexp) = sexp_with_padding(org_sexp.as_str()).expect("Sexp Parse failure");
|
||||
let (remaining, rust_parsed) = document(org_contents.as_str()).expect("Org Parse failure");
|
||||
|
||||
Reference in New Issue
Block a user