Compare commits
55 Commits
v0.1.2
...
8561fdc1bd
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8561fdc1bd | ||
|
|
f2089257b0 | ||
|
|
09821c8898 | ||
|
|
69ecfd2646 | ||
|
|
8162f03051 | ||
|
|
d8c3285e3c | ||
|
|
5db6cd617e | ||
|
|
4cd3697fb0 | ||
|
|
2cd6f736c2 | ||
|
|
5686256039 | ||
|
|
7cf1b2d2b8 | ||
|
|
b848d7be73 | ||
|
|
74f4aa8d33 | ||
|
|
4776898894 | ||
|
|
8e95ce6368 | ||
|
|
6c9c304f37 | ||
|
|
7fafbfb6bb | ||
|
|
56281633f3 | ||
|
|
823c33ef8e | ||
|
|
e5e5120a10 | ||
|
|
7df393f31d | ||
|
|
72d5f8f35c | ||
|
|
dae46adc12 | ||
|
|
d0dc737c79 | ||
|
|
1c9877015d | ||
|
|
2938d5809a | ||
|
|
f7ec89858d | ||
|
|
67b4dfdce6 | ||
|
|
63d092c83d | ||
|
|
a7b298eeec | ||
|
|
1bbfbc3164 | ||
|
|
2bcc3f0599 | ||
|
|
b93a12c32c | ||
|
|
df3045e424 | ||
|
|
72b8fec1be | ||
|
|
ab17904b1c | ||
|
|
306878c95d | ||
|
|
5768c8acda | ||
|
|
e28290ed79 | ||
|
|
fbabf60559 | ||
|
|
92abac37e2 | ||
|
|
899073e54f | ||
|
|
eb379af78d | ||
|
|
422804d846 | ||
|
|
cc83431d62 | ||
|
|
00354ccc20 | ||
|
|
b75eed6b1e | ||
|
|
e33ec4a02c | ||
|
|
f7afcec824 | ||
|
|
cf0991fdff | ||
|
|
d1e0ee831c | ||
|
|
34985c9045 | ||
|
|
7da09fea74 | ||
|
|
fc28e3b514 | ||
|
|
df5ee5af16 |
@@ -1,3 +1,4 @@
|
||||
**/.git
|
||||
target
|
||||
Cargo.lock
|
||||
notes/
|
||||
|
||||
@@ -4,6 +4,10 @@ metadata:
|
||||
name: rust-test
|
||||
spec:
|
||||
pipelineSpec:
|
||||
timeouts:
|
||||
pipeline: "2h0m0s"
|
||||
tasks: "1h0m40s"
|
||||
finally: "0h30m0s"
|
||||
params:
|
||||
- name: image-name
|
||||
description: The name for the built image
|
||||
@@ -201,7 +205,6 @@ spec:
|
||||
secret:
|
||||
secretName: harbor-plain
|
||||
serviceAccountName: build-bot
|
||||
timeout: 240h0m0s
|
||||
params:
|
||||
- name: image-name
|
||||
value: "harbor.fizz.buzz/private/organic-test"
|
||||
|
||||
@@ -10,6 +10,12 @@ readme = "README.md"
|
||||
keywords = ["emacs", "org-mode"]
|
||||
categories = ["parsing"]
|
||||
resolver = "2"
|
||||
include = [
|
||||
"LICENSE",
|
||||
"**/*.rs",
|
||||
"Cargo.toml",
|
||||
"tests/*"
|
||||
]
|
||||
|
||||
[lib]
|
||||
name = "organic"
|
||||
|
||||
16
Makefile
16
Makefile
@@ -35,7 +35,17 @@ clean:
|
||||
|
||||
.PHONY: test
|
||||
test:
|
||||
> cargo test --lib --test test_loader -- --test-threads $(TESTJOBS)
|
||||
> cargo test --no-fail-fast --lib --test test_loader -- --test-threads $(TESTJOBS)
|
||||
|
||||
.PHONY: dockertest
|
||||
dockertest:
|
||||
> $(MAKE) -C docker/organic_test
|
||||
> docker run --init --rm -i -t -v "$$(readlink -f ./):/source:ro" --mount source=cargo-cache,target=/usr/local/cargo/registry --mount source=rust-cache,target=/target --env CARGO_TARGET_DIR=/target -w /source organic-test cargo test --no-fail-fast --lib --test test_loader -- --test-threads $(TESTJOBS)
|
||||
|
||||
.PHONY: dockerclean
|
||||
dockerclean:
|
||||
# Delete volumes created for running the tests in docker. This does not touch anything related to the jaeger docker container.
|
||||
> docker volume rm cargo-cache rust-cache
|
||||
|
||||
.PHONY: integrationtest
|
||||
integrationtest:
|
||||
@@ -49,8 +59,8 @@ unittest:
|
||||
jaeger:
|
||||
# 4317 for OTLP gRPC, 4318 for OTLP HTTP. We currently use gRPC but I forward both ports regardless.
|
||||
#
|
||||
# These flags didn't help even though they seem like they would: --collector.otlp.grpc.max-message-size=10000000 --collector.queue-size=20000 --collector.num-workers=100
|
||||
> docker run -d --rm --name organicdocker -p 4317:4317 -p 4318:4318 -p 16686:16686 -e COLLECTOR_OTLP_ENABLED=true jaegertracing/all-in-one:1.47 --collector.grpc-server.max-message-size=10000000
|
||||
# These flags didn't help even though they seem like they would: --collector.queue-size=20000 --collector.num-workers=100
|
||||
> docker run -d --rm --name organicdocker -p 4317:4317 -p 4318:4318 -p 16686:16686 -e COLLECTOR_OTLP_ENABLED=true jaegertracing/all-in-one:1.47 --collector.grpc-server.max-message-size=20000000 --collector.otlp.grpc.max-message-size=20000000
|
||||
|
||||
.PHONY: jaegerweb
|
||||
jaegerweb:
|
||||
|
||||
11
build.rs
11
build.rs
@@ -41,6 +41,7 @@ fn write_test(test_file: &mut File, test: &walkdir::DirEntry) {
|
||||
.strip_suffix(".org")
|
||||
.expect("Should have .org extension")
|
||||
.replace("/", "_");
|
||||
let test_name = format!("autogen_{}", test_name);
|
||||
|
||||
if let Some(_reason) = is_expect_fail(test_name.as_str()) {
|
||||
write!(test_file, "#[ignore]\n").unwrap();
|
||||
@@ -71,10 +72,12 @@ use organic::parser::sexp::sexp_with_padding;
|
||||
|
||||
fn is_expect_fail(name: &str) -> Option<&str> {
|
||||
match name {
|
||||
"drawer_drawer_with_headline_inside" => Some("Apparently lines with :end: become their own paragraph. This odd behavior needs to be investigated more."),
|
||||
"element_container_priority_footnote_definition_dynamic_block" => Some("Apparently broken begin lines become their own paragraph."),
|
||||
"paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."),
|
||||
"export_snippet_paragraph_break_precedent" => Some("Emacs 28 has broken behavior so the tests in the CI fail."),
|
||||
"autogen_greater_element_drawer_drawer_with_headline_inside" => Some("Apparently lines with :end: become their own paragraph. This odd behavior needs to be investigated more."),
|
||||
"autogen_element_container_priority_footnote_definition_dynamic_block" => Some("Apparently broken begin lines become their own paragraph."),
|
||||
"autogen_lesser_element_paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."),
|
||||
"autogen_greater_element_plain_list_trailing_whitespace_ownership_test_case_1" => Some("Seeing odd behavior about whitespace ownership."), // https://list.orgmode.org/9372527e-3852-419e-936a-7b4dd38cc847@app.fastmail.com/
|
||||
"autogen_greater_element_plain_list_trailing_whitespace_ownership_test_case_3" => Some("Seeing odd behavior about whitespace ownership."), // https://list.orgmode.org/9372527e-3852-419e-936a-7b4dd38cc847@app.fastmail.com/
|
||||
"autogen_greater_element_plain_list_trailing_whitespace_ownership_test_case_4" => Some("Seeing odd behavior about whitespace ownership."), // https://list.orgmode.org/9372527e-3852-419e-936a-7b4dd38cc847@app.fastmail.com/
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ all: build push
|
||||
|
||||
.PHONY: build
|
||||
build:
|
||||
docker build -t $(IMAGE_NAME) -f Dockerfile ../
|
||||
docker build -t $(IMAGE_NAME) -f Dockerfile ../../
|
||||
|
||||
.PHONY: push
|
||||
push:
|
||||
|
||||
@@ -6,7 +6,7 @@ all: build push
|
||||
|
||||
.PHONY: build
|
||||
build:
|
||||
docker build -t $(IMAGE_NAME) -f Dockerfile ../
|
||||
docker build -t $(IMAGE_NAME) -f Dockerfile ../../
|
||||
|
||||
.PHONY: push
|
||||
push:
|
||||
|
||||
@@ -1,4 +1,30 @@
|
||||
FROM rustlang/rust:nightly-alpine3.17
|
||||
FROM alpine:3.17 AS build
|
||||
|
||||
RUN apk add --no-cache musl-dev emacs
|
||||
RUN apk add --no-cache build-base musl-dev git autoconf make texinfo gnutls-dev ncurses-dev gawk
|
||||
|
||||
|
||||
FROM build AS build-emacs
|
||||
|
||||
RUN git clone --depth 1 --branch emacs-29.1 https://git.savannah.gnu.org/git/emacs.git /root/emacs
|
||||
WORKDIR /root/emacs
|
||||
RUN mkdir /root/dist
|
||||
RUN ./autogen.sh
|
||||
RUN ./configure --prefix /usr --without-x --without-sound
|
||||
RUN make
|
||||
RUN make DESTDIR="/root/dist" install
|
||||
|
||||
|
||||
FROM build AS build-org-mode
|
||||
COPY --from=build-emacs /root/dist/ /
|
||||
RUN mkdir /root/dist
|
||||
RUN mkdir /root/org-mode && git -C /root/org-mode init --initial-branch=main && git -C /root/org-mode remote add origin https://git.savannah.gnu.org/git/emacs/org-mode.git && git -C /root/org-mode fetch origin 299193bf091a63474fc8036bd31de51800a2555a && git -C /root/org-mode checkout FETCH_HEAD
|
||||
WORKDIR /root/org-mode
|
||||
RUN make compile
|
||||
RUN make DESTDIR="/root/dist" install
|
||||
|
||||
|
||||
FROM rustlang/rust:nightly-alpine3.17
|
||||
RUN apk add --no-cache musl-dev ncurses gnutls
|
||||
RUN cargo install --locked --no-default-features --features ci-autoclean cargo-cache
|
||||
COPY --from=build-emacs /root/dist/ /
|
||||
COPY --from=build-org-mode /root/dist/ /
|
||||
|
||||
@@ -6,7 +6,7 @@ all: build push
|
||||
|
||||
.PHONY: build
|
||||
build:
|
||||
docker build -t $(IMAGE_NAME) -f Dockerfile ../
|
||||
docker build -t $(IMAGE_NAME) -f Dockerfile ../../
|
||||
|
||||
.PHONY: push
|
||||
push:
|
||||
|
||||
27
notes/optimization_ideas.org
Normal file
27
notes/optimization_ideas.org
Normal file
@@ -0,0 +1,27 @@
|
||||
* Analysis
|
||||
** Parse start per character
|
||||
It might help analysis to record how often we start a specific type of parse for each character. For example, at the start of a plain list, if we had a count of how often each character was the start of a parse of a list we could use that to see how often that list is getting re-parsed.
|
||||
* Optimizations
|
||||
** Edit whitespace for list items
|
||||
Whether or not a list item owns the trailing whitespace depends on whether it is the last list item in that list. Since we do not know ahead of time if an item is the last item in the list, we have to either re-parse the list item or modify it after parsing.
|
||||
|
||||
*** For
|
||||
We are already modifying the source of some elements after-the-fact with src_rust{set_source()}, so this would be more of the same.
|
||||
*** Against
|
||||
I'd like to phase out such modifications because they seem hacky and fragile.
|
||||
** Make detect element function
|
||||
Some exit matchers are based on when the next element is found. Some elements do not need to be fully parsed to identify that they are a valid element. For example, src_org{1. foo} can already be identified as the start of a plain list (in the right context) without needing to parse the entire element.
|
||||
*** For
|
||||
Avoiding parsing the entire element for an exit matcher would reduce redundant parses.
|
||||
*** Against
|
||||
This adds code complexity and introduces the potential for bugs.
|
||||
|
||||
How many elements can be reasonably early-detected? For example, src_org{#+begin_src foo} is not enough to detect the start of a source block because without the src_org{#+end_src} it is just plain text.
|
||||
** Grab multiple characters in plaintext parser before checking exit matcher
|
||||
Currently we check the exit matcher after each character inside the plain text parser (and many others). Are there character sequences between which we can assume no exit matcher will trigger? For example, a contiguous string of Latin-alphabet letters?
|
||||
*** For
|
||||
This could significantly reduce our calls to exit matchers.
|
||||
*** Against
|
||||
I think targets would break this.
|
||||
|
||||
The exit matchers are already implicitly building this behavior since they should all exit very early when the starting character is wrong. Putting this logic in a centralized place, far away from where those characters are actually going to be used, is unfortunate for readability.
|
||||
@@ -0,0 +1,9 @@
|
||||
1. foo
|
||||
|
||||
1. bar
|
||||
|
||||
2. baz
|
||||
|
||||
2. lorem
|
||||
|
||||
ipsum
|
||||
@@ -0,0 +1,11 @@
|
||||
1. foo
|
||||
|
||||
1. bar
|
||||
|
||||
2. baz
|
||||
|
||||
cat
|
||||
|
||||
2. lorem
|
||||
|
||||
ipsum
|
||||
@@ -0,0 +1,10 @@
|
||||
1. cat
|
||||
1. foo
|
||||
|
||||
1. bar
|
||||
|
||||
2. baz
|
||||
|
||||
2. lorem
|
||||
|
||||
ipsum
|
||||
@@ -0,0 +1,12 @@
|
||||
1. cat
|
||||
1. foo
|
||||
|
||||
1. bar
|
||||
|
||||
2. baz
|
||||
|
||||
2. lorem
|
||||
|
||||
2. dog
|
||||
|
||||
ipsum
|
||||
@@ -0,0 +1,18 @@
|
||||
foo bar.
|
||||
|
||||
|
||||
|
||||
|
||||
* Lorem
|
||||
baz
|
||||
|
||||
|
||||
|
||||
|
||||
* Ipsum
|
||||
alpha
|
||||
|
||||
|
||||
|
||||
|
||||
beta
|
||||
46
scripts/run_docker_compare.bash
Executable file
46
scripts/run_docker_compare.bash
Executable file
@@ -0,0 +1,46 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
set -euo pipefail
|
||||
IFS=$'\n\t'
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
||||
|
||||
: ${SHELL:="NO"} # or YES to launch a shell instead of running the test
|
||||
: ${TRACE:="NO"} # or YES to send traces to jaeger
|
||||
: ${BACKTRACE:="NO"} # or YES to print a rust backtrace when panicking
|
||||
|
||||
cd "$DIR/../"
|
||||
REALPATH=$(command -v uu-realpath || command -v realpath)
|
||||
MAKE=$(command -v gmake || command -v make)
|
||||
|
||||
function main {
|
||||
build_container
|
||||
launch_container
|
||||
}
|
||||
|
||||
function build_container {
|
||||
$MAKE -C "$DIR/../docker/organic_test"
|
||||
}
|
||||
|
||||
# Run the organic-test image with the current directory mounted read-only at /source.
#
# Behaviour is controlled by three variables:
#   SHELL=YES      allocate a TTY and pass no command to the image;
#                  any other value passes `cargo run` as the command.
#   TRACE=YES      use the host network and set RUST_LOG=debug.
#   BACKTRACE=YES  set RUST_BACKTRACE=full.
function launch_container {
    local additional_flags=()
    local additional_args=()

    if [ "$SHELL" != "YES" ]; then
        additional_args+=(cargo run)
    else
        additional_flags+=(-t)
    fi

    if [ "$TRACE" = "YES" ]; then
        # We use the host network so it can talk to jaeger hosted at 127.0.0.1
        additional_flags+=(--network=host --env RUST_LOG=debug)
    fi

    if [ "$BACKTRACE" = "YES" ]; then
        additional_flags+=(--env RUST_BACKTRACE=full)
    fi

    # One of the two arrays is always empty on a default run. With `set -u`,
    # bash releases before 4.4 treat "${array[@]}" on an empty array as an
    # unbound variable, so both are expanded through ${array[@]+...}.
    docker run ${additional_flags[@]+"${additional_flags[@]}"} --init --rm -i -v "$("$REALPATH" ./):/source:ro" --mount source=cargo-cache,target=/usr/local/cargo/registry --mount source=rust-cache,target=/target --env CARGO_TARGET_DIR=/target -w /source organic-test ${additional_args[@]+"${additional_args[@]}"}
}
|
||||
|
||||
main "${@}"
|
||||
57
scripts/run_docker_integration_test.bash
Executable file
57
scripts/run_docker_integration_test.bash
Executable file
@@ -0,0 +1,57 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
set -euo pipefail
|
||||
IFS=$'\n\t'
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
||||
|
||||
cd "$DIR/../"
|
||||
REALPATH=$(command -v uu-realpath || command -v realpath)
|
||||
MAKE=$(command -v gmake || command -v make)
|
||||
|
||||
# Resolve the arguments to test names, build the test image, then run one
# container per test name.
function main {
    # Declared separately from the assignment: `local x=$(cmd)` returns the
    # status of `local`, which would discard a failure from get_test_names.
    local test_names
    test_names=$(get_test_names "${@}")
    build_container

    local test
    # -r keeps backslashes in a name literal instead of treating them as escapes.
    while read -r test; do
        launch_container "$test"
    done <<<"$test_names"
}
|
||||
|
||||
function build_container {
|
||||
$MAKE -C "$DIR/../docker/organic_test"
|
||||
}
|
||||
|
||||
# Print one lower-cased test name per argument.
#
# An argument that names an existing file is converted to its path relative to
# the org_mode_samples directory, with the trailing ".org" removed and every
# "/" replaced by "_". Any other argument is treated as a test name already
# and is only lower-cased.
function get_test_names {
    local test_file
    local samples_dir
    samples_dir=$("$REALPATH" "$DIR/../org_mode_samples")
    for test_file in "$@"
    do
        if [ -e "$test_file" ]; then
            local test_file_full_path
            local relative_to_samples
            local without_extension
            test_file_full_path=$("$REALPATH" "$test_file")
            relative_to_samples=$("$REALPATH" --relative-to "$samples_dir" "$test_file_full_path")
            without_extension="${relative_to_samples%.org}"
            # `//` replaces every slash. A single `/` would only replace the
            # first one, leaving a slash in the name for samples nested more
            # than one directory deep.
            echo "${without_extension//\//_}" | tr '[:upper:]' '[:lower:]'
        else
            echo "$test_file" | tr '[:upper:]' '[:lower:]'
        fi
    done
}
|
||||
|
||||
# Run `cargo test` for a single test-name filter inside the organic-test image,
# with the current directory mounted read-only at /source.
#
# $1: the test-name filter handed to cargo test (may be empty).
function launch_container {
    local test="$1"

    # The heredoc delimiter is quoted so nothing is expanded on the host. The
    # test name reaches the inner shell as "$1" rather than being spliced into
    # the script text, so quotes or `$` in the name cannot change the command.
    local init_script
    init_script=$(cat <<'EOF'
set -euo pipefail
IFS=$'\n\t'

cargo test --no-fail-fast --lib --test test_loader "$1" -- --show-output
EOF
)

    docker run --init --rm -v "$("$REALPATH" ./):/source:ro" --mount source=cargo-cache,target=/usr/local/cargo/registry --mount source=rust-cache,target=/target --env CARGO_TARGET_DIR=/target -w /source organic-test sh -c "$init_script" sh "$test"
}
|
||||
|
||||
|
||||
main "${@}"
|
||||
@@ -4,17 +4,27 @@ set -euo pipefail
|
||||
IFS=$'\n\t'
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
||||
|
||||
cd "$DIR/../"
|
||||
REALPATH=$(command -v uu-realpath || command -v realpath)
|
||||
|
||||
samples_dir=$(readlink -f "$DIR/../org_mode_samples")
|
||||
function main {
|
||||
local test_names=$(get_test_names "${@}")
|
||||
|
||||
local test
|
||||
while read test; do
|
||||
cargo test --no-fail-fast --test test_loader "$test" -- --show-output
|
||||
done<<<"$test_names"
|
||||
}
|
||||
|
||||
function get_test_names {
|
||||
local test_file
|
||||
local samples_dir=$($REALPATH "$DIR/../org_mode_samples")
|
||||
for test_file in "$@"
|
||||
do
|
||||
if [ -e "$test_file" ]; then
|
||||
test_file_full_path=$(readlink -f "$test_file")
|
||||
relative_to_samples=$($REALPATH --relative-to "$samples_dir" "$test_file_full_path")
|
||||
without_extension="${relative_to_samples%.org}"
|
||||
local test_file_full_path=$($REALPATH "$test_file")
|
||||
local relative_to_samples=$($REALPATH --relative-to "$samples_dir" "$test_file_full_path")
|
||||
local without_extension="${relative_to_samples%.org}"
|
||||
echo "${without_extension/\//_}" | tr '[:upper:]' '[:lower:]'
|
||||
else
|
||||
echo "$test_file" | tr '[:upper:]' '[:lower:]'
|
||||
@@ -22,6 +32,4 @@ function get_test_names {
|
||||
done
|
||||
}
|
||||
|
||||
get_test_names "$@" | while read test; do
|
||||
(cd "$DIR/../" && cargo test --no-fail-fast --test test_loader "$test" -- --show-output)
|
||||
done
|
||||
main "${@}"
|
||||
|
||||
@@ -1,22 +1,25 @@
|
||||
use std::path::Path;
|
||||
use std::process::Command;
|
||||
|
||||
pub fn emacs_parse_org_document<'a, C>(file_path: C) -> Result<String, Box<dyn std::error::Error>>
|
||||
pub fn emacs_parse_org_document<C>(file_contents: C) -> Result<String, Box<dyn std::error::Error>>
|
||||
where
|
||||
C: AsRef<Path>,
|
||||
C: AsRef<str>,
|
||||
{
|
||||
let elisp_script = r#"(progn
|
||||
let escaped_file_contents = escape_elisp_string(file_contents);
|
||||
let elisp_script = format!(
|
||||
r#"(progn
|
||||
(erase-buffer)
|
||||
(insert "{escaped_file_contents}")
|
||||
(org-mode)
|
||||
(message "%s" (pp-to-string (org-element-parse-buffer)))
|
||||
)"#;
|
||||
)"#,
|
||||
escaped_file_contents = escaped_file_contents
|
||||
);
|
||||
let mut cmd = Command::new("emacs");
|
||||
let proc = cmd
|
||||
.arg("-q")
|
||||
.arg("--no-site-file")
|
||||
.arg("--no-splash")
|
||||
.arg("--batch")
|
||||
.arg("--insert")
|
||||
.arg(file_path.as_ref().as_os_str())
|
||||
.arg("--eval")
|
||||
.arg(elisp_script);
|
||||
let out = proc.output()?;
|
||||
@@ -24,3 +27,25 @@ where
|
||||
let org_sexp = out.stderr;
|
||||
Ok(String::from_utf8(org_sexp)?)
|
||||
}
|
||||
|
||||
/// Escapes `file_contents` so it can be placed between the double quotes of an
/// Emacs Lisp string literal.
///
/// Each `"` and `\` is prefixed with a backslash; every other character is
/// copied through unchanged.
fn escape_elisp_string<C>(file_contents: C) -> String
where
    C: AsRef<str>,
{
    let raw = file_contents.as_ref();
    // Reserve about 10% more than the input so that a moderate number of
    // escape characters does not force a reallocation while building.
    let mut escaped = String::with_capacity(raw.len() + (raw.len() / 10));
    for ch in raw.chars() {
        if matches!(ch, '"' | '\\') {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
|
||||
|
||||
@@ -48,6 +48,21 @@ pub fn assert_bounds<'s, S: Source<'s>>(
|
||||
.nth(1)
|
||||
.ok_or("Should have an attributes child.")?;
|
||||
let attributes_map = attributes_child.as_map()?;
|
||||
let standard_properties = attributes_map.get(":standard-properties");
|
||||
let (begin, end) = if standard_properties.is_some() {
|
||||
let std_props = standard_properties
|
||||
.expect("if statement proves its Some")
|
||||
.as_vector()?;
|
||||
let begin = std_props
|
||||
.get(0)
|
||||
.ok_or("Missing first element in standard properties")?
|
||||
.as_atom()?;
|
||||
let end = std_props
|
||||
.get(1)
|
||||
.ok_or("Missing first element in standard properties")?
|
||||
.as_atom()?;
|
||||
(begin, end)
|
||||
} else {
|
||||
let begin = attributes_map
|
||||
.get(":begin")
|
||||
.ok_or("Missing :begin attribute.")?
|
||||
@@ -56,6 +71,8 @@ pub fn assert_bounds<'s, S: Source<'s>>(
|
||||
.get(":end")
|
||||
.ok_or("Missing :end attribute.")?
|
||||
.as_atom()?;
|
||||
(begin, end)
|
||||
};
|
||||
let (rust_begin, rust_end) = get_offsets(source, rust);
|
||||
if (rust_begin + 1).to_string() != begin || (rust_end + 1).to_string() != end {
|
||||
Err(format!("Rust bounds ({rust_begin}, {rust_end}) do not match emacs bounds ({emacs_begin}, {emacs_end})", rust_begin = rust_begin + 1, rust_end = rust_end + 1, emacs_begin=begin, emacs_end=end))?;
|
||||
|
||||
@@ -15,7 +15,8 @@ pub fn init_telemetry() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// TODO: I think the endpoint can be controlled by the OTEL_EXPORTER_OTLP_TRACES_ENDPOINT env variable instead of hard-coded into this code base. Regardless, I am the only developer right now so I am not too concerned.
|
||||
let exporter = opentelemetry_otlp::new_exporter()
|
||||
.tonic()
|
||||
.with_endpoint("http://localhost:4317/v1/traces");
|
||||
// Using "localhost" is broken inside the docker container when tracing
|
||||
.with_endpoint("http://127.0.0.1:4317/v1/traces");
|
||||
|
||||
let tracer = opentelemetry_otlp::new_pipeline()
|
||||
.tracing()
|
||||
|
||||
55
src/main.rs
55
src/main.rs
@@ -1,5 +1,6 @@
|
||||
#![feature(round_char_boundary)]
|
||||
use std::path::Path;
|
||||
#[cfg(feature = "compare")]
|
||||
use std::io::Read;
|
||||
|
||||
#[cfg(feature = "compare")]
|
||||
use ::organic::parser::document;
|
||||
@@ -10,8 +11,11 @@ use organic::emacs_parse_org_document;
|
||||
#[cfg(feature = "compare")]
|
||||
use organic::parser::sexp::sexp_with_padding;
|
||||
|
||||
#[cfg(feature = "tracing")]
|
||||
use crate::init_tracing::init_telemetry;
|
||||
#[cfg(feature = "tracing")]
|
||||
use crate::init_tracing::shutdown_telemetry;
|
||||
#[cfg(feature = "tracing")]
|
||||
mod init_tracing;
|
||||
|
||||
#[cfg(not(feature = "tracing"))]
|
||||
@@ -22,40 +26,47 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
#[cfg(feature = "tracing")]
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let rt = tokio::runtime::Runtime::new()?;
|
||||
let result = rt.block_on(async { main_body() });
|
||||
let result = rt.block_on(async {
|
||||
init_telemetry()?;
|
||||
let main_body_result = main_body();
|
||||
shutdown_telemetry()?;
|
||||
main_body_result
|
||||
});
|
||||
result
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
fn main_body() -> Result<(), Box<dyn std::error::Error>> {
|
||||
init_telemetry()?;
|
||||
run_compare(
|
||||
std::env::args()
|
||||
.nth(1)
|
||||
.expect("Pass a single file into this script."),
|
||||
)?;
|
||||
shutdown_telemetry()?;
|
||||
Ok(())
|
||||
#[cfg(not(feature = "compare"))]
|
||||
let org_contents = "";
|
||||
#[cfg(feature = "compare")]
|
||||
let org_contents = read_stdin_to_string()?;
|
||||
run_compare(org_contents)
|
||||
}
|
||||
|
||||
#[cfg(feature = "compare")]
|
||||
fn run_compare<P: AsRef<Path>>(todo_org_path: P) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let org_contents = std::fs::read_to_string(todo_org_path.as_ref()).expect("Read org file.");
|
||||
let (remaining, rust_parsed) = document(org_contents.as_str()).expect("Org Parse failure");
|
||||
let org_sexp =
|
||||
emacs_parse_org_document(todo_org_path.as_ref()).expect("Use emacs to parse org file.");
|
||||
/// Reads all of standard input into a `String`.
///
/// # Errors
///
/// Returns the underlying I/O error if reading fails, which includes input
/// that is not valid UTF-8.
fn read_stdin_to_string() -> Result<String, Box<dyn std::error::Error>> {
    let mut buffer = String::new();
    let mut handle = std::io::stdin().lock();
    handle.read_to_string(&mut buffer)?;
    Ok(buffer)
}
|
||||
|
||||
#[cfg(feature = "compare")]
|
||||
fn run_compare<P: AsRef<str>>(org_contents: P) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let (remaining, rust_parsed) = document(org_contents.as_ref()).expect("Org Parse failure");
|
||||
let org_sexp = emacs_parse_org_document(org_contents.as_ref())?;
|
||||
let (_remaining, parsed_sexp) =
|
||||
sexp_with_padding(org_sexp.as_str()).expect("Sexp Parse failure");
|
||||
|
||||
println!("{}\n\n\n", org_contents.as_str());
|
||||
println!("{}\n\n\n", org_contents.as_ref());
|
||||
println!("{}", org_sexp);
|
||||
println!("{:#?}", rust_parsed);
|
||||
|
||||
// We do the diffing after printing out both parsed forms in case the diffing panics
|
||||
let diff_result =
|
||||
compare_document(&parsed_sexp, &rust_parsed).expect("Compare parsed documents.");
|
||||
diff_result
|
||||
.print()
|
||||
.expect("Print document parse tree diff.");
|
||||
let diff_result = compare_document(&parsed_sexp, &rust_parsed)?;
|
||||
diff_result.print()?;
|
||||
|
||||
if diff_result.is_bad() {
|
||||
Err("Diff results do not match.")?;
|
||||
@@ -68,7 +79,7 @@ fn run_compare<P: AsRef<Path>>(todo_org_path: P) -> Result<(), Box<dyn std::erro
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "compare"))]
|
||||
fn run_compare<P: AsRef<Path>>(_todo_org_path: P) -> Result<(), Box<dyn std::error::Error>> {
|
||||
fn run_compare<P: AsRef<str>>(_org_contents: P) -> Result<(), Box<dyn std::error::Error>> {
|
||||
println!("This program was built with compare disabled. Doing nothing.");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -160,7 +160,7 @@ fn zeroth_section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s
|
||||
opt(parser_with_context!(comment)(
|
||||
&without_consuming_whitespace_context,
|
||||
)),
|
||||
parser_with_context!(property_drawer)(&without_consuming_whitespace_context),
|
||||
parser_with_context!(property_drawer)(context),
|
||||
many0(blank_line),
|
||||
)))(input)?;
|
||||
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::tag;
|
||||
use nom::character::complete::anychar;
|
||||
use nom::combinator::opt;
|
||||
use nom::combinator::peek;
|
||||
use nom::combinator::recognize;
|
||||
use nom::combinator::verify;
|
||||
use nom::multi::many1;
|
||||
@@ -11,6 +9,9 @@ use nom::sequence::tuple;
|
||||
|
||||
use super::Context;
|
||||
use crate::error::Res;
|
||||
use crate::parser::exiting::ExitClass;
|
||||
use crate::parser::parser_context::ContextElement;
|
||||
use crate::parser::parser_context::ExitMatcherNode;
|
||||
use crate::parser::parser_with_context::parser_with_context;
|
||||
use crate::parser::util::exit_matcher_parser;
|
||||
use crate::parser::util::get_consumed;
|
||||
@@ -23,8 +24,15 @@ pub fn export_snippet<'r, 's>(
|
||||
) -> Res<&'s str, ExportSnippet<'s>> {
|
||||
let (remaining, _) = tag("@@")(input)?;
|
||||
let (remaining, backend_name) = backend(context, remaining)?;
|
||||
let (remaining, backend_contents) =
|
||||
opt(tuple((tag(":"), parser_with_context!(contents)(context))))(remaining)?;
|
||||
let parser_context =
|
||||
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
|
||||
class: ExitClass::Beta,
|
||||
exit_matcher: &export_snippet_end,
|
||||
}));
|
||||
let (remaining, backend_contents) = opt(tuple((
|
||||
tag(":"),
|
||||
parser_with_context!(contents)(&parser_context),
|
||||
)))(remaining)?;
|
||||
let (remaining, _) = tag("@@")(remaining)?;
|
||||
let source = get_consumed(input, remaining);
|
||||
Ok((
|
||||
@@ -48,14 +56,13 @@ fn backend<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
fn contents<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
|
||||
let (remaining, source) = recognize(verify(
|
||||
many_till(
|
||||
anychar,
|
||||
peek(alt((
|
||||
parser_with_context!(exit_matcher_parser)(context),
|
||||
tag("@@"),
|
||||
))),
|
||||
),
|
||||
many_till(anychar, parser_with_context!(exit_matcher_parser)(context)),
|
||||
|(children, _exit_contents)| !children.is_empty(),
|
||||
))(input)?;
|
||||
Ok((remaining, source))
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
fn export_snippet_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
|
||||
tag("@@")(input)
|
||||
}
|
||||
|
||||
@@ -200,7 +200,7 @@ pub fn src_block<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s st
|
||||
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
|
||||
.with_additional_node(ContextElement::Context("lesser block"))
|
||||
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
|
||||
class: ExitClass::Beta,
|
||||
class: ExitClass::Alpha,
|
||||
exit_matcher: &lesser_block_end_specialized,
|
||||
}));
|
||||
let parameters = match parameters {
|
||||
@@ -238,16 +238,25 @@ fn lesser_block_end(
|
||||
) -> impl for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, &'s str> {
|
||||
let current_name_lower = current_name.to_lowercase();
|
||||
move |context: Context, input: &str| {
|
||||
_lesser_block_end(context, input, current_name_lower.as_str())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
fn _lesser_block_end<'r, 's, 'x>(
|
||||
context: Context<'r, 's>,
|
||||
input: &'s str,
|
||||
current_name_lower: &'x str,
|
||||
) -> Res<&'s str, &'s str> {
|
||||
start_of_line(context, input)?;
|
||||
let (remaining, _leading_whitespace) = space0(input)?;
|
||||
let (remaining, (_begin, _name, _ws)) = tuple((
|
||||
tag_no_case("#+end_"),
|
||||
tag_no_case(current_name_lower.as_str()),
|
||||
tag_no_case(current_name_lower),
|
||||
alt((eof, line_ending)),
|
||||
))(remaining)?;
|
||||
let source = get_consumed(input, remaining);
|
||||
Ok((remaining, source))
|
||||
}
|
||||
}
|
||||
|
||||
fn lesser_block_begin(
|
||||
@@ -255,6 +264,16 @@ fn lesser_block_begin(
|
||||
) -> impl for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, &'s str> {
|
||||
let current_name_lower = current_name.to_lowercase();
|
||||
move |context: Context, input: &str| {
|
||||
_lesser_block_begin(context, input, current_name_lower.as_str())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
fn _lesser_block_begin<'r, 's, 'x>(
|
||||
context: Context<'r, 's>,
|
||||
input: &'s str,
|
||||
current_name_lower: &'x str,
|
||||
) -> Res<&'s str, &'s str> {
|
||||
start_of_line(context, input)?;
|
||||
let (remaining, _leading_whitespace) = space0(input)?;
|
||||
let (remaining, (_begin, name)) = tuple((
|
||||
@@ -264,5 +283,4 @@ fn lesser_block_begin(
|
||||
}),
|
||||
))(remaining)?;
|
||||
Ok((remaining, name))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,16 +6,12 @@ use nom::character::complete::one_of;
|
||||
use nom::character::complete::space0;
|
||||
use nom::character::complete::space1;
|
||||
use nom::combinator::eof;
|
||||
use nom::combinator::peek;
|
||||
use nom::combinator::opt;
|
||||
use nom::combinator::recognize;
|
||||
use nom::combinator::verify;
|
||||
use nom::multi::many1;
|
||||
use nom::multi::many_till;
|
||||
use nom::sequence::preceded;
|
||||
use nom::sequence::terminated;
|
||||
use nom::sequence::tuple;
|
||||
#[cfg(feature = "tracing")]
|
||||
use tracing::span;
|
||||
|
||||
use super::greater_element::PlainList;
|
||||
use super::greater_element::PlainListItem;
|
||||
@@ -32,6 +28,7 @@ use crate::parser::parser_context::ExitMatcherNode;
|
||||
use crate::parser::util::blank_line;
|
||||
use crate::parser::util::exit_matcher_parser;
|
||||
use crate::parser::util::get_consumed;
|
||||
use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting;
|
||||
use crate::parser::util::start_of_line;
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
@@ -42,107 +39,64 @@ pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s
|
||||
class: ExitClass::Beta,
|
||||
exit_matcher: &plain_list_end,
|
||||
}));
|
||||
let without_consume_context =
|
||||
parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(false));
|
||||
let with_consume_context =
|
||||
parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true));
|
||||
let without_consume_matcher = parser_with_context!(plain_list_item)(&without_consume_context);
|
||||
let with_consume_matcher = parser_with_context!(plain_list_item)(&with_consume_context);
|
||||
let exit_matcher = parser_with_context!(exit_matcher_parser)(&with_consume_context);
|
||||
// children stores tuple of (input string, parsed object) so we can re-parse the final item
|
||||
let mut children = Vec::new();
|
||||
let mut first_item_indentation: Option<usize> = None;
|
||||
let mut remaining = input;
|
||||
|
||||
// The final list item does not consume trailing blank lines (which instead get consumed by the list). We have three options here:
|
||||
//
|
||||
// 1. Parse all items while consuming trailing whitespace, then edit the final item to remove trailing whitespace.
|
||||
// 2. Parse all items without consuming trailing whitespace, then edit all but the final one to add in the trailing whitespace.
|
||||
// 3. Re-parse the final item with consume trailing whitespace disabled.
|
||||
//
|
||||
// While #3 is the most slow, it also seems to cleanest and involves the least manual mutation of already-parsed objects so I am going with #3 for now, but we should revisit #1 or #2 when the parser is more developed.
|
||||
|
||||
loop {
|
||||
/*
|
||||
Trailing whitespace belongs to the plain list, not the plain list item
|
||||
|
||||
Possible outcomes:
|
||||
Don't consume, yes exit matcher
|
||||
Don't consume, no additional item
|
||||
Consume, additional item
|
||||
*/
|
||||
{
|
||||
// Don't consume, yes exit matcher
|
||||
#[cfg(feature = "tracing")]
|
||||
let span = span!(tracing::Level::DEBUG, "first");
|
||||
#[cfg(feature = "tracing")]
|
||||
let _enter = span.enter();
|
||||
|
||||
let last_item_then_exit = tuple((without_consume_matcher, exit_matcher))(remaining);
|
||||
match last_item_then_exit {
|
||||
Ok((remain, (item, _exit)))
|
||||
if item.indentation
|
||||
== *first_item_indentation.get_or_insert(item.indentation) =>
|
||||
{
|
||||
remaining = remain;
|
||||
children.push(item);
|
||||
break;
|
||||
}
|
||||
Ok(_) | Err(_) => {}
|
||||
};
|
||||
}
|
||||
|
||||
{
|
||||
// Consume, additional item
|
||||
#[cfg(feature = "tracing")]
|
||||
let span = span!(tracing::Level::DEBUG, "second");
|
||||
#[cfg(feature = "tracing")]
|
||||
let _enter = span.enter();
|
||||
|
||||
let not_last_item =
|
||||
tuple((with_consume_matcher, peek(without_consume_matcher)))(remaining);
|
||||
match not_last_item {
|
||||
Ok((remain, (item, future_item)))
|
||||
if item.indentation
|
||||
== *first_item_indentation.get_or_insert(item.indentation)
|
||||
&& future_item.indentation
|
||||
== *first_item_indentation.get_or_insert(item.indentation) =>
|
||||
{
|
||||
remaining = remain;
|
||||
children.push(item);
|
||||
continue;
|
||||
}
|
||||
Ok(_) | Err(_) => {}
|
||||
};
|
||||
}
|
||||
|
||||
{
|
||||
// Don't consume, no additional item
|
||||
#[cfg(feature = "tracing")]
|
||||
let span = span!(tracing::Level::DEBUG, "third");
|
||||
#[cfg(feature = "tracing")]
|
||||
let _enter = span.enter();
|
||||
|
||||
let last_item_then_exit = without_consume_matcher(remaining);
|
||||
match last_item_then_exit {
|
||||
let list_item = parser_with_context!(plain_list_item)(&parser_context)(remaining);
|
||||
match list_item {
|
||||
Ok((remain, item))
|
||||
if item.indentation
|
||||
== *first_item_indentation.get_or_insert(item.indentation) =>
|
||||
if item.indentation == *first_item_indentation.get_or_insert(item.indentation) =>
|
||||
{
|
||||
children.push((remaining, item));
|
||||
remaining = remain;
|
||||
children.push(item);
|
||||
break;
|
||||
}
|
||||
Ok(_) | Err(_) => {
|
||||
// TODO: Maybe this is reachable when there are no items at all.
|
||||
return Err(nom::Err::Error(CustomError::MyError(MyError(
|
||||
"Should be unreachable.",
|
||||
))));
|
||||
// unreachable!();
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
let maybe_exit = parser_with_context!(exit_matcher_parser)(&parser_context)(remaining);
|
||||
if maybe_exit.is_ok() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if children.is_empty() {
|
||||
let (final_child_start, _final_item_first_parse) = match children.pop() {
|
||||
Some(final_child) => final_child,
|
||||
None => {
|
||||
return Err(nom::Err::Error(CustomError::MyError(MyError(
|
||||
"Plain lists require at least one element.",
|
||||
))));
|
||||
}
|
||||
};
|
||||
let final_item_context =
|
||||
parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(false));
|
||||
let (remaining, reparsed_final_item) =
|
||||
parser_with_context!(plain_list_item)(&final_item_context)(final_child_start)?;
|
||||
children.push((final_child_start, reparsed_final_item));
|
||||
|
||||
let (remaining, _trailing_ws) =
|
||||
maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
|
||||
|
||||
let source = get_consumed(input, remaining);
|
||||
Ok((remaining, PlainList { source, children }))
|
||||
Ok((
|
||||
remaining,
|
||||
PlainList {
|
||||
source,
|
||||
children: children.into_iter().map(|(_start, item)| item).collect(),
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
@@ -154,29 +108,12 @@ pub fn plain_list_item<'r, 's>(
|
||||
let (remaining, leading_whitespace) = space0(input)?;
|
||||
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
|
||||
let indent_level = leading_whitespace.len();
|
||||
let with_consume_context = context
|
||||
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
|
||||
.with_additional_node(ContextElement::ListItem(indent_level))
|
||||
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
|
||||
class: ExitClass::Beta,
|
||||
exit_matcher: &plain_list_item_end,
|
||||
}));
|
||||
let without_consume_context = context
|
||||
.with_additional_node(ContextElement::ListItem(indent_level))
|
||||
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
|
||||
class: ExitClass::Beta,
|
||||
exit_matcher: &plain_list_item_end,
|
||||
}));
|
||||
|
||||
let with_consume_matcher = parser_with_context!(element(true))(&with_consume_context);
|
||||
let without_consume_matcher = parser_with_context!(element(true))(&without_consume_context);
|
||||
let exit_matcher = parser_with_context!(exit_matcher_parser)(&with_consume_context);
|
||||
let (remaining, bull) =
|
||||
verify(bullet, |bull: &str| bull != "*" || indent_level > 0)(remaining)?;
|
||||
|
||||
let maybe_contentless_item: Res<&str, &str> = alt((eof, line_ending))(remaining);
|
||||
match maybe_contentless_item {
|
||||
Ok((rem, _ws)) => {
|
||||
// TODO: do we need to consume if this isn't the last item?
|
||||
let source = get_consumed(input, rem);
|
||||
return Ok((
|
||||
rem,
|
||||
@@ -188,19 +125,29 @@ pub fn plain_list_item<'r, 's>(
|
||||
},
|
||||
));
|
||||
}
|
||||
Err(_) => {
|
||||
Err(_) => {}
|
||||
};
|
||||
|
||||
let (remaining, _ws) = space1(remaining)?;
|
||||
let (remaining, (mut contents, final_element)) = many_till(
|
||||
&with_consume_matcher,
|
||||
alt((
|
||||
terminated(&without_consume_matcher, exit_matcher),
|
||||
preceded(
|
||||
peek(tuple((&with_consume_matcher, exit_matcher))),
|
||||
&without_consume_matcher,
|
||||
let parser_context = context
|
||||
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
|
||||
.with_additional_node(ContextElement::ListItem(indent_level))
|
||||
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
|
||||
class: ExitClass::Beta,
|
||||
exit_matcher: &plain_list_item_end,
|
||||
}));
|
||||
|
||||
let (remaining, (children, _exit_contents)) = verify(
|
||||
many_till(
|
||||
parser_with_context!(element(true))(&parser_context),
|
||||
parser_with_context!(exit_matcher_parser)(&parser_context),
|
||||
),
|
||||
)),
|
||||
|(children, _exit_contents)| !children.is_empty(),
|
||||
)(remaining)?;
|
||||
contents.push(final_element);
|
||||
|
||||
let (remaining, _trailing_ws) =
|
||||
maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
|
||||
|
||||
let source = get_consumed(input, remaining);
|
||||
return Ok((
|
||||
remaining,
|
||||
@@ -208,11 +155,9 @@ pub fn plain_list_item<'r, 's>(
|
||||
source,
|
||||
indentation: indent_level,
|
||||
bullet: bull,
|
||||
children: contents,
|
||||
children,
|
||||
},
|
||||
));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
@@ -241,18 +186,11 @@ fn plain_list_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
fn plain_list_item_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
|
||||
let current_item_indent_level: &usize =
|
||||
get_context_item_indent(context).ok_or(nom::Err::Error(CustomError::MyError(MyError(
|
||||
"Not inside a plain list item",
|
||||
))))?;
|
||||
let plain_list_item_matcher = parser_with_context!(plain_list_item)(context);
|
||||
let line_indented_lte_matcher = parser_with_context!(line_indented_lte)(context);
|
||||
alt((
|
||||
recognize(verify(plain_list_item_matcher, |pli| {
|
||||
pli.indentation <= *current_item_indent_level
|
||||
})),
|
||||
recognize(line_indented_lte_matcher),
|
||||
))(input)
|
||||
start_of_line(context, input)?;
|
||||
recognize(tuple((
|
||||
opt(blank_line),
|
||||
parser_with_context!(line_indented_lte)(context),
|
||||
)))(input)
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
@@ -262,8 +200,6 @@ fn line_indented_lte<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'
|
||||
"Not inside a plain list item",
|
||||
))))?;
|
||||
|
||||
start_of_line(context, input)?;
|
||||
|
||||
let matched = recognize(verify(
|
||||
tuple((space0::<&str, _>, non_whitespace_character)),
|
||||
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
|
||||
|
||||
@@ -23,6 +23,7 @@ pub enum Token<'s> {
|
||||
Atom(&'s str),
|
||||
List(Vec<Token<'s>>),
|
||||
TextWithProperties(TextWithProperties<'s>),
|
||||
Vector(Vec<Token<'s>>),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -59,6 +60,10 @@ impl<'s> TextWithProperties<'s> {
|
||||
out.push('\\');
|
||||
ParseState::Normal
|
||||
}
|
||||
(ParseState::Escape, '"') => {
|
||||
out.push('"');
|
||||
ParseState::Normal
|
||||
}
|
||||
_ => todo!(),
|
||||
};
|
||||
}
|
||||
@@ -73,6 +78,13 @@ enum ParseState {
|
||||
}
|
||||
|
||||
impl<'s> Token<'s> {
|
||||
pub fn as_vector<'p>(&'p self) -> Result<&'p Vec<Token<'s>>, Box<dyn std::error::Error>> {
|
||||
Ok(match self {
|
||||
Token::Vector(children) => Ok(children),
|
||||
_ => Err(format!("wrong token type {:?}", self)),
|
||||
}?)
|
||||
}
|
||||
|
||||
pub fn as_list<'p>(&'p self) -> Result<&'p Vec<Token<'s>>, Box<dyn std::error::Error>> {
|
||||
Ok(match self {
|
||||
Token::List(children) => Ok(children),
|
||||
@@ -136,7 +148,7 @@ pub fn sexp<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
fn token<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
|
||||
alt((list, atom))(input)
|
||||
alt((list, vector, atom))(input)
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
@@ -151,16 +163,33 @@ fn list<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
|
||||
Ok((remaining, Token::List(children)))
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
fn vector<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
|
||||
let (remaining, _) = tag("[")(input)?;
|
||||
let (remaining, children) = delimited(
|
||||
multispace0,
|
||||
separated_list1(multispace1, token),
|
||||
multispace0,
|
||||
)(remaining)?;
|
||||
let (remaining, _) = tag("]")(remaining)?;
|
||||
Ok((remaining, Token::Vector(children)))
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
fn atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
|
||||
not(peek(tag(")")))(input)?;
|
||||
alt((text_with_properties, quoted_atom, unquoted_atom))(input)
|
||||
not(peek(one_of(")]")))(input)?;
|
||||
alt((
|
||||
text_with_properties,
|
||||
hash_notation,
|
||||
quoted_atom,
|
||||
unquoted_atom,
|
||||
))(input)
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
fn unquoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
|
||||
let (remaining, body) = take_till1(|c| match c {
|
||||
' ' | '\t' | '\r' | '\n' | ')' => true,
|
||||
' ' | '\t' | '\r' | '\n' | ')' | ']' => true,
|
||||
_ => false,
|
||||
})(input)?;
|
||||
Ok((remaining, Token::Atom(body)))
|
||||
@@ -182,6 +211,18 @@ fn quoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
|
||||
Ok((remaining, Token::Atom(source)))
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
fn hash_notation<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
|
||||
let (remaining, _) = tag("#<")(input)?;
|
||||
let (remaining, _body) = take_till1(|c| match c {
|
||||
'>' => true,
|
||||
_ => false,
|
||||
})(remaining)?;
|
||||
let (remaining, _) = tag(">")(remaining)?;
|
||||
let source = get_consumed(input, remaining);
|
||||
Ok((remaining, Token::Atom(source)))
|
||||
}
|
||||
|
||||
fn text_with_properties<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
|
||||
let (remaining, _) = tag("#(")(input)?;
|
||||
let (remaining, (text, props)) = delimited(
|
||||
@@ -237,6 +278,7 @@ mod tests {
|
||||
Token::Atom(_) => false,
|
||||
Token::List(_) => true,
|
||||
Token::TextWithProperties(_) => false,
|
||||
Token::Vector(_) => false,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -249,6 +291,7 @@ mod tests {
|
||||
Token::Atom(_) => false,
|
||||
Token::List(_) => true,
|
||||
Token::TextWithProperties(_) => false,
|
||||
Token::Vector(_) => false,
|
||||
});
|
||||
let children = match parsed {
|
||||
Token::List(children) => children,
|
||||
@@ -308,6 +351,7 @@ mod tests {
|
||||
Token::Atom(_) => false,
|
||||
Token::List(_) => true,
|
||||
Token::TextWithProperties(_) => false,
|
||||
Token::Vector(_) => false,
|
||||
});
|
||||
let children = match parsed {
|
||||
Token::List(children) => children,
|
||||
|
||||
@@ -3,7 +3,7 @@ fn {name}() {{
|
||||
let todo_org_path = "{path}";
|
||||
let org_contents = std::fs::read_to_string(todo_org_path).expect("Read org file.");
|
||||
println!("{{}}", org_contents);
|
||||
let org_sexp = emacs_parse_org_document(todo_org_path).expect("Use emacs to parse org file.");
|
||||
let org_sexp = emacs_parse_org_document(org_contents.as_str()).expect("Use emacs to parse org file.");
|
||||
println!("{{}}", org_sexp);
|
||||
let (_remaining, parsed_sexp) = sexp_with_padding(org_sexp.as_str()).expect("Sexp Parse failure");
|
||||
let (remaining, rust_parsed) = document(org_contents.as_str()).expect("Org Parse failure");
|
||||
|
||||
Reference in New Issue
Block a user