diff --git a/Makefile b/Makefile index 537f634..763a441 100644 --- a/Makefile +++ b/Makefile @@ -33,6 +33,10 @@ release: clean: > cargo clean +.PHONY: format +format: +> $(MAKE) -C docker/cargo_fmt run + .PHONY: test test: > cargo test --no-default-features --features compare --no-fail-fast --lib --test test_loader -- --test-threads $(TESTJOBS) diff --git a/build.rs b/build.rs index 31cfb72..6fbdbe8 100644 --- a/build.rs +++ b/build.rs @@ -16,6 +16,9 @@ fn main() { let destination = Path::new(&out_dir).join("tests.rs"); let mut test_file = File::create(&destination).unwrap(); + // Re-generate the tests if any org-mode files change + println!("cargo:rerun-if-changed=org_mode_samples"); + write_header(&mut test_file); let test_files = WalkDir::new("org_mode_samples") diff --git a/docker/organic_test/Dockerfile b/docker/organic_test/Dockerfile index f0c31d7..86f3a40 100644 --- a/docker/organic_test/Dockerfile +++ b/docker/organic_test/Dockerfile @@ -1,5 +1,5 @@ FROM alpine:3.17 AS build -RUN apk add --no-cache build-base musl-dev git autoconf make texinfo gnutls-dev ncurses-dev gawk +RUN apk add --no-cache build-base musl-dev git autoconf make texinfo gnutls-dev ncurses-dev gawk libgccjit-dev FROM build AS build-emacs @@ -8,13 +8,13 @@ RUN git clone --depth 1 --branch $EMACS_VERSION https://git.savannah.gnu.org/git WORKDIR /root/emacs RUN mkdir /root/dist RUN ./autogen.sh -RUN ./configure --prefix /usr --without-x --without-sound +RUN ./configure --prefix /usr --without-x --without-sound --with-native-compilation=aot RUN make RUN make DESTDIR="/root/dist" install FROM build AS build-org-mode -ARG ORG_VERSION=163bafb43dcc2bc94a2c7ccaa77d3d1dd488f1af +ARG ORG_VERSION=c703541ffcc14965e3567f928de1683a1c1e33f6 COPY --from=build-emacs /root/dist/ / RUN mkdir /root/dist # Savannah does not allow fetching specific revisions, so we're going to have to put unnecessary load on their server by cloning main and then checking out the revision we want. @@ -27,7 +27,7 @@ RUN make DESTDIR="/root/dist" install FROM rustlang/rust:nightly-alpine3.17 AS tester ENV LANG=en_US.UTF-8 -RUN apk add --no-cache musl-dev ncurses gnutls +RUN apk add --no-cache musl-dev ncurses gnutls libgccjit RUN cargo install --locked --no-default-features --features ci-autoclean cargo-cache COPY --from=build-emacs /root/dist/ / COPY --from=build-org-mode /root/dist/ / @@ -88,14 +88,20 @@ ARG DOOMEMACS_PATH=/foreign_documents/doomemacs ARG DOOMEMACS_REPO=https://github.com/doomemacs/doomemacs.git RUN mkdir -p $DOOMEMACS_PATH && git -C $DOOMEMACS_PATH init --initial-branch=main && git -C $DOOMEMACS_PATH remote add origin $DOOMEMACS_REPO && git -C $DOOMEMACS_PATH fetch origin $DOOMEMACS_VERSION && git -C $DOOMEMACS_PATH checkout FETCH_HEAD +ARG WORG_VERSION=0c8d5679b536af450b61812246a3e02b8103f4b8 +ARG WORG_PATH=/foreign_documents/worg +ARG WORG_REPO=https://git.sr.ht/~bzg/worg +RUN mkdir -p $WORG_PATH && git -C $WORG_PATH init --initial-branch=main && git -C $WORG_PATH remote add origin $WORG_REPO && git -C $WORG_PATH fetch origin $WORG_VERSION && git -C $WORG_PATH checkout FETCH_HEAD + FROM tester as foreign-document-test RUN apk add --no-cache bash coreutils RUN mkdir /foreign_documents -COPY --from=build-org-mode /root/org-mode /foreign_documents/org-mode -COPY --from=build-emacs /root/emacs /foreign_documents/emacs COPY --from=foreign-document-gather /foreign_documents/howardabrams /foreign_documents/howardabrams COPY --from=foreign-document-gather /foreign_documents/doomemacs /foreign_documents/doomemacs +COPY --from=foreign-document-gather /foreign_documents/worg /foreign_documents/worg +COPY --from=build-org-mode /root/org-mode /foreign_documents/org-mode +COPY --from=build-emacs /root/emacs /foreign_documents/emacs COPY foreign_document_test_entrypoint.sh /entrypoint.sh RUN chmod +x /entrypoint.sh ENTRYPOINT ["/entrypoint.sh"] diff --git a/docker/organic_test/foreign_document_test_entrypoint.sh b/docker/organic_test/foreign_document_test_entrypoint.sh index 6dbea8c..3a51ce9 100644 --- a/docker/organic_test/foreign_document_test_entrypoint.sh +++ b/docker/organic_test/foreign_document_test_entrypoint.sh @@ -32,6 +32,8 @@ function main { if [ "$?" -ne 0 ]; then all_status=1; fi (run_compare_function "emacs" compare_all_org_document "/foreign_documents/emacs") if [ "$?" -ne 0 ]; then all_status=1; fi + (run_compare_function "worg" compare_all_org_document "/foreign_documents/worg") + if [ "$?" -ne 0 ]; then all_status=1; fi (run_compare_function "howard_abrams" compare_howard_abrams) if [ "$?" -ne 0 ]; then all_status=1; fi (run_compare_function "doomemacs" compare_all_org_document "/foreign_documents/doomemacs") @@ -39,9 +41,9 @@ function main { set -e if [ "$all_status" -ne 0 ]; then - echo "$(red_text "Some tests failed.")" + red_text "Some tests failed." else - echo "$(green_text "All tests passed.")" + green_text "All tests passed." fi return "$all_status" } @@ -62,8 +64,9 @@ function indent { local depth="$1" local scaled_depth=$((depth * 2)) shift 1 - local prefix=$(printf -- "%${scaled_depth}s") - while read l; do + local prefix + prefix=$(printf -- "%${scaled_depth}s") + while read -r l; do (IFS=' '; printf -- '%s%s\n' "$prefix" "$l") done } @@ -91,12 +94,13 @@ function compare_all_org_document { local target_document local all_status=0 while read target_document; do - local relative_path=$($REALPATH --relative-to "$root_dir" "$target_document") + local relative_path + relative_path=$($REALPATH --relative-to "$root_dir" "$target_document") set +e (run_compare "$relative_path" "$target_document") if [ "$?" -ne 0 ]; then all_status=1; fi set -e - done<<<$(find "$root_dir" -type f -iname '*.org') + done<<<"$(find "$root_dir" -type f -iname '*.org' | sort)" return "$all_status" } diff --git a/notes/optimization_ideas.org b/notes/optimization_ideas.org index baefe61..36b0adf 100644 --- a/notes/optimization_ideas.org +++ b/notes/optimization_ideas.org @@ -25,3 +25,4 @@ This could significantly reduce our calls to exit matchers. I think targets would break this. The exit matchers are already implicitly building this behavior since they should all exit very early when the starting character is wrong. Putting this logic in a centralized place, far away from where those characters are actually going to be used, is unfortunate for readability. +** Use exit matcher to cut off trailing whitespace instead of re-matching in plain lists. diff --git a/org_mode_samples/greater_element/dynamic_block/no_closing_colon.org b/org_mode_samples/greater_element/dynamic_block/no_closing_colon.org new file mode 100644 index 0000000..c6073d9 --- /dev/null +++ b/org_mode_samples/greater_element/dynamic_block/no_closing_colon.org @@ -0,0 +1,3 @@ +#+BEGIN: timestamp :format "%Y-%m-%d %H:%M" + +#+END diff --git a/org_mode_samples/greater_element/greater_block/quote_block_with_leading_blank_line.org b/org_mode_samples/greater_element/greater_block/quote_block_with_leading_blank_line.org new file mode 100644 index 0000000..7b4e687 --- /dev/null +++ b/org_mode_samples/greater_element/greater_block/quote_block_with_leading_blank_line.org @@ -0,0 +1,5 @@ +#+begin_quote + +foo + +#+end_quote diff --git a/org_mode_samples/greater_element/plain_list/alphabetical_bullets.org b/org_mode_samples/greater_element/plain_list/alphabetical_bullets.org new file mode 100644 index 0000000..f91ae6a --- /dev/null +++ b/org_mode_samples/greater_element/plain_list/alphabetical_bullets.org @@ -0,0 +1,3 @@ +# These are only allowed by configuring org-list-allow-alphabetical which the automated tests are not currently set up to do, so this will parse as a paragraph: +a. foo +b. bar diff --git a/org_mode_samples/greater_element/plain_list/description_list_empty_value.org b/org_mode_samples/greater_element/plain_list/description_list_empty_value.org new file mode 100644 index 0000000..123ba10 --- /dev/null +++ b/org_mode_samples/greater_element/plain_list/description_list_empty_value.org @@ -0,0 +1,6 @@ +- foo :: + +- bar :: + + +baz diff --git a/org_mode_samples/greater_element/plain_list/mixed_types.org b/org_mode_samples/greater_element/plain_list/mixed_types.org new file mode 100644 index 0000000..5143152 --- /dev/null +++ b/org_mode_samples/greater_element/plain_list/mixed_types.org @@ -0,0 +1,3 @@ +1. foo +- bar +- lorem :: ipsum diff --git a/org_mode_samples/greater_element/plain_list/ordered_list_with_fake_tag.org b/org_mode_samples/greater_element/plain_list/ordered_list_with_fake_tag.org new file mode 100644 index 0000000..0003513 --- /dev/null +++ b/org_mode_samples/greater_element/plain_list/ordered_list_with_fake_tag.org @@ -0,0 +1,2 @@ +# Since this is an ordered list, the text before the " :: " is NOT parsed as a tag. +1. foo :: bar diff --git a/org_mode_samples/greater_element/table/align_unaligned_table.org b/org_mode_samples/greater_element/table/align_unaligned_table.org new file mode 100644 index 0000000..11a9844 --- /dev/null +++ b/org_mode_samples/greater_element/table/align_unaligned_table.org @@ -0,0 +1,6 @@ +# The STARTUP directive here instructs org-mode to align tables which emacs normally does when opening the file. Since Organic is solely a parser, we have no business editing the org-mode document so Organic does not handle aligning tables, so in order for this test to pass, we have to avoid that behavior in Emacs. +#+STARTUP: align + +|foo|bar| +|- +|lorem|ipsum| diff --git a/org_mode_samples/greater_element/table/empty_formula.org b/org_mode_samples/greater_element/table/empty_formula.org new file mode 100644 index 0000000..6653d05 --- /dev/null +++ b/org_mode_samples/greater_element/table/empty_formula.org @@ -0,0 +1,4 @@ +| Name | Value | +|------+-------| +| foo | bar | +#+tblfm: diff --git a/org_mode_samples/lesser_element/fixed_width_area/tab_instead_of_space.org b/org_mode_samples/lesser_element/fixed_width_area/tab_instead_of_space.org new file mode 100644 index 0000000..eac5785 --- /dev/null +++ b/org_mode_samples/lesser_element/fixed_width_area/tab_instead_of_space.org @@ -0,0 +1,2 @@ +# Fixed width areas must begin with colon followed by a space, not a tab, so this is not a fixed width area. +: foo diff --git a/org_mode_samples/object/plain_link/with_parenthesis.org b/org_mode_samples/object/plain_link/with_parenthesis.org new file mode 100644 index 0000000..e141c66 --- /dev/null +++ b/org_mode_samples/object/plain_link/with_parenthesis.org @@ -0,0 +1,11 @@ +# Should be a link: +https://en.wikipedia.org/wiki/Shebang_(Unix) + +# No closing parenthesis, so link ends at underscore. +https://en.wikipedia.org/wiki/Shebang_(Unix + +# Parenthesis only allowed to depth of 2 so link ends at underscore. +https://en.wikipedia.org/wiki/Shebang_(((Unix))) + +# Even though they eventually become balanced, we hit negative parenthesis depth so link ends at ) +https://en.wikipedia.org/wiki/Shebang)Unix( diff --git a/org_mode_samples/object/radio_link/different_case.org b/org_mode_samples/object/radio_link/different_case.org new file mode 100644 index 0000000..dddc9bf --- /dev/null +++ b/org_mode_samples/object/radio_link/different_case.org @@ -0,0 +1,3 @@ +<<>> + +foo bar baz diff --git a/org_mode_samples/object/radio_link/different_whitespace.org b/org_mode_samples/object/radio_link/different_whitespace.org new file mode 100644 index 0000000..19b678f --- /dev/null +++ b/org_mode_samples/object/radio_link/different_whitespace.org @@ -0,0 +1,6 @@ +<<>> + + +foo +bar +baz diff --git a/org_mode_samples/object/regular_link/elisp.org b/org_mode_samples/object/regular_link/elisp.org new file mode 100644 index 0000000..1b41cd1 --- /dev/null +++ b/org_mode_samples/object/regular_link/elisp.org @@ -0,0 +1 @@ +[[elisp:(local-set-key "\M-\x" 'foo-bar-baz)]] diff --git a/org_mode_samples/object/regular_link/with_parenthesis.org b/org_mode_samples/object/regular_link/with_parenthesis.org new file mode 100644 index 0000000..30b3efd --- /dev/null +++ b/org_mode_samples/object/regular_link/with_parenthesis.org @@ -0,0 +1 @@ +[[https://en.wikipedia.org/wiki/Shebang_(Unix)]] diff --git a/org_mode_samples/object/regular_link/wrapped_with_brackets.org b/org_mode_samples/object/regular_link/wrapped_with_brackets.org new file mode 100644 index 0000000..2b8ef08 --- /dev/null +++ b/org_mode_samples/object/regular_link/wrapped_with_brackets.org @@ -0,0 +1 @@ +[[[http://foo.bar/baz][lorem]]] diff --git a/org_mode_samples/object/subscript_and_superscript/options_require_braces.org b/org_mode_samples/object/subscript_and_superscript/options_require_braces.org new file mode 100644 index 0000000..22328e0 --- /dev/null +++ b/org_mode_samples/object/subscript_and_superscript/options_require_braces.org @@ -0,0 +1,7 @@ +# Even though *exporting* honors the setting to require braces for subscript/superscript, the official org-mode parser still parses subscripts and superscripts. + +#+OPTIONS: ^:{} +foo_this isn't a subscript when exported due to lack of braces (but its still a subscript during parsing) + + +bar_{this is a subscript} diff --git a/org_mode_samples/object/subscript_and_superscript/with_parenthesis.org b/org_mode_samples/object/subscript_and_superscript/with_parenthesis.org new file mode 100644 index 0000000..8551490 --- /dev/null +++ b/org_mode_samples/object/subscript_and_superscript/with_parenthesis.org @@ -0,0 +1,13 @@ +foo_(bar) + +foo_(b(ar) + +foo_(b{ar) + +foo_{b(ar} + +foo_(b(a)r) + +foo_b(a)r + +foo_(b+ar) diff --git a/org_mode_samples/object/text_markup/double_star.org b/org_mode_samples/object/text_markup/double_star.org new file mode 100644 index 0000000..c758a3d --- /dev/null +++ b/org_mode_samples/object/text_markup/double_star.org @@ -0,0 +1 @@ +foo ** bar ** baz diff --git a/org_mode_samples/object/text_markup/double_tilde.org b/org_mode_samples/object/text_markup/double_tilde.org new file mode 100644 index 0000000..1e9f1df --- /dev/null +++ b/org_mode_samples/object/text_markup/double_tilde.org @@ -0,0 +1 @@ +foo ~~ bar ~~ baz diff --git a/org_mode_samples/object/text_markup/target_substring.org b/org_mode_samples/object/text_markup/target_substring.org new file mode 100644 index 0000000..25d14d7 --- /dev/null +++ b/org_mode_samples/object/text_markup/target_substring.org @@ -0,0 +1,4 @@ +# Since "foos" has an extra "s", this does not match the target. +the foos bar + +The <<>> and stuff. diff --git a/org_mode_samples/sections_and_headings/empty_heading.org b/org_mode_samples/sections_and_headings/empty_heading.org new file mode 100644 index 0000000..741bea2 --- /dev/null +++ b/org_mode_samples/sections_and_headings/empty_heading.org @@ -0,0 +1,2 @@ +* DONE +* diff --git a/org_mode_samples/sections_and_headings/fast_access_todo_states.org b/org_mode_samples/sections_and_headings/fast_access_todo_states.org new file mode 100644 index 0000000..a197dcd --- /dev/null +++ b/org_mode_samples/sections_and_headings/fast_access_todo_states.org @@ -0,0 +1,6 @@ +#+TODO: TODO(t) INPROGRESS(i/!) | DONE(d!) CANCELED(c@/!) +# ! : Log changes leading to this state. +# @ : Log changes leading to this state and prompt for a comment to include. +# /! : Log changes leaving this state if and only if to a state that does not log. This can be combined with the above like WAIT(w!/!) or DELAYED(d@/!) +* INPROGRESS +- State "TODO" from "INPROGRESS" [2023-09-14 Thu 02:13] diff --git a/org_mode_samples/sections_and_headings/odd_level_depth.org b/org_mode_samples/sections_and_headings/odd_level_depth.org new file mode 100644 index 0000000..f92f920 --- /dev/null +++ b/org_mode_samples/sections_and_headings/odd_level_depth.org @@ -0,0 +1,8 @@ +#+STARTUP: odd +* Foo +***** Bar +* Baz +*** Lorem +* Ipsum +**** Dolar +***** Cat diff --git a/scripts/run_docker_compare.bash b/scripts/run_docker_compare.bash index 309cddf..38372b9 100755 --- a/scripts/run_docker_compare.bash +++ b/scripts/run_docker_compare.bash @@ -8,10 +8,26 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" : ${TRACE:="NO"} # or YES to send traces to jaeger : ${BACKTRACE:="NO"} # or YES to print a rust backtrace when panicking : ${NO_COLOR:=""} # Set to anything to disable color output +: ${PROFILE:="debug"} REALPATH=$(command -v uu-realpath || command -v realpath) MAKE=$(command -v gmake || command -v make) +############## Setup ######################### + +function die { + local status_code="$1" + shift + (>&2 echo "${@}") + exit "$status_code" +} + +function log { + (>&2 echo "${@}") +} + +############## Program ######################### + function main { build_container launch_container "${@}" @@ -23,7 +39,6 @@ function build_container { function launch_container { local additional_flags=() - local additional_args=() local features=(compare) if [ "$NO_COLOR" != "" ]; then @@ -37,11 +52,8 @@ function launch_container { fi if [ "$SHELL" != "YES" ]; then - local features_joined=$(IFS=","; echo "${features[*]}") - additional_args+=(cargo run --bin compare --no-default-features --features "$features_joined") additional_flags+=(--read-only) else - additional_args+=(/bin/sh) additional_flags+=(-t) fi @@ -49,16 +61,50 @@ function launch_container { additional_flags+=(--env RUST_BACKTRACE=full) fi + if [ "$SHELL" = "YES" ]; then + exec docker run "${additional_flags[@]}" --init --rm -i --mount type=tmpfs,destination=/tmp -v "/:/input:ro" -v "$($REALPATH "$DIR/../"):/source:ro" --mount source=cargo-cache,target=/usr/local/cargo/registry --mount source=rust-cache,target=/target --env CARGO_TARGET_DIR=/target -w /source --entrypoint "" organic-test /bin/sh + fi + + local features_joined + features_joined=$(IFS=","; echo "${features[*]}") + + local build_flags=() + if [ "$PROFILE" = "dev" ] || [ "$PROFILE" = "debug" ]; then + PROFILE="debug" + else + build_flags+=(--profile "$PROFILE") + fi + + if [ $# -gt 0 ]; then # If we passed in args, we need to forward them along for path in "${@}"; do - local full_path=$($REALPATH "$path") - local containing_folder=$(dirname "$full_path") - local file_name=$(basename "$full_path") - docker run "${additional_flags[@]}" --init --rm -i --mount type=tmpfs,destination=/tmp -v "${containing_folder}:/input:ro" -v "$($REALPATH "$DIR/../"):/source:ro" --mount source=cargo-cache,target=/usr/local/cargo/registry --mount source=rust-cache,target=/target --env CARGO_TARGET_DIR=/target -w /source --entrypoint "" organic-test "${additional_args[@]}" -- "/input/$file_name" + local full_path + full_path=$($REALPATH "$path") + init_script=$(cat < /dev/null + (run_parse "$bad") local status=$? set -e if [ $status -eq 0 ]; then @@ -71,21 +60,12 @@ function main { echo "Bad line: $bad" } -function setup_temp_dir { - local temp_dir=$(mktemp -d -t 'compare_bisect.XXXXXXXX') - cp -r "$SOURCE_FOLDER/"* "$temp_dir/" - echo "$temp_dir" -} - function run_parse { local lines="$1" - local temp_dir=$(setup_temp_dir) - folders+=("$temp_dir") - cat "$SOURCE_FOLDER/$TARGET_DOCUMENT" | head -n "$lines" > "$temp_dir/$TARGET_DOCUMENT" - "${DIR}/run_docker_compare.bash" "$temp_dir/$TARGET_DOCUMENT" + + cd "$SOURCE_FOLDER" + head -n "$lines" "$SOURCE_FOLDER/$TARGET_DOCUMENT" | PROFILE=release-lto "${DIR}/run_docker_compare.bash" local status=$? - rm -rf "$temp_dir" - # TODO: Remove temp_dir from folders return "$status" } diff --git a/src/compare/compare.rs b/src/compare/compare.rs index 4ba4519..5d6cd46 100644 --- a/src/compare/compare.rs +++ b/src/compare/compare.rs @@ -14,7 +14,9 @@ use crate::LocalFileAccessInterface; pub fn run_anonymous_compare>( org_contents: P, ) -> Result<(), Box> { - let org_contents = org_contents.as_ref(); + // TODO: This is a work-around to pretend that dos line endings do not exist. It would be better to handle the difference in line endings. + let org_contents = org_contents.as_ref().replace("\r\n", "\n"); + let org_contents = org_contents.as_str(); eprintln!("Using emacs version: {}", get_emacs_version()?.trim()); eprintln!("Using org-mode version: {}", get_org_mode_version()?.trim()); let rust_parsed = parse(org_contents)?; @@ -44,6 +46,8 @@ pub fn run_compare_on_file>(org_path: P) -> Result<(), Box( let level = get_property(emacs, ":level")? .ok_or("Level should not be nil")? .as_atom()?; - if rust.stars.to_string() != level { + if rust.level.to_string() != level { this_status = DiffStatus::Bad; message = Some(format!( "Headline level do not match (emacs != rust): {} != {}", - level, rust.stars + level, rust.level )) } @@ -546,14 +547,26 @@ fn compare_heading<'s>( }; // Compare title - let title = get_property(emacs, ":title")?.ok_or("Missing :title attribute.")?; - let title_status = title - .as_list()? - .iter() - .zip(rust.title.iter()) - .map(|(emacs_child, rust_child)| compare_object(source, emacs_child, rust_child)) - .collect::, _>>()?; - child_status.push(artificial_diff_scope("title".to_owned(), title_status)?); + let title = get_property(emacs, ":title")?; + match (title, rust.title.len()) { + (None, 0) => {} + (None, _) => { + this_status = DiffStatus::Bad; + message = Some(format!( + "Titles do not match (emacs != rust): {:?} != {:?}", + title, rust.title + )) + } + (Some(title), _) => { + let title_status = title + .as_list()? + .iter() + .zip(rust.title.iter()) + .map(|(emacs_child, rust_child)| compare_object(source, emacs_child, rust_child)) + .collect::, _>>()?; + child_status.push(artificial_diff_scope("title".to_owned(), title_status)?); + } + }; // Compare priority let priority = get_property(emacs, ":priority")?; @@ -719,6 +732,10 @@ fn compare_plain_list<'s>( Ok(_) => {} }; + // TODO compare :type + // + // :type is an unquoted atom of either descriptive, ordered, or unordered + for (emacs_child, rust_child) in children.iter().skip(2).zip(rust.children.iter()) { child_status.push(compare_plain_list_item(source, emacs_child, rust_child)?); } @@ -787,7 +804,26 @@ fn compare_plain_list_item<'s>( contents_status, )?); - // TODO: compare :bullet :checkbox :counter :pre-blank + // TODO: compare :bullet :counter :pre-blank + + // Compare checkbox + let checkbox = get_property(emacs, ":checkbox")? + .map(Token::as_atom) + .map_or(Ok(None), |r| r.map(Some))? + .unwrap_or("nil"); + match (checkbox, &rust.checkbox) { + ("nil", None) => {} + ("off", Some((CheckboxType::Off, _))) => {} + ("trans", Some((CheckboxType::Trans, _))) => {} + ("on", Some((CheckboxType::On, _))) => {} + _ => { + this_status = DiffStatus::Bad; + message = Some(format!( + "Checkbox mismatch (emacs != rust) {:?} != {:?}", + checkbox, rust.checkbox + )); + } + }; Ok(DiffResult { status: this_status, @@ -862,6 +898,8 @@ fn compare_dynamic_block<'s>( Ok(_) => {} }; + // TODO: Compare :block-name :arguments + for (emacs_child, rust_child) in children.iter().skip(2).zip(rust.children.iter()) { child_status.push(compare_element(source, emacs_child, rust_child)?); } @@ -1914,6 +1952,8 @@ fn compare_regular_link<'s>( Ok(_) => {} }; + // TODO: Compare :type :path :format :raw-link :application :search-option + Ok(DiffResult { status: this_status, name: emacs_name.to_owned(), @@ -2441,6 +2481,8 @@ fn compare_subscript<'s>( Ok(_) => {} }; + // TODO compare :use-brackets-p + Ok(DiffResult { status: this_status, name: emacs_name.to_owned(), @@ -2472,6 +2514,8 @@ fn compare_superscript<'s>( Ok(_) => {} }; + // TODO compare :use-brackets-p + Ok(DiffResult { status: this_status, name: emacs_name.to_owned(), diff --git a/src/compare/parse.rs b/src/compare/parse.rs index 700b5b1..6170ce7 100644 --- a/src/compare/parse.rs +++ b/src/compare/parse.rs @@ -11,6 +11,8 @@ where let elisp_script = format!( r#"(progn (erase-buffer) + (require 'org) + (defun org-table-align () t) (insert "{escaped_file_contents}") (org-mode) (message "%s" (pp-to-string (org-element-parse-buffer))) @@ -42,6 +44,8 @@ where ))?; let elisp_script = format!( r#"(progn + (require 'org) + (defun org-table-align () t) (org-mode) (message "%s" (pp-to-string (org-element-parse-buffer))) )"# diff --git a/src/context/global_settings.rs b/src/context/global_settings.rs index b0c9305..e91c93a 100644 --- a/src/context/global_settings.rs +++ b/src/context/global_settings.rs @@ -2,6 +2,7 @@ use std::collections::BTreeSet; use super::FileAccessInterface; use super::LocalFileAccessInterface; +use crate::types::IndentationLevel; use crate::types::Object; // TODO: Ultimately, I think we'll need most of this: https://orgmode.org/manual/In_002dbuffer-Settings.html @@ -12,6 +13,20 @@ pub struct GlobalSettings<'g, 's> { pub file_access: &'g dyn FileAccessInterface, pub in_progress_todo_keywords: BTreeSet, pub complete_todo_keywords: BTreeSet, + /// Set to true to allow for plain lists using single letters as the bullet in the same way that numbers are used. + /// + /// Corresponds to the org-list-allow-alphabetical elisp variable. + pub org_list_allow_alphabetical: bool, + + /// How many spaces a tab should be equal to. + /// + /// Corresponds to the tab-width elisp variable. + pub tab_width: IndentationLevel, + + /// Whether to only allow odd headline levels. + /// + /// Corresponds to org-odd-levels-only elisp variable. + pub odd_levels_only: HeadlineLevelFilter, } impl<'g, 's> GlobalSettings<'g, 's> { @@ -23,6 +38,9 @@ impl<'g, 's> GlobalSettings<'g, 's> { }, in_progress_todo_keywords: BTreeSet::new(), complete_todo_keywords: BTreeSet::new(), + org_list_allow_alphabetical: false, + tab_width: 8, + odd_levels_only: HeadlineLevelFilter::OddEven, } } } @@ -32,3 +50,9 @@ impl<'g, 's> Default for GlobalSettings<'g, 's> { GlobalSettings::new() } } + +#[derive(Debug, Clone)] +pub enum HeadlineLevelFilter { + Odd, + OddEven, +} diff --git a/src/context/mod.rs b/src/context/mod.rs index a396cd3..c2d4a0e 100644 --- a/src/context/mod.rs +++ b/src/context/mod.rs @@ -25,5 +25,6 @@ pub(crate) use exiting::ExitClass; pub use file_access_interface::FileAccessInterface; pub use file_access_interface::LocalFileAccessInterface; pub use global_settings::GlobalSettings; +pub use global_settings::HeadlineLevelFilter; pub(crate) use list::List; pub(crate) use parser_with_context::parser_with_context; diff --git a/src/parser/dynamic_block.rs b/src/parser/dynamic_block.rs index d368385..b2c5559 100644 --- a/src/parser/dynamic_block.rs +++ b/src/parser/dynamic_block.rs @@ -1,14 +1,18 @@ use nom::branch::alt; use nom::bytes::complete::is_not; +use nom::bytes::complete::tag; use nom::bytes::complete::tag_no_case; use nom::character::complete::line_ending; use nom::character::complete::space0; use nom::character::complete::space1; +use nom::combinator::consumed; use nom::combinator::eof; use nom::combinator::not; use nom::combinator::opt; use nom::combinator::recognize; +use nom::multi::many0; use nom::multi::many_till; +use nom::sequence::preceded; use nom::sequence::tuple; use super::org_source::OrgSource; @@ -67,24 +71,23 @@ pub(crate) fn dynamic_block<'b, 'g, 'r, 's>( }; let element_matcher = parser_with_context!(element(true))(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); - let (remaining, children) = match tuple(( - not(exit_matcher), + not(exit_matcher)(remaining)?; + let (remaining, leading_blank_lines) = opt(consumed(tuple(( blank_line, - many_till(blank_line, exit_matcher), - ))(remaining) - { - Ok((remain, (_not_immediate_exit, first_line, (_trailing_whitespace, _exit_contents)))) => { + many0(preceded(not(exit_matcher), blank_line)), + ))))(remaining)?; + let leading_blank_lines = + leading_blank_lines.map(|(source, (first_line, _remaining_lines))| { let mut element = Element::Paragraph(Paragraph::of_text(first_line.into())); - let source = get_consumed(remaining, remain); element.set_source(source.into()); - (remain, vec![element]) - } - Err(_) => { - let (remaining, (children, _exit_contents)) = - many_till(element_matcher, exit_matcher)(remaining)?; - (remaining, children) - } - }; + element + }); + let (remaining, (mut children, _exit_contents)) = + many_till(element_matcher, exit_matcher)(remaining)?; + if let Some(lines) = leading_blank_lines { + children.insert(0, lines); + } + let (remaining, _end) = dynamic_block_end(&parser_context, remaining)?; let source = get_consumed(input, remaining); @@ -117,7 +120,8 @@ fn dynamic_block_end<'b, 'g, 'r, 's>( start_of_line(input)?; let (remaining, source) = recognize(tuple(( space0, - tag_no_case("#+end:"), + tag_no_case("#+end"), + opt(tag(":")), alt((eof, line_ending)), )))(input)?; Ok((remaining, source)) diff --git a/src/parser/element_parser.rs b/src/parser/element_parser.rs index e210384..92da53c 100644 --- a/src/parser/element_parser.rs +++ b/src/parser/element_parser.rs @@ -141,7 +141,7 @@ fn _detect_element<'b, 'g, 'r, 's>( can_be_paragraph: bool, ) -> Res, ()> { if alt(( - detect_plain_list, + parser_with_context!(detect_plain_list)(context), detect_footnote_definition, detect_diary_sexp, detect_comment, diff --git a/src/parser/entity.rs b/src/parser/entity.rs index 4fcb8cf..53a0b09 100644 --- a/src/parser/entity.rs +++ b/src/parser/entity.rs @@ -1,10 +1,10 @@ use nom::branch::alt; use nom::bytes::complete::tag; -use nom::bytes::complete::tag_no_case; use nom::character::complete::satisfy; use nom::combinator::eof; use nom::combinator::peek; use nom::combinator::recognize; +use nom::sequence::tuple; use super::org_source::OrgSource; use super::util::maybe_consume_object_trailing_whitespace_if_not_exiting; @@ -439,7 +439,7 @@ pub(crate) fn entity<'b, 'g, 'r, 's>( ) -> Res, Entity<'s>> { let (remaining, _) = tag("\\")(input)?; let (remaining, entity_name) = name(context, remaining)?; - let (remaining, _) = alt((tag("{}"), peek(recognize(entity_end))))(remaining)?; + let (remaining, _trailing_whitespace) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; @@ -460,9 +460,12 @@ fn name<'b, 'g, 'r, 's>( ) -> Res, OrgSource<'s>> { // TODO: This should be defined by org-entities and optionally org-entities-user for entity in ORG_ENTITIES { - let result = tag_no_case::<_, _, CustomError<_>>(entity)(input); + let result = tuple(( + tag::<_, _, CustomError<_>>(entity), + alt((tag("{}"), peek(recognize(entity_end)))), + ))(input); match result { - Ok((remaining, ent)) => { + Ok((remaining, (ent, _))) => { return Ok((remaining, ent)); } Err(_) => {} diff --git a/src/parser/fixed_width_area.rs b/src/parser/fixed_width_area.rs index 449774f..f501591 100644 --- a/src/parser/fixed_width_area.rs +++ b/src/parser/fixed_width_area.rs @@ -3,15 +3,16 @@ use nom::bytes::complete::is_not; use nom::bytes::complete::tag; use nom::character::complete::line_ending; use nom::character::complete::space0; -use nom::character::complete::space1; use nom::combinator::eof; use nom::combinator::not; -use nom::combinator::opt; +use nom::combinator::recognize; use nom::multi::many0; use nom::sequence::preceded; use nom::sequence::tuple; use super::org_source::OrgSource; +use super::util::only_space1; +use super::util::org_line_ending; use crate::context::parser_with_context; use crate::context::RefContext; use crate::error::Res; @@ -47,10 +48,10 @@ fn fixed_width_area_line<'b, 'g, 'r, 's>( ) -> Res, OrgSource<'s>> { start_of_line(input)?; let (remaining, _indent) = space0(input)?; - let (remaining, (_colon, _leading_whitespace_and_content, _line_ending)) = tuple(( + let (remaining, _) = tuple(( tag(":"), - opt(tuple((space1, is_not("\r\n")))), - alt((line_ending, eof)), + alt((recognize(tuple((only_space1, is_not("\r\n")))), space0)), + org_line_ending, ))(remaining)?; let source = get_consumed(input, remaining); Ok((remaining, source)) diff --git a/src/parser/footnote_definition.rs b/src/parser/footnote_definition.rs index a344697..75bb3ef 100644 --- a/src/parser/footnote_definition.rs +++ b/src/parser/footnote_definition.rs @@ -2,7 +2,6 @@ use nom::branch::alt; use nom::bytes::complete::tag; use nom::bytes::complete::tag_no_case; use nom::bytes::complete::take_while; -use nom::character::complete::digit1; use nom::character::complete::space0; use nom::combinator::opt; use nom::combinator::recognize; @@ -94,10 +93,7 @@ pub(crate) fn footnote_definition<'b, 'g, 'r, 's>( #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] pub(crate) fn label<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { - alt(( - digit1, - take_while(|c| WORD_CONSTITUENT_CHARACTERS.contains(c) || "-_".contains(c)), - ))(input) + take_while(|c| WORD_CONSTITUENT_CHARACTERS.contains(c) || "-_".contains(c))(input) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] diff --git a/src/parser/greater_block.rs b/src/parser/greater_block.rs index 1721163..97a00c3 100644 --- a/src/parser/greater_block.rs +++ b/src/parser/greater_block.rs @@ -4,11 +4,14 @@ use nom::bytes::complete::tag_no_case; use nom::character::complete::line_ending; use nom::character::complete::space0; use nom::character::complete::space1; +use nom::combinator::consumed; use nom::combinator::eof; use nom::combinator::not; use nom::combinator::opt; use nom::combinator::verify; +use nom::multi::many0; use nom::multi::many_till; +use nom::sequence::preceded; use nom::sequence::tuple; use super::org_source::OrgSource; @@ -80,25 +83,23 @@ pub(crate) fn greater_block<'b, 'g, 'r, 's>( let element_matcher = parser_with_context!(element(true))(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); - // Check for a completely empty block - let (remaining, children) = match tuple(( - not(exit_matcher), + not(exit_matcher)(remaining)?; + let (remaining, leading_blank_lines) = opt(consumed(tuple(( blank_line, - many_till(blank_line, exit_matcher), - ))(remaining) - { - Ok((remain, (_not_immediate_exit, first_line, (_trailing_whitespace, _exit_contents)))) => { + many0(preceded(not(exit_matcher), blank_line)), + ))))(remaining)?; + let leading_blank_lines = + leading_blank_lines.map(|(source, (first_line, _remaining_lines))| { let mut element = Element::Paragraph(Paragraph::of_text(first_line.into())); - let source = get_consumed(remaining, remain); element.set_source(source.into()); - (remain, vec![element]) - } - Err(_) => { - let (remaining, (children, _exit_contents)) = - many_till(element_matcher, exit_matcher)(remaining)?; - (remaining, children) - } - }; + element + }); + let (remaining, (mut children, _exit_contents)) = + many_till(element_matcher, exit_matcher)(remaining)?; + if let Some(lines) = leading_blank_lines { + children.insert(0, lines); + } + let (remaining, _end) = exit_with_name(&parser_context, remaining)?; // Not checking if parent exit matcher is causing exit because the greater_block_end matcher asserts we matched a full greater block @@ -126,7 +127,6 @@ fn parameters<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { } fn greater_block_end<'c>(name: &'c str) -> impl ContextMatcher + 'c { - // TODO: Can this be done without making an owned copy? move |context, input: OrgSource<'_>| _greater_block_end(context, input, name) } diff --git a/src/parser/headline.rs b/src/parser/headline.rs index 3b472de..d6e57bb 100644 --- a/src/parser/headline.rs +++ b/src/parser/headline.rs @@ -1,24 +1,26 @@ use nom::branch::alt; +use nom::bytes::complete::is_a; use nom::bytes::complete::tag; use nom::character::complete::anychar; -use nom::character::complete::line_ending; use nom::character::complete::space0; use nom::character::complete::space1; -use nom::combinator::eof; use nom::combinator::map; use nom::combinator::not; use nom::combinator::opt; +use nom::combinator::peek; use nom::combinator::recognize; use nom::combinator::verify; use nom::multi::many0; use nom::multi::many1; -use nom::multi::many1_count; use nom::multi::separated_list1; use nom::sequence::tuple; use super::org_source::OrgSource; use super::section::section; use super::util::get_consumed; +use super::util::org_line_ending; +use super::util::org_space; +use super::util::org_space_or_line_ending; use super::util::start_of_line; use crate::context::parser_with_context; use crate::context::ContextElement; @@ -32,30 +34,39 @@ use crate::parser::object_parser::standard_set_object; use crate::parser::util::blank_line; use crate::types::DocumentElement; use crate::types::Heading; +use crate::types::HeadlineLevel; use crate::types::Object; use crate::types::PriorityCookie; use crate::types::TodoKeywordType; pub(crate) const fn heading( - parent_stars: usize, + parent_level: HeadlineLevel, ) -> impl for<'b, 'g, 'r, 's> Fn( RefContext<'b, 'g, 'r, 's>, OrgSource<'s>, ) -> Res, Heading<'s>> { - move |context, input: OrgSource<'_>| _heading(context, input, parent_stars) + move |context, input: OrgSource<'_>| _heading(context, input, parent_level) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn _heading<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, - parent_stars: usize, + parent_star_count: HeadlineLevel, ) -> Res, Heading<'s>> { not(|i| context.check_exit_matcher(i))(input)?; let ( remaining, - (star_count, maybe_todo_keyword, maybe_priority, maybe_comment, title, heading_tags), - ) = headline(context, input, parent_stars)?; + ( + headline_level, + star_count, + maybe_todo_keyword, + maybe_priority, + maybe_comment, + title, + heading_tags, + ), + ) = headline(context, input, parent_star_count)?; let section_matcher = parser_with_context!(section)(context); let heading_matcher = parser_with_context!(heading(star_count))(context); let (remaining, maybe_section) = @@ -80,11 +91,11 @@ fn _heading<'b, 'g, 'r, 's>( remaining, Heading { source: source.into(), - stars: star_count, - todo_keyword: maybe_todo_keyword.map(|((todo_keyword_type, todo_keyword), _ws)| { + level: headline_level, + todo_keyword: maybe_todo_keyword.map(|(todo_keyword_type, todo_keyword)| { (todo_keyword_type, Into::<&str>::into(todo_keyword)) }), - priority_cookie: maybe_priority.map(|(priority, _)| priority), + priority_cookie: maybe_priority.map(|(_, priority)| priority), title, tags: heading_tags, children, @@ -104,14 +115,15 @@ pub(crate) fn detect_headline<'s>(input: OrgSource<'s>) -> Res, () fn headline<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, - parent_stars: usize, + parent_star_count: HeadlineLevel, ) -> Res< OrgSource<'s>, ( - usize, - Option<((TodoKeywordType, OrgSource<'s>), OrgSource<'s>)>, - Option<(PriorityCookie, OrgSource<'s>)>, - Option<(OrgSource<'s>, OrgSource<'s>)>, + HeadlineLevel, + HeadlineLevel, + Option<(TodoKeywordType, OrgSource<'s>)>, + Option<(OrgSource<'s>, PriorityCookie)>, + Option>, Vec>, Vec<&'s str>, ), @@ -122,45 +134,47 @@ fn headline<'b, 'g, 'r, 's>( }); let parser_context = context.with_additional_node(&parser_context); - let ( - remaining, - ( - _, - star_count, - _, - maybe_todo_keyword, - maybe_priority, - maybe_comment, - title, - maybe_tags, - _, - _, - ), - ) = tuple(( + let (remaining, (_, (headline_level, star_count, _), _)) = tuple(( start_of_line, - verify(many1_count(tag("*")), |star_count| { - *star_count > parent_stars - }), - space1, - opt(tuple(( - parser_with_context!(heading_keyword)(&parser_context), - space1, - ))), - opt(tuple((priority_cookie, space1))), - opt(tuple((tag("COMMENT"), space1))), - many1(parser_with_context!(standard_set_object)(&parser_context)), - opt(tuple((space0, tags))), - space0, - alt((line_ending, eof)), + verify( + parser_with_context!(headline_level)(&parser_context), + |(_, count, _)| *count > parent_star_count, + ), + peek(org_space), ))(input)?; + + let (remaining, maybe_todo_keyword) = opt(tuple(( + space1, + parser_with_context!(heading_keyword)(&parser_context), + peek(org_space_or_line_ending), + )))(remaining)?; + + let (remaining, maybe_priority) = opt(tuple((space1, priority_cookie)))(remaining)?; + + let (remaining, maybe_comment) = opt(tuple(( + space1, + tag("COMMENT"), + peek(org_space_or_line_ending), + )))(remaining)?; + + let (remaining, maybe_title) = opt(tuple(( + space1, + many1(parser_with_context!(standard_set_object)(&parser_context)), + )))(remaining)?; + + let (remaining, maybe_tags) = opt(tuple((space0, tags)))(remaining)?; + + let (remaining, _) = tuple((space0, org_line_ending))(remaining)?; + Ok(( remaining, ( + headline_level, star_count, - maybe_todo_keyword, + maybe_todo_keyword.map(|(_, todo, _)| todo), maybe_priority, - maybe_comment, - title, + maybe_comment.map(|(_, comment, _)| comment), + maybe_title.map(|(_, title)| title).unwrap_or(Vec::new()), maybe_tags .map(|(_ws, tags)| { tags.into_iter() @@ -177,10 +191,7 @@ fn headline_title_end<'b, 'g, 'r, 's>( _context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, OrgSource<'s>> { - recognize(tuple(( - opt(tuple((space0, tags, space0))), - alt((line_ending, eof)), - )))(input) + recognize(tuple((space0, opt(tuple((tags, space0))), org_line_ending)))(input) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] @@ -256,3 +267,23 @@ fn priority_cookie<'s>(input: OrgSource<'s>) -> Res, PriorityCooki })?; Ok((remaining, cookie)) } + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn headline_level<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, (HeadlineLevel, HeadlineLevel, OrgSource<'s>)> { + let (remaining, stars) = is_a("*")(input)?; + let count = stars.len().try_into().unwrap(); + let level = match context.get_global_settings().odd_levels_only { + crate::context::HeadlineLevelFilter::Odd => { + if count % 2 == 0 { + (count + 2) / 2 + } else { + (count + 1) / 2 + } + } + crate::context::HeadlineLevelFilter::OddEven => count, + }; + Ok((remaining, (level, count, stars))) +} diff --git a/src/parser/in_buffer_settings.rs b/src/parser/in_buffer_settings.rs index da93d08..40f8505 100644 --- a/src/parser/in_buffer_settings.rs +++ b/src/parser/in_buffer_settings.rs @@ -1,13 +1,17 @@ use nom::branch::alt; +use nom::bytes::complete::is_not; use nom::bytes::complete::tag_no_case; use nom::character::complete::anychar; +use nom::character::complete::space1; use nom::combinator::map; use nom::multi::many0; use nom::multi::many_till; +use nom::multi::separated_list0; use super::keyword::filtered_keyword; use super::keyword_todo::todo_keywords; use super::OrgSource; +use crate::context::HeadlineLevelFilter; use crate::error::Res; use crate::types::Keyword; use crate::GlobalSettings; @@ -50,6 +54,7 @@ pub(crate) fn apply_in_buffer_settings<'g, 's, 'sf>( ) -> Result, String> { let mut new_settings = original_settings.clone(); + // Todo Keywords for kw in keywords.iter().filter(|kw| { kw.key.eq_ignore_ascii_case("todo") || kw.key.eq_ignore_ascii_case("seq_todo") @@ -65,5 +70,21 @@ pub(crate) fn apply_in_buffer_settings<'g, 's, 'sf>( .extend(complete_words.into_iter().map(str::to_string)); } + // Startup settings + for kw in keywords + .iter() + .filter(|kw| kw.key.eq_ignore_ascii_case("startup")) + { + let (_remaining, settings) = + separated_list0(space1::<&str, nom::error::Error<_>>, is_not(" \t"))(kw.value) + .map_err(|err: nom::Err<_>| err.to_string())?; + if settings.contains(&"odd") { + new_settings.odd_levels_only = HeadlineLevelFilter::Odd; + } + if settings.contains(&"oddeven") { + new_settings.odd_levels_only = HeadlineLevelFilter::OddEven; + } + } + Ok(new_settings) } diff --git a/src/parser/keyword.rs b/src/parser/keyword.rs index 4a71976..8510fe9 100644 --- a/src/parser/keyword.rs +++ b/src/parser/keyword.rs @@ -12,6 +12,7 @@ use nom::combinator::eof; use nom::combinator::not; use nom::combinator::peek; use nom::combinator::recognize; +use nom::combinator::verify; use nom::multi::many_till; use nom::sequence::tuple; @@ -116,7 +117,9 @@ pub(crate) fn table_formula_keyword<'b, 'g, 'r, 's>( _context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Keyword<'s>> { - filtered_keyword(table_formula_key)(input) + verify(filtered_keyword(table_formula_key), |kw| { + !kw.value.is_empty() + })(input) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] diff --git a/src/parser/keyword_todo.rs b/src/parser/keyword_todo.rs index 5a009ea..821f974 100644 --- a/src/parser/keyword_todo.rs +++ b/src/parser/keyword_todo.rs @@ -44,9 +44,17 @@ pub(crate) fn todo_keywords<'s>(input: &'s str) -> Res<&'s str, (Vec<&'s str>, V } fn todo_keyword_word<'s>(input: &'s str) -> Res<&'s str, &'s str> { - verify(take_till(|c| " \t\r\n|".contains(c)), |result: &str| { + let (remaining, keyword) = verify(take_till(|c| "( \t\r\n|".contains(c)), |result: &str| { !result.is_empty() - })(input) + })(input)?; + + let (remaining, _) = opt(tuple(( + tag("("), + take_till(|c| "() \t\r\n|".contains(c)), + tag(")"), + )))(remaining)?; + + Ok((remaining, keyword)) } #[cfg(test)] mod tests { diff --git a/src/parser/org_source.rs b/src/parser/org_source.rs index 4134812..f38b85d 100644 --- a/src/parser/org_source.rs +++ b/src/parser/org_source.rs @@ -59,6 +59,10 @@ impl<'s> OrgSource<'s> { self.end - self.start } + pub(crate) fn get_byte_offset(&self) -> usize { + self.start + } + pub(crate) fn get_preceding_character(&self) -> Option { self.preceding_character } diff --git a/src/parser/plain_link.rs b/src/parser/plain_link.rs index 1827fc3..b1a909a 100644 --- a/src/parser/plain_link.rs +++ b/src/parser/plain_link.rs @@ -5,17 +5,24 @@ use nom::character::complete::anychar; use nom::character::complete::none_of; use nom::character::complete::one_of; use nom::combinator::eof; +use nom::combinator::not; use nom::combinator::peek; use nom::combinator::recognize; use nom::combinator::verify; +use nom::multi::many0; +use nom::multi::many1; use nom::multi::many_till; +use nom::sequence::tuple; +use super::org_source::BracketDepth; use super::org_source::OrgSource; use super::util::maybe_consume_object_trailing_whitespace_if_not_exiting; use crate::context::parser_with_context; use crate::context::ContextElement; +use crate::context::ContextMatcher; use crate::context::ExitClass; use crate::context::ExitMatcherNode; +use crate::context::Matcher; use crate::context::RefContext; use crate::error::CustomError; use crate::error::MyError; @@ -130,17 +137,77 @@ fn path_plain<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, OrgSource<'s>> { - // TODO: "optionally containing parenthesis-wrapped non-whitespace non-bracket substrings up to a depth of two. The string must end with either a non-punctation non-whitespace character, a forwards slash, or a parenthesis-wrapped substring" + let path_plain_end = path_plain_end(input.get_parenthesis_depth()); let parser_context = ContextElement::ExitMatcherNode(ExitMatcherNode { class: ExitClass::Gamma, exit_matcher: &path_plain_end, }); let parser_context = context.with_additional_node(&parser_context); - let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); + let (remaining, _components) = many1(alt(( + parser_with_context!(path_plain_no_parenthesis)(&parser_context), + parser_with_context!(path_plain_parenthesis)(&parser_context), + )))(input)?; + let source = get_consumed(input, remaining); + Ok((remaining, source)) +} + +fn path_plain_end(starting_parenthesis_depth: BracketDepth) -> impl ContextMatcher { + move |context, input: OrgSource<'_>| _path_plain_end(context, input, starting_parenthesis_depth) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn _path_plain_end<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, + starting_parenthesis_depth: BracketDepth, +) -> Res, OrgSource<'s>> { + let (remaining, _leading_punctuation) = many0(verify(anychar, |c| { + !" \t\r\n[]<>()/".contains(*c) && c.is_ascii_punctuation() + }))(input)?; + + let disallowed_character = recognize(one_of(" \t\r\n[]<>"))(remaining); + if disallowed_character.is_ok() { + return disallowed_character; + } + + let current_depth = remaining.get_parenthesis_depth() - starting_parenthesis_depth; + if current_depth == 0 { + let close_parenthesis = + tag::<&str, OrgSource<'_>, CustomError>>(")")(remaining); + if close_parenthesis.is_ok() { + return close_parenthesis; + } + + let open_parenthesis_without_match = recognize(tuple(( + peek(tag("(")), + not(parser_with_context!(path_plain_parenthesis)(context)), + )))(remaining); + if open_parenthesis_without_match.is_ok() { + return open_parenthesis_without_match; + } + } + + // many0 punctuation + Err(nom::Err::Error(CustomError::MyError(MyError( + "No path plain end".into(), + )))) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn path_plain_no_parenthesis<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, OrgSource<'s>> { let (remaining, path) = recognize(verify( - many_till(anychar, peek(exit_matcher)), + many_till( + anychar, + alt(( + peek(path_plain_no_parenthesis_disallowed_character), + parser_with_context!(exit_matcher_parser)(context), + )), + ), |(children, _exit_contents)| !children.is_empty(), ))(input)?; @@ -148,14 +215,65 @@ fn path_plain<'b, 'g, 'r, 's>( } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn path_plain_end<'b, 'g, 'r, 's>( - _context: RefContext<'b, 'g, 'r, 's>, +fn path_plain_no_parenthesis_disallowed_character<'s>( input: OrgSource<'s>, ) -> Res, OrgSource<'s>> { - recognize(many_till( - verify(anychar, |c| { - *c != '/' && (c.is_ascii_punctuation() || c.is_whitespace()) - }), - one_of(" \t\r\n()[]<>"), - ))(input) + recognize(verify(anychar, |c| { + c.is_whitespace() || "()[]<>".contains(*c) + }))(input) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn path_plain_parenthesis<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, OrgSource<'s>> { + let (remaining, _opening) = tag("(")(input)?; + let starting_depth = remaining.get_parenthesis_depth(); + + let (remaining, _path) = recognize(verify( + many_till( + anychar, + alt(( + peek(path_plain_parenthesis_end(starting_depth)), + parser_with_context!(exit_matcher_parser)(context), + )), + ), + |(children, _exit_contents)| !children.is_empty(), + ))(remaining)?; + let (remaining, _opening) = tag(")")(remaining)?; + let source = get_consumed(input, remaining); + + Ok((remaining, source)) +} + +fn path_plain_parenthesis_end(starting_parenthesis_depth: BracketDepth) -> impl Matcher { + move |input: OrgSource<'_>| _path_plain_parenthesis_end(input, starting_parenthesis_depth) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn _path_plain_parenthesis_end<'s>( + input: OrgSource<'s>, + starting_parenthesis_depth: BracketDepth, +) -> Res, OrgSource<'s>> { + let current_depth = input.get_parenthesis_depth() - starting_parenthesis_depth; + if current_depth < 0 { + // This shouldn't be possible because if depth is 0 then a closing parenthesis should end the link. + unreachable!("Exceeded plain link parenthesis depth.") + } + if current_depth == 0 { + let close_parenthesis = tag::<&str, OrgSource<'_>, CustomError>>(")")(input); + if close_parenthesis.is_ok() { + return close_parenthesis; + } + } + if current_depth == 1 { + let open_parenthesis = tag::<&str, OrgSource<'_>, CustomError>>("(")(input); + if open_parenthesis.is_ok() { + return open_parenthesis; + } + } + Err(nom::Err::Error(CustomError::MyError(MyError( + "No closing parenthesis".into(), + )))) } diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 389c504..2ccacee 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -7,6 +7,7 @@ use nom::character::complete::one_of; use nom::character::complete::space0; use nom::character::complete::space1; use nom::combinator::eof; +use nom::combinator::map; use nom::combinator::not; use nom::combinator::opt; use nom::combinator::peek; @@ -21,6 +22,7 @@ use super::element_parser::element; use super::object_parser::standard_set_object; use super::org_source::OrgSource; use super::util::include_input; +use super::util::indentation_level; use super::util::non_whitespace_character; use crate::context::parser_with_context; use crate::context::ContextElement; @@ -35,21 +37,27 @@ use crate::parser::util::blank_line; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting; +use crate::parser::util::org_space; use crate::parser::util::start_of_line; +use crate::types::CheckboxType; +use crate::types::IndentationLevel; use crate::types::Object; use crate::types::PlainList; use crate::types::PlainListItem; #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -pub(crate) fn detect_plain_list<'s>(input: OrgSource<'s>) -> Res, ()> { +pub(crate) fn detect_plain_list<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, ()> { if verify( tuple(( start_of_line, space0, - bullet, + parser_with_context!(bullet)(context), alt((space1, line_ending, eof)), )), - |(_start, indent, bull, _after_whitespace)| { + |(_start, indent, (_bullet_type, bull), _after_whitespace)| { Into::<&str>::into(bull) != "*" || indent.len() > 0 }, )(input) @@ -81,7 +89,7 @@ pub(crate) fn plain_list<'b, 'g, 'r, 's>( let parser_context = parser_context.with_additional_node(&contexts[2]); // children stores tuple of (input string, parsed object) so we can re-parse the final item let mut children = Vec::new(); - let mut first_item_indentation: Option = None; + let mut first_item_indentation: Option = None; let mut remaining = input; // The final list item does not consume trailing blank lines (which instead get consumed by the list). We have three options here: @@ -142,44 +150,27 @@ fn plain_list_item<'b, 'g, 'r, 's>( input: OrgSource<'s>, ) -> Res, PlainListItem<'s>> { start_of_line(input)?; - let (remaining, leading_whitespace) = space0(input)?; - // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09) - let indent_level = leading_whitespace.len(); - let (remaining, bull) = verify(bullet, |bull: &OrgSource<'_>| { - Into::<&str>::into(bull) != "*" || indent_level > 0 - })(remaining)?; + let (remaining, (indent_level, _leading_whitespace)) = indentation_level(context, input)?; + let (remaining, (bullet_type, bull)) = verify( + parser_with_context!(bullet)(context), + |(_bullet_type, bull)| Into::<&str>::into(bull) != "*" || indent_level > 0, + )(remaining)?; - let (remaining, _maybe_counter_set) = - opt(tuple((space1, tag("[@"), counter, tag("]"))))(remaining)?; + let (remaining, _maybe_counter_set) = opt(tuple(( + space1, + tag("[@"), + parser_with_context!(counter)(context), + tag("]"), + )))(remaining)?; - // TODO: parse checkbox + let (remaining, maybe_checkbox) = opt(tuple((space1, item_checkbox)))(remaining)?; - let (remaining, maybe_tag) = - opt(tuple((space1, parser_with_context!(item_tag)(context))))(remaining)?; - - let maybe_contentless_item: Res, ()> = peek(parser_with_context!( - detect_contentless_item_contents - )(context))(remaining); - match maybe_contentless_item { - Ok((_rem, _ws)) => { - let (remaining, _trailing_ws) = opt(blank_line)(remaining)?; - let source = get_consumed(input, remaining); - return Ok(( - remaining, - PlainListItem { - source: source.into(), - indentation: indent_level, - bullet: bull.into(), - tag: maybe_tag - .map(|(_ws, item_tag)| item_tag) - .unwrap_or(Vec::new()), - children: Vec::new(), - }, - )); - } - Err(_) => {} + let (remaining, maybe_tag) = if let BulletType::Unordered = bullet_type { + opt(tuple((space1, parser_with_context!(item_tag)(context))))(remaining)? + } else { + (remaining, None) }; - let (remaining, _ws) = item_tag_post_gap(context, remaining)?; + let exit_matcher = plain_list_item_end(indent_level); let contexts = [ ContextElement::ConsumeTrailingWhitespace(true), @@ -191,6 +182,35 @@ fn plain_list_item<'b, 'g, 'r, 's>( let parser_context = context.with_additional_node(&contexts[0]); let parser_context = parser_context.with_additional_node(&contexts[1]); + let maybe_contentless_item: Res, ()> = peek(parser_with_context!( + detect_contentless_item_contents + )(&parser_context))(remaining); + match maybe_contentless_item { + Ok((_rem, _ws)) => { + let (remaining, _trailing_ws) = if context.should_consume_trailing_whitespace() { + recognize(alt((recognize(many1(blank_line)), eof)))(remaining)? + } else { + recognize(alt((blank_line, eof)))(remaining)? + }; + let source = get_consumed(input, remaining); + return Ok(( + remaining, + PlainListItem { + source: source.into(), + indentation: indent_level, + bullet: bull.into(), + checkbox: None, + tag: maybe_tag + .map(|(_ws, item_tag)| item_tag) + .unwrap_or(Vec::new()), + children: Vec::new(), + }, + )); + } + Err(_) => {} + }; + let (remaining, _ws) = item_tag_post_gap(&parser_context, remaining)?; + let (mut remaining, (mut children, _exit_contents)) = many_till( include_input(parser_with_context!(element(true))(&parser_context)), parser_with_context!(exit_matcher_parser)(&parser_context), @@ -219,6 +239,8 @@ fn plain_list_item<'b, 'g, 'r, 's>( source: source.into(), indentation: indent_level, bullet: bull.into(), + checkbox: maybe_checkbox + .map(|(_, (checkbox_type, source))| (checkbox_type, Into::<&str>::into(source))), tag: maybe_tag .map(|(_ws, item_tag)| item_tag) .unwrap_or(Vec::new()), @@ -227,19 +249,46 @@ fn plain_list_item<'b, 'g, 'r, 's>( )); } -#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn bullet<'s>(i: OrgSource<'s>) -> Res, OrgSource<'s>> { - alt(( - tag("*"), - tag("-"), - tag("+"), - recognize(tuple((counter, alt((tag("."), tag(")")))))), - ))(i) +#[derive(Debug)] +enum BulletType { + Ordered, + Unordered, } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn counter<'s>(i: OrgSource<'s>) -> Res, OrgSource<'s>> { - alt((recognize(one_of("abcdefghijklmnopqrstuvwxyz")), digit1))(i) +fn bullet<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, (BulletType, OrgSource<'s>)> { + alt(( + map(tag("*"), |bull| (BulletType::Unordered, bull)), + map(tag("-"), |bull| (BulletType::Unordered, bull)), + map(tag("+"), |bull| (BulletType::Unordered, bull)), + map( + recognize(tuple(( + parser_with_context!(counter)(context), + alt((tag("."), tag(")"))), + ))), + |bull| (BulletType::Ordered, bull), + ), + ))(input) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn counter<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, OrgSource<'s>> { + if context.get_global_settings().org_list_allow_alphabetical { + alt(( + recognize(one_of( + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", + )), + digit1, + ))(input) + } else { + digit1(input) + } } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] @@ -255,7 +304,7 @@ fn plain_list_end<'b, 'g, 'r, 's>( )))(input) } -const fn plain_list_item_end(indent_level: usize) -> impl ContextMatcher { +const fn plain_list_item_end(indent_level: IndentationLevel) -> impl ContextMatcher { let line_indented_lte_matcher = line_indented_lte(indent_level); move |context, input: OrgSource<'_>| { _plain_list_item_end(context, input, &line_indented_lte_matcher) @@ -278,20 +327,23 @@ fn _plain_list_item_end<'b, 'g, 'r, 's>( )))(input) } -const fn line_indented_lte(indent_level: usize) -> impl ContextMatcher { +const fn line_indented_lte(indent_level: IndentationLevel) -> impl ContextMatcher { move |context, input: OrgSource<'_>| _line_indented_lte(context, input, indent_level) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn _line_indented_lte<'b, 'g, 'r, 's>( - _context: RefContext<'b, 'g, 'r, 's>, + context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, - indent_level: usize, + indent_level: IndentationLevel, ) -> Res, OrgSource<'s>> { let matched = recognize(verify( - tuple((space0::, _>, non_whitespace_character)), + tuple(( + parser_with_context!(indentation_level)(context), + non_whitespace_character, + )), // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09) - |(_space0, _anychar)| _space0.len() <= indent_level, + |((indentation_level, _leading_whitespace), _anychar)| *indentation_level <= indent_level, ))(input)?; Ok(matched) @@ -324,22 +376,22 @@ fn item_tag_end<'b, 'g, 'r, 's>( _context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, OrgSource<'s>> { - alt(( - recognize(tuple(( - item_tag_divider, + alt((item_tag_divider, line_ending))(input) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn item_tag_divider<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { + recognize(tuple(( + one_of(" \t"), + tag("::"), + peek(tuple(( opt(tuple(( peek(one_of(" \t")), many_till(anychar, peek(alt((item_tag_divider, line_ending, eof)))), ))), alt((line_ending, eof)), ))), - line_ending, - ))(input) -} - -#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn item_tag_divider<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { - recognize(tuple((one_of(" \t"), tag("::"))))(input) + )))(input) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] @@ -363,6 +415,18 @@ fn item_tag_post_gap<'b, 'g, 'r, 's>( )(input) } +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn item_checkbox<'s>(input: OrgSource<'s>) -> Res, (CheckboxType, OrgSource<'s>)> { + alt(( + map( + recognize(tuple((tag("["), org_space, tag("]")))), + |capture| (CheckboxType::Off, capture), + ), + map(tag("[-]"), |capture| (CheckboxType::Trans, capture)), + map(tag("[X]"), |capture| (CheckboxType::On, capture)), + ))(input) +} + #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn detect_contentless_item_contents<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, @@ -558,21 +622,30 @@ dolar"#, r#"+ "#, ); - let result = detect_plain_list(input); + let global_settings = GlobalSettings::default(); + let initial_context = ContextElement::document_context(); + let initial_context = Context::new(&global_settings, List::new(&initial_context)); + let result = detect_plain_list(&initial_context, input); assert!(result.is_ok()); } #[test] fn detect_eof() { let input = OrgSource::new(r#"+"#); - let result = detect_plain_list(input); + let global_settings = GlobalSettings::default(); + let initial_context = ContextElement::document_context(); + let initial_context = Context::new(&global_settings, List::new(&initial_context)); + let result = detect_plain_list(&initial_context, input); assert!(result.is_ok()); } #[test] fn detect_no_gap() { let input = OrgSource::new(r#"+foo"#); - let result = detect_plain_list(input); + let global_settings = GlobalSettings::default(); + let initial_context = ContextElement::document_context(); + let initial_context = Context::new(&global_settings, List::new(&initial_context)); + let result = detect_plain_list(&initial_context, input); // Since there is no whitespace after the '+' this is a paragraph, not a plain list. assert!(result.is_err()); } @@ -580,7 +653,10 @@ dolar"#, #[test] fn detect_with_gap() { let input = OrgSource::new(r#"+ foo"#); - let result = detect_plain_list(input); + let global_settings = GlobalSettings::default(); + let initial_context = ContextElement::document_context(); + let initial_context = Context::new(&global_settings, List::new(&initial_context)); + let result = detect_plain_list(&initial_context, input); assert!(result.is_ok()); } } diff --git a/src/parser/plain_text.rs b/src/parser/plain_text.rs index 3b1e3c0..2671675 100644 --- a/src/parser/plain_text.rs +++ b/src/parser/plain_text.rs @@ -1,17 +1,26 @@ use nom::branch::alt; -use nom::bytes::complete::tag; +use nom::bytes::complete::is_not; +use nom::bytes::complete::tag_no_case; use nom::character::complete::anychar; -use nom::combinator::map; +use nom::character::complete::line_ending; +use nom::character::complete::one_of; +use nom::combinator::eof; use nom::combinator::peek; use nom::combinator::recognize; use nom::combinator::verify; +use nom::multi::many1; use nom::multi::many_till; +use nom::sequence::tuple; use super::org_source::OrgSource; use super::radio_link::RematchObject; use super::util::exit_matcher_parser; +use super::util::get_consumed; +use super::util::org_space_or_line_ending; use crate::context::parser_with_context; use crate::context::RefContext; +use crate::error::CustomError; +use crate::error::MyError; use crate::error::Res; use crate::types::Object; use crate::types::PlainText; @@ -72,11 +81,58 @@ impl<'x> RematchObject<'x> for PlainText<'x> { _context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Object<'s>> { - map(tag(self.source), |s| { + let mut remaining = input; + let mut goal = self.source; + + loop { + if goal.is_empty() { + break; + } + + let is_not_whitespace = is_not::<&str, &str, CustomError<_>>(" \t\r\n")(goal); + match is_not_whitespace { + Ok((new_goal, payload)) => { + let (new_remaining, _) = tuple(( + tag_no_case(payload), + // TODO: Test to see what the REAL condition is. Checking for not-alphabetic works fine for now, but the real criteria might be something like the plain text exit matcher. + peek(alt(( + recognize(verify(anychar, |c| !c.is_alphanumeric())), + eof, + ))), + ))(remaining)?; + remaining = new_remaining; + goal = new_goal; + continue; + } + Err(_) => {} + }; + + let is_whitespace = recognize(many1(alt(( + recognize(one_of::<&str, &str, CustomError<_>>(" \t")), + line_ending, + ))))(goal); + match is_whitespace { + Ok((new_goal, _)) => { + let (new_remaining, _) = many1(org_space_or_line_ending)(remaining)?; + remaining = new_remaining; + goal = new_goal; + continue; + } + Err(_) => {} + }; + + return Err(nom::Err::Error(CustomError::MyError(MyError( + "Target does not match.".into(), + )))); + } + + let source = get_consumed(input, remaining); + Ok(( + remaining, Object::PlainText(PlainText { - source: Into::<&str>::into(s), - }) - })(input) + source: Into::<&str>::into(source), + }), + )) } } diff --git a/src/parser/planning.rs b/src/parser/planning.rs index 15312bb..b50a9ea 100644 --- a/src/parser/planning.rs +++ b/src/parser/planning.rs @@ -1,16 +1,16 @@ use nom::branch::alt; -use nom::bytes::complete::is_not; use nom::bytes::complete::tag; use nom::bytes::complete::tag_no_case; -use nom::character::complete::line_ending; use nom::character::complete::space0; use nom::character::complete::space1; -use nom::combinator::eof; -use nom::multi::separated_list1; +use nom::multi::many1; use nom::sequence::tuple; use super::org_source::OrgSource; +use super::timestamp::timestamp; use super::util::maybe_consume_trailing_whitespace_if_not_exiting; +use super::util::org_line_ending; +use crate::context::parser_with_context; use crate::context::RefContext; use crate::error::Res; use crate::parser::util::get_consumed; @@ -24,8 +24,9 @@ pub(crate) fn planning<'b, 'g, 'r, 's>( ) -> Res, Planning<'s>> { start_of_line(input)?; let (remaining, _leading_whitespace) = space0(input)?; - let (remaining, _planning_parameters) = separated_list1(space1, planning_parameter)(remaining)?; - let (remaining, _trailing_ws) = tuple((space0, alt((line_ending, eof))))(remaining)?; + let (remaining, _planning_parameters) = + many1(parser_with_context!(planning_parameter)(context))(remaining)?; + let (remaining, _trailing_ws) = tuple((space0, org_line_ending))(remaining)?; let (remaining, _trailing_ws) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; @@ -40,15 +41,17 @@ pub(crate) fn planning<'b, 'g, 'r, 's>( } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] -fn planning_parameter<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { +fn planning_parameter<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, OrgSource<'s>> { let (remaining, _planning_type) = alt(( tag_no_case("DEADLINE"), tag_no_case("SCHEDULED"), tag_no_case("CLOSED"), ))(input)?; let (remaining, _gap) = tuple((tag(":"), space1))(remaining)?; - // TODO: Make this invoke the real timestamp parser. - let (remaining, _timestamp) = tuple((tag("<"), is_not("\r\n>"), tag(">")))(remaining)?; + let (remaining, _timestamp) = timestamp(context, remaining)?; let source = get_consumed(input, remaining); Ok((remaining, source)) } diff --git a/src/parser/radio_link.rs b/src/parser/radio_link.rs index 2276231..be549c8 100644 --- a/src/parser/radio_link.rs +++ b/src/parser/radio_link.rs @@ -62,6 +62,22 @@ pub(crate) fn rematch_target<'x, 'b, 'g, 'r, 's>( remaining = new_remaining; new_matches.push(new_match); } + Object::Italic(italic) => { + let (new_remaining, new_match) = italic.rematch_object(context, remaining)?; + remaining = new_remaining; + new_matches.push(new_match); + } + Object::Underline(underline) => { + let (new_remaining, new_match) = underline.rematch_object(context, remaining)?; + remaining = new_remaining; + new_matches.push(new_match); + } + Object::StrikeThrough(strikethrough) => { + let (new_remaining, new_match) = + strikethrough.rematch_object(context, remaining)?; + remaining = new_remaining; + new_matches.push(new_match); + } Object::PlainText(plaintext) => { let (new_remaining, new_match) = plaintext.rematch_object(context, remaining)?; remaining = new_remaining; diff --git a/src/parser/regular_link.rs b/src/parser/regular_link.rs index 847b915..892ee59 100644 --- a/src/parser/regular_link.rs +++ b/src/parser/regular_link.rs @@ -2,7 +2,7 @@ use nom::branch::alt; use nom::bytes::complete::escaped; use nom::bytes::complete::tag; use nom::bytes::complete::take_till1; -use nom::character::complete::one_of; +use nom::character::complete::anychar; use nom::combinator::verify; use nom::multi::many_till; @@ -78,11 +78,11 @@ fn pathreg<'b, 'g, 'r, 's>( ) -> Res, OrgSource<'s>> { let (remaining, path) = escaped( take_till1(|c| match c { - '\\' | ']' => true, + '\\' | '[' | ']' => true, _ => false, }), '\\', - one_of(r#"]"#), + anychar, )(input)?; Ok((remaining, path)) } diff --git a/src/parser/subscript_and_superscript.rs b/src/parser/subscript_and_superscript.rs index f0eb6ba..c1b2829 100644 --- a/src/parser/subscript_and_superscript.rs +++ b/src/parser/subscript_and_superscript.rs @@ -23,6 +23,7 @@ use crate::context::ContextElement; use crate::context::ContextMatcher; use crate::context::ExitClass; use crate::context::ExitMatcherNode; +use crate::context::Matcher; use crate::context::RefContext; use crate::error::CustomError; use crate::error::MyError; @@ -112,6 +113,10 @@ fn script_body<'b, 'g, 'r, 's>( map(parser_with_context!(script_with_braces)(context), |body| { ScriptBody::WithBraces(body.into()) }), + map( + parser_with_context!(script_with_parenthesis)(context), + |body| ScriptBody::Braceless(body.into()), + ), ))(input) } @@ -199,3 +204,49 @@ fn _script_with_braces_end<'b, 'g, 'r, 's>( } tag("}")(input) } + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn script_with_parenthesis<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, OrgSource<'s>> { + let (remaining, _) = tag("(")(input)?; + let exit_with_depth = script_with_parenthesis_end(remaining.get_parenthesis_depth()); + + let (remaining, _) = many_till( + anychar, + alt(( + peek(exit_with_depth), + parser_with_context!(exit_matcher_parser)(context), + )), + )(remaining)?; + + let (remaining, _) = tag(")")(remaining)?; + let source = get_consumed(input, remaining); + Ok((remaining, source)) +} + +fn script_with_parenthesis_end(starting_parenthesis_depth: BracketDepth) -> impl Matcher { + move |input: OrgSource<'_>| _script_with_parenthesis_end(input, starting_parenthesis_depth) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn _script_with_parenthesis_end<'s>( + input: OrgSource<'s>, + starting_parenthesis_depth: BracketDepth, +) -> Res, OrgSource<'s>> { + let current_depth = input.get_parenthesis_depth() - starting_parenthesis_depth; + if current_depth < 0 { + // This shouldn't be possible because if depth is 0 then a closing bracket should end the citation. + unreachable!("Exceeded citation key suffix bracket depth.") + } + if current_depth == 0 { + let close_parenthesis = tag::<&str, OrgSource<'_>, CustomError>>(")")(input); + if close_parenthesis.is_ok() { + return close_parenthesis; + } + } + Err(nom::Err::Error(CustomError::MyError(MyError( + "No script parenthesis end.".into(), + )))) +} diff --git a/src/parser/text_markup.rs b/src/parser/text_markup.rs index bd74215..a32c4c1 100644 --- a/src/parser/text_markup.rs +++ b/src/parser/text_markup.rs @@ -20,6 +20,7 @@ use super::org_source::OrgSource; use super::radio_link::RematchObject; use super::util::in_object_section; use super::util::maybe_consume_object_trailing_whitespace_if_not_exiting; +use super::util::start_of_line; use crate::context::parser_with_context; use crate::context::ContextElement; use crate::context::ContextMatcher; @@ -64,8 +65,7 @@ fn bold<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Bold<'s>> { - let text_markup_object_specialized = text_markup_object("*"); - let (remaining, children) = text_markup_object_specialized(context, input)?; + let (remaining, children) = text_markup_object("*")(context, input)?; let source = get_consumed(input, remaining); Ok(( remaining, @@ -81,8 +81,7 @@ fn italic<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Italic<'s>> { - let text_markup_object_specialized = text_markup_object("/"); - let (remaining, children) = text_markup_object_specialized(context, input)?; + let (remaining, children) = text_markup_object("/")(context, input)?; let source = get_consumed(input, remaining); Ok(( remaining, @@ -98,8 +97,7 @@ fn underline<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Underline<'s>> { - let text_markup_object_specialized = text_markup_object("_"); - let (remaining, children) = text_markup_object_specialized(context, input)?; + let (remaining, children) = text_markup_object("_")(context, input)?; let source = get_consumed(input, remaining); Ok(( remaining, @@ -115,8 +113,7 @@ fn strike_through<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, StrikeThrough<'s>> { - let text_markup_object_specialized = text_markup_object("+"); - let (remaining, children) = text_markup_object_specialized(context, input)?; + let (remaining, children) = text_markup_object("+")(context, input)?; let source = get_consumed(input, remaining); Ok(( remaining, @@ -132,8 +129,7 @@ fn verbatim<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Verbatim<'s>> { - let text_markup_string_specialized = text_markup_string("="); - let (remaining, contents) = text_markup_string_specialized(context, input)?; + let (remaining, contents) = text_markup_string("=")(context, input)?; let source = get_consumed(input, remaining); Ok(( remaining, @@ -149,8 +145,7 @@ fn code<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Code<'s>> { - let text_markup_string_specialized = text_markup_string("~"); - let (remaining, contents) = text_markup_string_specialized(context, input)?; + let (remaining, contents) = text_markup_string("~")(context, input)?; let source = get_consumed(input, remaining); Ok(( remaining, @@ -168,8 +163,7 @@ fn text_markup_object<'c>( OrgSource<'s>, ) -> Res, Vec>> + 'c { - let marker_symbol = marker_symbol.to_owned(); - move |context, input: OrgSource<'_>| _text_markup_object(context, input, marker_symbol.as_str()) + move |context, input: OrgSource<'_>| _text_markup_object(context, input, marker_symbol) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] @@ -188,7 +182,7 @@ fn _text_markup_object<'b, 'g, 'r, 's, 'c>( let (remaining, open) = tag(marker_symbol)(remaining)?; let (remaining, _peek_not_whitespace) = peek(verify(anychar, |c| !c.is_whitespace() && *c != '\u{200B}'))(remaining)?; - let text_markup_end_specialized = text_markup_end(open.into()); + let text_markup_end_specialized = text_markup_end(open.into(), remaining.get_byte_offset()); let contexts = [ ContextElement::ContextObject(marker_symbol), ContextElement::ExitMatcherNode(ExitMatcherNode { @@ -250,7 +244,7 @@ fn _text_markup_string<'b, 'g, 'r, 's, 'c>( let (remaining, open) = tag(marker_symbol)(remaining)?; let (remaining, _peek_not_whitespace) = peek(verify(anychar, |c| !c.is_whitespace() && *c != '\u{200B}'))(remaining)?; - let text_markup_end_specialized = text_markup_end(open.into()); + let text_markup_end_specialized = text_markup_end(open.into(), remaining.get_byte_offset()); let contexts = [ ContextElement::ContextObject(marker_symbol), ContextElement::ExitMatcherNode(ExitMatcherNode { @@ -292,16 +286,22 @@ fn pre<'b, 'g, 'r, 's>( _context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, ()> { + if start_of_line(input).is_ok() { + return Ok((input, ())); + } + if preceded_by_whitespace(true)(input).is_ok() { + return Ok((input, ())); + } let preceding_character = input.get_preceding_character(); match preceding_character { // If None, we are at the start of the file which is technically the beginning of a line. - None | Some('\r') | Some('\n') | Some(' ') | Some('\t') | Some('-') | Some('(') - | Some('{') | Some('\'') | Some('"') | Some('<') => {} + Some('-') | Some('(') | Some('{') | Some('\'') | Some('"') => {} Some(_) => { return Err(nom::Err::Error(CustomError::MyError(MyError( "Not a valid pre character for text markup.".into(), )))); } + None => unreachable!(), // None is for start of file, which should already be handled by the start_of_line matcher above. }; Ok((input, ())) } @@ -311,12 +311,17 @@ fn post<'b, 'g, 'r, 's>( _context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, ()> { - let (remaining, _) = alt((recognize(one_of(" \r\n\t-.,;:!?')}[\"")), line_ending))(input)?; + let (remaining, _) = alt((recognize(one_of(" \r\n\t-.,;:!?')}[\"\\")), line_ending))(input)?; Ok((remaining, ())) } -fn text_markup_end<'c>(marker_symbol: &'c str) -> impl ContextMatcher + 'c { - move |context, input: OrgSource<'_>| _text_markup_end(context, input, marker_symbol) +fn text_markup_end<'c>( + marker_symbol: &'c str, + contents_start_offset: usize, +) -> impl ContextMatcher + 'c { + move |context, input: OrgSource<'_>| { + _text_markup_end(context, input, marker_symbol, contents_start_offset) + } } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] @@ -324,7 +329,13 @@ fn _text_markup_end<'b, 'g, 'r, 's, 'c>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, marker_symbol: &'c str, + contents_start_offset: usize, ) -> Res, OrgSource<'s>> { + if input.get_byte_offset() == contents_start_offset { + return Err(nom::Err::Error(CustomError::MyError(MyError( + "Text markup cannot be empty".into(), + )))); + } not(preceded_by_whitespace(false))(input)?; let (remaining, _marker) = terminated( tag(marker_symbol), @@ -354,6 +365,66 @@ impl<'x> RematchObject<'x> for Bold<'x> { } } +impl<'x> RematchObject<'x> for Italic<'x> { + #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] + fn rematch_object<'b, 'g, 'r, 's>( + &'x self, + _context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, + ) -> Res, Object<'s>> { + let (remaining, children) = + _rematch_text_markup_object(_context, input, "/", &self.children)?; + let source = get_consumed(input, remaining); + Ok(( + remaining, + Object::Italic(Italic { + source: source.into(), + children, + }), + )) + } +} + +impl<'x> RematchObject<'x> for Underline<'x> { + #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] + fn rematch_object<'b, 'g, 'r, 's>( + &'x self, + _context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, + ) -> Res, Object<'s>> { + let (remaining, children) = + _rematch_text_markup_object(_context, input, "_", &self.children)?; + let source = get_consumed(input, remaining); + Ok(( + remaining, + Object::Underline(Underline { + source: source.into(), + children, + }), + )) + } +} + +impl<'x> RematchObject<'x> for StrikeThrough<'x> { + #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] + fn rematch_object<'b, 'g, 'r, 's>( + &'x self, + _context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, + ) -> Res, Object<'s>> { + let (remaining, children) = + _rematch_text_markup_object(_context, input, "+", &self.children)?; + let source = get_consumed(input, remaining); + Ok(( + remaining, + Object::StrikeThrough(StrikeThrough { + source: source.into(), + children, + }), + )) + } +} + #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn _rematch_text_markup_object<'b, 'g, 'r, 's, 'x>( context: RefContext<'b, 'g, 'r, 's>, @@ -364,7 +435,7 @@ fn _rematch_text_markup_object<'b, 'g, 'r, 's, 'x>( let (remaining, _) = pre(context, input)?; let (remaining, open) = tag(marker_symbol)(remaining)?; let (remaining, _peek_not_whitespace) = peek(not(multispace1))(remaining)?; - let text_markup_end_specialized = text_markup_end(open.into()); + let text_markup_end_specialized = text_markup_end(open.into(), remaining.get_byte_offset()); let parser_context = ContextElement::ExitMatcherNode(ExitMatcherNode { class: ExitClass::Gamma, exit_matcher: &text_markup_end_specialized, diff --git a/src/parser/timestamp.rs b/src/parser/timestamp.rs index c1fb5e2..1d4e50c 100644 --- a/src/parser/timestamp.rs +++ b/src/parser/timestamp.rs @@ -1,6 +1,7 @@ use nom::branch::alt; use nom::bytes::complete::tag; use nom::character::complete::anychar; +use nom::character::complete::digit0; use nom::character::complete::digit1; use nom::character::complete::one_of; use nom::character::complete::space1; @@ -414,7 +415,7 @@ fn repeater<'b, 'g, 'r, 's>( // ++ for catch-up type // .+ for restart type let (remaining, _mark) = alt((tag("++"), tag("+"), tag(".+")))(input)?; - let (remaining, _value) = digit1(remaining)?; + let (remaining, _value) = digit0(remaining)?; // h = hour, d = day, w = week, m = month, y = year let (remaining, _unit) = recognize(one_of("hdwmy"))(remaining)?; let source = get_consumed(input, remaining); @@ -429,7 +430,7 @@ fn warning_delay<'b, 'g, 'r, 's>( // - for all type // -- for first type let (remaining, _mark) = alt((tag("--"), tag("-")))(input)?; - let (remaining, _value) = digit1(remaining)?; + let (remaining, _value) = digit0(remaining)?; // h = hour, d = day, w = week, m = month, y = year let (remaining, _unit) = recognize(one_of("hdwmy"))(remaining)?; let source = get_consumed(input, remaining); diff --git a/src/parser/util.rs b/src/parser/util.rs index f7bfc10..7c5d733 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -1,4 +1,5 @@ use nom::branch::alt; +use nom::bytes::complete::is_a; use nom::character::complete::anychar; use nom::character::complete::line_ending; use nom::character::complete::none_of; @@ -20,6 +21,7 @@ use crate::context::RefContext; use crate::error::CustomError; use crate::error::MyError; use crate::error::Res; +use crate::types::IndentationLevel; pub(crate) const WORD_CONSTITUENT_CHARACTERS: &str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; @@ -212,6 +214,9 @@ fn text_until_eol<'r, 's>( Ok(line.trim()) } +/// Return a tuple of (input, output) from a nom parser. +/// +/// This is similar to recognize except it returns the input instead of the portion of the input that was consumed. pub(crate) fn include_input<'s, F, O>( mut inner: F, ) -> impl FnMut(OrgSource<'s>) -> Res, (OrgSource<'s>, O)> @@ -223,3 +228,50 @@ where Ok((remaining, (input, output))) } } + +/// Match at least one space character. +/// +/// This is similar to nom's space1 parser except space1 matches both spaces and tabs whereas this only matches spaces. +pub(crate) fn only_space1<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { + is_a(" ")(input) +} + +/// Match single space or tab. +/// +/// In org-mode syntax, spaces and tabs are often (but not always!) interchangeable. +pub(crate) fn org_space<'s>(input: OrgSource<'s>) -> Res, char> { + one_of(" \t")(input) +} + +/// Matches a single space, tab, line ending, or end of file. +/// +/// In org-mode syntax there are often delimiters that could be any whitespace at all or the end of file. +pub(crate) fn org_space_or_line_ending<'s>( + input: OrgSource<'s>, +) -> Res, OrgSource<'s>> { + alt((recognize(org_space), org_line_ending))(input) +} + +/// Match a line break or the end of the file. +/// +/// In org-mode syntax, the end of the file can serve the same purpose as a line break syntactically. +pub(crate) fn org_line_ending<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { + alt((line_ending, eof))(input) +} + +/// Match the whitespace at the beginning of a line and give it an indentation level. +pub(crate) fn indentation_level<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, (IndentationLevel, OrgSource<'s>)> { + let (remaining, leading_whitespace) = space0(input)?; + let indentation_level = Into::<&str>::into(leading_whitespace) + .chars() + .map(|c| match c { + ' ' => 1, + '\t' => context.get_global_settings().tab_width, + _ => unreachable!(), + }) + .sum(); + Ok((remaining, (indentation_level, leading_whitespace))) +} diff --git a/src/types/document.rs b/src/types/document.rs index 142762d..1acc468 100644 --- a/src/types/document.rs +++ b/src/types/document.rs @@ -3,6 +3,7 @@ use super::Object; use super::Source; pub type PriorityCookie = u8; +pub type HeadlineLevel = u16; #[derive(Debug)] pub struct Document<'s> { @@ -14,7 +15,7 @@ pub struct Document<'s> { #[derive(Debug)] pub struct Heading<'s> { pub source: &'s str, - pub stars: usize, + pub level: HeadlineLevel, pub todo_keyword: Option<(TodoKeywordType, &'s str)>, pub priority_cookie: Option, pub title: Vec>, diff --git a/src/types/greater_element.rs b/src/types/greater_element.rs index e897945..dfbd904 100644 --- a/src/types/greater_element.rs +++ b/src/types/greater_element.rs @@ -10,15 +10,26 @@ pub struct PlainList<'s> { pub children: Vec>, } +/// The width that something is indented. For example, a single tab character could be a value of 4 or 8. +pub type IndentationLevel = u16; + #[derive(Debug)] pub struct PlainListItem<'s> { pub source: &'s str, - pub indentation: usize, + pub indentation: IndentationLevel, pub bullet: &'s str, + pub checkbox: Option<(CheckboxType, &'s str)>, pub tag: Vec>, pub children: Vec>, } +#[derive(Debug)] +pub enum CheckboxType { + On, + Trans, + Off, +} + #[derive(Debug)] pub struct GreaterBlock<'s> { pub source: &'s str, diff --git a/src/types/mod.rs b/src/types/mod.rs index f741024..af286cd 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -7,14 +7,17 @@ mod source; pub use document::Document; pub use document::DocumentElement; pub use document::Heading; +pub use document::HeadlineLevel; pub use document::PriorityCookie; pub use document::Section; pub use document::TodoKeywordType; pub use element::Element; +pub use greater_element::CheckboxType; pub use greater_element::Drawer; pub use greater_element::DynamicBlock; pub use greater_element::FootnoteDefinition; pub use greater_element::GreaterBlock; +pub use greater_element::IndentationLevel; pub use greater_element::NodeProperty; pub use greater_element::PlainList; pub use greater_element::PlainListItem;