15 Commits

Author SHA1 Message Date
Tom Alexander
503db94b2c Publish version 0.1.12.
All checks were successful
rustfmt Build rustfmt has succeeded
clippy Build clippy has succeeded
rust-foreign-document-test Build rust-foreign-document-test has succeeded
rust-build Build rust-build has succeeded
rust-test Build rust-test has succeeded
2023-10-18 19:01:43 -04:00
Tom Alexander
a4381e5e39 Merge branch 'keyword_constants' 2023-10-18 18:48:52 -04:00
Tom Alexander
e11de60def Clippy fixes.
All checks were successful
clippy Build clippy has succeeded
rust-foreign-document-test Build rust-foreign-document-test has succeeded
rust-build Build rust-build has succeeded
rust-test Build rust-test has succeeded
2023-10-18 18:39:04 -04:00
Tom Alexander
b2479e9de8 Remove Debug from the context variables.
Now that entities are stored in the settings struct, these variables are massive which makes them balloon trace sizes while being mostly unreadable. This removes Debug from them to serve as a static-analysis check that context is ALWAYS ignored in tracing calls.
2023-10-18 18:36:25 -04:00
Tom Alexander
49d1cef7ae Remove context from functions that no longer need it. 2023-10-18 18:28:24 -04:00
Tom Alexander
ba72cc1b29 The variables for keywords are actually constants.
These settings do not need to exist in GlobalSettings because they are actually constants in upstream Org-Mode.
2023-10-18 18:22:01 -04:00
Tom Alexander
c58b0e7c35 Add a script to dump an AST using docker.
All checks were successful
rust-test Build rust-test has succeeded
clippy Build clippy has succeeded
rustfmt Build rustfmt has succeeded
rust-foreign-document-test Build rust-foreign-document-test has succeeded
rust-build Build rust-build has succeeded
2023-10-18 15:39:52 -04:00
Tom Alexander
f19d262825 Merge branch 'bullshitium'
All checks were successful
rustfmt Build rustfmt has succeeded
clippy Build clippy has succeeded
rust-test Build rust-test has succeeded
rust-build Build rust-build has succeeded
rust-foreign-document-test Build rust-foreign-document-test has succeeded
2023-10-18 13:04:16 -04:00
Tom Alexander
68f3f2e159 Clippy fixes.
All checks were successful
clippy Build clippy has succeeded
rust-foreign-document-test Build rust-foreign-document-test has succeeded
rust-build Build rust-build has succeeded
rust-test Build rust-test has succeeded
2023-10-18 12:42:09 -04:00
Tom Alexander
269e23c1b1 No more expect-fail tests! 2023-10-18 12:41:12 -04:00
Tom Alexander
e111b8b9b8 Performance optimization. 2023-10-18 12:39:08 -04:00
Tom Alexander
353ff07420 Handle bullshitium for broken dynamic blocks. 2023-10-18 12:32:48 -04:00
Tom Alexander
94dec31130 Consuming trailing whitespace for 🔚 bullshitium. 2023-10-18 12:17:57 -04:00
Tom Alexander
cf5d3ed745 Add tests for the 🔚 bullshitium. 2023-10-18 11:59:55 -04:00
Tom Alexander
b0b287cd47 Handle bullshitium for 🔚. 2023-10-18 11:57:39 -04:00
19 changed files with 330 additions and 122 deletions

View File

@@ -2,7 +2,7 @@
[package]
name = "organic"
version = "0.1.11"
version = "0.1.12"
authors = ["Tom Alexander <tom@fizz.buzz>"]
description = "An org-mode parser."
edition = "2021"

View File

@@ -66,10 +66,6 @@ fn write_test(test_file: &mut File, test: &walkdir::DirEntry) {
}
#[cfg(feature = "compare")]
fn is_expect_fail(name: &str) -> Option<&str> {
match name {
"greater_element_drawer_drawer_with_headline_inside" => Some("Apparently lines with :end: become their own paragraph. This odd behavior needs to be investigated more."),
"element_container_priority_footnote_definition_dynamic_block" => Some("Apparently broken begin lines become their own paragraph."),
_ => None,
}
fn is_expect_fail(_name: &str) -> Option<&str> {
None
}

View File

@@ -0,0 +1,3 @@
foo
:end:
bar

View File

@@ -0,0 +1,2 @@
foo
:end:

58
scripts/dump_ast.bash Executable file
View File

@@ -0,0 +1,58 @@
#!/usr/bin/env bash
#
# Dump the AST of an org-mode document from emacs
set -euo pipefail
IFS=$'\n\t'
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
REALPATH=$(command -v uu-realpath || command -v realpath)
MAKE=$(command -v gmake || command -v make)
############## Setup #########################
function die {
local status_code="$1"
shift
(>&2 echo "${@}")
exit "$status_code"
}
function log {
(>&2 echo "${@}")
}
############## Program #########################
function main {
if [ $# -eq 0 ]; then
dump_ast_stdin "${@}"
else
dump_ast_file "${@}"
fi
}
function dump_ast_stdin {
# Until we can find a good way to encode stdin as an elisp string in bash, I cannot operate on stdin.
die 1 "This script only works on files."
}
function dump_ast_file {
local target_file mounted_file elisp_script
target_file=$($REALPATH "$1")
mounted_file="/input${target_file}"
elisp_script=$(cat <<EOF
(progn
(erase-buffer)
(require 'org)
(defun org-table-align () t)
(find-file-read-only "${mounted_file}")
(org-mode)
(message "%s" (pp-to-string (org-element-parse-buffer)))
)
EOF
)
exec docker run --init --rm -i --mount type=tmpfs,destination=/tmp -v "/:/input:ro" --entrypoint "" organic-test emacs -q --no-site-file --no-splash --batch --eval "$elisp_script"
}
main "${@}"

View File

@@ -1,15 +1,27 @@
use super::global_settings::EntityDefinition;
pub(crate) const DEFAULT_ORG_ELEMENT_PARSED_KEYWORDS: [&str; 1] = ["CAPTION"];
/// Keywords that contain the standard set of objects (excluding footnote references).
///
/// Corresponds to org-element-parsed-keywords elisp variable.
pub(crate) const ORG_ELEMENT_PARSED_KEYWORDS: [&str; 1] = ["CAPTION"];
pub(crate) const DEFAULT_ORG_ELEMENT_DUAL_KEYWORDS: [&str; 2] = ["CAPTION", "RESULTS"];
/// Keywords that can have a secondary value in square brackets.
///
/// Corresponds to org-element-dual-keywords elisp variable.
pub(crate) const ORG_ELEMENT_DUAL_KEYWORDS: [&str; 2] = ["CAPTION", "RESULTS"];
pub(crate) const DEFAULT_ORG_ELEMENT_AFFILIATED_KEYWORDS: [&str; 13] = [
/// Keywords that can be affiliated with an element.
///
/// Corresponds to org-element-affiliated-keywords elisp variable.
pub(crate) const ORG_ELEMENT_AFFILIATED_KEYWORDS: [&str; 13] = [
"CAPTION", "DATA", "HEADER", "HEADERS", "LABEL", "NAME", "PLOT", "RESNAME", "RESULT",
"RESULTS", "SOURCE", "SRCNAME", "TBLNAME",
];
pub(crate) const DEFAULT_ORG_ELEMENT_KEYWORD_TRANSLATION_ALIST: [(&str, &str); 8] = [
/// Mapping of keyword names.
///
/// Corresponds to org-element-keyword-translation-alist elisp variable.
pub(crate) const ORG_ELEMENT_KEYWORD_TRANSLATION_ALIST: [(&str, &str); 8] = [
("DATA", "NAME"),
("LABEL", "NAME"),
("RESNAME", "NAME"),

View File

@@ -12,7 +12,6 @@ use crate::error::CustomError;
use crate::error::Res;
use crate::parser::OrgSource;
#[derive(Debug)]
pub(crate) enum ContextElement<'r, 's> {
/// Stores a parser that indicates that children should exit upon matching an exit matcher.
ExitMatcherNode(ExitMatcherNode<'r>),
@@ -34,15 +33,6 @@ pub(crate) struct ExitMatcherNode<'r> {
pub(crate) class: ExitClass,
}
impl<'r> std::fmt::Debug for ExitMatcherNode<'r> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let mut formatter = f.debug_struct("ExitMatcherNode");
formatter.field("class", &self.class.to_string());
formatter.finish()
}
}
#[derive(Debug)]
pub(crate) struct Context<'g, 'r, 's> {
global_settings: &'g GlobalSettings<'g, 's>,
tree: List<'r, &'r ContextElement<'r, 's>>,

View File

@@ -1,13 +1,7 @@
#[derive(Debug, Copy, Clone)]
#[derive(Copy, Clone)]
pub(crate) enum ExitClass {
Document = 1,
Alpha = 2,
Beta = 3,
Gamma = 4,
}
impl std::fmt::Display for ExitClass {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "{:?}", self)
}
}

View File

@@ -1,17 +1,16 @@
use std::fmt::Debug;
use std::path::PathBuf;
#[cfg(any(feature = "compare", feature = "foreign_document_test"))]
pub trait FileAccessInterface: Sync + Debug {
pub trait FileAccessInterface: Sync {
fn read_file(&self, path: &str) -> Result<String, std::io::Error>;
}
#[cfg(not(any(feature = "compare", feature = "foreign_document_test")))]
pub trait FileAccessInterface: Debug {
pub trait FileAccessInterface {
fn read_file(&self, path: &str) -> Result<String, std::io::Error>;
}
#[derive(Debug, Clone)]
#[derive(Clone)]
pub struct LocalFileAccessInterface {
pub working_directory: Option<PathBuf>,
}

View File

@@ -5,16 +5,12 @@ use super::constants::DEFAULT_ORG_ENTITIES;
use super::constants::DEFAULT_ORG_LINK_PARAMETERS;
use super::FileAccessInterface;
use super::LocalFileAccessInterface;
use crate::context::constants::DEFAULT_ORG_ELEMENT_AFFILIATED_KEYWORDS;
use crate::context::constants::DEFAULT_ORG_ELEMENT_DUAL_KEYWORDS;
use crate::context::constants::DEFAULT_ORG_ELEMENT_KEYWORD_TRANSLATION_ALIST;
use crate::context::constants::DEFAULT_ORG_ELEMENT_PARSED_KEYWORDS;
use crate::types::IndentationLevel;
use crate::types::Object;
// TODO: Ultimately, I think we'll need most of this: https://orgmode.org/manual/In_002dbuffer-Settings.html
#[derive(Debug, Clone)]
#[derive(Clone)]
pub struct GlobalSettings<'g, 's> {
pub radio_targets: Vec<&'g Vec<Object<'s>>>,
pub file_access: &'g dyn FileAccessInterface,
@@ -58,26 +54,6 @@ pub struct GlobalSettings<'g, 's> {
///
/// Corresponds to org-entities elisp variable.
pub entities: &'g [EntityDefinition<'s>],
/// Keywords that contain the standard set of objects (excluding footnote references).
///
/// Corresponds to org-element-parsed-keywords elisp variable.
pub element_parsed_keywords: &'g [&'s str],
/// Keywords that can have a secondary value in square brackets.
///
/// Corresponds to org-element-dual-keywords elisp variable.
pub element_dual_keywords: &'g [&'s str],
/// Keywords that can be affiliated with an element.
///
/// Corresponds to org-element-affiliated-keywords elisp variable.
pub element_affiliated_keywords: &'g [&'s str],
/// Mapping of keyword names.
///
/// Corresponds to org-element-keyword-translation-alist elisp variable.
pub element_keyword_translation_alist: &'g [(&'s str, &'s str)],
}
pub const DEFAULT_TAB_WIDTH: IndentationLevel = 8;
@@ -112,10 +88,6 @@ impl<'g, 's> GlobalSettings<'g, 's> {
link_parameters: &DEFAULT_ORG_LINK_PARAMETERS,
link_templates: BTreeMap::new(),
entities: &DEFAULT_ORG_ENTITIES,
element_parsed_keywords: &DEFAULT_ORG_ELEMENT_PARSED_KEYWORDS,
element_dual_keywords: &DEFAULT_ORG_ELEMENT_DUAL_KEYWORDS,
element_affiliated_keywords: &DEFAULT_ORG_ELEMENT_AFFILIATED_KEYWORDS,
element_keyword_translation_alist: &DEFAULT_ORG_ELEMENT_KEYWORD_TRANSLATION_ALIST,
}
}
}
@@ -126,7 +98,7 @@ impl<'g, 's> Default for GlobalSettings<'g, 's> {
}
}
#[derive(Debug, Clone, PartialEq, Default)]
#[derive(Clone, PartialEq, Default)]
pub enum HeadlineLevelFilter {
Odd,

View File

@@ -1,7 +1,7 @@
use crate::error::Res;
use crate::parser::OrgSource;
mod constants;
pub(crate) mod constants;
#[allow(clippy::module_inception)]
mod context;
mod exiting;

View File

@@ -19,29 +19,27 @@ use super::object_parser::standard_set_object;
use super::util::confine_context;
use super::OrgSource;
use crate::context::bind_context;
use crate::context::constants::ORG_ELEMENT_DUAL_KEYWORDS;
use crate::context::constants::ORG_ELEMENT_KEYWORD_TRANSLATION_ALIST;
use crate::context::constants::ORG_ELEMENT_PARSED_KEYWORDS;
use crate::context::Context;
use crate::context::ContextElement;
use crate::context::GlobalSettings;
use crate::context::List;
use crate::context::RefContext;
use crate::error::Res;
use crate::types::AffiliatedKeywordValue;
use crate::types::AffiliatedKeywords;
use crate::types::Keyword;
#[cfg_attr(
feature = "tracing",
tracing::instrument(ret, level = "debug", skip(context))
)]
pub(crate) fn affiliated_keywords<'b, 'g, 'r, 's>(
context: RefContext<'b, 'g, 'r, 's>,
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub(crate) fn affiliated_keywords<'s>(
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Vec<Keyword<'s>>> {
let mut ret = Vec::new();
let mut remaining = input;
loop {
let result = affiliated_keyword(context, remaining);
let result = affiliated_keyword(remaining);
match result {
Ok((remain, kw)) => {
remaining = remain;
@@ -65,8 +63,8 @@ where
{
let mut ret = BTreeMap::new();
for kw in input {
let translated_name = translate_name(global_settings, kw.key);
let keyword_type = identify_keyword_type(global_settings, translated_name.as_str());
let translated_name = translate_name(kw.key);
let keyword_type = identify_keyword_type(translated_name.as_str());
match keyword_type {
AffiliatedKeywordType::SingleString => {
ret.insert(
@@ -151,12 +149,12 @@ where
AffiliatedKeywords { keywords: ret }
}
fn translate_name<'g, 's>(global_settings: &'g GlobalSettings<'g, 's>, name: &'s str) -> String {
fn translate_name(name: &str) -> String {
let name_until_optval = name
.split_once('[')
.map(|(before, _after)| before)
.unwrap_or(name);
for (src, dst) in global_settings.element_keyword_translation_alist {
for (src, dst) in ORG_ELEMENT_KEYWORD_TRANSLATION_ALIST {
if name_until_optval.eq_ignore_ascii_case(src) {
return dst.to_lowercase();
}
@@ -171,20 +169,15 @@ enum AffiliatedKeywordType {
ObjectTree,
}
fn identify_keyword_type<'g, 's>(
global_settings: &'g GlobalSettings<'g, 's>,
name: &'s str,
) -> AffiliatedKeywordType {
fn identify_keyword_type(name: &str) -> AffiliatedKeywordType {
let is_multiple = ["CAPTION", "HEADER"]
.into_iter()
.any(|candidate| name.eq_ignore_ascii_case(candidate))
|| name.to_lowercase().starts_with("attr_");
let is_parsed = global_settings
.element_parsed_keywords
let is_parsed = ORG_ELEMENT_PARSED_KEYWORDS
.iter()
.any(|candidate| name.eq_ignore_ascii_case(candidate));
let can_have_optval = global_settings
.element_dual_keywords
let can_have_optval = ORG_ELEMENT_DUAL_KEYWORDS
.iter()
.any(|candidate| name.eq_ignore_ascii_case(candidate));
match (is_multiple, is_parsed, can_have_optval) {

165
src/parser/bullshitium.rs Normal file
View File

@@ -0,0 +1,165 @@
use nom::branch::alt;
use nom::bytes::complete::tag_no_case;
use nom::character::complete::anychar;
use nom::character::complete::space0;
use nom::multi::many_till;
use nom::sequence::tuple;
use super::paragraph::paragraph;
use super::util::maybe_consume_trailing_whitespace_if_not_exiting;
use super::util::org_line_ending;
use super::util::start_of_line;
use super::OrgSource;
use crate::context::bind_context;
use crate::context::RefContext;
use crate::error::CustomError;
use crate::error::Res;
use crate::parser::macros::element;
use crate::types::AffiliatedKeywords;
use crate::types::Object;
use crate::types::Paragraph;
use crate::types::PlainText;
#[cfg_attr(
feature = "tracing",
tracing::instrument(ret, level = "debug", skip(context))
)]
pub(crate) fn bullshitium<'b, 'g, 'r, 's>(
context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Paragraph<'s>> {
alt((
bind_context!(broken_end, context),
bind_context!(broken_dynamic_block, context),
))(input)
}
#[cfg_attr(
feature = "tracing",
tracing::instrument(ret, level = "debug", skip(context))
)]
pub(crate) fn detect_bullshitium<'b, 'g, 'r, 's>(
context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, ()> {
element!(detect_broken_end, context, input);
element!(detect_broken_dynamic_block, context, input);
Err(nom::Err::Error(CustomError::Static("No bullshitium.")))
}
#[cfg_attr(
feature = "tracing",
tracing::instrument(ret, level = "debug", skip(context))
)]
pub(crate) fn broken_end<'b, 'g, 'r, 's>(
context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Paragraph<'s>> {
start_of_line(input)?;
let (remaining, _) = space0(input)?;
let (remaining, _) = tag_no_case(":end:")(remaining)?;
let (lead_in_remaining, _) = tuple((space0, org_line_ending))(remaining)?;
if let Ok((remaining, mut paragraph)) =
paragraph(std::iter::empty(), lead_in_remaining, context, input)
{
match paragraph.children.first_mut() {
Some(Object::PlainText(plain_text)) => {
plain_text.source = input.get_until_end_of_str(plain_text.source).into();
}
Some(obj) => {
panic!("Unhandled first object type inside bullshitium {:?}", obj);
}
None => {
unreachable!("Paragraph must have children.");
}
};
Ok((remaining, paragraph))
} else {
let (remaining, _trailing_ws) =
maybe_consume_trailing_whitespace_if_not_exiting(context, lead_in_remaining)?;
Ok((
remaining,
Paragraph {
source: input.get_until(remaining).into(),
affiliated_keywords: AffiliatedKeywords::default(),
children: vec![Object::PlainText(PlainText {
source: input.get_until(lead_in_remaining).into(),
})],
},
))
}
}
#[cfg_attr(
feature = "tracing",
tracing::instrument(ret, level = "debug", skip(_context))
)]
pub(crate) fn detect_broken_end<'b, 'g, 'r, 's>(
_context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, ()> {
start_of_line(input)?;
let (remaining, _) = space0(input)?;
let (remaining, _) = tag_no_case(":end:")(remaining)?;
let (_remaining, _) = tuple((space0, org_line_ending))(remaining)?;
Ok((input, ()))
}
#[cfg_attr(
feature = "tracing",
tracing::instrument(ret, level = "debug", skip(context))
)]
pub(crate) fn broken_dynamic_block<'b, 'g, 'r, 's>(
context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Paragraph<'s>> {
start_of_line(input)?;
let (remaining, _) = space0(input)?;
let (remaining, _) = tag_no_case("#+BEGIN:")(remaining)?;
let (lead_in_remaining, _) = many_till(anychar, org_line_ending)(remaining)?;
if let Ok((remaining, mut paragraph)) =
paragraph(std::iter::empty(), lead_in_remaining, context, input)
{
match paragraph.children.first_mut() {
Some(Object::PlainText(plain_text)) => {
plain_text.source = input.get_until_end_of_str(plain_text.source).into();
}
Some(obj) => {
panic!("Unhandled first object type inside bullshitium {:?}", obj);
}
None => {
unreachable!("Paragraph must have children.");
}
};
Ok((remaining, paragraph))
} else {
let (remaining, _trailing_ws) =
maybe_consume_trailing_whitespace_if_not_exiting(context, lead_in_remaining)?;
Ok((
remaining,
Paragraph {
source: input.get_until(remaining).into(),
affiliated_keywords: AffiliatedKeywords::default(),
children: vec![Object::PlainText(PlainText {
source: input.get_until(lead_in_remaining).into(),
})],
},
))
}
}
#[cfg_attr(
feature = "tracing",
tracing::instrument(ret, level = "debug", skip(_context))
)]
pub(crate) fn detect_broken_dynamic_block<'b, 'g, 'r, 's>(
_context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, ()> {
start_of_line(input)?;
let (remaining, _) = space0(input)?;
let (_remaining, _) = tag_no_case("#+BEGIN:")(remaining)?;
Ok((input, ()))
}

View File

@@ -32,6 +32,8 @@ use crate::event_count::record_event;
#[cfg(feature = "event_count")]
use crate::event_count::EventType;
use crate::parser::affiliated_keyword::affiliated_keywords;
use crate::parser::bullshitium::bullshitium;
use crate::parser::bullshitium::detect_bullshitium;
use crate::parser::macros::ak_element;
use crate::parser::macros::element;
use crate::parser::table::org_mode_table;
@@ -57,8 +59,7 @@ fn _element<'b, 'g, 'r, 's>(
) -> Res<OrgSource<'s>, Element<'s>> {
#[cfg(feature = "event_count")]
record_event(EventType::ElementStart, input);
let (post_affiliated_keywords_input, affiliated_keywords) =
affiliated_keywords(context, input)?;
let (post_affiliated_keywords_input, affiliated_keywords) = affiliated_keywords(input)?;
let mut affiliated_keywords = affiliated_keywords.into_iter();
@@ -242,6 +243,9 @@ fn _element<'b, 'g, 'r, 's>(
);
if can_be_paragraph {
// Fake paragraphs
element!(bullshitium, context, input, Element::Paragraph);
// Paragraph without affiliated keyword
ak_element!(
paragraph,
@@ -272,8 +276,7 @@ fn _detect_element<'b, 'g, 'r, 's>(
input: OrgSource<'s>,
can_be_paragraph: bool,
) -> Res<OrgSource<'s>, ()> {
let (post_affiliated_keywords_input, affiliated_keywords) =
affiliated_keywords(context, input)?;
let (post_affiliated_keywords_input, affiliated_keywords) = affiliated_keywords(input)?;
let mut affiliated_keywords = affiliated_keywords.into_iter();
@@ -319,6 +322,11 @@ fn _detect_element<'b, 'g, 'r, 's>(
input
);
// Fake paragraphs
if !can_be_paragraph {
element!(detect_bullshitium, context, input);
}
if _element(context, input, can_be_paragraph).is_ok() {
return Ok((input, ()));
}

View File

@@ -84,7 +84,10 @@ fn in_buffer_settings_key<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSou
))(input)
}
#[cfg_attr(feature = "tracing", tracing::instrument(level = "debug"))]
#[cfg_attr(
feature = "tracing",
tracing::instrument(level = "debug", skip(original_settings))
)]
pub(crate) fn apply_in_buffer_settings<'g, 's, 'sf>(
keywords: Vec<Keyword<'sf>>,
original_settings: &'g GlobalSettings<'g, 's>,

View File

@@ -21,7 +21,8 @@ use super::org_source::OrgSource;
use super::util::get_consumed;
use super::util::maybe_consume_trailing_whitespace_if_not_exiting;
use super::util::org_line_ending;
use crate::context::bind_context;
use crate::context::constants::ORG_ELEMENT_AFFILIATED_KEYWORDS;
use crate::context::constants::ORG_ELEMENT_DUAL_KEYWORDS;
use crate::context::RefContext;
use crate::error::CustomError;
use crate::error::Res;
@@ -98,11 +99,8 @@ where
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub(crate) fn affiliated_keyword<'b, 'g, 'r, 's>(
context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Keyword<'s>> {
filtered_keyword(bind_context!(affiliated_key, context))(input)
pub(crate) fn affiliated_keyword<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, Keyword<'s>> {
filtered_keyword(affiliated_key)(input)
}
#[cfg_attr(
@@ -137,24 +135,18 @@ fn regular_keyword_key<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn affiliated_key<'b, 'g, 'r, 's>(
context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
element!(dual_affiliated_key, context, input);
element!(plain_affiliated_key, context, input);
fn affiliated_key<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
element!(dual_affiliated_key, input);
element!(plain_affiliated_key, input);
element!(export_keyword, input);
Err(nom::Err::Error(CustomError::Static("No affiliated key.")))
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn plain_affiliated_key<'b, 'g, 'r, 's>(
context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
for keyword in context.get_global_settings().element_affiliated_keywords {
fn plain_affiliated_key<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
for keyword in ORG_ELEMENT_AFFILIATED_KEYWORDS {
let result = map(
tuple((tag_no_case::<_, _, CustomError>(*keyword), peek(tag(":")))),
tuple((tag_no_case::<_, _, CustomError>(keyword), peek(tag(":")))),
|(key, _)| key,
)(input);
if let Ok((remaining, ent)) = result {
@@ -166,13 +158,10 @@ fn plain_affiliated_key<'b, 'g, 'r, 's>(
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn dual_affiliated_key<'b, 'g, 'r, 's>(
context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
for keyword in context.get_global_settings().element_dual_keywords {
fn dual_affiliated_key<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
for keyword in ORG_ELEMENT_DUAL_KEYWORDS {
let result = recognize(tuple((
tag_no_case::<_, _, CustomError>(*keyword),
tag_no_case::<_, _, CustomError>(keyword),
tag("["),
optval,
tag("]"),
@@ -232,19 +221,12 @@ mod tests {
use test::Bencher;
use super::*;
use crate::context::Context;
use crate::context::ContextElement;
use crate::context::GlobalSettings;
use crate::context::List;
use crate::parser::OrgSource;
#[bench]
fn bench_affiliated_keyword(b: &mut Bencher) {
let input = OrgSource::new("#+CAPTION[*foo*]: bar *baz*");
let global_settings = GlobalSettings::default();
let initial_context = ContextElement::document_context();
let initial_context = Context::new(&global_settings, List::new(&initial_context));
b.iter(|| assert!(affiliated_keyword(&initial_context, input).is_ok()));
b.iter(|| assert!(affiliated_keyword(input).is_ok()));
}
}

View File

@@ -1,6 +1,7 @@
mod affiliated_keyword;
mod angle_link;
mod babel_call;
mod bullshitium;
mod citation;
mod citation_reference;
mod clock;

View File

@@ -82,6 +82,15 @@ impl<'s> OrgSource<'s> {
self.slice(..(other.end - self.start))
}
pub(crate) fn get_until_end_of_str(&self, other: &'s str) -> OrgSource<'s> {
let full_source_start = self.full_source.as_ptr() as usize;
let other_start = other.as_ptr() as usize - full_source_start;
let other_end = other_start + other.len();
debug_assert!(other_start >= self.start);
debug_assert!(other_end <= self.end);
self.slice(..(other_end - self.start))
}
pub(crate) fn get_start_of_line(&self) -> OrgSource<'s> {
let skipped_text = self.text_since_line_break();
let mut bracket_depth = self.bracket_depth;

View File

@@ -2,7 +2,7 @@ use std::borrow::Cow;
/// Removes all whitespace from a string if any line breaks are present.
///
/// Example: "foo bar" => "foo bar" but "foo \n bar" => "foobar".
/// Example: "foo bar" => "foo bar" but "foo \n bar" => "foobar".
pub(crate) fn remove_whitespace_if_line_break(input: &str) -> Cow<'_, str> {
let mut state = RemoveWhitespaceIfLineBreakState::Normal;
for (offset, c) in input.char_indices() {
@@ -78,7 +78,7 @@ enum RemoveLineBreakState {
/// Removes all whitespace from a string if any line breaks are present.
///
/// Example: "foo bar" => "foo bar" but "foo \n bar" => "foobar".
/// Example: "foo bar" => "foo bar" but "foo \n bar" => "foo bar".
pub(crate) fn coalesce_whitespace_if_line_break(input: &str) -> Cow<'_, str> {
let mut state = CoalesceWhitespaceIfLineBreakState::Normal;
for (offset, c) in input.char_indices() {
@@ -515,4 +515,25 @@ mod tests {
assert!(matches!(output, Cow::Borrowed(_)));
Ok(())
}
#[test]
fn test_coalesce_whitespace_if_line_break() -> Result<(), Box<dyn std::error::Error>> {
assert_eq!(coalesce_whitespace_if_line_break("foo bar"), "foo bar");
assert_eq!(coalesce_whitespace_if_line_break("foo \n bar"), "foo bar");
Ok(())
}
#[test]
fn test_remove_whitespace_if_line_break() -> Result<(), Box<dyn std::error::Error>> {
assert_eq!(remove_whitespace_if_line_break("foo bar"), "foo bar");
assert_eq!(remove_whitespace_if_line_break("foo \n bar"), "foobar");
Ok(())
}
#[test]
fn test_remove_line_break() -> Result<(), Box<dyn std::error::Error>> {
assert_eq!(remove_line_break("foo bar"), "foo bar");
assert_eq!(remove_line_break("foo \n bar"), "foo bar");
Ok(())
}
}