Compare commits
14 Commits
b48d472546
...
v0.1.3
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
28aca041f7 | ||
|
|
d82def2a70 | ||
|
|
d471f7178b | ||
|
|
2c5c26c55f | ||
|
|
7944659802 | ||
|
|
58aca53144 | ||
|
|
6f2d90162b | ||
|
|
f170a557ed | ||
|
|
eaa38ce772 | ||
|
|
a6d742a536 | ||
|
|
45be9e7bde | ||
|
|
f6c895319f | ||
|
|
2682779534 | ||
|
|
ea6faf728c |
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "organic"
|
name = "organic"
|
||||||
version = "0.1.2"
|
version = "0.1.3"
|
||||||
authors = ["Tom Alexander <tom@fizz.buzz>"]
|
authors = ["Tom Alexander <tom@fizz.buzz>"]
|
||||||
description = "An org-mode parser."
|
description = "An org-mode parser."
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
@@ -14,7 +14,8 @@ include = [
|
|||||||
"LICENSE",
|
"LICENSE",
|
||||||
"**/*.rs",
|
"**/*.rs",
|
||||||
"Cargo.toml",
|
"Cargo.toml",
|
||||||
"tests/*"
|
"tests/*",
|
||||||
|
"org_mode_samples/"
|
||||||
]
|
]
|
||||||
|
|
||||||
[lib]
|
[lib]
|
||||||
@@ -40,7 +41,7 @@ tracing-subscriber = { version = "0.3.17", optional = true, features = ["env-fil
|
|||||||
walkdir = "2.3.3"
|
walkdir = "2.3.3"
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["compare"]
|
default = []
|
||||||
compare = []
|
compare = []
|
||||||
tracing = ["dep:opentelemetry", "dep:opentelemetry-otlp", "dep:opentelemetry-semantic-conventions", "dep:tokio", "dep:tracing", "dep:tracing-opentelemetry", "dep:tracing-subscriber"]
|
tracing = ["dep:opentelemetry", "dep:opentelemetry-otlp", "dep:opentelemetry-semantic-conventions", "dep:tokio", "dep:tracing", "dep:tracing-opentelemetry", "dep:tracing-subscriber"]
|
||||||
|
|
||||||
|
|||||||
1
build.rs
1
build.rs
@@ -75,6 +75,7 @@ fn is_expect_fail(name: &str) -> Option<&str> {
|
|||||||
"autogen_greater_element_drawer_drawer_with_headline_inside" => Some("Apparently lines with :end: become their own paragraph. This odd behavior needs to be investigated more."),
|
"autogen_greater_element_drawer_drawer_with_headline_inside" => Some("Apparently lines with :end: become their own paragraph. This odd behavior needs to be investigated more."),
|
||||||
"autogen_element_container_priority_footnote_definition_dynamic_block" => Some("Apparently broken begin lines become their own paragraph."),
|
"autogen_element_container_priority_footnote_definition_dynamic_block" => Some("Apparently broken begin lines become their own paragraph."),
|
||||||
"autogen_lesser_element_paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."),
|
"autogen_lesser_element_paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."),
|
||||||
|
"autogen_unicode_hearts" => Some("Unicode is coming out of emacs strange."),
|
||||||
_ => None,
|
_ => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,2 @@
|
|||||||
|
- foo ::
|
||||||
|
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
- foo :: bar
|
||||||
|
- cat ::
|
||||||
|
dog
|
||||||
|
- lorem
|
||||||
|
:: ipsum
|
||||||
|
-
|
||||||
|
lorem :: ipsum
|
||||||
|
- dolar *bold* foo :: ipsum
|
||||||
|
- big gap ::
|
||||||
|
|
||||||
|
stuff
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
** foo
|
||||||
|
:PROPERTIES:
|
||||||
|
:DESCRIPTION: lorem
|
||||||
|
:ALT_TITLE: ipsum
|
||||||
|
:END:
|
||||||
|
|
||||||
|
bar
|
||||||
1
org_mode_samples/unicode/hearts.org
Normal file
1
org_mode_samples/unicode/hearts.org
Normal file
@@ -0,0 +1 @@
|
|||||||
|
🧡💛💚💙💜
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -51,7 +51,7 @@ pub fn assert_bounds<'s, S: Source<'s>>(
|
|||||||
);
|
);
|
||||||
let (rust_begin, rust_end) = get_offsets(source, rust);
|
let (rust_begin, rust_end) = get_offsets(source, rust);
|
||||||
if (rust_begin + 1) != begin || (rust_end + 1) != end {
|
if (rust_begin + 1) != begin || (rust_end + 1) != end {
|
||||||
Err(format!("Rust bounds ({rust_begin}, {rust_end}) do not match emacs bounds ({emacs_begin}, {emacs_end})", rust_begin = rust_begin + 1, rust_end = rust_end + 1, emacs_begin=begin, emacs_end=end))?;
|
Err(format!("Rust bounds (in bytes) ({rust_begin}, {rust_end}) do not match emacs bounds ({emacs_begin}, {emacs_end})", rust_begin = rust_begin + 1, rust_end = rust_end + 1, emacs_begin=begin, emacs_end=end))?;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -137,3 +137,23 @@ fn maybe_token_to_usize(
|
|||||||
.flatten() // Outer option is whether or not the param exists, inner option is whether or not it is nil
|
.flatten() // Outer option is whether or not the param exists, inner option is whether or not it is nil
|
||||||
.map_or(Ok(None), |r| r.map(Some))?)
|
.map_or(Ok(None), |r| r.map(Some))?)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn get_property<'s, 'x>(
|
||||||
|
emacs: &'s Token<'s>,
|
||||||
|
key: &'x str,
|
||||||
|
) -> Result<Option<&'s Token<'s>>, Box<dyn std::error::Error>> {
|
||||||
|
let children = emacs.as_list()?;
|
||||||
|
let attributes_child = children
|
||||||
|
.iter()
|
||||||
|
.nth(1)
|
||||||
|
.ok_or("Should have an attributes child.")?;
|
||||||
|
let attributes_map = attributes_child.as_map()?;
|
||||||
|
let prop = attributes_map
|
||||||
|
.get(key)
|
||||||
|
.ok_or(format!("Missing {} attribute.", key))?;
|
||||||
|
match prop.as_atom() {
|
||||||
|
Ok("nil") => return Ok(None),
|
||||||
|
_ => {}
|
||||||
|
};
|
||||||
|
Ok(Some(*prop))
|
||||||
|
}
|
||||||
|
|||||||
@@ -86,7 +86,7 @@ fn run_compare<P: AsRef<str>>(org_contents: P) -> Result<(), Box<dyn std::error:
|
|||||||
eprintln!(
|
eprintln!(
|
||||||
"This program was built with compare disabled. Only parsing with organic, not comparing."
|
"This program was built with compare disabled. Only parsing with organic, not comparing."
|
||||||
);
|
);
|
||||||
let (remaining, rust_parsed) = document(org_contents.as_ref()).map_err(|e| e.to_string())?;
|
let (_remaining, rust_parsed) = document(org_contents.as_ref()).map_err(|e| e.to_string())?;
|
||||||
println!("{:#?}", rust_parsed);
|
println!("{:#?}", rust_parsed);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
use super::element::Element;
|
use super::element::Element;
|
||||||
use super::lesser_element::TableCell;
|
use super::lesser_element::TableCell;
|
||||||
use super::source::Source;
|
use super::source::Source;
|
||||||
|
use super::Object;
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct PlainList<'s> {
|
pub struct PlainList<'s> {
|
||||||
@@ -13,6 +14,7 @@ pub struct PlainListItem<'s> {
|
|||||||
pub source: &'s str,
|
pub source: &'s str,
|
||||||
pub indentation: usize,
|
pub indentation: usize,
|
||||||
pub bullet: &'s str,
|
pub bullet: &'s str,
|
||||||
|
pub tag: Vec<Object<'s>>,
|
||||||
pub children: Vec<Element<'s>>,
|
pub children: Vec<Element<'s>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -6,19 +6,24 @@ use nom::character::complete::one_of;
|
|||||||
use nom::character::complete::space0;
|
use nom::character::complete::space0;
|
||||||
use nom::character::complete::space1;
|
use nom::character::complete::space1;
|
||||||
use nom::combinator::eof;
|
use nom::combinator::eof;
|
||||||
|
use nom::combinator::not;
|
||||||
use nom::combinator::opt;
|
use nom::combinator::opt;
|
||||||
|
use nom::combinator::peek;
|
||||||
use nom::combinator::recognize;
|
use nom::combinator::recognize;
|
||||||
use nom::combinator::verify;
|
use nom::combinator::verify;
|
||||||
|
use nom::multi::many0;
|
||||||
use nom::multi::many1;
|
use nom::multi::many1;
|
||||||
use nom::multi::many_till;
|
use nom::multi::many_till;
|
||||||
use nom::sequence::tuple;
|
use nom::sequence::tuple;
|
||||||
|
|
||||||
use super::greater_element::PlainList;
|
use super::greater_element::PlainList;
|
||||||
use super::greater_element::PlainListItem;
|
use super::greater_element::PlainListItem;
|
||||||
|
use super::object_parser::standard_set_object;
|
||||||
use super::org_source::OrgSource;
|
use super::org_source::OrgSource;
|
||||||
use super::parser_with_context::parser_with_context;
|
use super::parser_with_context::parser_with_context;
|
||||||
use super::util::non_whitespace_character;
|
use super::util::non_whitespace_character;
|
||||||
use super::Context;
|
use super::Context;
|
||||||
|
use super::Object;
|
||||||
use crate::error::CustomError;
|
use crate::error::CustomError;
|
||||||
use crate::error::MyError;
|
use crate::error::MyError;
|
||||||
use crate::error::Res;
|
use crate::error::Res;
|
||||||
@@ -138,7 +143,6 @@ pub fn plain_list_item<'r, 's>(
|
|||||||
Into::<&str>::into(bull) != "*" || indent_level > 0
|
Into::<&str>::into(bull) != "*" || indent_level > 0
|
||||||
})(remaining)?;
|
})(remaining)?;
|
||||||
|
|
||||||
// TODO: This isn't taking into account items that immediately line break and then have contents
|
|
||||||
let maybe_contentless_item: Res<OrgSource<'_>, OrgSource<'_>> = eof(remaining);
|
let maybe_contentless_item: Res<OrgSource<'_>, OrgSource<'_>> = eof(remaining);
|
||||||
match maybe_contentless_item {
|
match maybe_contentless_item {
|
||||||
Ok((rem, _ws)) => {
|
Ok((rem, _ws)) => {
|
||||||
@@ -149,6 +153,7 @@ pub fn plain_list_item<'r, 's>(
|
|||||||
source: source.into(),
|
source: source.into(),
|
||||||
indentation: indent_level,
|
indentation: indent_level,
|
||||||
bullet: bull.into(),
|
bullet: bull.into(),
|
||||||
|
tag: Vec::new(),
|
||||||
children: Vec::new(),
|
children: Vec::new(),
|
||||||
},
|
},
|
||||||
));
|
));
|
||||||
@@ -156,7 +161,12 @@ pub fn plain_list_item<'r, 's>(
|
|||||||
Err(_) => {}
|
Err(_) => {}
|
||||||
};
|
};
|
||||||
|
|
||||||
let (remaining, _ws) = alt((space1, line_ending))(remaining)?;
|
let (remaining, maybe_tag) = opt(tuple((
|
||||||
|
space1,
|
||||||
|
parser_with_context!(item_tag)(context),
|
||||||
|
tag(" ::"),
|
||||||
|
)))(remaining)?;
|
||||||
|
let (remaining, _ws) = item_tag_post_gap(context, remaining)?;
|
||||||
let exit_matcher = plain_list_item_end(indent_level);
|
let exit_matcher = plain_list_item_end(indent_level);
|
||||||
let parser_context = context
|
let parser_context = context
|
||||||
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
|
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
|
||||||
@@ -167,7 +177,10 @@ pub fn plain_list_item<'r, 's>(
|
|||||||
|
|
||||||
let (remaining, (children, _exit_contents)) = many_till(
|
let (remaining, (children, _exit_contents)) = many_till(
|
||||||
parser_with_context!(element(true))(&parser_context),
|
parser_with_context!(element(true))(&parser_context),
|
||||||
|
alt((
|
||||||
|
peek(recognize(tuple((start_of_line, many0(blank_line), eof)))),
|
||||||
parser_with_context!(exit_matcher_parser)(&parser_context),
|
parser_with_context!(exit_matcher_parser)(&parser_context),
|
||||||
|
)),
|
||||||
)(remaining)?;
|
)(remaining)?;
|
||||||
|
|
||||||
let (remaining, _trailing_ws) =
|
let (remaining, _trailing_ws) =
|
||||||
@@ -180,6 +193,9 @@ pub fn plain_list_item<'r, 's>(
|
|||||||
source: source.into(),
|
source: source.into(),
|
||||||
indentation: indent_level,
|
indentation: indent_level,
|
||||||
bullet: bull.into(),
|
bullet: bull.into(),
|
||||||
|
tag: maybe_tag
|
||||||
|
.map(|(_ws, item_tag, _divider)| item_tag)
|
||||||
|
.unwrap_or(Vec::new()),
|
||||||
children,
|
children,
|
||||||
},
|
},
|
||||||
));
|
));
|
||||||
@@ -262,6 +278,60 @@ fn _line_indented_lte<'r, 's>(
|
|||||||
Ok(matched)
|
Ok(matched)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||||
|
fn item_tag<'r, 's>(
|
||||||
|
context: Context<'r, 's>,
|
||||||
|
input: OrgSource<'s>,
|
||||||
|
) -> Res<OrgSource<'s>, Vec<Object<'s>>> {
|
||||||
|
let parser_context =
|
||||||
|
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
|
||||||
|
class: ExitClass::Gamma,
|
||||||
|
exit_matcher: &item_tag_end,
|
||||||
|
}));
|
||||||
|
let (remaining, (children, _exit_contents)) = verify(
|
||||||
|
many_till(
|
||||||
|
// TODO: Should this be using a different set like the minimal set?
|
||||||
|
parser_with_context!(standard_set_object)(&parser_context),
|
||||||
|
parser_with_context!(exit_matcher_parser)(&parser_context),
|
||||||
|
),
|
||||||
|
|(children, _exit_contents)| !children.is_empty(),
|
||||||
|
)(input)?;
|
||||||
|
Ok((remaining, children))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||||
|
fn item_tag_end<'r, 's>(
|
||||||
|
_context: Context<'r, 's>,
|
||||||
|
input: OrgSource<'s>,
|
||||||
|
) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
||||||
|
recognize(alt((
|
||||||
|
line_ending,
|
||||||
|
tag(" :: "),
|
||||||
|
recognize(tuple((tag(" ::"), alt((line_ending, eof))))),
|
||||||
|
)))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||||
|
fn item_tag_post_gap<'r, 's>(
|
||||||
|
context: Context<'r, 's>,
|
||||||
|
input: OrgSource<'s>,
|
||||||
|
) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
||||||
|
verify(
|
||||||
|
recognize(tuple((
|
||||||
|
alt((blank_line, space0)),
|
||||||
|
many_till(
|
||||||
|
blank_line,
|
||||||
|
alt((
|
||||||
|
peek(recognize(not(blank_line))),
|
||||||
|
peek(recognize(tuple((many0(blank_line), eof)))),
|
||||||
|
parser_with_context!(exit_matcher_parser)(context),
|
||||||
|
)),
|
||||||
|
),
|
||||||
|
))),
|
||||||
|
|gap| gap.len() > 0,
|
||||||
|
)(input)
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|||||||
@@ -2,13 +2,14 @@ use nom::branch::alt;
|
|||||||
use nom::bytes::complete::is_not;
|
use nom::bytes::complete::is_not;
|
||||||
use nom::bytes::complete::tag;
|
use nom::bytes::complete::tag;
|
||||||
use nom::bytes::complete::tag_no_case;
|
use nom::bytes::complete::tag_no_case;
|
||||||
|
use nom::character::complete::anychar;
|
||||||
use nom::character::complete::line_ending;
|
use nom::character::complete::line_ending;
|
||||||
use nom::character::complete::space0;
|
use nom::character::complete::space0;
|
||||||
use nom::character::complete::space1;
|
use nom::character::complete::space1;
|
||||||
use nom::combinator::eof;
|
use nom::combinator::eof;
|
||||||
use nom::combinator::map;
|
|
||||||
use nom::combinator::opt;
|
use nom::combinator::opt;
|
||||||
use nom::combinator::recognize;
|
use nom::combinator::recognize;
|
||||||
|
use nom::combinator::verify;
|
||||||
use nom::multi::many_till;
|
use nom::multi::many_till;
|
||||||
use nom::sequence::tuple;
|
use nom::sequence::tuple;
|
||||||
|
|
||||||
@@ -23,7 +24,6 @@ use crate::parser::greater_element::PropertyDrawer;
|
|||||||
use crate::parser::parser_context::ContextElement;
|
use crate::parser::parser_context::ContextElement;
|
||||||
use crate::parser::parser_context::ExitMatcherNode;
|
use crate::parser::parser_context::ExitMatcherNode;
|
||||||
use crate::parser::parser_with_context::parser_with_context;
|
use crate::parser::parser_with_context::parser_with_context;
|
||||||
use crate::parser::plain_text::plain_text;
|
|
||||||
use crate::parser::util::exit_matcher_parser;
|
use crate::parser::util::exit_matcher_parser;
|
||||||
use crate::parser::util::get_consumed;
|
use crate::parser::util::get_consumed;
|
||||||
use crate::parser::util::immediate_in_section;
|
use crate::parser::util::immediate_in_section;
|
||||||
@@ -147,11 +147,16 @@ fn node_property_name<'r, 's>(
|
|||||||
}));
|
}));
|
||||||
|
|
||||||
let (remaining, name) = recognize(tuple((
|
let (remaining, name) = recognize(tuple((
|
||||||
map(parser_with_context!(plain_text)(&parser_context), |pt| {
|
verify(
|
||||||
pt.source
|
many_till(
|
||||||
}),
|
anychar,
|
||||||
|
parser_with_context!(exit_matcher_parser)(&parser_context),
|
||||||
|
),
|
||||||
|
|(children, _exit_contents)| !children.is_empty(),
|
||||||
|
),
|
||||||
opt(tag("+")),
|
opt(tag("+")),
|
||||||
)))(input)?;
|
)))(input)?;
|
||||||
|
|
||||||
Ok((remaining, name))
|
Ok((remaining, name))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user