Rename blog_post module to intermediate.

This module is mostly the intermediate representation of the AST, so the rename makes that clearer. The three forms are parsed => intermediate => render.

Parsed comes from Organic and is a direct translation of the org-mode text.

Intermediate converts the parsed data into owned values and performs any calculations that are needed on the data (for example, assigning numbers to footnotes).

Render takes intermediate and translates it into the format expected by the dust templates. The processing in this step should be minimal since all the logic should be in the intermediate step.
This commit is contained in:
Tom Alexander
2023-10-27 13:05:34 -04:00
parent 1ac39c2a6f
commit e3b5f7f74f
20 changed files with 64 additions and 64 deletions

168
src/intermediate/convert.rs Normal file
View File

@@ -0,0 +1,168 @@
use std::path::Component;
use std::path::Path;
use std::path::PathBuf;
use crate::config::Config;
use crate::context::GlobalSettings;
use crate::context::RenderBlogPostPage;
use crate::context::RenderDocumentElement;
use crate::context::RenderHeading;
use crate::context::RenderSection;
use crate::error::CustomError;
use super::BlogPost;
use super::BlogPostPage;
use super::IDocumentElement;
/// Builds the render context for one page of a blog post.
///
/// `output_directory` is the root of the generated site and `output_file` is
/// the file this page will be written to; both are handed to `get_web_path`
/// to produce the links for `main.css`, `blog_post.js` and the page itself.
/// `_post` is accepted but not read by this function.
///
/// # Errors
///
/// Returns an error when `output_file` is not located under
/// `output_directory`, when a web path cannot be computed, or when rendering
/// any child element fails.
pub(crate) fn convert_blog_post_page_to_render_context<D: AsRef<Path>, F: AsRef<Path>>(
    config: &Config,
    output_directory: D,
    output_file: F,
    _post: &BlogPost,
    page: &BlogPostPage,
) -> Result<RenderBlogPostPage, CustomError> {
    let output_directory = output_directory.as_ref();
    let output_file = output_file.as_ref();

    // Every link on this page is resolved against the same output file.
    let web_path = |path_from_web_root: &Path| {
        get_web_path(config, output_directory, output_file, path_from_web_root)
    };

    let css_files = vec![web_path(Path::new("main.css"))?];
    let js_files = vec![web_path(Path::new("blog_post.js"))?];
    let global_settings = GlobalSettings::new(page.title.clone(), css_files, js_files);
    let link_to_blog_post = web_path(output_file.strip_prefix(output_directory)?)?;

    let mut children = Vec::new();
    for child in &page.children {
        let rendered = match child {
            IDocumentElement::Heading(heading) => RenderDocumentElement::Heading(
                RenderHeading::new(config, output_directory, output_file, heading)?,
            ),
            IDocumentElement::Section(section) => RenderDocumentElement::Section(
                RenderSection::new(config, output_directory, output_file, section)?,
            ),
        };
        children.push(rendered);
    }

    Ok(RenderBlogPostPage::new(
        global_settings,
        page.title.clone(),
        Some(link_to_blog_post),
        children,
    ))
}
/// Computes the string used to link to `path_from_web_root` from within
/// `containing_file`.
///
/// When `config.use_relative_paths()` is true the result is relative to the
/// folder of `containing_file`: one `../` for every folder between the
/// containing file and the stem it shares with the target, followed by the
/// rest of the target path. Otherwise the target is joined onto
/// `config.get_web_root()`.
///
/// # Errors
///
/// Fails when `containing_file` is not under `output_directory`, when either
/// path has no parent, when neither relative paths nor a web root are
/// configured, or when the resulting path is not valid UTF-8.
fn get_web_path<D: AsRef<Path>, F: AsRef<Path>, P: AsRef<Path>>(
    config: &Config,
    output_directory: D,
    containing_file: F,
    path_from_web_root: P,
) -> Result<String, CustomError> {
    let path_from_web_root = path_from_web_root.as_ref();

    let final_path = if config.use_relative_paths() {
        let output_directory = output_directory.as_ref();
        let containing_file = containing_file.as_ref();
        let file_in_web_root = containing_file.strip_prefix(output_directory)?;

        let file_folder = file_in_web_root
            .parent()
            .ok_or("File should exist in a folder.")?;
        let target_folder = path_from_web_root
            .parent()
            .ok_or("File should exist in a folder.")?;
        let shared_stem: PathBuf = get_shared_steps(file_folder, target_folder).collect();

        // Subtracting 1 from the depth to "remove" the file name.
        let remaining_components = file_in_web_root
            .strip_prefix(&shared_stem)?
            .components()
            .count();
        let depth_from_shared_stem = remaining_components - 1;

        let climb = PathBuf::from("../".repeat(depth_from_shared_stem));
        climb.join(path_from_web_root.strip_prefix(shared_stem)?)
    } else {
        let web_root = config
            .get_web_root()
            .ok_or("Must either use_relative_paths or set the web_root in the config.")?;
        PathBuf::from(web_root).join(path_from_web_root)
    };

    let final_string = final_path
        .to_str()
        .map(str::to_string)
        .ok_or("Path should be valid utf-8.")?;
    Ok(final_string)
}
/// Yields the leading path components that `left` and `right` have in common.
///
/// Components are compared pairwise from the start; iteration stops at the
/// first pair that differs or when either path runs out of components.
fn get_shared_steps<'a>(left: &'a Path, right: &'a Path) -> impl Iterator<Item = Component<'a>> {
    let paired_components = left.components().zip(right.components());
    paired_components.map_while(|(from_left, from_right)| {
        if from_left == from_right {
            Some(from_left)
        } else {
            None
        }
    })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Collects the shared leading components of two paths into a `PathBuf`.
    fn shared_stem(left: &str, right: &str) -> PathBuf {
        get_shared_steps(Path::new(left), Path::new(right)).collect::<PathBuf>()
    }

    #[test]
    fn test_get_shared_steps() {
        // (left, right, expected shared stem)
        let cases = [
            ("", "", ""),
            ("foo.txt", "foo.txt", "foo.txt"),
            ("cat/foo.txt", "dog/foo.txt", ""),
            ("foo/bar/baz/lorem.txt", "foo/bar/ipsum/dolar.txt", "foo/bar"),
        ];
        for &(left, right, expected) in cases.iter() {
            assert_eq!(shared_stem(left, right), PathBuf::from(expected));
        }
    }
}

View File

@@ -0,0 +1,95 @@
use std::path::Path;
use std::path::PathBuf;
use tokio::task::JoinHandle;
use walkdir::WalkDir;
use crate::error::CustomError;
use super::BlogPostPage;
use super::IDocumentElement;
/// A blog post loaded from a directory of org-mode files.
#[derive(Debug)]
pub(crate) struct BlogPost {
    /// Name of the post directory, lossily converted to UTF-8.
    pub(crate) id: String,
    /// One page per `.org` file found under the post directory.
    pub(crate) pages: Vec<BlogPostPage>,
    /// Left empty by `load_blog_post`.
    pub(crate) children: Vec<IDocumentElement>,
}

impl BlogPost {
    /// Reads and parses every `.org` file under `post_dir` into a `BlogPost`.
    ///
    /// The post id is the final component of `post_dir`. Each page path is
    /// stored relative to `post_dir`. `root_dir` is accepted but not used.
    ///
    /// # Errors
    ///
    /// Returns an error when `post_dir` has no final component (for example
    /// it ends in `..`), when walking the directory or reading a file fails,
    /// when a read task cannot be joined, when a document fails to parse, or
    /// when a page cannot be built from a parsed document.
    pub(crate) async fn load_blog_post<P: AsRef<Path>, R: AsRef<Path>>(
        root_dir: R,
        post_dir: P,
    ) -> Result<BlogPost, CustomError> {
        // Non-generic body so it is compiled once regardless of P and R.
        async fn inner(_root_dir: &Path, post_dir: &Path) -> Result<BlogPost, CustomError> {
            // The directory comes from outside this function, so a missing
            // name is reported as an error instead of a panic.
            let post_id = post_dir.file_name().ok_or(CustomError::Static(
                "The post directory should have a name.",
            ))?;

            // `get_org_files` hands back a lazy iterator: a read task is only
            // spawned when the iterator is advanced. Collect all handles up
            // front so every read is in flight before the first await,
            // instead of spawning and awaiting one file at a time.
            let read_tasks: Vec<_> = get_org_files(post_dir)?.collect();
            let mut org_files = Vec::with_capacity(read_tasks.len());
            for task in read_tasks {
                // Outer `?` is the join error, inner `?` is the I/O error.
                org_files.push(task.await??);
            }

            // Parsed documents borrow from `org_files`, which therefore has
            // to outlive them.
            let mut parsed_org_files = Vec::with_capacity(org_files.len());
            for (path, contents) in org_files.iter() {
                let parsed = organic::parser::parse_file(contents.as_str(), Some(path))
                    .map_err(|_| CustomError::Static("Failed to parse org-mode document."))?;
                parsed_org_files.push((path, parsed));
            }

            let mut pages = Vec::with_capacity(parsed_org_files.len());
            for (real_path, parsed_document) in parsed_org_files {
                let relative_to_post_dir_path = real_path.strip_prefix(post_dir)?;
                pages.push(BlogPostPage::new(
                    relative_to_post_dir_path,
                    parsed_document,
                )?);
            }

            Ok(BlogPost {
                id: post_id.to_string_lossy().into_owned(),
                pages,
                children: Vec::new(),
            })
        }
        inner(root_dir.as_ref(), post_dir.as_ref()).await
    }
}
/// Reads the file at `path` as UTF-8 text and returns it together with the
/// path it was read from.
async fn read_file(path: PathBuf) -> std::io::Result<(PathBuf, String)> {
    let read_result = tokio::fs::read_to_string(&path).await;
    read_result.map(|contents| (path, contents))
}
/// Finds every regular file under `root_dir` whose extension is `org`
/// (compared ASCII case-insensitively) and returns an iterator of tasks that
/// read those files.
///
/// The directory walk happens eagerly, and the first walk error is returned.
/// The returned iterator is lazy: each read task is spawned only when the
/// iterator is advanced to it.
fn get_org_files<P: AsRef<Path>>(
    root_dir: P,
) -> Result<impl Iterator<Item = JoinHandle<std::io::Result<(PathBuf, String)>>>, walkdir::Error> {
    let mut org_paths = Vec::new();
    for entry in WalkDir::new(root_dir) {
        let dir_entry = entry?;
        let has_org_extension = matches!(
            Path::new(dir_entry.file_name()).extension(),
            Some(ext) if ext.eq_ignore_ascii_case("org")
        );
        if dir_entry.file_type().is_file() && has_org_extension {
            org_paths.push(dir_entry.into_path());
        }
    }
    let read_tasks = org_paths
        .into_iter()
        .map(|path| tokio::spawn(read_file(path)));
    Ok(read_tasks)
}

View File

@@ -0,0 +1,8 @@
use super::IHeading;
use super::ISection;
/// A top-level element of a page in the intermediate representation: either
/// a heading or a section.
#[derive(Debug)]
pub(crate) enum IDocumentElement {
    /// A heading, wrapping its intermediate form.
    Heading(IHeading),
    /// A section, wrapping its intermediate form.
    Section(ISection),
}

View File

@@ -0,0 +1,2 @@
/// Placeholder with no variants yet, so no value of this type can be built.
// NOTE(review): presumably this will hold the intermediate forms of org-mode
// elements — confirm against the planned design.
#[derive(Debug)]
pub(crate) enum IElement {}

View File

@@ -0,0 +1,23 @@
use crate::error::CustomError;
use super::IObject;
/// Intermediate form of an org-mode heading.
#[derive(Debug)]
pub(crate) struct IHeading {
    /// Heading level copied from the parsed heading.
    pub(crate) level: organic::types::HeadlineLevel,
    /// The objects that make up the heading title, in source order.
    pub(crate) title: Vec<IObject>,
}

impl IHeading {
    /// Converts a parsed heading into its intermediate form.
    ///
    /// # Errors
    ///
    /// Returns the first error produced while converting a title object.
    pub(crate) fn new(heading: &organic::types::Heading<'_>) -> Result<IHeading, CustomError> {
        let mut title = Vec::new();
        for object in heading.title.iter() {
            title.push(IObject::new(object)?);
        }
        let level = heading.level;
        Ok(IHeading { title, level })
    }
}

19
src/intermediate/mod.rs Normal file
View File

@@ -0,0 +1,19 @@
mod convert;
mod definition;
mod document_element;
mod element;
mod heading;
mod object;
mod page;
mod plain_text;
mod section;
mod util;
pub(crate) use convert::convert_blog_post_page_to_render_context;
pub(crate) use definition::BlogPost;
pub(crate) use document_element::IDocumentElement;
pub(crate) use element::IElement;
pub(crate) use heading::IHeading;
pub(crate) use object::IObject;
pub(crate) use page::BlogPostPage;
pub(crate) use plain_text::IPlainText;
pub(crate) use section::ISection;

View File

@@ -0,0 +1,44 @@
use crate::error::CustomError;
use super::plain_text::IPlainText;
#[derive(Debug)]
pub(crate) enum IObject {
PlainText(IPlainText),
}
impl IObject {
pub(crate) fn new(obj: &organic::types::Object<'_>) -> Result<IObject, CustomError> {
match obj {
organic::types::Object::Bold(_) => todo!(),
organic::types::Object::Italic(_) => todo!(),
organic::types::Object::Underline(_) => todo!(),
organic::types::Object::StrikeThrough(_) => todo!(),
organic::types::Object::Code(_) => todo!(),
organic::types::Object::Verbatim(_) => todo!(),
organic::types::Object::PlainText(plain_text) => {
Ok(IObject::PlainText(IPlainText::new(plain_text)?))
}
organic::types::Object::RegularLink(_) => todo!(),
organic::types::Object::RadioLink(_) => todo!(),
organic::types::Object::RadioTarget(_) => todo!(),
organic::types::Object::PlainLink(_) => todo!(),
organic::types::Object::AngleLink(_) => todo!(),
organic::types::Object::OrgMacro(_) => todo!(),
organic::types::Object::Entity(_) => todo!(),
organic::types::Object::LatexFragment(_) => todo!(),
organic::types::Object::ExportSnippet(_) => todo!(),
organic::types::Object::FootnoteReference(_) => todo!(),
organic::types::Object::Citation(_) => todo!(),
organic::types::Object::CitationReference(_) => todo!(),
organic::types::Object::InlineBabelCall(_) => todo!(),
organic::types::Object::InlineSourceBlock(_) => todo!(),
organic::types::Object::LineBreak(_) => todo!(),
organic::types::Object::Target(_) => todo!(),
organic::types::Object::StatisticsCookie(_) => todo!(),
organic::types::Object::Subscript(_) => todo!(),
organic::types::Object::Superscript(_) => todo!(),
organic::types::Object::Timestamp(_) => todo!(),
}
}
}

59
src/intermediate/page.rs Normal file
View File

@@ -0,0 +1,59 @@
use std::path::PathBuf;
use crate::error::CustomError;
use super::IDocumentElement;
use super::IHeading;
use super::ISection;
/// Intermediate form of a single page (one org-mode document) of a blog post.
#[derive(Debug)]
pub(crate) struct BlogPostPage {
    /// Relative path from the root of the blog post.
    pub(crate) path: PathBuf,
    /// Value of the document's `title` keyword, if it has one.
    pub(crate) title: Option<String>,
    /// The zeroth section (when present) followed by the document's headings.
    pub(crate) children: Vec<IDocumentElement>,
}

impl BlogPostPage {
    /// Builds a page from a parsed document.
    ///
    /// # Errors
    ///
    /// Returns the first error produced while converting the zeroth section
    /// or any heading.
    pub(crate) fn new<P: Into<PathBuf>>(
        path: P,
        document: organic::types::Document<'_>,
    ) -> Result<BlogPostPage, CustomError> {
        let path = path.into();

        let zeroth_section = document
            .zeroth_section
            .iter()
            .map(|section| ISection::new(section).map(IDocumentElement::Section));
        let headings = document
            .children
            .iter()
            .map(|heading| IHeading::new(heading).map(IDocumentElement::Heading));
        let children = zeroth_section
            .chain(headings)
            .collect::<Result<Vec<_>, CustomError>>()?;

        let title = get_title(&document);
        Ok(BlogPostPage {
            path,
            title,
            children,
        })
    }

    /// Get the output path relative to the post directory.
    ///
    /// This is the page's own path with its extension set to `html`.
    pub(crate) fn get_output_path(&self) -> PathBuf {
        let mut html_path = PathBuf::from(&self.path);
        html_path.set_extension("html");
        html_path
    }
}
/// Returns the value of the last `title` keyword in the document, if any.
///
/// The keyword name is compared ASCII case-insensitively.
fn get_title(document: &organic::types::Document<'_>) -> Option<String> {
    organic::types::AstNode::from(document)
        .iter_all_ast_nodes()
        // Keep overwriting the candidate so the last matching keyword wins.
        .fold(None, |latest, node| match node {
            organic::types::AstNode::Keyword(kw) if kw.key.eq_ignore_ascii_case("title") => {
                Some(kw)
            }
            _ => latest,
        })
        .map(|kw| kw.value.to_owned())
}

View File

@@ -0,0 +1,17 @@
use crate::error::CustomError;
use crate::intermediate::util::coalesce_whitespace;
/// Intermediate form of a plain-text object.
#[derive(Debug)]
pub(crate) struct IPlainText {
    /// The text after being passed through `coalesce_whitespace`.
    source: String,
}

impl IPlainText {
    /// Converts parsed plain text into an owned intermediate value.
    ///
    /// This conversion currently never returns an error.
    pub(crate) fn new(
        plain_text: &organic::types::PlainText<'_>,
    ) -> Result<IPlainText, CustomError> {
        let source = coalesce_whitespace(plain_text.source).into_owned();
        Ok(IPlainText { source })
    }
}

View File

@@ -0,0 +1,10 @@
use crate::error::CustomError;
/// Intermediate form of a section. It carries no data yet.
#[derive(Debug)]
pub(crate) struct ISection {}

impl ISection {
    /// Converts a parsed section into its intermediate form.
    ///
    /// The contents of `section` are not read yet, and this conversion
    /// currently never returns an error.
    pub(crate) fn new(section: &organic::types::Section<'_>) -> Result<ISection, CustomError> {
        let converted = ISection {};
        Ok(converted)
    }
}

48
src/intermediate/util.rs Normal file
View File

@@ -0,0 +1,48 @@
use std::borrow::Cow;
/// Collapses every run of whitespace in a string into a single space.
///
/// Whitespace here means space, tab, carriage return and line feed. Runs at
/// the start or end of the input are collapsed too, not trimmed.
///
/// Example: "foo  bar" => "foo bar", "foo \n bar" => "foo bar" and
/// "\tfoo" => " foo".
///
/// The input is returned borrowed when it is already in coalesced form, that
/// is, when its only whitespace is isolated single spaces. A new string is
/// allocated only when the output actually differs from the input.
// NOTE(review): this function is called from `IPlainText::new`, so the
// `dead_code` allowance below may be stale — confirm before removing it.
#[allow(dead_code)]
pub(crate) fn coalesce_whitespace(input: &str) -> Cow<'_, str> {
    // `None` while the output so far is byte-identical to the input prefix.
    let mut rewritten: Option<String> = None;
    // Whether the previous character was whitespace.
    let mut in_whitespace = false;

    for (offset, c) in input.char_indices() {
        let is_whitespace = matches!(c, ' ' | '\t' | '\r' | '\n');
        match rewritten.as_mut() {
            None => {
                // A lone ' ' is already in its final form. Anything else
                // (a second whitespace character in a run, or a whitespace
                // character other than ' ') forces a rewrite.
                if is_whitespace && (in_whitespace || c != ' ') {
                    let mut ret = String::with_capacity(input.len());
                    ret.push_str(&input[..offset]);
                    if !in_whitespace {
                        // First character of the run: emit its replacement.
                        // Otherwise the prefix already ends with the run's
                        // single space and this character is dropped.
                        ret.push(' ');
                    }
                    rewritten = Some(ret);
                }
            }
            Some(ret) => {
                if !is_whitespace {
                    ret.push(c);
                } else if !in_whitespace {
                    ret.push(' ');
                }
            }
        }
        in_whitespace = is_whitespace;
    }

    match rewritten {
        Some(ret) => Cow::Owned(ret),
        None => Cow::Borrowed(input),
    }
}