natter/src/intermediate/blog_post.rs

144 lines
4.9 KiB
Rust

use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::Mutex;
use tokio::task::JoinHandle;
use walkdir::WalkDir;
use crate::error::CustomError;
use crate::intermediate::page::BlogPostPageInput;
use crate::intermediate::registry::Registry;
use super::BlogPostPage;
/// A blog post assembled from the org-mode files found under one post directory.
#[derive(Debug)]
pub(crate) struct BlogPost {
/// Identifier of the post. `load_blog_post` sets this to the final path
/// component of the post directory (converted lossily to UTF-8).
pub(crate) id: String,
/// The pages of the post. `load_blog_post` creates one page per `.org`
/// file it finds under the post directory.
pub(crate) pages: Vec<BlogPostPage>,
}
impl BlogPost {
pub(crate) async fn load_blog_post<P: AsRef<Path>, R: AsRef<Path>>(
root_dir: R,
post_dir: P,
) -> Result<BlogPost, CustomError> {
async fn inner(_root_dir: &Path, post_dir: &Path) -> Result<BlogPost, CustomError> {
let post_id = post_dir
.file_name()
.expect("The post directory should have a name.");
let org_files = {
let mut ret = Vec::new();
let org_files_iter = get_org_files(post_dir)?;
for entry in org_files_iter {
ret.push(entry.await??);
}
ret
};
let parsed_org_files = {
let mut ret = Vec::new();
for (path, contents) in org_files.iter() {
let parsed = organic::parser::parse_file(contents.as_str(), Some(path))
.map_err(|_| CustomError::Static("Failed to parse org-mode document."))?;
ret.push((path, contents, parsed));
}
ret
};
let pages = {
let mut ret = Vec::new();
for (real_path, _contents, parsed_document) in parsed_org_files.iter() {
let mut registry = Registry::new();
// Assign IDs to the targets
organic::types::AstNode::from(parsed_document)
.iter_all_ast_nodes()
.for_each(|node| match node {
organic::types::AstNode::Target(target) => {
registry.get_target(target.value);
}
_ => {}
});
let registry = Arc::new(Mutex::new(registry));
let relative_to_post_dir_path = real_path.strip_prefix(post_dir)?;
ret.push(
BlogPostPage::new(
registry,
BlogPostPageInput::new(relative_to_post_dir_path, parsed_document),
)
.await?,
);
}
ret
};
Ok(BlogPost {
id: post_id.to_string_lossy().into_owned(),
pages,
})
}
inner(root_dir.as_ref(), post_dir.as_ref()).await
}
/// Get the date for a blog post.
///
/// The date is set by the "#+date" export setting. This will
/// first attempt to read the date from an index.org if such a
/// file exists. If that file does not exist or that file does not
/// contain a date export setting, then this will iterate through
/// all the pages under the blog post looking for any page that
/// contains a date export setting. It will return the first date
/// found.
pub(crate) fn get_date(&self) -> Option<&str> {
let index_page_date = self
.get_index_page()
.map(|index_page| index_page.date.as_ref().map(String::as_str))
.flatten();
if index_page_date.is_some() {
return index_page_date;
}
self.pages
.iter()
.filter_map(|page| page.date.as_ref().map(String::as_str))
.next()
}
/// Get the blog post page for index.org
pub(crate) fn get_index_page(&self) -> Option<&BlogPostPage> {
self.pages
.iter()
.find(|page| page.path == Path::new("index.org"))
}
}
/// Read the file at `path` as UTF-8 text, returning the path together with
/// its contents so callers can keep the two associated.
async fn read_file(path: PathBuf) -> std::io::Result<(PathBuf, String)> {
    tokio::fs::read_to_string(&path)
        .await
        .map(|text| (path, text))
}
/// Start reading every `.org` file found (recursively) under `root_dir`.
///
/// The directory tree is walked synchronously. A regular file is selected
/// when its extension equals `org`, compared ASCII case-insensitively. One
/// Tokio task running [`read_file`] is spawned per selected file, and the
/// returned iterator yields the tasks' join handles in walk order.
///
/// All tasks are spawned before this function returns, so the reads can
/// proceed concurrently while the caller awaits the handles one by one.
/// Because it calls `tokio::spawn`, this must be called from within a Tokio
/// runtime.
///
/// # Errors
///
/// Returns the first `walkdir::Error` encountered while walking the tree; in
/// that case no read task is spawned.
fn get_org_files<P: AsRef<Path>>(
    root_dir: P,
) -> Result<impl Iterator<Item = JoinHandle<std::io::Result<(PathBuf, String)>>>, walkdir::Error> {
    let org_files = WalkDir::new(root_dir)
        .into_iter()
        .filter(|entry| match entry {
            Ok(dir_entry) => {
                dir_entry.file_type().is_file()
                    && Path::new(dir_entry.file_name())
                        .extension()
                        .map(|ext| ext.eq_ignore_ascii_case("org"))
                        .unwrap_or(false)
            }
            // Let errors through so the `collect` below reports them instead
            // of silently skipping unreadable entries.
            Err(_) => true,
        })
        .collect::<Result<Vec<_>, _>>()?;

    // Collect eagerly: a lazy `map` would only spawn each task when the
    // caller advances the iterator, serialising the reads.
    let read_tasks: Vec<_> = org_files
        .into_iter()
        .map(walkdir::DirEntry::into_path)
        .map(|path| tokio::spawn(read_file(path)))
        .collect();
    Ok(read_tasks.into_iter())
}