Compare commits

...

16 Commits

Author SHA1 Message Date
Tom Alexander
884a28e63a
Remove pointless copying.
All checks were successful
format Build format has succeeded
rust-test Build rust-test has succeeded
clippy Build clippy has succeeded
2024-10-19 17:28:50 -04:00
Tom Alexander
1c3e2ca4d9
Remove the last use of walkdir. 2024-10-19 17:26:37 -04:00
Tom Alexander
2081d25066
Remove a use of WalkDir. 2024-10-19 17:14:05 -04:00
Tom Alexander
7ddc4011b3
Use a type alias for read file results. 2024-10-19 17:03:52 -04:00
Tom Alexander
379850fe3d
Use the deep path as the post id. 2024-10-19 16:55:38 -04:00
Tom Alexander
8ab69e480e
Case insensitive match for org file extension. 2024-10-19 16:38:29 -04:00
Tom Alexander
c5621212bc
Using multi-level deep folders successfully but the output is still shallow. 2024-10-19 16:35:45 -04:00
Tom Alexander
b9f74b7eca
Write the filter to find the highest folders containing org documents. 2024-10-19 16:25:54 -04:00
Tom Alexander
493adb4688
Switch to iterative instead of recursive. 2024-10-19 16:15:23 -04:00
Tom Alexander
b6cc7a70b7
Future is not send. 2024-10-18 21:29:15 -04:00
Tom Alexander
8868cfb63f
Don't need to hard-code static lifetime. 2024-10-18 21:23:22 -04:00
Tom Alexander
ae3add9c81
Fixed recursion using a BoxFuture. 2024-10-18 21:22:39 -04:00
Tom Alexander
98fa43575d
Require boxing?. 2024-10-18 21:19:40 -04:00
Tom Alexander
5d3a6c4174
Also infinite. 2024-10-18 21:15:23 -04:00
Tom Alexander
be467c8074
infinite recursion? 2024-10-18 21:13:50 -04:00
Tom Alexander
0da375c529
Add a function to recursively list all entries in a directory with tokio. 2024-10-18 21:05:29 -04:00
8 changed files with 165 additions and 65 deletions

1
Cargo.lock generated
View File

@ -446,7 +446,6 @@ dependencies = [
"tokio",
"toml",
"url",
"walkdir",
]
[[package]]

View File

@ -32,7 +32,6 @@ serde_json = "1.0.107"
tokio = { version = "1.30.0", default-features = false, features = ["rt", "rt-multi-thread", "fs", "io-util"] }
toml = "0.8.2"
url = "2.5.0"
walkdir = "2.4.0"
# Optimized build for any sort of release.
[profile.release-lto]

View File

@ -1,11 +1,10 @@
use std::ffi::OsStr;
use std::path::Path;
use std::path::PathBuf;
use include_dir::include_dir;
use include_dir::Dir;
use tokio::fs::DirEntry;
use tokio::task::JoinHandle;
use walkdir::WalkDir;
use crate::config::Config;
use crate::context::RenderBlogPostPage;
@ -20,6 +19,9 @@ use crate::intermediate::BlogPost;
use crate::intermediate::IPage;
use crate::render::DusterRenderer;
use crate::render::RendererIntegration;
use crate::walk_fs::walk_fs;
use crate::walk_fs::WalkAction;
use crate::walk_fs::WalkFsFilterResult;
use super::stylesheet::Stylesheet;
@ -235,7 +237,7 @@ impl SiteRenderer {
if !static_files_directory.exists() {
return Ok(());
}
let static_files = get_all_files(&static_files_directory)?;
let static_files = get_all_files(&static_files_directory).await?;
for entry in static_files {
let (path, contents) = entry.await??;
let relative_path = path.strip_prefix(&static_files_directory)?;
@ -263,24 +265,31 @@ fn build_name_contents_pairs<'a>(
Ok((name, contents))
}
fn get_all_files<P: AsRef<Path>>(
root_dir: P,
) -> Result<impl Iterator<Item = JoinHandle<std::io::Result<(PathBuf, Vec<u8>)>>>, walkdir::Error> {
let files = WalkDir::new(root_dir)
.into_iter()
.filter(|e| match e {
Ok(dir_entry) => dir_entry.file_type().is_file(),
Err(_) => true,
})
.collect::<Result<Vec<_>, _>>()?;
let org_files = files
.into_iter()
.map(walkdir::DirEntry::into_path)
.map(|path| tokio::spawn(read_file(path)));
Ok(org_files)
/// Outcome of reading one file: the file's path paired with its raw bytes,
/// or the I/O error that prevented the read.
type ReadFileResult = std::io::Result<(PathBuf, Vec<u8>)>;
/// Filter for `walk_fs` that captures every regular file beneath the root.
///
/// Directories are descended into, regular files are captured, and every
/// other kind of entry (symlinks, sockets, FIFOs, devices, ...) is skipped.
///
/// # Errors
///
/// Returns an error if the entry's file type cannot be determined.
async fn filter_to_files(entry: &DirEntry) -> WalkFsFilterResult {
    let file_type = entry.file_type().await?;
    if file_type.is_dir() {
        return Ok(WalkAction::Recurse);
    }
    if file_type.is_file() {
        return Ok(WalkAction::HaltAndCapture);
    }
    // `file_type` does not follow symlinks, so an entry can legitimately be
    // neither a directory nor a regular file. Skip it instead of panicking.
    Ok(WalkAction::Halt)
}
async fn read_file(path: PathBuf) -> std::io::Result<(PathBuf, Vec<u8>)> {
/// Find every regular file beneath `root_dir` and start reading each one on
/// its own tokio task.
///
/// The returned iterator yields one join handle per file. All read tasks are
/// spawned before this function returns, so they make progress concurrently
/// while the caller awaits the handles one at a time.
///
/// # Errors
///
/// Returns an error if walking the directory tree fails. Failures of the
/// individual reads are reported through the corresponding join handle.
async fn get_all_files<P: Into<PathBuf>>(
    root_dir: P,
) -> Result<impl Iterator<Item = JoinHandle<ReadFileResult>>, CustomError> {
    let files = walk_fs(root_dir, filter_to_files).await?;
    // Collect eagerly: `map` is lazy, so without this each task would only be
    // spawned when the caller asks for the next handle, serialising the reads.
    let read_jobs: Vec<_> = files
        .into_iter()
        .map(|entry| tokio::spawn(read_file(entry.path())))
        .collect();
    Ok(read_jobs.into_iter())
}
/// Read the whole file at `path` and hand the path back alongside its bytes.
///
/// # Errors
///
/// Returns the I/O error produced by the read.
async fn read_file(path: PathBuf) -> ReadFileResult {
    tokio::fs::read(&path)
        .await
        .map(|contents| (path, contents))
}

View File

@ -14,8 +14,12 @@ use crate::intermediate::IPage;
use crate::intermediate::IntermediateContext;
use crate::intermediate::PageInput;
use crate::intermediate::Registry;
use crate::walk_fs::walk_fs;
use crate::walk_fs::WalkAction;
use crate::walk_fs::WalkFsFilterResult;
use include_dir::include_dir;
use include_dir::Dir;
use tokio::fs::DirEntry;
static DEFAULT_STYLESHEETS: Dir =
include_dir!("$CARGO_MANIFEST_DIR/default_environment/stylesheet");
@ -59,27 +63,54 @@ async fn get_output_directory(config: &Config) -> Result<PathBuf, CustomError> {
Ok(output_directory)
}
async fn get_post_directories(config: &Config) -> Result<Vec<PathBuf>, CustomError> {
let mut ret = Vec::new();
if !config.get_posts_directory().exists() {
return Ok(ret);
async fn filter_to_highest_folders_containing_org_files(entry: &DirEntry) -> WalkFsFilterResult {
let file_type = entry.file_type().await?;
if !file_type.is_dir() {
return Ok(WalkAction::Halt);
}
let mut entries = tokio::fs::read_dir(config.get_posts_directory()).await?;
let mut entries = tokio::fs::read_dir(entry.path()).await?;
while let Some(entry) = entries.next_entry().await? {
let file_type = entry.file_type().await?;
if file_type.is_dir() {
ret.push(entry.path());
let entry_type = entry.file_type().await?;
if !entry_type.is_file() {
continue;
}
match entry.path().extension().and_then(OsStr::to_str) {
Some(ext) if ext.eq_ignore_ascii_case("org") => {
return Ok(WalkAction::HaltAndCapture);
}
_ => {}
}
}
Ok(ret)
Ok(WalkAction::Recurse)
}
/// List the post directories under the configured posts directory.
///
/// A post directory is whatever
/// `filter_to_highest_folders_containing_org_files` captures during the walk.
/// When the posts directory does not exist the result is an empty list.
///
/// # Errors
///
/// Returns an error if walking the posts directory fails.
async fn get_post_directories(config: &Config) -> Result<Vec<PathBuf>, CustomError> {
    let posts_directory = config.get_posts_directory();
    if !posts_directory.exists() {
        return Ok(Vec::new());
    }

    let captured = walk_fs(
        posts_directory,
        filter_to_highest_folders_containing_org_files,
    )
    .await?;

    let mut post_directories = Vec::with_capacity(captured.len());
    for entry in captured {
        post_directories.push(entry.path());
    }
    Ok(post_directories)
}
async fn load_blog_posts(config: &Config) -> Result<Vec<BlogPost>, CustomError> {
let root_directory = config.get_root_directory().to_owned();
let posts_directory = config.get_posts_directory();
let post_directories = get_post_directories(config).await?;
let load_jobs = post_directories
.into_iter()
.map(|path| tokio::spawn(BlogPost::load_blog_post(root_directory.clone(), path)));
let load_jobs = post_directories.into_iter().map(|path| {
tokio::spawn(BlogPost::load_blog_post(
root_directory.clone(),
posts_directory.clone(),
path,
))
});
let mut blog_posts = Vec::new();
for job in load_jobs {
blog_posts.push(job.await??);
@ -94,7 +125,7 @@ async fn load_pages(config: &Config) -> Result<Vec<IPage>, CustomError> {
if !pages_source.exists() {
return Ok(Vec::new());
}
let page_files = get_org_files(&pages_source)?;
let page_files = get_org_files(&pages_source).await?;
let org_files = {
let mut ret = Vec::new();
for page in page_files {

View File

@ -8,7 +8,6 @@ pub(crate) enum CustomError {
IO(#[allow(dead_code)] std::io::Error),
TomlSerialize(#[allow(dead_code)] toml::ser::Error),
TomlDeserialize(#[allow(dead_code)] toml::de::Error),
WalkDir(#[allow(dead_code)] walkdir::Error),
Tokio(#[allow(dead_code)] tokio::task::JoinError),
Serde(#[allow(dead_code)] serde_json::Error),
Utf8(#[allow(dead_code)] Utf8Error),
@ -49,12 +48,6 @@ impl From<toml::de::Error> for CustomError {
}
}
impl From<walkdir::Error> for CustomError {
fn from(value: walkdir::Error) -> Self {
CustomError::WalkDir(value)
}
}
impl From<tokio::task::JoinError> for CustomError {
fn from(value: tokio::task::JoinError) -> Self {
CustomError::Tokio(value)

View File

@ -3,13 +3,16 @@ use std::path::PathBuf;
use std::sync::Arc;
use std::sync::Mutex;
use tokio::fs::DirEntry;
use tokio::task::JoinHandle;
use walkdir::WalkDir;
use crate::error::CustomError;
use crate::intermediate::blog_post_page::BlogPostPageInput;
use crate::intermediate::registry::Registry;
use crate::intermediate::IntermediateContext;
use crate::walk_fs::walk_fs;
use crate::walk_fs::WalkAction;
use crate::walk_fs::WalkFsFilterResult;
use super::BlogPostPage;
@ -20,18 +23,21 @@ pub(crate) struct BlogPost {
}
impl BlogPost {
pub(crate) async fn load_blog_post<P: AsRef<Path>, R: AsRef<Path>>(
pub(crate) async fn load_blog_post<P: AsRef<Path>, R: AsRef<Path>, S: AsRef<Path>>(
root_dir: R,
posts_dir: S,
post_dir: P,
) -> Result<BlogPost, CustomError> {
async fn inner(_root_dir: &Path, post_dir: &Path) -> Result<BlogPost, CustomError> {
let post_id = post_dir
.file_name()
.expect("The post directory should have a name.");
async fn inner(
_root_dir: &Path,
posts_dir: &Path,
post_dir: &Path,
) -> Result<BlogPost, CustomError> {
let post_id = post_dir.strip_prefix(posts_dir)?.as_os_str();
let org_files = {
let mut ret = Vec::new();
let org_files_iter = get_org_files(post_dir)?;
let org_files_iter = get_org_files(post_dir).await?;
for entry in org_files_iter {
ret.push(entry.await??);
}
@ -80,7 +86,7 @@ impl BlogPost {
pages,
})
}
inner(root_dir.as_ref(), post_dir.as_ref()).await
inner(root_dir.as_ref(), posts_dir.as_ref(), post_dir.as_ref()).await
}
/// Get the date for a blog post.
@ -119,25 +125,33 @@ async fn read_file(path: PathBuf) -> std::io::Result<(PathBuf, String)> {
Ok((path, contents))
}
pub(crate) fn get_org_files<P: AsRef<Path>>(
pub(crate) async fn get_org_files<P: Into<PathBuf>>(
root_dir: P,
) -> Result<impl Iterator<Item = JoinHandle<std::io::Result<(PathBuf, String)>>>, walkdir::Error> {
let org_files = WalkDir::new(root_dir)
.into_iter()
.filter(|e| match e {
Ok(dir_entry) => {
dir_entry.file_type().is_file()
&& Path::new(dir_entry.file_name())
.extension()
.map(|ext| ext.to_ascii_lowercase() == "org")
.unwrap_or(false)
}
Err(_) => true,
})
.collect::<Result<Vec<_>, _>>()?;
) -> Result<impl Iterator<Item = JoinHandle<std::io::Result<(PathBuf, String)>>>, CustomError> {
let org_files = walk_fs(root_dir, filter_to_org_files).await?;
let org_files = org_files
.into_iter()
.map(walkdir::DirEntry::into_path)
.map(|entry| entry.path())
.map(|path| tokio::spawn(read_file(path)));
Ok(org_files)
}
/// Filter for `walk_fs` that captures regular files with an `org` extension.
///
/// Directories are descended into. Regular files are captured when their
/// extension equals `org`, compared ASCII case-insensitively. Everything else
/// (other files, symlinks, sockets, FIFOs, devices, ...) is skipped.
///
/// # Errors
///
/// Returns an error if the entry's file type cannot be determined.
async fn filter_to_org_files(entry: &DirEntry) -> WalkFsFilterResult {
    let file_type = entry.file_type().await?;
    if file_type.is_dir() {
        return Ok(WalkAction::Recurse);
    }

    // `file_type` does not follow symlinks, so an entry can legitimately be
    // neither a directory nor a regular file. Such entries are skipped rather
    // than treated as impossible.
    let is_org_file = file_type.is_file()
        && entry
            .path()
            .extension()
            .is_some_and(|ext| ext.eq_ignore_ascii_case("org"));

    if is_org_file {
        Ok(WalkAction::HaltAndCapture)
    } else {
        Ok(WalkAction::Halt)
    }
}

View File

@ -1,4 +1,5 @@
#![feature(let_chains)]
#![feature(async_closure)]
use std::process::ExitCode;
use clap::Parser;
@ -15,6 +16,7 @@ mod context;
mod error;
mod intermediate;
mod render;
mod walk_fs;
fn main() -> Result<ExitCode, CustomError> {
let rt = tokio::runtime::Runtime::new()?;

53
src/walk_fs.rs Normal file
View File

@ -0,0 +1,53 @@
use std::collections::VecDeque;
use std::ops::AsyncFn;
use std::path::PathBuf;
use tokio::fs::DirEntry;
use crate::error::CustomError;
/// Return type of a `walk_fs` filter: the action to take for one directory
/// entry, or the error that aborts the walk.
pub(crate) type WalkFsFilterResult = Result<WalkAction, CustomError>;
/// Walk the directory tree below `root`, letting `filter` decide what happens
/// to each entry.
///
/// Directories are read in first-in, first-out order, starting with `root`.
/// For every entry found, `filter` returns a [`WalkAction`] saying whether the
/// entry is added to the result and whether its path is queued to be read as
/// a directory. `root` itself is never passed to `filter` and never captured.
///
/// # Errors
///
/// Stops and returns the error as soon as reading a directory, fetching the
/// next entry, or `filter` fails.
pub(crate) async fn walk_fs<P: Into<PathBuf>, F: AsyncFn(&DirEntry) -> WalkFsFilterResult>(
    root: P,
    filter: F,
) -> Result<Vec<DirEntry>, CustomError> {
    let mut captured = Vec::new();
    let mut pending: VecDeque<PathBuf> = VecDeque::from([root.into()]);

    while let Some(directory) = pending.pop_front() {
        let mut listing = tokio::fs::read_dir(directory).await?;
        while let Some(entry) = listing.next_entry().await? {
            // Split the action into its two independent decisions.
            let (descend, capture) = match filter(&entry).await? {
                WalkAction::HaltAndCapture => (false, true),
                WalkAction::Halt => (false, false),
                WalkAction::RecurseAndCapture => (true, true),
                WalkAction::Recurse => (true, false),
            };
            if descend {
                pending.push_back(entry.path());
            }
            if capture {
                captured.push(entry);
            }
        }
    }

    Ok(captured)
}
/// Decision a `walk_fs` filter makes about a single directory entry.
///
/// Each variant combines two independent choices: whether the entry is added
/// to the list returned by the walk, and whether the walk descends into it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum WalkAction {
    /// Do not walk down this path but add it to the return list.
    HaltAndCapture,

    /// Do not walk down this path and do not add it to the return list.
    Halt,

    /// Walk down this path and add it to the return list.
    // No filter in the crate returns this yet; kept so the enum covers every
    // capture/descend combination.
    #[allow(dead_code)]
    RecurseAndCapture,

    /// Walk down this path but do not add it to the return list.
    Recurse,
}