// natter/src/intermediate/blog_post.rs

use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::Mutex;

use tokio::task::JoinHandle;
use walkdir::WalkDir;

use crate::error::CustomError;
use crate::intermediate::page::BlogPostPageInput;
use crate::intermediate::registry::Registry;

use super::BlogPostPage;

/// A blog post loaded from a directory of org-mode files.
#[derive(Debug)]
pub(crate) struct BlogPost {
    /// Identifier of the post: the final component of the post directory's path
    /// (converted lossily to UTF-8).
    pub(crate) id: String,
    /// One page for each `.org` file found beneath the post directory.
    pub(crate) pages: Vec<BlogPostPage>,
}

impl BlogPost {
    /// Load a blog post from the directory `post_dir`.
    ///
    /// Every `.org` file found beneath `post_dir` is read, parsed, and turned into a
    /// [`BlogPostPage`] whose path is relative to `post_dir`. The post's `id` is the final
    /// component of `post_dir`.
    ///
    /// `root_dir` is accepted but is currently unused.
    ///
    /// # Errors
    ///
    /// Returns an error if `post_dir` has no final component (for example `/` or a path
    /// ending in `..`), if walking the directory or reading a file fails, if a file cannot
    /// be parsed as an org-mode document, or if constructing a page fails.
    pub(crate) async fn load_blog_post<P: AsRef<Path>, R: AsRef<Path>>(
        root_dir: R,
        post_dir: P,
    ) -> Result<BlogPost, CustomError> {
        // Non-generic inner function so the body is compiled once, not per `P`/`R` pair.
        async fn inner(_root_dir: &Path, post_dir: &Path) -> Result<BlogPost, CustomError> {
            // Report a nameless directory as an error instead of panicking.
            let post_id = post_dir
                .file_name()
                .ok_or(CustomError::Static("The post directory should have a name."))?;

            // Wait for every read task; the first `?` handles a failed join, the second a
            // failed read.
            let mut org_files = Vec::new();
            for read_task in get_org_files(post_dir)? {
                org_files.push(read_task.await??);
            }

            // The parsed documents borrow from `org_files`, so that vector must outlive them.
            let mut parsed_org_files = Vec::with_capacity(org_files.len());
            for (path, contents) in org_files.iter() {
                let parsed = organic::parser::parse_file(contents.as_str(), Some(path))
                    .map_err(|_| CustomError::Static("Failed to parse org-mode document."))?;
                parsed_org_files.push((path, parsed));
            }

            let mut pages = Vec::with_capacity(parsed_org_files.len());
            for (real_path, parsed_document) in parsed_org_files.iter() {
                let mut registry = Registry::new();

                // Assign IDs to the targets before the registry is shared with the page.
                for node in organic::types::AstNode::from(parsed_document).iter_all_ast_nodes() {
                    if let organic::types::AstNode::Target(target) = node {
                        registry.get_target(target.value);
                    }
                }

                let registry = Arc::new(Mutex::new(registry));
                let relative_to_post_dir_path = real_path.strip_prefix(post_dir)?;
                let page = BlogPostPage::new(
                    registry,
                    BlogPostPageInput::new(relative_to_post_dir_path, parsed_document),
                )
                .await?;
                pages.push(page);
            }

            Ok(BlogPost {
                id: post_id.to_string_lossy().into_owned(),
                pages,
            })
        }
        inner(root_dir.as_ref(), post_dir.as_ref()).await
    }

    /// Get the date for a blog post.
    ///
    /// The date is set by the "#+date" export setting. This will
    /// first attempt to read the date from an index.org if such a
    /// file exists. If that file does not exist or that file does not
    /// contain a date export setting, then this will iterate through
    /// all the pages under the blog post, in the order they are stored
    /// in `pages`, looking for any page that contains a date export
    /// setting. It will return the first date found, or `None` if no
    /// page has one.
    pub(crate) fn get_date(&self) -> Option<&str> {
        self.get_index_page()
            .and_then(|index_page| index_page.date.as_deref())
            .or_else(|| self.pages.iter().find_map(|page| page.date.as_deref()))
    }

    /// Get the blog post page for index.org
    ///
    /// Only a page whose path (relative to the post directory) is exactly
    /// `index.org` matches; returns `None` when there is no such page.
    pub(crate) fn get_index_page(&self) -> Option<&BlogPostPage> {
        self.pages
            .iter()
            .find(|page| page.path == Path::new("index.org"))
    }
}

/// Read the whole file at `path` as UTF-8 text.
///
/// On success the path is handed back together with the contents so the caller can tell
/// which file a finished read belongs to.
async fn read_file(path: PathBuf) -> std::io::Result<(PathBuf, String)> {
    tokio::fs::read_to_string(&path)
        .await
        .map(|text| (path, text))
}

/// Start reading every `.org` file beneath `root_dir`.
///
/// The directory tree is walked synchronously. An entry is selected when it is a file and
/// its extension equals `org`, ignoring ASCII case. One tokio task running [`read_file`] is
/// spawned per selected file, and all tasks are spawned before this function returns, so the
/// reads proceed concurrently while the caller awaits the handles one by one.
///
/// # Errors
///
/// Returns the first [`walkdir::Error`] encountered while walking; in that case no task is
/// spawned. Read failures are reported through each task's own `std::io::Result`.
///
/// # Panics
///
/// `tokio::spawn` panics when called outside of a tokio runtime, so this function must be
/// called from within one.
fn get_org_files<P: AsRef<Path>>(
    root_dir: P,
) -> Result<impl Iterator<Item = JoinHandle<std::io::Result<(PathBuf, String)>>>, walkdir::Error> {
    let org_files = WalkDir::new(root_dir)
        .into_iter()
        .filter(|entry| match entry {
            Ok(dir_entry) => {
                dir_entry.file_type().is_file()
                    && Path::new(dir_entry.file_name())
                        .extension()
                        .map_or(false, |ext| ext.eq_ignore_ascii_case("org"))
            }
            // Keep walk errors so the fallible `collect` below surfaces them.
            Err(_) => true,
        })
        .collect::<Result<Vec<_>, _>>()?;
    // Collect the handles eagerly: a lazy `map` would only spawn each task when the caller
    // pulls it from the iterator, which serialises the reads.
    let read_tasks: Vec<_> = org_files
        .into_iter()
        .map(walkdir::DirEntry::into_path)
        .map(|path| tokio::spawn(read_file(path)))
        .collect();
    Ok(read_tasks.into_iter())
}