1
0
mirror of https://git.FreeBSD.org/ports.git synced 2025-01-29 10:18:30 +00:00

deskutils/py-paperless-ngx: New port

Paperless-ngx is a document management system that transforms
your physical documents into a searchable online archive so you
can keep, well, less paper.

Paperless-ngx is the official successor to the original Paperless
and Paperless-ng projects.
This commit is contained in:
Michael Gmelin 2024-02-18 21:03:51 +01:00
parent 5a510abe30
commit 61fe359cf6
20 changed files with 3889 additions and 0 deletions

View File

@ -230,6 +230,7 @@
SUBDIR += py-khal
SUBDIR += py-khard
SUBDIR += py-paperless
SUBDIR += py-paperless-ngx
SUBDIR += py-py3status
SUBDIR += py-pystash
SUBDIR += py-send2trash

View File

@ -0,0 +1,180 @@
PORTNAME= paperless-ngx
PORTVERSION= 2.5.0
DISTVERSIONPREFIX= v
CATEGORIES= deskutils python
MASTER_SITES= https://github.com/${PORTNAME}/${PORTNAME}/releases/download/${DISTVERSIONPREFIX}${DISTVERSION}/:webui \
GH:gh
PKGNAMEPREFIX= ${PYTHON_PKGNAMEPREFIX}
DISTFILES= ${PORTNAME}-${DISTVERSIONPREFIX}${DISTVERSION}${EXTRACT_SUFX}:webui
MAINTAINER= grembo@FreeBSD.org
COMMENT= Community-supported supercharged version of paperless
WWW= https://docs.paperless-ngx.com
LICENSE= GPLv3
LICENSE_FILE= ${WRKSRC}/LICENSE
BUILD_DEPENDS= ${PYTHON_PKGNAMEPREFIX}bleach>=6.1.0:www/py-bleach@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}concurrent-log-handler>=0.9.19:sysutils/py-concurrent-log-handler@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}dateparser>=1.1.4:devel/py-dateparser@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}dj42-channels-redis>=4.2.0:www/py-dj42-channels-redis@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}dj42-django-allauth>=0.59.0:www/py-dj42-django-allauth@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}dj42-django-auditlog>=2.3.0:www/py-dj42-django-auditlog@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}dj42-django-celery-results>=2.5.1:www/py-dj42-django-celery-results@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}dj42-django-compression-middleware>=0.5.0:www/py-dj42-django-compression-middleware@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}dj42-django-cors-headers>=4.3.1:www/py-dj42-django-cors-headers@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}dj42-django-extensions>=3.2.3:www/py-dj42-django-extensions@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}dj42-django-filter>=23.5:www/py-dj42-django-filter@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}dj42-django-multiselectfield>=0.1.12:www/py-dj42-django-multiselectfield@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}dj42-djangorestframework-guardian>=0.3.0:www/py-dj42-djangorestframework-guardian@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}dj42-drf-writable-nested>=0.7.0:www/py-dj42-drf-writable-nested@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}filelock>=3.13.1:sysutils/py-filelock@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}flower>=2.0.1:www/py-flower@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}gotenberg-client>=0.5.0:converters/py-gotenberg-client@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}gunicorn>=21.2.0:www/py-gunicorn@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}h2>=4.1.0:www/py-h2@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}hiredis>=2.0.0:databases/py-hiredis@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}httptools>=0.6.1:www/py-httptools@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}imap-tools>=1.5.0:mail/py-imap-tools@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}inotifyrecursive>=0.3.5_1:devel/py-inotifyrecursive@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}langdetect>=1.0.9:textproc/py-langdetect@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}mysqlclient>=2.2.1:databases/py-mysqlclient@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}nltk>=3.8.1:textproc/py-nltk@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}ocrmypdf>=15.4.4:textproc/py-ocrmypdf@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}pathvalidate>=3.2.0:devel/py-pathvalidate@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}pdf2image>=1.17.0:graphics/py-pdf2image@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}pdftotext>=2.2.2:textproc/py-pdftotext@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}psycopg2>=2.9.9:databases/py-psycopg2@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}python-dotenv>=1.0.0:www/py-python-dotenv@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}python-gnupg>=0.5.1:security/py-python-gnupg@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}python-ipware>=2.0.1:net/py-python-ipware@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}python-magic>=0.4.27:devel/py-python-magic@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}pyzbar>=0.1.9:graphics/py-pyzbar@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}rapidfuzz>=3.6.1:devel/py-rapidfuzz@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}scikit-learn>=1.4.0:science/py-scikit-learn@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}setproctitle>=1.3.2:devel/py-setproctitle@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}sqlite3>0:databases/py-sqlite3@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}tika-client>=0.5.0:textproc/py-tika-client@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}uvicorn>=0.25.0:www/py-uvicorn@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}uvloop>=0.17.0:devel/py-uvloop@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}watchdog>=4.0.0:devel/py-watchdog@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}watchfiles>=0.21.0:devel/py-watchfiles@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}websockets>=12.0:devel/py-websockets@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}whitenoise>=6.2.0:www/py-whitenoise@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}whoosh>=2.7.4:textproc/py-whoosh@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}yaml>=6.0.1:devel/py-yaml@${PY_FLAVOR}
RUN_DEPENDS:= ${BUILD_DEPENDS}
RUN_DEPENDS+= gpg2:security/gnupg \
liberation-fonts-ttf>=2.1.3:x11-fonts/liberation-fonts-ttf \
optipng:graphics/optipng \
tesseract:graphics/tesseract \
unpaper:graphics/unpaper
USES= python shebangfix tar:xz
USE_GITHUB= yes
GH_ACCOUNT= paperless-ngx
USE_RC_SUBR= paperless-beat \
paperless-consumer \
paperless-flower \
paperless-migrate \
paperless-webui \
paperless-worker
SHEBANG_FILES= ${WRKSRC}/src/manage.py
CONFLICTS_INSTALL= ${PYTHON_PKGNAMEPREFIX}*paperless
NO_ARCH= yes
SUB_FILES= nginx.conf paperless-ngx.7 pkg-message README sshd_config
SUB_LIST= CHOWN=${CHOWN} \
ECHO=${ECHO} \
EGREP=${EGREP} \
PKGBASE=${PKGBASE} \
PYTHON_CMD=${PYTHON_CMD} \
PYTHONPREFIX_SITELIBDIR=${PYTHONPREFIX_SITELIBDIR} \
RM=${RM:Q} \
TOUCH=${TOUCH}
USERS= paperless
GROUPS= paperless
PORTDOCS= *
OPTIONS_DEFINE= DOCS EXAMPLES
OPTIONS_DEFAULT= IMAGICK7_X11
OPTIONS_SINGLE= IMAGICK
OPTIONS_SINGLE_IMAGICK= IMAGICK6_NOX11 IMAGICK6_X11 IMAGICK7_NOX11 IMAGICK7_X11
IMAGICK6_NOX11_DESC= ImageMagick6 without X11 support
IMAGICK6_X11_DESC= ImageMagick6 with X11 support
IMAGICK7_NOX11_DESC= ImageMagick7 without X11 support
IMAGICK7_X11_DESC= ImageMagick7 with X11 support
IMAGICK_DESC= ${IMAGEMAGICK_DESC}
IMAGICK6_NOX11_USES= magick:6,run,nox11
IMAGICK6_X11_USES= magick:6,run
IMAGICK7_NOX11_USES= magick:7,run,nox11
IMAGICK7_X11_USES= magick:7,run
PAPERLESS_STATICDIR= ${WRKSRC}/static
# Runs after the files/ patches have been applied: rewrite every literal
# /usr/local in settings.py to ${LOCALBASE}, so that the paths (including the
# ones introduced by the settings.py patch) follow a non-default LOCALBASE.
post-patch:
@${REINPLACE_CMD} -e 's|/usr/local|${LOCALBASE}|g' \
${WRKSRC}/src/paperless/settings.py
# Build steps, in order:
#  1. Byte-compile ${WRKSRC}/src with compileall, skipping paths matching
#     ".*/tests/"; -d records ${PYTHONPREFIX_SITELIBDIR} as the source
#     directory in the compiled files instead of the work directory.
#  2. Repeat the same compilation with the interpreter's -O flag.
#  3. Byte-compile gunicorn.conf.py with -O as well.
#  4. Remove ${PAPERLESS_STATICDIR} and regenerate it with Django's
#     "collectstatic" (ignoring "tests"), pointing the application at the
#     directory through the PAPERLESS_STATICDIR environment variable.
#     NOTE(review): the removal presumably guarantees an empty destination
#     so collectstatic has nothing to overwrite -- confirm.
do-build:
@${PYTHON_CMD} ${PYTHON_LIBDIR}/compileall.py \
-d ${PYTHONPREFIX_SITELIBDIR} \
-x ".*/tests/" \
-f ${WRKSRC}/src
@${PYTHON_CMD} -O ${PYTHON_LIBDIR}/compileall.py \
-d ${PYTHONPREFIX_SITELIBDIR} \
-x ".*/tests/" \
-f ${WRKSRC}/src
@${PYTHON_CMD} -O ${PYTHON_LIBDIR}/compileall.py \
-d ${PYTHONPREFIX_SITELIBDIR} \
-f ${WRKSRC}/gunicorn.conf.py
@${RM} -r ${PAPERLESS_STATICDIR}
@${SETENV} \
PAPERLESS_STATICDIR=${PAPERLESS_STATICDIR} \
${PYTHON_CMD} ${WRKSRC}/src/manage.py collectstatic -i tests
# Stage the application:
#  - resources/ is copied to ${DATADIR}; src/ and gunicorn.conf.py are copied
#    to ${PYTHON_SITELIBDIR}/paperless.
#  - *.orig files below the staged site-packages directory are deleted
#    (presumably leftovers from patching -- confirm).
#  - manage.py is linked to ${PREFIX}/bin/paperless and made executable; this
#    is the "paperless" command used by the rc scripts and the man page.
#  - paperless.conf.example is installed as etc/paperless.conf.sample with
#    %%WWWDIR%% replaced by ${WWWDIR}.
#  - The consume/input, data/log, media and nltkdata directories are created
#    below /var/db/paperless, and /var/run/paperless is created for the
#    pidfiles used by the rc scripts.
#  - The generated man page goes to share/man/man7.
#  - The "static" tree is copied from ${WRKDIR}/${PORTNAME} (not ${WRKSRC})
#    into ${WWWDIR}.
#    NOTE(review): this looks like the extracted release (webui) tarball --
#    confirm against the distfile layout.
do-install:
@${MKDIR} ${STAGEDIR}${PYTHON_SITELIBDIR}/paperless
@cd ${WRKSRC}/resources && \
${COPYTREE_SHARE} . ${STAGEDIR}${DATADIR}
@cd ${WRKSRC}/src && \
${COPYTREE_SHARE} . ${STAGEDIR}${PYTHON_SITELIBDIR}/paperless
@cd ${WRKSRC} && ${COPYTREE_SHARE} gunicorn.conf.py \
${STAGEDIR}${PYTHON_SITELIBDIR}/paperless
@${FIND} ${STAGEDIR}${PYTHON_SITELIBDIR} -name '*.orig' -delete
@${RLN} ${STAGEDIR}${PYTHON_SITELIBDIR}/paperless/manage.py \
${STAGEDIR}${PREFIX}/bin/paperless
@${CHMOD} +x ${STAGEDIR}${PREFIX}/bin/paperless
@${SED} -e 's|%%WWWDIR%%|${WWWDIR}|g;' \
${WRKSRC}/paperless.conf.example \
> ${STAGEDIR}${PREFIX}/etc/paperless.conf.sample
@${MKDIR} ${STAGEDIR}/var/db/paperless/consume/input \
${STAGEDIR}/var/db/paperless/data/log \
${STAGEDIR}/var/db/paperless/media \
${STAGEDIR}/var/db/paperless/nltkdata
${INSTALL_MAN} ${WRKDIR}/paperless-ngx.7 ${STAGEDIR}${PREFIX}/share/man/man7
${MKDIR} ${STAGEDIR}${WWWDIR}
@cd ${WRKDIR}/${PORTNAME} && ${COPYTREE_SHARE} static ${STAGEDIR}${WWWDIR}
@${MKDIR} ${STAGEDIR}/var/run/paperless
post-install-DOCS-on:
@${MKDIR} ${STAGEDIR}${DOCSDIR}/
@cd ${WRKSRC}/docs && \
${COPYTREE_SHARE} . ${STAGEDIR}${DOCSDIR}
post-install-EXAMPLES-on:
@${MKDIR} ${STAGEDIR}${EXAMPLESDIR}
${INSTALL_DATA} \
${WRKDIR}/README \
${WRKDIR}/nginx.conf \
${WRKDIR}/sshd_config \
${STAGEDIR}${EXAMPLESDIR}
.include <bsd.port.mk>

View File

@ -0,0 +1,5 @@
TIMESTAMP = 1707783446
SHA256 (paperless-ngx-v2.5.0.tar.xz) = fa0b90cf9203d1aa89dbf687c43ea103cd616eb7725ae2c6cb6d2ead4d66de6a
SIZE (paperless-ngx-v2.5.0.tar.xz) = 62063416
SHA256 (paperless-ngx-paperless-ngx-v2.5.0_GH0.tar.gz) = 805345b849eb40358562e85b633cab9d6a264ff9e886c9af2d6313ebca281cce
SIZE (paperless-ngx-paperless-ngx-v2.5.0_GH0.tar.gz) = 48300940

View File

@ -0,0 +1 @@
Please see `man paperless-ngx' for details on how to use the paperless examples.

View File

@ -0,0 +1,54 @@
worker_processes 1;
events {
worker_connections 1024;
}
http {
include mime.types;
default_type application/octet-stream;
#sendfile off;
keepalive_timeout 65;
upstream django {
server 127.0.0.1:8000;
}
server {
listen 443 ssl http2;
server_name example.com;
ssl_certificate selfsigned.crt;
ssl_certificate_key selfsigned.key;
location /static/ {
alias %%WWWDIR%%/static/;
}
location / {
proxy_pass http://django;
include uwsgi_params;
proxy_set_header Host $host;
proxy_http_version 1.1;
}
location /ws/status/ {
proxy_pass http://django/ws/status/;
proxy_set_header Host $host;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
}
#location /flower/ {
# proxy_pass http://127.0.0.1:5555;
# proxy_set_header Host 127.0.0.1;
# proxy_http_version 1.1;
#}
error_page 500 502 503 504 /50x.html;
location = /50x.html {
root /usr/local/www/nginx-dist;
}
}
}

View File

@ -0,0 +1,49 @@
#!/bin/sh

# PROVIDE: paperless-beat
# REQUIRE: LOGIN
# BEFORE: paperless-consumer
# KEYWORD: shutdown
#
# Add the following line(s) to /etc/rc.conf to enable the paperless
# celery beat process:
#
# paperless_beat_enable (bool):		Set to "NO" by default.
#					Set it to "YES" to enable
#					paperless_beat
# paperless_beat_log_facility(str):	Set to "daemon" by default.
#					Beat logging syslog facility.
# paperless_beat_log_priority(str):	Set to "notice" by default.
#					Beat logging syslog priority.
# paperless_beat_log_tag(str):		Set to "paperless-beat" by default.
#					Beat logging syslog tag.

. /etc/rc.subr

name="paperless_beat"
rcvar="${name}_enable"

# Load rc.conf(5) settings first: command_args below is assembled from the
# tunables, so they have to be known before it is built. Otherwise overrides
# are silently ignored when the script is started through service(8).
load_rc_config ${name}

# Set defaults (only for tunables that rc.conf left unset or empty)
paperless_beat_enable=${paperless_beat_enable:-"NO"}
paperless_beat_daemon_user=${paperless_beat_user:-"paperless"}
paperless_beat_log_facility=${paperless_beat_log_facility:-"daemon"}
paperless_beat_log_priority=${paperless_beat_log_priority:-"notice"}
paperless_beat_log_tag=${paperless_beat_log_tag:-"paperless-beat"}

# Environment handed to the service by rc.subr
paperless_beat_env="LANG=en_US.UTF-8 PATH=$PATH:%%PREFIX%%/bin \
	PYTHONPATH=%%PYTHONPREFIX_SITELIBDIR%%/paperless"

pidfile="/var/run/paperless/beat.pid"
procname="%%PYTHON_CMD%%"
command="/usr/sbin/daemon"

# daemon(8) runs celery beat as the unprivileged user, writes the child's
# pid to ${pidfile} and forwards its output to syslog.
command_args="-cS -u ${paperless_beat_daemon_user} \
	-p ${pidfile} \
	-t paperless-beat \
	-l ${paperless_beat_log_facility} \
	-s ${paperless_beat_log_priority} \
	-T ${paperless_beat_log_tag} \
	%%PREFIX%%/bin/celery \
	--app paperless beat --loglevel INFO"

run_rc_command "$1"

View File

@ -0,0 +1,49 @@
#!/bin/sh

# PROVIDE: paperless-consumer
# REQUIRE: LOGIN
# BEFORE: paperless-flower
# KEYWORD: shutdown
#
# Add the following line(s) to /etc/rc.conf to enable the paperless
# document consumer process:
#
# paperless_consumer_enable (bool):	Set to "NO" by default.
#					Set it to "YES" to enable
#					paperless_consumer
# paperless_consumer_log_facility(str):	Set to "daemon" by default.
#					Consumer logging syslog facility.
# paperless_consumer_log_priority(str):	Set to "notice" by default.
#					Consumer logging syslog priority.
# paperless_consumer_log_tag(str):	Set to "paperless-consumer" by default.
#					Consumer logging syslog tag.

. /etc/rc.subr

name="paperless_consumer"
rcvar="${name}_enable"

# Load rc.conf(5) settings first: command_args below is assembled from the
# tunables, so they have to be known before it is built. Otherwise overrides
# are silently ignored when the script is started through service(8).
load_rc_config ${name}

# Set defaults (only for tunables that rc.conf left unset or empty)
paperless_consumer_enable=${paperless_consumer_enable:-"NO"}
paperless_consumer_daemon_user=${paperless_consumer_user:-"paperless"}
paperless_consumer_log_facility=${paperless_consumer_log_facility:-"daemon"}
paperless_consumer_log_priority=${paperless_consumer_log_priority:-"notice"}
paperless_consumer_log_tag=${paperless_consumer_log_tag:-"paperless-consumer"}

# Environment handed to the service by rc.subr. Keep the blank before each
# line continuation: the backslash-newline is removed inside double quotes,
# so without it PATH and PYTHONPATH can end up glued into one word.
paperless_consumer_env="LANG=en_US.UTF-8 PATH=$PATH:%%PREFIX%%/bin \
	PYTHONPATH=%%PYTHONPREFIX_SITELIBDIR%%/paperless"

# The consumer is stopped with SIGINT instead of the default SIGTERM.
sig_stop="INT"

pidfile="/var/run/paperless/consumer.pid"
procname="%%PYTHON_CMD%%"
command="/usr/sbin/daemon"

# daemon(8) runs the consumer as the unprivileged user, writes the child's
# pid to ${pidfile} and forwards its output to syslog.
command_args="-cS -u ${paperless_consumer_daemon_user} \
	-p ${pidfile} \
	-t paperless-consumer \
	-l ${paperless_consumer_log_facility} \
	-s ${paperless_consumer_log_priority} \
	-T ${paperless_consumer_log_tag} \
	%%PREFIX%%/bin/paperless document_consumer"

run_rc_command "$1"

View File

@ -0,0 +1,48 @@
#!/bin/sh

# PROVIDE: paperless-flower
# REQUIRE: LOGIN
# KEYWORD: shutdown
#
# Add the following line(s) to /etc/rc.conf to enable the paperless
# celery flower process:
#
# paperless_flower_enable (bool):	Set to "NO" by default.
#					Set it to "YES" to enable
#					paperless_flower
# paperless_flower_log_facility(str):	Set to "daemon" by default.
#					Flower logging syslog facility.
# paperless_flower_log_priority(str):	Set to "notice" by default.
#					Flower logging syslog priority.
# paperless_flower_log_tag(str):	Set to "paperless-flower" by default.
#					Flower logging syslog tag.

. /etc/rc.subr

name="paperless_flower"
rcvar="${name}_enable"

# Load rc.conf(5) settings first: command_args below is assembled from the
# tunables, so they have to be known before it is built. Otherwise overrides
# are silently ignored when the script is started through service(8).
load_rc_config ${name}

# Set defaults (only for tunables that rc.conf left unset or empty)
paperless_flower_enable=${paperless_flower_enable:-"NO"}
paperless_flower_daemon_user=${paperless_flower_user:-"paperless"}
paperless_flower_log_facility=${paperless_flower_log_facility:-"daemon"}
paperless_flower_log_priority=${paperless_flower_log_priority:-"notice"}
paperless_flower_log_tag=${paperless_flower_log_tag:-"paperless-flower"}

# Environment handed to the service by rc.subr
paperless_flower_env="LANG=en_US.UTF-8 PATH=$PATH:%%PREFIX%%/bin \
	PYTHONPATH=%%PYTHONPREFIX_SITELIBDIR%%/paperless"

pidfile="/var/run/paperless/flower.pid"
procname="%%PYTHON_CMD%%"
command="/usr/sbin/daemon"

# daemon(8) runs celery flower as the unprivileged user, writes the child's
# pid to ${pidfile} and forwards its output to syslog. The "flower" URL
# prefix matches the /flower/ location in the example nginx.conf.
command_args="-cS -u ${paperless_flower_daemon_user} \
	-p ${pidfile} \
	-t paperless-flower \
	-l ${paperless_flower_log_facility} \
	-s ${paperless_flower_log_priority} \
	-T ${paperless_flower_log_tag} \
	%%PREFIX%%/bin/celery \
	--app paperless flower --url-prefix=flower --loglevel INFO"

run_rc_command "$1"

View File

@ -0,0 +1,48 @@
#!/bin/sh

# PROVIDE: paperless-migrate
# REQUIRE: LOGIN mysql postgresql redis
# BEFORE: paperless-webui
# KEYWORD: shutdown
#
# Add the following line(s) to /etc/rc.conf to enable running paperless
# migrations:
#
# paperless_migrate_enable (bool):	Set to "NO" by default.
#					Set it to "YES" to enable
#					paperless_migrate
# paperless_migrate_log_facility(str):	Set to "daemon" by default.
#					Migrate logging syslog facility.
# paperless_migrate_log_priority(str):	Set to "notice" by default.
#					Migrate logging syslog priority.
# paperless_migrate_log_tag(str):	Set to "paperless-migrate" by default.
#					Migrate logging syslog tag.

. /etc/rc.subr

name="paperless_migrate"
rcvar="${name}_enable"

# Load rc.conf(5) settings first: start_cmd below is assembled from the
# tunables, so they have to be known before it is built. Otherwise overrides
# are silently ignored when the script is started through service(8).
load_rc_config ${name}

# Set defaults (only for tunables that rc.conf left unset or empty)
paperless_migrate_enable=${paperless_migrate_enable:-"NO"}
paperless_migrate_user=${paperless_migrate_user:-"paperless"}
paperless_migrate_log_facility=${paperless_migrate_log_facility:-"daemon"}
paperless_migrate_log_priority=${paperless_migrate_log_priority:-"notice"}
paperless_migrate_log_tag=${paperless_migrate_log_tag:-"paperless-migrate"}

# Environment for the one-shot migration command
paperless_migrate_env="LANG=en_US.UTF-8 PATH=$PATH:%%PREFIX%%/bin \
	PYTHONPATH=%%PYTHONPREFIX_SITELIBDIR%%/paperless"

# Migrations are a one-shot action rather than a long-running service: the
# output goes to syslog (and, because of -s, to stderr) using the documented
# facility, priority and tag. There is nothing to stop.
start_cmd="${name}_run | logger -s \
	-p '${paperless_migrate_log_facility}.${paperless_migrate_log_priority}' \
	-t '${paperless_migrate_log_tag}'"
stop_cmd=":"

# Apply pending Django database migrations as the paperless user.
paperless_migrate_run()
{
	echo "Applying potential migrations to ${name}."
	env ${paperless_migrate_env} su \
		-m ${paperless_migrate_user} \
		-c 'sh -c "%%PREFIX%%/bin/paperless migrate --no-color -v 0"'
}

run_rc_command "$1"

View File

@ -0,0 +1,362 @@
.\"
.\" Copyright (c) 2024 Michael Gmelin
.\"
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE DEVELOPERS ``AS IS'' AND ANY EXPRESS OR
.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
.\" IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT,
.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.\"
.Dd February 22, 2024
.Dt PAPERLESS-NGX 7
.Os
.Sh NAME
.Nm paperless-ngx
.Nd Index and archive scanned paper documents - installation
.Sh SYNOPSIS
.Nm pkg install %%PKGBASE%%
.Sh DESCRIPTION
.Em Paperless-ngx
is a Django-based document management system that transforms
physical documents into a searchable online archive.
It is the successor of the original Paperless and Paperless-ng projects.
.Pp
It consists of multiple parts, a web UI and a couple of backend
services for consuming and processing documents.
.Pp
This man page documents how the
.Fx
port is installed and configured.
It assumes that the paperless-ngx package was already installed, e.g., from the
.Fx
package repo as described in
.Sx SYNOPSIS .
.Pp
.Em IMPORTANT :
Please note that upgrading an existing installation of
deskutils/py-paperless needs special precautions.
See
.Sx UPGRADING FROM PAPERLESS
for how to approach that.
.Pp
For more information about using paperless-ngx, see
the official paperless-ngx documentation
.Pa ( https://docs.paperless-ngx.com ) .
.Pp
The package creates a symlink from
.Pa %%PYTHONPREFIX_SITELIBDIR%%/paperless/manage.py
to
.Pa %%PREFIX%%/bin/paperless
for convenience reasons, so whenever the official
documentation mentions
.Em manage.py
it can be substituted with
.Pa %%PREFIX%%/bin/paperless
or simply
.Pa paperless .
.Pp
.Em Paperless-ngx always needs to be run using the correct system user
and a UTF-8 codepage.
.Pp
The package %%PKGBASE%% created a user
.Em paperless
with the following home directory layout, setting appropriate
restrictive access permissions:
.Bl -tag -width "/var"
.It Pa /var/db/paperless
home directory (only writeable by root)
.Bl -tag -width "consume/" -compact
.It Pa consume/
Consume directory writable by root, used as chroot directory
for sftp access (see below).
.Bl -tag -width "123" -compact
.It Pa input/
Input files are dropped in there to be processed by the
paperless document consumer - either directly or via
a mechanism like sftp.
.El
.It Pa data/
Contains paperless-ngx's data, including its SQLite database
unless an external database like PostgreSQL or MariaDB is used.
.Bl -tag -width "123" -compact
.It Pa log/
This is where paperless stores its log files
(on top of what the services write to syslog).
.El
.It Pa media/
Directory used by paperless-ngx to store original files and
thumbnails.
.It Pa nltkdata/
Directory containing data used for natural language processing.
.El
.El
.Sh BACKEND SETUP
Paperless needs access to a running redis instance, which can be
installed locally:
.Bd -literal -offset indent
pkg install redis
service redis enable
service redis start
.Ed
.Pp
Modify
.Pa %%PREFIX%%/etc/paperless.conf
to match the configured credentials (when running on localhost,
it is possible to use no special credentials).
.Pp
The URL paperless is hosted on needs to be configured by setting
.Va PAPERLESS_URL ,
it is also possible to tune
.Va PAPERLESS_THREADS_PER_WORKER
in the same configuration file to limit the impact on system
performance.
.Pp
Now, the database needs to be initialized.
This can be accomplished by running
.Bd -literal -offset indent
service paperless-migrate onestart
.Ed
.Pp
In case database migrations should be applied on every
system start, paperless-migrate can be enabled to run on boot:
.Bd -literal -offset indent
service paperless-migrate enable
.Ed
.Pp
Next, mandatory backend services are enabled
.Bd -literal -offset indent
service paperless-beat enable
service paperless-consumer enable
service paperless-webui enable
service paperless-worker enable
.Ed
.Pp
and subsequently started
.Bd -literal -offset indent
service paperless-beat start
service paperless-consumer start
service paperless-webui start
service paperless-worker start
.Ed
.Sh NLTK DATA
In order to process scanned documents using machine learning,
paperless-ngx requires NLTK (natural language toolkit) data.
The required files can be downloaded by using these commands:
.Bd -literal -offset indent
%%PYTHON_CMD%% -m nltk.downloader \\
stopwords punkt -d /var/db/paperless/nltkdata
.Ed
.Pp
Normally, the document classifier is run automatically by
Celery, but it can also be initiated manually by calling
.Bd -literal -offset indent
su -l paperless \\
-c '%%PREFIX%%/bin/paperless document_create_classifier'
.Ed
.Sh OPTIONAL FLOWER SERVICE
paperless-ngx makes use of Celery to control a cluster of workers.
There is a component called flower which can be enabled optionally
to monitor the cluster.
It can be enabled and started like this:
.Bd -literal -offset indent
service paperless-flower enable
service paperless-flower start
.Ed
.Sh WEB UI SETUP
Before using the web ui, make sure to create a super user and assign
a password
.Bd -literal -offset indent
su -l paperless -c '%%PREFIX%%/bin/paperless createsuperuser'
.Ed
.Pp
It is recommended to host the web component using a real
web server, e.g., nginx:
.Bd -literal -offset indent
pkg install nginx
.Ed
.Pp
Copy-in basic server configuration:
.Bd -literal -offset indent
cp %%EXAMPLESDIR%%/nginx.conf \\
%%PREFIX%%/etc/nginx/nginx.conf
.Ed
.Pp
This server configuration contains TLS certificates, which
need to be created by the administrator.
See below for an example of how to create a self-signed
certificate to get started:
.Bd -literal -offset indent
openssl req -x509 -nodes -days 365 -newkey rsa:4096 \\
-keyout %%PREFIX%%/etc/nginx/selfsigned.key \\
-out %%PREFIX%%/etc/nginx/selfsigned.crt
.Ed
.Pp
Enable and start nginx:
.Bd -literal -offset indent
service nginx enable
service nginx start
.Ed
.Pp
The default nginx.conf can be adapted by the administrator to their
needs.
In case the optional flower service was enabled earlier, the commented
out block in the example file can be uncommented to make flower available
at /flower.
.Pp
.Em \&It is important to properly secure a public facing web server.
.Em Doing this properly is up to the administrator.
.Sh SFTP SETUP
Setting up
.Em sftp
enables direct upload of files to be processed by the paperless
consumer.
Some scanners allow configuring sftp with key based authentication,
which is convenient as it scans directly to the paperless processing
pipeline.
.Pp
In case paperless is using a dedicated instance of
.Xr sshd 8 ,
access can be limited to the paperless user by adding
these lines to
.Pa /etc/ssh/sshd_config :
.Bd -literal -offset indent
# Only include if sshd is dedicated to paperless
# otherwise you'll lock yourself out
AllowUsers paperless
.Ed
.Pp
The following block limits the paperless user to using the
.Xr sftp 1
protocol and locks it into the consume directory:
.Bd -literal -offset indent
# paperless can only do sftp and is dropped into correct directory
Match User paperless
ChrootDirectory %h/consume
ForceCommand internal-sftp -u 0077 -d /input
AllowTcpForwarding no
X11Forwarding no
PasswordAuthentication no
.Ed
.Pp
The public keys of authorized users/devices need to be added to
.Pa /var/db/paperless/.ssh/authorized_keys :
.Bd -literal -offset indent
mkdir -p /var/db/paperless/.ssh
cat path/to/pubkey >>/var/db/paperless/.ssh/authorized_keys
.Ed
.Pp
Make sure
.Xr sshd 8
is enabled and restart (or reload) it:
.Bd -literal -offset indent
service sshd enable
service sshd restart
.Ed
.Pp
The user will be dropped into the correct directory, so uploading
a file is as simple as:
.Bd -literal -offset indent
echo put file.pdf | sftp -b - paperless@host
.Ed
.Sh UPGRADING FROM PAPERLESS
In case deskutils/py-paperless is installed, follow the upgrading
guide at:
.Pa https://docs.paperless-ngx.com/setup/#migrating-from-paperless
.Pp
This guide is for a docker based installation, so here are a few basic
hints for upgrading a
.Fx
based installation:
.Bl -bullet -compact
.It
There need to be good and working backups before migrating
.It
In case PGP encryption was used, files need to be decrypted first
by using the existing installation of deskutils/py-paperless.
See
.Pa https://github.com/the-paperless-project/paperless/issues/714
for a description on how to do this and potential pitfalls.
The basic idea is to comment out lines 95 and 96 in
.Pa change_storage_type.py
and then run:
.Bd -literal -offset indent
su -l paperless -c \\
'%%PREFIX%%/bin/paperless change_storage_type gpg unencrypted'
.Ed
.Pp
.It
Deinstall py-paperless (it might be good to keep a backup of the
package).
.It
Move the old paperless configuration file out of the way before
installing paperless-ngx:
.Bd -literal -offset indent
mv %%PREFIX%%/etc/paperless.conf \\
%%PREFIX%%/etc/paperless.conf.old
.Ed
.Pp
.It
Install paperless-ngx:
.Bd -literal -offset indent
pkg install %%PKGBASE%%
.Ed
.Pp
.It
Configure
.Pa %%PREFIX%%/etc/paperless.conf
as described above.
.It
Re-index documents:
.Bd -literal -offset indent
su -l paperless \\
-c '%%PREFIX%%/bin/paperless document_index reindex'
.Ed
.Pp
.It
Check if documents are okay:
.Bd -literal -offset indent
su -l paperless \\
-c '%%PREFIX%%/bin/paperless document_sanity_checker'
.Ed
.Pp
.It
In general, things should be expected to fail, so being able to
restore from backup is vital.
.El
.Sh FILES
.Bl -tag -width ".Pa %%PREFIX%%/etc/paperless.conf" -compact
.It Pa %%PREFIX%%/etc/paperless.conf
See
.Pa %%PREFIX%%/etc/paperless.conf.sample
for an example.
.It Pa %%EXAMPLESDIR%%
Configuration examples, complementary to this man page.
.El
.Sh SEE ALSO
.Xr sftp 1 ,
.Xr sshd_config 5 ,
.Xr ports 7 ,
.Xr daemon 8 ,
.Xr service 8
.Pp
.Pa https://docs.paperless-ngx.com
.Sh AUTHORS
.An -nosplit
This manual page was written by
.An Michael Gmelin Aq Mt grembo@FreeBSD.org .

View File

@ -0,0 +1,58 @@
#!/bin/sh

# PROVIDE: paperless-webui
# REQUIRE: LOGIN
# BEFORE: paperless-worker
# KEYWORD: shutdown
#
# Add the following line(s) to /etc/rc.conf to enable the paperless
# web UI (gunicorn) process:
#
# paperless_webui_enable (bool):	Set to "NO" by default.
#					Set it to "YES" to enable
#					paperless_webui
# paperless_webui_log_facility(str):	Set to "daemon" by default.
#					WebUI logging syslog facility.
# paperless_webui_log_priority(str):	Set to "notice" by default.
#					WebUI logging syslog priority.
# paperless_webui_log_tag(str):		Set to "paperless-webui" by default.
#					WebUI logging syslog tag.
# paperless_webui_listen_address(str):	Set to "127.0.0.1" by default.
#					Exported to the web UI as
#					PAPERLESS_BIND_ADDR.
# paperless_webui_listen_port(int):	Set to "8000" by default.
#					Exported to the web UI as
#					PAPERLESS_PORT.
# paperless_webui_restart_delay(int):	Set to "5" by default.
#					WebUI automatic restart delay in
#					seconds.

. /etc/rc.subr

name="paperless_webui"
rcvar="${name}_enable"

# Load rc.conf(5) settings first: the environment and command_args below are
# assembled from the tunables, so they have to be known before they are
# built. Otherwise overrides are silently ignored when the script is started
# through service(8).
load_rc_config ${name}

# Set defaults (only for tunables that rc.conf left unset or empty)
paperless_webui_enable=${paperless_webui_enable:-"NO"}
paperless_webui_daemon_user=${paperless_webui_user:-"paperless"}
paperless_webui_log_facility=${paperless_webui_log_facility:-"daemon"}
paperless_webui_log_priority=${paperless_webui_log_priority:-"notice"}
paperless_webui_log_tag=${paperless_webui_log_tag:-"paperless-webui"}
paperless_webui_listen_address=${paperless_webui_listen_address:-"127.0.0.1"}
paperless_webui_listen_port=${paperless_webui_listen_port:-"8000"}
paperless_webui_restart_delay=${paperless_webui_restart_delay:-"5"}

# Environment handed to the service by rc.subr. Keep the blank before each
# line continuation: the backslash-newline is removed inside double quotes,
# so without it PATH and PYTHONPATH can end up glued into one word.
paperless_webui_env="LANG=en_US.UTF-8 PATH=$PATH:%%PREFIX%%/bin \
	PYTHONPATH=%%PYTHONPREFIX_SITELIBDIR%%/paperless \
	PAPERLESS_BIND_ADDR=${paperless_webui_listen_address} \
	PAPERLESS_PORT=${paperless_webui_listen_port}"

pidfile="/var/run/paperless/webui.pid"
command="/usr/sbin/daemon"

# Unlike the other paperless services this one uses -P, so ${pidfile} holds
# the pid of the daemon(8) supervisor itself, which restarts gunicorn after
# ${paperless_webui_restart_delay} seconds (-R) whenever it exits.
command_args="-cS -u ${paperless_webui_daemon_user} \
	-P ${pidfile} \
	-t paperless-webui \
	-l ${paperless_webui_log_facility} \
	-s ${paperless_webui_log_priority} \
	-T ${paperless_webui_log_tag} \
	-R ${paperless_webui_restart_delay} \
	%%PREFIX%%/bin/gunicorn \
	-c %%PYTHONPREFIX_SITELIBDIR%%/paperless/gunicorn.conf.py \
	paperless.asgi:application"

run_rc_command "$1"

View File

@ -0,0 +1,49 @@
#!/bin/sh

# PROVIDE: paperless-worker
# REQUIRE: LOGIN
# BEFORE: paperless-beat
# KEYWORD: shutdown
#
# Add the following line(s) to /etc/rc.conf to enable the paperless
# worker:
#
# paperless_worker_enable (bool):	Set to "NO" by default.
#					Set it to "YES" to enable
#					paperless_worker
# paperless_worker_log_facility(str):	Set to "daemon" by default.
#					Worker logging syslog facility.
# paperless_worker_log_priority(str):	Set to "notice" by default.
#					Worker logging syslog priority.
# paperless_worker_log_tag(str):	Set to "paperless-worker" by default.
#					Worker logging syslog tag.

. /etc/rc.subr

name="paperless_worker"
rcvar="${name}_enable"

# Load rc.conf(5) settings first: command_args below is assembled from the
# tunables, so they have to be known before it is built. Otherwise overrides
# are silently ignored when the script is started through service(8).
load_rc_config ${name}

# Set defaults (only for tunables that rc.conf left unset or empty)
paperless_worker_enable=${paperless_worker_enable:-"NO"}
paperless_worker_daemon_user=${paperless_worker_user:-"paperless"}
paperless_worker_log_facility=${paperless_worker_log_facility:-"daemon"}
paperless_worker_log_priority=${paperless_worker_log_priority:-"notice"}
paperless_worker_log_tag=${paperless_worker_log_tag:-"paperless-worker"}

# Environment handed to the service by rc.subr
paperless_worker_env="LANG=en_US.UTF-8 PATH=$PATH:%%PREFIX%%/bin \
	PYTHONPATH=%%PYTHONPREFIX_SITELIBDIR%%/paperless"

pidfile="/var/run/paperless/worker.pid"
procname="%%PYTHON_CMD%%"
command="/usr/sbin/daemon"

# daemon(8) runs the celery worker as the unprivileged user, writes the
# child's pid to ${pidfile} and forwards its output to syslog.
command_args="-cS -u ${paperless_worker_daemon_user} \
	-p ${pidfile} \
	-t paperless-worker \
	-l ${paperless_worker_log_facility} \
	-s ${paperless_worker_log_priority} \
	-T ${paperless_worker_log_tag} \
	%%PREFIX%%/bin/celery \
	--app paperless worker --loglevel INFO --without-mingle --without-gossip"

run_rc_command "$1"

View File

@ -0,0 +1,11 @@
--- gunicorn.conf.py.orig 2024-02-18 20:13:15 UTC
+++ gunicorn.conf.py
@@ -11,7 +11,7 @@ timeout = 120
preload_app = True
# https://docs.gunicorn.org/en/stable/faq.html#blocking-os-fchmod
-worker_tmp_dir = "/dev/shm"
+worker_tmp_dir = "/tmp"
def pre_fork(server, worker):

View File

@ -0,0 +1,42 @@
--- paperless.conf.example.orig 2024-02-10 19:45:46 UTC
+++ paperless.conf.example
@@ -17,10 +17,10 @@
# Paths and folders
-#PAPERLESS_CONSUMPTION_DIR=../consume
-#PAPERLESS_DATA_DIR=../data
+PAPERLESS_CONSUMPTION_DIR=/var/db/paperless/consume/input
+PAPERLESS_DATA_DIR=/var/db/paperless/data
#PAPERLESS_TRASH_DIR=
-#PAPERLESS_MEDIA_ROOT=../media
+PAPERLESS_MEDIA_ROOT=/var/db/paperless/media
#PAPERLESS_STATICDIR=../static
#PAPERLESS_FILENAME_FORMAT=
#PAPERLESS_FILENAME_FORMAT_REMOVE_NONE=
@@ -40,6 +40,7 @@
# OCR settings
+# example: deu+eng
#PAPERLESS_OCR_LANGUAGE=eng
#PAPERLESS_OCR_MODE=skip
#PAPERLESS_OCR_SKIP_ARCHIVE_FILE=never
@@ -56,6 +57,7 @@
# Software tweaks
+PAPERLESS_CONSUMER_INOTIFY_DELAY=3
#PAPERLESS_TASK_WORKERS=1
#PAPERLESS_THREADS_PER_WORKER=1
#PAPERLESS_TIME_ZONE=UTC
@@ -92,3 +94,9 @@
#PAPERLESS_CONVERT_BINARY=/usr/bin/convert
#PAPERLESS_GS_BINARY=/usr/bin/gs
+
+# NLTK settings
+PAPERLESS_NLTK_DIR=/var/db/paperless/nltkdata
+#PAPERLESS_ENABLE_NLTK=yes
+# below defaults to OCR_LANGUAGE
+#NLTK_LANGUAGE=

View File

@ -0,0 +1,20 @@
--- src/documents/management/commands/document_consumer.py.orig 2024-02-19 16:25:10 UTC
+++ src/documents/management/commands/document_consumer.py
@@ -292,7 +292,7 @@ class Command(BaseCommand):
logger.debug(f"Configuring timeout to {timeout}ms")
inotify = INotify()
- inotify_flags = flags.CLOSE_WRITE | flags.MOVED_TO | flags.MODIFY
+ inotify_flags = flags.CLOSE_WRITE | flags.MOVED_TO | flags.MODIFY | flags.CREATE
if recursive:
descriptor = inotify.add_watch_recursive(directory, inotify_flags)
else:
@@ -342,7 +342,7 @@ class Command(BaseCommand):
# If files are waiting, need to exit read() to check them
# Otherwise, go back to infinite sleep time, but only if not testing
if len(notified_files) > 0:
- timeout = inotify_debounce
+ timeout = inotify_debounce * 1000
elif is_testing:
timeout = self.testing_timeout_ms
else:

View File

@ -0,0 +1,29 @@
--- src/paperless/settings.py.orig 2024-02-10 19:45:46 UTC
+++ src/paperless/settings.py
@@ -24,8 +24,6 @@ if configuration_path and os.path.exists(configuration
load_dotenv(configuration_path)
elif os.path.exists("../paperless.conf"):
load_dotenv("../paperless.conf")
-elif os.path.exists("/etc/paperless.conf"):
- load_dotenv("/etc/paperless.conf")
elif os.path.exists("/usr/local/etc/paperless.conf"):
load_dotenv("/usr/local/etc/paperless.conf")
@@ -254,7 +252,7 @@ THUMBNAIL_DIR = MEDIA_ROOT / "documents" / "thumbnails
DATA_DIR = __get_path("PAPERLESS_DATA_DIR", BASE_DIR.parent / "data")
-NLTK_DIR = __get_path("PAPERLESS_NLTK_DIR", "/usr/share/nltk_data")
+NLTK_DIR = __get_path("PAPERLESS_NLTK_DIR", "/usr/local/share/nltk_data")
TRASH_DIR = os.getenv("PAPERLESS_TRASH_DIR")
@@ -1002,7 +1000,7 @@ FILENAME_FORMAT_REMOVE_NONE = __get_boolean(
THUMBNAIL_FONT_NAME = os.getenv(
"PAPERLESS_THUMBNAIL_FONT_NAME",
- "/usr/share/fonts/liberation/LiberationSerif-Regular.ttf",
+ "/usr/local/share/fonts/Liberation/LiberationSerif-Regular.ttf",
)
# Tika settings

View File

@ -0,0 +1,7 @@
[
{ type: install
message: <<EOM
Please see `man paperless-ngx' for details on how to configure paperless.
EOM
}
]

View File

@ -0,0 +1,14 @@
### Configuration for uploading documents via SFTP
### e.g. using a scanner that supports such a feature
# Only include if sshd is dedicated to paperless
# otherwise you'll lock yourself out
AllowUsers paperless
# paperless can only do sftp and is dropped into correct directory
Match User paperless
ChrootDirectory %h/consume
ForceCommand internal-sftp -u 0077 -d /input
AllowTcpForwarding no
X11Forwarding no
PasswordAuthentication no

View File

@ -0,0 +1,6 @@
Paperless-ngx is a document management system that transforms
your physical documents into a searchable online archive so you
can keep, well, less paper.
Paperless-ngx is the official successor of the original Paperless
and Paperless-ng projects.

File diff suppressed because it is too large Load Diff