112 lines
2.0 KiB
Nix

# Package expression for docling-jobkit, built with buildPythonPackage.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build backends listed in `build-system`
  hatchling,
  poetry-core,

  # packages that always end up in `dependencies`
  docling,
  pydantic-settings,
  typer,
  boto3,
  pandas,
  fastparquet,
  pyarrow,
  httpx,

  # packages used only by the optional dependency groups
  tesserocr,
  rapidocr-onnxruntime,
  onnxruntime,
  ray,

  # check-phase inputs
  pytestCheckHook,
  pytest-asyncio,
  writableTmpDirAsHomeHook,

  # feature switches; each one appends the matching optional dependency
  # group to `dependencies` when set to true
  withTesserocr ? false,
  withRapidocr ? false,
  withRay ? false,
}:

buildPythonPackage rec {
  pname = "docling-jobkit";
  version = "1.2.0";
  pyproject = true;

  # Source is fetched from the upstream GitHub tag derived from `version`.
  src = fetchFromGitHub {
    owner = "docling-project";
    repo = "docling-jobkit";
    tag = "v${version}";
    hash = "sha256-bLLcMbN6GNpZ8U5Fhyq/XaHawOFcrFrobY7Jtpdm8Qo=";
  };

  build-system = [
    hatchling
    poetry-core
  ];

  # Optional dependency groups. They are declared ahead of `dependencies`
  # purely for readability: attribute order is irrelevant inside a `rec` set.
  optional-dependencies = {
    tesserocr = [ tesserocr ];
    rapidocr = [
      rapidocr-onnxruntime
      onnxruntime
    ];
    ray = [ ray ];
  };

  # Mandatory dependencies first, then whichever optional groups were
  # switched on through the `with*` arguments.
  dependencies = [
    docling
    pydantic-settings
    typer
    boto3
    pandas
    fastparquet
    pyarrow
    httpx
  ]
  ++ lib.optionals withTesserocr optional-dependencies.tesserocr
  ++ lib.optionals withRapidocr optional-dependencies.rapidocr
  ++ lib.optionals withRay optional-dependencies.ray;

  # Version constraints on these requirements are relaxed.
  pythonRelaxDeps = [
    "boto3"
    "pyarrow"
  ];

  nativeCheckInputs = [
    pytestCheckHook
    pytest-asyncio
    writableTmpDirAsHomeHook
  ];

  disabledTests = [
    # these need network access
    "test_convert_url"
    "test_convert_file"
    "test_convert_warmup"
    # flaky: compares against a magic object
    # https://github.com/docling-project/docling-jobkit/issues/45
    "test_options_validator"
  ];

  # Modules that must be importable from the built package.
  pythonImportsCheck = [
    "docling"
    "docling_jobkit"
  ];

  meta = {
    description = "Running a distributed job processing documents with Docling";
    homepage = "https://github.com/docling-project/docling-jobkit";
    changelog = "https://github.com/docling-project/docling-jobkit/blob/${src.tag}/CHANGELOG.md";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.codgician ];
  };
}