# Listing metadata (from the file viewer): Nix, 112 lines, 2.0 KiB
# Package expression for docling-jobkit, built with buildPythonPackage from a
# tagged GitHub source.
#
# Arguments:
#   lib, buildPythonPackage, fetchFromGitHub  - builder helpers
#   hatchling, poetry-core                    - build-system inputs
#   docling ... httpx                         - runtime dependencies
#   tesserocr, rapidocr-onnxruntime,
#   onnxruntime, ray                          - members of the optional groups
#   pytestCheckHook, pytest-asyncio,
#   writableTmpDirAsHomeHook                  - check-phase inputs
#   withTesserocr, withRapidocr, withRay      - booleans (default false); when
#       true, the matching optional-dependencies group is appended to
#       `dependencies`.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  hatchling,
  poetry-core,

  # dependencies
  docling,
  pydantic-settings,
  typer,
  boto3,
  pandas,
  fastparquet,
  pyarrow,
  httpx,

  # optional dependencies
  tesserocr,
  rapidocr-onnxruntime,
  onnxruntime,
  ray,

  # tests
  pytestCheckHook,
  pytest-asyncio,
  writableTmpDirAsHomeHook,

  # options
  withTesserocr ? false,
  withRapidocr ? false,
  withRay ? false,
}:

# `rec` is required: `src.tag` reads `version`, `dependencies` reads
# `optional-dependencies`, and `meta.changelog` reads `src.tag`.
buildPythonPackage rec {
  pname = "docling-jobkit";
  version = "1.2.0";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "docling-project";
    repo = "docling-jobkit";
    tag = "v${version}";
    hash = "sha256-bLLcMbN6GNpZ8U5Fhyq/XaHawOFcrFrobY7Jtpdm8Qo=";
  };

  # NOTE(review): two build backends are listed; upstream's pyproject.toml
  # presumably needs only one of them - confirm and drop the unused one.
  build-system = [
    hatchling
    poetry-core
  ];

  # Base runtime dependencies, extended by whichever optional groups the
  # with* flags enable.
  dependencies = [
    docling
    pydantic-settings
    typer
    boto3
    pandas
    fastparquet
    pyarrow
    httpx
  ]
  ++ lib.optionals withTesserocr optional-dependencies.tesserocr
  ++ lib.optionals withRapidocr optional-dependencies.rapidocr
  ++ lib.optionals withRay optional-dependencies.ray;

  # Optional groups; each one is pulled into `dependencies` by its with* flag.
  optional-dependencies = {
    tesserocr = [ tesserocr ];
    rapidocr = [
      rapidocr-onnxruntime
      onnxruntime
    ];
    ray = [ ray ];
  };

  # Presumably upstream's version constraints on these two do not match the
  # versions available here - TODO confirm they still need relaxing.
  pythonRelaxDeps = [
    "boto3"
    "pyarrow"
  ];

  # NOTE(review): "docling" is a dependency's module, not this package's own;
  # verify that checking it here is intentional.
  pythonImportsCheck = [
    "docling"
    "docling_jobkit"
  ];

  nativeCheckInputs = [
    pytestCheckHook
    pytest-asyncio
    # Presumably the test suite writes under $HOME - confirm.
    writableTmpDirAsHomeHook
  ];

  disabledTests = [
    # requires network access
    "test_convert_url"
    "test_convert_file"
    "test_convert_warmup"

    # Flaky due to comparison with magic object
    # https://github.com/docling-project/docling-jobkit/issues/45
    "test_options_validator"
  ];

  meta = {
    changelog = "https://github.com/docling-project/docling-jobkit/blob/${src.tag}/CHANGELOG.md";
    description = "Running a distributed job processing documents with Docling";
    homepage = "https://github.com/docling-project/docling-jobkit";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ codgician ];
  };
}