2025-01-18 12:10:44 -05:00

107 lines
1.7 KiB
Nix

# Package definition for the deepsearch-glm Python module, built from the
# upstream GitHub sources with the libraries listed in buildInputs.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # nativeBuildInputs
  cmake,
  pkg-config,

  # build-system
  poetry-core,
  pybind11,

  # buildInputs
  cxxopts,
  fasttext,
  fmt,
  loguru,
  nlohmann_json,
  pcre2,
  sentencepiece,
  utf8cpp,
  zlib,

  # optional-dependencies
  deepsearch-toolkit,
  docling-core,
  matplotlib,
  pandas,
  python-dotenv,
  requests,
  rich,
  tabulate,
  tqdm,
}:

buildPythonPackage rec {
  pname = "deepsearch-glm";
  version = "1.0.0";
  pyproject = true;

  # Sources are pinned to the upstream release tag derived from `version`.
  src = fetchFromGitHub {
    owner = "DS4SD";
    repo = "deepsearch-glm";
    tag = "v${version}";
    hash = "sha256-3sJNkrx0tTm6RMYAwV8Aha7x8dZjf4tGdds8OScRff8=";
  };

  build-system = [
    poetry-core
    pybind11
  ];

  nativeBuildInputs = [
    cmake
    pkg-config
  ];

  buildInputs = [
    cxxopts
    fasttext
    fmt
    loguru
    nlohmann_json
    pcre2
    sentencepiece
    utf8cpp
    zlib
  ];

  # cmake stays available as a build tool, but its configure phase is disabled.
  # NOTE(review): presumably the pyproject build drives cmake itself -- confirm.
  dontUseCmakeConfigure = true;

  # Put utf8cpp's `include/utf8cpp` subdirectory on the compiler include path.
  env.NIX_CFLAGS_COMPILE = "-I${lib.getDev utf8cpp}/include/utf8cpp";
  # NOTE(review): presumably tells the upstream build to use the libraries
  # from buildInputs instead of fetching its own copies -- confirm upstream.
  env.USE_SYSTEM_DEPS = true;

  # Extras exposed by this package, keyed by extra name.
  optional-dependencies = {
    docling = [
      docling-core
      pandas
    ];
    pyplot = [ matplotlib ];
    toolkit = [
      deepsearch-toolkit
      python-dotenv
    ];
    utils = [
      pandas
      python-dotenv
      requests
      rich
      tabulate
      tqdm
    ];
  };

  # The upstream test suite downloads models and data from an S3 bucket,
  # so it is disabled; only the import check below is run.
  doCheck = false;

  pythonImportsCheck = [ "deepsearch_glm" ];

  meta = {
    description = "Create fast graph language models from converted PDF documents for knowledge extraction and Q&A";
    homepage = "https://github.com/DS4SD/deepsearch-glm";
    changelog = "https://github.com/DS4SD/deepsearch-glm/releases/tag/v${version}";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.booxter ];
  };
}