# Source listing metadata: 332 lines, 5.6 KiB, Nix.
# Package expression for the `txtai` Python library, built from the GitHub
# source of neuml/txtai with setuptools.
#
# The function arguments are grouped by the role they play below: build system,
# mandatory runtime dependencies, the members of each optional-dependency
# (extras) group, and test-only inputs.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,

  # dependencies
  faiss,
  huggingface-hub,
  msgpack,
  numpy,
  pyyaml,
  regex,
  torch,
  transformers,

  # optional-dependencies
  # agent
  mcpadapt,
  smolagents,
  # ann
  annoy,
  hnswlib,
  pgvector,
  sqlalchemy,
  sqlite-vec-c,
  # api
  aiohttp,
  fastapi,
  fastapi-mcp,
  httpx,
  pillow,
  python-multipart,
  uvicorn,
  # cloud
  # apache-libcloud, (unpackaged)
  fasteners,
  # console
  rich,
  # database
  duckdb,
  # graph
  # grand-cypher (unpackaged)
  # grand-graph (unpackaged)
  networkx,
  # model
  onnx,
  onnxruntime,
  # pipeline-audio
  # model2vec,
  sounddevice,
  soundfile,
  scipy,
  ttstokenizer,
  webrtcvad,
  # pipeline-data
  beautifulsoup4,
  nltk,
  pandas,
  tika,
  # pipeline-image
  imagehash,
  timm,
  # pipeline-llm
  litellm,
  # llama-cpp-python, (unpackaged)
  # pipeline-text
  gliner,
  sentencepiece,
  staticvectors,
  # pipeline-train
  accelerate,
  bitsandbytes,
  onnxmltools,
  peft,
  skl2onnx,
  # vectors
  fasttext,
  # pymagnitude-lite, (unpackaged)
  scikit-learn,
  sentence-transformers,
  skops,
  # workflow
  # apache-libcloud (unpackaged)
  croniter,
  openpyxl,
  requests,
  xmltodict,

  # tests
  pytestCheckHook,
}:
let
  version = "8.6.0";

  # Each list below is one optional-dependency group. Entries that are
  # commented out are packages that are not available as inputs here (see the
  # "(unpackaged)" notes in the argument list).
  agent = [
    mcpadapt
    smolagents
  ];
  ann = [
    annoy
    hnswlib
    pgvector
    sqlalchemy
    sqlite-vec-c
  ];
  api = [
    aiohttp
    fastapi
    fastapi-mcp
    httpx
    pillow
    python-multipart
    uvicorn
  ];
  cloud = [
    # apache-libcloud
    fasteners
  ];
  console = [ rich ];
  database = [
    duckdb
    pillow
    sqlalchemy
  ];
  graph = [
    # grand-cypher
    # grand-graph
    networkx
    sqlalchemy
  ];
  model = [
    onnx
    onnxruntime
  ];
  pipeline-audio = [
    onnx
    onnxruntime
    scipy
    sounddevice
    soundfile
    ttstokenizer
    webrtcvad
  ];
  pipeline-data = [
    beautifulsoup4
    nltk
    pandas
    tika
  ];
  pipeline-image = [
    imagehash
    pillow
    timm
  ];
  pipeline-llm = [
    litellm
    # llama-cpp-python
  ];
  pipeline-text = [
    gliner
    sentencepiece
    staticvectors
  ];
  pipeline-train = [
    accelerate
    bitsandbytes
    onnx
    onnxmltools
    onnxruntime
    peft
    skl2onnx
  ];
  # Union of all pipeline-* groups.
  pipeline =
    pipeline-audio
    ++ pipeline-data
    ++ pipeline-image
    ++ pipeline-llm
    ++ pipeline-text
    ++ pipeline-train;
  scoring = [ sqlalchemy ];
  vectors = [
    fasttext
    litellm
    # llama-cpp-python
    # model2vec
    # pymagnitude-lite
    scikit-learn
    sentence-transformers
    skops
  ];
  workflow = [
    # apache-libcloud
    croniter
    openpyxl
    pandas
    pillow
    requests
    xmltodict
  ];
  similarity = ann ++ vectors;
  # Aggregate group. `cloud` is not part of this concatenation.
  all =
    agent
    ++ api
    ++ ann
    ++ console
    ++ database
    ++ graph
    ++ model
    ++ pipeline
    ++ scoring
    ++ similarity
    ++ workflow;

  # Every group defined above is exported, including `pipeline-data` and
  # `vectors`, so that each one can be selected individually by consumers.
  optional-dependencies = {
    inherit
      agent
      ann
      api
      cloud
      console
      database
      graph
      model
      pipeline-audio
      pipeline-data
      pipeline-image
      pipeline-llm
      pipeline-text
      pipeline-train
      pipeline
      scoring
      similarity
      vectors
      workflow
      all
      ;
  };

  src = fetchFromGitHub {
    owner = "neuml";
    repo = "txtai";
    tag = "v${version}";
    hash = "sha256-xFGVX0Ustime6ttysY3dcOCWc+jB75xqpSDBuRetIJc=";
  };
in
buildPythonPackage {
  pname = "txtai";
  inherit version src;
  pyproject = true;

  build-system = [ setuptools ];

  pythonRemoveDeps = [
    # We call it faiss, not faiss-cpu.
    "faiss-cpu"
  ];

  dependencies = [
    faiss
    huggingface-hub
    msgpack
    numpy
    pyyaml
    regex
    torch
    transformers
  ];

  inherit optional-dependencies;

  # The Python imports check runs huggingface-hub which needs a writable directory.
  # HF_HOME is exported in preInstall so that it is already set by the time the
  # imports check runs.
  preInstall = ''
    export HF_HOME=$(mktemp -d)
  '';

  pythonImportsCheck = [ "txtai" ];

  nativeCheckInputs =
    [
      httpx
      msgpack
      pytestCheckHook
      python-multipart
      timm
      sqlalchemy
    ]
    ++ optional-dependencies.agent
    ++ optional-dependencies.ann
    ++ optional-dependencies.api
    ++ optional-dependencies.similarity;

  # The deselected paths depend on the huggingface hub and should be run as a passthru test
  # disabledTestPaths won't work as the problem is with the classes containing the tests
  # (in other words, it fails on __init__)
  # NOTE(review): newer nixpkgs revisions may prefer `pytestFlags` /
  # `enabledTestPaths` over `pytestFlagsArray` - confirm against the target
  # nixpkgs before migrating.
  pytestFlagsArray = [
    "test/python/test*.py"
    "--deselect=test/python/testagent.py"
    "--deselect=test/python/testcloud.py"
    "--deselect=test/python/testconsole.py"
    "--deselect=test/python/testembeddings.py"
    "--deselect=test/python/testgraph.py"
    "--deselect=test/python/testapi/testapiembeddings.py"
    "--deselect=test/python/testapi/testapipipelines.py"
    "--deselect=test/python/testapi/testapiworkflow.py"
    "--deselect=test/python/testdatabase/testclient.py"
    "--deselect=test/python/testdatabase/testduckdb.py"
    "--deselect=test/python/testdatabase/testencoder.py"
    "--deselect=test/python/testworkflow.py"
  ];

  disabledTests = [
    # Hardcoded paths
    "testInvalidTar"
    "testInvalidZip"
    # Downloads from Huggingface
    "testPipeline"
    "testVectors"
    # Not finding sqlite-vec despite being supplied
    "testSQLite"
    "testSQLiteCustom"
  ];

  meta = {
    description = "Semantic search and workflows powered by language models";
    changelog = "https://github.com/neuml/txtai/releases/tag/${src.tag}";
    homepage = "https://github.com/neuml/txtai";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ happysalada ];
  };
}