{ lib, aiohttp, aiosqlite, banks, buildPythonPackage, dataclasses-json, deprecated, dirtyjson, fetchFromGitHub, filetype, fsspec, hatchling, jsonpath-ng, llama-index-workflows, llamaindex-py-client, nest-asyncio, networkx, nltk-data, nltk, numpy, openai, pandas, pillow, pytest-asyncio, pytest-mock, pytestCheckHook, pythonOlder, pyvis, pyyaml, requests, spacy, sqlalchemy, tenacity, tiktoken, tree-sitter, typing-inspect, }: buildPythonPackage rec { pname = "llama-index-core"; version = "0.12.46"; pyproject = true; disabled = pythonOlder "3.9"; src = fetchFromGitHub { owner = "run-llama"; repo = "llama_index"; tag = "v${version}"; hash = "sha256-B1i5zabacapc/ipPTQtQzLVZql5ifqxfFoDhaBR+eYc="; }; sourceRoot = "${src.name}/${pname}"; # When `llama-index` is imported, it uses `nltk` to look for the following files and tries to # download them if they aren't present. # https://github.com/run-llama/llama_index/blob/6efa53cebd5c8ccf363582c932fffde44d61332e/llama-index-core/llama_index/core/utils.py#L59-L67 # Setting `NLTK_DATA` to a writable path can also solve this problem, but it needs to be done in # every package that depends on `llama-index-core` for `pythonImportsCheck` not to fail, so this # solution seems more elegant. postPatch = '' mkdir -p llama_index/core/_static/nltk_cache/corpora/stopwords/ cp -r ${nltk-data.stopwords}/corpora/stopwords/* llama_index/core/_static/nltk_cache/corpora/stopwords/ mkdir -p llama_index/core/_static/nltk_cache/tokenizers/punkt/ cp -r ${nltk-data.punkt}/tokenizers/punkt/* llama_index/core/_static/nltk_cache/tokenizers/punkt/ ''; pythonRelaxDeps = [ "setuptools" "tenacity" ]; build-system = [ hatchling ]; dependencies = [ aiohttp aiosqlite banks dataclasses-json deprecated dirtyjson filetype fsspec jsonpath-ng llama-index-workflows llamaindex-py-client nest-asyncio networkx nltk numpy openai pandas pillow pyvis pyyaml requests spacy sqlalchemy tenacity tiktoken typing-inspect ]; nativeCheckInputs = [ tree-sitter pytest-asyncio pytest-mock pytestCheckHook ]; pythonImportsCheck = [ "llama_index" ]; disabledTestPaths = [ # Tests require network access "tests/agent/" "tests/callbacks/" "tests/chat_engine/" "tests/evaluation/" "tests/indices/" "tests/ingestion/" "tests/memory/" "tests/node_parser/" "tests/objects/" "tests/playground/" "tests/postprocessor/" "tests/query_engine/" "tests/question_gen/" "tests/response_synthesizers/" "tests/retrievers/" "tests/selectors/" "tests/test_utils.py" "tests/text_splitter/" "tests/token_predictor/" "tests/tools/" "tests/schema/" "tests/multi_modal_llms/" ]; disabledTests = [ # Tests require network access "test_context_extraction_basic" "test_context_extraction_custom_prompt" "test_context_extraction_oversized_document" "test_document_block_from_b64" "test_document_block_from_bytes" "test_document_block_from_path" "test_document_block_from_url" "test_from_namespaced_persist_dir" "test_from_persist_dir" "test_mimetype_raw_data" "test_multiple_documents_context" "test_resource" # asyncio.exceptions.InvalidStateError: invalid state "test_workflow_context_to_dict_mid_run" "test_SimpleDirectoryReader" # RuntimeError "test_str" ]; meta = with lib; { description = "Data framework for your LLM applications"; homepage = "https://github.com/run-llama/llama_index/"; changelog = "https://github.com/run-llama/llama_index/blob/${src.tag}/CHANGELOG.md"; license = licenses.mit; maintainers = with maintainers; [ fab ]; }; }