2025-07-24 14:27:50 +02:00

164 lines
3.6 KiB
Nix

{
lib,
stdenv,
buildPythonPackage,
fetchFromGitHub,
# build-system
setuptools,
setuptools-scm,
# dependencies
numpy,
packaging,
pandas,
pydantic,
typeguard,
typing-extensions,
typing-inspect,
# optional-dependencies
black,
dask,
fastapi,
geopandas,
hypothesis,
ibis-framework,
pandas-stubs,
polars,
pyyaml,
scipy,
shapely,
# tests
duckdb,
joblib,
pyarrow,
pyarrow-hotfix,
pytestCheckHook,
pytest-asyncio,
pythonAtLeast,
}:
buildPythonPackage rec {
pname = "pandera";
version = "0.25.0";
pyproject = true;
src = fetchFromGitHub {
owner = "unionai-oss";
repo = "pandera";
tag = "v${version}";
hash = "sha256-0YeLeGpunjHRWFvSvz0r2BokM4/eJKXuBajgcGquca4=";
};
build-system = [
setuptools
setuptools-scm
];
env.SETUPTOOLS_SCM_PRETEND_VERSION = version;
dependencies = [
packaging
pydantic
typeguard
typing-extensions
typing-inspect
];
optional-dependencies =
let
dask-dataframe = [ dask ] ++ dask.optional-dependencies.dataframe;
extras = {
strategies = [ hypothesis ];
hypotheses = [ scipy ];
io = [
pyyaml
black
#frictionless # not in nixpkgs
];
# pyspark expression does not define optional-dependencies.connect:
#pyspark = [ pyspark ] ++ pyspark.optional-dependencies.connect;
# modin not in nixpkgs:
#modin = [
# modin
# ray
#] ++ dask-dataframe;
#modin-ray = [
# modin
# ray
#];
#modin-dask = [
# modin
#] ++ dask-dataframe;
dask = dask-dataframe;
mypy = [ pandas-stubs ];
fastapi = [ fastapi ];
geopandas = [
geopandas
shapely
];
ibis = [
ibis-framework
duckdb
];
pandas = [
numpy
pandas
];
polars = [ polars ];
};
in
extras // { all = lib.concatLists (lib.attrValues extras); };
nativeCheckInputs = [
pytestCheckHook
pytest-asyncio
joblib
pyarrow
pyarrow-hotfix
]
++ optional-dependencies.all;
disabledTestPaths = [
"tests/fastapi/test_app.py" # tries to access network
"tests/pandas/test_docs_setting_column_widths.py" # tests doc generation, requires sphinx
"tests/modin" # requires modin, not in nixpkgs
"tests/mypy/test_pandas_static_type_checking.py" # some typing failures
"tests/pyspark" # requires spark
# KeyError: 'dask'
"tests/dask/test_dask.py::test_series_schema"
"tests/dask/test_dask_accessor.py::test_dataframe_series_add_schema"
];
disabledTests =
lib.optionals stdenv.hostPlatform.isDarwin [
# OOM error on ofborg:
"test_engine_geometry_coerce_crs"
# pandera.errors.SchemaError: Error while coercing 'geometry' to type geometry
"test_schema_dtype_crs_with_coerce"
]
++ lib.optionals (pythonAtLeast "3.13") [
# AssertionError: assert DataType(Sparse[float64, nan]) == DataType(Sparse[float64, nan])
"test_legacy_default_pandas_extension_dtype"
];
pythonImportsCheck = [
"pandera"
"pandera.api"
"pandera.config"
"pandera.dtypes"
"pandera.engines"
];
meta = {
description = "Light-weight, flexible, and expressive statistical data testing library";
homepage = "https://pandera.readthedocs.io";
changelog = "https://github.com/unionai-oss/pandera/releases/tag/${src.tag}";
license = lib.licenses.mit;
maintainers = with lib.maintainers; [ bcdarwin ];
};
}