python3Packages.vllm: 0.6.2 -> 0.7.1 (#379165)
This commit is contained in:
commit
ab36ef17c8
@ -6,6 +6,7 @@
|
|||||||
numpy,
|
numpy,
|
||||||
pydantic,
|
pydantic,
|
||||||
jsonschema,
|
jsonschema,
|
||||||
|
opencv-python-headless,
|
||||||
sentencepiece,
|
sentencepiece,
|
||||||
typing-extensions,
|
typing-extensions,
|
||||||
tiktoken,
|
tiktoken,
|
||||||
@ -37,6 +38,7 @@ buildPythonPackage rec {
|
|||||||
numpy
|
numpy
|
||||||
pydantic
|
pydantic
|
||||||
jsonschema
|
jsonschema
|
||||||
|
opencv-python-headless
|
||||||
sentencepiece
|
sentencepiece
|
||||||
typing-extensions
|
typing-extensions
|
||||||
tiktoken
|
tiktoken
|
||||||
|
|||||||
@ -1,24 +0,0 @@
|
|||||||
From f6a7748bee79fc2e1898968fef844daacfa7860b Mon Sep 17 00:00:00 2001
|
|
||||||
From: SomeoneSerge <else@someonex.net>
|
|
||||||
Date: Wed, 31 Jul 2024 12:02:53 +0000
|
|
||||||
Subject: [PATCH 1/2] setup.py: don't ask for hipcc --version
|
|
||||||
|
|
||||||
---
|
|
||||||
setup.py | 1 +
|
|
||||||
1 file changed, 1 insertion(+)
|
|
||||||
|
|
||||||
diff --git a/setup.py b/setup.py
|
|
||||||
index 72ef26f1..01e006f9 100644
|
|
||||||
--- a/setup.py
|
|
||||||
+++ b/setup.py
|
|
||||||
@@ -279,6 +279,7 @@ def _install_punica() -> bool:
|
|
||||||
|
|
||||||
|
|
||||||
def get_hipcc_rocm_version():
|
|
||||||
+ return "0.0" # `hipcc --version` misbehaves ("unresolved paths") inside the nix sandbox
|
|
||||||
# Run the hipcc --version command
|
|
||||||
result = subprocess.run(['hipcc', '--version'],
|
|
||||||
stdout=subprocess.PIPE,
|
|
||||||
--
|
|
||||||
2.45.1
|
|
||||||
|
|
||||||
@ -0,0 +1,12 @@
|
|||||||
|
diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py
|
||||||
|
index f5a02a5b..e830f987 100644
|
||||||
|
--- a/vllm/model_executor/models/registry.py
|
||||||
|
+++ b/vllm/model_executor/models/registry.py
|
||||||
|
@@ -482,6 +482,7 @@ def _run_in_subprocess(fn: Callable[[], _T]) -> _T:
|
||||||
|
returned = subprocess.run(
|
||||||
|
[sys.executable, "-m", "vllm.model_executor.models.registry"],
|
||||||
|
input=input_bytes,
|
||||||
|
+ env={'PYTHONPATH': ':'.join(sys.path)},
|
||||||
|
capture_output=True)
|
||||||
|
|
||||||
|
# check if the subprocess is successful
|
||||||
18
pkgs/development/python-modules/vllm/0004-drop-lsmod.patch
Normal file
18
pkgs/development/python-modules/vllm/0004-drop-lsmod.patch
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
--- a/setup.py
|
||||||
|
+++ b/setup.py
|
||||||
|
@@ -340,14 +340,7 @@ def _is_hpu() -> bool:
|
||||||
|
out = subprocess.run(["hl-smi"], capture_output=True, check=True)
|
||||||
|
is_hpu_available = out.returncode == 0
|
||||||
|
except (FileNotFoundError, PermissionError, subprocess.CalledProcessError):
|
||||||
|
- if sys.platform.startswith("linux"):
|
||||||
|
- try:
|
||||||
|
- output = subprocess.check_output(
|
||||||
|
- 'lsmod | grep habanalabs | wc -l', shell=True)
|
||||||
|
- is_hpu_available = int(output) > 0
|
||||||
|
- except (ValueError, FileNotFoundError, PermissionError,
|
||||||
|
- subprocess.CalledProcessError):
|
||||||
|
- pass
|
||||||
|
+ is_hpu_available = False
|
||||||
|
return is_hpu_available
|
||||||
|
|
||||||
|
|
||||||
@ -5,14 +5,21 @@
|
|||||||
buildPythonPackage,
|
buildPythonPackage,
|
||||||
pythonRelaxDepsHook,
|
pythonRelaxDepsHook,
|
||||||
fetchFromGitHub,
|
fetchFromGitHub,
|
||||||
|
symlinkJoin,
|
||||||
|
autoAddDriverRunpath,
|
||||||
|
|
||||||
|
# build system
|
||||||
|
packaging,
|
||||||
|
setuptools,
|
||||||
|
wheel,
|
||||||
|
|
||||||
|
# dependencies
|
||||||
which,
|
which,
|
||||||
ninja,
|
ninja,
|
||||||
cmake,
|
cmake,
|
||||||
packaging,
|
setuptools-scm,
|
||||||
setuptools,
|
|
||||||
torch,
|
torch,
|
||||||
outlines,
|
outlines,
|
||||||
wheel,
|
|
||||||
psutil,
|
psutil,
|
||||||
ray,
|
ray,
|
||||||
pandas,
|
pandas,
|
||||||
@ -21,43 +28,174 @@
|
|||||||
numpy,
|
numpy,
|
||||||
transformers,
|
transformers,
|
||||||
xformers,
|
xformers,
|
||||||
|
xgrammar,
|
||||||
fastapi,
|
fastapi,
|
||||||
uvicorn,
|
uvicorn,
|
||||||
pydantic,
|
pydantic,
|
||||||
aioprometheus,
|
aioprometheus,
|
||||||
|
pynvml,
|
||||||
openai,
|
openai,
|
||||||
pyzmq,
|
pyzmq,
|
||||||
tiktoken,
|
tiktoken,
|
||||||
|
torchaudio,
|
||||||
torchvision,
|
torchvision,
|
||||||
py-cpuinfo,
|
py-cpuinfo,
|
||||||
lm-format-enforcer,
|
lm-format-enforcer,
|
||||||
prometheus-fastapi-instrumentator,
|
prometheus-fastapi-instrumentator,
|
||||||
cupy,
|
cupy,
|
||||||
writeShellScript,
|
gguf,
|
||||||
|
einops,
|
||||||
|
importlib-metadata,
|
||||||
|
partial-json-parser,
|
||||||
|
compressed-tensors,
|
||||||
|
mistral-common,
|
||||||
|
msgspec,
|
||||||
|
numactl,
|
||||||
|
tokenizers,
|
||||||
|
oneDNN,
|
||||||
|
blake3,
|
||||||
|
depyf,
|
||||||
|
opencv-python-headless,
|
||||||
|
|
||||||
config,
|
config,
|
||||||
|
|
||||||
cudaSupport ? config.cudaSupport,
|
cudaSupport ? config.cudaSupport,
|
||||||
cudaPackages ? { },
|
cudaPackages ? { },
|
||||||
|
rocmSupport ? config.rocmSupport,
|
||||||
# Has to be either rocm or cuda, default to the free one
|
|
||||||
rocmSupport ? !config.cudaSupport,
|
|
||||||
rocmPackages ? { },
|
rocmPackages ? { },
|
||||||
gpuTargets ? [ ],
|
gpuTargets ? [ ],
|
||||||
}@args:
|
}@args:
|
||||||
|
|
||||||
let
|
let
|
||||||
|
inherit (lib)
|
||||||
|
lists
|
||||||
|
strings
|
||||||
|
trivial
|
||||||
|
;
|
||||||
|
|
||||||
|
inherit (cudaPackages) cudaFlags;
|
||||||
|
|
||||||
|
shouldUsePkg =
|
||||||
|
pkg: if pkg != null && lib.meta.availableOn stdenv.hostPlatform pkg then pkg else null;
|
||||||
|
|
||||||
|
# see CMakeLists.txt, grepping for GIT_TAG near cutlass
|
||||||
|
# https://github.com/vllm-project/vllm/blob/${version}/CMakeLists.txt
|
||||||
cutlass = fetchFromGitHub {
|
cutlass = fetchFromGitHub {
|
||||||
owner = "NVIDIA";
|
owner = "NVIDIA";
|
||||||
repo = "cutlass";
|
repo = "cutlass";
|
||||||
rev = "refs/tags/v3.5.0";
|
tag = "v3.7.0";
|
||||||
sha256 = "sha256-D/s7eYsa5l/mfx73tE4mnFcTQdYqGmXa9d9TCryw4e4=";
|
hash = "sha256-GUTRXmv3DiM/GN5Bvv2LYovMLKZMlMhoKv4O0g627gs=";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
vllm-flash-attn = stdenv.mkDerivation rec {
|
||||||
|
pname = "vllm-flash-attn";
|
||||||
|
version = "2.6.2";
|
||||||
|
|
||||||
|
# see CMakeLists.txt, grepping for GIT_TAG near vllm-flash-attn
|
||||||
|
# https://github.com/vllm-project/vllm/blob/${version}/CMakeLists.txt
|
||||||
|
src = fetchFromGitHub {
|
||||||
|
owner = "vllm-project";
|
||||||
|
repo = "flash-attention";
|
||||||
|
rev = "d4e09037abf588af1ec47d0e966b237ee376876c";
|
||||||
|
hash = "sha256-KFEsZlrwvCgvPzQ/pCLWcnbGq89mWE3yTDdtJSV9MII=";
|
||||||
|
};
|
||||||
|
|
||||||
|
dontConfigure = true;
|
||||||
|
|
||||||
|
# vllm-flash-attn normally relies on `git submodule update` to fetch cutlass
|
||||||
|
buildPhase = ''
|
||||||
|
rm -rf csrc/cutlass
|
||||||
|
ln -sf ${cutlass} csrc/cutlass
|
||||||
|
'';
|
||||||
|
|
||||||
|
installPhase = ''
|
||||||
|
cp -rva . $out
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
|
||||||
|
cpuSupport = !cudaSupport && !rocmSupport;
|
||||||
|
|
||||||
|
# https://github.com/pytorch/pytorch/blob/v2.4.0/torch/utils/cpp_extension.py#L1953
|
||||||
|
supportedTorchCudaCapabilities =
|
||||||
|
let
|
||||||
|
real = [
|
||||||
|
"3.5"
|
||||||
|
"3.7"
|
||||||
|
"5.0"
|
||||||
|
"5.2"
|
||||||
|
"5.3"
|
||||||
|
"6.0"
|
||||||
|
"6.1"
|
||||||
|
"6.2"
|
||||||
|
"7.0"
|
||||||
|
"7.2"
|
||||||
|
"7.5"
|
||||||
|
"8.0"
|
||||||
|
"8.6"
|
||||||
|
"8.7"
|
||||||
|
"8.9"
|
||||||
|
"9.0"
|
||||||
|
"9.0a"
|
||||||
|
];
|
||||||
|
ptx = lists.map (x: "${x}+PTX") real;
|
||||||
|
in
|
||||||
|
real ++ ptx;
|
||||||
|
|
||||||
|
# NOTE: The lists.subtractLists function is perhaps a bit unintuitive. It subtracts the elements
|
||||||
|
# of the first list *from* the second list. That means:
|
||||||
|
# lists.subtractLists a b = b - a
|
||||||
|
|
||||||
|
# For CUDA
|
||||||
|
supportedCudaCapabilities = lists.intersectLists cudaFlags.cudaCapabilities supportedTorchCudaCapabilities;
|
||||||
|
unsupportedCudaCapabilities = lists.subtractLists supportedCudaCapabilities cudaFlags.cudaCapabilities;
|
||||||
|
|
||||||
|
isCudaJetson = cudaSupport && cudaPackages.cudaFlags.isJetsonBuild;
|
||||||
|
|
||||||
|
# Use trivial.throwIf to abort evaluation when no supported GPU targets remain; the error message lists the unsupported targets that were requested.
|
||||||
|
gpuArchWarner =
|
||||||
|
supported: unsupported:
|
||||||
|
trivial.throwIf (supported == [ ]) (
|
||||||
|
"No supported GPU targets specified. Requested GPU targets: "
|
||||||
|
+ strings.concatStringsSep ", " unsupported
|
||||||
|
) supported;
|
||||||
|
|
||||||
|
# Create the gpuTargetString.
|
||||||
|
gpuTargetString = strings.concatStringsSep ";" (
|
||||||
|
if gpuTargets != [ ] then
|
||||||
|
# If gpuTargets is specified, it always takes priority.
|
||||||
|
gpuTargets
|
||||||
|
else if cudaSupport then
|
||||||
|
gpuArchWarner supportedCudaCapabilities unsupportedCudaCapabilities
|
||||||
|
else if rocmSupport then
|
||||||
|
rocmPackages.clr.gpuTargets
|
||||||
|
else
|
||||||
|
throw "No GPU targets specified"
|
||||||
|
);
|
||||||
|
|
||||||
|
mergedCudaLibraries = with cudaPackages; [
|
||||||
|
cuda_cudart # cuda_runtime.h, -lcudart
|
||||||
|
cuda_cccl
|
||||||
|
libcusparse # cusparse.h
|
||||||
|
libcusolver # cusolverDn.h
|
||||||
|
cuda_nvtx
|
||||||
|
cuda_nvrtc
|
||||||
|
libcublas
|
||||||
|
];
|
||||||
|
|
||||||
|
# Some packages are not available on all platforms
|
||||||
|
nccl = shouldUsePkg (cudaPackages.nccl or null);
|
||||||
|
|
||||||
|
getAllOutputs = p: [
|
||||||
|
(lib.getBin p)
|
||||||
|
(lib.getLib p)
|
||||||
|
(lib.getDev p)
|
||||||
|
];
|
||||||
|
|
||||||
in
|
in
|
||||||
|
|
||||||
buildPythonPackage rec {
|
buildPythonPackage rec {
|
||||||
pname = "vllm";
|
pname = "vllm";
|
||||||
version = "0.6.2";
|
version = "0.7.1";
|
||||||
pyproject = true;
|
pyproject = true;
|
||||||
|
|
||||||
stdenv = if cudaSupport then cudaPackages.backendStdenv else args.stdenv;
|
stdenv = if cudaSupport then cudaPackages.backendStdenv else args.stdenv;
|
||||||
@ -65,30 +203,54 @@ buildPythonPackage rec {
|
|||||||
src = fetchFromGitHub {
|
src = fetchFromGitHub {
|
||||||
owner = "vllm-project";
|
owner = "vllm-project";
|
||||||
repo = pname;
|
repo = pname;
|
||||||
rev = "refs/tags/v${version}";
|
tag = "v${version}";
|
||||||
hash = "sha256-zUkqAPPhDRdN9rDQ2biCl1B+trV0xIHXub++v9zsQGo=";
|
hash = "sha256-CImXKMEv+jHqngvcr8W6fQLiCo1mqmcZ0Ho0bfAgfbg=";
|
||||||
};
|
};
|
||||||
|
|
||||||
patches = [
|
patches = [
|
||||||
./0001-setup.py-don-t-ask-for-hipcc-version.patch
|
|
||||||
./0002-setup.py-nix-support-respect-cmakeFlags.patch
|
./0002-setup.py-nix-support-respect-cmakeFlags.patch
|
||||||
|
./0003-propagate-pythonpath.patch
|
||||||
|
./0004-drop-lsmod.patch
|
||||||
];
|
];
|
||||||
|
|
||||||
# Ignore the python version check because it hard-codes minor versions and
|
# Ignore the python version check because it hard-codes minor versions and
|
||||||
# lags behind `ray`'s python interpreter support
|
# lags behind `ray`'s python interpreter support
|
||||||
postPatch = ''
|
postPatch =
|
||||||
substituteInPlace CMakeLists.txt \
|
''
|
||||||
--replace-fail \
|
substituteInPlace CMakeLists.txt \
|
||||||
'set(PYTHON_SUPPORTED_VERSIONS' \
|
--replace-fail \
|
||||||
'set(PYTHON_SUPPORTED_VERSIONS "${lib.versions.majorMinor python.version}"'
|
'set(PYTHON_SUPPORTED_VERSIONS' \
|
||||||
'';
|
'set(PYTHON_SUPPORTED_VERSIONS "${lib.versions.majorMinor python.version}"'
|
||||||
|
|
||||||
nativeBuildInputs = [
|
# Relax torch dependency manually because the nonstandard requirements format
|
||||||
cmake
|
# is not caught by pythonRelaxDeps
|
||||||
ninja
|
substituteInPlace requirements*.txt pyproject.toml \
|
||||||
pythonRelaxDepsHook
|
--replace-warn 'torch==2.5.1' 'torch==${lib.getVersion torch}' \
|
||||||
which
|
--replace-warn 'torch == 2.5.1' 'torch == ${lib.getVersion torch}'
|
||||||
] ++ lib.optionals rocmSupport [ rocmPackages.hipcc ];
|
''
|
||||||
|
+ lib.optionalString (nccl == null) ''
|
||||||
|
# On platforms where NCCL is not supported (e.g. Jetson), substitute Gloo (provided by Torch)
|
||||||
|
substituteInPlace vllm/distributed/parallel_state.py \
|
||||||
|
--replace-fail '"nccl"' '"gloo"'
|
||||||
|
'';
|
||||||
|
|
||||||
|
nativeBuildInputs =
|
||||||
|
[
|
||||||
|
cmake
|
||||||
|
ninja
|
||||||
|
pythonRelaxDepsHook
|
||||||
|
which
|
||||||
|
]
|
||||||
|
++ lib.optionals rocmSupport [
|
||||||
|
rocmPackages.hipcc
|
||||||
|
]
|
||||||
|
++ lib.optionals cudaSupport [
|
||||||
|
cudaPackages.cuda_nvcc
|
||||||
|
autoAddDriverRunpath
|
||||||
|
]
|
||||||
|
++ lib.optionals isCudaJetson [
|
||||||
|
cudaPackages.autoAddCudaCompatRunpath
|
||||||
|
];
|
||||||
|
|
||||||
build-system = [
|
build-system = [
|
||||||
packaging
|
packaging
|
||||||
@ -97,18 +259,22 @@ buildPythonPackage rec {
|
|||||||
];
|
];
|
||||||
|
|
||||||
buildInputs =
|
buildInputs =
|
||||||
(lib.optionals cudaSupport (
|
[
|
||||||
with cudaPackages;
|
setuptools-scm
|
||||||
[
|
torch
|
||||||
cuda_cudart # cuda_runtime.h, -lcudart
|
]
|
||||||
cuda_cccl
|
++ (lib.optionals cpuSupport ([
|
||||||
libcusparse # cusparse.h
|
numactl
|
||||||
libcusolver # cusolverDn.h
|
oneDNN
|
||||||
cuda_nvcc
|
]))
|
||||||
cuda_nvtx
|
++ (
|
||||||
libcublas
|
lib.optionals cudaSupport mergedCudaLibraries
|
||||||
]
|
++ (with cudaPackages; [
|
||||||
))
|
nccl
|
||||||
|
cudnn
|
||||||
|
libcufile
|
||||||
|
])
|
||||||
|
)
|
||||||
++ (lib.optionals rocmSupport (
|
++ (lib.optionals rocmSupport (
|
||||||
with rocmPackages;
|
with rocmPackages;
|
||||||
[
|
[
|
||||||
@ -123,10 +289,13 @@ buildPythonPackage rec {
|
|||||||
dependencies =
|
dependencies =
|
||||||
[
|
[
|
||||||
aioprometheus
|
aioprometheus
|
||||||
|
blake3
|
||||||
|
depyf
|
||||||
fastapi
|
fastapi
|
||||||
lm-format-enforcer
|
lm-format-enforcer
|
||||||
numpy
|
numpy
|
||||||
openai
|
openai
|
||||||
|
opencv-python-headless
|
||||||
outlines
|
outlines
|
||||||
pandas
|
pandas
|
||||||
prometheus-fastapi-instrumentator
|
prometheus-fastapi-instrumentator
|
||||||
@ -138,27 +307,64 @@ buildPythonPackage rec {
|
|||||||
ray
|
ray
|
||||||
sentencepiece
|
sentencepiece
|
||||||
tiktoken
|
tiktoken
|
||||||
|
tokenizers
|
||||||
|
msgspec
|
||||||
|
gguf
|
||||||
|
einops
|
||||||
|
importlib-metadata
|
||||||
|
partial-json-parser
|
||||||
|
compressed-tensors
|
||||||
|
mistral-common
|
||||||
torch
|
torch
|
||||||
|
torchaudio
|
||||||
torchvision
|
torchvision
|
||||||
transformers
|
transformers
|
||||||
uvicorn
|
uvicorn
|
||||||
xformers
|
xformers
|
||||||
|
xgrammar
|
||||||
]
|
]
|
||||||
++ uvicorn.optional-dependencies.standard
|
++ uvicorn.optional-dependencies.standard
|
||||||
++ aioprometheus.optional-dependencies.starlette
|
++ aioprometheus.optional-dependencies.starlette
|
||||||
++ lib.optionals cudaSupport [
|
++ lib.optionals cudaSupport [
|
||||||
cupy
|
cupy
|
||||||
|
pynvml
|
||||||
];
|
];
|
||||||
|
|
||||||
dontUseCmakeConfigure = true;
|
dontUseCmakeConfigure = true;
|
||||||
cmakeFlags = [ (lib.cmakeFeature "FETCHCONTENT_SOURCE_DIR_CUTLASS" "${lib.getDev cutlass}") ];
|
cmakeFlags =
|
||||||
|
[
|
||||||
|
(lib.cmakeFeature "FETCHCONTENT_SOURCE_DIR_CUTLASS" "${lib.getDev cutlass}")
|
||||||
|
(lib.cmakeFeature "VLLM_FLASH_ATTN_SRC_DIR" "${lib.getDev vllm-flash-attn}")
|
||||||
|
]
|
||||||
|
++ lib.optionals cudaSupport [
|
||||||
|
(lib.cmakeFeature "TORCH_CUDA_ARCH_LIST" "${gpuTargetString}")
|
||||||
|
(lib.cmakeFeature "CUTLASS_NVCC_ARCHS_ENABLED" "${cudaPackages.cudaFlags.cmakeCudaArchitecturesString
|
||||||
|
}")
|
||||||
|
(lib.cmakeFeature "CUDA_TOOLKIT_ROOT_DIR" "${symlinkJoin {
|
||||||
|
name = "cuda-merged-${cudaPackages.cudaVersion}";
|
||||||
|
paths = builtins.concatMap getAllOutputs mergedCudaLibraries;
|
||||||
|
}}")
|
||||||
|
(lib.cmakeFeature "CAFFE2_USE_CUDNN" "ON")
|
||||||
|
(lib.cmakeFeature "CAFFE2_USE_CUFILE" "ON")
|
||||||
|
(lib.cmakeFeature "CUTLASS_ENABLE_CUBLAS" "ON")
|
||||||
|
]
|
||||||
|
++ lib.optionals cpuSupport [
|
||||||
|
(lib.cmakeFeature "FETCHCONTENT_SOURCE_DIR_ONEDNN" "${lib.getDev oneDNN}")
|
||||||
|
];
|
||||||
|
|
||||||
env =
|
env =
|
||||||
lib.optionalAttrs cudaSupport { CUDA_HOME = "${lib.getDev cudaPackages.cuda_nvcc}"; }
|
lib.optionalAttrs cudaSupport {
|
||||||
|
VLLM_TARGET_DEVICE = "cuda";
|
||||||
|
CUDA_HOME = "${lib.getDev cudaPackages.cuda_nvcc}";
|
||||||
|
}
|
||||||
// lib.optionalAttrs rocmSupport {
|
// lib.optionalAttrs rocmSupport {
|
||||||
|
VLLM_TARGET_DEVICE = "rocm";
|
||||||
# Otherwise it tries to enumerate host supported ROCM gfx archs, and that is not possible due to sandboxing.
|
# Otherwise it tries to enumerate host supported ROCM gfx archs, and that is not possible due to sandboxing.
|
||||||
PYTORCH_ROCM_ARCH = lib.strings.concatStringsSep ";" rocmPackages.clr.gpuTargets;
|
PYTORCH_ROCM_ARCH = lib.strings.concatStringsSep ";" rocmPackages.clr.gpuTargets;
|
||||||
ROCM_HOME = "${rocmPackages.clr}";
|
ROCM_HOME = "${rocmPackages.clr}";
|
||||||
|
}
|
||||||
|
// lib.optionalAttrs cpuSupport {
|
||||||
|
VLLM_TARGET_DEVICE = "cpu";
|
||||||
};
|
};
|
||||||
|
|
||||||
pythonRelaxDeps = true;
|
pythonRelaxDeps = true;
|
||||||
@ -174,8 +380,8 @@ buildPythonPackage rec {
|
|||||||
happysalada
|
happysalada
|
||||||
lach
|
lach
|
||||||
];
|
];
|
||||||
# RuntimeError: Unknown runtime environment
|
|
||||||
broken = true;
|
# CPU support relies on unpackaged dependency `intel_extension_for_pytorch`
|
||||||
# broken = !cudaSupport && !rocmSupport;
|
broken = cpuSupport;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
77
pkgs/development/python-modules/xgrammar/default.nix
Normal file
77
pkgs/development/python-modules/xgrammar/default.nix
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
{
|
||||||
|
lib,
|
||||||
|
stdenv,
|
||||||
|
buildPythonPackage,
|
||||||
|
fetchPypi,
|
||||||
|
python,
|
||||||
|
pythonOlder,
|
||||||
|
pythonAtLeast,
|
||||||
|
pydantic,
|
||||||
|
sentencepiece,
|
||||||
|
tiktoken,
|
||||||
|
torch,
|
||||||
|
transformers,
|
||||||
|
triton,
|
||||||
|
}:
|
||||||
|
|
||||||
|
let
|
||||||
|
pyShortVersion = "cp" + builtins.replaceStrings [ "." ] [ "" ] python.pythonVersion;
|
||||||
|
platforms = rec {
|
||||||
|
aarch64-darwin = "macosx_13_0_arm64";
|
||||||
|
x86_64-darwin = "macosx_10_15_x86_64";
|
||||||
|
x86_64-linux = "manylinux_2_27_x86_64.manylinux_2_28_x86_64";
|
||||||
|
};
|
||||||
|
platform = platforms.${stdenv.system} or (throw "Unsupported system: ${stdenv.system}");
|
||||||
|
# hashes retrieved via the following command
|
||||||
|
# curl https://pypi.org/pypi/xgrammar/${version}/json | jq -r '.urls[] | "\(.digests.sha256) \(.filename)"'
|
||||||
|
hashes = rec {
|
||||||
|
cp39-aarch64-darwin = "12dd579a7073c14981e01aeee566d20e60001bf90af23024e0e6692a770ff535";
|
||||||
|
cp39-x86_64-darwin = "035ec93306543b99bf2141dcc7f1a6dd0c255753fc8b5a2b5f3289a59fed8e37";
|
||||||
|
cp39-x86_64-linux = "3b3975dcf4b3ed7b16bbe3c068738b09847f841793e1c5e1b4a07dff36bbdc37";
|
||||||
|
cp310-aarch64-darwin = "93bb6c10cbdf1a2bda3b458d97b47436657d780f98dccf3d266e17e13568c0a9";
|
||||||
|
cp310-x86_64-darwin = "5ed31db2669dc499d9d29bb16f30b3395332ff9d0fb80b759697190a5ef5258b";
|
||||||
|
cp310-x86_64-linux = "9c6f571121e4af45e3b5dc55f3dadd751cffff1f85f1c6fc5c4276db2bbed222";
|
||||||
|
cp311-aarch64-darwin = "b293443725eddad31cf7b407bb24d5f3156c4b12a2c8041743cb7068a69fadcb";
|
||||||
|
cp311-x86_64-darwin = "b2106bceb2ce313628af915f2c2b1c9865612026dd3c9feddbfcc69e4ee6c971";
|
||||||
|
cp311-x86_64-linux = "7934c968371d55759cac35be3b218cdf4b13f323f535ea0faa233240bab803b9";
|
||||||
|
cp312-aarch64-darwin = "561f8d4307db8cf5d3c3b3ff46eda6d95379f6e801278dbf9153a9d5e8b6126c";
|
||||||
|
cp312-x86_64-darwin = "6ac3cbb0a82a3a9d07f0739f63b2e26cbef7855149d236057dcc7fee74b37970";
|
||||||
|
cp312-x86_64-linux = "1854d0fe6b908a3d2d42251a62e627224dbf6035a4322b844b1b5a277e3d0461";
|
||||||
|
};
|
||||||
|
hash =
|
||||||
|
hashes."${pyShortVersion}-${stdenv.system}"
|
||||||
|
or (throw "Unsupported Python version: ${python.pythonVersion}");
|
||||||
|
in
|
||||||
|
buildPythonPackage rec {
|
||||||
|
pname = "xgrammar";
|
||||||
|
version = "0.1.11";
|
||||||
|
format = "wheel";
|
||||||
|
|
||||||
|
disabled = pythonOlder "3.9" || pythonAtLeast "3.13";
|
||||||
|
|
||||||
|
src = fetchPypi {
|
||||||
|
inherit pname version format;
|
||||||
|
dist = pyShortVersion;
|
||||||
|
python = pyShortVersion;
|
||||||
|
abi = pyShortVersion;
|
||||||
|
platform = platform;
|
||||||
|
sha256 = hash;
|
||||||
|
};
|
||||||
|
|
||||||
|
pythonImportCheck = [ "xgrammar" ];
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
pydantic
|
||||||
|
sentencepiece
|
||||||
|
tiktoken
|
||||||
|
torch
|
||||||
|
transformers
|
||||||
|
triton
|
||||||
|
];
|
||||||
|
|
||||||
|
meta = with lib; {
|
||||||
|
description = "Efficient, Flexible and Portable Structured Generation";
|
||||||
|
homepage = "https://xgrammar.mlc.ai";
|
||||||
|
license = licenses.asl20;
|
||||||
|
};
|
||||||
|
}
|
||||||
@ -10744,6 +10744,8 @@ with pkgs;
|
|||||||
openexr = openexr_3;
|
openexr = openexr_3;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
vllm = with python3Packages; toPythonApplication vllm;
|
||||||
|
|
||||||
vte-gtk4 = vte.override {
|
vte-gtk4 = vte.override {
|
||||||
gtkVersion = "4";
|
gtkVersion = "4";
|
||||||
};
|
};
|
||||||
|
|||||||
@ -18365,6 +18365,8 @@ self: super: with self; {
|
|||||||
inherit (pkgs) xgboost;
|
inherit (pkgs) xgboost;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
xgrammar = callPackage ../development/python-modules/xgrammar { };
|
||||||
|
|
||||||
xhtml2pdf = callPackage ../development/python-modules/xhtml2pdf { };
|
xhtml2pdf = callPackage ../development/python-modules/xhtml2pdf { };
|
||||||
|
|
||||||
xiaomi-ble = callPackage ../development/python-modules/xiaomi-ble { };
|
xiaomi-ble = callPackage ../development/python-modules/xiaomi-ble { };
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user