Connor Baker 688e14d21a _cuda: introduce to organize CUDA package set backbone
Signed-off-by: Connor Baker <ConnorBaker01@gmail.com>
2025-05-27 15:05:42 +00:00

300 lines
9.2 KiB
Nix
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{ lib }:
{
/**
Attribute set of supported CUDA capability mapped to information about that capability.
NOTE: For more on baseline, architecture-specific, and family-specific feature sets, see
https://developer.nvidia.com/blog/nvidia-blackwell-and-nvidia-cuda-12-9-introduce-family-specific-architecture-features.
NOTE: For information on when support for a given architecture was added, see
https://docs.nvidia.com/cuda/parallel-thread-execution/#release-notes
NOTE: For baseline feature sets, `dontDefaultAfterCudaMajorMinorVersion` is generally set to the CUDA release
immediately prior to TensorRT removing support for that architecture.
Many thanks to Arnon Shimoni for maintaining a list of these architectures and capabilities.
Without your work, this would have been much more difficult.
https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
# Type
```
cudaCapabilityToInfo ::
AttrSet
CudaCapability
{ archName :: String
, cudaCapability :: CudaCapability
, isJetson :: Bool
, isArchitectureSpecific :: Bool
, isFamilySpecific :: Bool
, minCudaMajorMinorVersion :: MajorMinorVersion
, maxCudaMajorMinorVersion :: Null | MajorMinorVersion
, dontDefaultAfterCudaMajorMinorVersion :: Null | MajorMinorVersion
}
```
`archName`
: The name of the microarchitecture
`cudaCapability`
: The CUDA capability
`isJetson`
: Whether this capability is part of NVIDIA's line of Jetson embedded computers. This field is notable
because it tells us what architecture to build for (as Jetson devices are aarch64).
More on Jetson devices here: https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/
NOTE: These architectures are only built upon request.
`isArchitectureSpecific`
: Whether this capability is an architecture-specific feature set.
NOTE: These architectures are only built upon request.
`isFamilySpecific`
: Whether this capability is a family-specific feature set.
NOTE: These architectures are only built upon request.
`minCudaMajorMinorVersion`
: The minimum (inclusive) CUDA version that supports this capability.
`maxCudaMajorMinorVersion`
: The maximum (exclusive) CUDA version that supports this capability.
`null` means there is no maximum.
`dontDefaultAfterCudaMajorMinorVersion`
: The CUDA version after which to exclude this capability from the list of default capabilities we build.
*/
cudaCapabilityToInfo =
  let
    # Expands one raw table entry into a complete record.
    #
    # The first argument is the attribute name of the entry (the capability string); it is recorded
    # in the result as `cudaCapability`. The second argument is the entry itself: `archName` and
    # `minCudaMajorMinorVersion` are required, everything else falls back to the defaults below.
    # The argument pattern has no ellipsis, so an entry carrying an unknown attribute is an
    # evaluation error.
    fillDefaults =
      cudaCapability:
      {
        # Required fields.
        archName,
        minCudaMajorMinorVersion,
        # Optional version bounds; `null` when the entry does not set them.
        maxCudaMajorMinorVersion ? null,
        dontDefaultAfterCudaMajorMinorVersion ? null,
        # Optional flags. The two feature-set flags are derived from the capability's suffix
        # ("a" or "f") unless the entry overrides them.
        isJetson ? false,
        isArchitectureSpecific ? lib.hasSuffix "a" cudaCapability,
        isFamilySpecific ? lib.hasSuffix "f" cudaCapability,
      }:
      {
        inherit cudaCapability archName;
        inherit isJetson isArchitectureSpecific isFamilySpecific;
        inherit
          minCudaMajorMinorVersion
          maxCudaMajorMinorVersion
          dontDefaultAfterCudaMajorMinorVersion
          ;
      };
  in
  lib.mapAttrs fillDefaults {
    #
    # Kepler
    #

    # Tesla K40
    "3.5" = {
      archName = "Kepler";
      minCudaMajorMinorVersion = "10.0";
      dontDefaultAfterCudaMajorMinorVersion = "11.0";
      maxCudaMajorMinorVersion = "11.8";
    };

    # Tesla K80
    "3.7" = {
      archName = "Kepler";
      minCudaMajorMinorVersion = "10.0";
      dontDefaultAfterCudaMajorMinorVersion = "11.0";
      maxCudaMajorMinorVersion = "11.8";
    };

    #
    # Maxwell
    #

    # Tesla/Quadro M series
    "5.0" = {
      archName = "Maxwell";
      minCudaMajorMinorVersion = "10.0";
      dontDefaultAfterCudaMajorMinorVersion = "11.0";
    };

    # Quadro M6000, GeForce 900, GTX-970, GTX-980, GTX Titan X
    "5.2" = {
      archName = "Maxwell";
      minCudaMajorMinorVersion = "10.0";
      dontDefaultAfterCudaMajorMinorVersion = "11.0";
    };

    #
    # Pascal
    #

    # Quadro GP100, Tesla P100, DGX-1 (Generic Pascal)
    "6.0" = {
      archName = "Pascal";
      minCudaMajorMinorVersion = "10.0";
      # Removed from TensorRT 10.0, which corresponds to CUDA 12.4 release.
      # https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-1001/support-matrix/index.html
      dontDefaultAfterCudaMajorMinorVersion = "12.3";
    };

    # GTX 1080, GTX 1070, GTX 1060, GTX 1050, GTX 1030 (GP108), GT 1010 (GP108) Titan Xp, Tesla
    # P40, Tesla P4, Discrete GPU on the NVIDIA Drive PX2
    "6.1" = {
      archName = "Pascal";
      minCudaMajorMinorVersion = "10.0";
      # Removed from TensorRT 10.0, which corresponds to CUDA 12.4 release.
      # https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-1001/support-matrix/index.html
      dontDefaultAfterCudaMajorMinorVersion = "12.3";
    };

    #
    # Volta
    #

    # DGX-1 with Volta, Tesla V100, GTX 1180 (GV104), Titan V, Quadro GV100
    "7.0" = {
      archName = "Volta";
      minCudaMajorMinorVersion = "10.0";
      # Removed from TensorRT 10.5, which corresponds to CUDA 12.6 release.
      # https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-1050/support-matrix/index.html
      dontDefaultAfterCudaMajorMinorVersion = "12.5";
    };

    # Jetson AGX Xavier, Drive AGX Pegasus, Xavier NX
    "7.2" = {
      archName = "Volta";
      isJetson = true;
      minCudaMajorMinorVersion = "10.0";
      # Note: without `cuda_compat`, maxCudaMajorMinorVersion is 11.8
      # https://docs.nvidia.com/cuda/cuda-for-tegra-appnote/index.html#deployment-considerations-for-cuda-upgrade-package
      maxCudaMajorMinorVersion = "12.2";
    };

    #
    # Turing
    #

    # GTX/RTX Turing GTX 1660 Ti, RTX 2060, RTX 2070, RTX 2080, Titan RTX, Quadro RTX 4000,
    # Quadro RTX 5000, Quadro RTX 6000, Quadro RTX 8000, Quadro T1000/T2000, Tesla T4
    "7.5" = {
      archName = "Turing";
      minCudaMajorMinorVersion = "10.0";
    };

    #
    # Ampere
    #

    # NVIDIA A100 (GA100; the "Tesla" name has been dropped), NVIDIA DGX-A100
    "8.0" = {
      archName = "Ampere";
      minCudaMajorMinorVersion = "11.2";
    };

    # Tesla GA10x cards, RTX Ampere RTX 3080, GA102 RTX 3090, RTX A2000, A3000, RTX A4000,
    # A5000, A6000, NVIDIA A40, GA106 RTX 3060, GA104 RTX 3070, GA107 RTX 3050, RTX A10, RTX
    # A16, RTX A40, A2 Tensor Core GPU
    "8.6" = {
      archName = "Ampere";
      minCudaMajorMinorVersion = "11.2";
    };

    # Jetson AGX Orin and Drive AGX Orin only
    "8.7" = {
      archName = "Ampere";
      isJetson = true;
      minCudaMajorMinorVersion = "11.5";
    };

    #
    # Ada
    #

    # NVIDIA GeForce RTX 4090, RTX 4080, RTX 6000, Tesla L40
    "8.9" = {
      archName = "Ada";
      minCudaMajorMinorVersion = "11.8";
    };

    #
    # Hopper
    #

    # NVIDIA H100 (GH100)
    "9.0" = {
      archName = "Hopper";
      minCudaMajorMinorVersion = "11.8";
    };
    "9.0a" = {
      archName = "Hopper";
      minCudaMajorMinorVersion = "12.0";
    };

    #
    # Blackwell
    #

    # NVIDIA B100
    "10.0" = {
      archName = "Blackwell";
      minCudaMajorMinorVersion = "12.7";
    };
    "10.0a" = {
      archName = "Blackwell";
      minCudaMajorMinorVersion = "12.7";
    };
    "10.0f" = {
      archName = "Blackwell";
      minCudaMajorMinorVersion = "12.9";
    };

    # NVIDIA Jetson Thor Blackwell
    "10.1" = {
      archName = "Blackwell";
      isJetson = true;
      minCudaMajorMinorVersion = "12.7";
    };
    "10.1a" = {
      archName = "Blackwell";
      isJetson = true;
      minCudaMajorMinorVersion = "12.7";
    };
    "10.1f" = {
      archName = "Blackwell";
      isJetson = true;
      minCudaMajorMinorVersion = "12.9";
    };

    # NVIDIA ???
    "10.3" = {
      archName = "Blackwell";
      minCudaMajorMinorVersion = "12.9";
    };
    "10.3a" = {
      archName = "Blackwell";
      minCudaMajorMinorVersion = "12.9";
    };
    "10.3f" = {
      archName = "Blackwell";
      minCudaMajorMinorVersion = "12.9";
    };

    # NVIDIA GeForce RTX 5090 (GB202) etc.
    "12.0" = {
      archName = "Blackwell";
      minCudaMajorMinorVersion = "12.8";
    };
    "12.0a" = {
      archName = "Blackwell";
      minCudaMajorMinorVersion = "12.8";
    };
    "12.0f" = {
      archName = "Blackwell";
      minCudaMajorMinorVersion = "12.9";
    };

    # NVIDIA ???
    "12.1" = {
      archName = "Blackwell";
      minCudaMajorMinorVersion = "12.9";
    };
    "12.1a" = {
      archName = "Blackwell";
      minCudaMajorMinorVersion = "12.9";
    };
    "12.1f" = {
      archName = "Blackwell";
      minCudaMajorMinorVersion = "12.9";
    };
  };
}