300 lines
9.2 KiB
Nix
300 lines
9.2 KiB
Nix
{ lib }:
{
  /**
    Attribute set of supported CUDA capability mapped to information about that capability.

    NOTE: For more on baseline, architecture-specific, and family-specific feature sets, see
      https://developer.nvidia.com/blog/nvidia-blackwell-and-nvidia-cuda-12-9-introduce-family-specific-architecture-features.

    NOTE: For information on when support for a given architecture was added, see
      https://docs.nvidia.com/cuda/parallel-thread-execution/#release-notes

    NOTE: For baseline feature sets, `dontDefaultAfterCudaMajorMinorVersion` is generally set to the CUDA release
      immediately prior to TensorRT removing support for that architecture.

    Many thanks to Arnon Shimoni for maintaining a list of these architectures and capabilities.
    Without your work, this would have been much more difficult.
    https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/

    # Type

    ```
    cudaCapabilityToInfo ::
      AttrSet
        CudaCapability
        { archName :: String
        , cudaCapability :: CudaCapability
        , isJetson :: Bool
        , isArchitectureSpecific :: Bool
        , isFamilySpecific :: Bool
        , minCudaMajorMinorVersion :: MajorMinorVersion
        , maxCudaMajorMinorVersion :: Null | MajorMinorVersion
        , dontDefaultAfterCudaMajorMinorVersion :: Null | MajorMinorVersion
        }
    ```

    `archName`

    : The name of the microarchitecture. Required for every entry.

    `cudaCapability`

    : The CUDA capability. Always equal to the attribute name of the entry.

    `isJetson`

    : Whether this capability is part of NVIDIA's line of Jetson embedded computers. This field is notable
      because it tells us what architecture to build for (as Jetson devices are aarch64).
      More on Jetson devices here: https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/
      Defaults to `false` when omitted.
      NOTE: These architectures are only built upon request.

    `isArchitectureSpecific`

    : Whether this capability is an architecture-specific feature set.
      Defaults to whether the capability ends with `a` when omitted.
      NOTE: These architectures are only built upon request.

    `isFamilySpecific`

    : Whether this capability is a family-specific feature set.
      Defaults to whether the capability ends with `f` when omitted.
      NOTE: These architectures are only built upon request.

    `minCudaMajorMinorVersion`

    : The minimum (inclusive) CUDA version that supports this capability. Required for every entry.

    `maxCudaMajorMinorVersion`

    : The maximum (exclusive) CUDA version that supports this capability.
      `null` means there is no maximum. Defaults to `null` when omitted.

    `dontDefaultAfterCudaMajorMinorVersion`

    : The CUDA version after which to exclude this capability from the list of default capabilities we build.
      Defaults to `null` when omitted.
  */
  cudaCapabilityToInfo =
    lib.mapAttrs
      (
        # The attribute name of each entry below is the CUDA capability itself.
        cudaCapability:
        # Supplies default values.
        # The pattern is closed (no `...`), so an entry carrying an attribute not listed here
        # fails evaluation instead of being silently dropped.
        {
          archName,
          isJetson ? false,
          isArchitectureSpecific ? (lib.hasSuffix "a" cudaCapability),
          isFamilySpecific ? (lib.hasSuffix "f" cudaCapability),
          minCudaMajorMinorVersion,
          maxCudaMajorMinorVersion ? null,
          dontDefaultAfterCudaMajorMinorVersion ? null,
        }:
        # Every entry is normalised to the same eight attributes.
        {
          inherit
            archName
            cudaCapability
            isJetson
            isArchitectureSpecific
            isFamilySpecific
            minCudaMajorMinorVersion
            maxCudaMajorMinorVersion
            dontDefaultAfterCudaMajorMinorVersion
            ;
        }
      )
      {
        # Tesla K40
        "3.5" = {
          archName = "Kepler";
          minCudaMajorMinorVersion = "10.0";
          dontDefaultAfterCudaMajorMinorVersion = "11.0";
          maxCudaMajorMinorVersion = "11.8";
        };

        # Tesla K80
        "3.7" = {
          archName = "Kepler";
          minCudaMajorMinorVersion = "10.0";
          dontDefaultAfterCudaMajorMinorVersion = "11.0";
          maxCudaMajorMinorVersion = "11.8";
        };

        # Tesla/Quadro M series
        "5.0" = {
          archName = "Maxwell";
          minCudaMajorMinorVersion = "10.0";
          dontDefaultAfterCudaMajorMinorVersion = "11.0";
        };

        # Quadro M6000 , GeForce 900, GTX-970, GTX-980, GTX Titan X
        "5.2" = {
          archName = "Maxwell";
          minCudaMajorMinorVersion = "10.0";
          dontDefaultAfterCudaMajorMinorVersion = "11.0";
        };

        # Quadro GP100, Tesla P100, DGX-1 (Generic Pascal)
        "6.0" = {
          archName = "Pascal";
          minCudaMajorMinorVersion = "10.0";
          # Removed from TensorRT 10.0, which corresponds to CUDA 12.4 release.
          # https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-1001/support-matrix/index.html
          dontDefaultAfterCudaMajorMinorVersion = "12.3";
        };

        # GTX 1080, GTX 1070, GTX 1060, GTX 1050, GTX 1030 (GP108), GT 1010 (GP108) Titan Xp, Tesla
        # P40, Tesla P4, Discrete GPU on the NVIDIA Drive PX2
        "6.1" = {
          archName = "Pascal";
          minCudaMajorMinorVersion = "10.0";
          # Removed from TensorRT 10.0, which corresponds to CUDA 12.4 release.
          # https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-1001/support-matrix/index.html
          dontDefaultAfterCudaMajorMinorVersion = "12.3";
        };

        # DGX-1 with Volta, Tesla V100, GTX 1180 (GV104), Titan V, Quadro GV100
        "7.0" = {
          archName = "Volta";
          minCudaMajorMinorVersion = "10.0";
          # Removed from TensorRT 10.5, which corresponds to CUDA 12.6 release.
          # https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-1050/support-matrix/index.html
          dontDefaultAfterCudaMajorMinorVersion = "12.5";
        };

        # Jetson AGX Xavier, Drive AGX Pegasus, Xavier NX
        "7.2" = {
          archName = "Volta";
          minCudaMajorMinorVersion = "10.0";
          # Note: without `cuda_compat`, maxCudaMajorMinorVersion is 11.8
          # https://docs.nvidia.com/cuda/cuda-for-tegra-appnote/index.html#deployment-considerations-for-cuda-upgrade-package
          maxCudaMajorMinorVersion = "12.2";
          isJetson = true;
        };

        # GTX/RTX Turing – GTX 1660 Ti, RTX 2060, RTX 2070, RTX 2080, Titan RTX, Quadro RTX 4000,
        # Quadro RTX 5000, Quadro RTX 6000, Quadro RTX 8000, Quadro T1000/T2000, Tesla T4
        "7.5" = {
          archName = "Turing";
          minCudaMajorMinorVersion = "10.0";
        };

        # NVIDIA A100 (the name “Tesla” has been dropped – GA100), NVIDIA DGX-A100
        "8.0" = {
          archName = "Ampere";
          minCudaMajorMinorVersion = "11.2";
        };

        # Tesla GA10x cards, RTX Ampere – RTX 3080, GA102 – RTX 3090, RTX A2000, A3000, RTX A4000,
        # A5000, A6000, NVIDIA A40, GA106 – RTX 3060, GA104 – RTX 3070, GA107 – RTX 3050, RTX A10, RTX
        # A16, RTX A40, A2 Tensor Core GPU
        "8.6" = {
          archName = "Ampere";
          minCudaMajorMinorVersion = "11.2";
        };

        # Jetson AGX Orin and Drive AGX Orin only
        "8.7" = {
          archName = "Ampere";
          minCudaMajorMinorVersion = "11.5";
          isJetson = true;
        };

        # NVIDIA GeForce RTX 4090, RTX 4080, RTX 6000, Tesla L40
        "8.9" = {
          archName = "Ada";
          minCudaMajorMinorVersion = "11.8";
        };

        # NVIDIA H100 (GH100)
        "9.0" = {
          archName = "Hopper";
          minCudaMajorMinorVersion = "11.8";
        };

        "9.0a" = {
          archName = "Hopper";
          minCudaMajorMinorVersion = "12.0";
        };

        # NVIDIA B100
        "10.0" = {
          archName = "Blackwell";
          minCudaMajorMinorVersion = "12.7";
        };

        "10.0a" = {
          archName = "Blackwell";
          minCudaMajorMinorVersion = "12.7";
        };

        "10.0f" = {
          archName = "Blackwell";
          minCudaMajorMinorVersion = "12.9";
        };

        # NVIDIA Jetson Thor Blackwell
        "10.1" = {
          archName = "Blackwell";
          minCudaMajorMinorVersion = "12.7";
          isJetson = true;
        };

        "10.1a" = {
          archName = "Blackwell";
          minCudaMajorMinorVersion = "12.7";
          isJetson = true;
        };

        "10.1f" = {
          archName = "Blackwell";
          minCudaMajorMinorVersion = "12.9";
          isJetson = true;
        };

        # NVIDIA ???
        "10.3" = {
          archName = "Blackwell";
          minCudaMajorMinorVersion = "12.9";
        };

        "10.3a" = {
          archName = "Blackwell";
          minCudaMajorMinorVersion = "12.9";
        };

        "10.3f" = {
          archName = "Blackwell";
          minCudaMajorMinorVersion = "12.9";
        };

        # NVIDIA GeForce RTX 5090 (GB202) etc.
        "12.0" = {
          archName = "Blackwell";
          minCudaMajorMinorVersion = "12.8";
        };

        "12.0a" = {
          archName = "Blackwell";
          minCudaMajorMinorVersion = "12.8";
        };

        "12.0f" = {
          archName = "Blackwell";
          minCudaMajorMinorVersion = "12.9";
        };

        # NVIDIA ???
        "12.1" = {
          archName = "Blackwell";
          minCudaMajorMinorVersion = "12.9";
        };

        "12.1a" = {
          archName = "Blackwell";
          minCudaMajorMinorVersion = "12.9";
        };

        "12.1f" = {
          archName = "Blackwell";
          minCudaMajorMinorVersion = "12.9";
        };
      };
}