2025-05-20 09:32:03 -07:00
|
|
|
|
{ lib }:
|
2025-05-09 21:54:54 +00:00
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
Attribute set of supported CUDA capability mapped to information about that capability.
|
|
|
|
|
|
|
|
|
|
|
|
NOTE: For more on baseline, architecture-specific, and family-specific feature sets, see
|
|
|
|
|
|
https://developer.nvidia.com/blog/nvidia-blackwell-and-nvidia-cuda-12-9-introduce-family-specific-architecture-features.
|
|
|
|
|
|
|
|
|
|
|
|
NOTE: For information on when support for a given architecture was added, see
|
|
|
|
|
|
https://docs.nvidia.com/cuda/parallel-thread-execution/#release-notes
|
|
|
|
|
|
|
|
|
|
|
|
NOTE: For baseline feature sets, `dontDefaultAfterCudaMajorMinorVersion` is generally set to the CUDA release
|
|
|
|
|
|
immediately prior to TensorRT removing support for that architecture.
|
|
|
|
|
|
|
|
|
|
|
|
Many thanks to Arnon Shimoni for maintaining a list of these architectures and capabilities.
|
|
|
|
|
|
Without your work, this would have been much more difficult.
|
|
|
|
|
|
https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
|
|
|
|
|
|
|
|
|
|
|
|
# Type
|
|
|
|
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
cudaCapabilityToInfo ::
|
|
|
|
|
|
AttrSet
|
|
|
|
|
|
CudaCapability
|
|
|
|
|
|
{ archName :: String
|
|
|
|
|
|
, cudaCapability :: CudaCapability
|
|
|
|
|
|
, isJetson :: Bool
|
|
|
|
|
|
, isArchitectureSpecific :: Bool
|
|
|
|
|
|
, isFamilySpecific :: Bool
|
|
|
|
|
|
, minCudaMajorMinorVersion :: MajorMinorVersion
|
|
|
|
|
|
, maxCudaMajorMinorVersion :: MajorMinorVersion
|
|
|
|
|
|
, dontDefaultAfterCudaMajorMinorVersion :: Null | MajorMinorVersion
|
|
|
|
|
|
}
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
`archName`
|
|
|
|
|
|
|
|
|
|
|
|
: The name of the microarchitecture
|
|
|
|
|
|
|
|
|
|
|
|
`cudaCapability`
|
|
|
|
|
|
|
|
|
|
|
|
: The CUDA capability
|
|
|
|
|
|
|
|
|
|
|
|
`isJetson`
|
|
|
|
|
|
|
|
|
|
|
|
: Whether this capability is part of NVIDIA's line of Jetson embedded computers. This field is notable
|
|
|
|
|
|
because it tells us what architecture to build for (as Jetson devices are aarch64).
|
|
|
|
|
|
More on Jetson devices here: https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/
|
|
|
|
|
|
NOTE: These architectures are only built upon request.
|
|
|
|
|
|
|
|
|
|
|
|
`isArchitectureSpecific`
|
|
|
|
|
|
|
|
|
|
|
|
: Whether this capability is an architecture-specific feature set.
|
|
|
|
|
|
NOTE: These architectures are only built upon request.
|
|
|
|
|
|
|
|
|
|
|
|
`isFamilySpecific`
|
|
|
|
|
|
|
|
|
|
|
|
: Whether this capability is a family-specific feature set.
|
|
|
|
|
|
NOTE: These architectures are only built upon request.
|
|
|
|
|
|
|
|
|
|
|
|
`minCudaMajorMinorVersion`
|
|
|
|
|
|
|
|
|
|
|
|
: The minimum (inclusive) CUDA version that supports this capability.
|
|
|
|
|
|
|
|
|
|
|
|
`maxCudaMajorMinorVersion`
|
|
|
|
|
|
|
|
|
|
|
|
: The maximum (exclusive) CUDA version that supports this capability.
|
|
|
|
|
|
`null` means there is no maximum.
|
|
|
|
|
|
|
|
|
|
|
|
`dontDefaultAfterCudaMajorMinorVersion`
|
|
|
|
|
|
|
|
|
|
|
|
: The CUDA version after which to exclude this capability from the list of default capabilities we build.
|
|
|
|
|
|
*/
|
|
|
|
|
|
cudaCapabilityToInfo =
|
|
|
|
|
|
lib.mapAttrs
|
|
|
|
|
|
(
|
|
|
|
|
|
cudaCapability:
|
|
|
|
|
|
# Supplies default values.
|
|
|
|
|
|
{
|
|
|
|
|
|
archName,
|
|
|
|
|
|
isJetson ? false,
|
|
|
|
|
|
isArchitectureSpecific ? (lib.hasSuffix "a" cudaCapability),
|
|
|
|
|
|
isFamilySpecific ? (lib.hasSuffix "f" cudaCapability),
|
|
|
|
|
|
minCudaMajorMinorVersion,
|
|
|
|
|
|
maxCudaMajorMinorVersion ? null,
|
|
|
|
|
|
dontDefaultAfterCudaMajorMinorVersion ? null,
|
|
|
|
|
|
}:
|
|
|
|
|
|
{
|
|
|
|
|
|
inherit
|
|
|
|
|
|
archName
|
|
|
|
|
|
cudaCapability
|
|
|
|
|
|
isJetson
|
|
|
|
|
|
isArchitectureSpecific
|
|
|
|
|
|
isFamilySpecific
|
|
|
|
|
|
minCudaMajorMinorVersion
|
|
|
|
|
|
maxCudaMajorMinorVersion
|
|
|
|
|
|
dontDefaultAfterCudaMajorMinorVersion
|
|
|
|
|
|
;
|
|
|
|
|
|
}
|
|
|
|
|
|
)
|
|
|
|
|
|
{
|
|
|
|
|
|
# Tesla K40
|
|
|
|
|
|
"3.5" = {
|
|
|
|
|
|
archName = "Kepler";
|
|
|
|
|
|
minCudaMajorMinorVersion = "10.0";
|
|
|
|
|
|
dontDefaultAfterCudaMajorMinorVersion = "11.0";
|
|
|
|
|
|
maxCudaMajorMinorVersion = "11.8";
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
# Tesla K80
|
|
|
|
|
|
"3.7" = {
|
|
|
|
|
|
archName = "Kepler";
|
|
|
|
|
|
minCudaMajorMinorVersion = "10.0";
|
|
|
|
|
|
dontDefaultAfterCudaMajorMinorVersion = "11.0";
|
|
|
|
|
|
maxCudaMajorMinorVersion = "11.8";
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
# Tesla/Quadro M series
|
|
|
|
|
|
"5.0" = {
|
|
|
|
|
|
archName = "Maxwell";
|
|
|
|
|
|
minCudaMajorMinorVersion = "10.0";
|
|
|
|
|
|
dontDefaultAfterCudaMajorMinorVersion = "11.0";
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
# Quadro M6000 , GeForce 900, GTX-970, GTX-980, GTX Titan X
|
|
|
|
|
|
"5.2" = {
|
|
|
|
|
|
archName = "Maxwell";
|
|
|
|
|
|
minCudaMajorMinorVersion = "10.0";
|
|
|
|
|
|
dontDefaultAfterCudaMajorMinorVersion = "11.0";
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
# Quadro GP100, Tesla P100, DGX-1 (Generic Pascal)
|
|
|
|
|
|
"6.0" = {
|
|
|
|
|
|
archName = "Pascal";
|
|
|
|
|
|
minCudaMajorMinorVersion = "10.0";
|
|
|
|
|
|
# Removed from TensorRT 10.0, which corresponds to CUDA 12.4 release.
|
|
|
|
|
|
# https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-1001/support-matrix/index.html
|
|
|
|
|
|
dontDefaultAfterCudaMajorMinorVersion = "12.3";
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
# GTX 1080, GTX 1070, GTX 1060, GTX 1050, GTX 1030 (GP108), GT 1010 (GP108) Titan Xp, Tesla
|
|
|
|
|
|
# P40, Tesla P4, Discrete GPU on the NVIDIA Drive PX2
|
|
|
|
|
|
"6.1" = {
|
|
|
|
|
|
archName = "Pascal";
|
|
|
|
|
|
minCudaMajorMinorVersion = "10.0";
|
|
|
|
|
|
# Removed from TensorRT 10.0, which corresponds to CUDA 12.4 release.
|
|
|
|
|
|
# https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-1001/support-matrix/index.html
|
|
|
|
|
|
dontDefaultAfterCudaMajorMinorVersion = "12.3";
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
# DGX-1 with Volta, Tesla V100, GTX 1180 (GV104), Titan V, Quadro GV100
|
|
|
|
|
|
"7.0" = {
|
|
|
|
|
|
archName = "Volta";
|
|
|
|
|
|
minCudaMajorMinorVersion = "10.0";
|
|
|
|
|
|
# Removed from TensorRT 10.5, which corresponds to CUDA 12.6 release.
|
|
|
|
|
|
# https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-1050/support-matrix/index.html
|
|
|
|
|
|
dontDefaultAfterCudaMajorMinorVersion = "12.5";
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
# Jetson AGX Xavier, Drive AGX Pegasus, Xavier NX
|
|
|
|
|
|
"7.2" = {
|
|
|
|
|
|
archName = "Volta";
|
|
|
|
|
|
minCudaMajorMinorVersion = "10.0";
|
|
|
|
|
|
# Note: without `cuda_compat`, maxCudaMajorMinorVersion is 11.8
|
|
|
|
|
|
# https://docs.nvidia.com/cuda/cuda-for-tegra-appnote/index.html#deployment-considerations-for-cuda-upgrade-package
|
|
|
|
|
|
maxCudaMajorMinorVersion = "12.2";
|
|
|
|
|
|
isJetson = true;
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
# GTX/RTX Turing – GTX 1660 Ti, RTX 2060, RTX 2070, RTX 2080, Titan RTX, Quadro RTX 4000,
|
|
|
|
|
|
# Quadro RTX 5000, Quadro RTX 6000, Quadro RTX 8000, Quadro T1000/T2000, Tesla T4
|
|
|
|
|
|
"7.5" = {
|
|
|
|
|
|
archName = "Turing";
|
|
|
|
|
|
minCudaMajorMinorVersion = "10.0";
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
# NVIDIA A100 (the name “Tesla” has been dropped – GA100), NVIDIA DGX-A100
|
|
|
|
|
|
"8.0" = {
|
|
|
|
|
|
archName = "Ampere";
|
|
|
|
|
|
minCudaMajorMinorVersion = "11.2";
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
# Tesla GA10x cards, RTX Ampere – RTX 3080, GA102 – RTX 3090, RTX A2000, A3000, RTX A4000,
|
|
|
|
|
|
# A5000, A6000, NVIDIA A40, GA106 – RTX 3060, GA104 – RTX 3070, GA107 – RTX 3050, RTX A10, RTX
|
|
|
|
|
|
# A16, RTX A40, A2 Tensor Core GPU
|
|
|
|
|
|
"8.6" = {
|
|
|
|
|
|
archName = "Ampere";
|
|
|
|
|
|
minCudaMajorMinorVersion = "11.2";
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
# Jetson AGX Orin and Drive AGX Orin only
|
|
|
|
|
|
"8.7" = {
|
|
|
|
|
|
archName = "Ampere";
|
|
|
|
|
|
minCudaMajorMinorVersion = "11.5";
|
|
|
|
|
|
isJetson = true;
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
# NVIDIA GeForce RTX 4090, RTX 4080, RTX 6000, Tesla L40
|
|
|
|
|
|
"8.9" = {
|
|
|
|
|
|
archName = "Ada";
|
|
|
|
|
|
minCudaMajorMinorVersion = "11.8";
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
# NVIDIA H100 (GH100)
|
|
|
|
|
|
"9.0" = {
|
|
|
|
|
|
archName = "Hopper";
|
|
|
|
|
|
minCudaMajorMinorVersion = "11.8";
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
"9.0a" = {
|
|
|
|
|
|
archName = "Hopper";
|
|
|
|
|
|
minCudaMajorMinorVersion = "12.0";
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
# NVIDIA B100
|
|
|
|
|
|
"10.0" = {
|
|
|
|
|
|
archName = "Blackwell";
|
|
|
|
|
|
minCudaMajorMinorVersion = "12.7";
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
"10.0a" = {
|
|
|
|
|
|
archName = "Blackwell";
|
|
|
|
|
|
minCudaMajorMinorVersion = "12.7";
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
"10.0f" = {
|
|
|
|
|
|
archName = "Blackwell";
|
|
|
|
|
|
minCudaMajorMinorVersion = "12.9";
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
# NVIDIA Jetson Thor Blackwell
|
|
|
|
|
|
"10.1" = {
|
|
|
|
|
|
archName = "Blackwell";
|
|
|
|
|
|
minCudaMajorMinorVersion = "12.7";
|
|
|
|
|
|
isJetson = true;
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
"10.1a" = {
|
|
|
|
|
|
archName = "Blackwell";
|
|
|
|
|
|
minCudaMajorMinorVersion = "12.7";
|
|
|
|
|
|
isJetson = true;
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
"10.1f" = {
|
|
|
|
|
|
archName = "Blackwell";
|
|
|
|
|
|
minCudaMajorMinorVersion = "12.9";
|
|
|
|
|
|
isJetson = true;
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
# NVIDIA ???
|
|
|
|
|
|
"10.3" = {
|
|
|
|
|
|
archName = "Blackwell";
|
|
|
|
|
|
minCudaMajorMinorVersion = "12.9";
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
"10.3a" = {
|
|
|
|
|
|
archName = "Blackwell";
|
|
|
|
|
|
minCudaMajorMinorVersion = "12.9";
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
"10.3f" = {
|
|
|
|
|
|
archName = "Blackwell";
|
|
|
|
|
|
minCudaMajorMinorVersion = "12.9";
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
# NVIDIA GeForce RTX 5090 (GB202) etc.
|
|
|
|
|
|
"12.0" = {
|
|
|
|
|
|
archName = "Blackwell";
|
|
|
|
|
|
minCudaMajorMinorVersion = "12.8";
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
"12.0a" = {
|
|
|
|
|
|
archName = "Blackwell";
|
|
|
|
|
|
minCudaMajorMinorVersion = "12.8";
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
"12.0f" = {
|
|
|
|
|
|
archName = "Blackwell";
|
|
|
|
|
|
minCudaMajorMinorVersion = "12.9";
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
# NVIDIA ???
|
|
|
|
|
|
"12.1" = {
|
|
|
|
|
|
archName = "Blackwell";
|
|
|
|
|
|
minCudaMajorMinorVersion = "12.9";
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
"12.1a" = {
|
|
|
|
|
|
archName = "Blackwell";
|
|
|
|
|
|
minCudaMajorMinorVersion = "12.9";
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
"12.1f" = {
|
|
|
|
|
|
archName = "Blackwell";
|
|
|
|
|
|
minCudaMajorMinorVersion = "12.9";
|
|
|
|
|
|
};
|
|
|
|
|
|
};
|
|
|
|
|
|
}
|