233 lines
7.3 KiB
Nix
233 lines
7.3 KiB
Nix
{
|
|
lib,
|
|
stdenv,
|
|
fetchpatch,
|
|
fetchFromGitHub,
|
|
cmake,
|
|
rocm-cmake,
|
|
clr,
|
|
gfortran,
|
|
gtest,
|
|
msgpack,
|
|
libxml2,
|
|
python3,
|
|
python3Packages,
|
|
openmp,
|
|
hipblas-common,
|
|
tensile,
|
|
lapack-reference,
|
|
ncurses,
|
|
libffi,
|
|
zlib,
|
|
zstd,
|
|
rocmUpdateScript,
|
|
buildTests ? false,
|
|
buildBenchmarks ? false,
|
|
buildSamples ? false,
|
|
# hipblaslt supports only devices with MFMA or WMMA
|
|
# WMMA on gfx1100 may be broken
|
|
# MFMA on MI100 may be broken
|
|
# MI200/MI300 known to work
|
|
gpuTargets ? (
|
|
clr.localGpuTargets or [
|
|
# "gfx908" FIXME: confirm MFMA on MI100 works
|
|
"gfx90a"
|
|
"gfx942"
|
|
# "gfx1100" FIXME: confirm WMMA targets work
|
|
]
|
|
),
|
|
}:
|
|
|
|
stdenv.mkDerivation (
|
|
finalAttrs:
|
|
let
|
|
supportsTargetArches =
|
|
(builtins.any (lib.strings.hasPrefix "gfx9") gpuTargets)
|
|
|| (builtins.any (lib.strings.hasPrefix "gfx11") gpuTargets);
|
|
tensile' = (tensile.override { isTensileLite = true; }).overrideAttrs {
|
|
inherit (finalAttrs) src;
|
|
sourceRoot = "${finalAttrs.src.name}/tensilelite";
|
|
};
|
|
py = python3.withPackages (ps: [
|
|
ps.pyyaml
|
|
ps.setuptools
|
|
ps.packaging
|
|
]);
|
|
gpuTargets' = lib.optionalString supportsTargetArches (lib.concatStringsSep ";" gpuTargets);
|
|
compiler = "amdclang++";
|
|
cFlags = "-O3 -I${msgpack}/include"; # FIXME: cmake files need patched to include this properly
|
|
in
|
|
{
|
|
pname = "hipblaslt${clr.gpuArchSuffix}";
|
|
version = "6.3.3";
|
|
|
|
src = fetchFromGitHub {
|
|
owner = "ROCm";
|
|
repo = "hipBLASLt";
|
|
rev = "rocm-${finalAttrs.version}";
|
|
hash = "sha256-ozfHwsxcczzYXN9SIkyfRvdtaCqlDN4bh3UHZNS2oVQ=";
|
|
};
|
|
env.CXX = compiler;
|
|
env.CFLAGS = cFlags;
|
|
env.CXXFLAGS = cFlags;
|
|
env.ROCM_PATH = "${clr}";
|
|
env.TENSILE_ROCM_ASSEMBLER_PATH = lib.getExe' clr "amdclang++";
|
|
env.TENSILE_GEN_ASSEMBLY_TOOLCHAIN = lib.getExe' clr "amdclang++";
|
|
# Some tensile scripts look for this as an env var rather than a cmake flag
|
|
env.CMAKE_CXX_COMPILER = lib.getExe' clr "amdclang++";
|
|
requiredSystemFeatures = [ "big-parallel" ];
|
|
|
|
outputs = [
|
|
"out"
|
|
]
|
|
++ lib.optionals buildTests [
|
|
"test"
|
|
]
|
|
++ lib.optionals buildBenchmarks [
|
|
"benchmark"
|
|
]
|
|
++ lib.optionals buildSamples [
|
|
"sample"
|
|
];
|
|
|
|
postPatch = ''
|
|
mkdir -p build/Tensile/library
|
|
# git isn't needed and we have no .git
|
|
substituteInPlace cmake/Dependencies.cmake \
|
|
--replace-fail "find_package(Git REQUIRED)" ""
|
|
substituteInPlace CMakeLists.txt \
|
|
--replace-fail "include(virtualenv)" "" \
|
|
--replace-fail "virtualenv_install(\''${Tensile_TEST_LOCAL_PATH})" "" \
|
|
--replace-fail "virtualenv_install(\''${CMAKE_SOURCE_DIR}/tensilelite)" "" \
|
|
--replace-fail 'find_package(Tensile 4.33.0 EXACT REQUIRED HIP LLVM OpenMP PATHS "''${INSTALLED_TENSILE_PATH}")' "find_package(Tensile)" \
|
|
--replace-fail 'Tensile_CPU_THREADS ""' 'Tensile_CPU_THREADS "$ENV{NIX_BUILD_CORES}"'
|
|
# FIXME: TensileCreateExtOpLibraries build failure due to unsupported null operand
|
|
# Working around for now by disabling the ExtOp libs
|
|
substituteInPlace library/src/amd_detail/rocblaslt/src/CMakeLists.txt \
|
|
--replace-fail 'TensileCreateExtOpLibraries("' '# skipping TensileCreateExtOpLibraries'
|
|
substituteInPlace library/src/amd_detail/rocblaslt/src/kernels/compile_code_object.sh \
|
|
--replace-fail '${"\${rocm_path}"}/bin/' ""
|
|
'';
|
|
|
|
# Apply patches to allow building without a target arch if we need to do that
|
|
patches = lib.optionals (!supportsTargetArches) [
|
|
# Add ability to build without specitying any arch.
|
|
(fetchpatch {
|
|
sha256 = "sha256-VW3bPzmQvfo8+iKsVfpn4sbqAe41fLzCEUfBh9JxVyk=";
|
|
url = "https://raw.githubusercontent.com/gentoo/gentoo/refs/heads/master/sci-libs/hipBLASLt/files/hipBLASLt-6.1.1-no-arch.patch";
|
|
})
|
|
# Followup to above patch for 6.3.x
|
|
(fetchpatch {
|
|
sha256 = "sha256-GCsrne6BiWzwj8TMAfFuaYz1Pij97hoCc6E3qJhWb10=";
|
|
url = "https://raw.githubusercontent.com/gentoo/gentoo/refs/heads/master/sci-libs/hipBLASLt/files/hipBLASLt-6.3.0-no-arch-extra.patch";
|
|
})
|
|
];
|
|
|
|
doCheck = false;
|
|
doInstallCheck = false;
|
|
|
|
nativeBuildInputs = [
|
|
cmake
|
|
rocm-cmake
|
|
py
|
|
clr
|
|
gfortran
|
|
# need make to get streaming console output so nix knows build is still running
|
|
# so deliberately not using ninja
|
|
];
|
|
|
|
buildInputs = [
|
|
hipblas-common
|
|
tensile'
|
|
openmp
|
|
libffi
|
|
ncurses
|
|
|
|
# Tensile deps - not optional, building without tensile isn't actually supported
|
|
msgpack # FIXME: not included in cmake!
|
|
libxml2
|
|
python3Packages.msgpack
|
|
python3Packages.joblib
|
|
zlib
|
|
zstd
|
|
]
|
|
++ lib.optionals buildTests [
|
|
gtest
|
|
]
|
|
++ lib.optionals (buildTests || buildBenchmarks) [
|
|
lapack-reference
|
|
];
|
|
|
|
cmakeFlags = [
|
|
"-Wno-dev"
|
|
"-DCMAKE_BUILD_TYPE=Release"
|
|
"-DCMAKE_VERBOSE_MAKEFILE=ON"
|
|
"-DVIRTUALENV_PYTHON_EXENAME=${lib.getExe py}"
|
|
"-DTENSILE_USE_HIP=ON"
|
|
"-DTENSILE_BUILD_CLIENT=OFF"
|
|
"-DTENSILE_USE_FLOAT16_BUILTIN=ON"
|
|
"-DCMAKE_CXX_COMPILER=${compiler}"
|
|
# Manually define CMAKE_INSTALL_<DIR>
|
|
# See: https://github.com/NixOS/nixpkgs/pull/197838
|
|
"-DCMAKE_INSTALL_BINDIR=bin"
|
|
"-DCMAKE_INSTALL_LIBDIR=lib"
|
|
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
|
"-DHIPBLASLT_ENABLE_MARKER=Off"
|
|
# FIXME what are the implications of hardcoding this?
|
|
"-DTensile_CODE_OBJECT_VERSION=V5"
|
|
"-DTensile_COMPILER=${compiler}"
|
|
"-DAMDGPU_TARGETS=${gpuTargets'}"
|
|
"-DGPU_TARGETS=${gpuTargets'}"
|
|
"-DTensile_LIBRARY_FORMAT=msgpack"
|
|
]
|
|
++ lib.optionals (!supportsTargetArches) [
|
|
"-DBUILD_WITH_TENSILE=OFF"
|
|
]
|
|
++ lib.optionals buildTests [
|
|
"-DBUILD_CLIENTS_TESTS=ON"
|
|
]
|
|
++ lib.optionals buildBenchmarks [
|
|
"-DBUILD_CLIENTS_BENCHMARKS=ON"
|
|
]
|
|
++ lib.optionals buildSamples [
|
|
"-DBUILD_CLIENTS_SAMPLES=ON"
|
|
];
|
|
|
|
postInstall =
|
|
lib.optionalString buildTests ''
|
|
mkdir -p $test/bin
|
|
mv $out/bin/hipblas-test $test/bin
|
|
''
|
|
+ lib.optionalString buildBenchmarks ''
|
|
mkdir -p $benchmark/bin
|
|
mv $out/bin/hipblas-bench $benchmark/bin
|
|
''
|
|
+ lib.optionalString buildSamples ''
|
|
mkdir -p $sample/bin
|
|
mv $out/bin/example-* $sample/bin
|
|
''
|
|
+ lib.optionalString (buildTests || buildBenchmarks || buildSamples) ''
|
|
rmdir $out/bin
|
|
'';
|
|
# If this is false there are no kernels in the output lib
|
|
# and it's useless at runtime
|
|
# so if it's an optional dep it's best to not depend on it
|
|
# Some packages like torch need hipblaslt to compile
|
|
# and are fine ignoring it at runtime if it's not supported
|
|
# so we have to support building an empty hipblaslt
|
|
passthru.supportsTargetArches = supportsTargetArches;
|
|
passthru.updateScript = rocmUpdateScript {
|
|
name = finalAttrs.pname;
|
|
inherit (finalAttrs.src) owner repo;
|
|
};
|
|
passthru.tensilelite = tensile';
|
|
meta = with lib; {
|
|
description = "Library that provides general matrix-matrix operations with a flexible API";
|
|
homepage = "https://github.com/ROCm/hipBLASlt";
|
|
license = with licenses; [ mit ];
|
|
teams = [ teams.rocm ];
|
|
platforms = platforms.linux;
|
|
};
|
|
}
|
|
)
|