nixpkgs/pkgs/build-support/setup-hooks/separate-debug-info.sh
Guillaume Girol a924c0eb95 separateDebugInfo: add symlinks to executable and source for debuginfod support
A debuginfod server must be able to map a build-id to
- debug symbols
- the original elf file from which the debug symbols were separated
- the corresponding source files

Currently, hydra provides an index from build-id to the nar of the debug
output containing the debug symbols.

Add symlinks in these outputs so that we can recover the store path of
the source and original elf file. We can then fetch them by the normal
binary cache protocol.

About source files: to minimize storage demands, in the ideal case,
software would be built from the source store path $src and the
debuginfod server would just have to serve source files from this store
path. In practice, source files are sometimes patched as part of the
build. This commit stores the modified files in the debug output, in a
so-called source overlay, so that the debuginfod server can serve the
patched content of the file.

The checksum was chosen as follows (where big is 4GB of zeros):

$  hyperfine -L s sysv,bsd,crc,sha1,sha224,sha256,sha384,sha512,blake2b,sm3 'cksum -a {s} big'
Benchmark 1: cksum -a sysv big
  Time (mean ± σ):     854.5 ms ± 270.5 ms    [User: 245.3 ms, System: 601.8 ms]
  Range (min … max):   760.5 ms … 1623.8 ms    10 runs

  Warning: The first benchmarking run for this command was significantly slower than the rest (1.624 s). This could be caused by (filesystem) caches that were not filled until after the first run. You should consider using the '--warmup' option to fill those caches before the actual benchmark. Alternatively, use the '--prepare' option to clear the caches before each timing run.

Benchmark 2: cksum -a bsd big
  Time (mean ± σ):      5.838 s ±  0.045 s    [User: 5.118 s, System: 0.693 s]
  Range (min … max):    5.767 s …  5.897 s    10 runs

Benchmark 3: cksum -a crc big
  Time (mean ± σ):     829.9 ms ±  28.6 ms    [User: 274.5 ms, System: 551.0 ms]
  Range (min … max):   803.2 ms … 904.8 ms    10 runs

Benchmark 4: cksum -a sha1 big
  Time (mean ± σ):      2.553 s ±  0.010 s    [User: 1.912 s, System: 0.631 s]
  Range (min … max):    2.543 s …  2.575 s    10 runs

Benchmark 5: cksum -a sha224 big
  Time (mean ± σ):      2.716 s ±  0.018 s    [User: 2.054 s, System: 0.645 s]
  Range (min … max):    2.695 s …  2.743 s    10 runs

Benchmark 6: cksum -a sha256 big
  Time (mean ± σ):      2.751 s ±  0.029 s    [User: 2.057 s, System: 0.674 s]
  Range (min … max):    2.712 s …  2.812 s    10 runs

Benchmark 7: cksum -a sha384 big
  Time (mean ± σ):      5.600 s ±  0.049 s    [User: 4.820 s, System: 0.753 s]
  Range (min … max):    5.515 s …  5.683 s    10 runs

Benchmark 8: cksum -a sha512 big
  Time (mean ± σ):      5.543 s ±  0.021 s    [User: 4.751 s, System: 0.768 s]
  Range (min … max):    5.523 s …  5.579 s    10 runs

Benchmark 9: cksum -a blake2b big
  Time (mean ± σ):      5.091 s ±  0.025 s    [User: 4.306 s, System: 0.764 s]
  Range (min … max):    5.048 s …  5.125 s    10 runs

Benchmark 10: cksum -a sm3 big
  Time (mean ± σ):     14.220 s ±  0.120 s    [User: 13.376 s, System: 0.783 s]
  Range (min … max):   14.077 s … 14.497 s    10 runs

Summary
  cksum -a crc big ran
    1.03 ± 0.33 times faster than cksum -a sysv big
    3.08 ± 0.11 times faster than cksum -a sha1 big
    3.27 ± 0.11 times faster than cksum -a sha224 big
    3.31 ± 0.12 times faster than cksum -a sha256 big
    6.13 ± 0.21 times faster than cksum -a blake2b big
    6.68 ± 0.23 times faster than cksum -a sha512 big
    6.75 ± 0.24 times faster than cksum -a sha384 big
    7.03 ± 0.25 times faster than cksum -a bsd big
   17.13 ± 0.61 times faster than cksum -a sm3 big

Unfortunately, crc (and sysv) are not supported by --check, so they are
disqualified. sha1, sha224 and sha256 are roughly as fast as one
another, so let's use a non-broken one, even though cryptographic
qualities are not needed here.
2025-06-14 22:14:55 +02:00

121 lines
5.0 KiB
Bash

# --- Build flags -----------------------------------------------------------
# NOTE(review): NIX_SET_BUILD_ID is consumed outside this file; presumably it
# makes the toolchain wrappers emit the build ID that _separateDebugInfo reads
# back with readelf -- confirm against the wrapper scripts.
export NIX_SET_BUILD_ID=1

# Append (never overwrite) the debug-related flags for the linker, the C
# compiler and rustc, then export all three in one go.
NIX_LDFLAGS+=" --compress-debug-sections=zlib"
NIX_CFLAGS_COMPILE+=" -ggdb -Wa,--compress-debug-sections"
NIX_RUSTFLAGS+=" -g -C strip=none"
export NIX_LDFLAGS NIX_CFLAGS_COMPILE NIX_RUSTFLAGS

# --- Source checksums ------------------------------------------------------
# Algorithm handed to `cksum -a`, both when recording the pristine source
# checksums and when re-checking them with `cksum --check`.
cksumAlgo=sha256

# --- Hook registration -----------------------------------------------------
fixupOutputHooks+=(_separateDebugInfo)
postUnpackHooks+=(_recordPristineSourceHashes)
# postUnpack hook: checksum every regular file below $sourceRoot so that
# _separateDebugInfo can later tell which source files were modified during
# the build.
#
# Globals read:    sourceRoot, cksumAlgo
# Globals written: recordedSourceChecksumsFileName (absolute path of the
#                  checksum list; left untouched when $sourceRoot is missing)
# Outputs:         one progress line on stdout; the checksum list is written
#                  to ./__nix_source_checksums in the current directory
# Returns:         0 when $sourceRoot does not exist, otherwise the status of
#                  the final readlink
_recordPristineSourceHashes() {
    # ${sourceRoot:-} keeps the existence test usable under `set -u`.
    # shellcheck disable=2154
    [ -e "${sourceRoot:-}" ] || return 0

    local checksumFileName=__nix_source_checksums
    echo "separate-debug-info: recording checksum of source files for debug support..."

    # The redirection creates $checksumFileName before find starts. When
    # $sourceRoot covers the current directory (e.g. sourceRoot=.), find would
    # therefore hand the half-written list to cksum; its recorded checksum
    # could never match afterwards and the list would be treated as a patched
    # source file. `! -samefile` keeps it out of its own listing.
    find "$sourceRoot" -type f ! -samefile "$checksumFileName" \
        -exec cksum -a "$cksumAlgo" '{}' \+ > "$checksumFileName"

    # Deliberately not local: _separateDebugInfo reads this variable.
    recordedSourceChecksumsFileName="$(readlink -f "$checksumFileName")"
}
# fixupOutput hook: split the debug information of every ELF file found under
# $prefix into the debug output.
#
# For each ELF file with a 40-hex-digit build ID "XXYYYY..." this creates,
# under $debugOutput/lib/debug/.build-id/XX/:
#   YYYY....debug          debug sections extracted with $OBJCOPY
#   YYYY....executable     symlink to the original ELF file
#   YYYY....source         symlink to $src (only when $src exists)
#   YYYY....sourceoverlay  symlink to $debugOutput/src/overlay (only when
#                          pristine source checksums were recorded)
# plus a symlink $debugOutput/lib/debug/<basename of the ELF file> pointing at
# the .debug file. The overlay directory is filled once, lazily, with every
# file that `cksum --check` reports as FAILED against the recorded checksums.
#
# Globals read: prefix, debug, out, src, READELF, OBJCOPY, cksumAlgo,
#               recordedSourceChecksumsFileName
# Calls:        isELF (not defined in this file)
# Outputs:      progress on stdout, warnings and errors on stderr
# Returns:      0 when there is nothing to do; 1 when changing into or out of
#               the directory holding the checksum list fails
_separateDebugInfo() {
    # shellcheck disable=2154
    [ -e "$prefix" ] || return 0

    local debugOutput="${debug:-$out}"
    # The debug output itself is never processed.
    if [ "$prefix" = "$debugOutput" ]; then return 0; fi

    # in case there is nothing to strip, don't fail the build
    mkdir -p "$debugOutput"

    local buildIdRelDir="lib/debug/.build-id"
    local dst="$debugOutput/$buildIdRelDir"

    # $src may be unset; expanding it bare would abort the shell under
    # `set -u`, so default it to the empty string (for which -e is false).
    local source=""
    local sourceOverlay=""
    if [ -e "${src:-}" ]; then
        source="$src"
        if [ -n "${recordedSourceChecksumsFileName:-}" ]; then
            sourceOverlay="$debugOutput/src/overlay"
        fi
    fi

    # Find executables and dynamic libraries.
    local i id modifiedSourceFile
    local debuginfoDir buildIdPrefix debuginfoFile
    local executableSymlink sourceSymlink sourceOverlaySymlink
    while IFS= read -r -d $'\0' i; do
        if ! isELF "$i"; then continue; fi

        # Only complain about missing tools once an ELF file actually needs them.
        if [ -z "${READELF:-}" ]; then
            echo "_separateDebugInfo: '\$READELF' variable is empty, skipping." 1>&2
            break
        fi
        if [ -z "${OBJCOPY:-}" ]; then
            echo "_separateDebugInfo: '\$OBJCOPY' variable is empty, skipping." 1>&2
            break
        fi

        # Extract the Build ID. FIXME: there's probably a cleaner way.
        # ($READELF / $OBJCOPY are left unquoted exactly as before.)
        id="$($READELF -n "$i" | sed 's/.*Build ID: \([0-9a-f]*\).*/\1/; t; d')"
        if [ "${#id}" != 40 ]; then
            echo "could not find build ID of $i, skipping" >&2
            continue
        fi

        # Extract the debug info.
        echo "separating debug info from $i (build ID $id)"
        debuginfoDir="$dst/${id:0:2}"
        buildIdPrefix="$debuginfoDir/${id:2}"
        debuginfoFile="$buildIdPrefix.debug"
        executableSymlink="$buildIdPrefix.executable"
        sourceSymlink="$buildIdPrefix.source"
        sourceOverlaySymlink="$buildIdPrefix.sourceoverlay"

        mkdir -p "$debuginfoDir"

        if [ -f "$debuginfoFile" ]; then
            echo "separate-debug-info: warning: multiple files with build id $id found, overwriting" >&2
        fi

        # This may fail, e.g. if the binary is for a different
        # architecture than we're building for.  (This happens with
        # firmware blobs in QEMU.)
        if $OBJCOPY --only-keep-debug "$i" "$debuginfoFile"; then
            # If we succeeded, also create a symlink named after the original
            # file in lib/debug.
            # NOTE(review): the previous comment called this
            # "<original-name>.debug", but the link name has never carried a
            # ".debug" suffix; the name is kept unchanged here.
            ln -sfn "$debuginfoFile" "$dst/../$(basename "$i")"

            # also create a symlink mapping the build-id to the original elf file and the source
            # debuginfod protocol relies on it
            ln -sfn "$i" "$executableSymlink"
            if [ -n "$source" ]; then
                ln -sfn "$source" "$sourceSymlink"
            fi

            if [ -n "$sourceOverlay" ]; then
                # create it lazily
                if [ ! -d "$sourceOverlay" ]; then
                    echo "separate-debug-info: copying patched source files to $sourceOverlay..."
                    mkdir -p "$sourceOverlay"
                    # The recorded paths are relative to the directory that
                    # holds the checksum list.
                    pushd "$(dirname "$recordedSourceChecksumsFileName")" || { echo "separate-debug-info: failed to cd parent directory of $recordedSourceChecksumsFileName" >&2; return 1; }
                    while IFS= read -r -d $'\0' modifiedSourceFile; do
                        if [ -z "$modifiedSourceFile" ]; then
                            continue
                        fi
                        # this can happen with files with '\n' in their name
                        if [ ! -f "$modifiedSourceFile" ]; then
                            echo "separate-debug-info: cannot save modified source file $modifiedSourceFile: does not exist. ignoring" >&2
                            continue
                        fi
                        mkdir -p "$sourceOverlay/$(dirname "$modifiedSourceFile")"
                        cp -v "$modifiedSourceFile" "$sourceOverlay/$modifiedSourceFile"
                    # LC_ALL (not LANG) is what guarantees the untranslated
                    # ": FAILED" marker: LC_ALL and LC_MESSAGES both take
                    # precedence over LANG. The first sed turns each failed
                    # line into "<name>\0\n"; the second drops the newline
                    # left at the start of every following NUL record.
                    done < <(LC_ALL=C cksum -a "$cksumAlgo" --check --ignore-missing --quiet "$recordedSourceChecksumsFileName" 2>&1 | sed -n -e 's/: FAILED$/\x00/p' | sed -z -e 's/^\n//')
                    popd || { echo "separate-debug-info: failed to popd" >&2; return 1; }
                fi
                ln -sfn "$sourceOverlay" "$sourceOverlaySymlink"
            fi
        else
            # If we failed, try to clean up unnecessary directories.
            # rmdir -p is given a path relative to $debugOutput so that it can
            # only prune directories inside the debug output and never removes
            # $debugOutput itself (created above on purpose) or its parents.
            (cd "$debugOutput" && rmdir -p --ignore-fail-on-non-empty "$buildIdRelDir/${id:0:2}")
        fi
    done < <(find "$prefix" -type f -print0 | sort -z)
}