ci/github-script/commits: init from ci/check-cherry-picks (#425449)

2025-07-16 10:25:30 +00:00 · 2025-07-16 10:25:30 +00:00 · 182184a00f
commit 182184a00f
parent f200890f87 b46cb23251
11 changed files with 256 additions and 211 deletions
--- a/.github/workflows/check.yml
+++ b/.github/workflows/check.yml
@ -45,42 +45,25 @@ jobs:
          filter: tree:0
          path: trusted

-      - name: Check cherry-picks
-        id: check
-        continue-on-error: true
-        env:
-          BASE_SHA: ${{ github.event.pull_request.base.sha }}
-          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
-        run: |
-          ./trusted/ci/check-cherry-picks.sh "$BASE_SHA" "$HEAD_SHA" checked-cherry-picks.md
+      - name: Install dependencies
+        run: npm install bottleneck

      - name: Log current API rate limits
        env:
          GH_TOKEN: ${{ github.token }}
        run: gh api /rate_limit | jq

-      - name: Prepare review
-        if: steps.check.outcome == 'failure'
+      - name: Check cherry-picks
+        id: check
+        continue-on-error: true
        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
        with:
          script: |
-            const { readFile, writeFile } = require('node:fs/promises')
-
-            const job_url = (await github.rest.actions.listJobsForWorkflowRun({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              run_id: context.runId
-            })).data.jobs[0].html_url + '?pr=' + context.payload.pull_request.number
-
-            const header = await readFile('trusted/ci/check-cherry-picks.md')
-            const body = await readFile('checked-cherry-picks.md')
-            const footer =
-              `\n_Hint: The full diffs are also available in the [runner logs](${job_url}) with slightly better highlighting._`
-
-            const review = header + body + footer
-            await writeFile('review.md', review)
-            core.summary.addRaw(review)
-            core.summary.write()
+            require('./trusted/ci/github-script/commits.js')({
+              github,
+              context,
+              core,
+            })

      - name: Request changes
        if: ${{ github.event_name == 'pull_request_target' && steps.check.outcome == 'failure' }}
--- a/ci/check-cherry-picks.sh
+++ b/ci/check-cherry-picks.sh
@ -1,152 +0,0 @@
-#!/usr/bin/env bash
-# Find alleged cherry-picks
-
-set -euo pipefail
-
-if [[ $# != "2" && $# != "3" ]] ; then
-  echo "usage: check-cherry-picks.sh base_rev head_rev [markdown_file]"
-  exit 2
-fi
-
-markdown_file="$(realpath ${3:-/dev/null})"
-[ -v 3 ] && rm -f "$markdown_file"
-
-# Make sure we are inside the nixpkgs repo, even when called from outside
-cd "$(dirname "${BASH_SOURCE[0]}")"
-
-PICKABLE_BRANCHES="master staging release-??.?? staging-??.?? haskell-updates python-updates staging-next staging-next-??.??"
-problem=0
-
-# Not everyone calls their remote "origin"
-remote="$(git remote -v | grep -i 'NixOS/nixpkgs' | head -n1 | cut -f1 || true)"
-
-commits="$(git rev-list --reverse "$1..$2")"
-
-log() {
-  type="$1"
-  shift 1
-
-  local -A prefix
-  prefix[success]="  ✔ "
-  if [ -v GITHUB_ACTIONS ]; then
-    prefix[warning]="::warning::"
-    prefix[error]="::error::"
-  else
-    prefix[warning]="  ⚠ "
-    prefix[error]="  ✘ "
-  fi
-
-  echo "${prefix[$type]}$@"
-
-  # Only logging errors and warnings, which allows comparing the markdown file
-  # between pushes to the PR. Even if a new, proper cherry-pick, commit is added
-  # it won't change the markdown file's content and thus not trigger another comment.
-  if [ "$type" != "success" ]; then
-    local -A alert
-    alert[warning]="WARNING"
-    alert[error]="CAUTION"
-    echo >> $markdown_file
-    echo "> [!${alert[$type]}]" >> $markdown_file
-    echo "> $@" >> $markdown_file
-  fi
-}
-
-endgroup() {
-  if [ -v GITHUB_ACTIONS ] ; then
-    echo ::endgroup::
-  fi
-}
-
-while read -r new_commit_sha ; do
-  if [ -v GITHUB_ACTIONS ] ; then
-    echo "::group::Commit $new_commit_sha"
-  else
-    echo "================================================="
-  fi
-  git rev-list --max-count=1 --format=medium "$new_commit_sha"
-  echo "-------------------------------------------------"
-
-  # Using the last line with "cherry" + hash, because a chained backport
-  # can result in multiple of those lines. Only the last one counts.
-  original_commit_sha=$(
-    git rev-list --max-count=1 --format=format:%B "$new_commit_sha" \
-    | grep -Ei "cherry.*[0-9a-f]{40}" | tail -n1 \
-    | grep -Eoi -m1 '[0-9a-f]{40}' || true
-  )
-  if [ -z "$original_commit_sha" ] ; then
-    endgroup
-    log warning "Couldn't locate original commit hash in message of $new_commit_sha."
-    problem=1
-    continue
-  fi
-
-  set -f # prevent pathname expansion of patterns
-  for pattern in $PICKABLE_BRANCHES ; do
-    set +f # re-enable pathname expansion
-
-    # Reverse sorting by refname and taking one match only means we can only backport
-    # from unstable and the latest stable. That makes sense, because even right after
-    # branch-off, when we have two supported stable branches, we only ever want to cherry-pick
-    # **to** the older one, but never **from** it.
-    # This makes the job significantly faster in the case when commits can't be found,
-    # because it doesn't need to iterate through 20+ branches, which all need to be fetched.
-    branches="$(git for-each-ref --sort=-refname --format="%(refname)" \
-      "refs/remotes/${remote:-origin}/$pattern" | head -n1)"
-
-    while read -r picked_branch ; do
-      if git merge-base --is-ancestor "$original_commit_sha" "$picked_branch" ; then
-        range_diff_common='git --no-pager range-diff
-          --no-notes
-          --creation-factor=100
-          '"$original_commit_sha~..$original_commit_sha"'
-          '"$new_commit_sha~..$new_commit_sha"'
-        '
-
-        if $range_diff_common --no-color 2> /dev/null | grep -E '^ {4}[+-]{2}' > /dev/null ; then
-          log success "$original_commit_sha present in branch $picked_branch"
-          endgroup
-          log warning "Difference between $new_commit_sha and original $original_commit_sha may warrant inspection."
-
-          # First line contains commit SHAs, which we already printed.
-          $range_diff_common --color | tail -n +2
-
-          echo -e "> <details><summary>Show diff</summary>\n>" >> $markdown_file
-          echo '> ```diff' >> $markdown_file
-          # The output of `git range-diff` is indented with 4 spaces, which we need to match with the
-          # code blocks indent to get proper syntax highlighting on GitHub.
-          diff="$($range_diff_common | tail -n +2 | sed -Ee 's/^ {4}/> /g')"
-          # Also limit the output to 10k bytes (and remove the last, potentially incomplete line), because
-          # GitHub comments are limited in length. The value of 10k is arbitrary with the assumption, that
-          # after the range-diff becomes a certain size, a reviewer is better off reviewing the regular diff
-          # in GitHub's UI anyway, thus treating the commit as "new" and not cherry-picked.
-          # Note: This could still lead to a too lengthy comment with multiple commits touching the limit. We
-          # consider this too unlikely to happen, to deal with explicitly.
-          max_length=10000
-          if [ "${#diff}" -gt $max_length ]; then
-            printf -v diff "%s\n>\n> [...truncated...]" "$(echo "$diff" | head -c $max_length | head -n-1)"
-          fi
-          echo "$diff" >> $markdown_file
-          echo '> ```' >> $markdown_file
-          echo "> </details>" >> $markdown_file
-
-          problem=1
-        else
-          log success "$original_commit_sha present in branch $picked_branch"
-          log success "$original_commit_sha highly similar to $new_commit_sha"
-          $range_diff_common --color
-          endgroup
-        fi
-
-        # move on to next commit
-        continue 3
-      fi
-    done <<< "$branches"
-  done
-
-  endgroup
-  log error "$original_commit_sha given in $new_commit_sha not found in any pickable branch."
-
-  problem=1
-done <<< "$commits"
-
-exit $problem
--- a/ci/github-script/.gitignore
+++ b/ci/github-script/.gitignore
@ -1 +1,2 @@
 node_modules
+step-summary.md
--- a/ci/github-script/.npmrc
+++ b/ci/github-script/.npmrc
@ -1 +1,2 @@
 package-lock-only = true
+save-exact = true
--- a/ci/github-script/README.md
+++ b/ci/github-script/README.md
@ -8,6 +8,10 @@ To run any of the scripts locally:
 - Enter `nix-shell` in `./ci/github-script`.
 - Ensure `gh` is authenticated.

+## Check commits
+
+Run `./run commits OWNER REPO PR`, where OWNER is your username or "NixOS", REPO is the name of your fork or "nixpkgs" and PR is the number of the pull request to check.
+
 ## Labeler

 Run `./run labels OWNER REPO`, where OWNER is your username or "NixOS" and REPO the name of your fork or "nixpkgs".
--- a/ci/github-script/check-cherry-picks.md
+++ b/ci/github-script/check-cherry-picks.md
--- a/ci/github-script/commits.js
+++ b/ci/github-script/commits.js
@ -0,0 +1,199 @@
+module.exports = async function ({ github, context, core }) {
+  const { execFileSync } = require('node:child_process')
+  const { readFile, writeFile } = require('node:fs/promises')
+  const { join } = require('node:path')
+  const { classify } = require('../supportedBranches.js')
+  const withRateLimit = require('./withRateLimit.js')
+
+  await withRateLimit({ github, core }, async (stats) => {
+    stats.prs = 1
+
+    const job_url =
+      context.runId &&
+      (
+        await github.rest.actions.listJobsForWorkflowRun({
+          ...context.repo,
+          run_id: context.runId,
+        })
+      ).data.jobs[0].html_url +
+        '?pr=' +
+        context.payload.pull_request.number
+
+    async function handle({ sha, commit }) {
+      // Using the last line with "cherry" + hash, because a chained backport
+      // can result in multiple of those lines. Only the last one counts.
+      const match = Array.from(
+        commit.message.matchAll(/cherry.*([0-9a-f]{40})/g),
+      ).at(-1)
+
+      if (!match)
+        return {
+          sha,
+          commit,
+          severity: 'warning',
+          message: `Couldn't locate original commit hash in message of ${sha}.`,
+        }
+
+      const original_sha = match[1]
+
+      let branches
+      try {
+        branches = (
+          await github.request({
+            // This is an undocumented endpoint to fetch the branches a commit is part of.
+            // There is no equivalent in neither the REST nor the GraphQL API.
+            // The endpoint itself is unlikely to go away, because GitHub uses it to display
+            // the list of branches on the detail page of a commit.
+            url: `https://github.com/${context.repo.owner}/${context.repo.repo}/branch_commits/${original_sha}`,
+            headers: {
+              accept: 'application/json',
+            },
+          })
+        ).data.branches
+          .map(({ branch }) => branch)
+          .filter((branch) => classify(branch).type.includes('development'))
+      } catch (e) {
+        // For some unknown reason a 404 error comes back as 500 without any more details in a GitHub Actions runner.
+        // Ignore these to return a regular error message below.
+        if (![404, 500].includes(e.status)) throw e
+      }
+      if (!branches?.length)
+        return {
+          sha,
+          commit,
+          severity: 'error',
+          message: `${original_sha} given in ${sha} not found in any pickable branch.`,
+        }
+
+      const diff = execFileSync('git', [
+        '-C',
+        __dirname,
+        'range-diff',
+        '--no-color',
+        '--no-notes',
+        '--creation-factor=100',
+        `${original_sha}~..${original_sha}`,
+        `${sha}~..${sha}`,
+      ])
+        .toString()
+        .split('\n')
+        // First line contains commit SHAs, which we'll print separately.
+        .slice(1)
+        // # The output of `git range-diff` is indented with 4 spaces, but we'll control indentation manually.
+        .map((line) => line.replace(/^ {4}/, ''))
+
+      if (!diff.some((line) => line.match(/^[+-]{2}/)))
+        return {
+          sha,
+          commit,
+          severity: 'info',
+          message: `✔ ${original_sha} is highly similar to ${sha}.`,
+        }
+
+      const colored_diff = execFileSync('git', [
+        '-C',
+        __dirname,
+        'range-diff',
+        '--color',
+        '--no-notes',
+        '--creation-factor=100',
+        `${original_sha}~..${original_sha}`,
+        `${sha}~..${sha}`,
+      ]).toString()
+
+      return {
+        sha,
+        commit,
+        diff,
+        colored_diff,
+        severity: 'warning',
+        message: `Difference between ${sha} and original ${original_sha} may warrant inspection.`,
+      }
+    }
+
+    const commits = await github.paginate(github.rest.pulls.listCommits, {
+      ...context.repo,
+      pull_number: context.payload.pull_request.number,
+    })
+
+    const results = await Promise.all(commits.map(handle))
+
+    // Log all results without truncation and with better highlighting to the job log.
+    results.forEach(({ sha, commit, severity, message, colored_diff }) => {
+      core.startGroup(`Commit ${sha}`)
+      core.info(`Author: ${commit.author.name} ${commit.author.email}`)
+      core.info(`Date: ${new Date(commit.author.date)}`)
+      core[severity](message)
+      core.endGroup()
+      if (colored_diff) core.info(colored_diff)
+    })
+
+    // Only create step summary below in case of warnings or errors.
+    if (results.every(({ severity }) => severity == 'info')) return
+    else process.exitCode = 1
+
+    core.summary.addRaw(
+      await readFile(join(__dirname, 'check-cherry-picks.md'), 'utf-8'),
+      true,
+    )
+    results.forEach(({ severity, message, diff }) => {
+      if (severity == 'info') return
+
+      // The docs for markdown alerts only show examples with markdown blockquote syntax, like this:
+      //   > [!WARNING]
+      //   > message
+      // However, our testing shows that this also works with a `<blockquote>` html tag, as long as there
+      // is an empty line:
+      //   <blockquote>
+      //
+      //   [!WARNING]
+      //   message
+      //   </blockquote>
+      // Whether this is intended or just an implementation detail is unclear.
+      core.summary.addRaw('<blockquote>')
+      core.summary.addRaw(
+        `\n\n[!${severity == 'warning' ? 'WARNING' : 'CAUTION'}]`,
+        true,
+      )
+      core.summary.addRaw(`${message}`, true)
+
+      if (diff) {
+        // Limit the output to 10k bytes and remove the last, potentially incomplete line, because GitHub
+        // comments are limited in length. The value of 10k is arbitrary with the assumption, that after
+        // the range-diff becomes a certain size, a reviewer is better off reviewing the regular diff in
+        // GitHub's UI anyway, thus treating the commit as "new" and not cherry-picked.
+        // Note: if multiple commits are close to the limit, this approach could still lead to a comment
+        // that's too long. We think this is unlikely to happen, and so don't deal with it explicitly.
+        const truncated = []
+        let total_length = 0
+        for (line of diff) {
+          total_length += line.length
+          if (total_length > 10000) {
+            truncated.push('', '[...truncated...]')
+            break
+          } else {
+            truncated.push(line)
+          }
+        }
+
+        core.summary.addRaw('<details><summary>Show diff</summary>')
+        core.summary.addRaw('\n\n```diff', true)
+        core.summary.addRaw(truncated.join('\n'), true)
+        core.summary.addRaw('```', true)
+        core.summary.addRaw('</details>')
+      }
+
+      core.summary.addRaw('</blockquote>')
+    })
+
+    if (job_url)
+      core.summary.addRaw(
+        `\n\n_Hint: The full diffs are also available in the [runner logs](${job_url}) with slightly better highlighting._`,
+      )
+
+    // Write to disk temporarily for next step in GHA.
+    await writeFile('review.md', core.summary.stringify())
+
+    core.summary.write()
+  })
+}
--- a/ci/github-script/package-lock.json
+++ b/ci/github-script/package-lock.json
@ -6,6 +6,7 @@
    "": {
      "dependencies": {
        "@actions/artifact": "2.3.2",
+        "@actions/core": "1.11.1",
        "@actions/github": "6.0.1",
        "bottleneck": "2.19.5",
        "commander": "14.0.0"
--- a/ci/github-script/package.json
+++ b/ci/github-script/package.json
@ -2,6 +2,7 @@
  "private": true,
  "dependencies": {
    "@actions/artifact": "2.3.2",
+    "@actions/core": "1.11.1",
    "@actions/github": "6.0.1",
    "bottleneck": "2.19.5",
    "commander": "14.0.0"
--- a/ci/github-script/run
+++ b/ci/github-script/run
@ -1,12 +1,13 @@
 #!/usr/bin/env -S node --import ./run
 import { execSync } from 'node:child_process'
-import { mkdtempSync, rmSync } from 'node:fs'
+import { closeSync, mkdtempSync, openSync, rmSync } from 'node:fs'
 import { tmpdir } from 'node:os'
 import { join } from 'node:path'
 import { program } from 'commander'
+import * as core from '@actions/core'
 import { getOctokit } from '@actions/github'

-async function run(action, owner, repo, pull_number, dry) {
+async function run(action, owner, repo, pull_number, dry = true) {
  const token = execSync('gh auth token', { encoding: 'utf-8' }).trim()

  const github = getOctokit(token)
@ -19,39 +20,36 @@ async function run(action, owner, repo, pull_number, dry) {
    })).data
  }

-  const tmp = mkdtempSync(join(tmpdir(), 'github-script-'))
-  try {
-    process.env.GITHUB_WORKSPACE = tmp
-    process.chdir(tmp)
+  process.env['INPUT_GITHUB-TOKEN'] = token

-    await action({
-      github,
-      context: {
-        payload,
-        repo: {
-          owner,
-          repo,
-        },
+  closeSync(openSync('step-summary.md', 'w'))
+  process.env.GITHUB_STEP_SUMMARY = 'step-summary.md'
+
+  await action({
+    github,
+    context: {
+      payload,
+      repo: {
+        owner,
+        repo,
      },
-      core: {
-        getInput() {
-          return token
-        },
-        error: console.error,
-        info: console.log,
-        notice: console.log,
-        setFailed(msg) {
-          console.error(msg)
-          process.exitCode = 1
-        },
-      },
-      dry,
-    })
-  } finally {
-    rmSync(tmp, { recursive: true })
-  }
+    },
+    core,
+    dry,
+  })
 }

+program
+  .command('commits')
+  .description('Check commit structure of a pull request.')
+  .argument('<owner>', 'Owner of the GitHub repository to check (Example: NixOS)')
+  .argument('<repo>', 'Name of the GitHub repository to check (Example: nixpkgs)')
+  .argument('<pr>', 'Number of the Pull Request to check')
+  .action(async (owner, repo, pr) => {
+    const commits = (await import('./commits.js')).default
+    run(commits, owner, repo, pr)
+  })
+
 program
  .command('labels')
  .description('Manage labels on pull requests.')
@ -61,7 +59,14 @@ program
  .option('--no-dry', 'Make actual modifications')
  .action(async (owner, repo, pr, options) => {
    const labels = (await import('./labels.js')).default
-    run(labels, owner, repo, pr, options.dry)
+    const tmp = mkdtempSync(join(tmpdir(), 'github-script-'))
+    try {
+      process.env.GITHUB_WORKSPACE = tmp
+      process.chdir(tmp)
+      run(labels, owner, repo, pr, options.dry)
+    } finally {
+      rmSync(tmp, { recursive: true })
+    }
  })

 await program.parse()
--- a/ci/github-script/withRateLimit.js
+++ b/ci/github-script/withRateLimit.js
@ -20,6 +20,8 @@ module.exports = async function ({ github, core }, callback) {
  // Pause between mutative requests
  const writeLimits = new Bottleneck({ minTime: 1000 }).chain(allLimits)
  github.hook.wrap('request', async (request, options) => {
+    // Requests to a different host do not count against the rate limit.
+    if (options.url.startsWith('https://github.com')) return request(options)
    // Requests to the /rate_limit endpoint do not count against the rate limit.
    if (options.url == '/rate_limit') return request(options)
    // Search requests are in a different resource group, which allows 30 requests / minute.