diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index d77bbceb07f1..04f38641323c 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -45,42 +45,25 @@ jobs: filter: tree:0 path: trusted - - name: Check cherry-picks - id: check - continue-on-error: true - env: - BASE_SHA: ${{ github.event.pull_request.base.sha }} - HEAD_SHA: ${{ github.event.pull_request.head.sha }} - run: | - ./trusted/ci/check-cherry-picks.sh "$BASE_SHA" "$HEAD_SHA" checked-cherry-picks.md + - name: Install dependencies + run: npm install bottleneck - name: Log current API rate limits env: GH_TOKEN: ${{ github.token }} run: gh api /rate_limit | jq - - name: Prepare review - if: steps.check.outcome == 'failure' + - name: Check cherry-picks + id: check + continue-on-error: true uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 with: script: | - const { readFile, writeFile } = require('node:fs/promises') - - const job_url = (await github.rest.actions.listJobsForWorkflowRun({ - owner: context.repo.owner, - repo: context.repo.repo, - run_id: context.runId - })).data.jobs[0].html_url + '?pr=' + context.payload.pull_request.number - - const header = await readFile('trusted/ci/check-cherry-picks.md') - const body = await readFile('checked-cherry-picks.md') - const footer = - `\n_Hint: The full diffs are also available in the [runner logs](${job_url}) with slightly better highlighting._` - - const review = header + body + footer - await writeFile('review.md', review) - core.summary.addRaw(review) - core.summary.write() + require('./trusted/ci/github-script/commits.js')({ + github, + context, + core, + }) - name: Request changes if: ${{ github.event_name == 'pull_request_target' && steps.check.outcome == 'failure' }} diff --git a/ci/check-cherry-picks.sh b/ci/check-cherry-picks.sh deleted file mode 100755 index 113828f6ee1c..000000000000 --- a/ci/check-cherry-picks.sh +++ /dev/null @@ -1,152 +0,0 @@ -#!/usr/bin/env bash -# Find alleged cherry-picks - -set -euo pipefail - -if [[ $# != "2" && $# != "3" ]] ; then - echo "usage: check-cherry-picks.sh base_rev head_rev [markdown_file]" - exit 2 -fi - -markdown_file="$(realpath ${3:-/dev/null})" -[ -v 3 ] && rm -f "$markdown_file" - -# Make sure we are inside the nixpkgs repo, even when called from outside -cd "$(dirname "${BASH_SOURCE[0]}")" - -PICKABLE_BRANCHES="master staging release-??.?? staging-??.?? haskell-updates python-updates staging-next staging-next-??.??" -problem=0 - -# Not everyone calls their remote "origin" -remote="$(git remote -v | grep -i 'NixOS/nixpkgs' | head -n1 | cut -f1 || true)" - -commits="$(git rev-list --reverse "$1..$2")" - -log() { - type="$1" - shift 1 - - local -A prefix - prefix[success]=" ✔ " - if [ -v GITHUB_ACTIONS ]; then - prefix[warning]="::warning::" - prefix[error]="::error::" - else - prefix[warning]=" ⚠ " - prefix[error]=" ✘ " - fi - - echo "${prefix[$type]}$@" - - # Only logging errors and warnings, which allows comparing the markdown file - # between pushes to the PR. Even if a new, proper cherry-pick, commit is added - # it won't change the markdown file's content and thus not trigger another comment. - if [ "$type" != "success" ]; then - local -A alert - alert[warning]="WARNING" - alert[error]="CAUTION" - echo >> $markdown_file - echo "> [!${alert[$type]}]" >> $markdown_file - echo "> $@" >> $markdown_file - fi -} - -endgroup() { - if [ -v GITHUB_ACTIONS ] ; then - echo ::endgroup:: - fi -} - -while read -r new_commit_sha ; do - if [ -v GITHUB_ACTIONS ] ; then - echo "::group::Commit $new_commit_sha" - else - echo "=================================================" - fi - git rev-list --max-count=1 --format=medium "$new_commit_sha" - echo "-------------------------------------------------" - - # Using the last line with "cherry" + hash, because a chained backport - # can result in multiple of those lines. Only the last one counts. - original_commit_sha=$( - git rev-list --max-count=1 --format=format:%B "$new_commit_sha" \ - | grep -Ei "cherry.*[0-9a-f]{40}" | tail -n1 \ - | grep -Eoi -m1 '[0-9a-f]{40}' || true - ) - if [ -z "$original_commit_sha" ] ; then - endgroup - log warning "Couldn't locate original commit hash in message of $new_commit_sha." - problem=1 - continue - fi - - set -f # prevent pathname expansion of patterns - for pattern in $PICKABLE_BRANCHES ; do - set +f # re-enable pathname expansion - - # Reverse sorting by refname and taking one match only means we can only backport - # from unstable and the latest stable. That makes sense, because even right after - # branch-off, when we have two supported stable branches, we only ever want to cherry-pick - # **to** the older one, but never **from** it. - # This makes the job significantly faster in the case when commits can't be found, - # because it doesn't need to iterate through 20+ branches, which all need to be fetched. - branches="$(git for-each-ref --sort=-refname --format="%(refname)" \ - "refs/remotes/${remote:-origin}/$pattern" | head -n1)" - - while read -r picked_branch ; do - if git merge-base --is-ancestor "$original_commit_sha" "$picked_branch" ; then - range_diff_common='git --no-pager range-diff - --no-notes - --creation-factor=100 - '"$original_commit_sha~..$original_commit_sha"' - '"$new_commit_sha~..$new_commit_sha"' - ' - - if $range_diff_common --no-color 2> /dev/null | grep -E '^ {4}[+-]{2}' > /dev/null ; then - log success "$original_commit_sha present in branch $picked_branch" - endgroup - log warning "Difference between $new_commit_sha and original $original_commit_sha may warrant inspection." - - # First line contains commit SHAs, which we already printed. - $range_diff_common --color | tail -n +2 - - echo -e ">
Show diff\n>" >> $markdown_file - echo '> ```diff' >> $markdown_file - # The output of `git range-diff` is indented with 4 spaces, which we need to match with the - # code blocks indent to get proper syntax highlighting on GitHub. - diff="$($range_diff_common | tail -n +2 | sed -Ee 's/^ {4}/> /g')" - # Also limit the output to 10k bytes (and remove the last, potentially incomplete line), because - # GitHub comments are limited in length. The value of 10k is arbitrary with the assumption, that - # after the range-diff becomes a certain size, a reviewer is better off reviewing the regular diff - # in GitHub's UI anyway, thus treating the commit as "new" and not cherry-picked. - # Note: This could still lead to a too lengthy comment with multiple commits touching the limit. We - # consider this too unlikely to happen, to deal with explicitly. - max_length=10000 - if [ "${#diff}" -gt $max_length ]; then - printf -v diff "%s\n>\n> [...truncated...]" "$(echo "$diff" | head -c $max_length | head -n-1)" - fi - echo "$diff" >> $markdown_file - echo '> ```' >> $markdown_file - echo ">
" >> $markdown_file - - problem=1 - else - log success "$original_commit_sha present in branch $picked_branch" - log success "$original_commit_sha highly similar to $new_commit_sha" - $range_diff_common --color - endgroup - fi - - # move on to next commit - continue 3 - fi - done <<< "$branches" - done - - endgroup - log error "$original_commit_sha given in $new_commit_sha not found in any pickable branch." - - problem=1 -done <<< "$commits" - -exit $problem diff --git a/ci/github-script/.gitignore b/ci/github-script/.gitignore index 3c3629e647f5..6b8a37657bc7 100644 --- a/ci/github-script/.gitignore +++ b/ci/github-script/.gitignore @@ -1 +1,2 @@ node_modules +step-summary.md diff --git a/ci/github-script/.npmrc b/ci/github-script/.npmrc index f91a336f47b6..fb41d64f4679 100644 --- a/ci/github-script/.npmrc +++ b/ci/github-script/.npmrc @@ -1 +1,2 @@ package-lock-only = true +save-exact = true diff --git a/ci/github-script/README.md b/ci/github-script/README.md index caf52eb5bba3..52b78c79d79f 100644 --- a/ci/github-script/README.md +++ b/ci/github-script/README.md @@ -8,6 +8,10 @@ To run any of the scripts locally: - Enter `nix-shell` in `./ci/github-script`. - Ensure `gh` is authenticated. +## Check commits + +Run `./run commits OWNER REPO PR`, where OWNER is your username or "NixOS", REPO is the name of your fork or "nixpkgs" and PR is the number of the pull request to check. + ## Labeler Run `./run labels OWNER REPO`, where OWNER is your username or "NixOS" and REPO the name of your fork or "nixpkgs". diff --git a/ci/check-cherry-picks.md b/ci/github-script/check-cherry-picks.md similarity index 100% rename from ci/check-cherry-picks.md rename to ci/github-script/check-cherry-picks.md diff --git a/ci/github-script/commits.js b/ci/github-script/commits.js new file mode 100644 index 000000000000..99e969591045 --- /dev/null +++ b/ci/github-script/commits.js @@ -0,0 +1,199 @@ +module.exports = async function ({ github, context, core }) { + const { execFileSync } = require('node:child_process') + const { readFile, writeFile } = require('node:fs/promises') + const { join } = require('node:path') + const { classify } = require('../supportedBranches.js') + const withRateLimit = require('./withRateLimit.js') + + await withRateLimit({ github, core }, async (stats) => { + stats.prs = 1 + + const job_url = + context.runId && + ( + await github.rest.actions.listJobsForWorkflowRun({ + ...context.repo, + run_id: context.runId, + }) + ).data.jobs[0].html_url + + '?pr=' + + context.payload.pull_request.number + + async function handle({ sha, commit }) { + // Using the last line with "cherry" + hash, because a chained backport + // can result in multiple of those lines. Only the last one counts. + const match = Array.from( + commit.message.matchAll(/cherry.*([0-9a-f]{40})/g), + ).at(-1) + + if (!match) + return { + sha, + commit, + severity: 'warning', + message: `Couldn't locate original commit hash in message of ${sha}.`, + } + + const original_sha = match[1] + + let branches + try { + branches = ( + await github.request({ + // This is an undocumented endpoint to fetch the branches a commit is part of. + // There is no equivalent in neither the REST nor the GraphQL API. + // The endpoint itself is unlikely to go away, because GitHub uses it to display + // the list of branches on the detail page of a commit. + url: `https://github.com/${context.repo.owner}/${context.repo.repo}/branch_commits/${original_sha}`, + headers: { + accept: 'application/json', + }, + }) + ).data.branches + .map(({ branch }) => branch) + .filter((branch) => classify(branch).type.includes('development')) + } catch (e) { + // For some unknown reason a 404 error comes back as 500 without any more details in a GitHub Actions runner. + // Ignore these to return a regular error message below. + if (![404, 500].includes(e.status)) throw e + } + if (!branches?.length) + return { + sha, + commit, + severity: 'error', + message: `${original_sha} given in ${sha} not found in any pickable branch.`, + } + + const diff = execFileSync('git', [ + '-C', + __dirname, + 'range-diff', + '--no-color', + '--no-notes', + '--creation-factor=100', + `${original_sha}~..${original_sha}`, + `${sha}~..${sha}`, + ]) + .toString() + .split('\n') + // First line contains commit SHAs, which we'll print separately. + .slice(1) + // # The output of `git range-diff` is indented with 4 spaces, but we'll control indentation manually. + .map((line) => line.replace(/^ {4}/, '')) + + if (!diff.some((line) => line.match(/^[+-]{2}/))) + return { + sha, + commit, + severity: 'info', + message: `✔ ${original_sha} is highly similar to ${sha}.`, + } + + const colored_diff = execFileSync('git', [ + '-C', + __dirname, + 'range-diff', + '--color', + '--no-notes', + '--creation-factor=100', + `${original_sha}~..${original_sha}`, + `${sha}~..${sha}`, + ]).toString() + + return { + sha, + commit, + diff, + colored_diff, + severity: 'warning', + message: `Difference between ${sha} and original ${original_sha} may warrant inspection.`, + } + } + + const commits = await github.paginate(github.rest.pulls.listCommits, { + ...context.repo, + pull_number: context.payload.pull_request.number, + }) + + const results = await Promise.all(commits.map(handle)) + + // Log all results without truncation and with better highlighting to the job log. + results.forEach(({ sha, commit, severity, message, colored_diff }) => { + core.startGroup(`Commit ${sha}`) + core.info(`Author: ${commit.author.name} ${commit.author.email}`) + core.info(`Date: ${new Date(commit.author.date)}`) + core[severity](message) + core.endGroup() + if (colored_diff) core.info(colored_diff) + }) + + // Only create step summary below in case of warnings or errors. + if (results.every(({ severity }) => severity == 'info')) return + else process.exitCode = 1 + + core.summary.addRaw( + await readFile(join(__dirname, 'check-cherry-picks.md'), 'utf-8'), + true, + ) + results.forEach(({ severity, message, diff }) => { + if (severity == 'info') return + + // The docs for markdown alerts only show examples with markdown blockquote syntax, like this: + // > [!WARNING] + // > message + // However, our testing shows that this also works with a `
` html tag, as long as there + // is an empty line: + //
+ // + // [!WARNING] + // message + //
+ // Whether this is intended or just an implementation detail is unclear. + core.summary.addRaw('
') + core.summary.addRaw( + `\n\n[!${severity == 'warning' ? 'WARNING' : 'CAUTION'}]`, + true, + ) + core.summary.addRaw(`${message}`, true) + + if (diff) { + // Limit the output to 10k bytes and remove the last, potentially incomplete line, because GitHub + // comments are limited in length. The value of 10k is arbitrary with the assumption, that after + // the range-diff becomes a certain size, a reviewer is better off reviewing the regular diff in + // GitHub's UI anyway, thus treating the commit as "new" and not cherry-picked. + // Note: if multiple commits are close to the limit, this approach could still lead to a comment + // that's too long. We think this is unlikely to happen, and so don't deal with it explicitly. + const truncated = [] + let total_length = 0 + for (line of diff) { + total_length += line.length + if (total_length > 10000) { + truncated.push('', '[...truncated...]') + break + } else { + truncated.push(line) + } + } + + core.summary.addRaw('
Show diff') + core.summary.addRaw('\n\n```diff', true) + core.summary.addRaw(truncated.join('\n'), true) + core.summary.addRaw('```', true) + core.summary.addRaw('
') + } + + core.summary.addRaw('
') + }) + + if (job_url) + core.summary.addRaw( + `\n\n_Hint: The full diffs are also available in the [runner logs](${job_url}) with slightly better highlighting._`, + ) + + // Write to disk temporarily for next step in GHA. + await writeFile('review.md', core.summary.stringify()) + + core.summary.write() + }) +} diff --git a/ci/github-script/package-lock.json b/ci/github-script/package-lock.json index 538083dcea93..0dcc9b68e259 100644 --- a/ci/github-script/package-lock.json +++ b/ci/github-script/package-lock.json @@ -6,6 +6,7 @@ "": { "dependencies": { "@actions/artifact": "2.3.2", + "@actions/core": "1.11.1", "@actions/github": "6.0.1", "bottleneck": "2.19.5", "commander": "14.0.0" diff --git a/ci/github-script/package.json b/ci/github-script/package.json index 4671dd41f0cd..860bb09cdd95 100644 --- a/ci/github-script/package.json +++ b/ci/github-script/package.json @@ -2,6 +2,7 @@ "private": true, "dependencies": { "@actions/artifact": "2.3.2", + "@actions/core": "1.11.1", "@actions/github": "6.0.1", "bottleneck": "2.19.5", "commander": "14.0.0" diff --git a/ci/github-script/run b/ci/github-script/run index cbf3ea9315e0..3fe6e189eb96 100755 --- a/ci/github-script/run +++ b/ci/github-script/run @@ -1,12 +1,13 @@ #!/usr/bin/env -S node --import ./run import { execSync } from 'node:child_process' -import { mkdtempSync, rmSync } from 'node:fs' +import { closeSync, mkdtempSync, openSync, rmSync } from 'node:fs' import { tmpdir } from 'node:os' import { join } from 'node:path' import { program } from 'commander' +import * as core from '@actions/core' import { getOctokit } from '@actions/github' -async function run(action, owner, repo, pull_number, dry) { +async function run(action, owner, repo, pull_number, dry = true) { const token = execSync('gh auth token', { encoding: 'utf-8' }).trim() const github = getOctokit(token) @@ -19,39 +20,36 @@ async function run(action, owner, repo, pull_number, dry) { })).data } - const tmp = mkdtempSync(join(tmpdir(), 'github-script-')) - try { - process.env.GITHUB_WORKSPACE = tmp - process.chdir(tmp) + process.env['INPUT_GITHUB-TOKEN'] = token - await action({ - github, - context: { - payload, - repo: { - owner, - repo, - }, + closeSync(openSync('step-summary.md', 'w')) + process.env.GITHUB_STEP_SUMMARY = 'step-summary.md' + + await action({ + github, + context: { + payload, + repo: { + owner, + repo, }, - core: { - getInput() { - return token - }, - error: console.error, - info: console.log, - notice: console.log, - setFailed(msg) { - console.error(msg) - process.exitCode = 1 - }, - }, - dry, - }) - } finally { - rmSync(tmp, { recursive: true }) - } + }, + core, + dry, + }) } +program + .command('commits') + .description('Check commit structure of a pull request.') + .argument('', 'Owner of the GitHub repository to check (Example: NixOS)') + .argument('', 'Name of the GitHub repository to check (Example: nixpkgs)') + .argument('', 'Number of the Pull Request to check') + .action(async (owner, repo, pr) => { + const commits = (await import('./commits.js')).default + run(commits, owner, repo, pr) + }) + program .command('labels') .description('Manage labels on pull requests.') @@ -61,7 +59,14 @@ program .option('--no-dry', 'Make actual modifications') .action(async (owner, repo, pr, options) => { const labels = (await import('./labels.js')).default - run(labels, owner, repo, pr, options.dry) + const tmp = mkdtempSync(join(tmpdir(), 'github-script-')) + try { + process.env.GITHUB_WORKSPACE = tmp + process.chdir(tmp) + run(labels, owner, repo, pr, options.dry) + } finally { + rmSync(tmp, { recursive: true }) + } }) await program.parse() diff --git a/ci/github-script/withRateLimit.js b/ci/github-script/withRateLimit.js index 03fbd54291a8..ff97c7173fcf 100644 --- a/ci/github-script/withRateLimit.js +++ b/ci/github-script/withRateLimit.js @@ -20,6 +20,8 @@ module.exports = async function ({ github, core }, callback) { // Pause between mutative requests const writeLimits = new Bottleneck({ minTime: 1000 }).chain(allLimits) github.hook.wrap('request', async (request, options) => { + // Requests to a different host do not count against the rate limit. + if (options.url.startsWith('https://github.com')) return request(options) // Requests to the /rate_limit endpoint do not count against the rate limit. if (options.url == '/rate_limit') return request(options) // Search requests are in a different resource group, which allows 30 requests / minute.