Skip to content

Commit d78036f

Browse files
authored
Only restart failed libc++ jobs, not cancelled ones. (#146397)
Despite the error message for preempted jobs containing the word "cancelled", these are considered workflow "failures" by GitHub. This is important, because if we fail to distinguish between "failed" and "cancelled" jobs, the restarter will fight to restart jobs a user intentionally cancelled (either by pressing the "cancel" button, or by pushing an update to a PR). This reverts commit 3ea7fc7. This also reverts earlier attempts to solve this problem by matching the messages to detect manual cancellations. This change also removes ldionne's test workflow, as it's hard to correctly keep in sync. This change does not attempt to address the maintainability or testability of this script, which continues to be an issue. If asked to address these issues, my plan is to write the script in Python (which most people are more familiar with), and turn this action into a "Docker action" using a container with the Python action and dependencies built into it. Let me know if that's a direction we're interested in heading.
1 parent 8b3cc4d commit d78036f

File tree

1 file changed

+4
-92
lines changed

1 file changed

+4
-92
lines changed

.github/workflows/libcxx-restart-preempted-jobs.yaml

Lines changed: 4 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ permissions:
2020

2121
jobs:
2222
restart:
23-
if: github.repository_owner == 'llvm' && (github.event.workflow_run.conclusion == 'failure' || github.event.workflow_run.conclusion == 'cancelled')
23+
if: github.repository_owner == 'llvm' && (github.event.workflow_run.conclusion == 'failure')
2424
name: "Restart Job"
2525
permissions:
2626
statuses: read
@@ -35,8 +35,8 @@ jobs:
3535
// The "The run was canceled by" message comes from a user manually canceling a workflow
3636
// the "higher priority" message comes from github canceling a workflow because the user updated the change.
3737
// And the "exit code 1" message indicates a genuine failure.
38-
const failure_regex = /(Process completed with exit code 1.)|(Canceling since a higher priority waiting request)|(The run was canceled by)/
39-
const preemption_regex = /(The runner has received a shutdown signal)/
38+
const failure_regex = /(Process completed with exit code 1.)/
39+
const preemption_regex = /(The runner has received a shutdown signal)|(The operation was canceled)/
4040
4141
const wf_run = context.payload.workflow_run
4242
core.notice(`Running on "${wf_run.display_title}" by @${wf_run.actor.login} (event: ${wf_run.event})\nWorkflow run URL: ${wf_run.html_url}`)
@@ -77,7 +77,7 @@ jobs:
7777
console.log('Check run was not completed. Skipping.');
7878
continue;
7979
}
80-
if (check_run.conclusion != 'failure' && check_run.conclusion != 'cancelled') {
80+
if (check_run.conclusion != 'failure') {
8181
console.log('Check run had conclusion: ' + check_run.conclusion + '. Skipping.');
8282
continue;
8383
}
@@ -156,91 +156,3 @@ jobs:
156156
run_id: context.payload.workflow_run.id
157157
})
158158
await create_check_run('success', 'Restarted workflow run due to preempted job')
159-
160-
restart-test:
161-
if: github.repository_owner == 'llvm' && (github.event.workflow_run.conclusion == 'failure' || github.event.workflow_run.conclusion == 'cancelled') && github.event.actor.login == 'ldionne' # TESTING ONLY
162-
name: "Restart Job (test)"
163-
permissions:
164-
statuses: read
165-
checks: write
166-
actions: write
167-
runs-on: ubuntu-24.04
168-
steps:
169-
- name: "Restart Job (test)"
170-
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea #v7.0.1
171-
with:
172-
script: |
173-
const FAILURE_REGEX = /(Process completed with exit code 1.)|(Canceling since a higher priority waiting request)|(The run was canceled by)/
174-
const PREEMPTION_REGEX = /(The runner has received a shutdown signal)|(The operation was canceled)/
175-
176-
function log(msg) {
177-
core.notice(msg)
178-
}
179-
180-
const wf_run = context.payload.workflow_run
181-
log(`Running on "${wf_run.display_title}" by @${wf_run.actor.login} (event: ${wf_run.event})\nWorkflow run URL: ${wf_run.html_url}`)
182-
183-
log('Listing check runs for suite')
184-
const check_suites = await github.rest.checks.listForSuite({
185-
owner: context.repo.owner,
186-
repo: context.repo.repo,
187-
check_suite_id: context.payload.workflow_run.check_suite_id,
188-
per_page: 100 // FIXME: We don't have 100 check runs yet, but we should handle this better.
189-
})
190-
191-
preemptions = [];
192-
legitimate_failures = [];
193-
for (check_run of check_suites.data.check_runs) {
194-
log(`Checking check run: ${check_run.id}`);
195-
if (check_run.status != 'completed') {
196-
log('Check run was not completed. Skipping.');
197-
continue;
198-
}
199-
200-
if (check_run.conclusion != 'failure' && check_run.conclusion != 'cancelled') {
201-
log(`Check run had conclusion: ${check_run.conclusion}. Skipping.`);
202-
continue;
203-
}
204-
205-
annotations = await github.rest.checks.listAnnotations({
206-
owner: context.repo.owner,
207-
repo: context.repo.repo,
208-
check_run_id: check_run.id
209-
})
210-
211-
preemption_annotation = annotations.data.find(function(annotation) {
212-
return annotation.annotation_level == 'failure' &&
213-
annotation.message.match(PREEMPTION_REGEX) != null;
214-
});
215-
if (preemption_annotation != null) {
216-
log(`Found preemption message: ${preemption_annotation.message}`);
217-
preemptions.push(check_run);
218-
break;
219-
}
220-
221-
failure_annotation = annotations.data.find(function(annotation) {
222-
return annotation.annotation_level == 'failure' &&
223-
annotation.message.match(FAILURE_REGEX) != null;
224-
});
225-
if (failure_annotation != null) {
226-
log(`Found legitimate failure annotation: ${failure_annotation.message}`);
227-
legitimate_failures.push(check_run);
228-
break;
229-
}
230-
}
231-
232-
if (preemptions) {
233-
log('Found some preempted jobs');
234-
if (legitimate_failures) {
235-
log('Also found some legitimate failures, so not restarting the workflow.');
236-
} else {
237-
log('Did not find any legitimate failures. Restarting workflow.');
238-
await github.rest.actions.reRunWorkflowFailedJobs({
239-
owner: context.repo.owner,
240-
repo: context.repo.repo,
241-
run_id: context.payload.workflow_run.id
242-
})
243-
}
244-
} else {
245-
log('Did not find any preempted jobs. Not restarting the workflow.');
246-
}

0 commit comments

Comments
 (0)