Skip to content

Commit aa10279

Browse files
authored
Increase v4 visibility timeouts & don't try continue finished runs (#1978)
* Increase v4 visibility timeouts * Added some logging for #continueRunIfUnblocked: run has no checkpoint * If a run is finished or pending cancel, don’t try and continue it
1 parent 32ff569 commit aa10279

File tree

3 files changed

+27
-9
lines changed

3 files changed

+27
-9
lines changed

internal-packages/run-engine/src/engine/statuses.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,11 @@ export function isCheckpointable(status: TaskRunExecutionStatus): boolean {
3131
return checkpointableStatuses.includes(status);
3232
}
3333

34+
export function isFinishedOrPendingFinished(status: TaskRunExecutionStatus): boolean {
35+
const finishedStatuses: TaskRunExecutionStatus[] = ["FINISHED", "PENDING_CANCEL"];
36+
return finishedStatuses.includes(status);
37+
}
38+
3439
export function isFinalRunStatus(status: TaskRunStatus): boolean {
3540
const finalStatuses: TaskRunStatus[] = [
3641
"CANCELED",

internal-packages/run-engine/src/engine/systems/waitpointSystem.ts

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import {
1010
} from "@trigger.dev/database";
1111
import { nanoid } from "nanoid";
1212
import { sendNotificationToWorker } from "../eventBus.js";
13-
import { isExecuting } from "../statuses.js";
13+
import { isExecuting, isFinishedOrPendingFinished } from "../statuses.js";
1414
import { EnqueueSystem } from "./enqueueSystem.js";
1515
import { ExecutionSnapshotSystem, getLatestExecutionSnapshot } from "./executionSnapshotSystem.js";
1616
import { SystemResources } from "./systems.js";
@@ -512,6 +512,14 @@ export class WaitpointSystem {
512512
await this.$.runLock.lock([runId], 5000, async () => {
513513
const snapshot = await getLatestExecutionSnapshot(this.$.prisma, runId);
514514

515+
if (isFinishedOrPendingFinished(snapshot.executionStatus)) {
516+
this.$.logger.debug(`#continueRunIfUnblocked: run is finished, skipping`, {
517+
runId,
518+
snapshot,
519+
});
520+
return;
521+
}
522+
515523
//run is still executing, send a message to the worker
516524
if (isExecuting(snapshot.executionStatus)) {
517525
const result = await this.$.runQueue.reacquireConcurrency(
@@ -573,6 +581,11 @@ export class WaitpointSystem {
573581
} else {
574582
if (snapshot.executionStatus !== "RUN_CREATED" && !snapshot.checkpointId) {
575583
// TODO: We're screwed, should probably fail the run immediately
584+
this.$.logger.error(`#continueRunIfUnblocked: run has no checkpoint`, {
585+
runId: run.id,
586+
snapshot,
587+
blockingWaitpoints,
588+
});
576589
throw new Error(`#continueRunIfUnblocked: run has no checkpoint: ${run.id}`);
577590
}
578591

internal-packages/run-engine/src/engine/workerCatalog.ts

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,51 +6,51 @@ export const workerCatalog = {
66
waitpointId: z.string(),
77
error: z.string().optional(),
88
}),
9-
visibilityTimeoutMs: 5000,
9+
visibilityTimeoutMs: 30_000,
1010
},
1111
heartbeatSnapshot: {
1212
schema: z.object({
1313
runId: z.string(),
1414
snapshotId: z.string(),
1515
}),
16-
visibilityTimeoutMs: 5000,
16+
visibilityTimeoutMs: 30_000,
1717
},
1818
expireRun: {
1919
schema: z.object({
2020
runId: z.string(),
2121
}),
22-
visibilityTimeoutMs: 5000,
22+
visibilityTimeoutMs: 30_000,
2323
},
2424
cancelRun: {
2525
schema: z.object({
2626
runId: z.string(),
2727
completedAt: z.coerce.date(),
2828
reason: z.string().optional(),
2929
}),
30-
visibilityTimeoutMs: 5000,
30+
visibilityTimeoutMs: 30_000,
3131
},
3232
queueRunsPendingVersion: {
3333
schema: z.object({
3434
backgroundWorkerId: z.string(),
3535
}),
36-
visibilityTimeoutMs: 5000,
36+
visibilityTimeoutMs: 60_000,
3737
},
3838
tryCompleteBatch: {
3939
schema: z.object({
4040
batchId: z.string(),
4141
}),
42-
visibilityTimeoutMs: 10_000,
42+
visibilityTimeoutMs: 30_000,
4343
},
4444
continueRunIfUnblocked: {
4545
schema: z.object({
4646
runId: z.string(),
4747
}),
48-
visibilityTimeoutMs: 10_000,
48+
visibilityTimeoutMs: 30_000,
4949
},
5050
enqueueDelayedRun: {
5151
schema: z.object({
5252
runId: z.string(),
5353
}),
54-
visibilityTimeoutMs: 10_000,
54+
visibilityTimeoutMs: 30_000,
5555
},
5656
};

0 commit comments

Comments
 (0)