Skip to content

Commit 98749b4

Browse files
committed
Improve retry spans for OOM errors
1 parent a05f8d5 commit 98749b4

File tree

3 files changed

+10
-1
lines changed

3 files changed

+10
-1
lines changed

apps/webapp/app/v3/runEngineHandlers.server.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,12 +326,19 @@ export function registerRunEngineEventBusHandlers() {
326326

327327
engine.eventBus.on("runRetryScheduled", async ({ time, run, environment, retryAt }) => {
328328
try {
329-
await eventRepository.recordEvent(`Retry #${run.attemptNumber} delay`, {
329+
let retryMessage = `Retry #${run.attemptNumber} delay`;
330+
331+
if (run.nextMachineAfterOOM) {
332+
retryMessage += ` after OOM`;
333+
}
334+
335+
await eventRepository.recordEvent(retryMessage, {
330336
taskSlug: run.taskIdentifier,
331337
environment,
332338
attributes: {
333339
properties: {
334340
retryAt: retryAt.toISOString(),
341+
nextMachine: run.nextMachineAfterOOM,
335342
},
336343
runId: run.friendlyId,
337344
style: {

internal-packages/run-engine/src/engine/eventBus.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ export type EventBusEvents = {
8585
traceContext: Record<string, string | undefined>;
8686
taskIdentifier: string;
8787
baseCostInCents: number;
88+
nextMachineAfterOOM?: string;
8889
};
8990
organization: {
9091
id: string;

internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,7 @@ export class RunAttemptSystem {
692692
traceContext: run.traceContext as Record<string, string | undefined>,
693693
baseCostInCents: run.baseCostInCents,
694694
spanId: run.spanId,
695+
nextMachineAfterOOM: retryResult.machine,
695696
},
696697
organization: {
697698
id: run.runtimeEnvironment.organizationId,

0 commit comments

Comments
 (0)