Skip to content

Commit efd2d21

Browse files
authored
Detect ffmpeg OOM errors, added manual OutOfMemoryError (#1694)
* Detect ffmpeg OOM errors, added manual OutOfMemoryError * Create eighty-spies-knock.md
1 parent 6017c52 commit efd2d21

File tree

5 files changed

+83
-13
lines changed

5 files changed

+83
-13
lines changed

.changeset/eighty-spies-knock.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@trigger.dev/sdk": patch
3+
---
4+
5+
Detect ffmpeg OOM errors, added manual OutOfMemoryError

apps/webapp/app/v3/services/completeAttempt.server.ts

Lines changed: 31 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import {
1111
exceptionEventEnhancer,
1212
flattenAttributes,
1313
internalErrorFromUnexpectedExit,
14+
isManualOutOfMemoryError,
1415
sanitizeError,
1516
shouldRetryError,
1617
taskRunErrorEnhancer,
@@ -691,20 +692,38 @@ async function findAttempt(prismaClient: PrismaClientOrTransaction, friendlyId:
691692
}
692693

693694
function isOOMError(error: TaskRunError) {
694-
if (error.type !== "INTERNAL_ERROR") return false;
695-
if (error.code === "TASK_PROCESS_OOM_KILLED" || error.code === "TASK_PROCESS_MAYBE_OOM_KILLED") {
696-
return true;
695+
if (error.type === "INTERNAL_ERROR") {
696+
if (
697+
error.code === "TASK_PROCESS_OOM_KILLED" ||
698+
error.code === "TASK_PROCESS_MAYBE_OOM_KILLED"
699+
) {
700+
return true;
701+
}
702+
703+
// For the purposes of retrying on a larger machine, we're going to treat this as an OOM error.
704+
// This is what they look like if we're executing using k8s. They then get corrected later, but it's too late.
705+
// {"code": "TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE", "type": "INTERNAL_ERROR", "message": "Process exited with code -1 after signal SIGKILL."}
706+
if (
707+
error.code === "TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE" &&
708+
error.message &&
709+
error.message.includes("SIGKILL") &&
710+
error.message.includes("-1")
711+
) {
712+
return true;
713+
}
714+
}
715+
716+
if (error.type === "BUILT_IN_ERROR") {
717+
// ffmpeg being SIGKILLed surfaces as a plain built-in Error rather than an internal OOM code, so match on the message:
718+
// { "name": "Error", "type": "BUILT_IN_ERROR", "message": "ffmpeg was killed with signal SIGKILL" }
719+
if (error.message && error.message.includes("ffmpeg was killed with signal SIGKILL")) {
720+
return true;
721+
}
697722
}
698723

699-
// For the purposes of retrying on a larger machine, we're going to treat this as an OOM error.
700-
// This is what they look like if we're executing using k8s. They then get corrected later, but it's too late.
701-
// {"code": "TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE", "type": "INTERNAL_ERROR", "message": "Process exited with code -1 after signal SIGKILL."}
702-
if (
703-
error.code === "TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE" &&
704-
error.message &&
705-
error.message.includes("SIGKILL") &&
706-
error.message.includes("-1")
707-
) {
724+
// Special `OutOfMemoryError` for doing a manual OOM kill.
725+
// Useful if a native library runs out of memory but doesn't actually crash the run, and you want to manually have it treated as an OOM error.
726+
if (isManualOutOfMemoryError(error)) {
708727
return true;
709728
}
710729

packages/core/src/v3/errors.ts

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,28 @@ export class AbortTaskRunError extends Error {
5454
}
5555
}
5656

57+
const MANUAL_OOM_KILL_ERROR_MESSAGE = "MANUAL_OOM_KILL_ERROR";
58+
59+
/**
60+
* This causes an Out Of Memory error on the run (if it's uncaught).
61+
* This can be useful if you use a native package that detects that it has run out of memory but doesn't kill Node.js.
62+
*/
63+
export class OutOfMemoryError extends Error {
64+
constructor() {
65+
super(MANUAL_OOM_KILL_ERROR_MESSAGE);
66+
this.name = "OutOfMemoryError";
67+
}
68+
}
69+
70+
export function isManualOutOfMemoryError(error: TaskRunError) {
71+
if (error.type === "BUILT_IN_ERROR") {
72+
if (error.message && error.message === MANUAL_OOM_KILL_ERROR_MESSAGE) {
73+
return true;
74+
}
75+
}
76+
return false;
77+
}
78+
5779
export class TaskPayloadParsedError extends Error {
5880
public readonly cause: unknown;
5981

@@ -562,6 +584,13 @@ export function taskRunErrorEnhancer(error: TaskRunError): EnhanceError<TaskRunE
562584
};
563585
}
564586
}
587+
588+
if (isManualOutOfMemoryError(error)) {
589+
return {
590+
...getPrettyTaskRunError("TASK_PROCESS_OOM_KILLED"),
591+
};
592+
}
593+
565594
break;
566595
}
567596
case "STRING_ERROR": {

packages/trigger-sdk/src/v3/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ export {
3131
RateLimitError,
3232
UnprocessableEntityError,
3333
AbortTaskRunError,
34+
OutOfMemoryError,
3435
logger,
3536
type LogLevel,
3637
} from "@trigger.dev/core/v3";

references/hello-world/src/trigger/oom.ts

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import { OutOfMemoryError } from "@trigger.dev/sdk/v3";
12
import { logger, task } from "@trigger.dev/sdk/v3";
23
import { setTimeout } from "timers/promises";
34

@@ -9,7 +10,14 @@ export const oomTask = task({
910
machine: "small-1x",
1011
},
1112
},
12-
run: async ({ succeedOnLargerMachine }: { succeedOnLargerMachine: boolean }, { ctx }) => {
13+
run: async (
14+
{
15+
succeedOnLargerMachine = false,
16+
ffmpeg = false,
17+
manual = false,
18+
}: { succeedOnLargerMachine?: boolean; ffmpeg?: boolean; manual?: boolean },
19+
{ ctx }
20+
) => {
1321
logger.info("running out of memory below this line");
1422

1523
logger.info(`Running on ${ctx.machine?.name}`);
@@ -23,6 +31,14 @@ export const oomTask = task({
2331
};
2432
}
2533

34+
if (manual) {
35+
throw new OutOfMemoryError();
36+
}
37+
38+
if (ffmpeg) {
39+
throw new Error("ffmpeg was killed with signal SIGKILL");
40+
}
41+
2642
let a = "a";
2743

2844
try {

0 commit comments

Comments
 (0)