Skip to content

Commit 036a506

Browse files
committed
Kubernetes OOMs appear as non-zero sigkills, adding support for treating these as OOMs
1 parent bb65b26 commit 036a506

File tree

1 file changed

+12
-0
lines changed

1 file changed

+12
-0
lines changed

apps/webapp/app/v3/services/completeAttempt.server.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -696,5 +696,17 @@ function isOOMError(error: TaskRunError) {
696696
return true;
697697
}
698698

699+
// For the purposes of retrying on a larger machine, we're going to treat this as an OOM error.
700+
// This is what OOM errors look like when we're executing using k8s. They get corrected later, but by then it's too late.
701+
// {"code": "TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE", "type": "INTERNAL_ERROR", "message": "Process exited with code -1 after signal SIGKILL."}
702+
if (
703+
error.code === "TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE" &&
704+
error.message &&
705+
error.message.includes("SIGKILL") &&
706+
error.message.includes("-1")
707+
) {
708+
return true;
709+
}
710+
699711
return false;
700712
}

0 commit comments

Comments
 (0)