Skip to content

Commit 4b0f677

Browse files
authored
Prettier errors and related improvements (#1387)
* improve get attempt payload logging
* only log checkpoint readiness timeouts, don't crash the run
* improve coordinator errors
* add links export to core
* remove unused deploy error utilities
* update cli errors and use new links from core
* log remaining failure reasons
* also parse execute errors in deployed worker
* add signal to unexpected exit messages
* start displaying prettier internal errors
* improve unexpected exit error parsing
* better errors for crashes
* fix unexpected error parsing for dev runs
* changeset
* parse graceful exit timeouts
1 parent 886429b commit 4b0f677

File tree

20 files changed

+323
-359
lines changed

20 files changed

+323
-359
lines changed

.changeset/plenty-spoons-build.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
"trigger.dev": patch
3+
"@trigger.dev/core": patch
4+
---
5+
6+
Prettier and more specific errors with links to docs

apps/coordinator/src/index.ts

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -401,13 +401,16 @@ class TaskCoordinator {
401401
success: true,
402402
};
403403
} catch (error) {
404-
logger.error("Error while waiting for checkpointable state", { error });
404+
logger.error("Error while waiting for checkpointable state", {
405+
error,
406+
runId: socket.data.runId,
407+
});
405408

406409
if (error instanceof CheckpointReadinessTimeoutError) {
407-
await crashRun({
408-
name: error.name,
409-
message: `Failed to become checkpointable in ${CHECKPOINTABLE_TIMEOUT_SECONDS}s for ${reason}`,
410-
});
410+
logger.error(
411+
`Failed to become checkpointable in ${CHECKPOINTABLE_TIMEOUT_SECONDS}s for ${reason}`,
412+
{ runId: socket.data.runId }
413+
);
411414

412415
return {
413416
success: false,
@@ -490,7 +493,7 @@ class TaskCoordinator {
490493
updateAttemptFriendlyId(executionAck.payload.execution.attempt.id);
491494
updateAttemptNumber(executionAck.payload.execution.attempt.number);
492495
} catch (error) {
493-
logger.error("Error", { error });
496+
logger.error("READY_FOR_EXECUTION error", { error, runId: socket.data.runId });
494497

495498
await crashRun({
496499
name: "ReadyForExecutionError",
@@ -524,7 +527,10 @@ class TaskCoordinator {
524527
}
525528

526529
if (!lazyAttempt.success) {
527-
logger.error("failed to get lazy attempt payload", { runId: socket.data.runId });
530+
logger.error("failed to get lazy attempt payload", {
531+
runId: socket.data.runId,
532+
reason: lazyAttempt.reason,
533+
});
528534

529535
await crashRun({
530536
name: "ReadyForLazyAttemptError",
@@ -546,7 +552,7 @@ class TaskCoordinator {
546552
return;
547553
}
548554

549-
logger.error("Error", { error });
555+
logger.error("READY_FOR_LAZY_ATTEMPT error", { error, runId: socket.data.runId });
550556

551557
await crashRun({
552558
name: "ReadyForLazyAttemptError",
@@ -1004,7 +1010,10 @@ class TaskCoordinator {
10041010
});
10051011

10061012
if (!createAttempt?.success) {
1007-
logger.debug("no ack while creating attempt", message);
1013+
logger.debug("no ack while creating attempt", {
1014+
runId: message.runId,
1015+
reason: createAttempt?.reason,
1016+
});
10081017
callback({ success: false, reason: createAttempt?.reason });
10091018
return;
10101019
}

apps/kubernetes-provider/src/taskMonitor.ts

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,14 @@ import { SimpleLogger } from "@trigger.dev/core/v3/apps";
33
import { EXIT_CODE_ALREADY_HANDLED, EXIT_CODE_CHILD_NONZERO } from "@trigger.dev/core/v3/apps";
44
import { setTimeout } from "timers/promises";
55
import PQueue from "p-queue";
6-
import type { Prettify } from "@trigger.dev/core/v3";
6+
import { TaskRunErrorCodes, type Prettify, type TaskRunInternalError } from "@trigger.dev/core/v3";
77

88
type FailureDetails = Prettify<{
99
exitCode: number;
1010
reason: string;
1111
logs: string;
1212
overrideCompletion: boolean;
13+
errorCode: TaskRunInternalError["code"];
1314
}>;
1415

1516
type IndexFailureHandler = (deploymentId: string, details: FailureDetails) => Promise<any>;
@@ -160,18 +161,23 @@ export class TaskMonitor {
160161
let reason = rawReason || "Unknown error";
161162
let logs = rawLogs || "";
162163
let overrideCompletion = false;
164+
let errorCode: TaskRunInternalError["code"] = TaskRunErrorCodes.POD_UNKNOWN_ERROR;
163165

164166
switch (rawReason) {
165167
case "Error":
166168
reason = "Unknown error.";
169+
errorCode = TaskRunErrorCodes.POD_UNKNOWN_ERROR;
167170
break;
168171
case "Evicted":
169172
if (message.startsWith("Pod ephemeral local storage usage")) {
170173
reason = "Storage limit exceeded.";
174+
errorCode = TaskRunErrorCodes.DISK_SPACE_EXCEEDED;
171175
} else if (message) {
172176
reason = `Evicted: ${message}`;
177+
errorCode = TaskRunErrorCodes.POD_EVICTED;
173178
} else {
174179
reason = "Evicted for unknown reason.";
180+
errorCode = TaskRunErrorCodes.POD_EVICTED;
175181
}
176182

177183
if (logs.startsWith("failed to try resolving symlinks")) {
@@ -183,6 +189,7 @@ export class TaskMonitor {
183189
reason = `${
184190
exitCode === EXIT_CODE_CHILD_NONZERO ? "Child process" : "Parent process"
185191
} ran out of memory! Try choosing a machine preset with more memory for this task.`;
192+
errorCode = TaskRunErrorCodes.TASK_PROCESS_OOM_KILLED;
186193
break;
187194
default:
188195
break;
@@ -193,6 +200,7 @@ export class TaskMonitor {
193200
reason,
194201
logs,
195202
overrideCompletion,
203+
errorCode,
196204
} satisfies FailureDetails;
197205

198206
const app = pod.metadata?.labels?.app;

apps/webapp/app/components/runs/v3/RunInspector.tsx

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
import { CheckIcon, ClockIcon, CloudArrowDownIcon, QueueListIcon } from "@heroicons/react/20/solid";
22
import { Link } from "@remix-run/react";
3-
import { formatDuration, formatDurationMilliseconds, TaskRunError } from "@trigger.dev/core/v3";
3+
import {
4+
formatDuration,
5+
formatDurationMilliseconds,
6+
TaskRunError,
7+
taskRunErrorEnhancer,
8+
} from "@trigger.dev/core/v3";
49
import { useEffect } from "react";
510
import { useTypedFetcher } from "remix-typedjson";
611
import { ExitIcon } from "~/assets/icons/ExitIcon";
@@ -553,32 +558,39 @@ function RunTimeline({ run }: { run: RawRun }) {
553558
}
554559

555560
function RunError({ error }: { error: TaskRunError }) {
556-
switch (error.type) {
561+
const enhancedError = taskRunErrorEnhancer(error);
562+
563+
switch (enhancedError.type) {
557564
case "STRING_ERROR":
558565
case "CUSTOM_ERROR": {
559566
return (
560567
<div className="flex flex-col gap-2 rounded-sm border border-rose-500/50 px-3 pb-3 pt-2">
561568
<CodeBlock
562569
showCopyButton={false}
563570
showLineNumbers={false}
564-
code={error.raw}
571+
code={enhancedError.raw}
565572
maxLines={20}
566573
/>
567574
</div>
568575
);
569576
}
570577
case "BUILT_IN_ERROR":
571578
case "INTERNAL_ERROR": {
572-
const name = "name" in error ? error.name : error.code;
579+
const name = "name" in enhancedError ? enhancedError.name : enhancedError.code;
573580
return (
574581
<div className="flex flex-col gap-2 rounded-sm border border-rose-500/50 px-3 pb-3 pt-2">
575582
<Header3 className="text-rose-500">{name}</Header3>
576-
{error.message && <Callout variant="error">{error.message}</Callout>}
577-
{error.stackTrace && (
583+
{enhancedError.message && <Callout variant="error">{enhancedError.message}</Callout>}
584+
{enhancedError.link && (
585+
<Callout variant="docs" to={enhancedError.link.href}>
586+
{enhancedError.link.name}
587+
</Callout>
588+
)}
589+
{enhancedError.stackTrace && (
578590
<CodeBlock
579591
showCopyButton={false}
580592
showLineNumbers={false}
581-
code={error.stackTrace}
593+
code={enhancedError.stackTrace}
582594
maxLines={20}
583595
/>
584596
)}

apps/webapp/app/components/runs/v3/SpanEvents.tsx

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import {
2+
exceptionEventEnhancer,
23
isExceptionSpanEvent,
34
type ExceptionEventProperties,
45
type SpanEvent as OtelSpanEvent,
@@ -64,19 +65,26 @@ export function SpanEventError({
6465
spanEvent: OtelSpanEvent;
6566
exception: ExceptionEventProperties;
6667
}) {
68+
const enhancedException = exceptionEventEnhancer(exception);
69+
6770
return (
6871
<div className="flex flex-col gap-2 rounded-sm border border-rose-500/50 px-3 pb-3 pt-2">
6972
<SpanEventHeader
70-
title={exception.type ?? "Error"}
73+
title={enhancedException.type ?? "Error"}
7174
time={spanEvent.time}
7275
titleClassName="text-rose-500"
7376
/>
74-
{exception.message && <Callout variant="error">{exception.message}</Callout>}
75-
{exception.stacktrace && (
77+
{enhancedException.message && <Callout variant="error">{enhancedException.message}</Callout>}
78+
{enhancedException.link && (
79+
<Callout variant="docs" to={enhancedException.link.href}>
80+
{enhancedException.link.name}
81+
</Callout>
82+
)}
83+
{enhancedException.stacktrace && (
7684
<CodeBlock
7785
showCopyButton={false}
7886
showLineNumbers={false}
79-
code={exception.stacktrace}
87+
code={enhancedException.stacktrace}
8088
maxLines={20}
8189
/>
8290
)}

apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.v3.$projectParam.runs.$runParam.spans.$spanParam/route.tsx

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import {
66
formatDurationMilliseconds,
77
nanosecondsToMilliseconds,
88
TaskRunError,
9+
taskRunErrorEnhancer,
910
} from "@trigger.dev/core/v3";
1011
import { ReactNode, useEffect } from "react";
1112
import { typedjson, useTypedFetcher } from "remix-typedjson";
@@ -933,12 +934,14 @@ function RunTimelineLine({ title, state }: RunTimelineLineProps) {
933934
}
934935

935936
function RunError({ error }: { error: TaskRunError }) {
936-
switch (error.type) {
937+
const enhancedError = taskRunErrorEnhancer(error);
938+
939+
switch (enhancedError.type) {
937940
case "STRING_ERROR":
938941
return (
939942
<div className="flex flex-col gap-2 rounded-sm border border-rose-500/50 px-3 pb-3 pt-2">
940943
<Header3 className="text-rose-500">Error</Header3>
941-
<Callout variant="error">{error.raw}</Callout>
944+
<Callout variant="error">{enhancedError.raw}</Callout>
942945
</div>
943946
);
944947
case "CUSTOM_ERROR": {
@@ -947,24 +950,29 @@ function RunError({ error }: { error: TaskRunError }) {
947950
<CodeBlock
948951
showCopyButton={false}
949952
showLineNumbers={false}
950-
code={error.raw}
953+
code={enhancedError.raw}
951954
maxLines={20}
952955
/>
953956
</div>
954957
);
955958
}
956959
case "BUILT_IN_ERROR":
957960
case "INTERNAL_ERROR": {
958-
const name = "name" in error ? error.name : error.code;
961+
const name = "name" in enhancedError ? enhancedError.name : enhancedError.code;
959962
return (
960963
<div className="flex flex-col gap-2 rounded-sm border border-rose-500/50 px-3 pb-3 pt-2">
961964
<Header3 className="text-rose-500">{name}</Header3>
962-
{error.message && <Callout variant="error">{error.message}</Callout>}
963-
{error.stackTrace && (
965+
{enhancedError.message && <Callout variant="error">{enhancedError.message}</Callout>}
966+
{enhancedError.link && (
967+
<Callout variant="docs" to={enhancedError.link.href}>
968+
{enhancedError.link.name}
969+
</Callout>
970+
)}
971+
{enhancedError.stackTrace && (
964972
<CodeBlock
965973
showCopyButton={false}
966974
showLineNumbers={false}
967-
code={error.stackTrace}
975+
code={enhancedError.stackTrace}
968976
maxLines={20}
969977
/>
970978
)}

apps/webapp/app/v3/handleSocketIo.server.ts

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ function createCoordinatorNamespace(io: Server) {
188188
const environment = await findEnvironmentById(message.envId);
189189

190190
if (!environment) {
191-
logger.error("Environment not found", { id: message.envId });
191+
logger.error("CREATE_TASK_RUN_ATTEMPT: Environment not found", message);
192192
return { success: false, reason: "Environment not found" };
193193
}
194194

@@ -198,16 +198,14 @@ function createCoordinatorNamespace(io: Server) {
198198
const payload = await sharedQueueTasks.getExecutionPayloadFromAttempt(attempt.id, true);
199199

200200
if (!payload) {
201-
logger.error("Failed to retrieve payload after attempt creation", {
202-
id: message.envId,
203-
});
201+
logger.error("Failed to retrieve payload after attempt creation", message);
204202
return { success: false, reason: "Failed to retrieve payload" };
205203
}
206204

207205
return { success: true, executionPayload: payload };
208206
} catch (error) {
209207
logger.error("Error while creating attempt", {
210-
runId: message.runId,
208+
...message,
211209
error,
212210
});
213211
return { success: false };

apps/webapp/app/v3/marqs/sharedQueueConsumer.server.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1160,7 +1160,7 @@ class SharedQueueTasks {
11601160
const environment = await findEnvironmentById(envId);
11611161

11621162
if (!environment) {
1163-
logger.error("Environment not found", { id: envId });
1163+
logger.error("getLazyAttemptPayload: Environment not found", { runId, envId });
11641164
return;
11651165
}
11661166

@@ -1182,7 +1182,7 @@ class SharedQueueTasks {
11821182
});
11831183

11841184
if (!run) {
1185-
logger.error("Run not found", { id: runId, envId });
1185+
logger.error("getLazyAttemptPayload: Run not found", { runId, envId });
11861186
return;
11871187
}
11881188

apps/webapp/app/v3/services/crashTaskRun.server.ts

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import { BaseService } from "./baseService.server";
55
import { logger } from "~/services/logger.server";
66
import { AuthenticatedEnvironment } from "~/services/apiAuth.server";
77
import { CRASHABLE_ATTEMPT_STATUSES, isCrashableRunStatus } from "../taskStatus";
8-
import { sanitizeError } from "@trigger.dev/core/v3";
8+
import { sanitizeError, TaskRunInternalError } from "@trigger.dev/core/v3";
99
import { FinalizeTaskRunService } from "./finalizeTaskRun.server";
1010

1111
export type CrashTaskRunServiceOptions = {
@@ -15,6 +15,7 @@ export type CrashTaskRunServiceOptions = {
1515
crashAttempts?: boolean;
1616
crashedAt?: Date;
1717
overrideCompletion?: boolean;
18+
errorCode?: TaskRunInternalError["code"];
1819
};
1920

2021
export class CrashTaskRunService extends BaseService {
@@ -26,6 +27,8 @@ export class CrashTaskRunService extends BaseService {
2627
...options,
2728
};
2829

30+
logger.debug("CrashTaskRunService.call", { runId, opts });
31+
2932
const taskRun = await this._prisma.taskRun.findFirst({
3033
where: {
3134
id: runId,
@@ -71,7 +74,7 @@ export class CrashTaskRunService extends BaseService {
7174
attemptStatus: "FAILED",
7275
error: {
7376
type: "INTERNAL_ERROR",
74-
code: "TASK_RUN_CRASHED",
77+
code: opts.errorCode ?? "TASK_RUN_CRASHED",
7578
message: opts.reason,
7679
stackTrace: opts.logs,
7780
},
@@ -129,6 +132,7 @@ export class CrashTaskRunService extends BaseService {
129132
error: {
130133
reason: string;
131134
logs?: string;
135+
code?: TaskRunInternalError["code"];
132136
}
133137
) {
134138
return await this.traceWithEnv("failAttempt()", environment, async (span) => {
@@ -146,7 +150,7 @@ export class CrashTaskRunService extends BaseService {
146150
completedAt: failedAt,
147151
error: sanitizeError({
148152
type: "INTERNAL_ERROR",
149-
code: "TASK_RUN_CRASHED",
153+
code: error.code ?? "TASK_RUN_CRASHED",
150154
message: error.reason,
151155
stackTrace: error.logs,
152156
}),

0 commit comments

Comments
 (0)