Skip to content

Commit 8e10c72

Browse files
committed
add missing prod timeline events
1 parent 4ccd4b8 commit 8e10c72

File tree

4 files changed

+59
-16
lines changed

4 files changed

+59
-16
lines changed

apps/supervisor/src/workloadManager/docker.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@ export class DockerWorkloadManager implements WorkloadManager {
2828
"run",
2929
"--detach",
3030
`--network=${env.DOCKER_NETWORK}`,
31+
`--env=TRIGGER_POD_SCHEDULED_AT_MS=${Date.now()}`,
3132
`--env=TRIGGER_ENV_ID=${opts.envId}`,
3233
`--env=TRIGGER_RUN_ID=${opts.runFriendlyId}`,
3334
`--env=TRIGGER_SNAPSHOT_ID=${opts.snapshotFriendlyId}`,

apps/supervisor/src/workloadManager/kubernetes.ts

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,10 @@ export class KubernetesWorkloadManager implements WorkloadManager {
6161
],
6262
resources: this.#getResourcesForMachine(opts.machine),
6363
env: [
64+
{
65+
name: "TRIGGER_POD_SCHEDULED_AT_MS",
66+
value: Date.now().toString(),
67+
},
6468
{
6569
name: "TRIGGER_RUN_ID",
6670
value: opts.runFriendlyId,

packages/cli-v3/src/entryPoints/dev-run-controller.ts

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -516,8 +516,6 @@ export class DevRunController {
516516
snapshot: snapshot.friendlyId,
517517
});
518518

519-
// TODO: We may already be executing this run, this may be a new attempt
520-
// This is the only case where incrementing the attempt number is allowed
521519
this.enterRunPhase(run, snapshot);
522520

523521
const metrics = [
@@ -543,9 +541,6 @@ export class DevRunController {
543541
try {
544542
return await this.executeRun({ run, snapshot, execution, envVars, metrics });
545543
} catch (error) {
546-
// TODO: Handle the case where we're in the warm start phase or executing a new run
547-
// This can happen if we kill the run while it's still executing, e.g. after receiving an attempt number mismatch
548-
549544
logger.debug("Error while executing attempt", {
550545
error,
551546
});
@@ -574,8 +569,6 @@ export class DevRunController {
574569
error: completionResult.error,
575570
});
576571

577-
// TODO: Maybe we should keep retrying for a while longer
578-
579572
this.runFinished();
580573
return;
581574
}

packages/cli-v3/src/entryPoints/managed-run-controller.ts

Lines changed: 54 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@ import {
88
type CompleteRunAttemptResult,
99
HeartbeatService,
1010
type RunExecutionData,
11+
type TaskRunExecutionMetrics,
1112
type TaskRunExecutionResult,
1213
type TaskRunFailedExecutionResult,
1314
WorkerManifest,
@@ -50,6 +51,9 @@ const Env = z.object({
5051
TRIGGER_RUNNER_ID: z.string(),
5152
TRIGGER_METADATA_URL: z.string().optional(),
5253

54+
// Timeline metrics
55+
// The value arrives as an epoch-milliseconds digit string (set from Date.now()).
// z.coerce.date() would call new Date("<digits>"), which yields an Invalid Date
// and fails validation, so coerce to a number first and build the Date from that.
TRIGGER_POD_SCHEDULED_AT_MS: z.coerce
  .number()
  .transform((ms) => new Date(ms))
  .pipe(z.date()),
56+
5357
// May be overridden
5458
TRIGGER_SUPERVISOR_API_PROTOCOL: z.enum(["http", "https"]),
5559
TRIGGER_SUPERVISOR_API_DOMAIN: z.string(),
@@ -734,10 +738,14 @@ class ManagedRunController {
734738
private async startAndExecuteRunAttempt({
735739
runFriendlyId,
736740
snapshotFriendlyId,
741+
dequeuedAt,
742+
podScheduledAt,
737743
isWarmStart = false,
738744
}: {
739745
runFriendlyId: string;
740746
snapshotFriendlyId: string;
747+
dequeuedAt?: Date;
748+
podScheduledAt?: Date;
741749
isWarmStart?: boolean;
742750
}) {
743751
if (!this.socket) {
@@ -749,6 +757,8 @@ class ManagedRunController {
749757
snapshot: { friendlyId: snapshotFriendlyId },
750758
});
751759

760+
const attemptStartedAt = Date.now();
761+
752762
const start = await this.httpClient.startRunAttempt(runFriendlyId, snapshotFriendlyId, {
753763
isWarmStart,
754764
});
@@ -760,28 +770,58 @@ class ManagedRunController {
760770
return;
761771
}
762772

773+
const attemptDuration = Date.now() - attemptStartedAt;
774+
763775
const { run, snapshot, execution, envVars } = start.data;
764776

765777
logger.debug("[ManagedRunController] Started run", {
766778
runId: run.friendlyId,
767779
snapshot: snapshot.friendlyId,
768780
});
769781

770-
// TODO: We may already be executing this run, this may be a new attempt
771-
// This is the only case where incrementing the attempt number is allowed
772782
this.enterRunPhase(run, snapshot);
773783

784+
const metrics = [
785+
{
786+
name: "start",
787+
event: "create_attempt",
788+
timestamp: attemptStartedAt,
789+
duration: attemptDuration,
790+
},
791+
]
792+
.concat(
793+
dequeuedAt
794+
? [
795+
{
796+
name: "start",
797+
event: "dequeue",
798+
timestamp: dequeuedAt.getTime(),
799+
duration: 0,
800+
},
801+
]
802+
: []
803+
)
804+
.concat(
805+
podScheduledAt
806+
? [
807+
{
808+
name: "start",
809+
event: "pod_scheduled",
810+
timestamp: podScheduledAt.getTime(),
811+
duration: 0,
812+
},
813+
]
814+
: []
815+
) satisfies TaskRunExecutionMetrics;
816+
774817
const taskRunEnv = {
775818
...gatherProcessEnv(),
776819
...envVars,
777820
};
778821

779822
try {
780-
return await this.executeRun({ run, snapshot, envVars: taskRunEnv, execution });
823+
return await this.executeRun({ run, snapshot, envVars: taskRunEnv, execution, metrics });
781824
} catch (error) {
782-
// TODO: Handle the case where we're in the warm start phase or executing a new run
783-
// This can happen if we kill the run while it's still executing, e.g. after receiving an attempt number mismatch
784-
785825
console.error("Error while executing attempt", {
786826
error,
787827
});
@@ -810,8 +850,6 @@ class ManagedRunController {
810850
error: completionResult.error,
811851
});
812852

813-
// TODO: Maybe we should keep retrying for a while longer
814-
815853
this.waitForNextRun();
816854
return;
817855
}
@@ -923,6 +961,7 @@ class ManagedRunController {
923961
this.startAndExecuteRunAttempt({
924962
runFriendlyId: nextRun.run.friendlyId,
925963
snapshotFriendlyId: nextRun.snapshot.friendlyId,
964+
dequeuedAt: nextRun.dequeuedAt,
926965
isWarmStart: true,
927966
}).finally(() => {});
928967
return;
@@ -1032,7 +1071,10 @@ class ManagedRunController {
10321071
snapshot,
10331072
envVars,
10341073
execution,
1035-
}: WorkloadRunAttemptStartResponseBody) {
1074+
metrics,
1075+
}: WorkloadRunAttemptStartResponseBody & {
1076+
metrics?: TaskRunExecutionMetrics;
1077+
}) {
10361078
this.snapshotPoller.start();
10371079

10381080
if (!this.taskRunProcess || !this.taskRunProcess.isPreparedForNextRun) {
@@ -1058,6 +1100,7 @@ class ManagedRunController {
10581100
payload: {
10591101
execution,
10601102
traceContext: execution.run.traceContext ?? {},
1103+
metrics,
10611104
},
10621105
messageId: run.friendlyId,
10631106
env: envVars,
@@ -1212,6 +1255,8 @@ class ManagedRunController {
12121255
this.startAndExecuteRunAttempt({
12131256
runFriendlyId: env.TRIGGER_RUN_ID,
12141257
snapshotFriendlyId: env.TRIGGER_SNAPSHOT_ID,
1258+
dequeuedAt: new Date(),
1259+
podScheduledAt: env.TRIGGER_POD_SCHEDULED_AT_MS,
12151260
}).finally(() => {});
12161261
return;
12171262
}

0 commit comments

Comments
 (0)