Skip to content

Commit e7f3890

Browse files
authored
Merge branch 'main' into patch-1
2 parents f82e920 + 7652d8e commit e7f3890

File tree

76 files changed

+1696
-384
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

76 files changed

+1696
-384
lines changed

.github/workflows/release.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ jobs:
6969
commit: "chore: Update version for release"
7070
title: "chore: Update version for release"
7171
publish: pnpm run changeset:release
72-
createGithubReleases: false
72+
createGithubReleases: true
7373
env:
7474
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
7575
NPM_TOKEN: ${{ secrets.NPM_TOKEN }}

apps/coordinator/src/index.ts

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,49 @@ class TaskCoordinator {
162162

163163
taskSocket.emit("RESUME_AFTER_DEPENDENCY", message);
164164
},
165+
RESUME_AFTER_DEPENDENCY_WITH_ACK: async (message) => {
166+
const taskSocket = await this.#getAttemptSocket(message.attemptFriendlyId);
167+
168+
if (!taskSocket) {
169+
logger.log("Socket for attempt not found", {
170+
attemptFriendlyId: message.attemptFriendlyId,
171+
});
172+
return {
173+
success: false,
174+
error: {
175+
name: "SocketNotFoundError",
176+
message: "Socket for attempt not found",
177+
},
178+
};
179+
}
180+
181+
//if this is set, we want to kill the process because it will be resumed with the checkpoint from the queue
182+
if (taskSocket.data.requiresCheckpointResumeWithMessage) {
183+
logger.log("RESUME_AFTER_DEPENDENCY_WITH_ACK: Checkpoint is set so going to nack", {
184+
socketData: taskSocket.data,
185+
});
186+
187+
return {
188+
success: false,
189+
error: {
190+
name: "CheckpointMessagePresentError",
191+
message:
192+
"Checkpoint message is present, so we need to kill the process and resume from the queue.",
193+
},
194+
};
195+
}
196+
197+
await chaosMonkey.call();
198+
199+
// In case the task resumed faster than we could checkpoint
200+
this.#cancelCheckpoint(message.runId);
201+
202+
taskSocket.emit("RESUME_AFTER_DEPENDENCY", message);
203+
204+
return {
205+
success: true,
206+
};
207+
},
165208
RESUME_AFTER_DURATION: async (message) => {
166209
const taskSocket = await this.#getAttemptSocket(message.attemptFriendlyId);
167210

@@ -792,6 +835,18 @@ class TaskCoordinator {
792835
return;
793836
}
794837

838+
logger.log("WAIT_FOR_TASK checkpoint created", {
839+
checkpoint,
840+
socketData: socket.data,
841+
});
842+
843+
//setting this means we can only resume from a checkpoint
844+
socket.data.requiresCheckpointResumeWithMessage = `location:${checkpoint.location}-docker:${checkpoint.docker}`;
845+
logger.log("WAIT_FOR_TASK set requiresCheckpointResumeWithMessage", {
846+
checkpoint,
847+
socketData: socket.data,
848+
});
849+
795850
const ack = await this.#platformSocket?.sendWithAck("CHECKPOINT_CREATED", {
796851
version: "v1",
797852
attemptFriendlyId: message.attemptFriendlyId,
@@ -804,6 +859,7 @@ class TaskCoordinator {
804859
});
805860

806861
if (ack?.keepRunAlive) {
862+
socket.data.requiresCheckpointResumeWithMessage = undefined;
807863
logger.log("keeping run alive after task checkpoint", { runId: socket.data.runId });
808864
return;
809865
}
@@ -862,6 +918,18 @@ class TaskCoordinator {
862918
return;
863919
}
864920

921+
logger.log("WAIT_FOR_BATCH checkpoint created", {
922+
checkpoint,
923+
socketData: socket.data,
924+
});
925+
926+
//setting this means we can only resume from a checkpoint
927+
socket.data.requiresCheckpointResumeWithMessage = `location:${checkpoint.location}-docker:${checkpoint.docker}`;
928+
logger.log("WAIT_FOR_BATCH set checkpoint", {
929+
checkpoint,
930+
socketData: socket.data,
931+
});
932+
865933
const ack = await this.#platformSocket?.sendWithAck("CHECKPOINT_CREATED", {
866934
version: "v1",
867935
attemptFriendlyId: message.attemptFriendlyId,
@@ -875,6 +943,7 @@ class TaskCoordinator {
875943
});
876944

877945
if (ack?.keepRunAlive) {
946+
socket.data.requiresCheckpointResumeWithMessage = undefined;
878947
logger.log("keeping run alive after batch checkpoint", { runId: socket.data.runId });
879948
return;
880949
}

apps/webapp/app/env.server.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,11 @@ const EnvironmentSchema = z.object({
177177

178178
LOOPS_API_KEY: z.string().optional(),
179179
MARQS_DISABLE_REBALANCING: z.coerce.boolean().default(false),
180+
MARQS_VISIBILITY_TIMEOUT_MS: z.coerce
181+
.number()
182+
.int()
183+
.default(60 * 1000 * 15),
184+
PROD_TASK_HEARTBEAT_INTERVAL_MS: z.coerce.number().int().optional(),
180185

181186
VERBOSE_GRAPHILE_LOGGING: z.string().default("false"),
182187
V2_MARQS_ENABLED: z.string().default("0"),

apps/webapp/app/hooks/useSearchParam.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,13 @@ export function useSearchParams() {
1818
}
1919

2020
if (typeof value === "string") {
21-
search.set(param, encodeURIComponent(value));
21+
search.set(param, value);
2222
continue;
2323
}
2424

2525
search.delete(param);
2626
for (const v of value) {
27-
search.append(param, encodeURIComponent(v));
27+
search.append(param, v);
2828
}
2929
}
3030
},

apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.v3.$projectParam.runs.electric.$runParam/route.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1155,7 +1155,7 @@ function ConnectedDevWarning() {
11551155
<Paragraph variant="small">
11561156
Runs usually start within 2 seconds in{" "}
11571157
<EnvironmentLabel environment={{ type: "DEVELOPMENT" }} />. Check you're running the
1158-
CLI: <InlineCode className="whitespace-nowrap">npx trigger.dev@beta dev</InlineCode>
1158+
CLI: <InlineCode className="whitespace-nowrap">npx trigger.dev@latest dev</InlineCode>
11591159
</Paragraph>
11601160
</div>
11611161
</Callout>

apps/webapp/app/v3/environmentVariables/environmentVariablesRepository.server.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -798,6 +798,15 @@ async function resolveBuiltInProdVariables(runtimeEnvironment: RuntimeEnvironmen
798798
]);
799799
}
800800

801+
if (env.PROD_TASK_HEARTBEAT_INTERVAL_MS) {
802+
result = result.concat([
803+
{
804+
key: "HEARTBEAT_INTERVAL_MS",
805+
value: String(env.PROD_TASK_HEARTBEAT_INTERVAL_MS),
806+
},
807+
]);
808+
}
809+
801810
const commonVariables = await resolveCommonBuiltInVariables(runtimeEnvironment);
802811

803812
return [...result, ...commonVariables];

apps/webapp/app/v3/marqs/devQueueConsumer.server.ts

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -162,8 +162,8 @@ export class DevQueueConsumer {
162162
/**
163163
* @deprecated Use `taskRunHeartbeat` instead
164164
*/
165-
public async taskHeartbeat(workerId: string, id: string, seconds: number = 60) {
166-
logger.debug("[DevQueueConsumer] taskHeartbeat()", { id, seconds });
165+
public async taskHeartbeat(workerId: string, id: string) {
166+
logger.debug("[DevQueueConsumer] taskHeartbeat()", { id });
167167

168168
const taskRunAttempt = await prisma.taskRunAttempt.findUnique({
169169
where: { friendlyId: id },
@@ -173,13 +173,13 @@ export class DevQueueConsumer {
173173
return;
174174
}
175175

176-
await marqs?.heartbeatMessage(taskRunAttempt.taskRunId, seconds);
176+
await marqs?.heartbeatMessage(taskRunAttempt.taskRunId);
177177
}
178178

179-
public async taskRunHeartbeat(workerId: string, id: string, seconds: number = 60) {
180-
logger.debug("[DevQueueConsumer] taskRunHeartbeat()", { id, seconds });
179+
public async taskRunHeartbeat(workerId: string, id: string) {
180+
logger.debug("[DevQueueConsumer] taskRunHeartbeat()", { id });
181181

182-
await marqs?.heartbeatMessage(id, seconds);
182+
await marqs?.heartbeatMessage(id);
183183
}
184184

185185
public async stop(reason: string = "CLI disconnected") {

apps/webapp/app/v3/marqs/index.server.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -698,8 +698,8 @@ export class MarQS {
698698
}
699699

700700
// This should increment by the number of seconds, but with a max value of Date.now() + visibilityTimeoutInMs
701-
public async heartbeatMessage(messageId: string, seconds: number = 30) {
702-
await this.options.visibilityTimeoutStrategy.heartbeat(messageId, seconds * 1000);
701+
public async heartbeatMessage(messageId: string) {
702+
await this.options.visibilityTimeoutStrategy.heartbeat(messageId, this.visibilityTimeoutInMs);
703703
}
704704

705705
get visibilityTimeoutInMs() {
@@ -1871,7 +1871,7 @@ function getMarQSClient() {
18711871
redis: redisOptions,
18721872
defaultEnvConcurrency: env.DEFAULT_ENV_EXECUTION_CONCURRENCY_LIMIT,
18731873
defaultOrgConcurrency: env.DEFAULT_ORG_EXECUTION_CONCURRENCY_LIMIT,
1874-
visibilityTimeoutInMs: 120 * 1000, // 2 minutes,
1874+
visibilityTimeoutInMs: env.MARQS_VISIBILITY_TIMEOUT_MS,
18751875
enableRebalancing: !env.MARQS_DISABLE_REBALANCING,
18761876
subscriber: concurrencyTracker,
18771877
});

apps/webapp/app/v3/marqs/sharedQueueConsumer.server.ts

Lines changed: 47 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -725,20 +725,47 @@ export class SharedQueueConsumer {
725725
}
726726

727727
try {
728-
logger.debug("Broadcasting RESUME_AFTER_DEPENDENCY", {
729-
runId: resumableAttempt.taskRunId,
730-
attemptId: resumableAttempt.id,
731-
});
732-
733-
// The attempt should still be running so we can broadcast to all coordinators to resume immediately
734-
socketIo.coordinatorNamespace.emit("RESUME_AFTER_DEPENDENCY", {
735-
version: "v1",
728+
const resumeMessage = {
729+
version: "v1" as const,
736730
runId: resumableAttempt.taskRunId,
737731
attemptId: resumableAttempt.id,
738732
attemptFriendlyId: resumableAttempt.friendlyId,
739733
completions,
740734
executions,
735+
};
736+
737+
logger.debug("Broadcasting RESUME_AFTER_DEPENDENCY_WITH_ACK", { resumeMessage, message });
738+
739+
// The attempt should still be running so we can broadcast to all coordinators to resume immediately
740+
const responses = await socketIo.coordinatorNamespace
741+
.timeout(10_000)
742+
.emitWithAck("RESUME_AFTER_DEPENDENCY_WITH_ACK", resumeMessage);
743+
744+
logger.debug("RESUME_AFTER_DEPENDENCY_WITH_ACK received", {
745+
resumeMessage,
746+
responses,
747+
message,
741748
});
749+
750+
if (responses.length === 0) {
751+
logger.error("RESUME_AFTER_DEPENDENCY_WITH_ACK no response", {
752+
resumeMessage,
753+
message,
754+
});
755+
await this.#nackAndDoMoreWork(message.messageId, this._options.nextTickInterval, 5_000);
756+
return;
757+
}
758+
759+
const hasSuccess = responses.some((response) => response.success);
760+
if (!hasSuccess) {
761+
logger.warn("RESUME_AFTER_DEPENDENCY_WITH_ACK failed", {
762+
resumeMessage,
763+
responses,
764+
message,
765+
});
766+
await this.#nackAndDoMoreWork(message.messageId, this._options.nextTickInterval, 5_000);
767+
return;
768+
}
742769
} catch (e) {
743770
if (e instanceof Error) {
744771
this._currentSpan?.recordException(e);
@@ -748,7 +775,12 @@ export class SharedQueueConsumer {
748775

749776
this._endSpanInNextIteration = true;
750777

751-
await this.#nackAndDoMoreWork(message.messageId);
778+
logger.error("RESUME_AFTER_DEPENDENCY_WITH_ACK threw, nacking with delay", {
779+
message,
780+
error: e,
781+
});
782+
783+
await this.#nackAndDoMoreWork(message.messageId, this._options.nextTickInterval, 5_000);
752784
return;
753785
}
754786

@@ -1169,8 +1201,8 @@ class SharedQueueTasks {
11691201
} satisfies TaskRunExecutionLazyAttemptPayload;
11701202
}
11711203

1172-
async taskHeartbeat(attemptFriendlyId: string, seconds: number = 60) {
1173-
logger.debug("[SharedQueueConsumer] taskHeartbeat()", { id: attemptFriendlyId, seconds });
1204+
async taskHeartbeat(attemptFriendlyId: string) {
1205+
logger.debug("[SharedQueueConsumer] taskHeartbeat()", { id: attemptFriendlyId });
11741206

11751207
const taskRunAttempt = await prisma.taskRunAttempt.findUnique({
11761208
where: { friendlyId: attemptFriendlyId },
@@ -1180,13 +1212,13 @@ class SharedQueueTasks {
11801212
return;
11811213
}
11821214

1183-
await marqs?.heartbeatMessage(taskRunAttempt.taskRunId, seconds);
1215+
await marqs?.heartbeatMessage(taskRunAttempt.taskRunId);
11841216
}
11851217

1186-
async taskRunHeartbeat(runId: string, seconds: number = 60) {
1187-
logger.debug("[SharedQueueConsumer] taskRunHeartbeat()", { runId, seconds });
1218+
async taskRunHeartbeat(runId: string) {
1219+
logger.debug("[SharedQueueConsumer] taskRunHeartbeat()", { runId });
11881220

1189-
await marqs?.heartbeatMessage(runId, seconds);
1221+
await marqs?.heartbeatMessage(runId);
11901222
}
11911223

11921224
public async taskRunFailed(completion: TaskRunFailedExecutionResult) {

apps/webapp/app/v3/services/resumeBatchRun.server.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,9 @@ export class ResumeBatchRunService extends BaseService {
132132
if (wasUpdated) {
133133
logger.debug("ResumeBatchRunService: Resuming dependent run without checkpoint", {
134134
batchRunId: batchRun.id,
135-
dependentTaskAttemptId: batchRun.dependentTaskAttempt.id,
135+
dependentTaskAttempt: batchRun.dependentTaskAttempt,
136+
checkpointEventId: batchRun.checkpointEventId,
137+
hasCheckpointEvent: !!batchRun.checkpointEventId,
136138
});
137139
await marqs?.replaceMessage(dependentRun.id, {
138140
type: "RESUME",

docs/cli-deploy-commands.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
---
22
title: "CLI deploy options"
3-
sidebarTitle: "Deploy"
3+
sidebarTitle: "deploy"
44
description: "Use these options to help deploy your tasks to Trigger.dev."
55
---
66

docs/cli-deploy.mdx

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,6 @@ title: "CLI deploy command"
33
description: "The `trigger.dev deploy` command can be used to manually deploy."
44
---
55

6-
import ComingSoon from '/snippets/coming-soon-generic.mdx';
76
import CliDeployCommands from '/snippets/cli-commands-deploy.mdx';
87

98
<CliDeployCommands/>
10-
11-
{/* todo add options, remove the reference docs */}

docs/cli-dev-commands.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
---
22
title: "CLI dev command"
3-
sidebarTitle: "Dev"
3+
sidebarTitle: "dev"
44
description: "The `trigger.dev dev` command is used to run your tasks locally."
55
---
66

docs/cli-init-commands.mdx

Lines changed: 6 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
---
22
title: "CLI init command"
3-
sidebarTitle: "Init"
3+
sidebarTitle: "init"
44
description: "Use these options when running the CLI `init` command."
55
---
66

7+
import CommonOptions from '/snippets/cli-options-common.mdx';
8+
79
Run the command like this:
810

911
<CodeGroup>
@@ -49,25 +51,8 @@ yarn dlx trigger.dev@latest init
4951
Additional arguments to pass to the package manager. Accepts CSV for multiple args.
5052
</ParamField>
5153

52-
<ParamField body="Login profile" type="--profile">
53-
The login profile to use. Defaults to "default".
54-
</ParamField>
54+
### Common options
5555

56-
<ParamField body="API URL" type="--api-url | -a">
57-
Override the default API URL. If not specified, it uses `https://api.trigger.dev`.
58-
</ParamField>
59-
60-
<ParamField body="Log level" type="--log-level | -l">
61-
The CLI log level to use. Options are `debug`, `info`, `log`, `warn`, `error`, and `none`. This
62-
does not affect the log level of your trigger.dev tasks. Defaults to "log".
63-
</ParamField>
56+
These options are available on most commands.
6457

65-
<ParamField body="Skip telemetry" type="--skip-telemetry">
66-
Opt-out of sending telemetry data.
67-
</ParamField>
68-
69-
## Standard options
70-
71-
<ParamField body="Help" type="--help | -h">
72-
Shows the help information for the command.
73-
</ParamField>
58+
<CommonOptions />

0 commit comments

Comments
 (0)