Skip to content

Commit 1642fd7

Browse files
authored
perf: migrate to graphile worker v0.16.6 (#1097)
* migrate to graphile worker v0.16.6 * remove stale docs link * fix jobs cleanup query
1 parent c644912 commit 1642fd7

File tree

6 files changed

+298
-88
lines changed

6 files changed

+298
-88
lines changed

apps/webapp/app/entry.server.tsx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,10 @@ Worker.init().catch((error) => {
174174

175175
/**
 * Writes the given error to the console and, for one specific failure,
 * prints an additional warning line.
 *
 * @param error - The value to log; it is passed to `console.error` as-is.
 * @param request - Accepted but not read anywhere in this body.
 */
function logError(error: unknown, request?: Request) {
176176
console.error(error);
177+
178+
// The message prefix is matched verbatim. NOTE(review): presumably this text is
// produced by graphile-worker when a migration encounters locked jobs — confirm
// against the library before relying on it.
if (error instanceof Error && error.message.startsWith("There are locked jobs present")) {
179+
console.log("⚠️ graphile-worker migration issue detected!");
180+
}
177181
}
178182

179183
const sqsEventConsumer = singleton("sqsEventConsumer", getSharedSqsEventConsumer);

apps/webapp/app/platform/zodWorker.server.ts

Lines changed: 61 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,21 @@
11
import type {
22
CronItem,
33
CronItemOptions,
4-
Job as GraphileJob,
4+
DbJob as GraphileJob,
55
Runner as GraphileRunner,
66
JobHelpers,
77
RunnerOptions,
88
Task,
99
TaskList,
1010
TaskSpec,
11+
WorkerUtils,
1112
} from "graphile-worker";
12-
import { run as graphileRun, parseCronItems } from "graphile-worker";
13+
import { run as graphileRun, makeWorkerUtils, parseCronItems } from "graphile-worker";
1314
import { SpanKind, trace } from "@opentelemetry/api";
1415

1516
import omit from "lodash.omit";
1617
import { z } from "zod";
17-
import { PrismaClient, PrismaClientOrTransaction } from "~/db.server";
18+
import { $replica, PrismaClient, PrismaClientOrTransaction } from "~/db.server";
1819
import { PgListenService } from "~/services/db/pgListen.server";
1920
import { workerLogger as logger } from "~/services/logger.server";
2021
import { flattenAttributes } from "@trigger.dev/core/v3";
@@ -34,8 +35,8 @@ const RawCronPayloadSchema = z.object({
3435

3536
const GraphileJobSchema = z.object({
3637
id: z.coerce.string(),
37-
queue_name: z.string().nullable(),
38-
task_identifier: z.string(),
38+
job_queue_id: z.number().nullable(),
39+
task_id: z.number(),
3940
payload: z.unknown(),
4041
priority: z.number(),
4142
run_at: z.coerce.date(),
@@ -72,7 +73,7 @@ type RecurringTaskPayload = {
7273

7374
export type ZodRecurringTasks = {
7475
[key: string]: {
75-
pattern: string;
76+
match: string;
7677
options?: CronItemOptions;
7778
handler: (payload: RecurringTaskPayload, job: GraphileJob) => Promise<void>;
7879
};
@@ -129,6 +130,7 @@ export class ZodWorker<TMessageCatalog extends MessageCatalogSchema> {
129130
#rateLimiter?: ZodWorkerRateLimiter;
130131
#shutdownTimeoutInMs?: number;
131132
#shuttingDown = false;
133+
#workerUtils?: WorkerUtils;
132134

133135
constructor(options: ZodWorkerOptions<TMessageCatalog>) {
134136
this.#name = options.name;
@@ -158,6 +160,8 @@ export class ZodWorker<TMessageCatalog extends MessageCatalogSchema> {
158160

159161
const parsedCronItems = parseCronItems(this.#createCronItemsFromRecurringTasks());
160162

163+
this.#workerUtils = await makeWorkerUtils(this.#runnerOptions);
164+
161165
this.#runner = await graphileRun({
162166
...this.#runnerOptions,
163167
noHandleSignals: true,
@@ -188,7 +192,7 @@ export class ZodWorker<TMessageCatalog extends MessageCatalogSchema> {
188192
this.#logDebug("Detected incoming migration", { latestMigration });
189193

190194
if (latestMigration > 10) {
191-
// already migrated past v0.14 - nothing to do
195+
this.#logDebug("Already migrated past v0.14 - nothing to do", { latestMigration });
192196
return;
193197
}
194198

@@ -263,6 +267,7 @@ export class ZodWorker<TMessageCatalog extends MessageCatalogSchema> {
263267

264268
/**
 * Stops the runner and then releases the worker utils.
 *
 * Both calls use optional chaining, so each step is skipped when the
 * corresponding field has not been set. The release only starts after the
 * runner's `stop()` promise has settled.
 */
public async stop() {
265269
await this.#runner?.stop();
270+
await this.#workerUtils?.release();
266271
}
267272

268273
public async enqueue<K extends keyof TMessageCatalog>(
@@ -442,12 +447,29 @@ export class ZodWorker<TMessageCatalog extends MessageCatalogSchema> {
442447
return taskList;
443448
}
444449

450+
/**
 * Resolves a job queue id to its queue name by querying the
 * `_private_job_queues` table in the configured graphile-worker schema.
 *
 * @param queueId - The `job_queue_id` of a job, or `null` when the job has no queue.
 * @returns The `queue_name` of the first matching row, or `undefined` when
 *          `queueId` is `null` or no row matches.
 */
async #getQueueName(queueId: number | null) {
451+
if (queueId === null) {
452+
return;
453+
}
454+
455+
// Expected row shape; `schema.parse` below rejects anything else.
const schema = z.array(z.object({ queue_name: z.string() }));
456+
457+
// The schema name is interpolated into the SQL text, while the id is bound as $1.
// NOTE(review): the lookup goes through `$replica` — presumably a read replica, so
// a very recently created queue might not be visible yet; confirm.
const rawQueueNameResults = await $replica.$queryRawUnsafe(
458+
`SELECT queue_name FROM ${this.graphileWorkerSchema}._private_job_queues WHERE id = $1`,
459+
queueId
460+
);
461+
462+
const queueNameResults = schema.parse(rawQueueNameResults);
463+
464+
return queueNameResults[0]?.queue_name;
465+
}
466+
445467
async #rescheduleTask(payload: unknown, helpers: JobHelpers) {
446468
this.#logDebug("Rescheduling task", { payload, job: helpers.job });
447469

448470
await this.enqueue(helpers.job.task_identifier, payload, {
449471
runAt: helpers.job.run_at,
450-
queueName: helpers.job.queue_name ?? undefined,
472+
queueName: await this.#getQueueName(helpers.job.job_queue_id),
451473
priority: helpers.job.priority,
452474
jobKey: helpers.job.key ?? undefined,
453475
flags: Object.keys(helpers.job.flags ?? []),
@@ -460,7 +482,7 @@ export class ZodWorker<TMessageCatalog extends MessageCatalogSchema> {
460482

461483
if (this.#cleanup) {
462484
cronItems.push({
463-
pattern: this.#cleanup.frequencyExpression,
485+
match: this.#cleanup.frequencyExpression,
464486
identifier: CLEANUP_TASK_NAME,
465487
task: CLEANUP_TASK_NAME,
466488
options: this.#cleanup.taskOptions,
@@ -469,7 +491,7 @@ export class ZodWorker<TMessageCatalog extends MessageCatalogSchema> {
469491

470492
if (this.#reporter) {
471493
cronItems.push({
472-
pattern: "50 * * * *", // Every hour at 50 minutes past the hour
494+
match: "50 * * * *", // Every hour at 50 minutes past the hour
473495
identifier: REPORTER_TASK_NAME,
474496
task: REPORTER_TASK_NAME,
475497
});
@@ -481,7 +503,7 @@ export class ZodWorker<TMessageCatalog extends MessageCatalogSchema> {
481503

482504
for (const [key, task] of Object.entries(this.#recurringTasks)) {
483505
const cronItem: CronItem = {
484-
pattern: task.pattern,
506+
match: task.match,
485507
identifier: key,
486508
task: key,
487509
options: task.options,
@@ -529,7 +551,7 @@ export class ZodWorker<TMessageCatalog extends MessageCatalogSchema> {
529551
attributes: {
530552
"job.task_identifier": job.task_identifier,
531553
"job.id": job.id,
532-
...(job.queue_name ? { "job.queue_name": job.queue_name } : {}),
554+
...(job.job_queue_id ? { "job.queue_id": job.job_queue_id } : {}),
533555
...flattenAttributes(job.payload as Record<string, unknown>, "job.payload"),
534556
"job.priority": job.priority,
535557
"job.run_at": job.run_at.toISOString(),
@@ -599,7 +621,7 @@ export class ZodWorker<TMessageCatalog extends MessageCatalogSchema> {
599621
attributes: {
600622
"job.task_identifier": job.task_identifier,
601623
"job.id": job.id,
602-
...(job.queue_name ? { "job.queue_name": job.queue_name } : {}),
624+
...(job.job_queue_id ? { "job.queue_id": job.job_queue_id } : {}),
603625
...flattenAttributes(job.payload as Record<string, unknown>, "job.payload"),
604626
"job.priority": job.priority,
605627
"job.run_at": job.run_at.toISOString(),
@@ -638,6 +660,10 @@ export class ZodWorker<TMessageCatalog extends MessageCatalogSchema> {
638660
return;
639661
}
640662

663+
if (!this.#workerUtils) {
664+
throw new Error("WorkerUtils need to be initialized before running job cleanup.");
665+
}
666+
641667
const job = helpers.job;
642668

643669
logger.debug("Received cleanup task", {
@@ -663,23 +689,38 @@ export class ZodWorker<TMessageCatalog extends MessageCatalogSchema> {
663689
payload,
664690
});
665691

666-
const rawResults = await this.#prisma.$queryRawUnsafe(
667-
`WITH rows AS (SELECT id FROM ${this.graphileWorkerSchema}.jobs WHERE run_at < $1 AND locked_at IS NULL AND max_attempts = attempts LIMIT $2 FOR UPDATE) DELETE FROM ${this.graphileWorkerSchema}.jobs WHERE id IN (SELECT id FROM rows) RETURNING id`,
692+
const rawResults = await $replica.$queryRawUnsafe(
693+
`SELECT id
694+
FROM ${this.graphileWorkerSchema}.jobs
695+
WHERE run_at < $1
696+
AND locked_at IS NULL
697+
AND max_attempts = attempts
698+
LIMIT $2`,
668699
expirationDate,
669700
this.#cleanup.maxCount
670701
);
671702

672-
const results = Array.isArray(rawResults) ? rawResults : [];
703+
const results = z
704+
.array(
705+
z.object({
706+
id: z.coerce.string(),
707+
})
708+
)
709+
.parse(rawResults);
710+
711+
const completedJobs = await this.#workerUtils.completeJobs(results.map((job) => job.id));
673712

674713
logger.debug("Cleaned up old jobs", {
675-
count: results.length,
714+
found: results.length,
715+
deleted: completedJobs.length,
676716
expirationDate,
677717
payload,
678718
});
679719

680720
if (this.#reporter) {
681721
await this.#reporter("cleanup_stats", {
682-
count: results.length,
722+
found: results.length,
723+
deleted: completedJobs.length,
683724
expirationDate,
684725
ts: payload._cron.ts,
685726
});
@@ -711,7 +752,7 @@ export class ZodWorker<TMessageCatalog extends MessageCatalogSchema> {
711752
const schema = z.array(z.object({ count: z.coerce.number() }));
712753

713754
// Count the number of jobs that have been added since the startAt date and before the payload._cron.ts date
714-
const rawAddedResults = await this.#prisma.$queryRawUnsafe(
755+
const rawAddedResults = await $replica.$queryRawUnsafe(
715756
`SELECT COUNT(*) FROM ${this.graphileWorkerSchema}.jobs WHERE created_at > $1 AND created_at < $2`,
716757
startAt,
717758
payload._cron.ts
@@ -720,7 +761,7 @@ export class ZodWorker<TMessageCatalog extends MessageCatalogSchema> {
720761
const addedCountResults = schema.parse(rawAddedResults)[0];
721762

722763
// Count the total number of jobs in the jobs table
723-
const rawTotalResults = await this.#prisma.$queryRawUnsafe(
764+
const rawTotalResults = await $replica.$queryRawUnsafe(
724765
`SELECT COUNT(*) FROM ${this.graphileWorkerSchema}.jobs`
725766
);
726767

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
import { runMigrations } from "graphile-worker";
2+
import { PrismaClient, prisma } from "~/db.server";
3+
import { env } from "~/env.server";
4+
import { logger } from "~/services/logger.server";
5+
import { PgNotifyService } from "./pgNotify.server";
6+
import { z } from "zod";
7+
8+
/**
 * Runs graphile-worker's schema migrations, preceded by a detection step that
 * notifies other workers and delays startup when the installed schema's latest
 * migration id is 10 or lower.
 */
export class GraphileMigrationHelperService {
9+
#prismaClient: PrismaClient;
10+
11+
/**
 * @param prismaClient - Client used for the detection queries; defaults to the shared `prisma` instance.
 */
constructor(prismaClient: PrismaClient = prisma) {
12+
this.#prismaClient = prismaClient;
13+
}
14+
15+
/**
 * Entry point: runs the detection/preparation step first, then calls
 * `runMigrations` with `env.DATABASE_URL` and `env.WORKER_SCHEMA`.
 */
public async call() {
16+
this.#logDebug("GraphileMigrationHelperService.call");
17+
18+
await this.#detectAndPrepareForMigrations();
19+
20+
await runMigrations({
21+
connectionString: env.DATABASE_URL,
22+
schema: env.WORKER_SCHEMA,
23+
});
24+
}
25+
26+
/** Debug-logs `message` with a `[migrationHelper]` prefix, forwarding `args` unchanged. */
#logDebug(message: string, args?: any) {
27+
logger.debug(`[migrationHelper] ${message}`, args);
28+
}
29+
30+
/**
 * Reads the highest migration id from the `migrations` table of the worker schema.
 *
 * @returns The highest id, or `-1` when the table has no rows.
 */
async #getLatestMigration() {
31+
const migrationQueryResult = await this.#prismaClient.$queryRawUnsafe(`
32+
SELECT id FROM ${env.WORKER_SCHEMA}.migrations
33+
ORDER BY id DESC LIMIT 1
34+
`);
35+
36+
const MigrationQueryResultSchema = z.array(z.object({ id: z.number() }));
37+
38+
const migrationResults = MigrationQueryResultSchema.parse(migrationQueryResult);
39+
40+
if (!migrationResults.length) {
41+
// no migrations applied yet
42+
return -1;
43+
}
44+
45+
return migrationResults[0].id;
46+
}
47+
48+
/**
 * Checks whether a schema named `env.WORKER_SCHEMA` is listed in
 * `information_schema.schemata`.
 *
 * NOTE(review): this treats the number returned by `$executeRaw` as the count of
 * rows selected — confirm that Prisma reports a row count for SELECT statements.
 *
 * @returns `true` only when the returned count is exactly 1.
 */
async #graphileSchemaExists() {
49+
const schemaCount = await this.#prismaClient.$executeRaw`
50+
SELECT schema_name FROM information_schema.schemata
51+
WHERE schema_name = ${env.WORKER_SCHEMA}
52+
`;
53+
54+
return schemaCount === 1;
55+
}
56+
57+
/**
 * Helper for graphile-worker v0.14.0 migration. No-op if already migrated.
 *
 * Returns immediately when the worker schema does not exist, when no migration
 * rows are found, or when the latest migration id is greater than 10. Otherwise
 * it sends a `trigger:graphile:migrate` notification and waits for
 * `env.GRACEFUL_SHUTDOWN_TIMEOUT + 15000` ms before returning.
 */
58+
async #detectAndPrepareForMigrations() {
59+
if (!(await this.#graphileSchemaExists())) {
60+
// no schema yet, likely first start
61+
return;
62+
}
63+
64+
const latestMigration = await this.#getLatestMigration();
65+
66+
if (latestMigration < 0) {
67+
// no migrations found
68+
return;
69+
}
70+
71+
// the first v0.14.0 migration has ID 11
72+
if (latestMigration > 10) {
73+
// already migrated
74+
return;
75+
}
76+
77+
// add 15s to graceful shutdown timeout, just to be safe
78+
const migrationDelayInMs = env.GRACEFUL_SHUTDOWN_TIMEOUT + 15000;
79+
80+
this.#logDebug("Delaying worker startup due to pending migration", {
81+
latestMigration,
82+
migrationDelayInMs,
83+
});
84+
85+
console.log(`⚠️ detected pending graphile migration`);
86+
console.log(`⚠️ notifying running workers`);
87+
88+
// Announce the pending migration; the payload carries the currently installed migration id.
const pgNotify = new PgNotifyService();
89+
await pgNotify.call("trigger:graphile:migrate", { latestMigration });
90+
91+
console.log(`⚠️ delaying worker startup by ${migrationDelayInMs}ms`);
92+
93+
// Wait out the delay before returning, so the caller's migration run starts only afterwards.
await new Promise((resolve) => setTimeout(resolve, migrationDelayInMs));
94+
}
95+
}

apps/webapp/app/services/worker.server.ts

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ import { TriggerScheduledTaskService } from "~/v3/services/triggerScheduledTask.
4040
import { PerformTaskAttemptAlertsService } from "~/v3/services/alerts/performTaskAttemptAlerts.server";
4141
import { DeliverAlertService } from "~/v3/services/alerts/deliverAlert.server";
4242
import { PerformDeploymentAlertsService } from "~/v3/services/alerts/performDeploymentAlerts.server";
43+
import { GraphileMigrationHelperService } from "./db/graphileMigrationHelper.server";
4344

4445
const workerCatalog = {
4546
indexEndpoint: z.object({
@@ -211,9 +212,8 @@ if (env.NODE_ENV === "production") {
211212
}
212213

213214
export async function init() {
214-
// const pgNotify = new PgNotifyService();
215-
// await pgNotify.call("trigger:graphile:migrate", { latestMigration: 10 });
216-
// await new Promise((resolve) => setTimeout(resolve, 10000))
215+
const migrationHelper = new GraphileMigrationHelperService();
216+
await migrationHelper.call();
217217

218218
if (env.WORKER_ENABLED === "true") {
219219
await workerQueue.initialize();
@@ -250,7 +250,7 @@ function getWorkerQueue() {
250250
recurringTasks: {
251251
// Run this every 5 minutes
252252
autoIndexProductionEndpoints: {
253-
pattern: "*/5 * * * *",
253+
match: "*/5 * * * *",
254254
handler: async (payload, job) => {
255255
const service = new RecurringEndpointIndexService();
256256

@@ -259,7 +259,7 @@ function getWorkerQueue() {
259259
},
260260
// Run this every hour
261261
purgeOldIndexings: {
262-
pattern: "0 * * * *",
262+
match: "0 * * * *",
263263
handler: async (payload, job) => {
264264
// Delete indexings that are older than 7 days
265265
await prisma.endpointIndex.deleteMany({
@@ -273,7 +273,7 @@ function getWorkerQueue() {
273273
},
274274
// Run this every hour at the 47 minute mark
275275
purgeOldTaskEvents: {
276-
pattern: "47 * * * *",
276+
match: "47 * * * *",
277277
handler: async (payload, job) => {
278278
await eventRepository.truncateEvents();
279279
},

apps/webapp/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@
116116
"evt": "^2.4.13",
117117
"express": "^4.18.1",
118118
"framer-motion": "^10.12.11",
119-
"graphile-worker": "^0.13.0",
119+
"graphile-worker": "0.16.6",
120120
"highlight.run": "^7.3.4",
121121
"humanize-duration": "^3.27.3",
122122
"intl-parse-accept-language": "^1.0.0",

0 commit comments

Comments
 (0)