@@ -9,6 +9,7 @@ import { nanoid } from "nanoid";
9
9
import pLimit from "p-limit" ;
10
10
import { createRedisClient } from "@internal/redis" ;
11
11
import { shutdownManager } from "@trigger.dev/core/v3/serverOnly" ;
12
+ import { Registry , Histogram } from "prom-client" ;
12
13
13
14
export type WorkerCatalog = {
14
15
[ key : string ] : {
@@ -48,6 +49,9 @@ type WorkerOptions<TCatalog extends WorkerCatalog> = {
48
49
shutdownTimeoutMs ?: number ;
49
50
logger ?: Logger ;
50
51
tracer ?: Tracer ;
52
+ metrics ?: {
53
+ register : Registry ;
54
+ } ;
51
55
} ;
52
56
53
57
// This results in attempt 12 being a delay of 1 hour
@@ -65,6 +69,16 @@ class Worker<TCatalog extends WorkerCatalog> {
65
69
private subscriber : Redis | undefined ;
66
70
private tracer : Tracer ;
67
71
72
+ private metrics : {
73
+ register ?: Registry ;
74
+ enqueueDuration ?: Histogram ;
75
+ dequeueDuration ?: Histogram ;
76
+ jobDuration ?: Histogram ;
77
+ ackDuration ?: Histogram ;
78
+ redriveDuration ?: Histogram ;
79
+ rescheduleDuration ?: Histogram ;
80
+ } = { } ;
81
+
68
82
queue : SimpleQueue < QueueCatalogFromWorkerCatalog < TCatalog > > ;
69
83
private jobs : WorkerOptions < TCatalog > [ "jobs" ] ;
70
84
private logger : Logger ;
@@ -100,6 +114,61 @@ class Worker<TCatalog extends WorkerCatalog> {
100
114
101
115
// Create a p-limit instance using this limit.
102
116
this . limiter = pLimit ( this . concurrency . limit ) ;
117
+
118
+ this . metrics . register = options . metrics ?. register ;
119
+
120
+ if ( ! this . metrics . register ) {
121
+ return ;
122
+ }
123
+
124
+ this . metrics . enqueueDuration = new Histogram ( {
125
+ name : "redis_worker_enqueue_duration_seconds" ,
126
+ help : "The duration of enqueue operations" ,
127
+ labelNames : [ "worker_name" , "job_type" , "has_available_at" ] ,
128
+ buckets : [ 0.001 , 0.005 , 0.01 , 0.025 , 0.05 , 0.1 , 0.25 , 0.5 , 1 ] ,
129
+ registers : [ this . metrics . register ] ,
130
+ } ) ;
131
+
132
+ this . metrics . dequeueDuration = new Histogram ( {
133
+ name : "redis_worker_dequeue_duration_seconds" ,
134
+ help : "The duration of dequeue operations" ,
135
+ labelNames : [ "worker_name" , "worker_id" , "task_count" ] ,
136
+ buckets : [ 0.001 , 0.005 , 0.01 , 0.025 , 0.05 , 0.1 , 0.25 , 0.5 , 1 ] ,
137
+ registers : [ this . metrics . register ] ,
138
+ } ) ;
139
+
140
+ this . metrics . jobDuration = new Histogram ( {
141
+ name : "redis_worker_job_duration_seconds" ,
142
+ help : "The duration of job operations" ,
143
+ labelNames : [ "worker_name" , "worker_id" , "batch_size" , "job_type" , "attempt" ] ,
144
+ // use different buckets here as jobs can take a while to run
145
+ buckets : [ 0.1 , 0.25 , 0.5 , 1 , 2.5 , 5 , 10 , 20 , 30 , 45 , 60 ] ,
146
+ registers : [ this . metrics . register ] ,
147
+ } ) ;
148
+
149
+ this . metrics . ackDuration = new Histogram ( {
150
+ name : "redis_worker_ack_duration_seconds" ,
151
+ help : "The duration of ack operations" ,
152
+ labelNames : [ "worker_name" ] ,
153
+ buckets : [ 0.001 , 0.005 , 0.01 , 0.025 , 0.05 , 0.1 , 0.25 , 0.5 , 1 ] ,
154
+ registers : [ this . metrics . register ] ,
155
+ } ) ;
156
+
157
+ this . metrics . redriveDuration = new Histogram ( {
158
+ name : "redis_worker_redrive_duration_seconds" ,
159
+ help : "The duration of redrive operations" ,
160
+ labelNames : [ "worker_name" ] ,
161
+ buckets : [ 0.001 , 0.005 , 0.01 , 0.025 , 0.05 , 0.1 , 0.25 , 0.5 , 1 ] ,
162
+ registers : [ this . metrics . register ] ,
163
+ } ) ;
164
+
165
+ this . metrics . rescheduleDuration = new Histogram ( {
166
+ name : "redis_worker_reschedule_duration_seconds" ,
167
+ help : "The duration of reschedule operations" ,
168
+ labelNames : [ "worker_name" ] ,
169
+ buckets : [ 0.001 , 0.005 , 0.01 , 0.025 , 0.05 , 0.1 , 0.25 , 0.5 , 1 ] ,
170
+ registers : [ this . metrics . register ] ,
171
+ } ) ;
103
172
}
104
173
105
174
public start ( ) {
@@ -160,18 +229,25 @@ class Worker<TCatalog extends WorkerCatalog> {
160
229
161
230
span . setAttribute ( "job_visibility_timeout_ms" , timeout ) ;
162
231
163
- return this . queue . enqueue ( {
164
- id,
165
- job,
166
- item : payload ,
167
- visibilityTimeoutMs : timeout ,
168
- availableAt,
169
- } ) ;
232
+ return this . withHistogram (
233
+ this . metrics . enqueueDuration ,
234
+ this . queue . enqueue ( {
235
+ id,
236
+ job,
237
+ item : payload ,
238
+ visibilityTimeoutMs : timeout ,
239
+ availableAt,
240
+ } ) ,
241
+ {
242
+ job_type : String ( job ) ,
243
+ has_available_at : availableAt ? "true" : "false" ,
244
+ }
245
+ ) ;
170
246
} ,
171
247
{
172
248
kind : SpanKind . PRODUCER ,
173
249
attributes : {
174
- job_type : job as string ,
250
+ job_type : String ( job ) ,
175
251
job_id : id ,
176
252
} ,
177
253
}
@@ -187,7 +263,10 @@ class Worker<TCatalog extends WorkerCatalog> {
187
263
this . tracer ,
188
264
"reschedule" ,
189
265
async ( span ) => {
190
- return this . queue . reschedule ( id , availableAt ) ;
266
+ return this . withHistogram (
267
+ this . metrics . rescheduleDuration ,
268
+ this . queue . reschedule ( id , availableAt )
269
+ ) ;
191
270
} ,
192
271
{
193
272
kind : SpanKind . PRODUCER ,
@@ -203,7 +282,7 @@ class Worker<TCatalog extends WorkerCatalog> {
203
282
this . tracer ,
204
283
"ack" ,
205
284
( ) => {
206
- return this . queue . ack ( id ) ;
285
+ return this . withHistogram ( this . metrics . ackDuration , this . queue . ack ( id ) ) ;
207
286
} ,
208
287
{
209
288
attributes : {
@@ -229,7 +308,14 @@ class Worker<TCatalog extends WorkerCatalog> {
229
308
}
230
309
231
310
try {
232
- const items = await this . queue . dequeue ( taskCount ) ;
311
+ const items = await this . withHistogram (
312
+ this . metrics . dequeueDuration ,
313
+ this . queue . dequeue ( taskCount ) ,
314
+ {
315
+ worker_id : workerId ,
316
+ task_count : taskCount ,
317
+ }
318
+ ) ;
233
319
234
320
if ( items . length === 0 ) {
235
321
await Worker . delay ( pollIntervalMs ) ;
@@ -274,7 +360,17 @@ class Worker<TCatalog extends WorkerCatalog> {
274
360
this . tracer ,
275
361
"processItem" ,
276
362
async ( ) => {
277
- await handler ( { id, payload : item , visibilityTimeoutMs, attempt } ) ;
363
+ await this . withHistogram (
364
+ this . metrics . jobDuration ,
365
+ handler ( { id, payload : item , visibilityTimeoutMs, attempt } ) ,
366
+ {
367
+ worker_id : workerId ,
368
+ batch_size : batchSize ,
369
+ job_type : job ,
370
+ attempt,
371
+ }
372
+ ) ;
373
+
278
374
// On success, acknowledge the item.
279
375
await this . queue . ack ( id ) ;
280
376
} ,
@@ -363,6 +459,23 @@ class Worker<TCatalog extends WorkerCatalog> {
363
459
} ) ;
364
460
}
365
461
462
+ private async withHistogram < T > (
463
+ histogram : Histogram < string > | undefined ,
464
+ promise : Promise < T > ,
465
+ labels ?: Record < string , string | number >
466
+ ) : Promise < T > {
467
+ if ( ! histogram || ! this . metrics . register ) {
468
+ return promise ;
469
+ }
470
+
471
+ const end = histogram . startTimer ( { worker_name : this . options . name , ...labels } ) ;
472
+ try {
473
+ return await promise ;
474
+ } finally {
475
+ end ( ) ;
476
+ }
477
+ }
478
+
366
479
// A simple helper to delay for a given number of milliseconds.
367
480
private static delay ( ms : number ) : Promise < void > {
368
481
return new Promise ( ( resolve ) => setTimeout ( resolve , ms ) ) ;
@@ -387,7 +500,10 @@ class Worker<TCatalog extends WorkerCatalog> {
387
500
if ( typeof id !== "string" ) {
388
501
throw new Error ( "Invalid message format: id must be a string" ) ;
389
502
}
390
- await this . queue . redriveFromDeadLetterQueue ( id ) ;
503
+ await this . withHistogram (
504
+ this . metrics . redriveDuration ,
505
+ this . queue . redriveFromDeadLetterQueue ( id )
506
+ ) ;
391
507
this . logger . log ( `Redrived item ${ id } from Dead Letter Queue` ) ;
392
508
} catch ( error ) {
393
509
this . logger . error ( "Error processing redrive message" , { error, message } ) ;
0 commit comments