@@ -6,6 +6,7 @@ import { tryCatch } from "@trigger.dev/core/utils";
6
6
import { parsePacket } from "@trigger.dev/core/v3/utils/ioSerialization" ;
7
7
import { TaskRun } from "@trigger.dev/database" ;
8
8
import { nanoid } from "nanoid" ;
9
+ import EventEmitter from "node:events" ;
9
10
import pLimit from "p-limit" ;
10
11
import { Counter , Gauge } from "prom-client" ;
11
12
import type { MetricsRegister } from "~/metrics.server" ;
@@ -25,10 +26,15 @@ export type RunsReplicationServiceOptions = {
25
26
leaderLockTimeoutMs ?: number ;
26
27
leaderLockExtendIntervalMs ?: number ;
27
28
ackIntervalSeconds ?: number ;
29
+ logger ?: Logger ;
28
30
} ;
29
31
30
32
type TaskRunInsert = { _version : bigint ; run : TaskRun ; event : "insert" | "update" | "delete" } ;
31
33
34
+ export type RunsReplicationServiceEvents = {
35
+ message : [ { lsn : string ; message : PgoutputMessage ; service : RunsReplicationService } ] ;
36
+ } ;
37
+
32
38
export class RunsReplicationService {
33
39
private _lastLsn : string | null = null ;
34
40
private _isSubscribed = false ;
@@ -44,12 +50,16 @@ export class RunsReplicationService {
44
50
private logger : Logger ;
45
51
private _lastReplicationLagMs : number | null = null ;
46
52
private _transactionCounter ?: Counter ;
53
+ private _lagGauge ?: Gauge ;
47
54
private _insertStrategy : "streaming" | "batching" ;
48
55
private _isShuttingDown = false ;
49
56
private _isShutDownComplete = false ;
50
57
58
+ public readonly events : EventEmitter < RunsReplicationServiceEvents > ;
59
+
51
60
constructor ( private readonly options : RunsReplicationServiceOptions ) {
52
- this . logger = new Logger ( "RunsReplicationService" , "debug" ) ;
61
+ this . logger = options . logger ?? new Logger ( "RunsReplicationService" , "debug" ) ;
62
+ this . events = new EventEmitter ( ) ;
53
63
54
64
this . _insertStrategy = options . insertStrategy ?? "streaming" ;
55
65
@@ -113,7 +123,7 @@ export class RunsReplicationService {
113
123
114
124
if ( options . metricsRegister ) {
115
125
const replicationService = this ;
116
- new Gauge ( {
126
+ this . _lagGauge = new Gauge ( {
117
127
name : "runs_replication_service_replication_lag_ms" ,
118
128
help : "The replication lag in milliseconds" ,
119
129
collect ( ) {
@@ -134,9 +144,25 @@ export class RunsReplicationService {
134
144
}
135
145
}
136
146
137
- public shutdown ( ) {
138
- this . logger . info ( "Initiating shutdown of runs replication service" ) ;
147
+ public async getTransactionCountMetric ( ) {
148
+ return this . _transactionCounter ?. get ( ) ;
149
+ }
150
+
151
+ public async getLagGaugeMetric ( ) {
152
+ return this . _lagGauge ?. get ( ) ;
153
+ }
154
+
155
+ public async shutdown ( ) {
139
156
this . _isShuttingDown = true ;
157
+
158
+ this . logger . info ( "Initiating shutdown of runs replication service" ) ;
159
+
160
+ if ( ! this . _currentTransaction ) {
161
+ this . logger . info ( "No transaction to commit, shutting down immediately" ) ;
162
+ await this . _replicationClient . stop ( ) ;
163
+ this . _isShutDownComplete = true ;
164
+ return ;
165
+ }
140
166
}
141
167
142
168
async start ( insertStrategy ?: "streaming" | "batching" ) {
@@ -162,8 +188,19 @@ export class RunsReplicationService {
162
188
}
163
189
164
190
async #handleData( lsn : string , message : PgoutputMessage ) {
191
+ this . logger . debug ( "Handling data" , {
192
+ lsn,
193
+ tag : message . tag ,
194
+ } ) ;
195
+
196
+ this . events . emit ( "message" , { lsn, message, service : this } ) ;
197
+
165
198
switch ( message . tag ) {
166
199
case "begin" : {
200
+ if ( this . _isShuttingDown || this . _isShutDownComplete ) {
201
+ return ;
202
+ }
203
+
167
204
this . _currentTransaction = {
168
205
commitLsn : message . commitLsn ,
169
206
xid : message . xid ,
@@ -195,15 +232,29 @@ export class RunsReplicationService {
195
232
} ) ;
196
233
break ;
197
234
}
235
+ case "delete" : {
236
+ if ( ! this . _currentTransaction ) {
237
+ return ;
238
+ }
239
+
240
+ this . _currentTransaction . events . push ( {
241
+ tag : message . tag ,
242
+ data : message . old as TaskRun ,
243
+ raw : message ,
244
+ } ) ;
245
+
246
+ break ;
247
+ }
198
248
case "commit" : {
199
249
if ( ! this . _currentTransaction ) {
200
250
return ;
201
251
}
202
252
const replicationLagMs = Date . now ( ) - Number ( message . commitTime / 1000n ) ;
203
253
this . _currentTransaction . commitEndLsn = message . commitEndLsn ;
204
254
this . _currentTransaction . replicationLagMs = replicationLagMs ;
205
- await this . #handleTransaction ( this . _currentTransaction as Transaction < TaskRun > ) ;
255
+ const transaction = this . _currentTransaction as Transaction < TaskRun > ;
206
256
this . _currentTransaction = null ;
257
+ await this . #handleTransaction( transaction ) ;
207
258
break ;
208
259
}
209
260
}
0 commit comments