@@ -27,6 +27,7 @@ import { randomUUID } from "node:crypto";
27
27
import { readFile } from "node:fs/promises" ;
28
28
import { createServer } from "node:http" ;
29
29
import { setTimeout as timeout } from "node:timers/promises" ;
30
+ import { Evt } from "evt" ;
30
31
31
32
declare const __PROJECT_CONFIG__ : Config ;
32
33
@@ -64,6 +65,8 @@ class ProdWorker {
64
65
private waitForPostStart = false ;
65
66
private connectionCount = 0 ;
66
67
68
+ private restoreNotification = Evt . create ( ) ;
69
+
67
70
private waitForTaskReplay :
68
71
| {
69
72
idempotencyKey : string ;
@@ -500,6 +503,17 @@ class ProdWorker {
500
503
// checkpointSafeInternalTimeout is accurate even after non-simulated restores
501
504
await Promise . race ( [ internalTimeout , checkpointSafeInternalTimeout ] ) ;
502
505
506
+ const idempotencyKey = randomUUID ( ) ;
507
+ this . durationResumeFallback = { idempotencyKey } ;
508
+
509
+ try {
510
+ await this . restoreNotification . waitFor ( 5_000 ) ;
511
+ } catch ( error ) {
512
+ logger . error ( "Did not receive restore notification in time" , {
513
+ error,
514
+ } ) ;
515
+ }
516
+
503
517
try {
504
518
// The coordinator should cancel any in-progress checkpoints so we don't end up with race conditions
505
519
const { checkpointCanceled } = await this . #coordinatorSocket. socket
@@ -518,9 +532,6 @@ class ProdWorker {
518
532
519
533
logger . log ( "Waiting for external duration resume as we may have been restored" ) ;
520
534
521
- const idempotencyKey = randomUUID ( ) ;
522
- this . durationResumeFallback = { idempotencyKey } ;
523
-
524
535
setTimeout ( ( ) => {
525
536
if ( ! this . durationResumeFallback ) {
526
537
logger . error ( "Already resumed after duration, skipping fallback" ) ;
@@ -633,6 +644,8 @@ class ProdWorker {
633
644
this . nextResumeAfter = undefined ;
634
645
this . waitForPostStart = false ;
635
646
647
+ this . durationResumeFallback = undefined ;
648
+
636
649
this . #backgroundWorker. waitCompletedNotification ( ) ;
637
650
}
638
651
@@ -875,8 +888,6 @@ class ProdWorker {
875
888
return ;
876
889
}
877
890
878
- this . durationResumeFallback = undefined ;
879
-
880
891
this . #resumeAfterDuration( ) ;
881
892
} ,
882
893
EXECUTE_TASK_RUN : async ( ) => {
@@ -1316,6 +1327,7 @@ class ProdWorker {
1316
1327
}
1317
1328
case "restore" : {
1318
1329
await this . #reconnectAfterPostStart( ) ;
1330
+ this . restoreNotification . post ( ) ;
1319
1331
break ;
1320
1332
}
1321
1333
default : {
0 commit comments