@@ -66,18 +66,7 @@ export class WaitpointSystem {
66
66
isError : boolean ;
67
67
} ;
68
68
} ) : Promise < Waitpoint > {
69
- // 1. Find the TaskRuns blocked by this waitpoint
70
- const affectedTaskRuns = await this . $ . prisma . taskRunWaitpoint . findMany ( {
71
- where : { waitpointId : id } ,
72
- select : { taskRunId : true , spanIdToComplete : true , createdAt : true } ,
73
- } ) ;
74
-
75
- if ( affectedTaskRuns . length === 0 ) {
76
- this . $ . logger . debug ( `completeWaitpoint: No TaskRunWaitpoints found for waitpoint` , {
77
- waitpointId : id ,
78
- } ) ;
79
- }
80
-
69
+ // 1. Complete the Waitpoint (if not completed)
81
70
let [ waitpointError , waitpoint ] = await tryCatch (
82
71
this . $ . prisma . waitpoint . update ( {
83
72
where : { id, status : "PENDING" } ,
@@ -109,15 +98,41 @@ export class WaitpointSystem {
109
98
throw new Error ( `Waitpoint ${ id } not found` ) ;
110
99
}
111
100
112
- //schedule trying to continue the runs
101
+ if ( waitpoint . status !== "COMPLETED" ) {
102
+ throw new Error ( `Waitpoint ${ id } is not completed` ) ;
103
+ }
104
+
105
+ // 2. Find the TaskRuns blocked by this waitpoint
106
+ const affectedTaskRuns = await this . $ . prisma . taskRunWaitpoint . findMany ( {
107
+ where : { waitpointId : id } ,
108
+ select : { taskRunId : true , spanIdToComplete : true , createdAt : true } ,
109
+ } ) ;
110
+
111
+ if ( affectedTaskRuns . length === 0 ) {
112
+ this . $ . logger . debug ( `completeWaitpoint: no TaskRunWaitpoints found for waitpoint` , {
113
+ waitpointId : id ,
114
+ } ) ;
115
+ }
116
+
117
+ // 3. Schedule trying to continue the runs
113
118
for ( const run of affectedTaskRuns ) {
119
+ const jobId = `continueRunIfUnblocked:${ run . taskRunId } ` ;
120
+ //50ms in the future
121
+ const availableAt = new Date ( Date . now ( ) + 50 ) ;
122
+
123
+ this . $ . logger . debug ( `completeWaitpoint: enqueueing continueRunIfUnblocked` , {
124
+ waitpointId : id ,
125
+ runId : run . taskRunId ,
126
+ jobId,
127
+ availableAt,
128
+ } ) ;
129
+
114
130
await this . $ . worker . enqueue ( {
115
131
//this will debounce the call
116
- id : `continueRunIfUnblocked: ${ run . taskRunId } ` ,
132
+ id : jobId ,
117
133
job : "continueRunIfUnblocked" ,
118
134
payload : { runId : run . taskRunId } ,
119
- //50ms in the future
120
- availableAt : new Date ( Date . now ( ) + 50 ) ,
135
+ availableAt,
121
136
} ) ;
122
137
123
138
// emit an event to complete associated cached runs
@@ -469,6 +484,10 @@ export class WaitpointSystem {
469
484
}
470
485
471
486
public async continueRunIfUnblocked ( { runId } : { runId : string } ) {
487
+ this . $ . logger . debug ( `continueRunIfUnblocked: start` , {
488
+ runId,
489
+ } ) ;
490
+
472
491
// 1. Get the any blocking waitpoints
473
492
const blockingWaitpoints = await this . $ . prisma . taskRunWaitpoint . findMany ( {
474
493
where : { taskRunId : runId } ,
@@ -483,6 +502,10 @@ export class WaitpointSystem {
483
502
484
503
// 2. There are blockers still, so do nothing
485
504
if ( blockingWaitpoints . some ( ( w ) => w . waitpoint . status !== "COMPLETED" ) ) {
505
+ this . $ . logger . debug ( `continueRunIfUnblocked: blocking waitpoints still exist` , {
506
+ runId,
507
+ blockingWaitpoints,
508
+ } ) ;
486
509
return ;
487
510
}
488
511
@@ -505,15 +528,18 @@ export class WaitpointSystem {
505
528
} ) ;
506
529
507
530
if ( ! run ) {
508
- throw new Error ( `#continueRunIfUnblocked: run not found: ${ runId } ` ) ;
531
+ this . $ . logger . error ( `continueRunIfUnblocked: run not found` , {
532
+ runId,
533
+ } ) ;
534
+ throw new Error ( `continueRunIfUnblocked: run not found: ${ runId } ` ) ;
509
535
}
510
536
511
537
//4. Continue the run whether it's executing or not
512
538
await this . $ . runLock . lock ( "continueRunIfUnblocked" , [ runId ] , 5000 , async ( ) => {
513
539
const snapshot = await getLatestExecutionSnapshot ( this . $ . prisma , runId ) ;
514
540
515
541
if ( isFinishedOrPendingFinished ( snapshot . executionStatus ) ) {
516
- this . $ . logger . debug ( `# continueRunIfUnblocked: run is finished, skipping` , {
542
+ this . $ . logger . debug ( `continueRunIfUnblocked: run is finished, skipping` , {
517
543
runId,
518
544
snapshot,
519
545
} ) ;
@@ -555,6 +581,15 @@ export class WaitpointSystem {
555
581
556
582
await this . releaseConcurrencySystem . refillTokensForSnapshot ( snapshot ) ;
557
583
584
+ this . $ . logger . debug (
585
+ `continueRunIfUnblocked: run was still executing, sending notification` ,
586
+ {
587
+ runId,
588
+ snapshot,
589
+ newSnapshot,
590
+ }
591
+ ) ;
592
+
558
593
await sendNotificationToWorker ( {
559
594
runId,
560
595
snapshot : newSnapshot ,
@@ -563,7 +598,7 @@ export class WaitpointSystem {
563
598
} else {
564
599
// Because we cannot reacquire the concurrency, we need to enqueue the run again
565
600
// and because the run is still executing, we need to set the status to QUEUED_EXECUTING
566
- await this . enqueueSystem . enqueueRun ( {
601
+ const newSnapshot = await this . enqueueSystem . enqueueRun ( {
567
602
run,
568
603
env : run . runtimeEnvironment ,
569
604
snapshot : {
@@ -577,21 +612,27 @@ export class WaitpointSystem {
577
612
index : b . batchIndex ?? undefined ,
578
613
} ) ) ,
579
614
} ) ;
615
+
616
+ this . $ . logger . debug ( `continueRunIfUnblocked: run goes to QUEUED_EXECUTING` , {
617
+ runId,
618
+ snapshot,
619
+ newSnapshot,
620
+ } ) ;
580
621
}
581
622
} else {
582
623
if ( snapshot . executionStatus !== "RUN_CREATED" && ! snapshot . checkpointId ) {
583
624
// TODO: We're screwed, should probably fail the run immediately
584
- this . $ . logger . error ( `# continueRunIfUnblocked: run has no checkpoint` , {
625
+ this . $ . logger . error ( `continueRunIfUnblocked: run has no checkpoint` , {
585
626
runId : run . id ,
586
627
snapshot,
587
628
blockingWaitpoints,
588
629
} ) ;
589
- throw new Error ( `# continueRunIfUnblocked: run has no checkpoint: ${ run . id } ` ) ;
630
+ throw new Error ( `continueRunIfUnblocked: run has no checkpoint: ${ run . id } ` ) ;
590
631
}
591
632
592
633
//put it back in the queue, with the original timestamp (w/ priority)
593
634
//this prioritizes dequeuing waiting runs over new runs
594
- await this . enqueueSystem . enqueueRun ( {
635
+ const newSnapshot = await this . enqueueSystem . enqueueRun ( {
595
636
run,
596
637
env : run . runtimeEnvironment ,
597
638
snapshot : {
@@ -604,6 +645,12 @@ export class WaitpointSystem {
604
645
} ) ) ,
605
646
checkpointId : snapshot . checkpointId ?? undefined ,
606
647
} ) ;
648
+
649
+ this . $ . logger . debug ( `continueRunIfUnblocked: run goes to QUEUED` , {
650
+ runId,
651
+ snapshot,
652
+ newSnapshot,
653
+ } ) ;
607
654
}
608
655
} ) ;
609
656
@@ -613,6 +660,10 @@ export class WaitpointSystem {
613
660
taskRunId : runId ,
614
661
} ,
615
662
} ) ;
663
+
664
+ this . $ . logger . debug ( `continueRunIfUnblocked: removed blocking waitpoints` , {
665
+ runId,
666
+ } ) ;
616
667
}
617
668
618
669
public async createRunAssociatedWaitpoint (
0 commit comments