Skip to content

Commit ee660a3

Browse files
committed
fix dev retries
1 parent 8e5b71d commit ee660a3

File tree

1 file changed

+68
-41
lines changed

1 file changed

+68
-41
lines changed

packages/cli-v3/src/workers/dev/backgroundWorker.ts

Lines changed: 68 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -419,22 +419,28 @@ export class BackgroundWorker {
419419
}
420420
}
421421

422-
async #initializeTaskRunProcess(
422+
#prefixedMessage(payload: TaskRunExecutionPayload, message: string = "") {
423+
return `[${payload.execution.run.id}.${payload.execution.attempt.number}] ${message}`;
424+
}
425+
426+
async #getFreshTaskRunProcess(
423427
payload: TaskRunExecutionPayload,
424428
messageId?: string
425429
): Promise<TaskRunProcess> {
430+
logger.debug(this.#prefixedMessage(payload, "getFreshTaskRunProcess()"));
431+
426432
if (!this.metadata) {
427433
throw new Error("Worker not registered");
428434
}
429435

430436
this._closed = false;
431437

432-
if (this._taskRunProcesses.has(payload.execution.run.id)) {
433-
return this._taskRunProcesses.get(payload.execution.run.id) as TaskRunProcess;
434-
}
438+
logger.debug(this.#prefixedMessage(payload, "killing current task run process before attempt"));
435439

436440
await this.#killCurrentTaskRunProcessBeforeAttempt(payload.execution.run.id);
437441

442+
logger.debug(this.#prefixedMessage(payload, "creating new task run process"));
443+
438444
const taskRunProcess = new TaskRunProcess(
439445
payload.execution.run.id,
440446
payload.execution.run.isTest,
@@ -450,7 +456,15 @@ export class BackgroundWorker {
450456
);
451457

452458
taskRunProcess.onExit.attach(({ pid }) => {
453-
this._taskRunProcesses.delete(payload.execution.run.id);
459+
logger.debug(this.#prefixedMessage(payload, "onExit()"), { pid });
460+
461+
const taskRunProcess = this._taskRunProcesses.get(payload.execution.run.id);
462+
463+
// Only delete the task run process if the pid matches
464+
if (taskRunProcess?.pid === pid) {
465+
this._taskRunProcesses.delete(payload.execution.run.id);
466+
}
467+
454468
if (pid) {
455469
this._taskRunProcessesBeingKilled.delete(pid);
456470
}
@@ -481,54 +495,62 @@ export class BackgroundWorker {
481495
const taskRunProcess = this._taskRunProcesses.get(runId);
482496

483497
if (!taskRunProcess) {
498+
logger.debug(`[${runId}] no current task process to kill`);
484499
return;
485500
}
486501

502+
logger.debug(`[${runId}] killing current task process`, {
503+
pid: taskRunProcess.pid,
504+
});
505+
487506
if (taskRunProcess.isBeingKilled) {
488507
if (this._taskRunProcessesBeingKilled.size > 1) {
489-
// If there's more than one being killed, wait for graceful exit
490-
try {
491-
await taskRunProcess.onExit.waitFor(5_000);
492-
} catch (error) {
493-
console.error("TaskRunProcess graceful kill timeout exceeded", error);
494-
495-
try {
496-
const forcedKill = taskRunProcess.onExit.waitFor(5_000);
497-
taskRunProcess.kill("SIGKILL");
498-
await forcedKill;
499-
} catch (error) {
500-
console.error("TaskRunProcess forced kill timeout exceeded", error);
501-
throw new SigKillTimeoutProcessError();
502-
}
503-
}
508+
await this.#tryGracefulExit(taskRunProcess);
504509
} else {
505510
// If there's only one or none being killed, don't do anything so we can create a fresh one in parallel
506511
}
507512
} else {
508513
// It's not being killed, so kill it
509514
if (this._taskRunProcessesBeingKilled.size > 0) {
510-
// If there's one being killed already, wait for graceful exit
511-
try {
512-
await taskRunProcess.onExit.waitFor(5_000);
513-
} catch (error) {
514-
console.error("TaskRunProcess graceful kill timeout exceeded", error);
515-
516-
try {
517-
const forcedKill = taskRunProcess.onExit.waitFor(5_000);
518-
taskRunProcess.kill("SIGKILL");
519-
await forcedKill;
520-
} catch (error) {
521-
console.error("TaskRunProcess forced kill timeout exceeded", error);
522-
throw new SigKillTimeoutProcessError();
523-
}
524-
}
515+
await this.#tryGracefulExit(taskRunProcess);
525516
} else {
526517
// There's none being killed yet, so we can kill it without waiting. We still set a timeout to kill it forcefully just in case it sticks around.
527518
taskRunProcess.kill("SIGTERM", 5_000).catch(() => {});
528519
}
529520
}
530521
}
531522

523+
async #tryGracefulExit(
524+
taskRunProcess: TaskRunProcess,
525+
kill = false,
526+
initialSignal: number | NodeJS.Signals = "SIGTERM"
527+
) {
528+
try {
529+
const initialExit = taskRunProcess.onExit.waitFor(5_000);
530+
531+
if (kill) {
532+
taskRunProcess.kill(initialSignal);
533+
}
534+
535+
await initialExit;
536+
} catch (error) {
537+
logger.error("TaskRunProcess graceful kill timeout exceeded", error);
538+
539+
this.#tryForcefulExit(taskRunProcess);
540+
}
541+
}
542+
543+
async #tryForcefulExit(taskRunProcess: TaskRunProcess) {
544+
try {
545+
const forcedKill = taskRunProcess.onExit.waitFor(5_000);
546+
taskRunProcess.kill("SIGKILL");
547+
await forcedKill;
548+
} catch (error) {
549+
logger.error("TaskRunProcess forced kill timeout exceeded", error);
550+
throw new SigKillTimeoutProcessError();
551+
}
552+
}
553+
532554
async cancelRun(taskRunId: string) {
533555
const taskRunProcess = this._taskRunProcesses.get(taskRunId);
534556

@@ -636,7 +658,12 @@ export class BackgroundWorker {
636658
messageId?: string
637659
): Promise<TaskRunExecutionResult> {
638660
try {
639-
const taskRunProcess = await this.#initializeTaskRunProcess(payload, messageId);
661+
const taskRunProcess = await this.#getFreshTaskRunProcess(payload, messageId);
662+
663+
logger.debug(this.#prefixedMessage(payload, "executing task run"), {
664+
pid: taskRunProcess.pid,
665+
});
666+
640667
const result = await taskRunProcess.executeTaskRun(payload);
641668

642669
// Always kill the worker
@@ -829,7 +856,7 @@ class TaskRunProcess {
829856
this.onIsBeingKilled.post(this._child?.pid);
830857
}
831858

832-
logger.debug(`[${this.runId}] cleaning up task run process`, { kill });
859+
logger.debug(`[${this.runId}] cleaning up task run process`, { kill, pid: this.pid });
833860

834861
await this._sender.send("CLEANUP", {
835862
flush: true,
@@ -841,7 +868,7 @@ class TaskRunProcess {
841868
// Set a timeout to kill the child process if it hasn't been killed within 5 seconds
842869
setTimeout(() => {
843870
if (this._child && !this._child.killed) {
844-
logger.debug(`[${this.runId}] killing task run process after timeout`);
871+
logger.debug(`[${this.runId}] killing task run process after timeout`, { pid: this.pid });
845872

846873
this._child.kill();
847874
}
@@ -949,7 +976,7 @@ class TaskRunProcess {
949976
}
950977

951978
async #handleExit(code: number | null, signal: NodeJS.Signals | null) {
952-
logger.debug(`[${this.runId}] task run process exiting`, { code, signal });
979+
logger.debug(`[${this.runId}] handle task run process exit`, { code, signal, pid: this.pid });
953980

954981
// Go through all the attempts currently pending and reject them
955982
for (const [id, status] of this._attemptStatuses.entries()) {
@@ -1014,9 +1041,9 @@ class TaskRunProcess {
10141041
}
10151042

10161043
#kill() {
1017-
if (this._child && !this._child.killed) {
1018-
logger.debug(`[${this.runId}] killing task run process`);
1044+
logger.debug(`[${this.runId}] #kill()`, { pid: this.pid });
10191045

1046+
if (this._child && !this._child.killed) {
10201047
this._child?.kill();
10211048
}
10221049
}

0 commit comments

Comments
 (0)