Skip to content

Commit ad7022d

Browse files
authored
Handle failure to launch kernel while connecting (#11454)
For #10479
1 parent 6218433 commit ad7022d

File tree

6 files changed

+63
-58
lines changed

6 files changed

+63
-58
lines changed

pythonFiles/vscode_datascience_helpers/kernel_launcher_daemon.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,8 @@ def _monitor_kernel(self):
108108

109109
exit_code = self.kernel.poll()
110110
std_err = self.kernel.stderr.read()
111-
if std_err:
112-
std_err = std_err.decode()
111+
if std_err is not None:
112+
std_err = std_err.decode("utf-8")
113113
self.log.warn("Kernel has exited with exit code %s, %s", exit_code, std_err)
114114
sys.stdout.flush()
115115
sys.stderr.flush()
@@ -163,6 +163,11 @@ def _exec_module_observable_in_background(
163163
target=self._monitor_kernel, daemon=True, name="kerne_monitor"
164164
).start()
165165

166+
# We could wait for 0.5-1s to ensure kernel doesn't die, however that will not work.
167+
# On windows, launching a conda process can take 4s, hence just spinning up a process takes 4s
168+
# and then python will attempt to run the kernel module, at which point it could fail.
169+
# Meaning there's no way to know it successfully launched the kernel without trying to connect to it.
170+
166171
def signal_kernel(self, signum):
167172
"""Sends a signal to the process group of the kernel (this
168173
usually includes the kernel and any subprocesses spawned by

src/client/datascience/kernel-launcher/kernelDaemon.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,15 @@ export class PythonKernelDaemon extends BasePythonDaemon implements IPythonKerne
3939
options: SpawnOptions
4040
): Promise<ObservableExecutionResult<string>> {
4141
const subject = new Subject<Output<string>>();
42+
let stdErr = '';
4243
// Message from daemon when kernel dies.
4344
const KernelDiedNotification = new NotificationType<{ exit_code: string; reason?: string }, void>(
4445
'kernel_died'
4546
);
4647
this.connection.onNotification(KernelDiedNotification, (output) => {
48+
// If we don't have a reason why things failed, then just include the stderr.
4749
subject.error(
48-
new PythonKernelDiedError({ exitCode: parseInt(output.exit_code, 10), reason: output.reason })
50+
new PythonKernelDiedError({ exitCode: parseInt(output.exit_code, 10), reason: output.reason || stdErr })
4951
);
5052
});
5153

@@ -55,6 +57,7 @@ export class PythonKernelDaemon extends BasePythonDaemon implements IPythonKerne
5557
this.outputObservale.subscribe(
5658
(out) => {
5759
if (out.source === 'stderr' && options.throwOnStdErr) {
60+
stdErr += out.out;
5861
subject.error(new StdErrError(out.out));
5962
} else if (out.source === 'stderr' && options.mergeStdOutErr) {
6063
subject.next({ source: 'stdout', out: out.out });

src/client/datascience/kernel-launcher/kernelProcess.ts

Lines changed: 18 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,10 @@
55
import { ChildProcess } from 'child_process';
66
import { Event, EventEmitter } from 'vscode';
77
import { PYTHON_LANGUAGE } from '../../common/constants';
8-
import { WrappedError } from '../../common/errors/errorUtils';
98
import { traceError, traceInfo, traceWarning } from '../../common/logger';
109
import { IFileSystem, TemporaryFile } from '../../common/platform/types';
1110
import { IProcessServiceFactory, IPythonExecutionFactory, ObservableExecutionResult } from '../../common/process/types';
1211
import { Resource } from '../../common/types';
13-
import { createDeferred, Deferred, sleep } from '../../common/utils/async';
14-
import * as localize from '../../common/utils/localize';
1512
import { noop, swallowExceptions } from '../../common/utils/misc';
1613
import { IJupyterKernelSpec } from '../types';
1714
import { findIndexOfConnectionFile } from './kernelFinder';
@@ -24,10 +21,7 @@ import cloneDeep = require('lodash/cloneDeep');
2421
// Launches and disposes a kernel process given a kernelspec and a resource or python interpreter.
2522
// Exposes connection information and the process itself.
2623
export class KernelProcess implements IKernelProcess {
27-
public get ready(): Promise<void> {
28-
return this.readyPromise.promise;
29-
}
30-
public get exited(): Event<number | null> {
24+
public get exited(): Event<{ exitCode?: number; reason?: string }> {
3125
return this.exitEvent.event;
3226
}
3327
public get kernelSpec(): Readonly<IJupyterKernelSpec> {
@@ -41,10 +35,10 @@ export class KernelProcess implements IKernelProcess {
4135
}
4236
private _process?: ChildProcess;
4337
private connectionFile?: TemporaryFile;
44-
private readyPromise: Deferred<void>;
45-
private exitEvent: EventEmitter<number | null> = new EventEmitter<number | null>();
38+
private exitEvent = new EventEmitter<{ exitCode?: number; reason?: string }>();
4639
private pythonKernelLauncher?: PythonKernelLauncherDaemon;
4740
private launchedOnce?: boolean;
41+
private disposed?: boolean;
4842
private kernelDaemon?: IPythonKernelDaemon;
4943
private readonly _kernelSpec: IJupyterKernelSpec;
5044
private readonly originalKernelSpec: IJupyterKernelSpec;
@@ -58,7 +52,6 @@ export class KernelProcess implements IKernelProcess {
5852
) {
5953
this.originalKernelSpec = kernelSpec;
6054
this._kernelSpec = cloneDeep(kernelSpec);
61-
this.readyPromise = createDeferred<void>();
6255
}
6356
public async interrupt(): Promise<void> {
6457
if (this.kernelDaemon) {
@@ -75,12 +68,6 @@ export class KernelProcess implements IKernelProcess {
7568

7669
const exeObs = await this.launchAsObservable();
7770

78-
sleep(1_000)
79-
.then(() => {
80-
this.readyPromise.resolve();
81-
})
82-
.catch(noop);
83-
8471
let stdout = '';
8572
let stderr = '';
8673
exeObs.out.subscribe(
@@ -91,38 +78,29 @@ export class KernelProcess implements IKernelProcess {
9178
traceWarning(`StdErr from Kernel Process ${output.out}`);
9279
} else {
9380
stdout += output.out;
94-
// Search for --existing this is the message that will indicate that our kernel is actually
95-
// up and started from stdout
96-
// To connect another client to this kernel, use:
97-
// --existing /var/folders/q7/cn8fg6s94fgdcl0h7rbxldf00000gn/T/tmp-16231TOL2dgBoWET1.json
98-
if (!this.readyPromise.completed && stdout.includes('--existing')) {
99-
this.readyPromise.resolve();
100-
}
101-
traceInfo(output.out);
81+
traceInfo(`Kernel Output: ${stdout}`);
10282
}
10383
},
10484
(error) => {
105-
if (this.readyPromise.completed) {
106-
traceInfo('KernelProcess Error', error, stderr);
107-
} else {
108-
traceError('Kernel died before it could start', error, stderr);
109-
// Include original error and stderr in error thrown.
110-
const errorMessage = `${localize.DataScience.rawKernelProcessExitBeforeConnect()}. Error = ${error}, stderr = ${stderr}`;
111-
const errorToWrap = error instanceof Error ? error : new Error(error);
112-
this.readyPromise.reject(new WrappedError(errorMessage, errorToWrap));
85+
if (this.disposed) {
86+
traceInfo('Kernel died', error, stderr);
87+
return;
11388
}
89+
traceError('Kernel died', error, stderr);
11490
if (error instanceof PythonKernelDiedError) {
115-
traceInfo('KernelProcess Exit', `Exit - ${error.exitCode}`);
116-
this.exitEvent.fire(error.exitCode);
117-
118-
// As the kernel has died, kill the daemon.
119-
this.kernelDaemon?.kill().catch(noop); // NOSONAR
91+
if (this.disposed) {
92+
traceInfo('KernelProcess Exit', `Exit - ${error.exitCode}, ${error.reason}`, error);
93+
} else {
94+
traceError('KernelProcess Exit', `Exit - ${error.exitCode}, ${error.reason}`, error);
95+
}
96+
this.exitEvent.fire({ exitCode: error.exitCode, reason: error.reason || error.message });
12097
}
12198
}
12299
);
123100
}
124101

125102
public async dispose(): Promise<void> {
103+
this.disposed = true;
126104
if (this.kernelDaemon) {
127105
await this.kernelDaemon.kill().catch(noop);
128106
swallowExceptions(() => this.kernelDaemon?.dispose());
@@ -167,16 +145,12 @@ export class KernelProcess implements IKernelProcess {
167145
}
168146

169147
if (exeObs.proc) {
170-
exeObs.proc!.on('exit', (exitCode) => {
148+
exeObs.proc.on('exit', (exitCode) => {
171149
traceInfo('KernelProcess Exit', `Exit - ${exitCode}`);
172-
if (!this.readyPromise.completed) {
173-
this.readyPromise.reject(new Error(localize.DataScience.rawKernelProcessExitBeforeConnect()));
174-
}
175-
this.exitEvent.fire(exitCode);
150+
this.exitEvent.fire({ exitCode: exitCode || undefined });
176151
});
177152
} else {
178-
traceInfo('KernelProcess failed to launch');
179-
this.readyPromise.reject(new Error(localize.DataScience.rawKernelProcessNotStarted()));
153+
throw new Error('KernelProcess failed to launch');
180154
}
181155

182156
this._process = exeObs.proc;

src/client/datascience/kernel-launcher/types.ts

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,11 @@ export interface IKernelConnection {
2929

3030
export interface IKernelProcess extends IAsyncDisposable {
3131
readonly connection: Readonly<IKernelConnection>;
32-
/**
33-
* This promise is resolved when the launched process is ready to get JMP messages
34-
*/
35-
readonly ready: Promise<void>;
3632
readonly kernelSpec: Readonly<IJupyterKernelSpec>;
3733
/**
3834
* This event is triggered if the process is exited
3935
*/
40-
readonly exited: Event<number | null>;
36+
readonly exited: Event<{ exitCode?: number; reason?: string }>;
4137
interrupt(): Promise<void>;
4238
}
4339

src/client/datascience/raw-kernel/rawJupyterSession.ts

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ jupyterlabs interface as well as starting up and connecting to a raw session
3030
*/
3131
export class RawJupyterSession extends BaseJupyterSession {
3232
private processExitHandler: IDisposable | undefined;
33-
33+
private _disposables: IDisposable[] = [];
3434
constructor(
3535
private readonly kernelLauncher: IKernelLauncher,
3636
kernelSelector: KernelSelector,
@@ -44,6 +44,10 @@ export class RawJupyterSession extends BaseJupyterSession {
4444
public async waitForIdle(_timeout: number): Promise<void> {
4545
// RawKernels are good to go right away
4646
}
47+
public async dispose(): Promise<void> {
48+
this._disposables.forEach((d) => d.dispose());
49+
await super.dispose();
50+
}
4751

4852
public shutdown(): Promise<void> {
4953
if (this.processExitHandler) {
@@ -179,12 +183,19 @@ export class RawJupyterSession extends BaseJupyterSession {
179183
@captureTelemetry(Telemetry.RawKernelStartRawSession, undefined, true)
180184
private async startRawSession(
181185
kernelSpec: IJupyterKernelSpec,
182-
_cancelToken?: CancellationToken
186+
cancelToken?: CancellationToken
183187
): Promise<RawSession> {
184-
const process = await this.kernelLauncher.launch(kernelSpec, this.resource);
185-
186-
// Wait for the process to actually be ready to connect to
187-
await process.ready;
188+
const cancellationPromise = createPromiseFromCancellation({
189+
cancelAction: 'reject',
190+
defaultValue: undefined,
191+
token: cancelToken
192+
}) as Promise<never>;
193+
cancellationPromise.catch(noop);
194+
195+
const process = await Promise.race([
196+
this.kernelLauncher.launch(kernelSpec, this.resource),
197+
cancellationPromise
198+
]);
188199

189200
// Create our raw session, it will own the process lifetime
190201
const result = new RawSession(process);

src/client/datascience/raw-kernel/rawSession.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
import type { Kernel, KernelMessage, ServerConnection, Session } from '@jupyterlab/services';
44
import type { ISignal, Signal } from '@phosphor/signaling';
55
import * as uuid from 'uuid/v4';
6+
import '../../common/extensions';
7+
import { traceError } from '../../common/logger';
8+
import { IDisposable } from '../../common/types';
69
import { IKernelProcess } from '../kernel-launcher/types';
710
import { ISessionWithSocket, KernelSocketInformation } from '../types';
811
import { createRawKernel, RawKernel } from './rawKernel';
@@ -14,6 +17,7 @@ ZMQ Kernel connection can pretend to be a jupyterlab Session
1417
*/
1518
export class RawSession implements ISessionWithSocket {
1619
public isDisposed: boolean = false;
20+
private isDisposing?: boolean;
1721

1822
// Note, ID is the ID of this session
1923
// ClientID is the ID that we pass in messages to the kernel
@@ -22,6 +26,7 @@ export class RawSession implements ISessionWithSocket {
2226
private _clientID: string;
2327
private _kernel: RawKernel;
2428
private readonly _statusChanged: Signal<this, Kernel.Status>;
29+
private readonly exitHandler: IDisposable;
2530

2631
// RawSession owns the lifetime of the kernel process and will dispose it
2732
constructor(public kernelProcess: IKernelProcess) {
@@ -37,10 +42,13 @@ export class RawSession implements ISessionWithSocket {
3742
// Connect our kernel and hook up status changes
3843
this._kernel = createRawKernel(kernelProcess, this._clientID);
3944
this._kernel.statusChanged.connect(this.onKernelStatus, this);
45+
this.exitHandler = kernelProcess.exited(this.handleUnhandledExitingOfKernelProcess, this);
4046
}
4147

4248
public async dispose() {
49+
this.isDisposing = true;
4350
if (!this.isDisposed) {
51+
this.exitHandler.dispose();
4452
await this._kernel.shutdown();
4553
this._kernel.dispose();
4654
this.kernelProcess.dispose().ignoreErrors();
@@ -135,4 +143,12 @@ export class RawSession implements ISessionWithSocket {
135143
private onKernelStatus(_sender: Kernel.IKernelConnection, state: Kernel.Status) {
136144
this._statusChanged.emit(state);
137145
}
146+
private handleUnhandledExitingOfKernelProcess(e: { exitCode?: number | undefined; reason?: string | undefined }) {
147+
if (this.isDisposing) {
148+
return;
149+
}
150+
traceError(`Disposing session as kernel process died ExitCode: ${e.exitCode}, Reason: ${e.reason}`);
151+
// Just kill the session.
152+
this.dispose().ignoreErrors();
153+
}
138154
}

0 commit comments

Comments
 (0)