Skip to content

Handle failure to launch kernel while connecting #11454

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Apr 28, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ def _monitor_kernel(self):

exit_code = self.kernel.poll()
std_err = self.kernel.stderr.read()
if std_err:
std_err = std_err.decode()
if std_err is not None:
std_err = std_err.decode("utf-8")
self.log.warn("Kernel has exited with exit code %s, %s", exit_code, std_err)
sys.stdout.flush()
sys.stderr.flush()
Expand Down Expand Up @@ -163,6 +163,11 @@ def _exec_module_observable_in_background(
target=self._monitor_kernel, daemon=True, name="kerne_monitor"
).start()

# We could wait for 0.5-1s to ensure kernel doesn't die, however that will not work.
# On windows, launching a conda process can take 4s, hence just spinning up a process takes 4s
# and then python will attempt to run the kernel module, at which point it could fail.
# Meaning there's no way to know it successfully launched the kernel without trying to connect to it.

def signal_kernel(self, signum):
"""Sends a signal to the process group of the kernel (this
usually includes the kernel and any subprocesses spawned by
Expand Down
5 changes: 4 additions & 1 deletion src/client/datascience/kernel-launcher/kernelDaemon.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,15 @@ export class PythonKernelDaemon extends BasePythonDaemon implements IPythonKerne
options: SpawnOptions
): Promise<ObservableExecutionResult<string>> {
const subject = new Subject<Output<string>>();
let stdErr = '';
// Message from daemon when kernel dies.
const KernelDiedNotification = new NotificationType<{ exit_code: string; reason?: string }, void>(
'kernel_died'
);
this.connection.onNotification(KernelDiedNotification, (output) => {
// If we don't have a reason why things failed, then just include the stderr.
subject.error(
new PythonKernelDiedError({ exitCode: parseInt(output.exit_code, 10), reason: output.reason })
new PythonKernelDiedError({ exitCode: parseInt(output.exit_code, 10), reason: output.reason || stdErr })
);
});

Expand All @@ -55,6 +57,7 @@ export class PythonKernelDaemon extends BasePythonDaemon implements IPythonKerne
this.outputObservale.subscribe(
(out) => {
if (out.source === 'stderr' && options.throwOnStdErr) {
stdErr += out.out;
subject.error(new StdErrError(out.out));
} else if (out.source === 'stderr' && options.mergeStdOutErr) {
subject.next({ source: 'stdout', out: out.out });
Expand Down
62 changes: 18 additions & 44 deletions src/client/datascience/kernel-launcher/kernelProcess.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,10 @@
import { ChildProcess } from 'child_process';
import { Event, EventEmitter } from 'vscode';
import { PYTHON_LANGUAGE } from '../../common/constants';
import { WrappedError } from '../../common/errors/errorUtils';
import { traceError, traceInfo, traceWarning } from '../../common/logger';
import { IFileSystem, TemporaryFile } from '../../common/platform/types';
import { IProcessServiceFactory, IPythonExecutionFactory, ObservableExecutionResult } from '../../common/process/types';
import { Resource } from '../../common/types';
import { createDeferred, Deferred, sleep } from '../../common/utils/async';
import * as localize from '../../common/utils/localize';
import { noop, swallowExceptions } from '../../common/utils/misc';
import { IJupyterKernelSpec } from '../types';
import { findIndexOfConnectionFile } from './kernelFinder';
Expand All @@ -24,10 +21,7 @@ import cloneDeep = require('lodash/cloneDeep');
// Launches and disposes a kernel process given a kernelspec and a resource or python interpreter.
// Exposes connection information and the process itself.
export class KernelProcess implements IKernelProcess {
public get ready(): Promise<void> {
return this.readyPromise.promise;
}
public get exited(): Event<number | null> {
public get exited(): Event<{ exitCode?: number; reason?: string }> {
return this.exitEvent.event;
}
public get kernelSpec(): Readonly<IJupyterKernelSpec> {
Expand All @@ -41,10 +35,10 @@ export class KernelProcess implements IKernelProcess {
}
private _process?: ChildProcess;
private connectionFile?: TemporaryFile;
private readyPromise: Deferred<void>;
private exitEvent: EventEmitter<number | null> = new EventEmitter<number | null>();
private exitEvent = new EventEmitter<{ exitCode?: number; reason?: string }>();
private pythonKernelLauncher?: PythonKernelLauncherDaemon;
private launchedOnce?: boolean;
private disposed?: boolean;
private kernelDaemon?: IPythonKernelDaemon;
private readonly _kernelSpec: IJupyterKernelSpec;
private readonly originalKernelSpec: IJupyterKernelSpec;
Expand All @@ -58,7 +52,6 @@ export class KernelProcess implements IKernelProcess {
) {
this.originalKernelSpec = kernelSpec;
this._kernelSpec = cloneDeep(kernelSpec);
this.readyPromise = createDeferred<void>();
}
public async interrupt(): Promise<void> {
if (this.kernelDaemon) {
Expand All @@ -75,12 +68,6 @@ export class KernelProcess implements IKernelProcess {

const exeObs = await this.launchAsObservable();

sleep(1_000)
.then(() => {
this.readyPromise.resolve();
})
.catch(noop);

let stdout = '';
let stderr = '';
exeObs.out.subscribe(
Expand All @@ -91,38 +78,29 @@ export class KernelProcess implements IKernelProcess {
traceWarning(`StdErr from Kernel Process ${output.out}`);
} else {
stdout += output.out;
// Search for --existing this is the message that will indicate that our kernel is actually
// up and started from stdout
// To connect another client to this kernel, use:
// --existing /var/folders/q7/cn8fg6s94fgdcl0h7rbxldf00000gn/T/tmp-16231TOL2dgBoWET1.json
if (!this.readyPromise.completed && stdout.includes('--existing')) {
this.readyPromise.resolve();
}
traceInfo(output.out);
traceInfo(`Kernel Output: ${stdout}`);
}
},
(error) => {
if (this.readyPromise.completed) {
traceInfo('KernelProcess Error', error, stderr);
} else {
traceError('Kernel died before it could start', error, stderr);
// Include original error and stderr in error thrown.
const errorMessage = `${localize.DataScience.rawKernelProcessExitBeforeConnect()}. Error = ${error}, stderr = ${stderr}`;
const errorToWrap = error instanceof Error ? error : new Error(error);
this.readyPromise.reject(new WrappedError(errorMessage, errorToWrap));
if (this.disposed) {
traceInfo('Kernel died', error, stderr);
return;
}
traceError('Kernel died', error, stderr);
if (error instanceof PythonKernelDiedError) {
traceInfo('KernelProcess Exit', `Exit - ${error.exitCode}`);
this.exitEvent.fire(error.exitCode);

// As the kernel has died, kill the daemon.
this.kernelDaemon?.kill().catch(noop); // NOSONAR
if (this.disposed) {
traceInfo('KernelProcess Exit', `Exit - ${error.exitCode}, ${error.reason}`, error);
} else {
traceError('KernelProcess Exit', `Exit - ${error.exitCode}, ${error.reason}`, error);
}
this.exitEvent.fire({ exitCode: error.exitCode, reason: error.reason || error.message });
}
}
);
}

public async dispose(): Promise<void> {
this.disposed = true;
if (this.kernelDaemon) {
await this.kernelDaemon.kill().catch(noop);
swallowExceptions(() => this.kernelDaemon?.dispose());
Expand Down Expand Up @@ -167,16 +145,12 @@ export class KernelProcess implements IKernelProcess {
}

if (exeObs.proc) {
exeObs.proc!.on('exit', (exitCode) => {
exeObs.proc.on('exit', (exitCode) => {
traceInfo('KernelProcess Exit', `Exit - ${exitCode}`);
if (!this.readyPromise.completed) {
this.readyPromise.reject(new Error(localize.DataScience.rawKernelProcessExitBeforeConnect()));
}
this.exitEvent.fire(exitCode);
this.exitEvent.fire({ exitCode: exitCode || undefined });
});
} else {
traceInfo('KernelProcess failed to launch');
this.readyPromise.reject(new Error(localize.DataScience.rawKernelProcessNotStarted()));
throw new Error('KernelProcess failed to launch');
}

this._process = exeObs.proc;
Expand Down
6 changes: 1 addition & 5 deletions src/client/datascience/kernel-launcher/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,11 @@ export interface IKernelConnection {

export interface IKernelProcess extends IAsyncDisposable {
readonly connection: Readonly<IKernelConnection>;
/**
* This promise is resolved when the launched process is ready to get JMP messages
*/
readonly ready: Promise<void>;
readonly kernelSpec: Readonly<IJupyterKernelSpec>;
/**
* This event is triggered if the process is exited
*/
readonly exited: Event<number | null>;
readonly exited: Event<{ exitCode?: number; reason?: string }>;
interrupt(): Promise<void>;
}

Expand Down
23 changes: 17 additions & 6 deletions src/client/datascience/raw-kernel/rawJupyterSession.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jupyterlabs interface as well as starting up and connecting to a raw session
*/
export class RawJupyterSession extends BaseJupyterSession {
private processExitHandler: IDisposable | undefined;

private _disposables: IDisposable[] = [];
constructor(
private readonly kernelLauncher: IKernelLauncher,
kernelSelector: KernelSelector,
Expand All @@ -44,6 +44,10 @@ export class RawJupyterSession extends BaseJupyterSession {
public async waitForIdle(_timeout: number): Promise<void> {
// RawKernels are good to go right away
}
public async dispose(): Promise<void> {
this._disposables.forEach((d) => d.dispose());
await super.dispose();
}

public shutdown(): Promise<void> {
if (this.processExitHandler) {
Expand Down Expand Up @@ -179,12 +183,19 @@ export class RawJupyterSession extends BaseJupyterSession {
@captureTelemetry(Telemetry.RawKernelStartRawSession, undefined, true)
private async startRawSession(
kernelSpec: IJupyterKernelSpec,
_cancelToken?: CancellationToken
cancelToken?: CancellationToken
): Promise<RawSession> {
const process = await this.kernelLauncher.launch(kernelSpec, this.resource);

// Wait for the process to actually be ready to connect to
await process.ready;
const cancellationPromise = createPromiseFromCancellation({
cancelAction: 'reject',
defaultValue: undefined,
token: cancelToken
}) as Promise<never>;
cancellationPromise.catch(noop);

const process = await Promise.race([
this.kernelLauncher.launch(kernelSpec, this.resource),
cancellationPromise
]);

// Create our raw session, it will own the process lifetime
const result = new RawSession(process);
Expand Down
16 changes: 16 additions & 0 deletions src/client/datascience/raw-kernel/rawSession.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
import type { Kernel, KernelMessage, ServerConnection, Session } from '@jupyterlab/services';
import type { ISignal, Signal } from '@phosphor/signaling';
import * as uuid from 'uuid/v4';
import '../../common/extensions';
import { traceError } from '../../common/logger';
import { IDisposable } from '../../common/types';
import { IKernelProcess } from '../kernel-launcher/types';
import { ISessionWithSocket, KernelSocketInformation } from '../types';
import { createRawKernel, RawKernel } from './rawKernel';
Expand All @@ -14,6 +17,7 @@ ZMQ Kernel connection can pretend to be a jupyterlab Session
*/
export class RawSession implements ISessionWithSocket {
public isDisposed: boolean = false;
private isDisposing?: boolean;

// Note, ID is the ID of this session
// ClientID is the ID that we pass in messages to the kernel
Expand All @@ -22,6 +26,7 @@ export class RawSession implements ISessionWithSocket {
private _clientID: string;
private _kernel: RawKernel;
private readonly _statusChanged: Signal<this, Kernel.Status>;
private readonly exitHandler: IDisposable;

// RawSession owns the lifetime of the kernel process and will dispose it
constructor(public kernelProcess: IKernelProcess) {
Expand All @@ -37,10 +42,13 @@ export class RawSession implements ISessionWithSocket {
// Connect our kernel and hook up status changes
this._kernel = createRawKernel(kernelProcess, this._clientID);
this._kernel.statusChanged.connect(this.onKernelStatus, this);
this.exitHandler = kernelProcess.exited(this.handleUnhandledExitingOfKernelProcess, this);
}

public async dispose() {
this.isDisposing = true;
if (!this.isDisposed) {
this.exitHandler.dispose();
await this._kernel.shutdown();
this._kernel.dispose();
this.kernelProcess.dispose().ignoreErrors();
Expand Down Expand Up @@ -135,4 +143,12 @@ export class RawSession implements ISessionWithSocket {
private onKernelStatus(_sender: Kernel.IKernelConnection, state: Kernel.Status) {
this._statusChanged.emit(state);
}
private handleUnhandledExitingOfKernelProcess(e: { exitCode?: number | undefined; reason?: string | undefined }) {
if (this.isDisposing) {
return;
}
traceError(`Disposing session as kernel process died ExitCode: ${e.exitCode}, Reason: ${e.reason}`);
// Just kill the session.
this.dispose().ignoreErrors();
}
}