Skip to content

Miscellaneous path_finder cleanup post release #621

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
May 9, 2025
7 changes: 4 additions & 3 deletions cuda_bindings/cuda/bindings/_path_finder/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,13 @@ strategy for locating NVIDIA shared libraries:
- Falls back to native loader:
- `dlopen()` on Linux
- `LoadLibraryW()` on Windows
- Conda installations are expected to be discovered:
- Linux: Via `$ORIGIN/../lib` on `RPATH` (of the `python` binary;
note that this preempts `LD_LIBRARY_PATH` and `/etc/ld.so.conf.d/`)
- Windows: Via `%CONDA_PREFIX%\Library\bin` on system `PATH`
- CTK installations with system config updates are expected to be discovered:
- Linux: Via `/etc/ld.so.conf.d/*cuda*.conf`
- Windows: Via `C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y\bin` on system `PATH`
- Conda installations are expected to be discovered:
- Linux: Via `$ORIGIN/../lib` on `RPATH` (of the `python` binary)
- Windows: Via `%CONDA_PREFIX%\Library\bin` on system `PATH`

3. **Environment variables**
- Relies on `CUDA_HOME` or `CUDA_PATH` environment variables if set
Expand Down
47 changes: 5 additions & 42 deletions cuda_bindings/cuda/bindings/_path_finder/load_dl_windows.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
# Copyright 2025 NVIDIA Corporation. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

import ctypes
import ctypes.wintypes
from typing import Optional

import pywintypes
Expand Down Expand Up @@ -36,46 +34,11 @@ def add_dll_directory(dll_abs_path: str) -> None:


def abs_path_for_dynamic_library(libname: str, handle: pywintypes.HANDLE) -> str:
"""Get the absolute path of a loaded dynamic library on Windows.

Args:
handle: The library handle

Returns:
The absolute path to the DLL file

Raises:
OSError: If GetModuleFileNameW fails
RuntimeError: If the required path length is unreasonably long
"""
MAX_ITERATIONS = 10 # Allows for extremely long paths (up to ~266,000 chars)
buf_size = 260 # Start with traditional MAX_PATH

for _ in range(MAX_ITERATIONS):
buf = ctypes.create_unicode_buffer(buf_size)
n_chars = ctypes.windll.kernel32.GetModuleFileNameW(ctypes.wintypes.HMODULE(handle), buf, buf_size)

if n_chars == 0:
raise OSError(
f"GetModuleFileNameW failed ({libname=!r}, {buf_size=}). "
"Long paths may require enabling the "
"Windows 10+ long path registry setting. See: "
"https://docs.python.org/3/using/windows.html#removing-the-max-path-limitation"
)
if n_chars < buf_size - 1:
return buf.value

buf_size *= 2 # Double the buffer size and try again

raise RuntimeError(
f"Failed to retrieve the full path after {MAX_ITERATIONS} attempts "
f"(final buffer size: {buf_size} characters). "
"This may indicate:\n"
" 1. An extremely long path requiring Windows long path support, or\n"
" 2. An invalid or corrupt library handle, or\n"
" 3. An unexpected system error.\n"
"See: https://docs.python.org/3/using/windows.html#removing-the-max-path-limitation"
)
"""Get the absolute path of a loaded dynamic library on Windows."""
try:
return win32api.GetModuleFileName(handle)
except Exception as e:
raise RuntimeError(f"GetModuleFileName failed for {libname!r} (exception type: {type(e)})") from e


def check_if_already_loaded_from_elsewhere(libname: str) -> Optional[LoadedDL]:
Expand Down
89 changes: 0 additions & 89 deletions cuda_bindings/tests/run_python_code_safely.py

This file was deleted.

126 changes: 126 additions & 0 deletions cuda_bindings/tests/spawned_process_runner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# Copyright 2025 NVIDIA Corporation. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

import multiprocessing
import queue # for Empty
import sys
import traceback
from dataclasses import dataclass
from io import StringIO
from typing import Any, Callable, Optional, Sequence

# Sentinel return code reported when the child did not finish within the
# timeout and was terminated by the parent.
PROCESS_KILLED = -9
# Sentinel return code reported when the child exited (or crashed) without
# delivering a (returncode, stdout, stderr) result through the queue.
PROCESS_NO_RESULT = -999


# Similar to https://docs.python.org/3/library/subprocess.html#subprocess.CompletedProcess
# (args, check_returncode() are intentionally not supported here.)
@dataclass
class CompletedProcess:
    """Outcome of running a target in a child process: status plus captured output."""

    # 0 on success; otherwise the child's exit status, or one of the
    # PROCESS_KILLED / PROCESS_NO_RESULT sentinels defined in this module.
    returncode: int
    # Text captured from the child's stdout stream.
    stdout: str
    # Text captured from the child's stderr stream (or a diagnostic message
    # produced by the parent when no result was received).
    stderr: str


class ChildProcessWrapper:
    """Callable that runs ``target`` and reports its outcome through a queue.

    When called, the wrapper redirects ``sys.stdout`` / ``sys.stderr`` to
    in-memory buffers, invokes ``target(*args, **kwargs)``, and finally puts a
    ``(returncode, stdout, stderr)`` tuple on ``result_queue``.

    Return code mapping:
      * normal return                      -> 0
      * ``sys.exit()`` / ``sys.exit(None)`` -> 0
      * ``sys.exit(<int>)``                -> that integer
      * ``sys.exit(<anything else>)``      -> 1, with the value printed to the
        captured stderr (mirrors the interpreter's own SystemExit handling)
      * any other exception                -> 1, with the traceback printed to
        the captured stderr
    """

    def __init__(self, result_queue, target, args, kwargs):
        """Store the call description; ``None`` args/kwargs mean "no arguments"."""
        self.target = target
        self.args = () if args is None else args
        self.kwargs = {} if kwargs is None else kwargs
        self.result_queue = result_queue

    def __call__(self):
        """Run the target with captured output and publish the result tuple."""
        # Capture stdout/stderr
        old_stdout = sys.stdout
        old_stderr = sys.stderr
        sys.stdout = StringIO()
        sys.stderr = StringIO()

        try:
            self.target(*self.args, **self.kwargs)
            returncode = 0
        except SystemExit as e:  # Handle sys.exit()
            if e.code is None:
                returncode = 0
            elif isinstance(e.code, int):
                returncode = e.code
            else:
                # sys.exit("message"): the interpreter prints the value to
                # stderr and exits with status 1; do the same here instead of
                # silently reporting success.
                print(e.code, file=sys.stderr)
                returncode = 1
        except BaseException:
            # Goes to the redirected sys.stderr, so the parent sees the traceback.
            traceback.print_exc()
            returncode = 1
        finally:
            # Collect outputs and restore streams
            stdout = sys.stdout.getvalue()
            stderr = sys.stderr.getvalue()
            sys.stdout = old_stdout
            sys.stderr = old_stderr
            try:  # noqa: SIM105
                self.result_queue.put((returncode, stdout, stderr))
            except Exception:  # nosec B110
                # If the queue is broken (e.g., parent gone) there is nobody
                # left to report to; deliberately give up quietly.
                pass


def run_in_spawned_child_process(
    target: Callable[..., None],
    *,
    args: Optional[Sequence[Any]] = None,
    kwargs: Optional[dict[str, Any]] = None,
    timeout: Optional[float] = None,
    rethrow: bool = False,
) -> CompletedProcess:
    """Run `target` in a spawned child process, capturing stdout/stderr.

    The provided `target` must be defined at the top level of a module, and must
    be importable in the spawned child process. Lambdas, closures, or interactively
    defined functions (e.g., in Jupyter notebooks) will not work.

    Args:
        target: Callable to invoke in the child as ``target(*args, **kwargs)``.
        args: Positional arguments for ``target`` (``None`` means none).
        kwargs: Keyword arguments for ``target`` (``None`` means none).
        timeout: Maximum number of seconds to wait for the child to finish;
            ``None`` waits indefinitely.
        rethrow: If true, a nonzero return code raises instead of returning.

    Returns:
        A ``CompletedProcess``. ``returncode`` is ``PROCESS_KILLED`` if the
        child had to be terminated after ``timeout``, and ``PROCESS_NO_RESULT``
        if the child exited without delivering a result.

    Raises:
        ChildProcessError: If `rethrow=True` and the child process exits with
            a nonzero code; the message includes the captured stderr.
    """
    import time  # local import: only this function needs it

    poll_interval = 0.1  # seconds between child-liveness checks while waiting

    ctx = multiprocessing.get_context("spawn")
    result_queue = ctx.Queue()
    process = ctx.Process(target=ChildProcessWrapper(result_queue, target, args, kwargs))
    process.start()
    deadline = None if timeout is None else time.monotonic() + timeout

    def _remaining() -> Optional[float]:
        # Seconds left until the deadline (never negative); None = no deadline.
        return None if deadline is None else max(0.0, deadline - time.monotonic())

    try:
        # Drain the queue BEFORE joining: a child that has put data on a
        # multiprocessing queue does not exit until that data has been flushed
        # to the pipe, so join-then-get can stall when the captured output is
        # larger than the pipe buffer.
        payload = None
        while True:
            left = _remaining()
            wait = poll_interval if left is None else min(poll_interval, left)
            try:
                payload = result_queue.get(timeout=wait)
                break
            except queue.Empty:
                pass
            except EOFError:
                break
            if not process.is_alive():
                # The child is gone; give a result that may still be in
                # flight one last chance to arrive.
                try:
                    payload = result_queue.get(timeout=1.0)
                except (queue.Empty, EOFError):
                    pass
                break
            if left is not None and left <= 0.0:
                break  # deadline reached without a result

        # The child must also have exited within the overall timeout.
        process.join(_remaining())
        if process.is_alive():
            process.terminate()
            process.join()
            result = CompletedProcess(
                returncode=PROCESS_KILLED,
                stdout="",
                stderr=f"Process timed out after {timeout} seconds and was terminated.",
            )
        elif payload is None:
            result = CompletedProcess(
                returncode=PROCESS_NO_RESULT,
                stdout="",
                stderr="Process exited or crashed before returning results.",
            )
        else:
            returncode, stdout, stderr = payload
            result = CompletedProcess(
                returncode=returncode,
                stdout=stdout,
                stderr=stderr,
            )

        if rethrow and result.returncode != 0:
            raise ChildProcessError(
                f"Child process exited with code {result.returncode}.\n"
                "--- stderr-from-child-process ---\n"
                f"{result.stderr}"
                "<end-of-stderr-from-child-process>\n"
            )

        return result

    finally:
        # Best-effort cleanup: release queue resources and make sure no child
        # process is left behind, whatever happened above.
        try:
            result_queue.close()
            result_queue.join_thread()
        except Exception:  # nosec B110
            pass
        if process.is_alive():
            process.kill()
            process.join()
65 changes: 32 additions & 33 deletions cuda_bindings/tests/test_path_finder_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import sys

import pytest
from run_python_code_safely import run_python_code_safely
import spawned_process_runner

from cuda.bindings import path_finder
from cuda.bindings._path_finder import supported_libs
Expand Down Expand Up @@ -38,46 +38,45 @@ def test_all_libnames_expected_lib_symbols_consistency():
assert tuple(sorted(ALL_LIBNAMES)) == tuple(sorted(supported_libs.EXPECTED_LIB_SYMBOLS.keys()))


def build_subprocess_failed_for_libname_message(libname, result):
def build_child_process_failed_for_libname_message(libname, result):
return (
f"Subprocess failed for {libname=!r} with exit code {result.returncode}\n"
f"--- stdout-from-subprocess ---\n{result.stdout}<end-of-stdout-from-subprocess>\n"
f"--- stderr-from-subprocess ---\n{result.stderr}<end-of-stderr-from-subprocess>\n"
f"Child process failed for {libname=!r} with exit code {result.returncode}\n"
f"--- stdout-from-child-process ---\n{result.stdout}<end-of-stdout-from-child-process>\n"
f"--- stderr-from-child-process ---\n{result.stderr}<end-of-stderr-from-child-process>\n"
)


def child_process_func(libname):
    """Load `libname` three ways and verify the results agree.

    Steps:
      1. First load: must not report the library as already loaded from
         elsewhere.
      2. Second load through the same entry point: must return the identical
         object as the first load.
      3. Load through the no-cache entry point: must report the library as
         already loaded from elsewhere, and must resolve to the same file.

    On success, prints the repr of the absolute path of the loaded library.

    Raises:
        RuntimeError: If any of the checks above fails.
    """
    # Imports are local to the function; presumably so that they are executed
    # inside the spawned child process rather than at test-module import time.
    import os

    from cuda.bindings._path_finder.load_nvidia_dynamic_library import _load_nvidia_dynamic_library_no_cache
    from cuda.bindings.path_finder import _load_nvidia_dynamic_library

    loaded_dl_fresh = _load_nvidia_dynamic_library(libname)
    if loaded_dl_fresh.was_already_loaded_from_elsewhere:
        raise RuntimeError("loaded_dl_fresh.was_already_loaded_from_elsewhere")

    loaded_dl_from_cache = _load_nvidia_dynamic_library(libname)
    if loaded_dl_from_cache is not loaded_dl_fresh:
        raise RuntimeError("loaded_dl_from_cache is not loaded_dl_fresh")

    loaded_dl_no_cache = _load_nvidia_dynamic_library_no_cache(libname)
    if not loaded_dl_no_cache.was_already_loaded_from_elsewhere:
        # The message states the condition that actually failed ("not ..."),
        # matching the style of the other checks in this function.
        raise RuntimeError("not loaded_dl_no_cache.was_already_loaded_from_elsewhere")
    if not os.path.samefile(loaded_dl_no_cache.abs_path, loaded_dl_fresh.abs_path):
        raise RuntimeError(f"not os.path.samefile({loaded_dl_no_cache.abs_path=!r}, {loaded_dl_fresh.abs_path=!r})")

    # The parent reads this line from the captured stdout.
    print(f"{loaded_dl_fresh.abs_path!r}")


@pytest.mark.parametrize("libname", TEST_FIND_OR_LOAD_LIBNAMES)
def test_find_or_load_nvidia_dynamic_library(info_summary_append, libname):
# We intentionally run each dynamic library operation in a subprocess
# We intentionally run each dynamic library operation in a child process
# to ensure isolation of global dynamic linking state (e.g., dlopen handles).
# Without subprocesses, loading/unloading libraries during testing could
# Without child processes, loading/unloading libraries during testing could
# interfere across test cases and lead to nondeterministic or platform-specific failures.
#
# Defining the subprocess code snippets as strings ensures each subprocess
# runs a minimal, independent script tailored to the specific libname and API being tested.
code = f"""\
import os
from cuda.bindings.path_finder import _load_nvidia_dynamic_library
from cuda.bindings._path_finder.load_nvidia_dynamic_library import _load_nvidia_dynamic_library_no_cache

loaded_dl_fresh = _load_nvidia_dynamic_library({libname!r})
if loaded_dl_fresh.was_already_loaded_from_elsewhere:
raise RuntimeError("loaded_dl_fresh.was_already_loaded_from_elsewhere")

loaded_dl_from_cache = _load_nvidia_dynamic_library({libname!r})
if loaded_dl_from_cache is not loaded_dl_fresh:
raise RuntimeError("loaded_dl_from_cache is not loaded_dl_fresh")

loaded_dl_no_cache = _load_nvidia_dynamic_library_no_cache({libname!r})
if not loaded_dl_no_cache.was_already_loaded_from_elsewhere:
raise RuntimeError("loaded_dl_no_cache.was_already_loaded_from_elsewhere")
if not os.path.samefile(loaded_dl_no_cache.abs_path, loaded_dl_fresh.abs_path):
raise RuntimeError(f"not os.path.samefile({{loaded_dl_no_cache.abs_path=!r}}, {{loaded_dl_fresh.abs_path=!r}})")

print(f"{{loaded_dl_fresh.abs_path!r}}")
"""
result = run_python_code_safely(code, timeout=30)
result = spawned_process_runner.run_in_spawned_child_process(child_process_func, args=(libname,), timeout=30)
if result.returncode == 0:
info_summary_append(f"abs_path={result.stdout.rstrip()}")
else:
raise RuntimeError(build_subprocess_failed_for_libname_message(libname, result))
raise RuntimeError(build_child_process_failed_for_libname_message(libname, result))
Loading
Loading