Skip to content

Commit 938c9e9

Browse files
authored
Miscellaneous path_finder cleanup post release (#621)
* Use win32api.GetModuleFileName() in abs_path_for_dynamic_library(). With this, load_dl_windows.py consistently uses win32api. ctypes is no longer needed, which eliminates the potential for confusion due to different types of handles. * Address review comment #604 (comment) by at-kkraus14 * Rename function run_python_code_safely() → run_in_spawned_child_process() * Change run_in_spawned_child_process() to accept a callable function instead of a string with Python code. * ChatGPT suggestions * Add rethrow as suggested by ChatGPT * Better names: Worker → ChildProcessWrapper, func → target * ChatGPT suggestions * Add minimal test_spawned_process_runner.py as generated by ChatGPT
1 parent ee6b92e commit 938c9e9

File tree

6 files changed

+188
-167
lines changed

6 files changed

+188
-167
lines changed

cuda_bindings/cuda/bindings/_path_finder/README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,13 @@ strategy for locating NVIDIA shared libraries:
3131
- Falls back to native loader:
3232
- `dlopen()` on Linux
3333
- `LoadLibraryW()` on Windows
34+
- Conda installations are expected to be discovered:
35+
- Linux: Via `$ORIGIN/../lib` on `RPATH` (of the `python` binary;
36+
note that this preempts `LD_LIBRARY_PATH` and `/etc/ld.so.conf.d/`)
37+
- Windows: Via `%CONDA_PREFIX%\Library\bin` on system `PATH`
3438
- CTK installations with system config updates are expected to be discovered:
3539
- Linux: Via `/etc/ld.so.conf.d/*cuda*.conf`
3640
- Windows: Via `C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y\bin` on system `PATH`
37-
- Conda installations are expected to be discovered:
38-
- Linux: Via `$ORIGIN/../lib` on `RPATH` (of the `python` binary)
39-
- Windows: Via `%CONDA_PREFIX%\Library\bin` on system `PATH`
4041

4142
3. **Environment variables**
4243
- Relies on `CUDA_HOME` or `CUDA_PATH` environment variables if set

cuda_bindings/cuda/bindings/_path_finder/load_dl_windows.py

Lines changed: 5 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
# Copyright 2025 NVIDIA Corporation. All rights reserved.
22
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
33

4-
import ctypes
5-
import ctypes.wintypes
64
from typing import Optional
75

86
import pywintypes
@@ -36,46 +34,11 @@ def add_dll_directory(dll_abs_path: str) -> None:
3634

3735

3836
def abs_path_for_dynamic_library(libname: str, handle: pywintypes.HANDLE) -> str:
39-
"""Get the absolute path of a loaded dynamic library on Windows.
40-
41-
Args:
42-
handle: The library handle
43-
44-
Returns:
45-
The absolute path to the DLL file
46-
47-
Raises:
48-
OSError: If GetModuleFileNameW fails
49-
RuntimeError: If the required path length is unreasonably long
50-
"""
51-
MAX_ITERATIONS = 10 # Allows for extremely long paths (up to ~266,000 chars)
52-
buf_size = 260 # Start with traditional MAX_PATH
53-
54-
for _ in range(MAX_ITERATIONS):
55-
buf = ctypes.create_unicode_buffer(buf_size)
56-
n_chars = ctypes.windll.kernel32.GetModuleFileNameW(ctypes.wintypes.HMODULE(handle), buf, buf_size)
57-
58-
if n_chars == 0:
59-
raise OSError(
60-
f"GetModuleFileNameW failed ({libname=!r}, {buf_size=}). "
61-
"Long paths may require enabling the "
62-
"Windows 10+ long path registry setting. See: "
63-
"https://docs.python.org/3/using/windows.html#removing-the-max-path-limitation"
64-
)
65-
if n_chars < buf_size - 1:
66-
return buf.value
67-
68-
buf_size *= 2 # Double the buffer size and try again
69-
70-
raise RuntimeError(
71-
f"Failed to retrieve the full path after {MAX_ITERATIONS} attempts "
72-
f"(final buffer size: {buf_size} characters). "
73-
"This may indicate:\n"
74-
" 1. An extremely long path requiring Windows long path support, or\n"
75-
" 2. An invalid or corrupt library handle, or\n"
76-
" 3. An unexpected system error.\n"
77-
"See: https://docs.python.org/3/using/windows.html#removing-the-max-path-limitation"
78-
)
37+
"""Get the absolute path of a loaded dynamic library on Windows."""
38+
try:
39+
return win32api.GetModuleFileName(handle)
40+
except Exception as e:
41+
raise RuntimeError(f"GetModuleFileName failed for {libname!r} (exception type: {type(e)})") from e
7942

8043

8144
def check_if_already_loaded_from_elsewhere(libname: str) -> Optional[LoadedDL]:

cuda_bindings/tests/run_python_code_safely.py

Lines changed: 0 additions & 89 deletions
This file was deleted.
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
# Copyright 2025 NVIDIA Corporation. All rights reserved.
2+
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
3+
4+
import multiprocessing
5+
import queue # for Empty
6+
import sys
7+
import traceback
8+
from dataclasses import dataclass
9+
from io import StringIO
10+
from typing import Any, Callable, Optional, Sequence
11+
12+
PROCESS_KILLED = -9
13+
PROCESS_NO_RESULT = -999
14+
15+
16+
# Similar to https://docs.python.org/3/library/subprocess.html#subprocess.CompletedProcess
17+
# (args, check_returncode() are intentionally not supported here.)
18+
@dataclass
class CompletedProcess:
    """Outcome of one child-process run: exit status plus captured output.

    Modeled loosely on ``subprocess.CompletedProcess``; ``args`` and
    ``check_returncode()`` are deliberately not provided.
    """

    # Exit status reported for the child process.
    returncode: int
    # Text captured from the child's stdout.
    stdout: str
    # Text captured from the child's stderr.
    stderr: str
23+
24+
25+
class ChildProcessWrapper:
    """Callable that runs ``target`` with stdout/stderr captured.

    When called, it invokes ``target(*args, **kwargs)``, captures everything
    written to ``sys.stdout`` / ``sys.stderr`` during the call, and puts a
    ``(returncode, stdout, stderr)`` tuple on ``result_queue``.
    """

    def __init__(self, result_queue, target, args, kwargs):
        """Store the call to perform; ``None`` for args/kwargs means "none given"."""
        self.target = target
        self.args = () if args is None else args
        self.kwargs = {} if kwargs is None else kwargs
        self.result_queue = result_queue

    def __call__(self):
        """Run the target and report ``(returncode, stdout, stderr)`` via the queue.

        The return code is 0 on normal return, 1 if the target raised, and
        follows the interpreter's ``SystemExit`` convention otherwise:
        ``None`` -> 0, an int -> that int, anything else -> the object is
        printed to stderr and the code is 1.
        """
        # Capture stdout/stderr
        old_stdout = sys.stdout
        old_stderr = sys.stderr
        sys.stdout = StringIO()
        sys.stderr = StringIO()

        try:
            self.target(*self.args, **self.kwargs)
            returncode = 0
        except SystemExit as e:  # Handle sys.exit()
            if e.code is None:
                returncode = 0
            elif isinstance(e.code, int):
                returncode = e.code
            else:
                # sys.exit("message") signals failure: the interpreter prints
                # the object to stderr and exits with status 1. Mirror that so
                # such failures are not reported as success.
                print(e.code, file=sys.stderr)
                returncode = 1
        except BaseException:
            traceback.print_exc()  # goes to the captured stderr
            returncode = 1
        finally:
            # Collect outputs and restore streams
            stdout = sys.stdout.getvalue()
            stderr = sys.stderr.getvalue()
            sys.stdout = old_stdout
            sys.stderr = old_stderr
            try:  # noqa: SIM105
                self.result_queue.put((returncode, stdout, stderr))
            except Exception:  # nosec B110
                # Deliberate best effort: if the queue is broken (e.g., parent
                # gone) there is nobody left to report to.
                pass
58+
59+
60+
def run_in_spawned_child_process(
    target: Callable[..., None],
    *,
    args: Optional[Sequence[Any]] = None,
    kwargs: Optional[dict[str, Any]] = None,
    timeout: Optional[float] = None,
    rethrow: bool = False,
) -> CompletedProcess:
    """Run `target` in a spawned child process, capturing stdout/stderr.

    The provided `target` must be defined at the top level of a module, and must
    be importable in the spawned child process. Lambdas, closures, or interactively
    defined functions (e.g., in Jupyter notebooks) will not work.

    Args:
        target: Callable executed in the child as ``target(*args, **kwargs)``.
        args: Positional arguments for `target` (``None`` means none).
        kwargs: Keyword arguments for `target` (``None`` means none).
        timeout: Seconds the child may run before it is terminated;
            ``None`` waits indefinitely.
        rethrow: If true, a nonzero return code raises instead of returning.

    Returns:
        A CompletedProcess. ``returncode`` is PROCESS_KILLED if the child was
        terminated after `timeout`, PROCESS_NO_RESULT if it exited without
        delivering a result, otherwise the code reported by the child.

    Raises:
        ChildProcessError: If `rethrow=True` and the return code is nonzero;
            the message includes the captured stderr.
    """
    import time  # local import: only needed for the deadline bookkeeping below

    ctx = multiprocessing.get_context("spawn")
    result_queue = ctx.Queue()
    process = ctx.Process(target=ChildProcessWrapper(result_queue, target, args, kwargs))
    process.start()

    try:
        deadline = None if timeout is None else time.monotonic() + timeout
        payload = None
        timed_out = False

        # Drain the queue BEFORE joining. A child that has put a large result
        # on the queue cannot exit until the parent has read it, so joining
        # first can deadlock (or be misreported as a timeout).
        while payload is None:
            wait = 0.1
            if deadline is not None:
                wait = max(0.0, min(wait, deadline - time.monotonic()))
            try:
                payload = result_queue.get(timeout=wait)
            except queue.Empty:
                if not process.is_alive():
                    # Child is gone; allow in-flight data one last chance to arrive.
                    try:
                        payload = result_queue.get(timeout=1.0)
                    except (queue.Empty, EOFError):
                        pass
                    break
                if deadline is not None and time.monotonic() >= deadline:
                    timed_out = True
                    break
            except EOFError:
                break

        if payload is not None:
            # The result has been read, so the child is free to exit; it still
            # has to do so within the overall deadline.
            remaining = None if deadline is None else max(0.0, deadline - time.monotonic())
            process.join(remaining)
            timed_out = process.is_alive()

        if timed_out:
            process.terminate()
            process.join()
            result = CompletedProcess(
                returncode=PROCESS_KILLED,
                stdout="",
                stderr=f"Process timed out after {timeout} seconds and was terminated.",
            )
        elif payload is None:
            result = CompletedProcess(
                returncode=PROCESS_NO_RESULT,
                stdout="",
                stderr="Process exited or crashed before returning results.",
            )
        else:
            returncode, stdout, stderr = payload
            result = CompletedProcess(
                returncode=returncode,
                stdout=stdout,
                stderr=stderr,
            )

        if rethrow and result.returncode != 0:
            raise ChildProcessError(
                f"Child process exited with code {result.returncode}.\n"
                "--- stderr-from-child-process ---\n"
                f"{result.stderr}"
                "<end-of-stderr-from-child-process>\n"
            )

        return result

    finally:
        # Best-effort cleanup; never mask the result or exception from above.
        try:
            result_queue.close()
            result_queue.join_thread()
        except Exception:  # nosec B110
            pass
        if process.is_alive():
            process.kill()
            process.join()

cuda_bindings/tests/test_path_finder_load.py

Lines changed: 32 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import sys
66

77
import pytest
8-
from run_python_code_safely import run_python_code_safely
8+
import spawned_process_runner
99

1010
from cuda.bindings import path_finder
1111
from cuda.bindings._path_finder import supported_libs
@@ -38,46 +38,45 @@ def test_all_libnames_expected_lib_symbols_consistency():
3838
assert tuple(sorted(ALL_LIBNAMES)) == tuple(sorted(supported_libs.EXPECTED_LIB_SYMBOLS.keys()))
3939

4040

41-
def build_subprocess_failed_for_libname_message(libname, result):
41+
def build_child_process_failed_for_libname_message(libname, result):
4242
return (
43-
f"Subprocess failed for {libname=!r} with exit code {result.returncode}\n"
44-
f"--- stdout-from-subprocess ---\n{result.stdout}<end-of-stdout-from-subprocess>\n"
45-
f"--- stderr-from-subprocess ---\n{result.stderr}<end-of-stderr-from-subprocess>\n"
43+
f"Child process failed for {libname=!r} with exit code {result.returncode}\n"
44+
f"--- stdout-from-child-process ---\n{result.stdout}<end-of-stdout-from-child-process>\n"
45+
f"--- stderr-from-child-process ---\n{result.stderr}<end-of-stderr-from-child-process>\n"
4646
)
4747

4848

49+
def child_process_func(libname):
    """Check the load paths for `libname`; intended to run in a fresh child process.

    Loads the library three times (first load, second load through the same
    entry point, and an explicit no-cache load) and raises RuntimeError if
    any of the following checks fails:

    - the first load must not report the library as already loaded from elsewhere;
    - the second load must return the very same object as the first;
    - the no-cache load must report the library as already loaded from
      elsewhere, and its path must refer to the same file as the first load.

    On success, prints the repr of the absolute path to stdout.
    """
    # Imports are local so they are performed when the function runs in the child.
    import os

    from cuda.bindings._path_finder.load_nvidia_dynamic_library import _load_nvidia_dynamic_library_no_cache
    from cuda.bindings.path_finder import _load_nvidia_dynamic_library

    loaded_dl_fresh = _load_nvidia_dynamic_library(libname)
    if loaded_dl_fresh.was_already_loaded_from_elsewhere:
        raise RuntimeError("loaded_dl_fresh.was_already_loaded_from_elsewhere")

    loaded_dl_from_cache = _load_nvidia_dynamic_library(libname)
    if loaded_dl_from_cache is not loaded_dl_fresh:
        raise RuntimeError("loaded_dl_from_cache is not loaded_dl_fresh")

    loaded_dl_no_cache = _load_nvidia_dynamic_library_no_cache(libname)
    if not loaded_dl_no_cache.was_already_loaded_from_elsewhere:
        # The message states the failing condition, including the negation,
        # like the other checks in this function.
        raise RuntimeError("not loaded_dl_no_cache.was_already_loaded_from_elsewhere")
    if not os.path.samefile(loaded_dl_no_cache.abs_path, loaded_dl_fresh.abs_path):
        raise RuntimeError(f"not os.path.samefile({loaded_dl_no_cache.abs_path=!r}, {loaded_dl_fresh.abs_path=!r})")

    print(f"{loaded_dl_fresh.abs_path!r}")
70+
71+
4972
@pytest.mark.parametrize("libname", TEST_FIND_OR_LOAD_LIBNAMES)
5073
def test_find_or_load_nvidia_dynamic_library(info_summary_append, libname):
51-
# We intentionally run each dynamic library operation in a subprocess
74+
# We intentionally run each dynamic library operation in a child process
5275
# to ensure isolation of global dynamic linking state (e.g., dlopen handles).
53-
# Without subprocesses, loading/unloading libraries during testing could
76+
# Without child processes, loading/unloading libraries during testing could
5477
# interfere across test cases and lead to nondeterministic or platform-specific failures.
55-
#
56-
# Defining the subprocess code snippets as strings ensures each subprocess
57-
# runs a minimal, independent script tailored to the specific libname and API being tested.
58-
code = f"""\
59-
import os
60-
from cuda.bindings.path_finder import _load_nvidia_dynamic_library
61-
from cuda.bindings._path_finder.load_nvidia_dynamic_library import _load_nvidia_dynamic_library_no_cache
62-
63-
loaded_dl_fresh = _load_nvidia_dynamic_library({libname!r})
64-
if loaded_dl_fresh.was_already_loaded_from_elsewhere:
65-
raise RuntimeError("loaded_dl_fresh.was_already_loaded_from_elsewhere")
66-
67-
loaded_dl_from_cache = _load_nvidia_dynamic_library({libname!r})
68-
if loaded_dl_from_cache is not loaded_dl_fresh:
69-
raise RuntimeError("loaded_dl_from_cache is not loaded_dl_fresh")
70-
71-
loaded_dl_no_cache = _load_nvidia_dynamic_library_no_cache({libname!r})
72-
if not loaded_dl_no_cache.was_already_loaded_from_elsewhere:
73-
raise RuntimeError("loaded_dl_no_cache.was_already_loaded_from_elsewhere")
74-
if not os.path.samefile(loaded_dl_no_cache.abs_path, loaded_dl_fresh.abs_path):
75-
raise RuntimeError(f"not os.path.samefile({{loaded_dl_no_cache.abs_path=!r}}, {{loaded_dl_fresh.abs_path=!r}})")
76-
77-
print(f"{{loaded_dl_fresh.abs_path!r}}")
78-
"""
79-
result = run_python_code_safely(code, timeout=30)
78+
result = spawned_process_runner.run_in_spawned_child_process(child_process_func, args=(libname,), timeout=30)
8079
if result.returncode == 0:
8180
info_summary_append(f"abs_path={result.stdout.rstrip()}")
8281
else:
83-
raise RuntimeError(build_subprocess_failed_for_libname_message(libname, result))
82+
raise RuntimeError(build_child_process_failed_for_libname_message(libname, result))

0 commit comments

Comments
 (0)