Skip to content

Commit fc63838

Browse files
authored
Merge branch 'main' into update-release-checklist
2 parents 19e3412 + 8ed9c03 commit fc63838

File tree

11 files changed

+161
-135
lines changed

11 files changed

+161
-135
lines changed

cuda_bindings/docs/build_docs.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ if [[ -z "${SPHINX_CUDA_BINDINGS_VER}" ]]; then
2323
fi
2424

2525
# build the docs (in parallel)
26-
SPHINXOPTS="-j 4" make html
26+
SPHINXOPTS="-j 4 -d build/.doctrees" make html
2727

2828
# for debugging/developing (conf.py), please comment out the above line and
2929
# use the line below instead, as we must build in serial to avoid getting

cuda_bindings/docs/source/overview.md

Lines changed: 0 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,3 @@
1-
---
2-
jupytext:
3-
text_representation:
4-
format_name: myst
5-
kernelspec:
6-
display_name: Python 3
7-
name: python3
8-
---
9-
101
# Overview
112

123
<p style="font-size: 14px; color: grey; text-align: right;">by <a

cuda_core/cuda/core/experimental/_linker.py

Lines changed: 18 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -3,11 +3,11 @@
33
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
44

55
import ctypes
6-
import warnings
76
import weakref
87
from contextlib import contextmanager
98
from dataclasses import dataclass
109
from typing import List, Optional
10+
from warnings import warn
1111

1212
from cuda.core.experimental._device import Device
1313
from cuda.core.experimental._module import ObjectCode
@@ -23,11 +23,11 @@
2323

2424

2525
# Note: this function is reused in the tests
26-
def _decide_nvjitlink_or_driver():
26+
def _decide_nvjitlink_or_driver() -> bool:
2727
"""Returns True if falling back to the cuLink* driver APIs."""
2828
global _driver_ver, _driver, _nvjitlink
2929
if _driver or _nvjitlink:
30-
return
30+
return _driver is not None
3131

3232
_driver_ver = handle_return(driver.cuDriverGetVersion())
3333
_driver_ver = (_driver_ver // 1000, (_driver_ver % 1000) // 10)
@@ -43,7 +43,7 @@ def _decide_nvjitlink_or_driver():
4343
_nvjitlink = None
4444

4545
if _nvjitlink is None:
46-
warnings.warn(
46+
warn(
4747
"nvJitLink is not installed or too old (<12.3). Therefore it is not usable "
4848
"and the culink APIs will be used instead.",
4949
stacklevel=3,
@@ -98,78 +98,59 @@ class LinkerOptions:
9898
will be used.
9999
max_register_count : int, optional
100100
Maximum register count.
101-
Maps to: ``-maxrregcount=<N>``.
102101
time : bool, optional
103102
Print timing information to the info log.
104-
Maps to ``-time``.
105103
Default: False.
106104
verbose : bool, optional
107105
Print verbose messages to the info log.
108-
Maps to ``-verbose``.
109106
Default: False.
110107
link_time_optimization : bool, optional
111108
Perform link time optimization.
112-
Maps to: ``-lto``.
113109
Default: False.
114110
ptx : bool, optional
115-
Emit PTX after linking instead of CUBIN; only supported with ``-lto``.
116-
Maps to ``-ptx``.
111+
Emit PTX after linking instead of CUBIN; only supported with ``link_time_optimization=True``.
117112
Default: False.
118113
optimization_level : int, optional
119114
Set optimization level. Only 0 and 3 are accepted.
120-
Maps to ``-O<N>``.
121115
debug : bool, optional
122116
Generate debug information.
123-
Maps to ``-g``
124117
Default: False.
125118
lineinfo : bool, optional
126119
Generate line information.
127-
Maps to ``-lineinfo``.
128120
Default: False.
129121
ftz : bool, optional
130122
Flush denormal values to zero.
131-
Maps to ``-ftz=<n>``.
132123
Default: False.
133124
prec_div : bool, optional
134125
Use precise division.
135-
Maps to ``-prec-div=<n>``.
136126
Default: True.
137127
prec_sqrt : bool, optional
138128
Use precise square root.
139-
Maps to ``-prec-sqrt=<n>``.
140129
Default: True.
141130
fma : bool, optional
142131
Use fast multiply-add.
143-
Maps to ``-fma=<n>``.
144132
Default: True.
145133
kernels_used : List[str], optional
146134
Pass list of kernels that are used; any not in the list can be removed. This option can be specified multiple
147135
times.
148-
Maps to ``-kernels-used=<name>``.
149136
variables_used : List[str], optional
150137
Pass a list of variables that are used; any not in the list can be removed.
151-
Maps to ``-variables-used=<name>``
152138
optimize_unused_variables : bool, optional
153139
Assume that if a variable is not referenced in device code, it can be removed.
154-
Maps to: ``-optimize-unused-variables``
155140
Default: False.
156-
xptxas : List[str], optional
141+
ptxas_options : List[str], optional
157142
Pass options to PTXAS.
158-
Maps to: ``-Xptxas=<opt>``.
159143
split_compile : int, optional
160144
Split compilation maximum thread count. Use 0 to use all available processors. Value of 1 disables split
161145
compilation (default).
162-
Maps to ``-split-compile=<N>``.
163146
Default: 1.
164147
split_compile_extended : int, optional
165148
A more aggressive form of split compilation available in LTO mode only. Accepts a maximum thread count value.
166149
Use 0 to use all available processors. Value of 1 disables extended split compilation (default). Note: This
167150
option can potentially impact performance of the compiled binary.
168-
Maps to ``-split-compile-extended=<N>``.
169151
Default: 1.
170152
no_cache : bool, optional
171153
Do not cache the intermediate steps of nvJitLink.
172-
Maps to ``-no-cache``.
173154
Default: False.
174155
"""
175156

@@ -189,7 +170,7 @@ class LinkerOptions:
189170
kernels_used: Optional[List[str]] = None
190171
variables_used: Optional[List[str]] = None
191172
optimize_unused_variables: Optional[bool] = None
192-
xptxas: Optional[List[str]] = None
173+
ptxas_options: Optional[List[str]] = None
193174
split_compile: Optional[int] = None
194175
split_compile_extended: Optional[int] = None
195176
no_cache: Optional[bool] = None
@@ -239,8 +220,8 @@ def _init_nvjitlink(self):
239220
self.formatted_options.append(f"-variables-used={variable}")
240221
if self.optimize_unused_variables is not None:
241222
self.formatted_options.append("-optimize-unused-variables")
242-
if self.xptxas is not None:
243-
for opt in self.xptxas:
223+
if self.ptxas_options is not None:
224+
for opt in self.ptxas_options:
244225
self.formatted_options.append(f"-Xptxas={opt}")
245226
if self.split_compile is not None:
246227
self.formatted_options.append(f"-split-compile={self.split_compile}")
@@ -290,21 +271,21 @@ def _init_driver(self):
290271
self.formatted_options.append(1)
291272
self.option_keys.append(_driver.CUjit_option.CU_JIT_GENERATE_LINE_INFO)
292273
if self.ftz is not None:
293-
raise ValueError("ftz option is deprecated in the driver API")
274+
warn("ftz option is deprecated in the driver API", DeprecationWarning, stacklevel=3)
294275
if self.prec_div is not None:
295-
raise ValueError("prec_div option is deprecated in the driver API")
276+
warn("prec_div option is deprecated in the driver API", DeprecationWarning, stacklevel=3)
296277
if self.prec_sqrt is not None:
297-
raise ValueError("prec_sqrt option is deprecated in the driver API")
278+
warn("prec_sqrt option is deprecated in the driver API", DeprecationWarning, stacklevel=3)
298279
if self.fma is not None:
299-
raise ValueError("fma options is deprecated in the driver API")
280+
warn("fma options is deprecated in the driver API", DeprecationWarning, stacklevel=3)
300281
if self.kernels_used is not None:
301-
raise ValueError("kernels_used is deprecated in the driver API")
282+
warn("kernels_used is deprecated in the driver API", DeprecationWarning, stacklevel=3)
302283
if self.variables_used is not None:
303-
raise ValueError("variables_used is deprecated in the driver API")
284+
warn("variables_used is deprecated in the driver API", DeprecationWarning, stacklevel=3)
304285
if self.optimize_unused_variables is not None:
305-
raise ValueError("optimize_unused_variables is deprecated in the driver API")
306-
if self.xptxas is not None:
307-
raise ValueError("xptxas option is not supported by the driver API")
286+
warn("optimize_unused_variables is deprecated in the driver API", DeprecationWarning, stacklevel=3)
287+
if self.ptxas_options is not None:
288+
raise ValueError("ptxas_options option is not supported by the driver API")
308289
if self.split_compile is not None:
309290
raise ValueError("split_compile option is not supported by the driver API")
310291
if self.split_compile_extended is not None:

cuda_core/cuda/core/experimental/_memoryview.pyx

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -48,20 +48,20 @@ cdef class StridedMemoryView:
4848
----------
4949
ptr : int
5050
Pointer to the tensor buffer (as a Python `int`).
51-
shape: tuple
51+
shape : tuple
5252
Shape of the tensor.
53-
strides: tuple
53+
strides : tuple
5454
Strides of the tensor (in **counts**, not bytes).
5555
dtype: numpy.dtype
5656
Data type of the tensor.
57-
device_id: int
57+
device_id : int
5858
The device ID for where the tensor is located. It is -1 for CPU tensors
5959
(meaning those only accessible from the host).
60-
is_device_accessible: bool
60+
is_device_accessible : bool
6161
Whether the tensor data can be accessed on the GPU.
6262
readonly: bool
6363
Whether the tensor data can be modified in place.
64-
exporting_obj: Any
64+
exporting_obj : Any
6565
A reference to the original tensor object that is being viewed.
6666
6767
Parameters
@@ -334,7 +334,8 @@ cdef StridedMemoryView view_as_cai(obj, stream_ptr, view=None):
334334

335335

336336
def args_viewable_as_strided_memory(tuple arg_indices):
337-
"""Decorator to create proxy objects to :obj:`StridedMemoryView` for the
337+
"""
338+
Decorator to create proxy objects to :obj:`StridedMemoryView` for the
338339
specified positional arguments.
339340
340341
This allows array/tensor attributes to be accessed inside the function

0 commit comments

Comments
 (0)