3
3
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
4
4
5
5
import ctypes
6
- import warnings
7
6
import weakref
8
7
from contextlib import contextmanager
9
8
from dataclasses import dataclass
10
9
from typing import List , Optional
10
+ from warnings import warn
11
11
12
12
from cuda .core .experimental ._device import Device
13
13
from cuda .core .experimental ._module import ObjectCode
23
23
24
24
25
25
# Note: this function is reused in the tests
26
- def _decide_nvjitlink_or_driver ():
26
+ def _decide_nvjitlink_or_driver () -> bool :
27
27
"""Returns True if falling back to the cuLink* driver APIs."""
28
28
global _driver_ver , _driver , _nvjitlink
29
29
if _driver or _nvjitlink :
30
- return
30
+ return _driver is not None
31
31
32
32
_driver_ver = handle_return (driver .cuDriverGetVersion ())
33
33
_driver_ver = (_driver_ver // 1000 , (_driver_ver % 1000 ) // 10 )
@@ -43,7 +43,7 @@ def _decide_nvjitlink_or_driver():
43
43
_nvjitlink = None
44
44
45
45
if _nvjitlink is None :
46
- warnings . warn (
46
+ warn (
47
47
"nvJitLink is not installed or too old (<12.3). Therefore it is not usable "
48
48
"and the culink APIs will be used instead." ,
49
49
stacklevel = 3 ,
@@ -98,78 +98,59 @@ class LinkerOptions:
98
98
will be used.
99
99
max_register_count : int, optional
100
100
Maximum register count.
101
- Maps to: ``-maxrregcount=<N>``.
102
101
time : bool, optional
103
102
Print timing information to the info log.
104
- Maps to ``-time``.
105
103
Default: False.
106
104
verbose : bool, optional
107
105
Print verbose messages to the info log.
108
- Maps to ``-verbose``.
109
106
Default: False.
110
107
link_time_optimization : bool, optional
111
108
Perform link time optimization.
112
- Maps to: ``-lto``.
113
109
Default: False.
114
110
ptx : bool, optional
115
- Emit PTX after linking instead of CUBIN; only supported with ``-lto``.
116
- Maps to ``-ptx``.
111
+ Emit PTX after linking instead of CUBIN; only supported with ``link_time_optimization=True``.
117
112
Default: False.
118
113
optimization_level : int, optional
119
114
Set optimization level. Only 0 and 3 are accepted.
120
- Maps to ``-O<N>``.
121
115
debug : bool, optional
122
116
Generate debug information.
123
- Maps to ``-g``
124
117
Default: False.
125
118
lineinfo : bool, optional
126
119
Generate line information.
127
- Maps to ``-lineinfo``.
128
120
Default: False.
129
121
ftz : bool, optional
130
122
Flush denormal values to zero.
131
- Maps to ``-ftz=<n>``.
132
123
Default: False.
133
124
prec_div : bool, optional
134
125
Use precise division.
135
- Maps to ``-prec-div=<n>``.
136
126
Default: True.
137
127
prec_sqrt : bool, optional
138
128
Use precise square root.
139
- Maps to ``-prec-sqrt=<n>``.
140
129
Default: True.
141
130
fma : bool, optional
142
131
Use fast multiply-add.
143
- Maps to ``-fma=<n>``.
144
132
Default: True.
145
133
kernels_used : List[str], optional
146
134
Pass list of kernels that are used; any not in the list can be removed. This option can be specified multiple
147
135
times.
148
- Maps to ``-kernels-used=<name>``.
149
136
variables_used : List[str], optional
150
137
Pass a list of variables that are used; any not in the list can be removed.
151
- Maps to ``-variables-used=<name>``
152
138
optimize_unused_variables : bool, optional
153
139
Assume that if a variable is not referenced in device code, it can be removed.
154
- Maps to: ``-optimize-unused-variables``
155
140
Default: False.
156
- xptxas : List[str], optional
141
+ ptxas_options : List[str], optional
157
142
Pass options to PTXAS.
158
- Maps to: ``-Xptxas=<opt>``.
159
143
split_compile : int, optional
160
144
Split compilation maximum thread count. Use 0 to use all available processors. Value of 1 disables split
161
145
compilation (default).
162
- Maps to ``-split-compile=<N>``.
163
146
Default: 1.
164
147
split_compile_extended : int, optional
165
148
A more aggressive form of split compilation available in LTO mode only. Accepts a maximum thread count value.
166
149
Use 0 to use all available processors. Value of 1 disables extended split compilation (default). Note: This
167
150
option can potentially impact performance of the compiled binary.
168
- Maps to ``-split-compile-extended=<N>``.
169
151
Default: 1.
170
152
no_cache : bool, optional
171
153
Do not cache the intermediate steps of nvJitLink.
172
- Maps to ``-no-cache``.
173
154
Default: False.
174
155
"""
175
156
@@ -189,7 +170,7 @@ class LinkerOptions:
189
170
kernels_used : Optional [List [str ]] = None
190
171
variables_used : Optional [List [str ]] = None
191
172
optimize_unused_variables : Optional [bool ] = None
192
- xptxas : Optional [List [str ]] = None
173
+ ptxas_options : Optional [List [str ]] = None
193
174
split_compile : Optional [int ] = None
194
175
split_compile_extended : Optional [int ] = None
195
176
no_cache : Optional [bool ] = None
@@ -239,8 +220,8 @@ def _init_nvjitlink(self):
239
220
self .formatted_options .append (f"-variables-used={ variable } " )
240
221
if self .optimize_unused_variables is not None :
241
222
self .formatted_options .append ("-optimize-unused-variables" )
242
- if self .xptxas is not None :
243
- for opt in self .xptxas :
223
+ if self .ptxas_options is not None :
224
+ for opt in self .ptxas_options :
244
225
self .formatted_options .append (f"-Xptxas={ opt } " )
245
226
if self .split_compile is not None :
246
227
self .formatted_options .append (f"-split-compile={ self .split_compile } " )
@@ -290,21 +271,21 @@ def _init_driver(self):
290
271
self .formatted_options .append (1 )
291
272
self .option_keys .append (_driver .CUjit_option .CU_JIT_GENERATE_LINE_INFO )
292
273
if self .ftz is not None :
293
- raise ValueError ("ftz option is deprecated in the driver API" )
274
+ warn ("ftz option is deprecated in the driver API" , DeprecationWarning , stacklevel = 3 )
294
275
if self .prec_div is not None :
295
- raise ValueError ("prec_div option is deprecated in the driver API" )
276
+ warn ("prec_div option is deprecated in the driver API" , DeprecationWarning , stacklevel = 3 )
296
277
if self .prec_sqrt is not None :
297
- raise ValueError ("prec_sqrt option is deprecated in the driver API" )
278
+ warn ("prec_sqrt option is deprecated in the driver API" , DeprecationWarning , stacklevel = 3 )
298
279
if self .fma is not None :
299
- raise ValueError ("fma options is deprecated in the driver API" )
280
+ warn ("fma options is deprecated in the driver API" , DeprecationWarning , stacklevel = 3 )
300
281
if self .kernels_used is not None :
301
- raise ValueError ("kernels_used is deprecated in the driver API" )
282
+ warn ("kernels_used is deprecated in the driver API" , DeprecationWarning , stacklevel = 3 )
302
283
if self .variables_used is not None :
303
- raise ValueError ("variables_used is deprecated in the driver API" )
284
+ warn ("variables_used is deprecated in the driver API" , DeprecationWarning , stacklevel = 3 )
304
285
if self .optimize_unused_variables is not None :
305
- raise ValueError ("optimize_unused_variables is deprecated in the driver API" )
306
- if self .xptxas is not None :
307
- raise ValueError ("xptxas option is not supported by the driver API" )
286
+ warn ("optimize_unused_variables is deprecated in the driver API" , DeprecationWarning , stacklevel = 3 )
287
+ if self .ptxas_options is not None :
288
+ raise ValueError ("ptxas_options option is not supported by the driver API" )
308
289
if self .split_compile is not None :
309
290
raise ValueError ("split_compile option is not supported by the driver API" )
310
291
if self .split_compile_extended is not None :
0 commit comments