Skip to content

Commit fc11337

Browse files
committed
improve the comment on the LinkerOptions class
1 parent 275eb71 commit fc11337

File tree

1 file changed

+137
-23
lines changed

1 file changed

+137
-23
lines changed

cuda_core/cuda/core/experimental/_linker.py

Lines changed: 137 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,149 @@
11
from cuda.core.experimental._module import ObjectCode
22
from cuda.core.experimental._utils import check_or_create_options
33
from dataclasses import dataclass
4-
from typing import Optional
4+
from typing import Optional, List
55
from cuda.bindings import nvjitlink
66

77

88
@dataclass
99
class LinkerOptions:
10-
arch: str # /**< -arch=sm_<N> Pass SM architecture value. See nvcc for valid values of <N>. Can use compute_<N> value instead if only generating PTX. This is a required option. */
11-
max_register_count: Optional[int] = None # /**< -maxrregcount=<N> Maximum register count. */
12-
time: Optional[bool] = None # /**< -time Print timing information to InfoLog. */
13-
verbose: Optional[bool] = None # /**< -verbose Print verbose messages to InfoLog. */
14-
link_time_optimization: Optional[bool] = None # /**< -lto Do link time optimization. */
15-
ptx: Optional[bool] = None # /**< -ptx Emit ptx after linking instead of cubin; only supported with -lto. */
16-
optimization_level: Optional[int] = None # /**< -O<N> Optimization level. Only 0 and 3 are accepted. */
17-
debug: Optional[bool] = None # /**< -g Generate debug information. */
18-
lineinfo: Optional[bool] = None # /**< -lineinfo Generate line information. */
19-
ftz: Optional[bool] = None # /**< -ftz=<n> Flush to zero. */
20-
prec_div: Optional[bool] = None # /**< -prec-div=<n> Precise divide. */
21-
prec_sqrt: Optional[bool] = None # /**< -prec-sqrt=<n> Precise square root. */
22-
fma: Optional[bool] = None # /**< -fma=<n> Fast multiply add. */
23-
kernels_used: Optional[list[str]] = None # /**< -kernels-used=<name> Pass list of kernels that are used; any not in the list can be removed. This option can be specified multiple times. */
24-
variables_used: Optional[list[str]] = None # /**< -variables-used=<name> Pass list of variables that are used; any not in the list can be removed. This option can be specified multiple times. */
25-
optimize_unused_variables: Optional[bool] = None # /**< -optimize-unused-variables Normally device code optimization is limited by not knowing what the host code references. With this option it can assume that if a variable is not referenced in device code then it can be removed. */
26-
xptxas: Optional[list[str]] = None # /**< -Xptxas=<opt> Pass <opt> to ptxas. This option can be called multiple times. */
27-
split_compile: Optional[int] = None # /**< -split-compile=<N> Split compilation maximum thread count. Use 0 to use all available processors. Value of 1 disables split compilation (default). */
28-
split_compile_extended: Optional[int] = None # /**< -split-compile-extended=<N> A more aggressive form of split compilation available in LTO mode only. Accepts a maximum thread count value. Use 0 to use all available processors. Value of 1 disables extended split compilation (default). Note: This option can potentially impact performance of the compiled binary. */
29-
jump_table_density: Optional[int] = None # /**< -jump-table-density=<N> When doing LTO, specify the case density percentage in switch statements, and use it as a minimal threshold to determine whether jump table(brx.idx instruction) will be used to implement a switch statement. Default value is 101. The percentage ranges from 0 to 101 inclusively. */
30-
no_cache: Optional[bool] = None # /**< -no-cache Don’t cache the intermediate steps of nvJitLink. */
31-
device_stack_protector: Optional[bool] = None # /**< -device-stack-protector Enable stack canaries in device code. Stack canaries make it more difficult to exploit certain types of memory safety bugs involving stack-local variables. The compiler uses heuristics to assess the risk of such a bug in each function. Only those functions which are deemed high-risk make use of a stack canary. */
10+
"""Customizable :obj:`LinkerOptions` for nvJitLink.
3211
12+
Attributes
13+
----------
14+
arch : str
15+
Pass SM architecture value. Can use compute_<N> value instead if only generating PTX.
16+
This is a required option.
17+
Acceptable value type: str
18+
Maps to: -arch=sm_<N>
19+
max_register_count : int, optional
20+
Maximum register count.
21+
Default: None
22+
Acceptable value type: int
23+
Maps to: -maxrregcount=<N>
24+
time : bool, optional
25+
Print timing information to InfoLog.
26+
Default: False
27+
Acceptable value type: bool
28+
Maps to: -time
29+
verbose : bool, optional
30+
Print verbose messages to InfoLog.
31+
Default: False
32+
Acceptable value type: bool
33+
Maps to: -verbose
34+
link_time_optimization : bool, optional
35+
Perform link time optimization.
36+
Default: False
37+
Acceptable value type: bool
38+
Maps to: -lto
39+
ptx : bool, optional
40+
Emit PTX after linking instead of CUBIN; only supported with -lto.
41+
Default: False
42+
Acceptable value type: bool
43+
Maps to: -ptx
44+
optimization_level : int, optional
45+
Set optimization level. Only 0 and 3 are accepted.
46+
Default: None
47+
Acceptable value type: int
48+
Maps to: -O<N>
49+
debug : bool, optional
50+
Generate debug information.
51+
Default: False
52+
Acceptable value type: bool
53+
Maps to: -g
54+
lineinfo : bool, optional
55+
Generate line information.
56+
Default: False
57+
Acceptable value type: bool
58+
Maps to: -lineinfo
59+
ftz : bool, optional
60+
Flush denormal values to zero.
61+
Default: False
62+
Acceptable value type: bool
63+
Maps to: -ftz=<n>
64+
prec_div : bool, optional
65+
Use precise division.
66+
Default: True
67+
Acceptable value type: bool
68+
Maps to: -prec-div=<n>
69+
prec_sqrt : bool, optional
70+
Use precise square root.
71+
Default: True
72+
Acceptable value type: bool
73+
Maps to: -prec-sqrt=<n>
74+
fma : bool, optional
75+
Use fast multiply-add.
76+
Default: True
77+
Acceptable value type: bool
78+
Maps to: -fma=<n>
79+
kernels_used : List[str], optional
80+
Pass list of kernels that are used; any not in the list can be removed. This option can be specified multiple times.
81+
Default: None
82+
Acceptable value type: list of str
83+
Maps to: -kernels-used=<name>
84+
variables_used : List[str], optional
85+
Pass list of variables that are used; any not in the list can be removed. This option can be specified multiple times.
86+
Default: None
87+
Acceptable value type: list of str
88+
Maps to: -variables-used=<name>
89+
optimize_unused_variables : bool, optional
90+
Assume that if a variable is not referenced in device code, it can be removed.
91+
Default: False
92+
Acceptable value type: bool
93+
Maps to: -optimize-unused-variables
94+
xptxas : List[str], optional
95+
Pass options to PTXAS. This option can be specified multiple times.
96+
Default: None
97+
Acceptable value type: list of str
98+
Maps to: -Xptxas=<opt>
99+
split_compile : int, optional
100+
Split compilation maximum thread count. Use 0 to use all available processors. Value of 1 disables split compilation (default).
101+
Default: 1
102+
Acceptable value type: int
103+
Maps to: -split-compile=<N>
104+
split_compile_extended : int, optional
105+
A more aggressive form of split compilation available in LTO mode only. Accepts a maximum thread count value. Use 0 to use all available processors. Value of 1 disables extended split compilation (default). Note: This option can potentially impact performance of the compiled binary.
106+
Default: 1
107+
Acceptable value type: int
108+
Maps to: -split-compile-extended=<N>
109+
jump_table_density : int, optional
110+
When doing LTO, specify the case density percentage in switch statements, and use it as a minimal threshold to determine whether a jump table (brx.idx instruction) will be used to implement a switch statement. Default value is 101. The percentage ranges from 0 to 101 inclusive.
111+
Default: 101
112+
Acceptable value type: int
113+
Maps to: -jump-table-density=<N>
114+
no_cache : bool, optional
115+
Do not cache the intermediate steps of nvJitLink.
116+
Default: False
117+
Acceptable value type: bool
118+
Maps to: -no-cache
119+
device_stack_protector : bool, optional
120+
Enable stack canaries in device code. Stack canaries make it more difficult to exploit certain types of memory safety bugs involving stack-local variables. The compiler uses heuristics to assess the risk of such a bug in each function. Only those functions which are deemed high-risk make use of a stack canary.
121+
Default: False
122+
Acceptable value type: bool
123+
Maps to: -device-stack-protector
124+
"""
125+
arch: str
126+
max_register_count: Optional[int] = None
127+
time: Optional[bool] = None
128+
verbose: Optional[bool] = None
129+
link_time_optimization: Optional[bool] = None
130+
ptx: Optional[bool] = None
131+
optimization_level: Optional[int] = None
132+
debug: Optional[bool] = None
133+
lineinfo: Optional[bool] = None
134+
ftz: Optional[bool] = None
135+
prec_div: Optional[bool] = None
136+
prec_sqrt: Optional[bool] = None
137+
fma: Optional[bool] = None
138+
kernels_used: Optional[List[str]] = None
139+
variables_used: Optional[List[str]] = None
140+
optimize_unused_variables: Optional[bool] = None
141+
xptxas: Optional[List[str]] = None
142+
split_compile: Optional[int] = None
143+
split_compile_extended: Optional[int] = None
144+
jump_table_density: Optional[int] = None
145+
no_cache: Optional[bool] = None
146+
device_stack_protector: Optional[bool] = None
33147

34148
def __post_init__(self):
35149
self.formatted_options = []

0 commit comments

Comments
 (0)