|
1 | 1 | from cuda.core.experimental._module import ObjectCode
|
2 | 2 | from cuda.core.experimental._utils import check_or_create_options
|
3 | 3 | from dataclasses import dataclass
|
4 |
| -from typing import Optional |
| 4 | +from typing import Optional, List |
5 | 5 | from cuda.bindings import nvjitlink
|
6 | 6 |
|
7 | 7 |
|
8 | 8 | @dataclass
|
9 | 9 | class LinkerOptions:
|
10 |
| - arch: str # /**< -arch=sm_<N> Pass SM architecture value. See nvcc for valid values of <N>. Can use compute_<N> value instead if only generating PTX. This is a required option. */ |
11 |
| - max_register_count: Optional[int] = None # /**< -maxrregcount=<N> Maximum register count. */ |
12 |
| - time: Optional[bool] = None # /**< -time Print timing information to InfoLog. */ |
13 |
| - verbose: Optional[bool] = None # /**< -verbose Print verbose messages to InfoLog. */ |
14 |
| - link_time_optimization: Optional[bool] = None # /**< -lto Do link time optimization. */ |
15 |
| - ptx: Optional[bool] = None # /**< -ptx Emit ptx after linking instead of cubin; only supported with -lto. */ |
16 |
| - optimization_level: Optional[int] = None # /**< -O<N> Optimization level. Only 0 and 3 are accepted. */ |
17 |
| - debug: Optional[bool] = None # /**< -g Generate debug information. */ |
18 |
| - lineinfo: Optional[bool] = None # /**< -lineinfo Generate line information. */ |
19 |
| - ftz: Optional[bool] = None # /**< -ftz=<n> Flush to zero. */ |
20 |
| - prec_div: Optional[bool] = None # /**< -prec-div=<n> Precise divide. */ |
21 |
| - prec_sqrt: Optional[bool] = None # /**< -prec-sqrt=<n> Precise square root. */ |
22 |
| - fma: Optional[bool] = None # /**< -fma=<n> Fast multiply add. */ |
23 |
| - kernels_used: Optional[list[str]] = None # /**< -kernels-used=<name> Pass list of kernels that are used; any not in the list can be removed. This option can be specified multiple times. */ |
24 |
| - variables_used: Optional[list[str]] = None # /**< -variables-used=<name> Pass list of variables that are used; any not in the list can be removed. This option can be specified multiple times. */ |
25 |
| - optimize_unused_variables: Optional[bool] = None # /**< -optimize-unused-variables Normally device code optimization is limited by not knowing what the host code references. With this option it can assume that if a variable is not referenced in device code then it can be removed. */ |
26 |
| - xptxas: Optional[list[str]] = None # /**< -Xptxas=<opt> Pass <opt> to ptxas. This option can be called multiple times. */ |
27 |
| - split_compile: Optional[int] = None # /**< -split-compile=<N> Split compilation maximum thread count. Use 0 to use all available processors. Value of 1 disables split compilation (default). */ |
28 |
| - split_compile_extended: Optional[int] = None # /**< -split-compile-extended=<N> A more aggressive form of split compilation available in LTO mode only. Accepts a maximum thread count value. Use 0 to use all available processors. Value of 1 disables extended split compilation (default). Note: This option can potentially impact performance of the compiled binary. */ |
29 |
| - jump_table_density: Optional[int] = None # /**< -jump-table-density=<N> When doing LTO, specify the case density percentage in switch statements, and use it as a minimal threshold to determine whether jump table(brx.idx instruction) will be used to implement a switch statement. Default value is 101. The percentage ranges from 0 to 101 inclusively. */ |
30 |
| - no_cache: Optional[bool] = None # /**< -no-cache Don’t cache the intermediate steps of nvJitLink. */ |
31 |
| - device_stack_protector: Optional[bool] = None # /**< -device-stack-protector Enable stack canaries in device code. Stack canaries make it more difficult to exploit certain types of memory safety bugs involving stack-local variables. The compiler uses heuristics to assess the risk of such a bug in each function. Only those functions which are deemed high-risk make use of a stack canary. */ |
| 10 | + """Customizable :obj:`LinkerOptions` for nvJitLink. |
32 | 11 |
|
| 12 | + Attributes |
| 13 | + ---------- |
| 14 | + arch : str |
| 15 | + Pass SM architecture value. Can use compute_<N> value instead if only generating PTX. |
| 16 | + This is a required option. |
| 17 | + Acceptable value type: str |
| 18 | + Maps to: -arch=sm_<N> |
| 19 | + max_register_count : int, optional |
| 20 | + Maximum register count. |
| 21 | + Default: None |
| 22 | + Acceptable value type: int |
| 23 | + Maps to: -maxrregcount=<N> |
| 24 | + time : bool, optional |
| 25 | + Print timing information to InfoLog. |
| 26 | + Default: False |
| 27 | + Acceptable value type: bool |
| 28 | + Maps to: -time |
| 29 | + verbose : bool, optional |
| 30 | + Print verbose messages to InfoLog. |
| 31 | + Default: False |
| 32 | + Acceptable value type: bool |
| 33 | + Maps to: -verbose |
| 34 | + link_time_optimization : bool, optional |
| 35 | + Perform link time optimization. |
| 36 | + Default: False |
| 37 | + Acceptable value type: bool |
| 38 | + Maps to: -lto |
| 39 | + ptx : bool, optional |
| 40 | + Emit PTX after linking instead of CUBIN; only supported with -lto. |
| 41 | + Default: False |
| 42 | + Acceptable value type: bool |
| 43 | + Maps to: -ptx |
| 44 | + optimization_level : int, optional |
| 45 | + Set optimization level. Only 0 and 3 are accepted. |
| 46 | + Default: None |
| 47 | + Acceptable value type: int |
| 48 | + Maps to: -O<N> |
| 49 | + debug : bool, optional |
| 50 | + Generate debug information. |
| 51 | + Default: False |
| 52 | + Acceptable value type: bool |
| 53 | + Maps to: -g |
| 54 | + lineinfo : bool, optional |
| 55 | + Generate line information. |
| 56 | + Default: False |
| 57 | + Acceptable value type: bool |
| 58 | + Maps to: -lineinfo |
| 59 | + ftz : bool, optional |
| 60 | + Flush denormal values to zero. |
| 61 | + Default: False |
| 62 | + Acceptable value type: bool |
| 63 | + Maps to: -ftz=<n> |
| 64 | + prec_div : bool, optional |
| 65 | + Use precise division. |
| 66 | + Default: True |
| 67 | + Acceptable value type: bool |
| 68 | + Maps to: -prec-div=<n> |
| 69 | + prec_sqrt : bool, optional |
| 70 | + Use precise square root. |
| 71 | + Default: True |
| 72 | + Acceptable value type: bool |
| 73 | + Maps to: -prec-sqrt=<n> |
| 74 | + fma : bool, optional |
| 75 | + Use fast multiply-add. |
| 76 | + Default: True |
| 77 | + Acceptable value type: bool |
| 78 | + Maps to: -fma=<n> |
| 79 | + kernels_used : List[str], optional |
| 80 | + Pass list of kernels that are used; any not in the list can be removed. This option can be specified multiple times. |
| 81 | + Default: None |
| 82 | + Acceptable value type: list of str |
| 83 | + Maps to: -kernels-used=<name> |
| 84 | + variables_used : List[str], optional |
| 85 | + Pass list of variables that are used; any not in the list can be removed. This option can be specified multiple times. |
| 86 | + Default: None |
| 87 | + Acceptable value type: list of str |
| 88 | + Maps to: -variables-used=<name> |
| 89 | + optimize_unused_variables : bool, optional |
| 90 | + Assume that if a variable is not referenced in device code, it can be removed. |
| 91 | + Default: False |
| 92 | + Acceptable value type: bool |
| 93 | + Maps to: -optimize-unused-variables |
| 94 | + xptxas : List[str], optional |
| 95 | + Pass options to PTXAS. This option can be called multiple times. |
| 96 | + Default: None |
| 97 | + Acceptable value type: list of str |
| 98 | + Maps to: -Xptxas=<opt> |
| 99 | + split_compile : int, optional |
| 100 | + Split compilation maximum thread count. Use 0 to use all available processors. Value of 1 disables split compilation (default). |
| 101 | + Default: 1 |
| 102 | + Acceptable value type: int |
| 103 | + Maps to: -split-compile=<N> |
| 104 | + split_compile_extended : int, optional |
| 105 | + A more aggressive form of split compilation available in LTO mode only. Accepts a maximum thread count value. Use 0 to use all available processors. Value of 1 disables extended split compilation (default). Note: This option can potentially impact performance of the compiled binary. |
| 106 | + Default: 1 |
| 107 | + Acceptable value type: int |
| 108 | + Maps to: -split-compile-extended=<N> |
| 109 | + jump_table_density : int, optional |
| 110 | + When doing LTO, specify the case density percentage in switch statements, and use it as a minimal threshold to determine whether jump table (brx.idx instruction) will be used to implement a switch statement. Default value is 101. The percentage ranges from 0 to 101 inclusively. |
| 111 | + Default: 101 |
| 112 | + Acceptable value type: int |
| 113 | + Maps to: -jump-table-density=<N> |
| 114 | + no_cache : bool, optional |
| 115 | + Do not cache the intermediate steps of nvJitLink. |
| 116 | + Default: False |
| 117 | + Acceptable value type: bool |
| 118 | + Maps to: -no-cache |
| 119 | + device_stack_protector : bool, optional |
| 120 | + Enable stack canaries in device code. Stack canaries make it more difficult to exploit certain types of memory safety bugs involving stack-local variables. The compiler uses heuristics to assess the risk of such a bug in each function. Only those functions which are deemed high-risk make use of a stack canary. |
| 121 | + Default: False |
| 122 | + Acceptable value type: bool |
| 123 | + Maps to: -device-stack-protector |
| 124 | + """ |
| 125 | + arch: str |
| 126 | + max_register_count: Optional[int] = None |
| 127 | + time: Optional[bool] = None |
| 128 | + verbose: Optional[bool] = None |
| 129 | + link_time_optimization: Optional[bool] = None |
| 130 | + ptx: Optional[bool] = None |
| 131 | + optimization_level: Optional[int] = None |
| 132 | + debug: Optional[bool] = None |
| 133 | + lineinfo: Optional[bool] = None |
| 134 | + ftz: Optional[bool] = None |
| 135 | + prec_div: Optional[bool] = None |
| 136 | + prec_sqrt: Optional[bool] = None |
| 137 | + fma: Optional[bool] = None |
| 138 | + kernels_used: Optional[List[str]] = None |
| 139 | + variables_used: Optional[List[str]] = None |
| 140 | + optimize_unused_variables: Optional[bool] = None |
| 141 | + xptxas: Optional[List[str]] = None |
| 142 | + split_compile: Optional[int] = None |
| 143 | + split_compile_extended: Optional[int] = None |
| 144 | + jump_table_density: Optional[int] = None |
| 145 | + no_cache: Optional[bool] = None |
| 146 | + device_stack_protector: Optional[bool] = None |
33 | 147 |
|
34 | 148 | def __post_init__(self):
|
35 | 149 | self.formatted_options = []
|
|
0 commit comments