Improve the documentation of the LinkerOptions class

ksimpson-work · ksimpson-work · commit fc113377923d · 2024-11-14T10:56:43.000-08:00
diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py
@@ -1,35 +1,149 @@
 from cuda.core.experimental._module import ObjectCode
 from cuda.core.experimental._utils import check_or_create_options
 from dataclasses import dataclass
-from typing import Optional
+from typing import Optional, List
 from cuda.bindings import nvjitlink
 
 
 @dataclass
 class LinkerOptions:
-    arch: str  # /**< -arch=sm_<N> Pass SM architecture value. See nvcc for valid values of <N>. Can use compute_<N> value instead if only generating PTX. This is a required option. */
-    max_register_count: Optional[int] = None  # /**< -maxrregcount=<N> Maximum register count. */
-    time: Optional[bool] = None  # /**< -time Print timing information to InfoLog. */
-    verbose: Optional[bool] = None  # /**< -verbose Print verbose messages to InfoLog. */
-    link_time_optimization: Optional[bool] = None  # /**< -lto Do link time optimization. */
-    ptx: Optional[bool] = None  # /**< -ptx Emit ptx after linking instead of cubin; only supported with -lto. */
-    optimization_level: Optional[int] = None  # /**< -O<N> Optimization level. Only 0 and 3 are accepted. */
-    debug: Optional[bool] = None  # /**< -g Generate debug information. */
-    lineinfo: Optional[bool] = None  # /**< -lineinfo Generate line information. */
-    ftz: Optional[bool] = None  # /**< -ftz=<n> Flush to zero. */
-    prec_div: Optional[bool] = None  # /**< -prec-div=<n> Precise divide. */
-    prec_sqrt: Optional[bool] = None  # /**< -prec-sqrt=<n> Precise square root. */
-    fma: Optional[bool] = None  # /**< -fma=<n> Fast multiply add. */
-    kernels_used: Optional[list[str]] = None  # /**< -kernels-used=<name> Pass list of kernels that are used; any not in the list can be removed. This option can be specified multiple times. */
-    variables_used: Optional[list[str]] = None  # /**< -variables-used=<name> Pass list of variables that are used; any not in the list can be removed. This option can be specified multiple times. */
-    optimize_unused_variables: Optional[bool] = None  # /**< -optimize-unused-variables Normally device code optimization is limited by not knowing what the host code references. With this option it can assume that if a variable is not referenced in device code then it can be removed. */
-    xptxas: Optional[list[str]] = None  # /**< -Xptxas=<opt> Pass <opt> to ptxas. This option can be called multiple times. */
-    split_compile: Optional[int] = None  # /**< -split-compile=<N> Split compilation maximum thread count. Use 0 to use all available processors. Value of 1 disables split compilation (default). */
-    split_compile_extended: Optional[int] = None  # /**< -split-compile-extended=<N> A more aggressive form of split compilation available in LTO mode only. Accepts a maximum thread count value. Use 0 to use all available processors. Value of 1 disables extended split compilation (default). Note: This option can potentially impact performance of the compiled binary. */
-    jump_table_density: Optional[int] = None  # /**< -jump-table-density=<N> When doing LTO, specify the case density percentage in switch statements, and use it as a minimal threshold to determine whether jump table(brx.idx instruction) will be used to implement a switch statement. Default value is 101. The percentage ranges from 0 to 101 inclusively. */
-    no_cache: Optional[bool] = None  # /**< -no-cache Don’t cache the intermediate steps of nvJitLink. */
-    device_stack_protector: Optional[bool] = None  # /**< -device-stack-protector Enable stack canaries in device code. Stack canaries make it more difficult to exploit certain types of memory safety bugs involving stack-local variables. The compiler uses heuristics to assess the risk of such a bug in each function. Only those functions which are deemed high-risk make use of a stack canary. */
+    """Customizable options for nvJitLink.
 
+    Attributes
+    ----------
+    arch : str
+        Pass SM architecture value. See nvcc for valid values of <N>. Can use compute_<N> value instead if only generating PTX.
+        This is a required option.
+        Acceptable value type: str
+        Maps to: -arch=sm_<N>
+    max_register_count : int, optional
+        Maximum register count.
+        Default: None
+        Acceptable value type: int
+        Maps to: -maxrregcount=<N>
+    time : bool, optional
+        Print timing information to InfoLog.
+        Default: False
+        Acceptable value type: bool
+        Maps to: -time
+    verbose : bool, optional
+        Print verbose messages to InfoLog.
+        Default: False
+        Acceptable value type: bool
+        Maps to: -verbose
+    link_time_optimization : bool, optional
+        Perform link time optimization.
+        Default: False
+        Acceptable value type: bool
+        Maps to: -lto
+    ptx : bool, optional
+        Emit PTX after linking instead of CUBIN; only supported with -lto.
+        Default: False
+        Acceptable value type: bool
+        Maps to: -ptx
+    optimization_level : int, optional
+        Set optimization level. Only 0 and 3 are accepted.
+        Default: None
+        Acceptable value type: int
+        Maps to: -O<N>
+    debug : bool, optional
+        Generate debug information.
+        Default: False
+        Acceptable value type: bool
+        Maps to: -g
+    lineinfo : bool, optional
+        Generate line information.
+        Default: False
+        Acceptable value type: bool
+        Maps to: -lineinfo
+    ftz : bool, optional
+        Flush denormal values to zero.
+        Default: False
+        Acceptable value type: bool
+        Maps to: -ftz=<n>
+    prec_div : bool, optional
+        Use precise division.
+        Default: True
+        Acceptable value type: bool
+        Maps to: -prec-div=<n>
+    prec_sqrt : bool, optional
+        Use precise square root.
+        Default: True
+        Acceptable value type: bool
+        Maps to: -prec-sqrt=<n>
+    fma : bool, optional
+        Use fast multiply-add.
+        Default: True
+        Acceptable value type: bool
+        Maps to: -fma=<n>
+    kernels_used : List[str], optional
+        Pass list of kernels that are used; any not in the list can be removed. This option can be specified multiple times.
+        Default: None
+        Acceptable value type: list of str
+        Maps to: -kernels-used=<name>
+    variables_used : List[str], optional
+        Pass list of variables that are used; any not in the list can be removed. This option can be specified multiple times.
+        Default: None
+        Acceptable value type: list of str
+        Maps to: -variables-used=<name>
+    optimize_unused_variables : bool, optional
+        Assume that if a variable is not referenced in device code, it can be removed.
+        Default: False
+        Acceptable value type: bool
+        Maps to: -optimize-unused-variables
+    xptxas : List[str], optional
+        Pass options to ptxas. This option can be specified multiple times.
+        Default: None
+        Acceptable value type: list of str
+        Maps to: -Xptxas=<opt>
+    split_compile : int, optional
+        Split compilation maximum thread count. Use 0 to use all available processors. Value of 1 disables split compilation (default).
+        Default: 1
+        Acceptable value type: int
+        Maps to: -split-compile=<N>
+    split_compile_extended : int, optional
+        A more aggressive form of split compilation available in LTO mode only. Accepts a maximum thread count value. Use 0 to use all available processors. Value of 1 disables extended split compilation (default). Note: This option can potentially impact performance of the compiled binary.
+        Default: 1
+        Acceptable value type: int
+        Maps to: -split-compile-extended=<N>
+    jump_table_density : int, optional
+        When doing LTO, specify the case density percentage in switch statements, and use it as a minimal threshold to determine whether a jump table (brx.idx instruction) will be used to implement a switch statement. Default value is 101. The percentage ranges from 0 to 101 inclusive.
+        Default: 101
+        Acceptable value type: int
+        Maps to: -jump-table-density=<N>
+    no_cache : bool, optional
+        Do not cache the intermediate steps of nvJitLink.
+        Default: False
+        Acceptable value type: bool
+        Maps to: -no-cache
+    device_stack_protector : bool, optional
+        Enable stack canaries in device code. Stack canaries make it more difficult to exploit certain types of memory safety bugs involving stack-local variables. The compiler uses heuristics to assess the risk of such a bug in each function. Only those functions which are deemed high-risk make use of a stack canary.
+        Default: False
+        Acceptable value type: bool
+        Maps to: -device-stack-protector
+    """
+    arch: str
+    max_register_count: Optional[int] = None
+    time: Optional[bool] = None
+    verbose: Optional[bool] = None
+    link_time_optimization: Optional[bool] = None
+    ptx: Optional[bool] = None
+    optimization_level: Optional[int] = None
+    debug: Optional[bool] = None
+    lineinfo: Optional[bool] = None
+    ftz: Optional[bool] = None
+    prec_div: Optional[bool] = None
+    prec_sqrt: Optional[bool] = None
+    fma: Optional[bool] = None
+    kernels_used: Optional[List[str]] = None
+    variables_used: Optional[List[str]] = None
+    optimize_unused_variables: Optional[bool] = None
+    xptxas: Optional[List[str]] = None
+    split_compile: Optional[int] = None
+    split_compile_extended: Optional[int] = None
+    jump_table_density: Optional[int] = None
+    no_cache: Optional[bool] = None
+    device_stack_protector: Optional[bool] = None
 
     def __post_init__(self):
         self.formatted_options = []