|
11 | 11 | //! [llvm_docs]:
|
12 | 12 | //! https://llvm.org/docs/NVPTXUsage.html
|
13 | 13 |
|
| 14 | +use core::ffi::c_void; |
| 15 | + |
14 | 16 | #[allow(improper_ctypes)]
|
15 | 17 | extern "C" {
|
16 | 18 | #[link_name = "llvm.nvvm.barrier0"]
|
@@ -124,3 +126,88 @@ pub unsafe fn _thread_idx_z() -> i32 {
|
124 | 126 | pub unsafe fn trap() -> ! {
|
125 | 127 | crate::intrinsics::abort() // never returns (`-> !`): hands control to the crate's `abort` intrinsic
|
126 | 128 | }
|
| 129 | + |
| 130 | +// Basic CUDA syscall declarations: formatted output (`vprintf`), device-heap management (`malloc`/`free`) and assertion-failure reporting (`__assertfail`). |
| 131 | +extern "C" { |
| 132 | + /// Print formatted output from a kernel to a host-side output stream. |
| 133 | + /// |
| 134 | + /// Return value and syscall arguments: |
| 135 | + /// * `status` (the return value): The status value that is returned by `vprintf`. |
| 136 | + /// * `format`: A pointer to the format specifier input (uses common `printf` format). No length is passed alongside it, so the string presumably has to be NUL-terminated like a C string (TODO confirm). |
| 137 | + /// * `valist`: A pointer to the valist input, i.e. the buffer holding the values to print (a `#[repr(C)]` struct in the example below). |
| 138 | + /// |
| 139 | + /// ```ignore |
| 140 | + /// #[repr(C)] |
| 141 | + /// struct PrintArgs(f32, f32, f32, i32); |
| 142 | + /// |
| 143 | + /// vprintf( |
| 144 | + /// "int(%f + %f) = int(%f) = %d\n\0".as_ptr(), |
| 145 | + /// transmute(&PrintArgs(a, b, a + b, (a + b) as i32)), |
| 146 | + /// ); |
| 147 | + /// ``` |
| 148 | + /// |
| 149 | + /// Sources: |
| 150 | + /// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#formatted-output), |
| 151 | + /// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls). |
| 152 | + pub fn vprintf(format: *const u8, valist: *const c_void) -> i32; |
| 153 | + |
| 154 | + /// Allocate memory dynamically from a fixed-size heap in global memory. |
| 155 | + /// |
| 156 | + /// The CUDA in-kernel `malloc()` function allocates at least `size` bytes |
| 157 | + /// from the device heap and returns a pointer to the allocated memory |
| 158 | + /// or `NULL` if insufficient memory exists to fulfill the request. |
| 159 | + /// |
| 160 | + /// The returned pointer is guaranteed to be aligned to a 16-byte boundary. |
| 161 | + /// |
| 162 | + /// The memory allocated by a given CUDA thread via `malloc()` remains allocated |
| 163 | + /// for the lifetime of the CUDA context, or until it is explicitly released |
| 164 | + /// by a call to `free()`. It can be used by any other CUDA threads |
| 165 | + /// even from subsequent kernel launches. |
| 166 | + /// |
| 167 | + /// Sources: |
| 168 | + /// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations), |
| 169 | + /// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls). |
| 170 | + // FIXME(denzp): assign `malloc` and `nothrow` attributes. |
| 171 | + pub fn malloc(size: usize) -> *mut c_void; |
| 172 | + |
| 173 | + /// Free previously dynamically allocated memory. |
| 174 | + /// |
| 175 | + /// The CUDA in-kernel `free()` function deallocates the memory pointed to by `ptr`, |
| 176 | + /// which must have been returned by a previous call to `malloc()`. If `ptr` is NULL, |
| 177 | + /// the call to `free()` is ignored. |
| 178 | + /// |
| 179 | + /// Any CUDA thread may free memory allocated by another thread, but care should be taken |
| 180 | + /// to ensure that the same pointer is not freed more than once. Repeated calls to `free()` |
| 181 | + /// with the same `ptr` have undefined behavior. |
| 182 | + /// |
| 183 | + /// Sources: |
| 184 | + /// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations), |
| 185 | + /// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls). |
| 186 | + // FIXME(denzp): assign `nothrow` attribute. |
| 187 | + pub fn free(ptr: *mut c_void); |
| 188 | + |
| 189 | + // Internal (non-`pub`) declaration of the raw assertion-failure syscall. The exported |
| 190 | + // variant, `__assert_fail`, always passes `1` for `char_size` (single char size in bytes). |
| 191 | + fn __assertfail( |
| 192 | + message: *const u8, |
| 193 | + file: *const u8, |
| 194 | + line: u32, |
| 195 | + function: *const u8, |
| 196 | + char_size: usize, |
| 197 | + ); |
| 198 | +} |
| 199 | + |
| 200 | +/// Syscall to be used whenever the *assert expression produces a `false` value*; forwards to the internal `__assertfail` declaration with `char_size` fixed to `1`. |
| 201 | +/// |
| 202 | +/// Syscall arguments: |
| 203 | +/// * `message`: The pointer to the string that should be output. |
| 204 | +/// * `file`: The pointer to the file name string associated with the assert. |
| 205 | +/// * `line`: The line number associated with the assert. |
| 206 | +/// * `function`: The pointer to the function name string associated with the assert. |
| 207 | +/// |
| 208 | +/// Source: |
| 209 | +/// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls). |
| 210 | +#[inline] |
| 211 | +pub unsafe fn __assert_fail(message: *const u8, file: *const u8, line: u32, function: *const u8) { |
| 212 | + __assertfail(message, file, line, function, 1) |
| 213 | +} |
0 commit comments