Skip to content

Commit fb33e66

Browse files
denzpgnzlbg
authored and committed
NVPTX syscalls
1 parent 51a27d2 commit fb33e66

File tree

1 file changed

+87
-0
lines changed
  • crates/core_arch/src/nvptx

1 file changed

+87
-0
lines changed

crates/core_arch/src/nvptx/mod.rs

Lines changed: 87 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,8 @@
1111
//! [llvm_docs]:
1212
//! https://llvm.org/docs/NVPTXUsage.html
1313
14+
use core::ffi::c_void;
15+
1416
#[allow(improper_ctypes)]
1517
extern "C" {
1618
#[link_name = "llvm.nvvm.barrier0"]
@@ -124,3 +126,88 @@ pub unsafe fn _thread_idx_z() -> i32 {
124126
pub unsafe fn trap() -> ! {
125127
crate::intrinsics::abort()
126128
}
129+
130+
// Basic CUDA syscall declarations.
extern "C" {
    /// Print formatted output from a kernel to a host-side output stream.
    ///
    /// Syscall arguments:
    /// * `format`: A pointer to the format specifier input (uses common `printf` format).
    /// * `valist`: A pointer to the valist input.
    ///
    /// Returns the `status` value reported by the `vprintf` syscall.
    ///
    /// Only a pointer to the format is passed (no length), so the format string
    /// has to be NUL-terminated explicitly: Rust string literals carry no
    /// terminator of their own.
    ///
    /// ```ignore
    /// #[repr(C)]
    /// struct PrintArgs(f32, f32, f32, i32);
    ///
    /// let args = PrintArgs(a, b, a + b, (a + b) as i32);
    ///
    /// vprintf(
    ///     "int(%f + %f) = int(%f) = %d\n\0".as_ptr(),
    ///     &args as *const PrintArgs as *const c_void,
    /// );
    /// ```
    ///
    /// Sources:
    /// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#formatted-output),
    /// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
    // NOTE(review): C variadic calls promote `float` to `double`; confirm whether
    // `%f` arguments in the `valist` buffer must be stored as `f64` instead of `f32`.
    pub fn vprintf(format: *const u8, valist: *const c_void) -> i32;

    /// Allocate memory dynamically from a fixed-size heap in global memory.
    ///
    /// The CUDA in-kernel `malloc()` function allocates at least `size` bytes
    /// from the device heap and returns a pointer to the allocated memory
    /// or `NULL` if insufficient memory exists to fulfill the request.
    ///
    /// The returned pointer is guaranteed to be aligned to a 16-byte boundary.
    ///
    /// The memory allocated by a given CUDA thread via `malloc()` remains allocated
    /// for the lifetime of the CUDA context, or until it is explicitly released
    /// by a call to `free()`. It can be used by any other CUDA threads
    /// even from subsequent kernel launches.
    ///
    /// Sources:
    /// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations),
    /// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
    // FIXME(denzp): assign `malloc` and `nothrow` attributes.
    pub fn malloc(size: usize) -> *mut c_void;

    /// Free previously dynamically allocated memory.
    ///
    /// The CUDA in-kernel `free()` function deallocates the memory pointed to by `ptr`,
    /// which must have been returned by a previous call to `malloc()`. If `ptr` is NULL,
    /// the call to `free()` is ignored.
    ///
    /// Any CUDA thread may free memory allocated by another thread, but care should be taken
    /// to ensure that the same pointer is not freed more than once. Calling `free()`
    /// repeatedly with the same `ptr` has undefined behavior.
    ///
    /// Sources:
    /// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations),
    /// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
    // FIXME(denzp): assign `nothrow` attribute.
    pub fn free(ptr: *mut c_void);

    // Internal declaration of the assertion-failure syscall. It stays private:
    // the exported variant fixes the `char_size` parameter to `1`
    // (single char size in bytes), so callers never have to supply it.
    fn __assertfail(
        message: *const u8,
        file: *const u8,
        line: u32,
        function: *const u8,
        char_size: usize,
    );
}
199+
200+
/// Syscall to be used whenever the *assert expression produces a `false` value*.
201+
///
202+
/// Syscall arguments:
203+
/// * `message`: The pointer to the string that should be output.
204+
/// * `file`: The pointer to the file name string associated with the assert.
205+
/// * `line`: The line number associated with the assert.
206+
/// * `function`: The pointer to the function name string associated with the assert.
207+
///
208+
/// Source:
209+
/// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
210+
#[inline]
211+
pub unsafe fn __assert_fail(message: *const u8, file: *const u8, line: u32, function: *const u8) {
212+
__assertfail(message, file, line, function, 1)
213+
}

0 commit comments

Comments
 (0)