core/stdarch/crates/core_arch/src/nvptx/
mod.rs

1//! NVPTX intrinsics (experimental)
2//!
3//! These intrinsics form the foundation of the CUDA
4//! programming model.
5//!
//! The primary reference is the [CUDA C Programming Guide][cuda_c]. The
//! [LLVM NVPTX Backend documentation][llvm_docs] is also relevant.
8//!
9//! [cuda_c]:
10//! http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html
11//! [llvm_docs]:
12//! https://llvm.org/docs/NVPTXUsage.html
13
14use crate::ffi::c_void;
15
16mod packed;
17
18#[unstable(feature = "stdarch_nvptx", issue = "111199")]
19pub use packed::*;
20
21#[allow(improper_ctypes)]
22unsafe extern "C" {
23    #[link_name = "llvm.nvvm.barrier0"]
24    fn syncthreads() -> ();
25    #[link_name = "llvm.nvvm.read.ptx.sreg.ntid.x"]
26    fn block_dim_x() -> i32;
27    #[link_name = "llvm.nvvm.read.ptx.sreg.ntid.y"]
28    fn block_dim_y() -> i32;
29    #[link_name = "llvm.nvvm.read.ptx.sreg.ntid.z"]
30    fn block_dim_z() -> i32;
31    #[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.x"]
32    fn block_idx_x() -> i32;
33    #[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.y"]
34    fn block_idx_y() -> i32;
35    #[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.z"]
36    fn block_idx_z() -> i32;
37    #[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.x"]
38    fn grid_dim_x() -> i32;
39    #[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.y"]
40    fn grid_dim_y() -> i32;
41    #[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.z"]
42    fn grid_dim_z() -> i32;
43    #[link_name = "llvm.nvvm.read.ptx.sreg.tid.x"]
44    fn thread_idx_x() -> i32;
45    #[link_name = "llvm.nvvm.read.ptx.sreg.tid.y"]
46    fn thread_idx_y() -> i32;
47    #[link_name = "llvm.nvvm.read.ptx.sreg.tid.z"]
48    fn thread_idx_z() -> i32;
49}
50
51/// Synchronizes all threads in the block.
52#[inline]
53#[unstable(feature = "stdarch_nvptx", issue = "111199")]
54pub unsafe fn _syncthreads() -> () {
55    syncthreads()
56}
57
58/// x-th thread-block dimension.
59#[inline]
60#[unstable(feature = "stdarch_nvptx", issue = "111199")]
61pub unsafe fn _block_dim_x() -> i32 {
62    block_dim_x()
63}
64
65/// y-th thread-block dimension.
66#[inline]
67#[unstable(feature = "stdarch_nvptx", issue = "111199")]
68pub unsafe fn _block_dim_y() -> i32 {
69    block_dim_y()
70}
71
72/// z-th thread-block dimension.
73#[inline]
74#[unstable(feature = "stdarch_nvptx", issue = "111199")]
75pub unsafe fn _block_dim_z() -> i32 {
76    block_dim_z()
77}
78
79/// x-th thread-block index.
80#[inline]
81#[unstable(feature = "stdarch_nvptx", issue = "111199")]
82pub unsafe fn _block_idx_x() -> i32 {
83    block_idx_x()
84}
85
86/// y-th thread-block index.
87#[inline]
88#[unstable(feature = "stdarch_nvptx", issue = "111199")]
89pub unsafe fn _block_idx_y() -> i32 {
90    block_idx_y()
91}
92
93/// z-th thread-block index.
94#[inline]
95#[unstable(feature = "stdarch_nvptx", issue = "111199")]
96pub unsafe fn _block_idx_z() -> i32 {
97    block_idx_z()
98}
99
100/// x-th block-grid dimension.
101#[inline]
102#[unstable(feature = "stdarch_nvptx", issue = "111199")]
103pub unsafe fn _grid_dim_x() -> i32 {
104    grid_dim_x()
105}
106
107/// y-th block-grid dimension.
108#[inline]
109#[unstable(feature = "stdarch_nvptx", issue = "111199")]
110pub unsafe fn _grid_dim_y() -> i32 {
111    grid_dim_y()
112}
113
114/// z-th block-grid dimension.
115#[inline]
116#[unstable(feature = "stdarch_nvptx", issue = "111199")]
117pub unsafe fn _grid_dim_z() -> i32 {
118    grid_dim_z()
119}
120
121/// x-th thread index.
122#[inline]
123#[unstable(feature = "stdarch_nvptx", issue = "111199")]
124pub unsafe fn _thread_idx_x() -> i32 {
125    thread_idx_x()
126}
127
128/// y-th thread index.
129#[inline]
130#[unstable(feature = "stdarch_nvptx", issue = "111199")]
131pub unsafe fn _thread_idx_y() -> i32 {
132    thread_idx_y()
133}
134
135/// z-th thread index.
136#[inline]
137#[unstable(feature = "stdarch_nvptx", issue = "111199")]
138pub unsafe fn _thread_idx_z() -> i32 {
139    thread_idx_z()
140}
141
142/// Generates the trap instruction `TRAP`
143#[inline]
144#[unstable(feature = "stdarch_nvptx", issue = "111199")]
145pub unsafe fn trap() -> ! {
146    crate::intrinsics::abort()
147}
148
149// Basic CUDA syscall declarations.
150unsafe extern "C" {
151    /// Print formatted output from a kernel to a host-side output stream.
152    ///
153    /// Syscall arguments:
154    /// * `status`: The status value that is returned by `vprintf`.
155    /// * `format`: A pointer to the format specifier input (uses common `printf` format).
156    /// * `valist`: A pointer to the valist input.
157    ///
158    /// ```
159    /// #[repr(C)]
160    /// struct PrintArgs(f32, f32, f32, i32);
161    ///
162    /// vprintf(
163    ///     "int(%f + %f) = int(%f) = %d\n".as_ptr(),
164    ///     transmute(&PrintArgs(a, b, a + b, (a + b) as i32)),
165    /// );
166    /// ```
167    ///
168    /// Sources:
169    /// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#formatted-output),
170    /// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
171    #[unstable(feature = "stdarch_nvptx", issue = "111199")]
172    pub fn vprintf(format: *const u8, valist: *const c_void) -> i32;
173
174    /// Allocate memory dynamically from a fixed-size heap in global memory.
175    ///
176    /// The CUDA in-kernel `malloc()` function allocates at least `size` bytes
177    /// from the device heap and returns a pointer to the allocated memory
178    /// or `NULL` if insufficient memory exists to fulfill the request.
179    ///
180    /// The returned pointer is guaranteed to be aligned to a 16-byte boundary.
181    ///
182    /// The memory allocated by a given CUDA thread via `malloc()` remains allocated
183    /// for the lifetime of the CUDA context, or until it is explicitly released
184    /// by a call to `free()`. It can be used by any other CUDA threads
185    /// even from subsequent kernel launches.
186    ///
187    /// Sources:
188    /// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations),
189    /// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
190    // FIXME(denzp): assign `malloc` and `nothrow` attributes.
191    #[unstable(feature = "stdarch_nvptx", issue = "111199")]
192    pub fn malloc(size: usize) -> *mut c_void;
193
194    /// Free previously dynamically allocated memory.
195    ///
196    /// The CUDA in-kernel `free()` function deallocates the memory pointed to by `ptr`,
197    /// which must have been returned by a previous call to `malloc()`. If `ptr` is NULL,
198    /// the call to `free()` is ignored.
199    ///
200    /// Any CUDA thread may free memory allocated by another thread, but care should be taken
201    /// to ensure that the same pointer is not freed more than once. Repeated calls to `free()`
202    /// with the same `ptr` has undefined behavior.
203    ///
204    /// Sources:
205    /// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations),
206    /// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
207    // FIXME(denzp): assign `nothrow` attribute.
208    #[unstable(feature = "stdarch_nvptx", issue = "111199")]
209    pub fn free(ptr: *mut c_void);
210
211    // Internal declaration of the syscall. Exported variant has
212    // the `char_size` parameter set to `1` (single char size in bytes).
213    fn __assertfail(
214        message: *const u8,
215        file: *const u8,
216        line: u32,
217        function: *const u8,
218        char_size: usize,
219    );
220}
221
222/// Syscall to be used whenever the *assert expression produces a `false` value*.
223///
224/// Syscall arguments:
225/// * `message`: The pointer to the string that should be output.
226/// * `file`: The pointer to the file name string associated with the assert.
227/// * `line`: The line number associated with the assert.
228/// * `function`: The pointer to the function name string associated with the assert.
229///
230/// Source:
231/// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
232#[inline]
233#[unstable(feature = "stdarch_nvptx", issue = "111199")]
234pub unsafe fn __assert_fail(message: *const u8, file: *const u8, line: u32, function: *const u8) {
235    __assertfail(message, file, line, function, 1)
236}