|
10 | 10 | #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
|
11 | 11 | #pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable
|
12 | 12 |
|
| 13 | +typedef enum BatchMemOpType { |
| 14 | + STREAM_WAIT_VALUE_32 = 0x1, |
| 15 | + STREAM_WRITE_VALUE_32 = 0x2, |
| 16 | + STREAM_WAIT_VALUE_64 = 0x4, |
| 17 | + STREAM_WRITE_VALUE_64 = 0x5, |
| 18 | + STREAM_MEM_OP_BARRIER = 0x6, // Currently not supported |
| 19 | + STREAM_MEM_OP_FLUSH_REMOTE_WRITES = 0x3 // Currently not supported |
| 20 | +} BatchMemOpType; |
| 21 | + |
| 22 | +typedef union streamBatchMemOpParams_union { |
| 23 | + BatchMemOpType operation; |
| 24 | + struct streamMemOpWaitValueParams_t{ |
| 25 | + BatchMemOpType operation; |
| 26 | + atomic_ulong* address; |
| 27 | + union { |
| 28 | + uint value; |
| 29 | + ulong value64; |
| 30 | + }; |
| 31 | + uint flags; |
| 32 | + atomic_ulong* alias; // Not valid for AMD backend |
| 33 | + } waitValue; |
| 34 | + struct streamMemOpWriteValueParams_t{ |
| 35 | + BatchMemOpType operation; |
| 36 | + atomic_ulong* address; |
| 37 | + union { |
| 38 | + uint value; |
| 39 | + ulong value64; |
| 40 | + }; |
| 41 | + uint flags; |
| 42 | + atomic_ulong* alias; // Not valid for AMD backend |
| 43 | + } writeValue; |
| 44 | + struct streamMemOpFlushRemoteWritesParams_t{ // Currently not supported |
| 45 | + BatchMemOpType operation; |
| 46 | + uint flags; |
| 47 | + } flushRemoteWrites; |
| 48 | + struct streamMemOpMemoryBarrierParams_t{ // Currently not supported |
| 49 | + BatchMemOpType operation; |
| 50 | + uint flags; |
| 51 | + } memoryBarrier; |
| 52 | + ulong pad[6]; |
| 53 | +} BatchMemOpParams; |
| 54 | + |
13 | 55 |
|
14 | 56 | static const uint SplitCount = 3;
|
15 | 57 |
|
@@ -756,5 +798,35 @@ __amd_streamOpsWait(
|
756 | 798 | break;
|
757 | 799 | }
|
758 | 800 | }
|
759 |
| -#endif |
760 | 801 |
|
| 802 | +// The kernel calling this function must be launched with 'count' workgroups each of size 1 |
| 803 | +__attribute__((always_inline)) void |
| 804 | +__amd_batchMemOp(__global BatchMemOpParams* param, |
| 805 | + uint count) { |
| 806 | + |
| 807 | + ulong id = get_global_id(0); |
| 808 | + |
| 809 | + switch (param[id].operation) { |
| 810 | + case STREAM_WAIT_VALUE_32: |
| 811 | + __amd_streamOpsWait((__global atomic_uint*)param[id].waitValue.address, NULL, |
| 812 | + (uint)param[id].waitValue.value, (uint)param[id].waitValue.flags, |
| 813 | + (ulong)~0UL); |
| 814 | + break; |
| 815 | + case STREAM_WRITE_VALUE_32: |
| 816 | + __amd_streamOpsWrite((__global atomic_uint*)param[id].writeValue.address, NULL, |
| 817 | + (uint)param[id].writeValue.value); |
| 818 | + break; |
| 819 | + case STREAM_WAIT_VALUE_64: |
| 820 | + __amd_streamOpsWait(NULL, (__global atomic_ulong*)param[id].waitValue.address, |
| 821 | + (ulong)param[id].waitValue.value64, (uint)param[id].waitValue.flags, |
| 822 | + (ulong)~0UL); |
| 823 | + break; |
| 824 | + case STREAM_WRITE_VALUE_64: |
| 825 | + __amd_streamOpsWrite(NULL, (__global atomic_ulong*)param[id].writeValue.address, |
| 826 | + (ulong)param[id].writeValue.value64); |
| 827 | + break; |
| 828 | + default: |
| 829 | + break; |
| 830 | + } |
| 831 | +} |
| 832 | +#endif |
0 commit comments