Skip to content

Commit a749aae

Browse files
Sourabh BetigeriSourabh Betigeri
authored andcommitted
SWDEV-440866 - Adds a new kernel to support HIP batch memory operations
Change-Id: I9048c90bc16a6e3ce60d420f86abb32adb4c00f2 (cherry picked from commit 8e06540)
1 parent b946e20 commit a749aae

File tree

1 file changed

+73
-1
lines changed

1 file changed

+73
-1
lines changed

amd/device-libs/opencl/src/misc/amdblit.cl

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,48 @@
1010
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
1111
#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable
1212

13+
typedef enum BatchMemOpType {
14+
STREAM_WAIT_VALUE_32 = 0x1,
15+
STREAM_WRITE_VALUE_32 = 0x2,
16+
STREAM_WAIT_VALUE_64 = 0x4,
17+
STREAM_WRITE_VALUE_64 = 0x5,
18+
STREAM_MEM_OP_BARRIER = 0x6, // Currently not supported
19+
STREAM_MEM_OP_FLUSH_REMOTE_WRITES = 0x3 // Currently not supported
20+
} BatchMemOpType;
21+
22+
typedef union streamBatchMemOpParams_union {
23+
BatchMemOpType operation;
24+
struct streamMemOpWaitValueParams_t{
25+
BatchMemOpType operation;
26+
atomic_ulong* address;
27+
union {
28+
uint value;
29+
ulong value64;
30+
};
31+
uint flags;
32+
atomic_ulong* alias; // Not valid for AMD backend
33+
} waitValue;
34+
struct streamMemOpWriteValueParams_t{
35+
BatchMemOpType operation;
36+
atomic_ulong* address;
37+
union {
38+
uint value;
39+
ulong value64;
40+
};
41+
uint flags;
42+
atomic_ulong* alias; // Not valid for AMD backend
43+
} writeValue;
44+
struct streamMemOpFlushRemoteWritesParams_t{ // Currently not supported
45+
BatchMemOpType operation;
46+
uint flags;
47+
} flushRemoteWrites;
48+
struct streamMemOpMemoryBarrierParams_t{ // Currently not supported
49+
BatchMemOpType operation;
50+
uint flags;
51+
} memoryBarrier;
52+
ulong pad[6];
53+
} BatchMemOpParams;
54+
1355

1456
static const uint SplitCount = 3;
1557

@@ -756,5 +798,35 @@ __amd_streamOpsWait(
756798
break;
757799
}
758800
}
759-
#endif
760801

802+
// The kernel calling this function must be launched with 'count' workgroups each of size 1
803+
__attribute__((always_inline)) void
804+
__amd_batchMemOp(__global BatchMemOpParams* param,
805+
uint count) {
806+
807+
ulong id = get_global_id(0);
808+
809+
switch (param[id].operation) {
810+
case STREAM_WAIT_VALUE_32:
811+
__amd_streamOpsWait((__global atomic_uint*)param[id].waitValue.address, NULL,
812+
(uint)param[id].waitValue.value, (uint)param[id].waitValue.flags,
813+
(ulong)~0UL);
814+
break;
815+
case STREAM_WRITE_VALUE_32:
816+
__amd_streamOpsWrite((__global atomic_uint*)param[id].writeValue.address, NULL,
817+
(uint)param[id].writeValue.value);
818+
break;
819+
case STREAM_WAIT_VALUE_64:
820+
__amd_streamOpsWait(NULL, (__global atomic_ulong*)param[id].waitValue.address,
821+
(ulong)param[id].waitValue.value64, (uint)param[id].waitValue.flags,
822+
(ulong)~0UL);
823+
break;
824+
case STREAM_WRITE_VALUE_64:
825+
__amd_streamOpsWrite(NULL, (__global atomic_ulong*)param[id].writeValue.address,
826+
(ulong)param[id].writeValue.value64);
827+
break;
828+
default:
829+
break;
830+
}
831+
}
832+
#endif

0 commit comments

Comments
 (0)