Skip to content

Commit 07f9bcb

Browse files
aratajewigcbot
authored and committed
Enable D64 Load Block message for cl_intel_subgroups_long extension
OpenCL C builtins from the `cl_intel_subgroups_long` extension require the pointer to be 8-byte aligned because the pointer is `ulong`-typed. According to https://registry.khronos.org/OpenCL/sdk/3.0/docs/man/html/alignmentOfDataTypes.html, it must be aligned to the type size: "A data item declared to be a data type in memory is always aligned to the size of the data type in bytes." It should therefore be safe and spec-consistent to generate `D64 Load Block` messages for builtins from the `cl_intel_subgroups_long` extension.
1 parent 1ca0735 commit 07f9bcb

File tree

6 files changed

+67
-98
lines changed

6 files changed

+67
-98
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6664,8 +6664,7 @@ void EmitPass::emitLSCSimdBlockRead(llvm::Instruction* inst, llvm::Value* ptrVal
66646664

66656665
// If type size >= 8 bytes, assume 8byte aligned and use D64 Transpose message;
66666666
// otherwise, use D32 transpose message.
6667-
// bool isD64 = (typeSizeInBytes >= 8);
6668-
bool isD64 = false; // blk APIs only requires 4-byte alignment.
6667+
bool isD64 = (typeSizeInBytes >= 8);
66696668
uint32_t blkBits = isD64 ? 64 : 32;
66706669
while (bytesRemaining)
66716670
{

IGC/ocloc_tests/Builtins/cl_intel_subgroup_buffer_prefetch/PVC/block_prefetch.cl

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -57,16 +57,13 @@ kernel void test_int(const global uint* buffer) {
5757

5858
__attribute__((intel_reqd_sub_group_size(16)))
5959
kernel void test_long(const global ulong* buffer) {
60-
// CHECK: lsc_load.ugm (M1_NM, 1) %null:d32x32t flat[{{.*}}]:a64
60+
// CHECK: lsc_load.ugm (M1_NM, 1) %null:d64x16t flat[{{.*}}]:a64
6161
intel_sub_group_block_prefetch_ul(buffer);
62-
// CHECK: lsc_load.ugm (M1_NM, 1) %null:d32x64t flat[{{.*}}]:a64
62+
// CHECK: lsc_load.ugm (M1_NM, 1) %null:d64x32t flat[{{.*}}]:a64
6363
intel_sub_group_block_prefetch_ul2(buffer);
64-
// CHECK: lsc_load.ugm (M1_NM, 1) %null:d32x64t flat[{{.*}}]:a64
65-
// CHECK: lsc_load.ugm (M1_NM, 1) %null:d32x64t flat[{{.*}}+0x100]:a64
64+
// CHECK: lsc_load.ugm (M1_NM, 1) %null:d64x64t flat[{{.*}}]:a64
6665
intel_sub_group_block_prefetch_ul4(buffer);
67-
// CHECK: lsc_load.ugm (M1_NM, 1) %null:d32x64t flat[{{.*}}]:a64
68-
// CHECK: lsc_load.ugm (M1_NM, 1) %null:d32x64t flat[{{.*}}+0x100]:a64
69-
// CHECK: lsc_load.ugm (M1_NM, 1) %null:d32x64t flat[{{.*}}+0x200]:a64
70-
// CHECK: lsc_load.ugm (M1_NM, 1) %null:d32x64t flat[{{.*}}+0x300]:a64
66+
// CHECK: lsc_load.ugm (M1_NM, 1) %null:d64x64t flat[{{.*}}]:a64
67+
// CHECK: lsc_load.ugm (M1_NM, 1) %null:d64x64t flat[{{.*}}+0x200]:a64
7168
intel_sub_group_block_prefetch_ul8(buffer);
7269
}

IGC/ocloc_tests/SPIRV-extenstions/SPV_INTEL_subgroup_buffer_prefetch/block_prefetch.ll

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -82,20 +82,17 @@ entry:
8282

8383
; ------------------long variants--------------------
8484

85-
; CHECK: lsc_load.ugm (M1_NM, 1) %null:d32x32t flat[{{.*}}]:a64
85+
; CHECK: lsc_load.ugm (M1_NM, 1) %null:d64x16t flat[{{.*}}]:a64
8686
call spir_func void @_Z34__spirv_SubgroupBlockPrefetchINTELPU3AS1Kmj(i64 addrspace(1)* %ul_in, i32 8)
8787

88-
; CHECK: lsc_load.ugm (M1_NM, 1) %null:d32x64t flat[{{.*}}]:a64
88+
; CHECK: lsc_load.ugm (M1_NM, 1) %null:d64x32t flat[{{.*}}]:a64
8989
call spir_func void @_Z34__spirv_SubgroupBlockPrefetchINTELPU3AS1Kmj(i64 addrspace(1)* %ul_in, i32 16)
9090

91-
; CHECK: lsc_load.ugm (M1_NM, 1) %null:d32x64t flat[{{.*}}]:a64
92-
; CHECK: lsc_load.ugm (M1_NM, 1) %null:d32x64t flat[{{.*}}+0x100]:a64
91+
; CHECK: lsc_load.ugm (M1_NM, 1) %null:d64x64t flat[{{.*}}]:a64
9392
call spir_func void @_Z34__spirv_SubgroupBlockPrefetchINTELPU3AS1Kmj(i64 addrspace(1)* %ul_in, i32 32)
9493

95-
; CHECK: lsc_load.ugm (M1_NM, 1) %null:d32x64t flat[{{.*}}]:a64
96-
; CHECK: lsc_load.ugm (M1_NM, 1) %null:d32x64t flat[{{.*}}+0x100]:a64
97-
; CHECK: lsc_load.ugm (M1_NM, 1) %null:d32x64t flat[{{.*}}+0x200]:a64
98-
; CHECK: lsc_load.ugm (M1_NM, 1) %null:d32x64t flat[{{.*}}+0x300]:a64
94+
; CHECK: lsc_load.ugm (M1_NM, 1) %null:d64x64t flat[{{.*}}]:a64
95+
; CHECK: lsc_load.ugm (M1_NM, 1) %null:d64x64t flat[{{.*}}+0x200]:a64
9996
call spir_func void @_Z34__spirv_SubgroupBlockPrefetchINTELPU3AS1Kmj(i64 addrspace(1)* %ul_in, i32 64)
10097

10198
ret void

IGC/ocloc_tests/features/cache_controls/cl_intel_sub_groups/block_read.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ entry:
4747
store <2 x i32> %ui_value, <2 x i32> addrspace(1)* %ui_out_ptr, align 4
4848

4949
%decorated_ul_ptr = getelementptr inbounds i64, i64 addrspace(1)* %ul_in, i32 0, !spirv.Decorations !0
50-
; CHECK: lsc_load.ugm.uc.uc (M1_NM, 1) V{{[0-9]+}}:d32x64t flat[{{.*}}]:a64
50+
; CHECK: lsc_load.ugm.uc.uc (M1_NM, 1) V{{[0-9]+}}:d64x32t flat[{{.*}}]:a64
5151
%ul_value = call spir_func <2 x i64> @_Z30intel_sub_group_block_read_ul2PU3AS1Km(i64 addrspace(1)* %decorated_ul_ptr)
5252
%ul_out_ptr = getelementptr inbounds <2 x i64>, <2 x i64> addrspace(1)* %ul_out, i64 %gid
5353
store <2 x i64> %ul_value, <2 x i64> addrspace(1)* %ul_out_ptr, align 8
@@ -84,7 +84,7 @@ entry:
8484
store <2 x i32> %ui_value, <2 x i32> addrspace(1)* %ui_out_ptr, align 4
8585

8686
%decorated_ul_ptr = getelementptr inbounds i64, i64 addrspace(1)* %ul_in, i32 0, !spirv.Decorations !3
87-
; CHECK: lsc_load.ugm.uc.ca (M1_NM, 1) V{{[0-9]+}}:d32x64t flat[{{.*}}]:a64
87+
; CHECK: lsc_load.ugm.uc.ca (M1_NM, 1) V{{[0-9]+}}:d64x32t flat[{{.*}}]:a64
8888
%ul_value = call spir_func <2 x i64> @_Z30intel_sub_group_block_read_ul2PU3AS1Km(i64 addrspace(1)* %decorated_ul_ptr)
8989
%ul_out_ptr = getelementptr inbounds <2 x i64>, <2 x i64> addrspace(1)* %ul_out, i64 %gid
9090
store <2 x i64> %ul_value, <2 x i64> addrspace(1)* %ul_out_ptr, align 8
@@ -121,7 +121,7 @@ entry:
121121
store <2 x i32> %ui_value, <2 x i32> addrspace(1)* %ui_out_ptr, align 4
122122

123123
%decorated_ul_ptr = getelementptr inbounds i64, i64 addrspace(1)* %ul_in, i32 0, !spirv.Decorations !6
124-
; CHECK: lsc_load.ugm.ca.uc (M1_NM, 1) V{{[0-9]+}}:d32x64t flat[{{.*}}]:a64
124+
; CHECK: lsc_load.ugm.ca.uc (M1_NM, 1) V{{[0-9]+}}:d64x32t flat[{{.*}}]:a64
125125
%ul_value = call spir_func <2 x i64> @_Z30intel_sub_group_block_read_ul2PU3AS1Km(i64 addrspace(1)* %decorated_ul_ptr)
126126
%ul_out_ptr = getelementptr inbounds <2 x i64>, <2 x i64> addrspace(1)* %ul_out, i64 %gid
127127
store <2 x i64> %ul_value, <2 x i64> addrspace(1)* %ul_out_ptr, align 8
@@ -158,7 +158,7 @@ entry:
158158
store <2 x i32> %ui_value, <2 x i32> addrspace(1)* %ui_out_ptr, align 4
159159

160160
%decorated_ul_ptr = getelementptr inbounds i64, i64 addrspace(1)* %ul_in, i32 0, !spirv.Decorations !9
161-
; CHECK: lsc_load.ugm.ca.ca (M1_NM, 1) V{{[0-9]+}}:d32x64t flat[{{.*}}]:a64
161+
; CHECK: lsc_load.ugm.ca.ca (M1_NM, 1) V{{[0-9]+}}:d64x32t flat[{{.*}}]:a64
162162
%ul_value = call spir_func <2 x i64> @_Z30intel_sub_group_block_read_ul2PU3AS1Km(i64 addrspace(1)* %decorated_ul_ptr)
163163
%ul_out_ptr = getelementptr inbounds <2 x i64>, <2 x i64> addrspace(1)* %ul_out, i64 %gid
164164
store <2 x i64> %ul_value, <2 x i64> addrspace(1)* %ul_out_ptr, align 8
@@ -195,7 +195,7 @@ entry:
195195
store <2 x i32> %ui_value, <2 x i32> addrspace(1)* %ui_out_ptr, align 4
196196

197197
%decorated_ul_ptr = getelementptr inbounds i64, i64 addrspace(1)* %ul_in, i32 0, !spirv.Decorations !12
198-
; CHECK: lsc_load.ugm.st.uc (M1_NM, 1) V{{[0-9]+}}:d32x64t flat[{{.*}}]:a64
198+
; CHECK: lsc_load.ugm.st.uc (M1_NM, 1) V{{[0-9]+}}:d64x32t flat[{{.*}}]:a64
199199
%ul_value = call spir_func <2 x i64> @_Z30intel_sub_group_block_read_ul2PU3AS1Km(i64 addrspace(1)* %decorated_ul_ptr)
200200
%ul_out_ptr = getelementptr inbounds <2 x i64>, <2 x i64> addrspace(1)* %ul_out, i64 %gid
201201
store <2 x i64> %ul_value, <2 x i64> addrspace(1)* %ul_out_ptr, align 8
@@ -232,7 +232,7 @@ entry:
232232
store <2 x i32> %ui_value, <2 x i32> addrspace(1)* %ui_out_ptr, align 4
233233

234234
%decorated_ul_ptr = getelementptr inbounds i64, i64 addrspace(1)* %ul_in, i32 0, !spirv.Decorations !15
235-
; CHECK: lsc_load.ugm.st.ca (M1_NM, 1) V{{[0-9]+}}:d32x64t flat[{{.*}}]:a64
235+
; CHECK: lsc_load.ugm.st.ca (M1_NM, 1) V{{[0-9]+}}:d64x32t flat[{{.*}}]:a64
236236
%ul_value = call spir_func <2 x i64> @_Z30intel_sub_group_block_read_ul2PU3AS1Km(i64 addrspace(1)* %decorated_ul_ptr)
237237
%ul_out_ptr = getelementptr inbounds <2 x i64>, <2 x i64> addrspace(1)* %ul_out, i64 %gid
238238
store <2 x i64> %ul_value, <2 x i64> addrspace(1)* %ul_out_ptr, align 8
@@ -269,7 +269,7 @@ entry:
269269
store <2 x i32> %ui_value, <2 x i32> addrspace(1)* %ui_out_ptr, align 4
270270

271271
%decorated_ul_ptr = getelementptr inbounds i64, i64 addrspace(1)* %ul_in, i32 0, !spirv.Decorations !18
272-
; CHECK: lsc_load.ugm.ri.ca (M1_NM, 1) V{{[0-9]+}}:d32x64t flat[{{.*}}]:a64
272+
; CHECK: lsc_load.ugm.ri.ca (M1_NM, 1) V{{[0-9]+}}:d64x32t flat[{{.*}}]:a64
273273
%ul_value = call spir_func <2 x i64> @_Z30intel_sub_group_block_read_ul2PU3AS1Km(i64 addrspace(1)* %decorated_ul_ptr)
274274
%ul_out_ptr = getelementptr inbounds <2 x i64>, <2 x i64> addrspace(1)* %ul_out, i64 %gid
275275
store <2 x i64> %ul_value, <2 x i64> addrspace(1)* %ul_out_ptr, align 8
@@ -306,7 +306,7 @@ entry:
306306
store <2 x i32> %ui_value, <2 x i32> addrspace(1)* %ui_out_ptr, align 4
307307

308308
%decorated_ul_ptr = getelementptr inbounds i64, i64 addrspace(1)* %ul_in, i32 0, !spirv.Decorations !21
309-
; CHECK: lsc_load.ugm (M1_NM, 1) V{{[0-9]+}}:d32x64t flat[{{.*}}]:a64
309+
; CHECK: lsc_load.ugm (M1_NM, 1) V{{[0-9]+}}:d64x32t flat[{{.*}}]:a64
310310
%ul_value = call spir_func <2 x i64> @_Z30intel_sub_group_block_read_ul2PU3AS1Km(i64 addrspace(1)* %decorated_ul_ptr)
311311
%ul_out_ptr = getelementptr inbounds <2 x i64>, <2 x i64> addrspace(1)* %ul_out, i64 %gid
312312
store <2 x i64> %ul_value, <2 x i64> addrspace(1)* %ul_out_ptr, align 8

IGC/ocloc_tests/features/cache_controls/cl_intel_subgroups_buffer_prefetch/SPV_INTEL_subgroup_buffer_prefetch.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ entry:
4141
call spir_func void @_Z34__spirv_SubgroupBlockPrefetchINTELPU3AS1Kjj(i32 addrspace(1)* %decorated_ui_ptr, i32 8)
4242

4343
%decorated_ul_ptr = getelementptr inbounds i64, i64 addrspace(1)* %ul_in, i32 0, !spirv.Decorations !0
44-
; CHECK: lsc_load.ugm.uc.uc (M1_NM, 1) %null:d32x64t flat[{{.*}}]:a64
44+
; CHECK: lsc_load.ugm.uc.uc (M1_NM, 1) %null:d64x32t flat[{{.*}}]:a64
4545
call spir_func void @_Z34__spirv_SubgroupBlockPrefetchINTELPU3AS1Kmj(i64 addrspace(1)* %decorated_ul_ptr, i32 16)
4646

4747
ret void
@@ -69,7 +69,7 @@ entry:
6969
call spir_func void @_Z34__spirv_SubgroupBlockPrefetchINTELPU3AS1Kjj(i32 addrspace(1)* %decorated_ui_ptr, i32 8)
7070

7171
%decorated_ul_ptr = getelementptr inbounds i64, i64 addrspace(1)* %ul_in, i32 0, !spirv.Decorations !3
72-
; CHECK: lsc_load.ugm.uc.ca (M1_NM, 1) %null:d32x64t flat[{{.*}}]:a64
72+
; CHECK: lsc_load.ugm.uc.ca (M1_NM, 1) %null:d64x32t flat[{{.*}}]:a64
7373
call spir_func void @_Z34__spirv_SubgroupBlockPrefetchINTELPU3AS1Kmj(i64 addrspace(1)* %decorated_ul_ptr, i32 16)
7474

7575
ret void
@@ -97,7 +97,7 @@ entry:
9797
call spir_func void @_Z34__spirv_SubgroupBlockPrefetchINTELPU3AS1Kjj(i32 addrspace(1)* %decorated_ui_ptr, i32 8)
9898

9999
%decorated_ul_ptr = getelementptr inbounds i64, i64 addrspace(1)* %ul_in, i32 0, !spirv.Decorations !6
100-
; CHECK: lsc_load.ugm.ca.uc (M1_NM, 1) %null:d32x64t flat[{{.*}}]:a64
100+
; CHECK: lsc_load.ugm.ca.uc (M1_NM, 1) %null:d64x32t flat[{{.*}}]:a64
101101
call spir_func void @_Z34__spirv_SubgroupBlockPrefetchINTELPU3AS1Kmj(i64 addrspace(1)* %decorated_ul_ptr, i32 16)
102102

103103
ret void
@@ -125,7 +125,7 @@ entry:
125125
call spir_func void @_Z34__spirv_SubgroupBlockPrefetchINTELPU3AS1Kjj(i32 addrspace(1)* %decorated_ui_ptr, i32 8)
126126

127127
%decorated_ul_ptr = getelementptr inbounds i64, i64 addrspace(1)* %ul_in, i32 0, !spirv.Decorations !9
128-
; CHECK: lsc_load.ugm.ca.ca (M1_NM, 1) %null:d32x64t flat[{{.*}}]:a64
128+
; CHECK: lsc_load.ugm.ca.ca (M1_NM, 1) %null:d64x32t flat[{{.*}}]:a64
129129
call spir_func void @_Z34__spirv_SubgroupBlockPrefetchINTELPU3AS1Kmj(i64 addrspace(1)* %decorated_ul_ptr, i32 16)
130130

131131
ret void
@@ -153,7 +153,7 @@ entry:
153153
call spir_func void @_Z34__spirv_SubgroupBlockPrefetchINTELPU3AS1Kjj(i32 addrspace(1)* %decorated_ui_ptr, i32 8)
154154

155155
%decorated_ul_ptr = getelementptr inbounds i64, i64 addrspace(1)* %ul_in, i32 0, !spirv.Decorations !12
156-
; CHECK: lsc_load.ugm.st.uc (M1_NM, 1) %null:d32x64t flat[{{.*}}]:a64
156+
; CHECK: lsc_load.ugm.st.uc (M1_NM, 1) %null:d64x32t flat[{{.*}}]:a64
157157
call spir_func void @_Z34__spirv_SubgroupBlockPrefetchINTELPU3AS1Kmj(i64 addrspace(1)* %decorated_ul_ptr, i32 16)
158158

159159
ret void
@@ -181,7 +181,7 @@ entry:
181181
call spir_func void @_Z34__spirv_SubgroupBlockPrefetchINTELPU3AS1Kjj(i32 addrspace(1)* %decorated_ui_ptr, i32 8)
182182

183183
%decorated_ul_ptr = getelementptr inbounds i64, i64 addrspace(1)* %ul_in, i32 0, !spirv.Decorations !15
184-
; CHECK: lsc_load.ugm.st.ca (M1_NM, 1) %null:d32x64t flat[{{.*}}]:a64
184+
; CHECK: lsc_load.ugm.st.ca (M1_NM, 1) %null:d64x32t flat[{{.*}}]:a64
185185
call spir_func void @_Z34__spirv_SubgroupBlockPrefetchINTELPU3AS1Kmj(i64 addrspace(1)* %decorated_ul_ptr, i32 16)
186186

187187
ret void
@@ -209,7 +209,7 @@ entry:
209209
call spir_func void @_Z34__spirv_SubgroupBlockPrefetchINTELPU3AS1Kjj(i32 addrspace(1)* %decorated_ui_ptr, i32 8)
210210

211211
%decorated_ul_ptr = getelementptr inbounds i64, i64 addrspace(1)* %ul_in, i32 0, !spirv.Decorations !18
212-
; CHECK: lsc_load.ugm.ri.ca (M1_NM, 1) %null:d32x64t flat[{{.*}}]:a64
212+
; CHECK: lsc_load.ugm.ri.ca (M1_NM, 1) %null:d64x32t flat[{{.*}}]:a64
213213
call spir_func void @_Z34__spirv_SubgroupBlockPrefetchINTELPU3AS1Kmj(i64 addrspace(1)* %decorated_ul_ptr, i32 16)
214214

215215
ret void
@@ -237,7 +237,7 @@ entry:
237237
call spir_func void @_Z34__spirv_SubgroupBlockPrefetchINTELPU3AS1Kjj(i32 addrspace(1)* %decorated_ui_ptr, i32 8)
238238

239239
%decorated_ul_ptr = getelementptr inbounds i64, i64 addrspace(1)* %ul_in, i32 0, !spirv.Decorations !21
240-
; CHECK: lsc_load.ugm (M1_NM, 1) %null:d32x64t flat[{{.*}}]:a64
240+
; CHECK: lsc_load.ugm (M1_NM, 1) %null:d64x32t flat[{{.*}}]:a64
241241
call spir_func void @_Z34__spirv_SubgroupBlockPrefetchINTELPU3AS1Kmj(i64 addrspace(1)* %decorated_ul_ptr, i32 16)
242242

243243
ret void

0 commit comments

Comments
 (0)