Skip to content

Commit 96c54ac

Browse files
Nuullllsys-ce-bb
authored andcommitted
Add convergent attribute for GroupNonUniform ops (#2472)
Signed-off-by: Yilong Guo <[email protected]> Original commit: KhronosGroup/SPIRV-LLVM-Translator@6e62da7a514be99
1 parent be3d13a commit 96c54ac

File tree

3 files changed

+219
-35
lines changed

3 files changed

+219
-35
lines changed

llvm-spirv/lib/SPIRV/SPIRVReader.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3207,8 +3207,9 @@ Instruction *SPIRVToLLVM::transBuiltinFromInst(const std::string &FuncName,
32073207
if (isFuncNoUnwind())
32083208
Func->addFnAttr(Attribute::NoUnwind);
32093209
auto OC = BI->getOpCode();
3210-
if (isGroupOpCode(OC) || isIntelSubgroupOpCode(OC) ||
3211-
isSplitBarrierINTELOpCode(OC) || OC == OpControlBarrier)
3210+
if (isGroupOpCode(OC) || isGroupNonUniformOpcode(OC) ||
3211+
isIntelSubgroupOpCode(OC) || isSplitBarrierINTELOpCode(OC) ||
3212+
OC == OpControlBarrier)
32123213
Func->addFnAttr(Attribute::Convergent);
32133214
}
32143215
auto *Call =

llvm-spirv/test/GroupAndSubgroupInstructions.spvasm

Lines changed: 215 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; Generated with:
22
; source.cl:
33
; void foo(int x, int2 coord, uint c, short s, float f, size_t n,
4-
; __global uint* p, read_write image2d_t image) {
4+
; __global uint* p, read_write image2d_t image, uint4 v) {
55
; work_group_all(x);
66
; work_group_any(x);
77
; work_group_broadcast(x, n);
@@ -25,13 +25,44 @@
2525
; intel_sub_group_block_write(p, c);
2626
; intel_sub_group_block_read(image, coord);
2727
; intel_sub_group_block_write(image, coord, c);
28+
; sub_group_elect();
29+
; sub_group_non_uniform_all(x);
30+
; sub_group_non_uniform_any(x);
31+
; sub_group_non_uniform_all_equal(x);
32+
; sub_group_non_uniform_broadcast(x, c);
33+
; sub_group_broadcast_first(x);
34+
; sub_group_ballot(x);
35+
; sub_group_inverse_ballot(v);
36+
; sub_group_ballot_bit_extract(v, c);
37+
; sub_group_ballot_bit_count(v);
38+
; sub_group_ballot_inclusive_scan(v);
39+
; sub_group_ballot_exclusive_scan(v);
40+
; sub_group_ballot_find_lsb(v);
41+
; sub_group_ballot_find_msb(v);
42+
; sub_group_non_uniform_reduce_add(x);
43+
; sub_group_non_uniform_reduce_add(f);
44+
; sub_group_non_uniform_scan_inclusive_min(x);
45+
; sub_group_non_uniform_scan_inclusive_min(c);
46+
; sub_group_non_uniform_scan_inclusive_min(f);
47+
; sub_group_non_uniform_scan_exclusive_max(x);
48+
; sub_group_non_uniform_scan_exclusive_max(c);
49+
; sub_group_non_uniform_scan_exclusive_max(f);
50+
; sub_group_non_uniform_reduce_mul(x);
51+
; sub_group_non_uniform_reduce_mul(f);
52+
; sub_group_non_uniform_reduce_and(x);
53+
; sub_group_non_uniform_scan_inclusive_or(x);
54+
; sub_group_non_uniform_scan_exclusive_xor(x);
55+
; sub_group_non_uniform_reduce_logical_and(x);
56+
; sub_group_non_uniform_scan_inclusive_logical_or(x);
57+
; sub_group_non_uniform_scan_exclusive_logical_xor(x);
58+
; sub_group_rotate(c, x);
2859
; }
2960
; clang -cc1 -O2 -triple spir -cl-std=cl2.0 -finclude-default-header -cl-ext=+all source.cl -emit-llvm-bc -o tmp.bc
3061
; llvm-spirv tmp.bc --spirv-ext=+all -o tmp.spv
3162
; spirv-dis tmp.spv -o llvm-spirv/test/GroupAndSubgroupInstructions.spvasm
3263

3364
; REQUIRES: spirv-as
34-
; RUN: spirv-as %s --target-env spv1.0 -o %t.spv
65+
; RUN: spirv-as %s --target-env spv1.3 -o %t.spv
3566
; RUN: spirv-val %t.spv
3667
; RUN: llvm-spirv -r %t.spv --spirv-target-env=CL1.2 -o %t.bc
3768
; RUN: llvm-dis %t.bc -o %t.ll
@@ -73,7 +104,34 @@
73104
; CHECK-CL-DAG: declare spir_func void @_Z27intel_sub_group_block_writePU3AS1jj(ptr addrspace(1), i32) #[[#Attrs]]
74105
; CHECK-CL-DAG: declare spir_func i32 @_Z26intel_sub_group_block_read14ocl_image2d_rwDv2_i(ptr addrspace(1), <2 x i32>) #[[#Attrs]]
75106
; CHECK-CL-DAG: declare spir_func void @_Z27intel_sub_group_block_write14ocl_image2d_rwDv2_ij(ptr addrspace(1), <2 x i32>, i32) #[[#Attrs]]
76-
107+
; CHECK-CL-DAG: declare spir_func i32 @_Z15sub_group_electv() #[[#Attrs]]
108+
; CHECK-CL-DAG: declare spir_func i32 @_Z25sub_group_non_uniform_alli(i32) #[[#Attrs]]
109+
; CHECK-CL-DAG: declare spir_func i32 @_Z25sub_group_non_uniform_anyi(i32) #[[#Attrs]]
110+
; CHECK-CL-DAG: declare spir_func i32 @_Z31sub_group_non_uniform_all_equali(i32) #[[#Attrs]]
111+
; CHECK-CL-DAG: declare spir_func i32 @_Z31sub_group_non_uniform_broadcastjj(i32, i32) #[[#Attrs]]
112+
; CHECK-CL-DAG: declare spir_func i32 @_Z25sub_group_broadcast_firstj(i32) #[[#Attrs]]
113+
; CHECK-CL-DAG: declare spir_func <4 x i32> @_Z16sub_group_balloti(i32) #[[#Attrs]]
114+
; CHECK-CL-DAG: declare spir_func i32 @_Z31sub_group_ballot_inclusive_scanDv4_j(<4 x i32>) #[[#Attrs]]
115+
; CHECK-CL-DAG: declare spir_func i32 @_Z31sub_group_ballot_exclusive_scanDv4_j(<4 x i32>) #[[#Attrs]]
116+
; CHECK-CL-DAG: declare spir_func i32 @_Z25sub_group_ballot_find_lsbDv4_j(<4 x i32>) #[[#Attrs]]
117+
; CHECK-CL-DAG: declare spir_func i32 @_Z25sub_group_ballot_find_msbDv4_j(<4 x i32>) #[[#Attrs]]
118+
; CHECK-CL-DAG: declare spir_func i32 @_Z32sub_group_non_uniform_reduce_addi(i32) #[[#Attrs]]
119+
; CHECK-CL-DAG: declare spir_func float @_Z32sub_group_non_uniform_reduce_addf(float) #[[#Attrs]]
120+
; CHECK-CL-DAG: declare spir_func i32 @_Z40sub_group_non_uniform_scan_inclusive_mini(i32) #[[#Attrs]]
121+
; CHECK-CL-DAG: declare spir_func i32 @_Z40sub_group_non_uniform_scan_inclusive_minj(i32) #[[#Attrs]]
122+
; CHECK-CL-DAG: declare spir_func float @_Z40sub_group_non_uniform_scan_inclusive_minf(float) #[[#Attrs]]
123+
; CHECK-CL-DAG: declare spir_func i32 @_Z40sub_group_non_uniform_scan_exclusive_maxi(i32) #[[#Attrs]]
124+
; CHECK-CL-DAG: declare spir_func i32 @_Z40sub_group_non_uniform_scan_exclusive_maxj(i32) #[[#Attrs]]
125+
; CHECK-CL-DAG: declare spir_func float @_Z40sub_group_non_uniform_scan_exclusive_maxf(float) #[[#Attrs]]
126+
; CHECK-CL-DAG: declare spir_func i32 @_Z32sub_group_non_uniform_reduce_muli(i32) #[[#Attrs]]
127+
; CHECK-CL-DAG: declare spir_func float @_Z32sub_group_non_uniform_reduce_mulf(float) #[[#Attrs]]
128+
; CHECK-CL-DAG: declare spir_func i32 @_Z32sub_group_non_uniform_reduce_andi(i32) #[[#Attrs]]
129+
; CHECK-CL-DAG: declare spir_func i32 @_Z39sub_group_non_uniform_scan_inclusive_ori(i32) #[[#Attrs]]
130+
; CHECK-CL-DAG: declare spir_func i32 @_Z40sub_group_non_uniform_scan_exclusive_xori(i32) #[[#Attrs]]
131+
; CHECK-CL-DAG: declare spir_func i32 @_Z40sub_group_non_uniform_reduce_logical_andi(i32) #[[#Attrs]]
132+
; CHECK-CL-DAG: declare spir_func i32 @_Z47sub_group_non_uniform_scan_inclusive_logical_ori(i32) #[[#Attrs]]
133+
; CHECK-CL-DAG: declare spir_func i32 @_Z48sub_group_non_uniform_scan_exclusive_logical_xori(i32) #[[#Attrs]]
134+
; CHECK-CL-DAG: declare spir_func i32 @_Z16sub_group_rotateii(i32, i32) #[[#Attrs]]
77135

78136
; CHECK-SPV-IR: declare spir_func i1 @_Z16__spirv_GroupAllib(i32, i1) #[[#Attrs:]]
79137
; CHECK-SPV-IR: declare spir_func i1 @_Z16__spirv_GroupAnyib(i32, i1) #[[#Attrs]]
@@ -95,14 +153,41 @@
95153
; CHECK-SPV-IR: declare spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1jj(ptr addrspace(1), i32) #[[#Attrs]]
96154
; CHECK-SPV-IR: declare spir_func i32 @_Z40__spirv_SubgroupImageBlockReadINTEL_RintPU3AS133__spirv_Image__void_1_0_0_0_0_0_2Dv2_i(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2), <2 x i32>) #[[#Attrs]]
97155
; CHECK-SPV-IR: declare spir_func void @_Z36__spirv_SubgroupImageBlockWriteINTELPU3AS133__spirv_Image__void_1_0_0_0_0_0_2Dv2_ij(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2), <2 x i32>, i32) #[[#Attrs]]
156+
; CHECK-SPV-IR: declare spir_func i1 @_Z28__spirv_GroupNonUniformElecti(i32) #[[#Attrs]]
157+
; CHECK-SPV-IR: declare spir_func i1 @_Z26__spirv_GroupNonUniformAllib(i32, i1) #[[#Attrs]]
158+
; CHECK-SPV-IR: declare spir_func i1 @_Z26__spirv_GroupNonUniformAnyib(i32, i1) #[[#Attrs]]
159+
; CHECK-SPV-IR: declare spir_func i1 @_Z31__spirv_GroupNonUniformAllEqualii(i32, i32) #[[#Attrs]]
160+
; CHECK-SPV-IR: declare spir_func i32 @_Z32__spirv_GroupNonUniformBroadcastiij(i32, i32, i32) #[[#Attrs]]
161+
; CHECK-SPV-IR: declare spir_func i32 @_Z37__spirv_GroupNonUniformBroadcastFirstii(i32, i32) #[[#Attrs]]
162+
; CHECK-SPV-IR: declare spir_func <4 x i32> @_Z29__spirv_GroupNonUniformBallotib(i32, i1) #[[#Attrs]]
163+
; CHECK-SPV-IR: declare spir_func i32 @_Z37__spirv_GroupNonUniformBallotBitCountiiDv4_j(i32, i32, <4 x i32>) #[[#Attrs]]
164+
; CHECK-SPV-IR: declare spir_func i32 @_Z36__spirv_GroupNonUniformBallotFindLSBiDv4_j(i32, <4 x i32>) #[[#Attrs]]
165+
; CHECK-SPV-IR: declare spir_func i32 @_Z36__spirv_GroupNonUniformBallotFindMSBiDv4_j(i32, <4 x i32>) #[[#Attrs]]
166+
; CHECK-SPV-IR: declare spir_func i32 @_Z27__spirv_GroupNonUniformIAddiii(i32, i32, i32) #[[#Attrs]]
167+
; CHECK-SPV-IR: declare spir_func float @_Z27__spirv_GroupNonUniformFAddiif(i32, i32, float) #[[#Attrs]]
168+
; CHECK-SPV-IR: declare spir_func i32 @_Z27__spirv_GroupNonUniformSMiniii(i32, i32, i32) #[[#Attrs]]
169+
; CHECK-SPV-IR: declare spir_func i32 @_Z27__spirv_GroupNonUniformUMiniij(i32, i32, i32) #[[#Attrs]]
170+
; CHECK-SPV-IR: declare spir_func float @_Z27__spirv_GroupNonUniformFMiniif(i32, i32, float) #[[#Attrs]]
171+
; CHECK-SPV-IR: declare spir_func i32 @_Z27__spirv_GroupNonUniformSMaxiii(i32, i32, i32) #[[#Attrs]]
172+
; CHECK-SPV-IR: declare spir_func i32 @_Z27__spirv_GroupNonUniformUMaxiij(i32, i32, i32) #[[#Attrs]]
173+
; CHECK-SPV-IR: declare spir_func float @_Z27__spirv_GroupNonUniformFMaxiif(i32, i32, float) #[[#Attrs]]
174+
; CHECK-SPV-IR: declare spir_func i32 @_Z27__spirv_GroupNonUniformIMuliii(i32, i32, i32) #[[#Attrs]]
175+
; CHECK-SPV-IR: declare spir_func float @_Z27__spirv_GroupNonUniformFMuliif(i32, i32, float) #[[#Attrs]]
176+
; CHECK-SPV-IR: declare spir_func i32 @_Z33__spirv_GroupNonUniformBitwiseAndiii(i32, i32, i32) #[[#Attrs]]
177+
; CHECK-SPV-IR: declare spir_func i32 @_Z32__spirv_GroupNonUniformBitwiseOriii(i32, i32, i32) #[[#Attrs]]
178+
; CHECK-SPV-IR: declare spir_func i32 @_Z33__spirv_GroupNonUniformBitwiseXoriii(i32, i32, i32) #[[#Attrs]]
179+
; CHECK-SPV-IR: declare spir_func i1 @_Z33__spirv_GroupNonUniformLogicalAndiib(i32, i32, i1) #[[#Attrs]]
180+
; CHECK-SPV-IR: declare spir_func i1 @_Z32__spirv_GroupNonUniformLogicalOriib(i32, i32, i1) #[[#Attrs]]
181+
; CHECK-SPV-IR: declare spir_func i1 @_Z33__spirv_GroupNonUniformLogicalXoriib(i32, i32, i1) #[[#Attrs]]
182+
; CHECK-SPV-IR: declare spir_func i32 @_Z32__spirv_GroupNonUniformRotateKHRiii(i32, i32, i32) #[[#Attrs]]
98183

99184
; CHECK-COMMON: attributes #[[#Attrs]] =
100185
; CHECK-COMMON-SAME: convergent
101186

102187
; SPIR-V
103-
; Version: 1.0
188+
; Version: 1.3
104189
; Generator: Khronos LLVM/SPIR-V Translator; 14
105-
; Bound: 60
190+
; Bound: 97
106191
; Schema: 0
107192
OpCapability Addresses
108193
OpCapability Linkage
@@ -111,19 +196,80 @@
111196
OpCapability ImageReadWrite
112197
OpCapability Groups
113198
OpCapability Int16
114-
OpCapability GenericPointer
199+
OpCapability GroupNonUniform
200+
OpCapability GroupNonUniformVote
201+
OpCapability GroupNonUniformArithmetic
202+
OpCapability GroupNonUniformBallot
115203
OpCapability SubgroupShuffleINTEL
116204
OpCapability SubgroupBufferBlockIOINTEL
117205
OpCapability SubgroupImageBlockIOINTEL
206+
OpCapability GroupNonUniformRotateKHR
118207
OpExtension "SPV_INTEL_subgroups"
208+
OpExtension "SPV_KHR_subgroup_rotate"
119209
%1 = OpExtInstImport "OpenCL.std"
120210
OpMemoryModel Physical32 OpenCL
121211
OpSource OpenCL_C 200000
122-
OpDecorate %s FuncParamAttr Sext
123-
OpDecorate %dst FuncParamAttr NoCapture
124-
OpDecorate %src FuncParamAttr NoCapture
125-
OpDecorate %e FuncParamAttr NoCapture
212+
OpName %foo "foo"
213+
OpName %x "x"
214+
OpName %coord "coord"
215+
OpName %c "c"
216+
OpName %s "s"
217+
OpName %f "f"
218+
OpName %n "n"
219+
OpName %p "p"
220+
OpName %image "image"
221+
OpName %v "v"
222+
OpName %entry "entry"
223+
OpName %call "call"
224+
OpName %call1 "call1"
225+
OpName %call2 "call2"
226+
OpName %call3 "call3"
227+
OpName %call4 "call4"
228+
OpName %call5 "call5"
229+
OpName %call6 "call6"
230+
OpName %call7 "call7"
231+
OpName %call8 "call8"
232+
OpName %call9 "call9"
233+
OpName %call10 "call10"
234+
OpName %call11 "call11"
235+
OpName %call12 "call12"
236+
OpName %call13 "call13"
237+
OpName %call14 "call14"
238+
OpName %call15 "call15"
239+
OpName %call16 "call16"
240+
OpName %call17 "call17"
241+
OpName %call18 "call18"
242+
OpName %call19 "call19"
243+
OpName %call20 "call20"
244+
OpName %call21 "call21"
245+
OpName %call22 "call22"
246+
OpName %call23 "call23"
247+
OpName %call24 "call24"
248+
OpName %call25 "call25"
249+
OpName %call26 "call26"
250+
OpName %call30 "call30"
251+
OpName %call31 "call31"
252+
OpName %call32 "call32"
253+
OpName %call33 "call33"
254+
OpName %call34 "call34"
255+
OpName %call35 "call35"
256+
OpName %call36 "call36"
257+
OpName %call37 "call37"
258+
OpName %call38 "call38"
259+
OpName %call39 "call39"
260+
OpName %call40 "call40"
261+
OpName %call41 "call41"
262+
OpName %call42 "call42"
263+
OpName %call43 "call43"
264+
OpName %call44 "call44"
265+
OpName %call45 "call45"
266+
OpName %call46 "call46"
267+
OpName %call47 "call47"
268+
OpName %call48 "call48"
269+
OpName %call49 "call49"
270+
OpName %call50 "call50"
126271
OpDecorate %foo LinkageAttributes "foo" Export
272+
OpDecorate %s FuncParamAttr Sext
127273
%uint = OpTypeInt 32 0
128274
%ushort = OpTypeInt 16 0
129275
%uint_0 = OpConstant %uint 0
@@ -135,39 +281,35 @@
135281
%v2uint = OpTypeVector %uint 2
136282
%float = OpTypeFloat 32
137283
%_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
138-
%_ptr_Workgroup_uint = OpTypePointer Workgroup %uint
139-
%Event = OpTypeEvent
140-
%_ptr_Generic_Event = OpTypePointer Generic %Event
141-
%11 = OpTypeImage %void 2D 0 0 0 0 Unknown ReadWrite
142-
%12 = OpTypeFunction %void %uint %v2uint %uint %ushort %float %uint %_ptr_CrossWorkgroup_uint %_ptr_Workgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_Generic_Event %11
284+
%8 = OpTypeImage %void 2D 0 0 0 0 Unknown ReadWrite
285+
%v4uint = OpTypeVector %uint 4
286+
%10 = OpTypeFunction %void %uint %v2uint %uint %ushort %float %uint %_ptr_CrossWorkgroup_uint %8 %v4uint
143287
%bool = OpTypeBool
144-
%foo = OpFunction %void None %12
288+
%foo = OpFunction %void None %10
145289
%x = OpFunctionParameter %uint
146290
%coord = OpFunctionParameter %v2uint
147291
%c = OpFunctionParameter %uint
148292
%s = OpFunctionParameter %ushort
149293
%f = OpFunctionParameter %float
150294
%n = OpFunctionParameter %uint
151295
%p = OpFunctionParameter %_ptr_CrossWorkgroup_uint
152-
%dst = OpFunctionParameter %_ptr_Workgroup_uint
153-
%src = OpFunctionParameter %_ptr_CrossWorkgroup_uint
154-
%e = OpFunctionParameter %_ptr_Generic_Event
155-
%image = OpFunctionParameter %11
296+
%image = OpFunctionParameter %8
297+
%v = OpFunctionParameter %v4uint
156298
%entry = OpLabel
157-
%28 = OpINotEqual %bool %x %uint_0
158-
%call20 = OpGroupAll %bool %uint_2 %28
159-
%call = OpSelect %uint %call20 %uint_1 %uint_0
160-
%33 = OpINotEqual %bool %x %uint_0
161-
%call121 = OpGroupAny %bool %uint_2 %33
162-
%call1 = OpSelect %uint %call121 %uint_1 %uint_0
299+
%24 = OpINotEqual %bool %x %uint_0
300+
%26 = OpGroupAll %bool %uint_2 %24
301+
%call = OpSelect %uint %26 %uint_1 %uint_0
302+
%29 = OpINotEqual %bool %x %uint_0
303+
%30 = OpGroupAny %bool %uint_2 %29
304+
%call1 = OpSelect %uint %30 %uint_1 %uint_0
163305
%call2 = OpGroupBroadcast %uint %uint_2 %x %n
164306
OpControlBarrier %uint_3 %uint_3 %uint_272
165-
%39 = OpINotEqual %bool %x %uint_0
166-
%call322 = OpGroupAll %bool %uint_3 %39
167-
%call3 = OpSelect %uint %call322 %uint_1 %uint_0
168-
%42 = OpINotEqual %bool %x %uint_0
169-
%call423 = OpGroupAny %bool %uint_3 %42
170-
%call4 = OpSelect %uint %call423 %uint_1 %uint_0
307+
%35 = OpINotEqual %bool %x %uint_0
308+
%36 = OpGroupAll %bool %uint_3 %35
309+
%call3 = OpSelect %uint %36 %uint_1 %uint_0
310+
%38 = OpINotEqual %bool %x %uint_0
311+
%39 = OpGroupAny %bool %uint_3 %38
312+
%call4 = OpSelect %uint %39 %uint_1 %uint_0
171313
%call5 = OpGroupBroadcast %uint %uint_3 %x %c
172314
%call6 = OpGroupIAdd %uint %uint_3 Reduce %x
173315
%call7 = OpGroupFAdd %float %uint_3 Reduce %f
@@ -185,5 +327,46 @@
185327
OpSubgroupBlockWriteINTEL %p %c
186328
%call19 = OpSubgroupImageBlockReadINTEL %uint %image %coord
187329
OpSubgroupImageBlockWriteINTEL %image %coord %c
330+
%56 = OpGroupNonUniformElect %bool %uint_3
331+
%call20 = OpSelect %uint %56 %uint_1 %uint_0
332+
%58 = OpINotEqual %bool %x %uint_0
333+
%59 = OpGroupNonUniformAll %bool %uint_3 %58
334+
%call21 = OpSelect %uint %59 %uint_1 %uint_0
335+
%61 = OpINotEqual %bool %x %uint_0
336+
%62 = OpGroupNonUniformAny %bool %uint_3 %61
337+
%call22 = OpSelect %uint %62 %uint_1 %uint_0
338+
%64 = OpGroupNonUniformAllEqual %bool %uint_3 %x
339+
%call23 = OpSelect %uint %64 %uint_1 %uint_0
340+
%call24 = OpGroupNonUniformBroadcast %uint %uint_3 %x %uint_1
341+
%call25 = OpGroupNonUniformBroadcastFirst %uint %uint_3 %x
342+
%68 = OpINotEqual %bool %x %uint_0
343+
%call26 = OpGroupNonUniformBallot %v4uint %uint_3 %68
344+
%call30 = OpGroupNonUniformBallotBitCount %uint %uint_3 InclusiveScan %v
345+
%call31 = OpGroupNonUniformBallotBitCount %uint %uint_3 ExclusiveScan %v
346+
%call32 = OpGroupNonUniformBallotFindLSB %uint %uint_3 %v
347+
%call33 = OpGroupNonUniformBallotFindMSB %uint %uint_3 %v
348+
%call34 = OpGroupNonUniformIAdd %uint %uint_3 Reduce %x
349+
%call35 = OpGroupNonUniformFAdd %float %uint_3 Reduce %f
350+
%call36 = OpGroupNonUniformSMin %uint %uint_3 InclusiveScan %x
351+
%call37 = OpGroupNonUniformUMin %uint %uint_3 InclusiveScan %c
352+
%call38 = OpGroupNonUniformFMin %float %uint_3 InclusiveScan %f
353+
%call39 = OpGroupNonUniformSMax %uint %uint_3 ExclusiveScan %x
354+
%call40 = OpGroupNonUniformUMax %uint %uint_3 ExclusiveScan %c
355+
%call41 = OpGroupNonUniformFMax %float %uint_3 ExclusiveScan %f
356+
%call42 = OpGroupNonUniformIMul %uint %uint_3 Reduce %x
357+
%call43 = OpGroupNonUniformFMul %float %uint_3 Reduce %f
358+
%call44 = OpGroupNonUniformBitwiseAnd %uint %uint_3 Reduce %x
359+
%call45 = OpGroupNonUniformBitwiseOr %uint %uint_3 InclusiveScan %x
360+
%call46 = OpGroupNonUniformBitwiseXor %uint %uint_3 ExclusiveScan %x
361+
%87 = OpINotEqual %bool %x %uint_0
362+
%88 = OpGroupNonUniformLogicalAnd %bool %uint_3 Reduce %87
363+
%call47 = OpSelect %uint %88 %uint_1 %uint_0
364+
%90 = OpINotEqual %bool %x %uint_0
365+
%91 = OpGroupNonUniformLogicalOr %bool %uint_3 InclusiveScan %90
366+
%call48 = OpSelect %uint %91 %uint_1 %uint_0
367+
%93 = OpINotEqual %bool %x %uint_0
368+
%94 = OpGroupNonUniformLogicalXor %bool %uint_3 ExclusiveScan %93
369+
%call49 = OpSelect %uint %94 %uint_1 %uint_0
370+
%call50 = OpGroupNonUniformRotateKHR %uint %uint_3 %c %x
188371
OpReturn
189372
OpFunctionEnd

llvm-spirv/test/group_non_uniform_shuffle_down.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ entry:
3737
; CHECK-LLVM: [[ALLOCA_1:%[a-z0-9.]+]] = alloca %"class.sycl::_V1::detail::half_impl::half", align 2
3838
; CHECK-LLVM: [[LOAD_0:%[a-z0-9.]+]] = load %"class.sycl::_V1::detail::half_impl::half", ptr [[ALLOCA_1]], align 2
3939
; CHECK-LLVM: [[EXTRACT_0:%[a-z0-9.]+]] = extractvalue %"class.sycl::_V1::detail::half_impl::half" [[LOAD_0]], 0
40-
; CHECK-LLVM: [[CALL_0:%[a-z0-9.]+]] = call spir_func half @_Z22sub_group_shuffle_downDhj(half [[EXTRACT_0]], i32 8) #0
40+
; CHECK-LLVM: [[CALL_0:%[a-z0-9.]+]] = call spir_func half @_Z22sub_group_shuffle_downDhj(half [[EXTRACT_0]], i32 8)
4141
; CHECK-LLVM: [[INSERT_0:%[a-z0-9.]+]] = insertvalue %"class.sycl::_V1::detail::half_impl::half" [[LOAD_0]], half [[CALL_0]], 0
4242
; CHECK-LLVM: store %"class.sycl::_V1::detail::half_impl::half" [[INSERT_0]], ptr [[ALLOCA_1]], align 2
4343

0 commit comments

Comments
 (0)