Skip to content

Commit 1462053

Browse files
committed
[AMDGPU] Propagate constants for llvm.amdgcn.wave.reduce.umin/umax
Reviewed By: arsenm, #amdgpu Differential Revision: https://reviews.llvm.org/D156077
1 parent a32023e commit 1462053

File tree

4 files changed

+83
-78
lines changed

4 files changed

+83
-78
lines changed

llvm/lib/Analysis/ConstantFolding.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1548,6 +1548,8 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
15481548
case Intrinsic::vector_reduce_umax:
15491549
// Target intrinsics
15501550
case Intrinsic::amdgcn_perm:
1551+
case Intrinsic::amdgcn_wave_reduce_umin:
1552+
case Intrinsic::amdgcn_wave_reduce_umax:
15511553
case Intrinsic::arm_mve_vctp8:
15521554
case Intrinsic::arm_mve_vctp16:
15531555
case Intrinsic::arm_mve_vctp32:
@@ -2839,6 +2841,9 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
28392841
return Constant::getNullValue(Ty);
28402842

28412843
return ConstantInt::get(Ty, C0->abs());
2844+
case Intrinsic::amdgcn_wave_reduce_umin:
2845+
case Intrinsic::amdgcn_wave_reduce_umax:
2846+
return dyn_cast<Constant>(Operands[0]);
28422847
}
28432848

28442849
return nullptr;

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll

Lines changed: 0 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -242,72 +242,34 @@ entry:
242242
define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
243243
; GFX8DAGISEL-LABEL: poison_value:
244244
; GFX8DAGISEL: ; %bb.0: ; %entry
245-
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
246-
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
247-
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
248-
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
249-
; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v0
250245
; GFX8DAGISEL-NEXT: s_endpgm
251246
;
252247
; GFX8GISEL-LABEL: poison_value:
253248
; GFX8GISEL: ; %bb.0: ; %entry
254-
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
255-
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
256-
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
257-
; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
258-
; GFX8GISEL-NEXT: flat_store_dword v[0:1], v0
259249
; GFX8GISEL-NEXT: s_endpgm
260250
;
261251
; GFX9DAGISEL-LABEL: poison_value:
262252
; GFX9DAGISEL: ; %bb.0: ; %entry
263-
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
264-
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
265-
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
266-
; GFX9DAGISEL-NEXT: global_store_dword v0, v0, s[0:1]
267253
; GFX9DAGISEL-NEXT: s_endpgm
268254
;
269255
; GFX9GISEL-LABEL: poison_value:
270256
; GFX9GISEL: ; %bb.0: ; %entry
271-
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
272-
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0
273-
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
274-
; GFX9GISEL-NEXT: global_store_dword v0, v0, s[0:1]
275257
; GFX9GISEL-NEXT: s_endpgm
276258
;
277259
; GFX10DAGISEL-LABEL: poison_value:
278260
; GFX10DAGISEL: ; %bb.0: ; %entry
279-
; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
280-
; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0
281-
; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
282-
; GFX10DAGISEL-NEXT: global_store_dword v0, v0, s[0:1]
283261
; GFX10DAGISEL-NEXT: s_endpgm
284262
;
285263
; GFX10GISEL-LABEL: poison_value:
286264
; GFX10GISEL: ; %bb.0: ; %entry
287-
; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
288-
; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0
289-
; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
290-
; GFX10GISEL-NEXT: global_store_dword v0, v0, s[0:1]
291265
; GFX10GISEL-NEXT: s_endpgm
292266
;
293267
; GFX11DAGISEL-LABEL: poison_value:
294268
; GFX11DAGISEL: ; %bb.0: ; %entry
295-
; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
296-
; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0
297-
; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
298-
; GFX11DAGISEL-NEXT: global_store_b32 v0, v0, s[0:1]
299-
; GFX11DAGISEL-NEXT: s_nop 0
300-
; GFX11DAGISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
301269
; GFX11DAGISEL-NEXT: s_endpgm
302270
;
303271
; GFX11GISEL-LABEL: poison_value:
304272
; GFX11GISEL: ; %bb.0: ; %entry
305-
; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
306-
; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0
307-
; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0)
308-
; GFX11GISEL-NEXT: global_store_b32 v0, v0, s[0:1]
309-
; GFX11GISEL-NEXT: s_nop 0
310-
; GFX11GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
311273
; GFX11GISEL-NEXT: s_endpgm
312274
entry:
313275
%result = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 poison, i32 1)

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll

Lines changed: 2 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -240,83 +240,45 @@ entry:
240240
ret void
241241
}
242242

243-
define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
243+
define amdgpu_kernel void @poison_value(ptr addrspace(1) %out) {
244244
; GFX8DAGISEL-LABEL: poison_value:
245245
; GFX8DAGISEL: ; %bb.0: ; %entry
246-
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
247-
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
248-
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
249-
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
250-
; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v0
251246
; GFX8DAGISEL-NEXT: s_endpgm
252247
;
253248
; GFX8GISEL-LABEL: poison_value:
254249
; GFX8GISEL: ; %bb.0: ; %entry
255-
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
256-
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
257-
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
258-
; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
259-
; GFX8GISEL-NEXT: flat_store_dword v[0:1], v0
260250
; GFX8GISEL-NEXT: s_endpgm
261251
;
262252
; GFX9DAGISEL-LABEL: poison_value:
263253
; GFX9DAGISEL: ; %bb.0: ; %entry
264-
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
265-
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
266-
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
267-
; GFX9DAGISEL-NEXT: global_store_dword v0, v0, s[0:1]
268254
; GFX9DAGISEL-NEXT: s_endpgm
269255
;
270256
; GFX9GISEL-LABEL: poison_value:
271257
; GFX9GISEL: ; %bb.0: ; %entry
272-
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
273-
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0
274-
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
275-
; GFX9GISEL-NEXT: global_store_dword v0, v0, s[0:1]
276258
; GFX9GISEL-NEXT: s_endpgm
277259
;
278260
; GFX10DAGISEL-LABEL: poison_value:
279261
; GFX10DAGISEL: ; %bb.0: ; %entry
280-
; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
281-
; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0
282-
; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
283-
; GFX10DAGISEL-NEXT: global_store_dword v0, v0, s[0:1]
284262
; GFX10DAGISEL-NEXT: s_endpgm
285263
;
286264
; GFX10GISEL-LABEL: poison_value:
287265
; GFX10GISEL: ; %bb.0: ; %entry
288-
; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
289-
; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0
290-
; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
291-
; GFX10GISEL-NEXT: global_store_dword v0, v0, s[0:1]
292266
; GFX10GISEL-NEXT: s_endpgm
293267
;
294268
; GFX11DAGISEL-LABEL: poison_value:
295269
; GFX11DAGISEL: ; %bb.0: ; %entry
296-
; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
297-
; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0
298-
; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
299-
; GFX11DAGISEL-NEXT: global_store_b32 v0, v0, s[0:1]
300-
; GFX11DAGISEL-NEXT: s_nop 0
301-
; GFX11DAGISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
302270
; GFX11DAGISEL-NEXT: s_endpgm
303271
;
304272
; GFX11GISEL-LABEL: poison_value:
305273
; GFX11GISEL: ; %bb.0: ; %entry
306-
; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
307-
; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0
308-
; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0)
309-
; GFX11GISEL-NEXT: global_store_b32 v0, v0, s[0:1]
310-
; GFX11GISEL-NEXT: s_nop 0
311-
; GFX11GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
312274
; GFX11GISEL-NEXT: s_endpgm
313275
entry:
314276
%result = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 poison, i32 1)
315277
store i32 %result, ptr addrspace(1) %out
316278
ret void
317279
}
318280

319-
define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
281+
define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
320282
; GFX8DAGISEL-LABEL: divergent_value:
321283
; GFX8DAGISEL: ; %bb.0: ; %entry
322284
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt < %s -passes=instsimplify -S | FileCheck %s
3+
4+
; --------------------------------------------------------------------
5+
; llvm.amdgcn.wave.reduce.umin.i32
6+
; --------------------------------------------------------------------
7+
8+
declare i32 @llvm.amdgcn.wave.reduce.umin.i32(i32, i32 immarg)
9+
10+
define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_poison(ptr addrspace(1) %out, i32 %in) {
11+
; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_poison(
12+
; CHECK-NEXT: entry:
13+
; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
14+
; CHECK-NEXT: ret void
15+
;
16+
entry:
17+
%result = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 poison, i32 1)
18+
store i32 %result, ptr addrspace(1) %out
19+
ret void
20+
}
21+
22+
define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_const(ptr addrspace(1) %out) {
23+
; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_const(
24+
; CHECK-NEXT: entry:
25+
; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
26+
; CHECK-NEXT: ret void
27+
;
28+
entry:
29+
%result = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 123, i32 1)
30+
store i32 %result, ptr addrspace(1) %out
31+
ret void
32+
}
33+
34+
; --------------------------------------------------------------------
35+
; llvm.amdgcn.wave.reduce.umax.i32
36+
; --------------------------------------------------------------------
37+
38+
declare i32 @llvm.amdgcn.wave.reduce.umax.i32(i32, i32 immarg)
39+
40+
define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_poison(ptr addrspace(1) %out, i32 %in) {
41+
; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_poison(
42+
; CHECK-NEXT: entry:
43+
; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
44+
; CHECK-NEXT: ret void
45+
;
46+
entry:
47+
%result = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 poison, i32 1)
48+
store i32 %result, ptr addrspace(1) %out
49+
ret void
50+
}
51+
52+
define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_const(ptr addrspace(1) %out) {
53+
; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_const(
54+
; CHECK-NEXT: entry:
55+
; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
56+
; CHECK-NEXT: ret void
57+
;
58+
entry:
59+
%result = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 123, i32 1)
60+
store i32 %result, ptr addrspace(1) %out
61+
ret void
62+
}
63+
64+
@gv = constant i32 0
65+
define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_gv(ptr addrspace(1) %out) {
66+
; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_gv(
67+
; CHECK-NEXT: entry:
68+
; CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 ptrtoint (ptr @gv to i32), i32 1)
69+
; CHECK-NEXT: store i32 [[RESULT]], ptr addrspace(1) [[OUT:%.*]], align 4
70+
; CHECK-NEXT: ret void
71+
;
72+
entry:
73+
%result = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 ptrtoint (ptr @gv to i32), i32 1)
74+
store i32 %result, ptr addrspace(1) %out
75+
ret void
76+
}

0 commit comments

Comments
 (0)