Skip to content

Commit f85e7ab

Browse files
authored
[AMDGPU] - Add constant folding to s_wqm intrinsic (llvm#72382)
Fold any constant input to the `s_wqm` intrinsic.
1 parent f335883 commit f85e7ab

File tree

2 files changed

+97
-5
lines changed

2 files changed

+97
-5
lines changed

llvm/lib/Analysis/ConstantFolding.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1533,6 +1533,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
15331533
case Intrinsic::amdgcn_perm:
15341534
case Intrinsic::amdgcn_wave_reduce_umin:
15351535
case Intrinsic::amdgcn_wave_reduce_umax:
1536+
case Intrinsic::amdgcn_s_wqm:
15361537
case Intrinsic::amdgcn_s_quadmask:
15371538
case Intrinsic::amdgcn_s_bitreplicate:
15381539
case Intrinsic::arm_mve_vctp8:
@@ -2425,6 +2426,15 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
24252426
return ConstantFP::get(Ty->getContext(), Val);
24262427
}
24272428

2429+
case Intrinsic::amdgcn_s_wqm: { // Fold s_wqm of a constant: every aligned 4-bit group with any bit set becomes all ones.
2430+
uint64_t Val = Op->getZExtValue(); // Work on the operand's bits in a 64-bit scratch value.
2431+
Val |= (Val & 0x5555555555555555ULL) << 1 | // Step 1: copy each even bit up into its odd neighbor...
2432+
((Val >> 1) & 0x5555555555555555ULL); // ...and each odd bit down, so both bits of every pair hold the pair's OR.
2433+
Val |= (Val & 0x3333333333333333ULL) << 2 | // Step 2: copy each low pair up into the high pair of its 4-bit group...
2434+
((Val >> 2) & 0x3333333333333333ULL); // ...and each high pair down, so all four bits hold the group's OR.
2435+
return ConstantInt::get(Ty, Val); // Produce the folded result as a constant of type Ty.
2436+
}
2437+
24282438
case Intrinsic::amdgcn_s_quadmask: {
24292439
uint64_t Val = Op->getZExtValue();
24302440
uint64_t QuadMask = 0;

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.ll

Lines changed: 87 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,55 @@ define i32 @test_s_wqm_constant_i32() {
99
; GFX11-LABEL: test_s_wqm_constant_i32:
1010
; GFX11: ; %bb.0:
1111
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12-
; GFX11-NEXT: s_wqm_b32 s0, 0x85fe3a92
12+
; GFX11-NEXT: v_mov_b32_e32 v0, 0xff00ff0f
13+
; GFX11-NEXT: s_setpc_b64 s[30:31]
14+
%br = call i32 @llvm.amdgcn.s.wqm.i32(i32 u0x85003A02)
15+
ret i32 %br
16+
}
17+
18+
define i32 @test_s_wqm_constant_zero_i32() {
19+
; GFX11-LABEL: test_s_wqm_constant_zero_i32:
20+
; GFX11: ; %bb.0:
21+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22+
; GFX11-NEXT: v_mov_b32_e32 v0, 0
23+
; GFX11-NEXT: s_setpc_b64 s[30:31]
24+
%br = call i32 @llvm.amdgcn.s.wqm.i32(i32 0)
25+
ret i32 %br
26+
}
27+
28+
define i32 @test_s_wqm_constant_neg_one_i32() {
29+
; GFX11-LABEL: test_s_wqm_constant_neg_one_i32:
30+
; GFX11: ; %bb.0:
31+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
32+
; GFX11-NEXT: v_mov_b32_e32 v0, -1
33+
; GFX11-NEXT: s_setpc_b64 s[30:31]
34+
%br = call i32 @llvm.amdgcn.s.wqm.i32(i32 -1)
35+
ret i32 %br
36+
}
37+
38+
define i32 @test_s_wqm_constant_undef_i32() {
39+
; GFX11-LABEL: test_s_wqm_constant_undef_i32:
40+
; GFX11: ; %bb.0:
41+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
42+
; GFX11-NEXT: s_wqm_b32 s0, s0
43+
; GFX11-NEXT: v_mov_b32_e32 v0, s0
44+
; GFX11-NEXT: s_setpc_b64 s[30:31]
45+
%br = call i32 @llvm.amdgcn.s.wqm.i32(i32 undef)
46+
ret i32 %br
47+
}
48+
49+
define i32 @test_s_wqm_constant_poison_i32() {
50+
; GFX11-LABEL: test_s_wqm_constant_poison_i32:
51+
; GFX11: ; %bb.0:
52+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
53+
; GFX11-NEXT: s_wqm_b32 s0, s0
1354
; GFX11-NEXT: v_mov_b32_e32 v0, s0
1455
; GFX11-NEXT: s_setpc_b64 s[30:31]
15-
%br = call i32 @llvm.amdgcn.s.wqm.i32(i32 u0x85FE3A92)
56+
%br = call i32 @llvm.amdgcn.s.wqm.i32(i32 poison)
1657
ret i32 %br
1758
}
1859

60+
1961
define amdgpu_cs void @test_s_wqm_sgpr_i32(i32 inreg %mask, ptr addrspace(1) %out) {
2062
; GFX11-LABEL: test_s_wqm_sgpr_i32:
2163
; GFX11: ; %bb.0: ; %entry
@@ -48,12 +90,52 @@ define i64 @test_s_wqm_constant_i64() {
4890
; GFX11-LABEL: test_s_wqm_constant_i64:
4991
; GFX11: ; %bb.0:
5092
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51-
; GFX11-NEXT: s_mov_b32 s0, 0x85fe3a92
52-
; GFX11-NEXT: s_mov_b32 s1, 0x3a9285fe
93+
; GFX11-NEXT: v_mov_b32_e32 v0, 0xff00ffff
94+
; GFX11-NEXT: v_mov_b32_e32 v1, 0xffff0fff
95+
; GFX11-NEXT: s_setpc_b64 s[30:31]
96+
%br = call i64 @llvm.amdgcn.s.wqm.i64(i64 u0x12480FDBAC00753E)
97+
ret i64 %br
98+
}
99+
100+
define i64 @test_s_wqm_constant_zero_i64() {
101+
; GFX11-LABEL: test_s_wqm_constant_zero_i64:
102+
; GFX11: ; %bb.0:
103+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104+
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0
105+
; GFX11-NEXT: s_setpc_b64 s[30:31]
106+
%br = call i64 @llvm.amdgcn.s.wqm.i64(i64 0)
107+
ret i64 %br
108+
}
109+
110+
define i64 @test_s_wqm_constant_neg_one_i64() {
111+
; GFX11-LABEL: test_s_wqm_constant_neg_one_i64:
112+
; GFX11: ; %bb.0:
113+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114+
; GFX11-NEXT: v_dual_mov_b32 v0, -1 :: v_dual_mov_b32 v1, -1
115+
; GFX11-NEXT: s_setpc_b64 s[30:31]
116+
%br = call i64 @llvm.amdgcn.s.wqm.i64(i64 -1)
117+
ret i64 %br
118+
}
119+
120+
define i64 @test_s_wqm_constant_undef_i64() {
121+
; GFX11-LABEL: test_s_wqm_constant_undef_i64:
122+
; GFX11: ; %bb.0:
123+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
124+
; GFX11-NEXT: s_wqm_b64 s[0:1], s[0:1]
125+
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
126+
; GFX11-NEXT: s_setpc_b64 s[30:31]
127+
%br = call i64 @llvm.amdgcn.s.wqm.i64(i64 undef)
128+
ret i64 %br
129+
}
130+
131+
define i64 @test_s_wqm_constant_poison_i64() {
132+
; GFX11-LABEL: test_s_wqm_constant_poison_i64:
133+
; GFX11: ; %bb.0:
134+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
53135
; GFX11-NEXT: s_wqm_b64 s[0:1], s[0:1]
54136
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
55137
; GFX11-NEXT: s_setpc_b64 s[30:31]
56-
%br = call i64 @llvm.amdgcn.s.wqm.i64(i64 u0x3A9285FE85FE3A92)
138+
%br = call i64 @llvm.amdgcn.s.wqm.i64(i64 poison)
57139
ret i64 %br
58140
}
59141

0 commit comments

Comments
 (0)