Skip to content

Commit b1e039f

Browse files
authored
[AMDGPU] - Add constant folding for s_quadmask (#72381)
If the input is a constant, we can constant-fold the `s_quadmask` intrinsic.
1 parent 95acb33 commit b1e039f

File tree

2 files changed

+107
-6
lines changed

2 files changed

+107
-6
lines changed

llvm/lib/Analysis/ConstantFolding.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1533,6 +1533,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
15331533
case Intrinsic::amdgcn_perm:
15341534
case Intrinsic::amdgcn_wave_reduce_umin:
15351535
case Intrinsic::amdgcn_wave_reduce_umax:
1536+
case Intrinsic::amdgcn_s_quadmask:
15361537
case Intrinsic::amdgcn_s_bitreplicate:
15371538
case Intrinsic::arm_mve_vctp8:
15381539
case Intrinsic::arm_mve_vctp16:
@@ -2424,6 +2425,18 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
24242425
return ConstantFP::get(Ty->getContext(), Val);
24252426
}
24262427

2428+
// Constant-fold s_quadmask: bit I of the result is set when the I-th 4-bit
// group of the operand (counting from the least significant bits) is non-zero.
case Intrinsic::amdgcn_s_quadmask: {
2429+
uint64_t Val = Op->getZExtValue();
2430+
uint64_t QuadMask = 0;
2431+
// One iteration per 4-bit group of the operand; Val is shifted right by four
// each step so the group under test is always in its low four bits.
for (unsigned I = 0; I < Op->getBitWidth() / 4; ++I, Val >>= 4) {
2432+
// An all-zero group leaves its result bit clear.
if (!(Val & 0xF))
2433+
continue;
2434+
2435+
// NOTE(review): `1 << I` is an int shift; fine for the i32/i64 forms
// (I <= 15), but a 64-bit literal would be needed if I could reach 31.
QuadMask |= (1 << I);
2436+
}
2437+
return ConstantInt::get(Ty, QuadMask);
2438+
}
2439+
24272440
case Intrinsic::amdgcn_s_bitreplicate: {
24282441
uint64_t Val = Op->getZExtValue();
24292442
Val = (Val & 0x000000000000FFFFULL) | (Val & 0x00000000FFFF0000ULL) << 16;

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll

Lines changed: 94 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,60 @@
55
declare i32 @llvm.amdgcn.s.quadmask.i32(i32)
66
declare i64 @llvm.amdgcn.s.quadmask.i64(i64)
77

8+
; An all-zero i32 operand is folded: the expected code moves the literal 0
; into v0 and contains no s_quadmask instruction.
define i32 @test_quadmask_constant_zero_i32() {
9+
; GFX11-LABEL: test_quadmask_constant_zero_i32:
10+
; GFX11: ; %bb.0: ; %entry
11+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12+
; GFX11-NEXT: v_mov_b32_e32 v0, 0
13+
; GFX11-NEXT: s_setpc_b64 s[30:31]
14+
entry:
15+
%qm = call i32 @llvm.amdgcn.s.quadmask.i32(i32 0)
16+
ret i32 %qm
17+
}
18+
19+
; An i32 operand of -1 has all eight 4-bit groups non-zero, so the call is
; folded to 0xff and no s_quadmask instruction is expected.
define i32 @test_quadmask_constant_neg_one_i32() {
20+
; GFX11-LABEL: test_quadmask_constant_neg_one_i32:
21+
; GFX11: ; %bb.0: ; %entry
22+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23+
; GFX11-NEXT: v_mov_b32_e32 v0, 0xff
24+
; GFX11-NEXT: s_setpc_b64 s[30:31]
25+
entry:
26+
%qm = call i32 @llvm.amdgcn.s.quadmask.i32(i32 -1)
27+
ret i32 %qm
28+
}
29+
30+
; An undef i32 operand is not folded: the expected code still contains an
; s_quadmask_b32 instruction.
define i32 @test_quadmask_constant_undef_i32() {
31+
; GFX11-LABEL: test_quadmask_constant_undef_i32:
32+
; GFX11: ; %bb.0: ; %entry
33+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34+
; GFX11-NEXT: s_quadmask_b32 s0, s0
35+
; GFX11-NEXT: v_mov_b32_e32 v0, s0
36+
; GFX11-NEXT: s_setpc_b64 s[30:31]
37+
entry:
38+
%qm = call i32 @llvm.amdgcn.s.quadmask.i32(i32 undef)
39+
ret i32 %qm
40+
}
41+
42+
; A poison i32 operand is not folded: the expected code still contains an
; s_quadmask_b32 instruction.
define i32 @test_quadmask_constant_poison_i32() {
42+
; GFX11-LABEL: test_quadmask_constant_poison_i32:
43+
; GFX11: ; %bb.0: ; %entry
44+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
45+
; GFX11-NEXT: s_quadmask_b32 s0, s0
46+
; GFX11-NEXT: v_mov_b32_e32 v0, s0
47+
; GFX11-NEXT: s_setpc_b64 s[30:31]
48+
entry:
49+
%qm = call i32 @llvm.amdgcn.s.quadmask.i32(i32 poison)
50+
ret i32 %qm
51+
}
53+
854
define i32 @test_quadmask_constant_i32() {
955
; GFX11-LABEL: test_quadmask_constant_i32:
1056
; GFX11: ; %bb.0: ; %entry
1157
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12-
; GFX11-NEXT: s_quadmask_b32 s0, 0x85fe3a92
13-
; GFX11-NEXT: v_mov_b32_e32 v0, s0
58+
; GFX11-NEXT: v_mov_b32_e32 v0, 0xcb
1459
; GFX11-NEXT: s_setpc_b64 s[30:31]
1560
entry:
16-
%qm = call i32 @llvm.amdgcn.s.quadmask.i32(i32 u0x85FE3A92)
61+
%qm = call i32 @llvm.amdgcn.s.quadmask.i32(i32 u0x85003092)
1762
ret i32 %qm
1863
}
1964

@@ -50,13 +95,56 @@ define i64 @test_quadmask_constant_i64() {
5095
; GFX11-LABEL: test_quadmask_constant_i64:
5196
; GFX11: ; %bb.0: ; %entry
5297
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
53-
; GFX11-NEXT: s_mov_b32 s0, 0x85fe3a92
54-
; GFX11-NEXT: s_mov_b32 s1, 0x67de48fc
98+
; GFX11-NEXT: v_dual_mov_b32 v0, 0xe3e6 :: v_dual_mov_b32 v1, 0
99+
; GFX11-NEXT: s_setpc_b64 s[30:31]
100+
entry:
101+
%qm = call i64 @llvm.amdgcn.s.quadmask.i64(i64 u0x67D000FC85F00A90)
102+
ret i64 %qm
103+
}
104+
105+
; An all-zero i64 operand is folded: both result halves (v0 and v1) are
; loaded with the literal 0 and no s_quadmask instruction is expected.
define i64 @test_quadmask_constant_zero_i64() {
106+
; GFX11-LABEL: test_quadmask_constant_zero_i64:
107+
; GFX11: ; %bb.0: ; %entry
108+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
109+
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0
110+
; GFX11-NEXT: s_setpc_b64 s[30:31]
111+
entry:
112+
%qm = call i64 @llvm.amdgcn.s.quadmask.i64(i64 0)
113+
ret i64 %qm
114+
}
115+
116+
; An i64 operand of -1 has all sixteen 4-bit groups non-zero, so the call is
; folded to 0xffff in the low half (v0) with a zero high half (v1).
define i64 @test_quadmask_constant_neg_one_i64() {
117+
; GFX11-LABEL: test_quadmask_constant_neg_one_i64:
118+
; GFX11: ; %bb.0: ; %entry
119+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120+
; GFX11-NEXT: v_dual_mov_b32 v0, 0xffff :: v_dual_mov_b32 v1, 0
121+
; GFX11-NEXT: s_setpc_b64 s[30:31]
122+
entry:
123+
%qm = call i64 @llvm.amdgcn.s.quadmask.i64(i64 -1)
124+
ret i64 %qm
125+
}
126+
127+
; An undef i64 operand is not folded: the expected code still contains an
; s_quadmask_b64 instruction.
define i64 @test_quadmask_constant_undef_i64() {
128+
; GFX11-LABEL: test_quadmask_constant_undef_i64:
129+
; GFX11: ; %bb.0: ; %entry
130+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
131+
; GFX11-NEXT: s_quadmask_b64 s[0:1], s[0:1]
132+
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
133+
; GFX11-NEXT: s_setpc_b64 s[30:31]
134+
entry:
135+
%qm = call i64 @llvm.amdgcn.s.quadmask.i64(i64 undef)
136+
ret i64 %qm
137+
}
138+
139+
define i64 @test_quadmask_constant_poison_i64() {
140+
; GFX11-LABEL: test_quadmask_constant_poison_i64:
141+
; GFX11: ; %bb.0: ; %entry
142+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
55143
; GFX11-NEXT: s_quadmask_b64 s[0:1], s[0:1]
56144
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
57145
; GFX11-NEXT: s_setpc_b64 s[30:31]
58146
entry:
59-
%qm = call i64 @llvm.amdgcn.s.quadmask.i64(i64 u0x67DE48FC85FE3A92)
147+
%qm = call i64 @llvm.amdgcn.s.quadmask.i64(i64 poison)
60148
ret i64 %qm
61149
}
62150

0 commit comments

Comments
 (0)