Skip to content

Commit c9eebc7

Browse files
authored
[GlobalISel] Combine redundant sext_inreg (#131624)
1 parent 7734138 commit c9eebc7

File tree

6 files changed

+299
-6
lines changed

6 files changed

+299
-6
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -995,6 +995,10 @@ class CombinerHelper {
995995
// overflow sub
996996
bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo) const;
997997

998+
// Match (sext_inreg (sext_inreg x, K0), K1), where \p Root is the outer
// G_SEXT_INREG and \p Other is the inner G_SEXT_INREG that feeds it.
// On success, \p MatchInfo holds the rewrite to apply.
999+
bool matchRedundantSextInReg(MachineInstr &Root, MachineInstr &Other,
1000+
BuildFnTy &MatchInfo) const;
1001+
9981002
private:
9991003
/// Checks for legality of an indexed variant of \p LdSt.
10001004
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1867,6 +1867,13 @@ def anyext_of_anyext : ext_of_ext_opcodes<G_ANYEXT, G_ANYEXT>;
18671867
def anyext_of_zext : ext_of_ext_opcodes<G_ANYEXT, G_ZEXT>;
18681868
def anyext_of_sext : ext_of_ext_opcodes<G_ANYEXT, G_SEXT>;
18691869

1870+
// Matches a G_SEXT_INREG ($root) whose input $x is defined by another
// G_SEXT_INREG ($other). The rewrite itself is decided by
// CombinerHelper::matchRedundantSextInReg and run through applyBuildFn.
def sext_inreg_of_sext_inreg : GICombineRule<
1871+
(defs root:$dst, build_fn_matchinfo:$matchinfo),
1872+
(match (G_SEXT_INREG $x, $src, $a):$other,
1873+
(G_SEXT_INREG $dst, $x, $b):$root,
1874+
[{ return Helper.matchRedundantSextInReg(*${root}, *${other}, ${matchinfo}); }]),
1875+
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
1876+
18701877
// Push cast through build vector.
18711878
class buildvector_of_opcode<Instruction castOpcode> : GICombineRule <
18721879
(defs root:$root, build_fn_matchinfo:$matchinfo),
@@ -1914,7 +1921,8 @@ def cast_of_cast_combines: GICombineGroup<[
19141921
sext_of_anyext,
19151922
anyext_of_anyext,
19161923
anyext_of_zext,
1917-
anyext_of_sext
1924+
anyext_of_sext,
1925+
sext_inreg_of_sext_inreg,
19181926
]>;
19191927

19201928
def cast_combines: GICombineGroup<[

llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,3 +378,38 @@ bool CombinerHelper::matchCastOfInteger(const MachineInstr &CastMI,
378378
return false;
379379
}
380380
}
381+
382+
/// Match (sext_inreg (sext_inreg x, K0), K1).
///
/// \p Root is the outer G_SEXT_INREG (width K1) and \p Other is the inner
/// G_SEXT_INREG (width K0) that defines its input. On success \p MatchInfo is
/// populated with the rewrite:
///  - K1 >= K0: every use of Root's result is redirected to Other's result,
///    provided canReplaceReg allows it.
///  - K1 <  K0: a new G_SEXT_INREG of width K1 is built on Other's source.
///
/// \returns true if a rewrite was recorded, false if the register replacement
/// is not permitted.
bool CombinerHelper::matchRedundantSextInReg(MachineInstr &Root,
                                             MachineInstr &Other,
                                             BuildFnTy &MatchInfo) const {
  assert(Root.getOpcode() == TargetOpcode::G_SEXT_INREG &&
         Other.getOpcode() == TargetOpcode::G_SEXT_INREG);

  unsigned OuterBits = Root.getOperand(2).getImm();
  unsigned InnerBits = Other.getOperand(2).getImm();
  Register OuterDef = Root.getOperand(0).getReg();

  if (OuterBits < InnerBits) {
    // The outer extension is the narrower one, so it can be applied directly
    // to the inner instruction's source.
    Register InnerSrc = Other.getOperand(1).getReg();
    MatchInfo = [=](MachineIRBuilder &B) {
      B.buildSExtInReg(OuterDef, InnerSrc, OuterBits);
    };
    return true;
  }

  // OuterBits >= InnerBits: the outer extension is redundant, so its result
  // can simply be replaced by the inner one.
  Register InnerDef = Other.getOperand(0).getReg();
  if (!canReplaceReg(OuterDef, InnerDef, MRI))
    return false;

  MatchInfo = [=](MachineIRBuilder &B) {
    Observer.changingAllUsesOfReg(MRI, OuterDef);
    MRI.replaceRegWith(OuterDef, InnerDef);
    Observer.finishedChangingAllUsesOfReg();
  };
  return true;
}
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -run-pass=amdgpu-regbank-combiner %s -o - | FileCheck %s
3+
4+
---
5+
name: inreg8_inreg16
6+
tracksRegLiveness: true
7+
body: |
8+
bb.0:
9+
liveins: $vgpr0
10+
; CHECK-LABEL: name: inreg8_inreg16
11+
; CHECK: liveins: $vgpr0
12+
; CHECK-NEXT: {{ $}}
13+
; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
14+
; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 8
15+
; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
16+
%copy:_(s32) = COPY $vgpr0
17+
%inreg:_(s32) = G_SEXT_INREG %copy, 8
18+
%inreg1:_(s32) = G_SEXT_INREG %inreg, 16
19+
$vgpr0 = COPY %inreg1
20+
...
21+
22+
---
23+
name: inreg16_inreg16
24+
tracksRegLiveness: true
25+
body: |
26+
bb.0:
27+
liveins: $vgpr0
28+
; CHECK-LABEL: name: inreg16_inreg16
29+
; CHECK: liveins: $vgpr0
30+
; CHECK-NEXT: {{ $}}
31+
; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
32+
; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 16
33+
; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
34+
%copy:_(s32) = COPY $vgpr0
35+
%inreg:_(s32) = G_SEXT_INREG %copy, 16
36+
%inreg1:_(s32) = G_SEXT_INREG %inreg, 16
37+
$vgpr0 = COPY %inreg1
38+
...
39+
40+
---
41+
name: inreg16_inreg8
42+
tracksRegLiveness: true
43+
body: |
44+
bb.0:
45+
liveins: $vgpr0
46+
; CHECK-LABEL: name: inreg16_inreg8
47+
; CHECK: liveins: $vgpr0
48+
; CHECK-NEXT: {{ $}}
49+
; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
50+
; CHECK-NEXT: %inreg1:_(s32) = G_SEXT_INREG %copy, 8
51+
; CHECK-NEXT: $vgpr0 = COPY %inreg1(s32)
52+
%copy:_(s32) = COPY $vgpr0
53+
%inreg:_(s32) = G_SEXT_INREG %copy, 16
54+
%inreg1:_(s32) = G_SEXT_INREG %inreg, 8
55+
$vgpr0 = COPY %inreg1
56+
...
57+
58+
---
59+
name: inreg16_inreg32_64bit
60+
tracksRegLiveness: true
61+
body: |
62+
bb.0:
63+
liveins: $vgpr0_vgpr1
64+
; CHECK-LABEL: name: inreg16_inreg32_64bit
65+
; CHECK: liveins: $vgpr0_vgpr1
66+
; CHECK-NEXT: {{ $}}
67+
; CHECK-NEXT: %copy:_(s64) = COPY $vgpr0_vgpr1
68+
; CHECK-NEXT: %inreg:_(s64) = G_SEXT_INREG %copy, 16
69+
; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg(s64)
70+
%copy:_(s64) = COPY $vgpr0_vgpr1
71+
%inreg:_(s64) = G_SEXT_INREG %copy, 16
72+
%inreg1:_(s64) = G_SEXT_INREG %inreg, 32
73+
$vgpr0_vgpr1 = COPY %inreg1
74+
...
75+
76+
---
77+
name: inreg32_inreg32_64bit
78+
tracksRegLiveness: true
79+
body: |
80+
bb.0:
81+
liveins: $vgpr0_vgpr1
82+
; CHECK-LABEL: name: inreg32_inreg32_64bit
83+
; CHECK: liveins: $vgpr0_vgpr1
84+
; CHECK-NEXT: {{ $}}
85+
; CHECK-NEXT: %copy:_(s64) = COPY $vgpr0_vgpr1
86+
; CHECK-NEXT: %inreg:_(s64) = G_SEXT_INREG %copy, 32
87+
; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg(s64)
88+
%copy:_(s64) = COPY $vgpr0_vgpr1
89+
%inreg:_(s64) = G_SEXT_INREG %copy, 32
90+
%inreg1:_(s64) = G_SEXT_INREG %inreg, 32
91+
$vgpr0_vgpr1 = COPY %inreg1
92+
...
93+
94+
---
95+
name: inreg32_inreg16_64bit
96+
tracksRegLiveness: true
97+
body: |
98+
bb.0:
99+
liveins: $vgpr0_vgpr1
100+
; CHECK-LABEL: name: inreg32_inreg16_64bit
101+
; CHECK: liveins: $vgpr0_vgpr1
102+
; CHECK-NEXT: {{ $}}
103+
; CHECK-NEXT: %copy:_(s64) = COPY $vgpr0_vgpr1
104+
; CHECK-NEXT: %inreg1:_(s64) = G_SEXT_INREG %copy, 16
105+
; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg1(s64)
106+
%copy:_(s64) = COPY $vgpr0_vgpr1
107+
%inreg:_(s64) = G_SEXT_INREG %copy, 32
108+
%inreg1:_(s64) = G_SEXT_INREG %inreg, 16
109+
$vgpr0_vgpr1 = COPY %inreg1
110+
...
111+
112+
---
113+
name: vector_inreg8_inreg16
114+
tracksRegLiveness: true
115+
body: |
116+
bb.0:
117+
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
118+
; CHECK-LABEL: name: vector_inreg8_inreg16
119+
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
120+
; CHECK-NEXT: {{ $}}
121+
; CHECK-NEXT: %copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
122+
; CHECK-NEXT: %inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 8
123+
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg(<4 x s32>)
124+
%copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
125+
%inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 8
126+
%inreg1:_(<4 x s32>) = G_SEXT_INREG %inreg, 16
127+
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg1
128+
...
129+
130+
---
131+
name: vector_inreg16_inreg16
132+
tracksRegLiveness: true
133+
body: |
134+
bb.0:
135+
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
136+
; CHECK-LABEL: name: vector_inreg16_inreg16
137+
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
138+
; CHECK-NEXT: {{ $}}
139+
; CHECK-NEXT: %copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
140+
; CHECK-NEXT: %inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 16
141+
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg(<4 x s32>)
142+
%copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
143+
%inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 16
144+
%inreg1:_(<4 x s32>) = G_SEXT_INREG %inreg, 16
145+
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg1
146+
...
147+
148+
---
149+
name: vector_inreg16_inreg8
150+
tracksRegLiveness: true
151+
body: |
152+
bb.0:
153+
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
154+
; CHECK-LABEL: name: vector_inreg16_inreg8
155+
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
156+
; CHECK-NEXT: {{ $}}
157+
; CHECK-NEXT: %copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
158+
; CHECK-NEXT: %inreg1:_(<4 x s32>) = G_SEXT_INREG %copy, 8
159+
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg1(<4 x s32>)
160+
%copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
161+
%inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 16
162+
%inreg1:_(<4 x s32>) = G_SEXT_INREG %inreg, 8
163+
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg1
164+
...
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -run-pass=amdgpu-regbank-combiner %s -o - | FileCheck %s
3+
4+
# Check (sext (trunc (sext_inreg x))) can be folded, as it's a pattern that can arise when
5+
# CGP widening of uniform i16 ops is disabled.
6+
# Two separate combines make it happen (sext_trunc and sext_inreg_of_sext_inreg).
7+
8+
---
9+
name: trunc_s16_inreg_8
10+
tracksRegLiveness: true
11+
body: |
12+
bb.0:
13+
liveins: $vgpr0
14+
; CHECK-LABEL: name: trunc_s16_inreg_8
15+
; CHECK: liveins: $vgpr0
16+
; CHECK-NEXT: {{ $}}
17+
; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
18+
; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 8
19+
; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
20+
%copy:_(s32) = COPY $vgpr0
21+
%inreg:_(s32) = G_SEXT_INREG %copy, 8
22+
%trunc:_(s16) = G_TRUNC %inreg
23+
%sext:_(s32) = G_SEXT %trunc
24+
$vgpr0 = COPY %sext
25+
...
26+
27+
---
28+
name: trunc_s16_inreg_16
29+
tracksRegLiveness: true
30+
body: |
31+
bb.0:
32+
liveins: $vgpr0
33+
; CHECK-LABEL: name: trunc_s16_inreg_16
34+
; CHECK: liveins: $vgpr0
35+
; CHECK-NEXT: {{ $}}
36+
; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
37+
; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 16
38+
; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
39+
%copy:_(s32) = COPY $vgpr0
40+
%inreg:_(s32) = G_SEXT_INREG %copy, 16
41+
%trunc:_(s16) = G_TRUNC %inreg
42+
%sext:_(s32) = G_SEXT %trunc
43+
$vgpr0 = COPY %sext
44+
...
45+
46+
---
47+
name: trunc_s8_inreg_16
48+
tracksRegLiveness: true
49+
body: |
50+
bb.0:
51+
liveins: $vgpr0
52+
; CHECK-LABEL: name: trunc_s8_inreg_16
53+
; CHECK: liveins: $vgpr0
54+
; CHECK-NEXT: {{ $}}
55+
; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
56+
; CHECK-NEXT: %sext:_(s32) = G_SEXT_INREG %copy, 8
57+
; CHECK-NEXT: $vgpr0 = COPY %sext(s32)
58+
%copy:_(s32) = COPY $vgpr0
59+
%inreg:_(s32) = G_SEXT_INREG %copy, 16
60+
%trunc:_(s8) = G_TRUNC %inreg
61+
%sext:_(s32) = G_SEXT %trunc
62+
$vgpr0 = COPY %sext
63+
...
64+
65+
# TODO?: We could handle this by inserting a trunc, but I'm not sure how useful that'd be.
66+
---
67+
name: mismatching_types
68+
tracksRegLiveness: true
69+
body: |
70+
bb.0:
71+
liveins: $vgpr0
72+
; CHECK-LABEL: name: mismatching_types
73+
; CHECK: liveins: $vgpr0
74+
; CHECK-NEXT: {{ $}}
75+
; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
76+
; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 8
77+
; CHECK-NEXT: %trunc:_(s8) = G_TRUNC %inreg(s32)
78+
; CHECK-NEXT: %sext:_(s16) = G_SEXT %trunc(s8)
79+
; CHECK-NEXT: %anyext:_(s32) = G_ANYEXT %sext(s16)
80+
; CHECK-NEXT: $vgpr0 = COPY %anyext(s32)
81+
%copy:_(s32) = COPY $vgpr0
82+
%inreg:_(s32) = G_SEXT_INREG %copy, 8
83+
%trunc:_(s8) = G_TRUNC %inreg
84+
%sext:_(s16) = G_SEXT %trunc
85+
%anyext:_(s32) = G_ANYEXT %sext
86+
$vgpr0 = COPY %anyext
87+
...

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -223,8 +223,6 @@ define amdgpu_cs <2 x i8> @abs_sgpr_v2i8(<2 x i8> inreg %arg) {
223223
; GFX10: ; %bb.0:
224224
; GFX10-NEXT: s_sext_i32_i8 s0, s0
225225
; GFX10-NEXT: s_sext_i32_i8 s1, s1
226-
; GFX10-NEXT: s_sext_i32_i16 s0, s0
227-
; GFX10-NEXT: s_sext_i32_i16 s1, s1
228226
; GFX10-NEXT: s_abs_i32 s0, s0
229227
; GFX10-NEXT: s_abs_i32 s1, s1
230228
; GFX10-NEXT: ; return to shader part epilog
@@ -308,9 +306,6 @@ define amdgpu_cs <3 x i8> @abs_sgpr_v3i8(<3 x i8> inreg %arg) {
308306
; GFX10-NEXT: s_sext_i32_i8 s0, s0
309307
; GFX10-NEXT: s_sext_i32_i8 s1, s1
310308
; GFX10-NEXT: s_sext_i32_i8 s2, s2
311-
; GFX10-NEXT: s_sext_i32_i16 s0, s0
312-
; GFX10-NEXT: s_sext_i32_i16 s1, s1
313-
; GFX10-NEXT: s_sext_i32_i16 s2, s2
314309
; GFX10-NEXT: s_abs_i32 s0, s0
315310
; GFX10-NEXT: s_abs_i32 s1, s1
316311
; GFX10-NEXT: s_abs_i32 s2, s2

0 commit comments

Comments
 (0)