Skip to content

Commit 3f3c679

Browse files
committed
[GlobalISel] Combine redundant sext_inreg
1 parent 4feac2f commit 3f3c679

File tree

6 files changed

+289
-6
lines changed

6 files changed

+289
-6
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -994,6 +994,9 @@ class CombinerHelper {
994994
// overflow sub
995995
bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo) const;
996996

997+
/// Apply step for the pattern (sext_inreg (sext_inreg x, K0), K1).
/// \p Root is the outer G_SEXT_INREG (width K1) and \p Other is the inner one
/// (width K0) that produces Root's source operand.
/// - K1 >= K0: Root is redundant; its result is replaced by Other's result.
/// - K1 <  K0: Root is rebuilt as (sext_inreg x, K1), reading x directly.
/// In both cases \p Root is erased.
998+
void applyRedundantSextInReg(MachineInstr &Root, MachineInstr &Other) const;
999+
9971000
private:
9981001
/// Checks for legality of an indexed variant of \p LdSt.
9991002
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1849,6 +1849,12 @@ def anyext_of_anyext : ext_of_ext_opcodes<G_ANYEXT, G_ANYEXT>;
18491849
def anyext_of_zext : ext_of_ext_opcodes<G_ANYEXT, G_ZEXT>;
18501850
def anyext_of_sext : ext_of_ext_opcodes<G_ANYEXT, G_SEXT>;
18511851

1852+
// Fold a G_SEXT_INREG whose source is itself a G_SEXT_INREG:
//   $x   = G_SEXT_INREG $src, $a   (inner, bound to $other)
//   $dst = G_SEXT_INREG $x, $b     (outer, bound to $root)
// No predicate is needed in the match: the apply helper handles every
// combination of the two widths $a and $b.
def sext_inreg_of_sext_inreg : GICombineRule<
1853+
(defs root:$dst),
1854+
(match (G_SEXT_INREG $x, $src, $a):$other,
1855+
(G_SEXT_INREG $dst, $x, $b):$root),
1856+
(apply [{ Helper.applyRedundantSextInReg(*${root}, *${other}); }])>;
1857+
18521858
// Push cast through build vector.
18531859
class buildvector_of_opcode<Instruction castOpcode> : GICombineRule <
18541860
(defs root:$root, build_fn_matchinfo:$matchinfo),
@@ -1896,7 +1902,8 @@ def cast_of_cast_combines: GICombineGroup<[
18961902
sext_of_anyext,
18971903
anyext_of_anyext,
18981904
anyext_of_zext,
1899-
anyext_of_sext
1905+
anyext_of_sext,
1906+
sext_inreg_of_sext_inreg,
19001907
]>;
19011908

19021909
def cast_combines: GICombineGroup<[

llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,3 +378,30 @@ bool CombinerHelper::matchCastOfInteger(const MachineInstr &CastMI,
378378
return false;
379379
}
380380
}
381+
382+
/// Apply step for the sext_inreg_of_sext_inreg combine, which matched:
///   %other = G_SEXT_INREG %src,   OtherWidth
///   %root  = G_SEXT_INREG %other, RootWidth
///
/// \param Root  The outer G_SEXT_INREG. It is always erased.
/// \param Other The inner G_SEXT_INREG that defines Root's source. It is not
///              modified or erased here.
void CombinerHelper::applyRedundantSextInReg(MachineInstr &Root,
                                             MachineInstr &Other) const {
  assert(Root.getOpcode() == TargetOpcode::G_SEXT_INREG &&
         Other.getOpcode() == TargetOpcode::G_SEXT_INREG &&
         "Expected two G_SEXT_INREG instructions");

  const unsigned RootWidth = Root.getOperand(2).getImm();
  const unsigned OtherWidth = Other.getOperand(2).getImm();

  const Register Dst = Root.getOperand(0).getReg();

  if (RootWidth >= OtherWidth) {
    // The inner instruction already sign-extends from a narrower (or equal)
    // width, so the outer one does not change the value: forward the inner
    // result to all users of Dst.
    //
    // Use the CombinerHelper wrapper rather than MRI.replaceRegWith directly:
    // this combine also runs after register bank selection, and the wrapper
    // reconciles register class/bank constraints (falling back to a COPY when
    // they cannot be merged) in addition to notifying the observer.
    replaceRegWith(MRI, Dst, Other.getOperand(0).getReg());
  } else {
    // RootWidth < OtherWidth: only the low RootWidth bits matter and the inner
    // extension leaves them untouched, so extend straight from the inner
    // instruction's source.
    Builder.buildSExtInReg(Dst, Other.getOperand(1).getReg(), RootWidth);
  }

  // Dst is now either unused or redefined by the new instruction.
  Root.eraseFromParent();
}
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
3+
4+
---
5+
name: inreg8_inreg16
6+
tracksRegLiveness: true
7+
body: |
8+
bb.0:
9+
liveins: $vgpr0
10+
; CHECK-LABEL: name: inreg8_inreg16
11+
; CHECK: liveins: $vgpr0
12+
; CHECK-NEXT: {{ $}}
13+
; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
14+
; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 8
15+
; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
16+
%copy:_(s32) = COPY $vgpr0
17+
%inreg:_(s32) = G_SEXT_INREG %copy, 8
18+
%inreg1:_(s32) = G_SEXT_INREG %inreg, 16
19+
$vgpr0 = COPY %inreg1
20+
...
21+
22+
---
23+
name: inreg16_inreg16
24+
tracksRegLiveness: true
25+
body: |
26+
bb.0:
27+
liveins: $vgpr0
28+
; CHECK-LABEL: name: inreg16_inreg16
29+
; CHECK: liveins: $vgpr0
30+
; CHECK-NEXT: {{ $}}
31+
; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
32+
; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 16
33+
; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
34+
%copy:_(s32) = COPY $vgpr0
35+
%inreg:_(s32) = G_SEXT_INREG %copy, 16
36+
%inreg1:_(s32) = G_SEXT_INREG %inreg, 16
37+
$vgpr0 = COPY %inreg1
38+
...
39+
40+
---
41+
name: inreg16_inreg8
42+
tracksRegLiveness: true
43+
body: |
44+
bb.0:
45+
liveins: $vgpr0
46+
; CHECK-LABEL: name: inreg16_inreg8
47+
; CHECK: liveins: $vgpr0
48+
; CHECK-NEXT: {{ $}}
49+
; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
50+
; CHECK-NEXT: %inreg1:_(s32) = G_SEXT_INREG %copy, 8
51+
; CHECK-NEXT: $vgpr0 = COPY %inreg1(s32)
52+
%copy:_(s32) = COPY $vgpr0
53+
%inreg:_(s32) = G_SEXT_INREG %copy, 16
54+
%inreg1:_(s32) = G_SEXT_INREG %inreg, 8
55+
$vgpr0 = COPY %inreg1
56+
...
57+
58+
---
59+
name: inreg16_inreg32_64bit
60+
tracksRegLiveness: true
61+
body: |
62+
bb.0:
63+
liveins: $vgpr0_vgpr1
64+
; CHECK-LABEL: name: inreg16_inreg32_64bit
65+
; CHECK: liveins: $vgpr0_vgpr1
66+
; CHECK-NEXT: {{ $}}
67+
; CHECK-NEXT: %copy:_(s64) = COPY $vgpr0_vgpr1
68+
; CHECK-NEXT: %inreg:_(s64) = G_SEXT_INREG %copy, 16
69+
; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg(s64)
70+
%copy:_(s64) = COPY $vgpr0_vgpr1
71+
%inreg:_(s64) = G_SEXT_INREG %copy, 16
72+
%inreg1:_(s64) = G_SEXT_INREG %inreg, 32
73+
$vgpr0_vgpr1 = COPY %inreg1
74+
...
75+
76+
---
77+
name: inreg32_inreg32_64bit
78+
tracksRegLiveness: true
79+
body: |
80+
bb.0:
81+
liveins: $vgpr0_vgpr1
82+
; CHECK-LABEL: name: inreg32_inreg32_64bit
83+
; CHECK: liveins: $vgpr0_vgpr1
84+
; CHECK-NEXT: {{ $}}
85+
; CHECK-NEXT: %copy:_(s64) = COPY $vgpr0_vgpr1
86+
; CHECK-NEXT: %inreg:_(s64) = G_SEXT_INREG %copy, 32
87+
; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg(s64)
88+
%copy:_(s64) = COPY $vgpr0_vgpr1
89+
%inreg:_(s64) = G_SEXT_INREG %copy, 32
90+
%inreg1:_(s64) = G_SEXT_INREG %inreg, 32
91+
$vgpr0_vgpr1 = COPY %inreg1
92+
...
93+
94+
---
95+
name: inreg32_inreg16_64bit
96+
tracksRegLiveness: true
97+
body: |
98+
bb.0:
99+
liveins: $vgpr0_vgpr1
100+
; CHECK-LABEL: name: inreg32_inreg16_64bit
101+
; CHECK: liveins: $vgpr0_vgpr1
102+
; CHECK-NEXT: {{ $}}
103+
; CHECK-NEXT: %copy:_(s64) = COPY $vgpr0_vgpr1
104+
; CHECK-NEXT: %inreg1:_(s64) = G_SEXT_INREG %copy, 16
105+
; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg1(s64)
106+
%copy:_(s64) = COPY $vgpr0_vgpr1
107+
%inreg:_(s64) = G_SEXT_INREG %copy, 32
108+
%inreg1:_(s64) = G_SEXT_INREG %inreg, 16
109+
$vgpr0_vgpr1 = COPY %inreg1
110+
...
111+
112+
---
113+
name: vector_inreg8_inreg16
114+
tracksRegLiveness: true
115+
body: |
116+
bb.0:
117+
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
118+
; CHECK-LABEL: name: vector_inreg8_inreg16
119+
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
120+
; CHECK-NEXT: {{ $}}
121+
; CHECK-NEXT: %copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
122+
; CHECK-NEXT: %inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 8
123+
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg(<4 x s32>)
124+
%copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
125+
%inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 8
126+
%inreg1:_(<4 x s32>) = G_SEXT_INREG %inreg, 16
127+
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg1
128+
...
129+
130+
---
131+
name: vector_inreg16_inreg16
132+
tracksRegLiveness: true
133+
body: |
134+
bb.0:
135+
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
136+
; CHECK-LABEL: name: vector_inreg16_inreg16
137+
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
138+
; CHECK-NEXT: {{ $}}
139+
; CHECK-NEXT: %copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
140+
; CHECK-NEXT: %inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 16
141+
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg(<4 x s32>)
142+
%copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
143+
%inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 16
144+
%inreg1:_(<4 x s32>) = G_SEXT_INREG %inreg, 16
145+
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg1
146+
...
147+
148+
---
149+
name: vector_inreg16_inreg8
150+
tracksRegLiveness: true
151+
body: |
152+
bb.0:
153+
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
154+
; CHECK-LABEL: name: vector_inreg16_inreg8
155+
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
156+
; CHECK-NEXT: {{ $}}
157+
; CHECK-NEXT: %copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
158+
; CHECK-NEXT: %inreg1:_(<4 x s32>) = G_SEXT_INREG %copy, 8
159+
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg1(<4 x s32>)
160+
%copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
161+
%inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 16
162+
%inreg1:_(<4 x s32>) = G_SEXT_INREG %inreg, 8
163+
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg1
164+
...
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
3+
4+
# Check (sext (trunc (sext_inreg x))) can be folded, as it's a pattern that can arise when
5+
# CGP widening of uniform i16 ops is disabled.
6+
# Two separate combines make it happen (sext_trunc and sext_inreg_of_sext_inreg).
7+
8+
---
9+
name: trunc_s16_inreg_8
10+
tracksRegLiveness: true
11+
body: |
12+
bb.0:
13+
liveins: $vgpr0
14+
; CHECK-LABEL: name: trunc_s16_inreg_8
15+
; CHECK: liveins: $vgpr0
16+
; CHECK-NEXT: {{ $}}
17+
; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
18+
; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 8
19+
; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
20+
%copy:_(s32) = COPY $vgpr0
21+
%inreg:_(s32) = G_SEXT_INREG %copy, 8
22+
%trunc:_(s16) = G_TRUNC %inreg
23+
%sext:_(s32) = G_SEXT %trunc
24+
$vgpr0 = COPY %sext
25+
...
26+
27+
---
28+
name: trunc_s16_inreg_16
29+
tracksRegLiveness: true
30+
body: |
31+
bb.0:
32+
liveins: $vgpr0
33+
; CHECK-LABEL: name: trunc_s16_inreg_16
34+
; CHECK: liveins: $vgpr0
35+
; CHECK-NEXT: {{ $}}
36+
; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
37+
; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 16
38+
; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
39+
%copy:_(s32) = COPY $vgpr0
40+
%inreg:_(s32) = G_SEXT_INREG %copy, 16
41+
%trunc:_(s16) = G_TRUNC %inreg
42+
%sext:_(s32) = G_SEXT %trunc
43+
$vgpr0 = COPY %sext
44+
...
45+
46+
---
47+
name: trunc_s8_inreg_16
48+
tracksRegLiveness: true
49+
body: |
50+
bb.0:
51+
liveins: $vgpr0
52+
; CHECK-LABEL: name: trunc_s8_inreg_16
53+
; CHECK: liveins: $vgpr0
54+
; CHECK-NEXT: {{ $}}
55+
; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
56+
; CHECK-NEXT: %sext:_(s32) = G_SEXT_INREG %copy, 8
57+
; CHECK-NEXT: $vgpr0 = COPY %sext(s32)
58+
%copy:_(s32) = COPY $vgpr0
59+
%inreg:_(s32) = G_SEXT_INREG %copy, 16
60+
%trunc:_(s8) = G_TRUNC %inreg
61+
%sext:_(s32) = G_SEXT %trunc
62+
$vgpr0 = COPY %sext
63+
...
64+
65+
# TODO?: We could handle this by inserting a trunc, but I'm not sure how useful that'd be.
66+
---
67+
name: mismatching_types
68+
tracksRegLiveness: true
69+
body: |
70+
bb.0:
71+
liveins: $vgpr0
72+
; CHECK-LABEL: name: mismatching_types
73+
; CHECK: liveins: $vgpr0
74+
; CHECK-NEXT: {{ $}}
75+
; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
76+
; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 8
77+
; CHECK-NEXT: %trunc:_(s8) = G_TRUNC %inreg(s32)
78+
; CHECK-NEXT: %sext:_(s16) = G_SEXT %trunc(s8)
79+
; CHECK-NEXT: %anyext:_(s32) = G_ANYEXT %sext(s16)
80+
; CHECK-NEXT: $vgpr0 = COPY %anyext(s32)
81+
%copy:_(s32) = COPY $vgpr0
82+
%inreg:_(s32) = G_SEXT_INREG %copy, 8
83+
%trunc:_(s8) = G_TRUNC %inreg
84+
%sext:_(s16) = G_SEXT %trunc
85+
%anyext:_(s32) = G_ANYEXT %sext
86+
$vgpr0 = COPY %anyext
87+
...

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -223,8 +223,6 @@ define amdgpu_cs <2 x i8> @abs_sgpr_v2i8(<2 x i8> inreg %arg) {
223223
; GFX10: ; %bb.0:
224224
; GFX10-NEXT: s_sext_i32_i8 s0, s0
225225
; GFX10-NEXT: s_sext_i32_i8 s1, s1
226-
; GFX10-NEXT: s_sext_i32_i16 s0, s0
227-
; GFX10-NEXT: s_sext_i32_i16 s1, s1
228226
; GFX10-NEXT: s_abs_i32 s0, s0
229227
; GFX10-NEXT: s_abs_i32 s1, s1
230228
; GFX10-NEXT: ; return to shader part epilog
@@ -308,9 +306,6 @@ define amdgpu_cs <3 x i8> @abs_sgpr_v3i8(<3 x i8> inreg %arg) {
308306
; GFX10-NEXT: s_sext_i32_i8 s0, s0
309307
; GFX10-NEXT: s_sext_i32_i8 s1, s1
310308
; GFX10-NEXT: s_sext_i32_i8 s2, s2
311-
; GFX10-NEXT: s_sext_i32_i16 s0, s0
312-
; GFX10-NEXT: s_sext_i32_i16 s1, s1
313-
; GFX10-NEXT: s_sext_i32_i16 s2, s2
314309
; GFX10-NEXT: s_abs_i32 s0, s0
315310
; GFX10-NEXT: s_abs_i32 s1, s1
316311
; GFX10-NEXT: s_abs_i32 s2, s2

0 commit comments

Comments
 (0)