Skip to content

Commit 3892315

Browse files
committed
[GlobalISel] Combine [a,s,z]ext of undef into 0 or undef
Alternative for llvm#113764. It builds on a minimalistic approach with the legality check in the match and a blind apply. Precise patterns are used for better compile time and modularity. It also moves the pattern check into the combiner, whereas unary_undef_to_zero and propagate_undef_any_op rely on custom C++ code for pattern matching. Is there a limit on the number of patterns? The new combines are: G_ANYEXT of undef -> undef; G_SEXT of undef -> 0; G_ZEXT of undef -> 0. The combine is not a member of the post-legalizer combiner for AArch64. Test: llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
1 parent 68f7b07 commit 3892315

File tree

7 files changed

+93
-23
lines changed

7 files changed

+93
-23
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,10 @@ class CombinerHelper {
150150
/// is a legal integer constant type on the target.
151151
bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const;
152152

153+
/// \return true if the combine is running prior to legalization, or if \p Ty
154+
/// is a type for which G_IMPLICIT_DEF is legal on the target.
155+
bool isUndefLegalOrBeforeLegalizer(const LLT Ty) const;
156+
153157
/// MachineRegisterInfo::replaceRegWith() and inform the observer of the changes
154158
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const;
155159

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,7 @@ def unary_undef_to_zero: GICombineRule<
428428
// replaced with undef.
429429
def propagate_undef_any_op: GICombineRule<
430430
(defs root:$root),
431-
(match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR, G_TRUNC, G_BITCAST, G_ANYEXT):$root,
431+
(match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR, G_TRUNC, G_BITCAST):$root,
432432
[{ return Helper.matchAnyExplicitUseIsUndef(*${root}); }]),
433433
(apply [{ Helper.replaceInstWithUndef(*${root}); }])>;
434434

@@ -1857,6 +1857,27 @@ class integer_of_opcode<Instruction castOpcode> : GICombineRule <
18571857

18581858
def integer_of_truncate : integer_of_opcode<G_TRUNC>;
18591859

1860+
def anyext_undef: GICombineRule<
1861+
(defs root:$root),
1862+
(match (G_IMPLICIT_DEF $undef),
1863+
(G_ANYEXT $root, $undef):$Aext,
1864+
[{ return Helper.isUndefLegalOrBeforeLegalizer(MRI.getType(${Aext}->getOperand(0).getReg())); }]),
1865+
(apply [{ Helper.replaceInstWithUndef(*${Aext}); }])>;
1866+
1867+
def zext_undef: GICombineRule<
1868+
(defs root:$root),
1869+
(match (G_IMPLICIT_DEF $undef),
1870+
(G_ZEXT $root, $undef):$Zext,
1871+
[{ return Helper.isConstantLegalOrBeforeLegalizer(MRI.getType(${Zext}->getOperand(0).getReg())); }]),
1872+
(apply [{ Helper.replaceInstWithConstant(*${Zext}, 0); }])>;
1873+
1874+
def sext_undef: GICombineRule<
1875+
(defs root:$root),
1876+
(match (G_IMPLICIT_DEF $undef),
1877+
(G_SEXT $root, $undef):$Sext,
1878+
[{ return Helper.isConstantLegalOrBeforeLegalizer(MRI.getType(${Sext}->getOperand(0).getReg())); }]),
1879+
(apply [{ Helper.replaceInstWithConstant(*${Sext}, 0); }])>;
1880+
18601881
def cast_of_cast_combines: GICombineGroup<[
18611882
truncate_of_zext,
18621883
truncate_of_sext,
@@ -1882,7 +1903,10 @@ def cast_combines: GICombineGroup<[
18821903
narrow_binop_and,
18831904
narrow_binop_or,
18841905
narrow_binop_xor,
1885-
integer_of_truncate
1906+
integer_of_truncate,
1907+
anyext_undef,
1908+
sext_undef,
1909+
zext_undef
18861910
]>;
18871911

18881912
def canonicalize_icmp : GICombineRule<

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,10 @@ bool CombinerHelper::isConstantLegalOrBeforeLegalizer(const LLT Ty) const {
171171
isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
172172
}
173173

174+
bool CombinerHelper::isUndefLegalOrBeforeLegalizer(const LLT Ty) const {
175+
return isPreLegalize() || isLegal({TargetOpcode::G_IMPLICIT_DEF, {Ty}});
176+
}
177+
174178
void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
175179
Register ToReg) const {
176180
Observer.changingAllUsesOfReg(MRI, FromReg);

llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,3 +217,55 @@ body: |
217217
%large:_(<2 x s64>) = G_ANYEXT %bv(<2 x s32>)
218218
$q0 = COPY %large(<2 x s64>)
219219
$d0 = COPY %bv(<2 x s32>)
220+
...
221+
---
222+
name: test_combine_anyext_undef
223+
legalized: true
224+
body: |
225+
bb.1:
226+
; CHECK-PRE-LABEL: name: test_combine_anyext_undef
227+
; CHECK-PRE: %aext:_(s64) = G_IMPLICIT_DEF
228+
; CHECK-PRE-NEXT: $x0 = COPY %aext(s64)
229+
;
230+
; CHECK-POST-LABEL: name: test_combine_anyext_undef
231+
; CHECK-POST: %undef:_(s32) = G_IMPLICIT_DEF
232+
; CHECK-POST-NEXT: %aext:_(s64) = G_ANYEXT %undef(s32)
233+
; CHECK-POST-NEXT: $x0 = COPY %aext(s64)
234+
%undef:_(s32) = G_IMPLICIT_DEF
235+
%aext:_(s64) = G_ANYEXT %undef(s32)
236+
$x0 = COPY %aext(s64)
237+
...
238+
---
239+
name: test_combine_sext_undef
240+
legalized: true
241+
body: |
242+
bb.1:
243+
; CHECK-PRE-LABEL: name: test_combine_sext_undef
244+
; CHECK-PRE: %sext:_(s64) = G_CONSTANT i64 0
245+
; CHECK-PRE-NEXT: $x0 = COPY %sext(s64)
246+
;
247+
; CHECK-POST-LABEL: name: test_combine_sext_undef
248+
; CHECK-POST: %undef:_(s32) = G_IMPLICIT_DEF
249+
; CHECK-POST-NEXT: %sext:_(s64) = G_SEXT %undef(s32)
250+
; CHECK-POST-NEXT: $x0 = COPY %sext(s64)
251+
%undef:_(s32) = G_IMPLICIT_DEF
252+
%sext:_(s64) = G_SEXT %undef(s32)
253+
$x0 = COPY %sext(s64)
254+
...
255+
---
256+
name: test_combine_zext_undef
257+
legalized: true
258+
body: |
259+
bb.1:
260+
; CHECK-PRE-LABEL: name: test_combine_zext_undef
261+
; CHECK-PRE: %zext:_(s64) = G_CONSTANT i64 0
262+
; CHECK-PRE-NEXT: $x0 = COPY %zext(s64)
263+
;
264+
; CHECK-POST-LABEL: name: test_combine_zext_undef
265+
; CHECK-POST: %undef:_(s32) = G_IMPLICIT_DEF
266+
; CHECK-POST-NEXT: %zext:_(s64) = G_ZEXT %undef(s32)
267+
; CHECK-POST-NEXT: $x0 = COPY %zext(s64)
268+
%undef:_(s32) = G_IMPLICIT_DEF
269+
%zext:_(s64) = G_ZEXT %undef(s32)
270+
$x0 = COPY %zext(s64)
271+
...

llvm/test/CodeGen/AArch64/extract-vector-elt.ll

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,10 @@
88
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for extract_v4i32_vector_extract_const
99

1010
define i64 @extract_v2i64_undef_index(<2 x i64> %a, i32 %c) {
11-
; CHECK-SD-LABEL: extract_v2i64_undef_index:
12-
; CHECK-SD: // %bb.0: // %entry
13-
; CHECK-SD-NEXT: fmov x0, d0
14-
; CHECK-SD-NEXT: ret
15-
;
16-
; CHECK-GI-LABEL: extract_v2i64_undef_index:
17-
; CHECK-GI: // %bb.0: // %entry
18-
; CHECK-GI-NEXT: str q0, [sp, #-16]!
19-
; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
20-
; CHECK-GI-NEXT: ldr x0, [sp], #16
21-
; CHECK-GI-NEXT: ret
11+
; CHECK-LABEL: extract_v2i64_undef_index:
12+
; CHECK: // %bb.0: // %entry
13+
; CHECK-NEXT: fmov x0, d0
14+
; CHECK-NEXT: ret
2215
entry:
2316
%d = extractelement <2 x i64> %a, i32 undef
2417
ret i64 %d

llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -261,8 +261,7 @@ body: |
261261
; CHECK-LABEL: name: cvt_f32_ubyte0_zext_lshr_16
262262
; CHECK: liveins: $vgpr0
263263
; CHECK-NEXT: {{ $}}
264-
; CHECK-NEXT: %shift:_(s16) = G_IMPLICIT_DEF
265-
; CHECK-NEXT: %zext:_(s32) = G_ZEXT %shift(s16)
264+
; CHECK-NEXT: %zext:_(s32) = G_CONSTANT i32 0
266265
; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %zext
267266
; CHECK-NEXT: $vgpr0 = COPY %result(s32)
268267
%arg:_(s32) = COPY $vgpr0
@@ -284,8 +283,7 @@ body: |
284283
; CHECK-LABEL: name: cvt_f32_ubyte0_zext_lshr_24
285284
; CHECK: liveins: $vgpr0
286285
; CHECK-NEXT: {{ $}}
287-
; CHECK-NEXT: %shift:_(s16) = G_IMPLICIT_DEF
288-
; CHECK-NEXT: %zext:_(s32) = G_ZEXT %shift(s16)
286+
; CHECK-NEXT: %zext:_(s32) = G_CONSTANT i32 0
289287
; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %zext
290288
; CHECK-NEXT: $vgpr0 = COPY %result(s32)
291289
%arg:_(s32) = COPY $vgpr0

llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4074,14 +4074,12 @@ define amdgpu_kernel void @v_test_v2i16_x_add_undef_neg32(ptr addrspace(1) %out,
40744074
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
40754075
; VI-GISEL-NEXT: flat_load_dword v3, v[0:1]
40764076
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
4077+
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
40774078
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
40784079
; VI-GISEL-NEXT: v_not_b32_e32 v2, 31
4079-
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
4080-
; VI-GISEL-NEXT: s_and_b32 s0, 0xffff, s0
40814080
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
40824081
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
40834082
; VI-GISEL-NEXT: v_add_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
4084-
; VI-GISEL-NEXT: v_or_b32_e32 v2, s0, v2
40854083
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
40864084
; VI-GISEL-NEXT: s_endpgm
40874085
;
@@ -4191,15 +4189,12 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg32_undef(ptr addrspace(1) %out,
41914189
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
41924190
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
41934191
; VI-GISEL-NEXT: flat_load_dword v3, v[0:1]
4194-
; VI-GISEL-NEXT: s_and_b32 s2, 0xffff, s0
41954192
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
41964193
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
41974194
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
4198-
; VI-GISEL-NEXT: s_lshl_b32 s0, s2, 16
41994195
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
42004196
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
42014197
; VI-GISEL-NEXT: v_add_u16_e32 v2, 0xffe0, v3
4202-
; VI-GISEL-NEXT: v_or_b32_e32 v2, s0, v2
42034198
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
42044199
; VI-GISEL-NEXT: s_endpgm
42054200
;

0 commit comments

Comments
 (0)