Skip to content

Commit b309bc0

Browse files
committed
[GlobalISel] Combine out-of-range shifts to undef.
Differential Revision: https://reviews.llvm.org/D144303
1 parent d6d59e6 commit b309bc0

File tree

9 files changed: +172 insertions, −31 deletions

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -789,6 +789,9 @@ class CombinerHelper {
789789
/// (X ^ Y) != X -> Y != 0
790790
bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo);
791791

792+
/// Match shifts greater or equal to the bitwidth of the operation.
793+
bool matchShiftsTooBig(MachineInstr &MI);
794+
792795
private:
793796
/// Given a non-indexed load or store instruction \p MI, find an offset that
794797
/// can be usefully and legally folded into it as a post-indexing operation.

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,12 @@ def ptr_add_immed_chain : GICombineRule<
205205
[{ return Helper.matchPtrAddImmedChain(*${d}, ${matchinfo}); }]),
206206
(apply [{ Helper.applyPtrAddImmedChain(*${d}, ${matchinfo}); }])>;
207207

208+
def shifts_too_big : GICombineRule<
209+
(defs root:$root),
210+
(match (wip_match_opcode G_SHL, G_ASHR, G_LSHR):$root,
211+
[{ return Helper.matchShiftsTooBig(*${root}); }]),
212+
(apply [{ Helper.replaceInstWithUndef(*${root}); }])>;
213+
208214
// Fold shift (shift base x), y -> shift base, (x+y), if shifts are same
209215
def shift_immed_matchdata : GIDefMatchData<"RegisterImmPair">;
210216
def shift_immed_chain : GICombineRule<
@@ -1089,7 +1095,7 @@ def fma_combines : GICombineGroup<[combine_fadd_fmul_to_fmad_or_fma,
10891095
def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
10901096
extract_vec_elt_combines, combines_for_extload,
10911097
combine_indexed_load_store, undef_combines, identity_combines, phi_combines,
1092-
simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands,
1098+
simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands, shifts_too_big,
10931099
reassocs, ptr_add_immed_chain,
10941100
shl_ashr_to_sext_inreg, sext_inreg_of_load,
10951101
width_reduction_combines, select_combines,

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6194,6 +6194,16 @@ bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI,
61946194
return CmpInst::isEquality(Pred) && Y.isValid();
61956195
}
61966196

6197+
/// Match a G_SHL/G_LSHR/G_ASHR whose shift amount is a constant (or a
/// vector whose every element is a constant) unsigned-greater-or-equal to
/// the scalar bit width of the result type. Such shifts yield an undefined
/// value, so the caller replaces the instruction with G_IMPLICIT_DEF.
bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) {
  // Operand 0 is the result; operand 2 is the shift amount.
  const Register AmtReg = MI.getOperand(2).getReg();
  const unsigned ScalarBits =
      MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
  // Every element of the amount must be an out-of-range ConstantInt; a
  // vector with even one in-range lane does not match (see the
  // shl_by_ge_bw_vector_partial test).
  return matchUnaryPredicate(MRI, AmtReg, [ScalarBits](const Constant *C) {
    const auto *CI = dyn_cast<ConstantInt>(C);
    return CI && CI->uge(ScalarBits);
  });
}
6206+
61976207
bool CombinerHelper::tryCombine(MachineInstr &MI) {
61986208
if (tryCombineCopy(MI))
61996209
return true;
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
3+
---
4+
name: shl_by_ge_bw
5+
alignment: 4
6+
tracksRegLiveness: true
7+
liveins:
8+
- { reg: '$w0' }
9+
body: |
10+
bb.1:
11+
liveins: $w0
12+
13+
; CHECK-LABEL: name: shl_by_ge_bw
14+
; CHECK: liveins: $w0
15+
; CHECK-NEXT: {{ $}}
16+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
17+
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[DEF]](s16)
18+
; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
19+
; CHECK-NEXT: RET_ReallyLR implicit $w0
20+
%1:_(s32) = COPY $w0
21+
%0:_(s16) = G_TRUNC %1(s32)
22+
%2:_(s16) = G_CONSTANT i16 20
23+
%3:_(s16) = G_SHL %0, %2(s16)
24+
%4:_(s32) = G_ANYEXT %3(s16)
25+
$w0 = COPY %4(s32)
26+
RET_ReallyLR implicit $w0
27+
28+
...
29+
---
30+
name: lshr_by_ge_bw
31+
alignment: 4
32+
tracksRegLiveness: true
33+
liveins:
34+
- { reg: '$w0' }
35+
body: |
36+
bb.1:
37+
liveins: $w0
38+
39+
; CHECK-LABEL: name: lshr_by_ge_bw
40+
; CHECK: liveins: $w0
41+
; CHECK-NEXT: {{ $}}
42+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
43+
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[DEF]](s16)
44+
; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
45+
; CHECK-NEXT: RET_ReallyLR implicit $w0
46+
%1:_(s32) = COPY $w0
47+
%0:_(s16) = G_TRUNC %1(s32)
48+
%2:_(s16) = G_CONSTANT i16 16
49+
%3:_(s16) = G_LSHR %0, %2(s16)
50+
%4:_(s32) = G_ANYEXT %3(s16)
51+
$w0 = COPY %4(s32)
52+
RET_ReallyLR implicit $w0
53+
54+
...
55+
---
56+
name: ashr_by_ge_bw
57+
alignment: 4
58+
tracksRegLiveness: true
59+
liveins:
60+
- { reg: '$w0' }
61+
body: |
62+
bb.1:
63+
liveins: $w0
64+
65+
; CHECK-LABEL: name: ashr_by_ge_bw
66+
; CHECK: liveins: $w0
67+
; CHECK-NEXT: {{ $}}
68+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
69+
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[DEF]](s16)
70+
; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
71+
; CHECK-NEXT: RET_ReallyLR implicit $w0
72+
%1:_(s32) = COPY $w0
73+
%0:_(s16) = G_TRUNC %1(s32)
74+
%2:_(s16) = G_CONSTANT i16 20
75+
%3:_(s16) = G_ASHR %0, %2(s16)
76+
%4:_(s32) = G_ANYEXT %3(s16)
77+
$w0 = COPY %4(s32)
78+
RET_ReallyLR implicit $w0
79+
80+
...
81+
---
82+
name: shl_by_ge_bw_vector
83+
alignment: 4
84+
tracksRegLiveness: true
85+
liveins:
86+
- { reg: '$q0' }
87+
body: |
88+
bb.1:
89+
liveins: $q0
90+
91+
; CHECK-LABEL: name: shl_by_ge_bw_vector
92+
; CHECK: liveins: $q0
93+
; CHECK-NEXT: {{ $}}
94+
; CHECK-NEXT: %shl:_(<4 x s32>) = G_IMPLICIT_DEF
95+
; CHECK-NEXT: $q0 = COPY %shl(<4 x s32>)
96+
; CHECK-NEXT: RET_ReallyLR implicit $q0
97+
%1:_(<4 x s32>) = COPY $q0
98+
%0:_(s32) = G_CONSTANT i32 32
99+
%bv:_(<4 x s32>) = G_BUILD_VECTOR %0, %0, %0, %0
100+
%shl:_(<4 x s32>) = G_SHL %1, %bv(<4 x s32>)
101+
$q0 = COPY %shl(<4 x s32>)
102+
RET_ReallyLR implicit $q0
103+
104+
...
105+
---
106+
name: shl_by_ge_bw_vector_partial
107+
alignment: 4
108+
tracksRegLiveness: true
109+
liveins:
110+
- { reg: '$q0' }
111+
body: |
112+
bb.1:
113+
liveins: $q0
114+
115+
; CHECK-LABEL: name: shl_by_ge_bw_vector_partial
116+
; CHECK: liveins: $q0
117+
; CHECK-NEXT: {{ $}}
118+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
119+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
120+
; CHECK-NEXT: %small:_(s32) = G_CONSTANT i32 4
121+
; CHECK-NEXT: %bv:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), %small(s32)
122+
; CHECK-NEXT: %shl:_(<4 x s32>) = G_SHL [[COPY]], %bv(<4 x s32>)
123+
; CHECK-NEXT: $q0 = COPY %shl(<4 x s32>)
124+
; CHECK-NEXT: RET_ReallyLR implicit $q0
125+
%1:_(<4 x s32>) = COPY $q0
126+
%0:_(s32) = G_CONSTANT i32 32
127+
%small:_(s32) = G_CONSTANT i32 4
128+
%bv:_(<4 x s32>) = G_BUILD_VECTOR %0, %0, %0, %small
129+
%shl:_(<4 x s32>) = G_SHL %1, %bv(<4 x s32>)
130+
$q0 = COPY %shl(<4 x s32>)
131+
RET_ReallyLR implicit $q0
132+
...

llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -261,8 +261,9 @@ body: |
261261
; CHECK-LABEL: name: cvt_f32_ubyte0_zext_lshr_16
262262
; CHECK: liveins: $vgpr0
263263
; CHECK-NEXT: {{ $}}
264-
; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0
265-
; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE2 %arg
264+
; CHECK-NEXT: %shift:_(s16) = G_IMPLICIT_DEF
265+
; CHECK-NEXT: %zext:_(s32) = G_ZEXT %shift(s16)
266+
; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %zext
266267
; CHECK-NEXT: $vgpr0 = COPY %result(s32)
267268
%arg:_(s32) = COPY $vgpr0
268269
%trunc:_(s16) = G_TRUNC %arg
@@ -283,8 +284,9 @@ body: |
283284
; CHECK-LABEL: name: cvt_f32_ubyte0_zext_lshr_24
284285
; CHECK: liveins: $vgpr0
285286
; CHECK-NEXT: {{ $}}
286-
; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0
287-
; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE3 %arg
287+
; CHECK-NEXT: %shift:_(s16) = G_IMPLICIT_DEF
288+
; CHECK-NEXT: %zext:_(s32) = G_ZEXT %shift(s16)
289+
; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %zext
288290
; CHECK-NEXT: $vgpr0 = COPY %result(s32)
289291
%arg:_(s32) = COPY $vgpr0
290292
%trunc:_(s16) = G_TRUNC %arg

llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ashr-narrow.mir

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -121,10 +121,8 @@ body: |
121121
; CHECK-LABEL: name: narrow_ashr_s64_64
122122
; CHECK: liveins: $vgpr0_vgpr1
123123
; CHECK-NEXT: {{ $}}
124-
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
125-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
126-
; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
127-
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64)
124+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
125+
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64)
128126
%0:_(s64) = COPY $vgpr0_vgpr1
129127
%1:_(s32) = G_CONSTANT i32 64
130128
%2:_(s64) = G_ASHR %0, %1
@@ -141,10 +139,8 @@ body: |
141139
; CHECK-LABEL: name: narrow_ashr_s64_65
142140
; CHECK: liveins: $vgpr0_vgpr1
143141
; CHECK-NEXT: {{ $}}
144-
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
145-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65
146-
; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
147-
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64)
142+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
143+
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64)
148144
%0:_(s64) = COPY $vgpr0_vgpr1
149145
%1:_(s32) = G_CONSTANT i32 65
150146
%2:_(s64) = G_ASHR %0, %1

llvm/test/CodeGen/AMDGPU/GlobalISel/combine-lshr-narrow.mir

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -119,10 +119,8 @@ body: |
119119
; CHECK-LABEL: name: narrow_lshr_s64_64
120120
; CHECK: liveins: $vgpr0_vgpr1
121121
; CHECK-NEXT: {{ $}}
122-
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
123-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
124-
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[C]](s32)
125-
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64)
122+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
123+
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64)
126124
%0:_(s64) = COPY $vgpr0_vgpr1
127125
%1:_(s32) = G_CONSTANT i32 64
128126
%2:_(s64) = G_LSHR %0, %1
@@ -139,10 +137,8 @@ body: |
139137
; CHECK-LABEL: name: narrow_lshr_s64_65
140138
; CHECK: liveins: $vgpr0_vgpr1
141139
; CHECK-NEXT: {{ $}}
142-
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
143-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65
144-
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[C]](s32)
145-
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64)
140+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
141+
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64)
146142
%0:_(s64) = COPY $vgpr0_vgpr1
147143
%1:_(s32) = G_CONSTANT i32 65
148144
%2:_(s64) = G_LSHR %0, %1

llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-narrow.mir

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -120,10 +120,8 @@ body: |
120120
; CHECK-LABEL: name: narrow_shl_s64_64
121121
; CHECK: liveins: $vgpr0_vgpr1
122122
; CHECK-NEXT: {{ $}}
123-
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
124-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
125-
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s32)
126-
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64)
123+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
124+
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64)
127125
%0:_(s64) = COPY $vgpr0_vgpr1
128126
%1:_(s32) = G_CONSTANT i32 64
129127
%2:_(s64) = G_SHL %0, %1
@@ -140,10 +138,8 @@ body: |
140138
; CHECK-LABEL: name: narrow_shl_s64_65
141139
; CHECK: liveins: $vgpr0_vgpr1
142140
; CHECK-NEXT: {{ $}}
143-
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
144-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65
145-
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s32)
146-
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64)
141+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
142+
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64)
147143
%0:_(s64) = COPY $vgpr0_vgpr1
148144
%1:_(s32) = G_CONSTANT i32 65
149145
%2:_(s64) = G_SHL %0, %1

llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -889,7 +889,7 @@ define amdgpu_ps <2 x i32> @s_sext_inreg_v4i16_14(<4 x i16> inreg %value) {
889889
;
890890
; GFX8-LABEL: s_sext_inreg_v4i16_14:
891891
; GFX8: ; %bb.0:
892-
; GFX8-NEXT: s_bfe_u32 s0, 0, 0x100000
892+
; GFX8-NEXT: s_bfe_u32 s0, -1, 0x100000
893893
; GFX8-NEXT: s_mov_b32 s1, s0
894894
; GFX8-NEXT: ; return to shader part epilog
895895
;
@@ -978,7 +978,7 @@ define <4 x float> @v_sext_inreg_v8i16_11(<8 x i16> %value) {
978978
; GFX8-LABEL: v_sext_inreg_v8i16_11:
979979
; GFX8: ; %bb.0:
980980
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
981-
; GFX8-NEXT: s_bfe_u32 s4, 0, 0x100000
981+
; GFX8-NEXT: s_bfe_u32 s4, -1, 0x100000
982982
; GFX8-NEXT: v_mov_b32_e32 v0, s4
983983
; GFX8-NEXT: v_mov_b32_e32 v1, s4
984984
; GFX8-NEXT: v_mov_b32_e32 v2, s4

0 commit comments

Comments
 (0)