Skip to content

Commit 3c36de5

Browse files
committed
GlobalISel: Try to fold G_SEXT_INREG to G_AND with mask
Copies the same transform from the DAG. Helps eliminate some codegen diffs, to allow shared checks in a future change. Not sure if apply supports anything better than C++ fragments for the result. It's also not really reasonable that every combine has to set the default insertion point.
1 parent 56c1660 commit 3c36de5

File tree

3 files changed

+133
-1
lines changed

3 files changed

+133
-1
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -129,6 +129,10 @@ class CombinerHelper {
129129
return KB;
130130
}
131131

132+
MachineIRBuilder &getBuilder() const {
133+
return Builder;
134+
}
135+
132136
const TargetLowering &getTargetLowering() const;
133137

134138
/// \returns true if the combiner is running pre-legalization.

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 18 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -169,6 +169,22 @@ def sext_inreg_of_load : GICombineRule<
169169
[{ return Helper.matchSextInRegOfLoad(*${root}, ${matchinfo}); }]),
170170
(apply [{ Helper.applySextInRegOfLoad(*${root}, ${matchinfo}); }])>;
171171

172+
def sext_inreg_to_zext_inreg : GICombineRule<
173+
(defs root:$dst),
174+
(match
175+
(G_SEXT_INREG $dst, $src, $imm):$root,
176+
[{
177+
unsigned BitWidth = MRI.getType(${src}.getReg()).getScalarSizeInBits();
178+
return Helper.getKnownBits()->maskedValueIsZero(${src}.getReg(),
179+
APInt::getOneBitSet(BitWidth, ${imm}.getImm() - 1)); }]),
180+
(apply [{
181+
Helper.getBuilder().setInstrAndDebugLoc(*${root});
182+
Helper.getBuilder().buildZExtInReg(${dst}, ${src}, ${imm}.getImm());
183+
${root}->eraseFromParent();
184+
return true;
185+
}])
186+
>;
187+
172188
def combine_indexed_load_store : GICombineRule<
173189
(defs root:$root, indexed_load_store_matchdata:$matchinfo),
174190
(match (wip_match_opcode G_LOAD, G_SEXTLOAD, G_ZEXTLOAD, G_STORE):$root,
@@ -1030,7 +1046,8 @@ def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p,
10301046

10311047
def known_bits_simplifications : GICombineGroup<[
10321048
redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
1033-
zext_trunc_fold, icmp_to_true_false_known_bits, icmp_to_lhs_known_bits]>;
1049+
zext_trunc_fold, icmp_to_true_false_known_bits, icmp_to_lhs_known_bits,
1050+
sext_inreg_to_zext_inreg]>;
10341051

10351052
def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend,
10361053
narrow_binop_feeding_and]>;
Lines changed: 111 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,111 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
3+
4+
---
5+
name: sext_inreg_i32_8_and_neg255
6+
legalized: true
7+
tracksRegLiveness: true
8+
body: |
9+
bb.0:
10+
liveins: $vgpr0_vgpr1
11+
12+
; CHECK-LABEL: name: sext_inreg_i32_8_and_neg255
13+
; CHECK: liveins: $vgpr0_vgpr1
14+
; CHECK-NEXT: {{ $}}
15+
; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
16+
; CHECK-NEXT: %load:_(s32) = G_LOAD %ptr(p1) :: (volatile load (s32), addrspace 1)
17+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
18+
; CHECK-NEXT: %inreg:_(s32) = G_AND %load, [[C]]
19+
; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
20+
%ptr:_(p1) = COPY $vgpr0_vgpr1
21+
%load:_(s32) = G_LOAD %ptr :: (volatile load (s32), addrspace 1, align 4)
22+
%mask:_(s32) = G_CONSTANT i32 -255
23+
%and:_(s32) = G_AND %load, %mask
24+
%inreg:_(s32) = G_SEXT_INREG %and, 8
25+
$vgpr0 = COPY %inreg
26+
27+
...
28+
29+
---
30+
name: sext_inreg_i32_8_and_255
31+
legalized: true
32+
tracksRegLiveness: true
33+
body: |
34+
bb.0:
35+
liveins: $vgpr0_vgpr1
36+
37+
; CHECK-LABEL: name: sext_inreg_i32_8_and_255
38+
; CHECK: liveins: $vgpr0_vgpr1
39+
; CHECK-NEXT: {{ $}}
40+
; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
41+
; CHECK-NEXT: %load:_(s32) = G_LOAD %ptr(p1) :: (volatile load (s32), addrspace 1)
42+
; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 255
43+
; CHECK-NEXT: %and:_(s32) = G_AND %load, %mask
44+
; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %and, 8
45+
; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
46+
%ptr:_(p1) = COPY $vgpr0_vgpr1
47+
%load:_(s32) = G_LOAD %ptr :: (volatile load (s32), addrspace 1, align 4)
48+
%mask:_(s32) = G_CONSTANT i32 255
49+
%and:_(s32) = G_AND %load, %mask
50+
%inreg:_(s32) = G_SEXT_INREG %and, 8
51+
$vgpr0 = COPY %inreg
52+
53+
...
54+
55+
---
56+
name: sext_inreg_v2i32_8_and_neg255
57+
legalized: true
58+
tracksRegLiveness: true
59+
body: |
60+
bb.0:
61+
liveins: $vgpr0_vgpr1
62+
63+
; CHECK-LABEL: name: sext_inreg_v2i32_8_and_neg255
64+
; CHECK: liveins: $vgpr0_vgpr1
65+
; CHECK-NEXT: {{ $}}
66+
; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
67+
; CHECK-NEXT: %load:_(<2 x s32>) = G_LOAD %ptr(p1) :: (volatile load (<2 x s32>), addrspace 1)
68+
; CHECK-NEXT: %mask_elt:_(s32) = G_CONSTANT i32 -255
69+
; CHECK-NEXT: %mask:_(<2 x s32>) = G_BUILD_VECTOR %mask_elt(s32), %mask_elt(s32)
70+
; CHECK-NEXT: %and:_(<2 x s32>) = G_AND %load, %mask
71+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
72+
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
73+
; CHECK-NEXT: %inreg:_(<2 x s32>) = G_AND %and, [[BUILD_VECTOR]]
74+
; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg(<2 x s32>)
75+
%ptr:_(p1) = COPY $vgpr0_vgpr1
76+
%load:_(<2 x s32>) = G_LOAD %ptr :: (volatile load (<2 x s32>), addrspace 1, align 8)
77+
%mask_elt:_(s32) = G_CONSTANT i32 -255
78+
%mask:_(<2 x s32>) = G_BUILD_VECTOR %mask_elt, %mask_elt
79+
%and:_(<2 x s32>) = G_AND %load, %mask
80+
%inreg:_(<2 x s32>) = G_SEXT_INREG %and, 8
81+
$vgpr0_vgpr1 = COPY %inreg
82+
83+
...
84+
85+
---
86+
name: sext_inreg_v2i32_8_and_255
87+
legalized: true
88+
tracksRegLiveness: true
89+
body: |
90+
bb.0:
91+
liveins: $vgpr0_vgpr1
92+
93+
; CHECK-LABEL: name: sext_inreg_v2i32_8_and_255
94+
; CHECK: liveins: $vgpr0_vgpr1
95+
; CHECK-NEXT: {{ $}}
96+
; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
97+
; CHECK-NEXT: %load:_(<2 x s32>) = G_LOAD %ptr(p1) :: (volatile load (<2 x s32>), addrspace 1)
98+
; CHECK-NEXT: %mask_elt:_(s32) = G_CONSTANT i32 255
99+
; CHECK-NEXT: %mask:_(<2 x s32>) = G_BUILD_VECTOR %mask_elt(s32), %mask_elt(s32)
100+
; CHECK-NEXT: %and:_(<2 x s32>) = G_AND %load, %mask
101+
; CHECK-NEXT: %inreg:_(<2 x s32>) = G_SEXT_INREG %and, 8
102+
; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg(<2 x s32>)
103+
%ptr:_(p1) = COPY $vgpr0_vgpr1
104+
%load:_(<2 x s32>) = G_LOAD %ptr :: (volatile load (<2 x s32>), addrspace 1, align 8)
105+
%mask_elt:_(s32) = G_CONSTANT i32 255
106+
%mask:_(<2 x s32>) = G_BUILD_VECTOR %mask_elt, %mask_elt
107+
%and:_(<2 x s32>) = G_AND %load, %mask
108+
%inreg:_(<2 x s32>) = G_SEXT_INREG %and, 8
109+
$vgpr0_vgpr1 = COPY %inreg
110+
111+
...

0 commit comments

Comments (0)