Commit d2e66d7

[GlobalISel] Add a combine for and(load, mask) -> zextload

This only handles simple masks, not shifted masks, for now.

Reviewed By: aemerson

Differential Revision: https://reviews.llvm.org/D109357
1 parent: e4da0f9
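For intuition, the transformation corresponds to the following source-level pattern. This is an illustrative C++ sketch, not part of the commit; the function and variable names are made up, and the byte selected by the mask assumes a little-endian target:

#include <cstdint>
#include <cstring>

// Masking a wider load with 0xff keeps only the low byte, so the load can be
// narrowed to a single-byte zero-extending load, which is the G_ZEXTLOAD this
// combine emits.
uint32_t low_byte(const unsigned char *p) {
  uint16_t v;
  std::memcpy(&v, p, sizeof(v)); // 16-bit load
  return v & 0xff;               // mask is 2^8 - 1: zero-extend of the low 8 bits
}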

File tree: 15 files changed, +782 -793 lines

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 3 additions & 0 deletions
@@ -172,6 +172,9 @@ class CombinerHelper {
   bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo);
   void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo);
 
+  /// Match (and (load x), mask) -> zextload x
+  bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo);
+
   /// Combine \p MI into a pre-indexed or post-indexed load/store operation if
   /// legal and the surrounding code makes it useful.
   bool tryCombineIndexedLoadStore(MachineInstr &MI);
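Note that the new matcher reports its rewrite through a BuildFnTy out-parameter rather than mutating the MIR directly. Paraphrased here for reference (consult CombinerHelper.h for the authoritative definition), the type is a std::function over a MachineIRBuilder:

#include <functional>

namespace llvm {
class MachineIRBuilder; // forward declaration, as illustration only

// Shape of the matchinfo type used above: the matcher records a closure that
// knows how to build the replacement, and the combiner invokes it on success.
using BuildFnTy = std::function<void(MachineIRBuilder &)>;
} // namespace llvm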

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 7 additions & 1 deletion
@@ -130,7 +130,13 @@ def extending_loads : GICombineRule<
   (match (wip_match_opcode G_LOAD, G_SEXTLOAD, G_ZEXTLOAD):$root,
          [{ return Helper.matchCombineExtendingLoads(*${root}, ${matchinfo}); }]),
   (apply [{ Helper.applyCombineExtendingLoads(*${root}, ${matchinfo}); }])>;
-def combines_for_extload: GICombineGroup<[extending_loads]>;
+
+def load_and_mask : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_AND):$root,
+         [{ return Helper.matchCombineLoadWithAndMask(*${root}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+def combines_for_extload: GICombineGroup<[extending_loads, load_and_mask]>;
 
 def sext_trunc_sextload : GICombineRule<
   (defs root:$d),
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 70 additions & 0 deletions
@@ -633,6 +633,76 @@ void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
   Observer.changedInstr(MI);
 }
 
+bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
+                                                 BuildFnTy &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_AND);
+
+  // If we have the following code:
+  //  %mask = G_CONSTANT 255
+  //  %ld = G_LOAD %ptr, (load s16)
+  //  %and = G_AND %ld, %mask
+  //
+  // Try to fold it into
+  //  %ld = G_ZEXTLOAD %ptr, (load s8)
+
+  Register Dst = MI.getOperand(0).getReg();
+  if (MRI.getType(Dst).isVector())
+    return false;
+
+  auto MaybeMask =
+      getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+  if (!MaybeMask)
+    return false;
+
+  APInt MaskVal = MaybeMask->Value;
+
+  if (!MaskVal.isMask())
+    return false;
+
+  Register SrcReg = MI.getOperand(1).getReg();
+  GAnyLoad *LoadMI = getOpcodeDef<GAnyLoad>(SrcReg, MRI);
+  if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()) ||
+      !LoadMI->isSimple())
+    return false;
+
+  Register LoadReg = LoadMI->getDstReg();
+  LLT LoadTy = MRI.getType(LoadReg);
+  Register PtrReg = LoadMI->getPointerReg();
+  uint64_t LoadSizeBits = LoadMI->getMemSizeInBits();
+  unsigned MaskSizeBits = MaskVal.countTrailingOnes();
+
+  // The mask may not be larger than the in-memory type, as it might cover sign
+  // extended bits
+  if (MaskSizeBits > LoadSizeBits)
+    return false;
+
+  // If the mask covers the whole destination register, there's nothing to
+  // extend
+  if (MaskSizeBits >= LoadTy.getSizeInBits())
+    return false;
+
+  // Most targets cannot deal with loads of size < 8 and need to re-legalize to
+  // at least byte loads. Avoid creating such loads here
+  if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
+    return false;
+
+  const MachineMemOperand &MMO = LoadMI->getMMO();
+  LegalityQuery::MemDesc MemDesc(MMO);
+  MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
+  if (!isLegalOrBeforeLegalizer(
+          {TargetOpcode::G_ZEXTLOAD, {LoadTy, MRI.getType(PtrReg)}, {MemDesc}}))
+    return false;
+
+  MatchInfo = [=](MachineIRBuilder &B) {
+    B.setInstrAndDebugLoc(*LoadMI);
+    auto &MF = B.getMF();
+    auto PtrInfo = MMO.getPointerInfo();
+    auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MaskSizeBits / 8);
+    B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
+  };
+  return true;
+}
+
 bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
                                    const MachineInstr &UseMI) {
   assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
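The bail-outs above lean on two APInt queries: isMask() accepts only values of the form 2^N - 1 (a solid run of ones starting at bit 0), and countTrailingOnes() recovers N. A small standalone demo of the distinction the matcher draws, using illustrative values taken from the tests below:

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  llvm::APInt ByteMask(32, 255); // 0x000000ff: ones anchored at bit 0
  assert(ByteMask.isMask());
  assert(ByteMask.countTrailingOnes() == 8); // candidate for an 8-bit zextload

  llvm::APInt NotAMask(32, 510); // 0x1fe: the run of ones skips bit 0
  assert(!NotAMask.isMask());    // matcher returns false, as in test_no_mask
  return 0;
}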
Lines changed: 252 additions & 0 deletions
@@ -0,0 +1,252 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -aarch64prelegalizercombinerhelper-only-enable-rule="load_and_mask" -verify-machineinstrs %s -o - | FileCheck %s

# REQUIRES: asserts

# Check that we can fold and ({any,zext,sext}load, mask) -> zextload

---
name: test_anyext_1
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0
    ; CHECK-LABEL: name: test_anyext_1
    ; CHECK: liveins: $x0
    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
    ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load (s8))
    ; CHECK: [[AND:%[0-9]+]]:_(s8) = G_AND [[LOAD]], [[C]]
    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s8)
    ; CHECK: $w0 = COPY [[ANYEXT]](s32)
    %0:_(p0) = COPY $x0
    %1:_(s8) = G_CONSTANT i8 1
    %2:_(s8) = G_LOAD %0 :: (load (s8))
    %3:_(s8) = G_AND %2, %1
    %4:_(s32) = G_ANYEXT %3
    $w0 = COPY %4
...

---
name: test_anyext_s16
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0
    ; CHECK-LABEL: name: test_anyext_s16
    ; CHECK: liveins: $x0
    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s16) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8))
    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ZEXTLOAD]](s16)
    ; CHECK: $w0 = COPY [[ANYEXT]](s32)
    %0:_(p0) = COPY $x0
    %1:_(s16) = G_CONSTANT i16 255
    %2:_(s16) = G_LOAD %0 :: (load (s8))
    %3:_(s16) = G_AND %2, %1
    %4:_(s32) = G_ANYEXT %3
    $w0 = COPY %4
...

---
name: test_anyext_s32
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0
    ; CHECK-LABEL: name: test_anyext_s32
    ; CHECK: liveins: $x0
    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8))
    ; CHECK: $w0 = COPY [[ZEXTLOAD]](s32)
    %0:_(p0) = COPY $x0
    %1:_(s32) = G_CONSTANT i32 255
    %2:_(s32) = G_LOAD %0 :: (load (s8))
    %3:_(s32) = G_AND %2, %1
    $w0 = COPY %3
...

---
name: test_load_s32
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0
    ; CHECK-LABEL: name: test_load_s32
    ; CHECK: liveins: $x0
    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8), align 4)
    ; CHECK: $w0 = COPY [[ZEXTLOAD]](s32)
    %0:_(p0) = COPY $x0
    %1:_(s32) = G_CONSTANT i32 255
    %2:_(s32) = G_LOAD %0 :: (load (s32))
    %3:_(s32) = G_AND %2, %1
    $w0 = COPY %3
...

---
name: test_load_mask_size_equals_dst_size
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0

    ; The combine should only apply if the mask zeroes actual bits of the dst type.
    ; If it doesn't, the mask is redundant and we have other combines to fold it away.

    ; CHECK-LABEL: name: test_load_mask_size_equals_dst_size
    ; CHECK: liveins: $x0
    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
    ; CHECK: $w0 = COPY [[AND]](s32)
    %0:_(p0) = COPY $x0
    %1:_(s32) = G_CONSTANT i32 4294967295
    %2:_(s32) = G_LOAD %0 :: (load (s32))
    %3:_(s32) = G_AND %2, %1
    $w0 = COPY %3
...

---
name: test_zext
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0
    ; CHECK-LABEL: name: test_zext
    ; CHECK: liveins: $x0
    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8), align 2)
    ; CHECK: $w0 = COPY [[ZEXTLOAD]](s32)
    %0:_(p0) = COPY $x0
    %1:_(s32) = G_CONSTANT i32 255
    %2:_(s32) = G_ZEXTLOAD %0 :: (load (s16))
    %3:_(s32) = G_AND %2, %1
    $w0 = COPY %3
...

---
name: test_zext_mask_larger_memsize
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0

    ; The combine should only apply if the mask narrows the memory size.
    ; We have another combine that folds redundant masks.

    ; CHECK-LABEL: name: test_zext_mask_larger_memsize
    ; CHECK: liveins: $x0
    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
    ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8))
    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ZEXTLOAD]], [[C]]
    ; CHECK: $w0 = COPY [[AND]](s32)
    %0:_(p0) = COPY $x0
    %1:_(s32) = G_CONSTANT i32 65535
    %2:_(s32) = G_ZEXTLOAD %0 :: (load (s8))
    %3:_(s32) = G_AND %2, %1
    $w0 = COPY %3
...

---
name: test_sext
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0
    ; CHECK-LABEL: name: test_sext
    ; CHECK: liveins: $x0
    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8), align 2)
    ; CHECK: $w0 = COPY [[ZEXTLOAD]](s32)
    %0:_(p0) = COPY $x0
    %1:_(s32) = G_CONSTANT i32 255
    %2:_(s32) = G_SEXTLOAD %0 :: (load (s16))
    %3:_(s32) = G_AND %2, %1
    $w0 = COPY %3
...

---
name: test_sext_mask_larger_memsize
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0
    ; CHECK-LABEL: name: test_sext_mask_larger_memsize
    ; CHECK: liveins: $x0
    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
    ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8))
    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXTLOAD]], [[C]]
    ; CHECK: $w0 = COPY [[AND]](s32)
    %0:_(p0) = COPY $x0
    %1:_(s32) = G_CONSTANT i32 65535
    %2:_(s32) = G_SEXTLOAD %0 :: (load (s8))
    %3:_(s32) = G_AND %2, %1
    $w0 = COPY %3
...

---
name: test_non_pow2_memtype
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0
    ; CHECK-LABEL: name: test_non_pow2_memtype
    ; CHECK: liveins: $x0
    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK: [[C:%[0-9]+]]:_(s24) = G_CONSTANT i24 7
    ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[COPY]](p0) :: (load (s24), align 4)
    ; CHECK: [[AND:%[0-9]+]]:_(s24) = G_AND [[LOAD]], [[C]]
    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s24)
    ; CHECK: $w0 = COPY [[ANYEXT]](s32)
    %0:_(p0) = COPY $x0
    %1:_(s24) = G_CONSTANT i24 7
    %2:_(s24) = G_LOAD %0 :: (load (s24))
    %3:_(s24) = G_AND %2, %1
    %4:_(s32) = G_ANYEXT %3
    $w0 = COPY %4
...

---
name: test_no_mask
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0
    ; CHECK-LABEL: name: test_no_mask
    ; CHECK: liveins: $x0
    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 510
    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
    ; CHECK: $w0 = COPY [[AND]](s32)
    %0:_(p0) = COPY $x0
    %1:_(s32) = G_CONSTANT i32 510
    %2:_(s32) = G_LOAD %0 :: (load (s8))
    %3:_(s32) = G_AND %2, %1
    $w0 = COPY %3
...

---
name: test_volatile
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x0
    ; CHECK-LABEL: name: test_volatile
    ; CHECK: liveins: $x0
    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (volatile load (s8))
    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
    ; CHECK: $w0 = COPY [[AND]](s32)
    %0:_(p0) = COPY $x0
    %1:_(s32) = G_CONSTANT i32 255
    %2:_(s32) = G_LOAD %0 :: (volatile load (s8))
    %3:_(s32) = G_AND %2, %1
    $w0 = COPY %3
...
