Skip to content

Commit d3cbda9

Browse files
committed
[GlobalISel] Look between instructions to be matched
When a pattern is matched in TableGen, a check called isObviouslySafeToFold() is run. One of the conditions it checks is whether the instructions being matched are consecutive, so that the instruction's insertion point does not change. This patch allows the insertion point of a load instruction to move if none of the intervening instructions are stores or have side effects.
1 parent 6f62757 commit d3cbda9

File tree

4 files changed

+218
-160
lines changed

4 files changed

+218
-160
lines changed

llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,15 +61,42 @@ bool GIMatchTableExecutor::isBaseWithConstantOffset(
6161

6262
bool GIMatchTableExecutor::isObviouslySafeToFold(MachineInstr &MI,
6363
MachineInstr &IntoMI) const {
64+
auto IntoMIIter = IntoMI.getIterator();
65+
6466
// Immediate neighbours are already folded.
6567
if (MI.getParent() == IntoMI.getParent() &&
66-
std::next(MI.getIterator()) == IntoMI.getIterator())
68+
std::next(MI.getIterator()) == IntoMIIter)
6769
return true;
6870

6971
// Convergent instructions cannot be moved in the CFG.
7072
if (MI.isConvergent() && MI.getParent() != IntoMI.getParent())
7173
return false;
7274

73-
return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() &&
74-
!MI.hasUnmodeledSideEffects() && MI.implicit_operands().empty();
75+
if (MI.isLoadFoldBarrier())
76+
return false;
77+
78+
// If the load is simple, check instructions between MI and IntoMI
79+
if (MI.mayLoad() && MI.getParent() == IntoMI.getParent()) {
80+
if (MI.memoperands_empty())
81+
return false;
82+
auto &MMO = **(MI.memoperands_begin());
83+
if (MMO.isAtomic() || MMO.isVolatile())
84+
return false;
85+
86+
// Ensure instructions between MI and IntoMI are not affected when combined
87+
unsigned Iter = 0;
88+
const unsigned MaxIter = 20;
89+
for (auto &CurrMI :
90+
instructionsWithoutDebug(MI.getIterator(), IntoMI.getIterator())) {
91+
if (CurrMI.isLoadFoldBarrier())
92+
return false;
93+
94+
if (Iter++ == MaxIter)
95+
return false;
96+
}
97+
98+
return true;
99+
}
100+
101+
return true;
75102
}

llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir

Lines changed: 126 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,10 @@
4141
define void @anyext_on_fpr() { ret void }
4242
define void @anyext_on_fpr8() { ret void }
4343

44-
...
44+
define void @load_s32_gpr_LD1() { ret void }
45+
define void @load_s32_gpr_GIM() { ret void }
4546

47+
...
4648
---
4749
name: load_s64_gpr
4850
legalized: true
@@ -57,7 +59,9 @@ body: |
5759
liveins: $x0
5860
5961
; CHECK-LABEL: name: load_s64_gpr
60-
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
62+
; CHECK: liveins: $x0
63+
; CHECK-NEXT: {{ $}}
64+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
6165
; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[COPY]], 0 :: (load (s64) from %ir.addr)
6266
; CHECK-NEXT: $x0 = COPY [[LDRXui]]
6367
%0(p0) = COPY $x0
@@ -79,7 +83,9 @@ body: |
7983
liveins: $x0
8084
8185
; CHECK-LABEL: name: load_s32_gpr
82-
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
86+
; CHECK: liveins: $x0
87+
; CHECK-NEXT: {{ $}}
88+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
8389
; CHECK-NEXT: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load (s32) from %ir.addr)
8490
; CHECK-NEXT: $w0 = COPY [[LDRWui]]
8591
%0(p0) = COPY $x0
@@ -97,7 +103,9 @@ body: |
97103
liveins: $x0
98104
99105
; CHECK-LABEL: name: load_s16_gpr_anyext
100-
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
106+
; CHECK: liveins: $x0
107+
; CHECK-NEXT: {{ $}}
108+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
101109
; CHECK-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load (s16) from %ir.addr)
102110
; CHECK-NEXT: $w0 = COPY [[LDRHHui]]
103111
%0:gpr(p0) = COPY $x0
@@ -119,7 +127,9 @@ body: |
119127
liveins: $x0
120128
121129
; CHECK-LABEL: name: load_s16_gpr
122-
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
130+
; CHECK: liveins: $x0
131+
; CHECK-NEXT: {{ $}}
132+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
123133
; CHECK-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load (s16) from %ir.addr)
124134
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRHHui]]
125135
; CHECK-NEXT: $w0 = COPY [[COPY1]]
@@ -139,7 +149,9 @@ body: |
139149
liveins: $x0
140150
141151
; CHECK-LABEL: name: load_s8_gpr_anyext
142-
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
152+
; CHECK: liveins: $x0
153+
; CHECK-NEXT: {{ $}}
154+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
143155
; CHECK-NEXT: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load (s8) from %ir.addr)
144156
; CHECK-NEXT: $w0 = COPY [[LDRBBui]]
145157
%0:gpr(p0) = COPY $x0
@@ -161,7 +173,9 @@ body: |
161173
liveins: $x0
162174
163175
; CHECK-LABEL: name: load_s8_gpr
164-
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
176+
; CHECK: liveins: $x0
177+
; CHECK-NEXT: {{ $}}
178+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
165179
; CHECK-NEXT: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load (s8) from %ir.addr)
166180
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRBBui]]
167181
; CHECK-NEXT: $w0 = COPY [[COPY1]]
@@ -188,7 +202,9 @@ body: |
188202
liveins: $x0
189203
190204
; CHECK-LABEL: name: load_fi_s64_gpr
191-
; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui %stack.0.ptr0, 0 :: (load (s64))
205+
; CHECK: liveins: $x0
206+
; CHECK-NEXT: {{ $}}
207+
; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui %stack.0.ptr0, 0 :: (load (s64))
192208
; CHECK-NEXT: $x0 = COPY [[LDRXui]]
193209
%0(p0) = G_FRAME_INDEX %stack.0.ptr0
194210
%1(s64) = G_LOAD %0 :: (load (s64))
@@ -211,7 +227,9 @@ body: |
211227
liveins: $x0
212228
213229
; CHECK-LABEL: name: load_gep_128_s64_gpr
214-
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
230+
; CHECK: liveins: $x0
231+
; CHECK-NEXT: {{ $}}
232+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
215233
; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[COPY]], 16 :: (load (s64) from %ir.addr)
216234
; CHECK-NEXT: $x0 = COPY [[LDRXui]]
217235
%0(p0) = COPY $x0
@@ -237,7 +255,9 @@ body: |
237255
liveins: $x0
238256
239257
; CHECK-LABEL: name: load_gep_512_s32_gpr
240-
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
258+
; CHECK: liveins: $x0
259+
; CHECK-NEXT: {{ $}}
260+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
241261
; CHECK-NEXT: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 128 :: (load (s32) from %ir.addr)
242262
; CHECK-NEXT: $w0 = COPY [[LDRWui]]
243263
%0(p0) = COPY $x0
@@ -263,7 +283,9 @@ body: |
263283
liveins: $x0
264284
265285
; CHECK-LABEL: name: load_gep_64_s16_gpr
266-
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
286+
; CHECK: liveins: $x0
287+
; CHECK-NEXT: {{ $}}
288+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
267289
; CHECK-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 32 :: (load (s16) from %ir.addr)
268290
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRHHui]]
269291
; CHECK-NEXT: $w0 = COPY [[COPY1]]
@@ -291,7 +313,9 @@ body: |
291313
liveins: $x0
292314
293315
; CHECK-LABEL: name: load_gep_1_s8_gpr
294-
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
316+
; CHECK: liveins: $x0
317+
; CHECK-NEXT: {{ $}}
318+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
295319
; CHECK-NEXT: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 1 :: (load (s8) from %ir.addr)
296320
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRBBui]]
297321
; CHECK-NEXT: $w0 = COPY [[COPY1]]
@@ -317,7 +341,9 @@ body: |
317341
liveins: $x0
318342
319343
; CHECK-LABEL: name: load_s64_fpr
320-
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
344+
; CHECK: liveins: $x0
345+
; CHECK-NEXT: {{ $}}
346+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
321347
; CHECK-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 0 :: (load (s64) from %ir.addr)
322348
; CHECK-NEXT: $d0 = COPY [[LDRDui]]
323349
%0(p0) = COPY $x0
@@ -339,7 +365,9 @@ body: |
339365
liveins: $x0
340366
341367
; CHECK-LABEL: name: load_s32_fpr
342-
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
368+
; CHECK: liveins: $x0
369+
; CHECK-NEXT: {{ $}}
370+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
343371
; CHECK-NEXT: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[COPY]], 0 :: (load (s32) from %ir.addr)
344372
; CHECK-NEXT: $s0 = COPY [[LDRSui]]
345373
%0(p0) = COPY $x0
@@ -361,7 +389,9 @@ body: |
361389
liveins: $x0
362390
363391
; CHECK-LABEL: name: load_s16_fpr
364-
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
392+
; CHECK: liveins: $x0
393+
; CHECK-NEXT: {{ $}}
394+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
365395
; CHECK-NEXT: [[LDRHui:%[0-9]+]]:fpr16 = LDRHui [[COPY]], 0 :: (load (s16) from %ir.addr)
366396
; CHECK-NEXT: $h0 = COPY [[LDRHui]]
367397
%0(p0) = COPY $x0
@@ -383,7 +413,9 @@ body: |
383413
liveins: $x0
384414
385415
; CHECK-LABEL: name: load_s8_fpr
386-
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
416+
; CHECK: liveins: $x0
417+
; CHECK-NEXT: {{ $}}
418+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
387419
; CHECK-NEXT: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load (s8) from %ir.addr)
388420
; CHECK-NEXT: $b0 = COPY [[LDRBui]]
389421
%0(p0) = COPY $x0
@@ -407,7 +439,9 @@ body: |
407439
liveins: $x0
408440
409441
; CHECK-LABEL: name: load_gep_8_s64_fpr
410-
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
442+
; CHECK: liveins: $x0
443+
; CHECK-NEXT: {{ $}}
444+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
411445
; CHECK-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 1 :: (load (s64) from %ir.addr)
412446
; CHECK-NEXT: $d0 = COPY [[LDRDui]]
413447
%0(p0) = COPY $x0
@@ -433,7 +467,9 @@ body: |
433467
liveins: $x0
434468
435469
; CHECK-LABEL: name: load_gep_16_s32_fpr
436-
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
470+
; CHECK: liveins: $x0
471+
; CHECK-NEXT: {{ $}}
472+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
437473
; CHECK-NEXT: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[COPY]], 4 :: (load (s32) from %ir.addr)
438474
; CHECK-NEXT: $s0 = COPY [[LDRSui]]
439475
%0(p0) = COPY $x0
@@ -459,7 +495,9 @@ body: |
459495
liveins: $x0
460496
461497
; CHECK-LABEL: name: load_gep_64_s16_fpr
462-
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
498+
; CHECK: liveins: $x0
499+
; CHECK-NEXT: {{ $}}
500+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
463501
; CHECK-NEXT: [[LDRHui:%[0-9]+]]:fpr16 = LDRHui [[COPY]], 32 :: (load (s16) from %ir.addr)
464502
; CHECK-NEXT: $h0 = COPY [[LDRHui]]
465503
%0(p0) = COPY $x0
@@ -485,7 +523,9 @@ body: |
485523
liveins: $x0
486524
487525
; CHECK-LABEL: name: load_gep_32_s8_fpr
488-
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
526+
; CHECK: liveins: $x0
527+
; CHECK-NEXT: {{ $}}
528+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
489529
; CHECK-NEXT: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 32 :: (load (s8) from %ir.addr)
490530
; CHECK-NEXT: $b0 = COPY [[LDRBui]]
491531
%0(p0) = COPY $x0
@@ -508,7 +548,9 @@ body: |
508548
liveins: $x0
509549
510550
; CHECK-LABEL: name: load_v2s32
511-
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
551+
; CHECK: liveins: $x0
552+
; CHECK-NEXT: {{ $}}
553+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
512554
; CHECK-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 0 :: (load (<2 x s32>) from %ir.addr)
513555
; CHECK-NEXT: $d0 = COPY [[LDRDui]]
514556
%0(p0) = COPY $x0
@@ -529,7 +571,9 @@ body: |
529571
liveins: $x0
530572
531573
; CHECK-LABEL: name: load_v2s64
532-
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
574+
; CHECK: liveins: $x0
575+
; CHECK-NEXT: {{ $}}
576+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
533577
; CHECK-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load (<2 x s64>) from %ir.addr)
534578
; CHECK-NEXT: $q0 = COPY [[LDRQui]]
535579
%0(p0) = COPY $x0
@@ -712,3 +756,63 @@ body: |
712756
RET_ReallyLR
713757
714758
...
759+
---
# The G_LOAD and the G_INSERT_VECTOR_ELT that consumes it are separated only by two G_CONSTANT instructions (no store in between), so the CHECK lines expect the load to be folded into a single LD1i32.
760+
name: load_s32_gpr_LD1
761+
legalized: true
762+
regBankSelected: true
763+
764+
body: |
765+
bb.0:
766+
liveins: $q0, $x0
767+
768+
; CHECK-LABEL: name: load_s32_gpr_LD1
769+
; CHECK: liveins: $q0, $x0
770+
; CHECK-NEXT: {{ $}}
771+
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
772+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x0
773+
; CHECK-NEXT: [[LD1i32_:%[0-9]+]]:fpr128 = LD1i32 [[COPY]], 0, [[COPY1]] :: (load (s32))
774+
; CHECK-NEXT: $q0 = COPY [[LD1i32_]]
775+
; CHECK-NEXT: RET_ReallyLR implicit $q0
776+
%0:fpr(<4 x s32>) = COPY $q0
777+
%1:gpr(p0) = COPY $x0
778+
%2:fpr(s32) = G_LOAD %1(p0) :: (load (s32))
779+
%3:gpr(s32) = G_CONSTANT i32 3
780+
%5:gpr(s64) = G_CONSTANT i64 0
781+
%4:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %2(s32), %5(s64)
782+
$q0 = COPY %4(<4 x s32>)
783+
RET_ReallyLR implicit $q0
784+
785+
...
786+
---
787+
788+
name: load_s32_gpr_GIM
789+
legalized: true
790+
regBankSelected: true
791+
792+
body: |
793+
bb.0:
794+
liveins: $q0, $x0
795+
; This test should not select an LD1 instruction, as there is a store instruction between the G_LOAD and the G_INSERT_VECTOR_ELT that uses it.
796+
; CHECK-LABEL: name: load_s32_gpr_GIM
797+
; CHECK: liveins: $q0, $x0
798+
; CHECK-NEXT: {{ $}}
799+
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
800+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x0
801+
; CHECK-NEXT: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[COPY1]], 0 :: (load (s32))
802+
; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 3
803+
; CHECK-NEXT: STRWui [[MOVi32imm]], [[COPY1]], 0 :: (store (s32))
804+
; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
805+
; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[LDRSui]], %subreg.ssub
806+
; CHECK-NEXT: [[INSvi32lane:%[0-9]+]]:fpr128 = INSvi32lane [[COPY]], 0, [[INSERT_SUBREG]], 0
807+
; CHECK-NEXT: $q0 = COPY [[INSvi32lane]]
808+
; CHECK-NEXT: RET_ReallyLR implicit $q0
809+
%0:fpr(<4 x s32>) = COPY $q0
810+
%1:gpr(p0) = COPY $x0
811+
%2:fpr(s32) = G_LOAD %1(p0) :: (load (s32))
812+
%3:gpr(s32) = G_CONSTANT i32 3
813+
G_STORE %3(s32), %1(p0) :: (store (s32))
814+
%5:gpr(s64) = G_CONSTANT i64 0
815+
%4:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %2(s32), %5(s64)
816+
$q0 = COPY %4(<4 x s32>)
817+
RET_ReallyLR implicit $q0
818+
...

0 commit comments

Comments
 (0)