Skip to content

Commit a959ad1

Browse files
authored
[AArch64][GISel] Assign G_LOAD defs into FPR if they feed FP instructions indirectly via PHI (#94618)
If G_LOAD def is used by a PHI, recursively check if the def of PHI is used by any instruction that only uses FP oprand. If so, assign the def of G_LOAD to FPR. In other words, this change helps RBS to assign G_LOAD defs into FPR if they are later on used in an FP instruction **indirectly via PHI**, to avoid unnecessary copies. Before this patch, we only considered direct usages. It helps to fix GISel regression in TSVC kernel s3110. GISel was 50% slower than SDAG before this change. Now GISel and SDAG have the same runtime.
1 parent 0079835 commit a959ad1

File tree

4 files changed

+253
-15
lines changed

4 files changed

+253
-15
lines changed

llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,20 @@ static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
496496
}
497497
}
498498

499+
bool AArch64RegisterBankInfo::isPHIWithFPContraints(
500+
const MachineInstr &MI, const MachineRegisterInfo &MRI,
501+
const TargetRegisterInfo &TRI, const unsigned Depth) const {
502+
if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
503+
return false;
504+
505+
return any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
506+
[&](const MachineInstr &UseMI) {
507+
if (onlyUsesFP(UseMI, MRI, TRI, Depth + 1))
508+
return true;
509+
return isPHIWithFPContraints(UseMI, MRI, TRI, Depth + 1);
510+
});
511+
}
512+
499513
bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
500514
const MachineRegisterInfo &MRI,
501515
const TargetRegisterInfo &TRI,
@@ -851,13 +865,18 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
851865
// instead of blind map every scalar to GPR.
852866
if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
853867
[&](const MachineInstr &UseMI) {
854-
// If we have at least one direct use in a FP instruction,
868+
// If we have at least one direct or indirect use
869+
// in a FP instruction,
855870
// assume this was a floating point load in the IR. If it was
856871
// not, we would have had a bitcast before reaching that
857872
// instruction.
858873
//
859874
// Int->FP conversion operations are also captured in
860875
// onlyDefinesFP().
876+
877+
if (isPHIWithFPContraints(UseMI, MRI, TRI))
878+
return true;
879+
861880
return onlyUsesFP(UseMI, MRI, TRI) ||
862881
onlyDefinesFP(UseMI, MRI, TRI);
863882
}))

llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,13 @@ class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo {
120120
/// Maximum recursion depth for hasFPConstraints.
121121
const unsigned MaxFPRSearchDepth = 2;
122122

123+
/// \returns true if \p MI is a PHI that its def is used by
124+
/// any instruction that onlyUsesFP.
125+
bool isPHIWithFPContraints(const MachineInstr &MI,
126+
const MachineRegisterInfo &MRI,
127+
const TargetRegisterInfo &TRI,
128+
unsigned Depth = 0) const;
129+
123130
/// \returns true if \p MI only uses and defines FPRs.
124131
bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI,
125132
const TargetRegisterInfo &TRI, unsigned Depth = 0) const;

llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ body: |
150150
151151
...
152152
---
153-
name: load_used_by_phi_gpr
153+
name: load_used_by_phi_gpr_copy_gpr
154154
legalized: true
155155
tracksRegLiveness: true
156156
body: |
@@ -173,6 +173,50 @@ body: |
173173
; CHECK-NEXT: {{ $}}
174174
; CHECK-NEXT: bb.2:
175175
; CHECK-NEXT: %phi:gpr(s32) = G_PHI %gpr_copy(s32), %bb.0, %load(s32), %bb.1
176+
; CHECK-NEXT: $w2 = COPY %phi(s32)
177+
; CHECK-NEXT: RET_ReallyLR implicit $w2
178+
bb.0:
179+
successors: %bb.1(0x40000000), %bb.2(0x40000000)
180+
liveins: $x0, $s0, $s1, $w0, $w1
181+
%cond_wide:_(s32) = COPY $w0
182+
%gpr_copy:_(s32) = COPY $w1
183+
%ptr:_(p0) = COPY $x0
184+
G_BRCOND %cond_wide, %bb.1
185+
G_BR %bb.2
186+
bb.1:
187+
successors: %bb.2
188+
%load:_(s32) = G_LOAD %ptr(p0) :: (load (s32))
189+
G_BR %bb.2
190+
bb.2:
191+
%phi:_(s32) = G_PHI %gpr_copy(s32), %bb.0, %load(s32), %bb.1
192+
$w2 = COPY %phi(s32)
193+
RET_ReallyLR implicit $w2
194+
195+
...
196+
---
197+
name: load_used_by_phi_gpr_copy_fpr
198+
legalized: true
199+
tracksRegLiveness: true
200+
body: |
201+
; CHECK-LABEL: name: load_used_by_phi_gpr
202+
; CHECK: bb.0:
203+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
204+
; CHECK-NEXT: liveins: $x0, $s0, $s1, $w0, $w1
205+
; CHECK-NEXT: {{ $}}
206+
; CHECK-NEXT: %cond_wide:gpr(s32) = COPY $w0
207+
; CHECK-NEXT: %gpr_copy:gpr(s32) = COPY $w1
208+
; CHECK-NEXT: %ptr:gpr(p0) = COPY $x0
209+
; CHECK-NEXT: G_BRCOND %cond_wide(s32), %bb.1
210+
; CHECK-NEXT: G_BR %bb.2
211+
; CHECK-NEXT: {{ $}}
212+
; CHECK-NEXT: bb.1:
213+
; CHECK-NEXT: successors: %bb.2(0x80000000)
214+
; CHECK-NEXT: {{ $}}
215+
; CHECK-NEXT: %load:fpr(s32) = G_LOAD %ptr(p0) :: (load (s32))
216+
; CHECK-NEXT: G_BR %bb.2
217+
; CHECK-NEXT: {{ $}}
218+
; CHECK-NEXT: bb.2:
219+
; CHECK-NEXT: %phi:gpr(s32) = G_PHI %gpr_copy(s32), %bb.0, %load(s32), %bb.1
176220
; CHECK-NEXT: $s0 = COPY %phi(s32)
177221
; CHECK-NEXT: RET_ReallyLR implicit $s0
178222
bb.0:
@@ -189,7 +233,7 @@ body: |
189233
G_BR %bb.2
190234
bb.2:
191235
%phi:_(s32) = G_PHI %gpr_copy(s32), %bb.0, %load(s32), %bb.1
192-
$s0 = COPY %phi(s32)
236+
$s0 = COPY %phi(s32) ; G_LOAD should consider this FPR constraint and assign %load FPR
193237
RET_ReallyLR implicit $s0
194238
195239
...

0 commit comments

Comments
 (0)