Skip to content

Commit e8425b2

Browse files
author
Chen Zheng
committed
[PowerPC] add store (load float*) pattern to isProfitableToHoist
The "store (load float*)" pattern can be optimized to "store (load i32*)" by the InstCombine pass. Add "store (load float*)" to isProfitableToHoist so that hoisting does not break this InstCombine optimization. Reviewed By: jsji. Differential Revision: https://reviews.llvm.org/D82341
1 parent cf55866 commit e8425b2

File tree

2 files changed

+46
-20
lines changed

2 files changed

+46
-20
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 42 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16381,31 +16381,56 @@ bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
1638116381
}
1638216382
}
1638316383

16384-
// Currently this is a copy from AArch64TargetLowering::isProfitableToHoist.
16385-
// FIXME: add more patterns which are profitable to hoist.
16384+
// FIXME: add more patterns which are not profitable to hoist.
1638616385
bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
16387-
if (I->getOpcode() != Instruction::FMul)
16388-
return true;
16389-
1639016386
if (!I->hasOneUse())
1639116387
return true;
1639216388

1639316389
Instruction *User = I->user_back();
1639416390
assert(User && "A single use instruction with no uses.");
1639516391

16396-
if (User->getOpcode() != Instruction::FSub &&
16397-
User->getOpcode() != Instruction::FAdd)
16398-
return true;
16392+
switch (I->getOpcode()) {
16393+
case Instruction::FMul: {
16394+
// Don't break up FMA formation; PowerPC prefers FMA.
16395+
if (User->getOpcode() != Instruction::FSub &&
16396+
User->getOpcode() != Instruction::FAdd)
16397+
return true;
1639916398

16400-
const TargetOptions &Options = getTargetMachine().Options;
16401-
const Function *F = I->getFunction();
16402-
const DataLayout &DL = F->getParent()->getDataLayout();
16403-
Type *Ty = User->getOperand(0)->getType();
16404-
16405-
return !(
16406-
isFMAFasterThanFMulAndFAdd(*F, Ty) &&
16407-
isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
16408-
(Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
16399+
const TargetOptions &Options = getTargetMachine().Options;
16400+
const Function *F = I->getFunction();
16401+
const DataLayout &DL = F->getParent()->getDataLayout();
16402+
Type *Ty = User->getOperand(0)->getType();
16403+
16404+
return !(
16405+
isFMAFasterThanFMulAndFAdd(*F, Ty) &&
16406+
isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
16407+
(Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
16408+
}
16409+
case Instruction::Load: {
16410+
// Don't break the "store (load float*)" pattern; this pattern will be combined
16411+
// to "store (load int32)" in the later InstCombine pass. See function
16412+
// combineLoadToOperationType. On PowerPC, loading a floating-point value takes more
16413+
// cycles than loading a 32-bit integer.
16414+
LoadInst *LI = cast<LoadInst>(I);
16415+
// For loads that combineLoadToOperationType leaves untouched, such as
16416+
// ordered loads, hoisting should be profitable.
16417+
// A swifterror load can only be used with a pointer-to-pointer type, so
16418+
// the later type check should get rid of this case.
16419+
if (!LI->isUnordered())
16420+
return true;
16421+
16422+
if (User->getOpcode() != Instruction::Store)
16423+
return true;
16424+
16425+
if (I->getType()->getTypeID() != Type::FloatTyID)
16426+
return true;
16427+
16428+
return false;
16429+
}
16430+
default:
16431+
return true;
16432+
}
16433+
return true;
1640916434
}
1641016435

1641116436
const MCPhysReg *

llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-load-i32.ll

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,22 +3,23 @@
33
define float @foo(float* %src, float* %dest, i32 signext %count, i32 signext %cond) {
44
; CHECK-LABEL: @foo(
55
; CHECK-LABEL: entry:
6-
; CHECK: %0 = load float, float* %arrayidx, align 4
6+
; CHECK-NOT: load float
77
entry:
88
%cmp = icmp sgt i32 %cond, 10
99
%idxprom = sext i32 %count to i64
1010
%arrayidx = getelementptr inbounds float, float* %src, i64 %idxprom
1111
br i1 %cmp, label %if.then, label %if.else
1212

1313
; CHECK-LABEL: if.then:
14-
; CHECK-NOT: load float
14+
; CHECK: %0 = load float, float* %arrayidx, align 4
1515
if.then: ; preds = %entry
1616
%0 = load float, float* %arrayidx, align 4
1717
%res = fmul float %0, 3.000000e+00
1818
br label %if.end
1919

2020
; CHECK-LABEL: if.else:
21-
; CHECK-NOT: load float
21+
; CHECK: %1 = load float, float* %arrayidx, align 4
22+
; CHECK: store float %1, float* %arrayidx4, align 4
2223
if.else: ; preds = %entry
2324
%1 = load float, float* %arrayidx, align 4
2425
%idxprom3 = sext i32 %count to i64

0 commit comments

Comments
 (0)