Skip to content

Commit dfdccbb

Browse files
author
Chen Zheng
committed
[PowerPC] exclude ICmpZero in LSR if icmp can be replaced in later hardware loop.
Differential Revision: https://reviews.llvm.org/D63477 llvm-svn: 364993
1 parent ab758ba commit dfdccbb

File tree

13 files changed

+142
-77
lines changed

13 files changed

+142
-77
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "llvm/Analysis/LoopInfo.h"
3131
#include "llvm/Analysis/ScalarEvolution.h"
3232
#include "llvm/IR/Dominators.h"
33+
#include "llvm/Analysis/AssumptionCache.h"
3334
#include <functional>
3435

3536
namespace llvm {
@@ -528,6 +529,12 @@ class TargetTransformInfo {
528529
/// calculation for the instructions in a loop.
529530
bool canMacroFuseCmp() const;
530531

532+
/// Return true if the target can save a compare for the loop count; for
533+
/// example, a hardware loop saves a compare.
534+
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
535+
DominatorTree *DT, AssumptionCache *AC,
536+
TargetLibraryInfo *LibInfo) const;
537+
531538
/// \return True if LSR should make efforts to create/preserve post-inc
532539
/// addressing mode expressions.
533540
bool shouldFavorPostInc() const;
@@ -1152,6 +1159,9 @@ class TargetTransformInfo::Concept {
11521159
virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
11531160
TargetTransformInfo::LSRCost &C2) = 0;
11541161
virtual bool canMacroFuseCmp() = 0;
1162+
virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
1163+
LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
1164+
TargetLibraryInfo *LibInfo) = 0;
11551165
virtual bool shouldFavorPostInc() const = 0;
11561166
virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0;
11571167
virtual bool isLegalMaskedStore(Type *DataType) = 0;
@@ -1402,6 +1412,12 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
14021412
bool canMacroFuseCmp() override {
14031413
return Impl.canMacroFuseCmp();
14041414
}
1415+
// Concept override: hands the query, with every argument unchanged, to the
// wrapped target implementation (Impl) and returns its answer.
bool canSaveCmp(Loop *L, BranchInst **BI,
1416+
ScalarEvolution *SE,
1417+
LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
1418+
TargetLibraryInfo *LibInfo) override {
1419+
return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
1420+
}
14051421
bool shouldFavorPostInc() const override {
14061422
return Impl.shouldFavorPostInc();
14071423
}

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,12 @@ class TargetTransformInfoImplBase {
221221

222222
bool canMacroFuseCmp() { return false; }
223223

224+
// Default answer for targets that do not provide their own canSaveCmp:
// always false. None of the arguments are read and *BI is not written.
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
225+
DominatorTree *DT, AssumptionCache *AC,
226+
TargetLibraryInfo *LibInfo) {
227+
return false;
228+
}
229+
224230
bool shouldFavorPostInc() const { return false; }
225231

226232
bool shouldFavorBackedgeIndex(const Loop *L) const { return false; }

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,13 @@ bool TargetTransformInfo::canMacroFuseCmp() const {
264264
return TTIImpl->canMacroFuseCmp();
265265
}
266266

267+
// Public entry point for the canSaveCmp query: forwards all arguments
// unchanged to the target-specific implementation held in TTIImpl.
bool TargetTransformInfo::canSaveCmp(Loop *L, BranchInst **BI,
268+
ScalarEvolution *SE, LoopInfo *LI,
269+
DominatorTree *DT, AssumptionCache *AC,
270+
TargetLibraryInfo *LibInfo) const {
271+
return TTIImpl->canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
272+
}
273+
267274
bool TargetTransformInfo::shouldFavorPostInc() const {
268275
return TTIImpl->shouldFavorPostInc();
269276
}

llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -875,3 +875,25 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
875875
return Cost;
876876
}
877877

878+
// Returns true, and stores L's exit branch in *BI, when L has no qualifying
// sub-loop and passes all three hardware-loop checks below (canAnalyze,
// isHardwareLoopProfitable, isHardwareLoopCandidate). Returns false otherwise.
// SE, LI, DT and AC are dereferenced unconditionally, so they must be non-null.
bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
879+
LoopInfo *LI, DominatorTree *DT,
880+
AssumptionCache *AC, TargetLibraryInfo *LibInfo) {
881+
// Process nested loops first. If any sub-loop qualifies, this loop is
// reported as not qualifying. NOTE: in that case the recursive call has
// already written the sub-loop's exit branch to *BI, so callers must only
// use *BI when this function returns true.
882+
for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
883+
if (canSaveCmp(*I, BI, SE, LI, DT, AC, LibInfo))
884+
return false; // Stop search.
885+
886+
HardwareLoopInfo HWLoopInfo(L);
887+
888+
// Each of the following checks must pass; the first failure ends the query.
if (!HWLoopInfo.canAnalyze(*LI))
889+
return false;
890+
891+
if (!isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo))
892+
return false;
893+
894+
if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT))
895+
return false;
896+
897+
// Report the exit branch recorded in HWLoopInfo to the caller.
*BI = HWLoopInfo.ExitBranch;
898+
return true;
899+
}

llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
5757
AssumptionCache &AC,
5858
TargetLibraryInfo *LibInfo,
5959
HardwareLoopInfo &HWLoopInfo);
60+
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
61+
DominatorTree *DT, AssumptionCache *AC,
62+
TargetLibraryInfo *LibInfo);
6063
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
6164
TTI::UnrollingPreferences &UP);
6265

llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1909,6 +1909,8 @@ class LSRInstance {
19091909
ScalarEvolution &SE;
19101910
DominatorTree &DT;
19111911
LoopInfo &LI;
1912+
AssumptionCache &AC;
1913+
TargetLibraryInfo &LibInfo;
19121914
const TargetTransformInfo &TTI;
19131915
Loop *const L;
19141916
bool FavorBackedgeIndex = false;
@@ -2047,7 +2049,8 @@ class LSRInstance {
20472049

20482050
public:
20492051
LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
2050-
LoopInfo &LI, const TargetTransformInfo &TTI);
2052+
LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC,
2053+
TargetLibraryInfo &LibInfo);
20512054

20522055
bool getChanged() const { return Changed; }
20532056

@@ -3232,6 +3235,9 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
32323235
}
32333236

32343237
void LSRInstance::CollectFixupsAndInitialFormulae() {
3238+
BranchInst *ExitBranch = nullptr;
3239+
bool SaveCmp = TTI.canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &LibInfo);
3240+
32353241
for (const IVStrideUse &U : IU) {
32363242
Instruction *UserInst = U.getUser();
32373243
// Skip IV users that are part of profitable IV Chains.
@@ -3261,6 +3267,10 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
32613267
// equality icmps, thanks to IndVarSimplify.
32623268
if (ICmpInst *CI = dyn_cast<ICmpInst>(UserInst))
32633269
if (CI->isEquality()) {
3270+
// If the target can save CI (e.g. PowerPC replaces it when forming a
3271+
// hardware loop), there is no need to generate initial formulae for it.
3272+
if (SaveCmp && CI == cast<ICmpInst>(ExitBranch->getCondition()))
3273+
continue;
32643274
// Swap the operands if needed to put the OperandValToReplace on the
32653275
// left, for consistency.
32663276
Value *NV = CI->getOperand(1);
@@ -5479,8 +5489,9 @@ void LSRInstance::ImplementSolution(
54795489

54805490
LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
54815491
DominatorTree &DT, LoopInfo &LI,
5482-
const TargetTransformInfo &TTI)
5483-
: IU(IU), SE(SE), DT(DT), LI(LI), TTI(TTI), L(L),
5492+
const TargetTransformInfo &TTI, AssumptionCache &AC,
5493+
TargetLibraryInfo &LibInfo)
5494+
: IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), LibInfo(LibInfo), TTI(TTI), L(L),
54845495
FavorBackedgeIndex(EnableBackedgeIndexing &&
54855496
TTI.shouldFavorBackedgeIndex(L)) {
54865497
// If LoopSimplify form is not available, stay out of trouble.
@@ -5677,6 +5688,8 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
56775688
AU.addPreserved<DominatorTreeWrapperPass>();
56785689
AU.addRequired<ScalarEvolutionWrapperPass>();
56795690
AU.addPreserved<ScalarEvolutionWrapperPass>();
5691+
AU.addRequired<AssumptionCacheTracker>();
5692+
AU.addRequired<TargetLibraryInfoWrapperPass>();
56805693
// Requiring LoopSimplify a second time here prevents IVUsers from running
56815694
// twice, since LoopSimplify was invalidated by running ScalarEvolution.
56825695
AU.addRequiredID(LoopSimplifyID);
@@ -5687,11 +5700,14 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
56875700

56885701
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
56895702
DominatorTree &DT, LoopInfo &LI,
5690-
const TargetTransformInfo &TTI) {
5703+
const TargetTransformInfo &TTI,
5704+
AssumptionCache &AC,
5705+
TargetLibraryInfo &LibInfo) {
5706+
56915707
bool Changed = false;
56925708

56935709
// Run the main LSR transformation.
5694-
Changed |= LSRInstance(L, IU, SE, DT, LI, TTI).getChanged();
5710+
Changed |= LSRInstance(L, IU, SE, DT, LI, TTI, AC, LibInfo).getChanged();
56955711

56965712
// Remove any extra phis created by processing inner loops.
56975713
Changed |= DeleteDeadPHIs(L->getHeader());
@@ -5722,14 +5738,17 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
57225738
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
57235739
const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
57245740
*L->getHeader()->getParent());
5725-
return ReduceLoopStrength(L, IU, SE, DT, LI, TTI);
5741+
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
5742+
*L->getHeader()->getParent());
5743+
auto &LibInfo = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
5744+
return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, LibInfo);
57265745
}
57275746

57285747
PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM,
57295748
LoopStandardAnalysisResults &AR,
57305749
LPMUpdater &) {
57315750
if (!ReduceLoopStrength(&L, AM.getResult<IVUsersAnalysis>(L, AR), AR.SE,
5732-
AR.DT, AR.LI, AR.TTI))
5751+
AR.DT, AR.LI, AR.TTI, AR.AC, AR.TLI))
57335752
return PreservedAnalyses::all();
57345753

57355754
return getLoopPassPreservedAnalyses();

llvm/test/CodeGen/PowerPC/addi-licm.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ entry:
1818
; CHECK: addi [[REG1:[0-9]+]], 1,
1919
; CHECK: addi [[REG2:[0-9]+]], 1,
2020
; CHECK: %for.body.i
21-
; CHECK-DAG: lfsx {{[0-9]+}}, [[REG1]],
22-
; CHECK-DAG: lfsx {{[0-9]+}}, [[REG2]],
21+
; CHECK-DAG: lfs {{[0-9]+}}, 0([[REG1]])
22+
; CHECK-DAG: lfs {{[0-9]+}}, 0([[REG2]])
2323
; CHECK: blr
2424

2525
; PIP-LABEL: @foo

llvm/test/CodeGen/PowerPC/ctrloop-ne.ll

Lines changed: 12 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,7 @@ for.end: ; preds = %for.body, %entry
3232

3333

3434
; CHECK: test_pos2_ir_ne
35-
; FIXME: Support this loop!
36-
; CHECK-NOT: bdnz
35+
; CHECK: bdnz
3736
; a < b
3837
define void @test_pos2_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
3938
entry:
@@ -62,8 +61,7 @@ for.end: ; preds = %for.body, %entry
6261

6362

6463
; CHECK: test_pos4_ir_ne
65-
; FIXME: Support this loop!
66-
; CHECK-NOT: bdnz
64+
; CHECK: bdnz
6765
; a < b
6866
define void @test_pos4_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
6967
entry:
@@ -92,8 +90,7 @@ for.end: ; preds = %for.body, %entry
9290

9391

9492
; CHECK: test_pos8_ir_ne
95-
; FIXME: Support this loop!
96-
; CHECK-NOT: bdnz
93+
; CHECK: bdnz
9794
; a < b
9895
define void @test_pos8_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
9996
entry:
@@ -122,8 +119,7 @@ for.end: ; preds = %for.body, %entry
122119

123120

124121
; CHECK: test_pos16_ir_ne
125-
; FIXME: Support this loop!
126-
; CHECK-NOT: bdnz
122+
; CHECK: bdnz
127123
; a < b
128124
define void @test_pos16_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
129125
entry:
@@ -181,8 +177,7 @@ for.end: ; preds = %for.body, %entry
181177

182178

183179
; CHECK: test_pos2_ri_ne
184-
; FIXME: Support this loop!
185-
; CHECK-NOT: bdnz
180+
; CHECK: bdnz
186181
; a < b
187182
define void @test_pos2_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
188183
entry:
@@ -211,8 +206,7 @@ for.end: ; preds = %for.body, %entry
211206

212207

213208
; CHECK: test_pos4_ri_ne
214-
; FIXME: Support this loop!
215-
; CHECK-NOT: bdnz
209+
; CHECK: bdnz
216210
; a < b
217211
define void @test_pos4_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
218212
entry:
@@ -241,8 +235,7 @@ for.end: ; preds = %for.body, %entry
241235

242236

243237
; CHECK: test_pos8_ri_ne
244-
; FIXME: Support this loop!
245-
; CHECK-NOT: bdnz
238+
; CHECK: bdnz
246239
; a < b
247240
define void @test_pos8_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
248241
entry:
@@ -271,8 +264,7 @@ for.end: ; preds = %for.body, %entry
271264

272265

273266
; CHECK: test_pos16_ri_ne
274-
; FIXME: Support this loop!
275-
; CHECK-NOT: bdnz
267+
; CHECK: bdnz
276268
; a < b
277269
define void @test_pos16_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
278270
entry:
@@ -330,8 +322,7 @@ for.end: ; preds = %for.body, %entry
330322

331323

332324
; CHECK: test_pos2_rr_ne
333-
; FIXME: Support this loop!
334-
; CHECK-NOT: bdnz
325+
; CHECK: bdnz
335326
; a < b
336327
define void @test_pos2_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
337328
entry:
@@ -360,8 +351,7 @@ for.end: ; preds = %for.body, %entry
360351

361352

362353
; CHECK: test_pos4_rr_ne
363-
; FIXME: Support this loop!
364-
; CHECK-NOT: bdnz
354+
; CHECK: bdnz
365355
; a < b
366356
define void @test_pos4_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
367357
entry:
@@ -390,8 +380,7 @@ for.end: ; preds = %for.body, %entry
390380

391381

392382
; CHECK: test_pos8_rr_ne
393-
; FIXME: Support this loop!
394-
; CHECK-NOT: bdnz
383+
; CHECK: bdnz
395384
; a < b
396385
define void @test_pos8_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
397386
entry:
@@ -420,8 +409,7 @@ for.end: ; preds = %for.body, %entry
420409

421410

422411
; CHECK: test_pos16_rr_ne
423-
; FIXME: Support this loop!
424-
; CHECK-NOT: bdnz
412+
; CHECK: bdnz
425413
; a < b
426414
define void @test_pos16_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
427415
entry:

llvm/test/CodeGen/PowerPC/ctrloop-shortLoops.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,12 @@ for.body: ; preds = %entry, %for.body
8686
}
8787

8888
; Function Attrs: norecurse nounwind
89+
; On core a2q, IssueWidth is 1. On core pwr8, IssueWidth is 8.
90+
; a2q should use mtctr, but pwr8 should not use mtctr.
8991
define signext i32 @testTripCount2NonSmallLoop() {
9092
; CHECK-LABEL: testTripCount2NonSmallLoop:
91-
; CHECK: blt
92-
; CHECK: beq
93+
; CHECK-A2Q: mtctr
94+
; CHECK-PWR8-NOT: mtctr
9395
; CHECK: blr
9496

9597
entry:

0 commit comments

Comments
 (0)