Skip to content

Commit d53b4be

Browse files
author
Sjoerd Meijer
committed
[LoopFlatten] Add a loop-flattening pass
This is a simple pass that flattens nested loops. The intention is to optimise loop nests like this, which together access an array linearly:

  for (int i = 0; i < N; ++i)
    for (int j = 0; j < M; ++j)
      f(A[i*M+j]);

into one loop:

  for (int i = 0; i < (N*M); ++i)
    f(A[i]);

It can also flatten loops where the induction variables are not used in the loop. This can help with code size and runtime, especially on simple CPUs without advanced branch prediction.

Flattening is only worthwhile if the induction variables are only used in an expression like i*M+j. If they had any other uses, we would have to insert a div/mod to reconstruct the original values, so this wouldn't be profitable.

This partially fixes PR40581 as this pass triggers on one of the two cases. I will follow up on this to teach LoopFlatten a few more (small) tricks. Please note that LoopFlatten is not yet enabled by default.

Patch by Oliver Stannard, with minor tweaks from Dave Green and myself.

Differential Revision: https://reviews.llvm.org/D42365
1 parent 7e02bc8 commit d53b4be

File tree

13 files changed

+1764
-0
lines changed

13 files changed

+1764
-0
lines changed

llvm/include/llvm/InitializePasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,7 @@ void initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry&);
242242
void initializeLoopInfoWrapperPassPass(PassRegistry&);
243243
void initializeLoopInstSimplifyLegacyPassPass(PassRegistry&);
244244
void initializeLoopInterchangePass(PassRegistry&);
245+
void initializeLoopFlattenLegacyPassPass(PassRegistry&);
245246
void initializeLoopLoadEliminationPass(PassRegistry&);
246247
void initializeLoopPassPass(PassRegistry&);
247248
void initializeLoopPredicationLegacyPassPass(PassRegistry&);

llvm/include/llvm/LinkAllPasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ namespace {
127127
(void) llvm::createLazyValueInfoPass();
128128
(void) llvm::createLoopExtractorPass();
129129
(void) llvm::createLoopInterchangePass();
130+
(void) llvm::createLoopFlattenPass();
130131
(void) llvm::createLoopPredicationPass();
131132
(void) llvm::createLoopSimplifyPass();
132133
(void) llvm::createLoopSimplifyCFGPass();

llvm/include/llvm/Transforms/Scalar.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,12 @@ Pass *createLoopPredicationPass();
149149
//
150150
Pass *createLoopInterchangePass();
151151

152+
//===----------------------------------------------------------------------===//
153+
//
154+
// LoopFlatten - This pass flattens nested loops into a single loop.
155+
//
156+
Pass *createLoopFlattenPass();
157+
152158
//===----------------------------------------------------------------------===//
153159
//
154160
// LoopStrengthReduce - This pass strength reduces GEP instructions that use
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
//===- LoopFlatten.h - Loop Flatten ----------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file provides the interface for the Loop Flatten Pass.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#ifndef LLVM_TRANSFORMS_SCALAR_LOOPFLATTEN_H
14+
#define LLVM_TRANSFORMS_SCALAR_LOOPFLATTEN_H
15+
16+
#include "llvm/Analysis/LoopAnalysisManager.h"
17+
#include "llvm/Analysis/LoopInfo.h"
18+
#include "llvm/IR/PassManager.h"
19+
#include "llvm/Transforms/Scalar/LoopPassManager.h"
20+
21+
namespace llvm {
22+
23+
/// Loop pass for the new pass manager that flattens nested loops into a
/// single loop. This header only declares the interface; the transformation
/// itself is implemented elsewhere.
class LoopFlattenPass : public PassInfoMixin<LoopFlattenPass> {
24+
public:
25+
LoopFlattenPass() = default;
26+
27+
/// Pass entry point.
///
/// \param L  The loop the pass is run on.
/// \param AM The loop analysis manager.
/// \param AR The standard loop analysis results supplied to loop passes.
/// \param U  The loop pass manager updater.
/// \returns The set of analyses preserved by this run.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
28+
LoopStandardAnalysisResults &AR, LPMUpdater &U);
29+
};
30+
31+
} // end namespace llvm
32+
33+
#endif // LLVM_TRANSFORMS_SCALAR_LOOPFLATTEN_H

llvm/lib/Passes/PassBuilder.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@
150150
#include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
151151
#include "llvm/Transforms/Scalar/LoopDeletion.h"
152152
#include "llvm/Transforms/Scalar/LoopDistribute.h"
153+
#include "llvm/Transforms/Scalar/LoopFlatten.h"
153154
#include "llvm/Transforms/Scalar/LoopFuse.h"
154155
#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
155156
#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
@@ -250,6 +251,10 @@ static cl::opt<bool> EnableUnrollAndJam(
250251
"enable-npm-unroll-and-jam", cl::init(false), cl::Hidden,
251252
cl::desc("Enable the Unroll and Jam pass for the new PM (default = off)"));
252253

254+
static cl::opt<bool> EnableLoopFlatten(
255+
"enable-npm-loop-flatten", cl::init(false), cl::Hidden,
256+
cl::desc("Enable the Loop flattening pass for the new PM (default = off)"));
257+
253258
static cl::opt<bool> EnableSyntheticCounts(
254259
"enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore,
255260
cl::desc("Run synthetic function entry count generation "
@@ -510,6 +515,8 @@ FunctionPassManager PassBuilder::buildO1FunctionSimplificationPipeline(
510515
C(LPM2, Level);
511516

512517
LPM2.addPass(LoopDeletionPass());
518+
if (EnableLoopFlatten)
519+
LPM2.addPass(LoopFlattenPass());
513520
// Do not enable unrolling in PreLinkThinLTO phase during sample PGO
514521
// because it changes IR to make profile annotation in back compile
515522
// inaccurate. The normal unroller doesn't pay attention to forced full unroll

llvm/lib/Passes/PassRegistry.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,7 @@ LOOP_PASS("loop-rotate", LoopRotatePass())
359359
LOOP_PASS("no-op-loop", NoOpLoopPass())
360360
LOOP_PASS("print", PrintLoopPass(dbgs()))
361361
LOOP_PASS("loop-deletion", LoopDeletionPass())
362+
LOOP_PASS("loop-flatten", LoopFlattenPass())
362363
LOOP_PASS("loop-simplifycfg", LoopSimplifyCFGPass())
363364
LOOP_PASS("loop-reduce", LoopStrengthReducePass())
364365
LOOP_PASS("indvars", IndVarSimplifyPass())

llvm/lib/Transforms/IPO/PassManagerBuilder.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,10 @@ static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
9292
cl::init(false), cl::Hidden,
9393
cl::desc("Enable Unroll And Jam Pass"));
9494

95+
static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
96+
cl::Hidden,
97+
cl::desc("Enable the LoopFlatten Pass"));
98+
9599
static cl::opt<bool>
96100
EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden,
97101
cl::desc("Enable preparation for ThinLTO."));
@@ -444,6 +448,10 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
444448

445449
if (EnableLoopInterchange)
446450
MPM.add(createLoopInterchangePass()); // Interchange loops
451+
if (EnableLoopFlatten) {
452+
MPM.add(createLoopFlattenPass()); // Flatten loops
453+
MPM.add(createLoopSimplifyCFGPass());
454+
}
447455

448456
// Unroll small loops
449457
MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
@@ -1035,6 +1043,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
10351043
PM.add(createLoopDeletionPass());
10361044
if (EnableLoopInterchange)
10371045
PM.add(createLoopInterchangePass());
1046+
if (EnableLoopFlatten)
1047+
PM.add(createLoopFlattenPass());
10381048

10391049
// Unroll small loops
10401050
PM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,

llvm/lib/Transforms/Scalar/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ add_llvm_component_library(LLVMScalarOpts
3232
LoopIdiomRecognize.cpp
3333
LoopInstSimplify.cpp
3434
LoopInterchange.cpp
35+
LoopFlatten.cpp
3536
LoopLoadElimination.cpp
3637
LoopPassManager.cpp
3738
LoopPredication.cpp

0 commit comments

Comments
 (0)