Skip to content

Commit a4d7dc9

Browse files
committed
merge main into amd-staging
2 parents 9c23605 + 536fe74 commit a4d7dc9

File tree

15 files changed

+254
-98
lines changed

15 files changed

+254
-98
lines changed

clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -61,26 +61,27 @@ struct RemoveRedundantBranches : public OpRewritePattern<BrOp> {
6161
}
6262
};
6363

64-
struct RemoveEmptyScope
65-
: public OpRewritePattern<ScopeOp>::SplitMatchAndRewrite {
66-
using SplitMatchAndRewrite::SplitMatchAndRewrite;
64+
struct RemoveEmptyScope : public OpRewritePattern<ScopeOp> {
65+
using OpRewritePattern<ScopeOp>::OpRewritePattern;
6766

68-
LogicalResult match(ScopeOp op) const final {
67+
LogicalResult matchAndRewrite(ScopeOp op,
68+
PatternRewriter &rewriter) const final {
6969
// TODO: Remove this logic once CIR uses MLIR infrastructure to remove
7070
// trivially dead operations
71-
if (op.isEmpty())
71+
if (op.isEmpty()) {
72+
rewriter.eraseOp(op);
7273
return success();
74+
}
7375

7476
Region &region = op.getScopeRegion();
75-
if (region.getBlocks().front().getOperations().size() == 1)
76-
return success(isa<YieldOp>(region.getBlocks().front().front()));
77+
if (region.getBlocks().front().getOperations().size() == 1 &&
78+
isa<YieldOp>(region.getBlocks().front().front())) {
79+
rewriter.eraseOp(op);
80+
return success();
81+
}
7782

7883
return failure();
7984
}
80-
81-
void rewrite(ScopeOp op, PatternRewriter &rewriter) const final {
82-
rewriter.eraseOp(op);
83-
}
8485
};
8586

8687
//===----------------------------------------------------------------------===//

clang/test/CodeGenCXX/pragma-loop.cpp

Lines changed: 51 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -203,60 +203,70 @@ void for_test_scalable_1(int *List, int Length) {
203203
}
204204
}
205205

206-
// CHECK: ![[LOOP_1]] = distinct !{![[LOOP_1]], [[MP:![0-9]+]], ![[UNROLL_FULL:.*]]}
207-
// CHECK: ![[UNROLL_FULL]] = !{!"llvm.loop.unroll.full"}
206+
// CHECK-DAG: ![[MP:[0-9]+]] = !{!"llvm.loop.mustprogress"}
208207

209-
// CHECK: ![[LOOP_2]] = distinct !{![[LOOP_2]], [[MP]], ![[UNROLL_DISABLE:.*]], ![[DISTRIBUTE_DISABLE:.*]], ![[WIDTH_8:.*]], ![[FIXED_VEC:.*]], ![[INTERLEAVE_4:.*]], ![[VECTORIZE_ENABLE:.*]]}
210-
// CHECK: ![[UNROLL_DISABLE]] = !{!"llvm.loop.unroll.disable"}
211-
// CHECK: ![[DISTRIBUTE_DISABLE]] = !{!"llvm.loop.distribute.enable", i1 false}
212-
// CHECK: ![[WIDTH_8]] = !{!"llvm.loop.vectorize.width", i32 8}
213-
// CHECK: ![[FIXED_VEC]] = !{!"llvm.loop.vectorize.scalable.enable", i1 false}
214-
// CHECK: ![[INTERLEAVE_4]] = !{!"llvm.loop.interleave.count", i32 4}
215-
// CHECK: ![[VECTORIZE_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true}
208+
// CHECK-DAG: ![[UNROLL_DISABLE:[0-9]+]] = !{!"llvm.loop.unroll.disable"}
209+
// CHECK-DAG: ![[UNROLL_8:[0-9]+]] = !{!"llvm.loop.unroll.count", i32 8}
210+
// CHECK-DAG: ![[UNROLL_24:[0-9]+]] = !{!"llvm.loop.unroll.count", i32 24}
211+
// CHECK-DAG: ![[UNROLL_32:[0-9]+]] = !{!"llvm.loop.unroll.count", i32 32}
212+
// CHECK-DAG: ![[UNROLL_FULL:[0-9]+]] = !{!"llvm.loop.unroll.full"}
216213

217-
// CHECK: ![[LOOP_3]] = distinct !{![[LOOP_3]], [[MP]], ![[INTERLEAVE_4:.*]], ![[VECTORIZE_ENABLE]], ![[FOLLOWUP_VECTOR_3:.*]]}
218-
// CHECK: ![[FOLLOWUP_VECTOR_3]] = !{!"llvm.loop.vectorize.followup_all", [[MP]], ![[ISVECTORIZED:.*]], ![[UNROLL_8:.*]]}
219-
// CHECK: ![[ISVECTORIZED]] = !{!"llvm.loop.isvectorized"}
220-
// CHECK: ![[UNROLL_8]] = !{!"llvm.loop.unroll.count", i32 8}
214+
// CHECK-DAG: ![[DISTRIBUTE_DISABLE:[0-9]+]] = !{!"llvm.loop.distribute.enable", i1 false}
221215

222-
// CHECK: ![[LOOP_4]] = distinct !{![[LOOP_4]], ![[WIDTH_2:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_2:.*]], ![[VECTORIZE_ENABLE]]}
223-
// CHECK: ![[WIDTH_2]] = !{!"llvm.loop.vectorize.width", i32 2}
224-
// CHECK: ![[INTERLEAVE_2]] = !{!"llvm.loop.interleave.count", i32 2}
216+
// CHECK-DAG: ![[INTERLEAVE_2:[0-9]+]] = !{!"llvm.loop.interleave.count", i32 2}
217+
// CHECK-DAG: ![[INTERLEAVE_4:[0-9]+]] = !{!"llvm.loop.interleave.count", i32 4}
218+
// CHECK-DAG: ![[INTERLEAVE_8:[0-9]+]] = !{!"llvm.loop.interleave.count", i32 8}
219+
// CHECK-DAG: ![[INTERLEAVE_10:[0-9]+]] = !{!"llvm.loop.interleave.count", i32 10}
220+
// CHECK-DAG: ![[INTERLEAVE_16:[0-9]+]] = !{!"llvm.loop.interleave.count", i32 16}
225221

226-
// CHECK: ![[LOOP_5]] = distinct !{![[LOOP_5]], ![[UNROLL_DISABLE:.*]], ![[DISTRIBUTE_DISABLE:.*]], ![[WIDTH_1:.*]]}
227-
// CHECK: ![[WIDTH_1]] = !{!"llvm.loop.vectorize.width", i32 1}
222+
// CHECK-DAG: ![[VECTORIZE_ENABLE:[0-9]+]] = !{!"llvm.loop.vectorize.enable", i1 true}
223+
// CHECK-DAG: ![[FIXED_VEC:[0-9]+]] = !{!"llvm.loop.vectorize.scalable.enable", i1 false}
224+
// CHECK-DAG: ![[SCALABLE_VEC:[0-9]+]] = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
225+
// CHECK-DAG: ![[WIDTH_1:[0-9]+]] = !{!"llvm.loop.vectorize.width", i32 1}
226+
// CHECK-DAG: ![[WIDTH_2:[0-9]+]] = !{!"llvm.loop.vectorize.width", i32 2}
227+
// CHECK-DAG: ![[WIDTH_5:[0-9]+]] = !{!"llvm.loop.vectorize.width", i32 5}
228+
// CHECK-DAG: ![[WIDTH_6:[0-9]+]] = !{!"llvm.loop.vectorize.width", i32 6}
229+
// CHECK-DAG: ![[WIDTH_8:[0-9]+]] = !{!"llvm.loop.vectorize.width", i32 8}
230+
// CHECK-DAG: ![[WIDTH_10:[0-9]+]] = !{!"llvm.loop.vectorize.width", i32 10}
231+
// CHECK-DAG: ![[WIDTH_16:[0-9]+]] = !{!"llvm.loop.vectorize.width", i32 16}
228232

229-
// CHECK: ![[LOOP_6]] = distinct !{![[LOOP_6]], [[MP]], ![[WIDTH_2:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_2:.*]], ![[FOLLOWUP_VECTOR_3]]}
233+
// CHECK-DAG: ![[ISVECTORIZED:[0-9]+]] = !{!"llvm.loop.isvectorized"}
230234

231-
// CHECK: ![[LOOP_7]] = distinct !{![[LOOP_7]], [[MP]], ![[WIDTH_5:.*]], ![[FIXED_VEC]], ![[VECTORIZE_ENABLE]]}
232-
// CHECK: ![[WIDTH_5]] = !{!"llvm.loop.vectorize.width", i32 5}
235+
// CHECK-DAG: ![[LOOP_1]] = distinct !{![[LOOP_1]], ![[MP]], ![[UNROLL_FULL]]}
233236

234-
// CHECK: ![[LOOP_8]] = distinct !{![[LOOP_8]], [[MP]], ![[WIDTH_5:.*]], ![[FIXED_VEC]], ![[VECTORIZE_ENABLE]]}
237+
// CHECK-DAG: ![[LOOP_2]] = distinct !{![[LOOP_2]], ![[MP]], ![[UNROLL_DISABLE]], ![[DISTRIBUTE_DISABLE]], ![[WIDTH_8]], ![[FIXED_VEC]], ![[INTERLEAVE_4]], ![[VECTORIZE_ENABLE]]}
235238

236-
// CHECK: ![[LOOP_9]] = distinct !{![[LOOP_9]], ![[WIDTH_8:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_8:.*]], ![[FOLLOWUP_VECTOR_3]]}
239+
// CHECK-DAG: ![[LOOP_3]] = distinct !{![[LOOP_3]], ![[MP]], ![[INTERLEAVE_4]], ![[VECTORIZE_ENABLE]], ![[FOLLOWUP_VECTOR_3:[0-9]+]]}
240+
// CHECK-DAG: ![[FOLLOWUP_VECTOR_3]] = !{!"llvm.loop.vectorize.followup_all", ![[MP]], ![[ISVECTORIZED]], ![[UNROLL_8]]}
237241

238-
// CHECK: ![[LOOP_10]] = distinct !{![[LOOP_10]], ![[WIDTH_2:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_2:.*]], ![[FOLLOWUP_VECTOR_3]]}
242+
// CHECK-DAG: ![[LOOP_4]] = distinct !{![[LOOP_4]], ![[WIDTH_2]], ![[FIXED_VEC]], ![[INTERLEAVE_2]], ![[VECTORIZE_ENABLE]]}
239243

240-
// CHECK: ![[LOOP_11]] = distinct !{![[LOOP_11]], ![[WIDTH_2:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_4:.*]], ![[FOLLOWUP_VECTOR_3]]}
244+
// CHECK-DAG: ![[LOOP_5]] = distinct !{![[LOOP_5]], ![[MP]], ![[UNROLL_DISABLE]], ![[DISTRIBUTE_DISABLE]], ![[WIDTH_1]]}
241245

242-
// CHECK: ![[LOOP_12]] = distinct !{![[LOOP_12]], ![[WIDTH_6:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_10:.*]], ![[FOLLOWUP_VECTOR_12:.*]]}
243-
// CHECK: ![[FOLLOWUP_VECTOR_12]] = !{!"llvm.loop.vectorize.followup_all", ![[ISVECTORIZED:.*]], ![[UNROLL_24:.*]]}
244-
// CHECK: ![[UNROLL_24]] = !{!"llvm.loop.unroll.count", i32 24}
246+
// CHECK-DAG: ![[LOOP_6]] = distinct !{![[LOOP_6]], ![[MP]], ![[WIDTH_2]], ![[FIXED_VEC]], ![[INTERLEAVE_2]], ![[VECTORIZE_ENABLE]], ![[FOLLOWUP_VECTOR_3]]}
245247

246-
// CHECK: ![[LOOP_13]] = distinct !{![[LOOP_13]], ![[WIDTH_8:.*]], ![[INTERLEAVE_16:.*]], ![[VECTORIZE_ENABLE]], ![[FOLLOWUP_VECTOR_13:.*]]}
247-
// CHECK: ![[INTERLEAVE_16]] = !{!"llvm.loop.interleave.count", i32 16}
248-
// CHECK: ![[FOLLOWUP_VECTOR_13]] = !{!"llvm.loop.vectorize.followup_all", ![[ISVECTORIZED:.*]], ![[UNROLL_32:.*]]}
249-
// CHECK: ![[UNROLL_32]] = !{!"llvm.loop.unroll.count", i32 32}
248+
// CHECK-DAG: ![[LOOP_7]] = distinct !{![[LOOP_7]], ![[MP]], ![[WIDTH_5]], ![[FIXED_VEC]], ![[VECTORIZE_ENABLE]]}
250249

251-
// CHECK: ![[LOOP_14]] = distinct !{![[LOOP_14]], [[MP]], ![[WIDTH_10:.*]], ![[FIXED_VEC]], ![[VECTORIZE_ENABLE]]}
252-
// CHECK: ![[WIDTH_10]] = !{!"llvm.loop.vectorize.width", i32 10}
250+
// CHECK-DAG: ![[LOOP_8]] = distinct !{![[LOOP_8]], ![[MP]], ![[WIDTH_5]], ![[FIXED_VEC]], ![[VECTORIZE_ENABLE]]}
253251

254-
// CHECK: ![[LOOP_15]] = distinct !{![[LOOP_15]], ![[UNROLL_DISABLE:.*]], ![[DISTRIBUTE_DISABLE:.*]], ![[WIDTH_16:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_4:.*]], ![[VECTORIZE_ENABLE:.*]]}
255-
// CHECK: ![[WIDTH_16]] = !{!"llvm.loop.vectorize.width", i32 16}
252+
// CHECK-DAG: ![[LOOP_9]] = distinct !{![[LOOP_9]], ![[MP]], ![[WIDTH_8]], ![[FIXED_VEC]], ![[INTERLEAVE_8]], ![[VECTORIZE_ENABLE]], ![[FOLLOWUP_VECTOR_3]]}
256253

257-
// CHECK: ![[LOOP_16]] = distinct !{![[LOOP_16]], ![[UNROLL_DISABLE:.*]], ![[DISTRIBUTE_DISABLE:.*]], ![[WIDTH_16]], ![[SCALABLE_VEC:.*]], ![[INTERLEAVE_4:.*]], ![[VECTORIZE_ENABLE:.*]]}
258-
// CHECK: ![[SCALABLE_VEC]] = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
254+
// CHECK-DAG: ![[LOOP_10]] = distinct !{![[LOOP_10]], ![[MP]], ![[WIDTH_2]], ![[FIXED_VEC]], ![[INTERLEAVE_2]], ![[VECTORIZE_ENABLE]], ![[FOLLOWUP_VECTOR_3]]}
259255

260-
// CHECK: ![[LOOP_17]] = distinct !{![[LOOP_17]], ![[UNROLL_DISABLE:.*]], ![[DISTRIBUTE_DISABLE:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_4:.*]], ![[VECTORIZE_ENABLE:.*]]}
261-
// CHECK: ![[LOOP_18]] = distinct !{![[LOOP_18]], ![[UNROLL_DISABLE:.*]], ![[DISTRIBUTE_DISABLE:.*]], ![[SCALABLE_VEC]], ![[INTERLEAVE_4:.*]], ![[VECTORIZE_ENABLE:.*]]}
262-
// CHECK: ![[LOOP_19]] = distinct !{![[LOOP_19]], ![[UNROLL_DISABLE:.*]], ![[DISTRIBUTE_DISABLE:.*]], ![[WIDTH_1]], ![[SCALABLE_VEC]], ![[INTERLEAVE_4:.*]], ![[VECTORIZE_ENABLE:.*]]}
256+
// CHECK-DAG: ![[LOOP_11]] = distinct !{![[LOOP_11]], ![[MP]], ![[WIDTH_2]], ![[FIXED_VEC]], ![[INTERLEAVE_4]], ![[VECTORIZE_ENABLE]], ![[FOLLOWUP_VECTOR_3]]}
257+
258+
// CHECK-DAG: ![[LOOP_12]] = distinct !{![[LOOP_12]], ![[MP]], ![[WIDTH_6]], ![[FIXED_VEC]], ![[INTERLEAVE_10]], ![[VECTORIZE_ENABLE]], ![[FOLLOWUP_VECTOR_12:[0-9]+]]}
259+
// CHECK-DAG: ![[FOLLOWUP_VECTOR_12]] = !{!"llvm.loop.vectorize.followup_all", ![[MP]], ![[ISVECTORIZED]], ![[UNROLL_24]]}
260+
261+
// CHECK-DAG: ![[LOOP_13]] = distinct !{![[LOOP_13]], ![[MP]], ![[WIDTH_8]], ![[FIXED_VEC]], ![[INTERLEAVE_16]], ![[VECTORIZE_ENABLE]], ![[FOLLOWUP_VECTOR_13:[0-9]+]]}
262+
// CHECK-DAG: ![[FOLLOWUP_VECTOR_13]] = !{!"llvm.loop.vectorize.followup_all", ![[MP]], ![[ISVECTORIZED]], ![[UNROLL_32]]}
263+
264+
// CHECK-DAG: ![[LOOP_14]] = distinct !{![[LOOP_14]], ![[MP]], ![[WIDTH_10]], ![[FIXED_VEC]], ![[VECTORIZE_ENABLE]]}
265+
266+
// CHECK-DAG: ![[LOOP_15]] = distinct !{![[LOOP_15]], ![[MP]], ![[UNROLL_DISABLE]], ![[DISTRIBUTE_DISABLE]], ![[WIDTH_16]], ![[FIXED_VEC]], ![[INTERLEAVE_4]], ![[VECTORIZE_ENABLE]]}
267+
268+
// CHECK-DAG: ![[LOOP_16]] = distinct !{![[LOOP_16]], ![[MP]], ![[UNROLL_DISABLE]], ![[DISTRIBUTE_DISABLE]], ![[WIDTH_16]], ![[SCALABLE_VEC]], ![[INTERLEAVE_4]], ![[VECTORIZE_ENABLE]]}
269+
270+
// CHECK-DAG: ![[LOOP_17]] = distinct !{![[LOOP_17]], ![[MP]], ![[UNROLL_DISABLE]], ![[DISTRIBUTE_DISABLE]], ![[FIXED_VEC]], ![[INTERLEAVE_4]], ![[VECTORIZE_ENABLE]]}
271+
// CHECK-DAG: ![[LOOP_18]] = distinct !{![[LOOP_18]], ![[MP]], ![[UNROLL_DISABLE]], ![[DISTRIBUTE_DISABLE]], ![[SCALABLE_VEC]], ![[INTERLEAVE_4]], ![[VECTORIZE_ENABLE]]}
272+
// CHECK-DAG: ![[LOOP_19]] = distinct !{![[LOOP_19]], ![[MP]], ![[UNROLL_DISABLE]], ![[DISTRIBUTE_DISABLE]], ![[WIDTH_1]], ![[SCALABLE_VEC]], ![[INTERLEAVE_4]], ![[VECTORIZE_ENABLE]]}

llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,16 @@ DecodeGPRNoX0X2RegisterClass(MCInst &Inst, uint64_t RegNo, uint32_t Address,
197197
return DecodeGPRNoX0RegisterClass(Inst, RegNo, Address, Decoder);
198198
}
199199

200+
static DecodeStatus DecodeGPRNoX31RegisterClass(MCInst &Inst, uint32_t RegNo,
201+
uint64_t Address,
202+
const MCDisassembler *Decoder) {
203+
if (RegNo == 31) {
204+
return MCDisassembler::Fail;
205+
}
206+
207+
return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
208+
}
209+
200210
static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint32_t RegNo,
201211
uint64_t Address,
202212
const MCDisassembler *Decoder) {

llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -584,15 +584,15 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
584584
def QC_INSBPR : QCIRVInstRR<0b00010, GPR, "qc.insbpr">;
585585
def QC_INSBPRH : QCIRVInstRR<0b00011, GPR, "qc.insbprh">;
586586
def QC_EXTU : QCIBitManipRII<0b010, 0b00, GPRNoX0, "qc.extu">;
587-
def QC_EXTDU : QCIBitManipRII<0b010, 0b10, GPR, "qc.extdu">;
588-
def QC_EXTDUR : QCIRVInstRR<0b00100, GPR, "qc.extdur">;
589-
def QC_EXTDUPR : QCIRVInstRR<0b00110, GPR, "qc.extdupr">;
590-
def QC_EXTDUPRH : QCIRVInstRR<0b00111, GPR, "qc.extduprh">;
587+
def QC_EXTDU : QCIBitManipRII<0b010, 0b10, GPRNoX31, "qc.extdu">;
588+
def QC_EXTDUR : QCIRVInstRR<0b00100, GPRNoX31, "qc.extdur">;
589+
def QC_EXTDUPR : QCIRVInstRR<0b00110, GPRNoX31, "qc.extdupr">;
590+
def QC_EXTDUPRH : QCIRVInstRR<0b00111, GPRNoX31, "qc.extduprh">;
591591
def QC_EXT : QCIBitManipRII<0b010, 0b01, GPRNoX0, "qc.ext">;
592-
def QC_EXTD : QCIBitManipRII<0b010, 0b11, GPR, "qc.extd">;
593-
def QC_EXTDR : QCIRVInstRR<0b00101, GPR, "qc.extdr">;
594-
def QC_EXTDPR : QCIRVInstRR<0b01000, GPR, "qc.extdpr">;
595-
def QC_EXTDPRH : QCIRVInstRR<0b01001, GPR, "qc.extdprh">;
592+
def QC_EXTD : QCIBitManipRII<0b010, 0b11, GPRNoX31, "qc.extd">;
593+
def QC_EXTDR : QCIRVInstRR<0b00101, GPRNoX31, "qc.extdr">;
594+
def QC_EXTDPR : QCIRVInstRR<0b01000, GPRNoX31, "qc.extdpr">;
595+
def QC_EXTDPRH : QCIRVInstRR<0b01001, GPRNoX31, "qc.extdprh">;
596596
def QC_COMPRESS2 : QCIRVInstI<0b0000, "qc.compress2">;
597597
def QC_COMPRESS3 : QCIRVInstI<0b0001, "qc.compress3">;
598598
def QC_EXPAND2 : QCIRVInstI<0b0010, "qc.expand2">;

llvm/lib/Target/RISCV/RISCVRegisterInfo.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,11 @@ def GPRX1X5 : GPRRegisterClass<(add X1, X5)> {
302302
let DiagnosticString = "register must be ra or t0 (x1 or x5)";
303303
}
304304

305+
def GPRNoX31 : GPRRegisterClass<(sub GPR, X31)> {
306+
let DiagnosticType = "InvalidRegClassGPRX31";
307+
let DiagnosticString = "register must be a GPR excluding t6 (x31)";
308+
}
309+
305310
//===----------------------------------------------------------------------===//
306311
// Even-Odd GPR Pairs
307312
//===----------------------------------------------------------------------===//

llvm/lib/Transforms/Scalar/LoopInterchange.cpp

Lines changed: 33 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#include "llvm/Transforms/Scalar/LoopInterchange.h"
1616
#include "llvm/ADT/STLExtras.h"
17+
#include "llvm/ADT/SmallSet.h"
1718
#include "llvm/ADT/SmallVector.h"
1819
#include "llvm/ADT/Statistic.h"
1920
#include "llvm/ADT/StringRef.h"
@@ -72,6 +73,13 @@ using LoopVector = SmallVector<Loop *, 8>;
7273
// TODO: Check if we can use a sparse matrix here.
7374
using CharMatrix = std::vector<std::vector<char>>;
7475

76+
/// Types of rules used in profitability check.
77+
enum class RuleTy {
78+
PerLoopCacheAnalysis,
79+
PerInstrOrderCost,
80+
ForVectorization,
81+
};
82+
7583
} // end anonymous namespace
7684

7785
// Minimum loop depth supported.
@@ -84,12 +92,31 @@ static cl::opt<unsigned int> MaxLoopNestDepth(
8492
"loop-interchange-max-loop-nest-depth", cl::init(10), cl::Hidden,
8593
cl::desc("Maximum depth of loop nest considered for the transform"));
8694

87-
static cl::opt<bool> PrioritizeVectorization(
88-
"loop-interchange-prioritize-vectorization", cl::init(false), cl::Hidden,
89-
cl::desc("Prioritize increasing vectorization opportunity over cache cost "
90-
"when determining profitability"));
95+
// We prefer cache cost to vectorization by default.
96+
static cl::list<RuleTy> Profitabilities(
97+
"loop-interchange-profitabilities", cl::ZeroOrMore,
98+
cl::MiscFlags::CommaSeparated, cl::Hidden,
99+
cl::desc("List of profitability heuristics to be used. They are applied in "
100+
"the given order"),
101+
cl::list_init<RuleTy>({RuleTy::PerLoopCacheAnalysis,
102+
RuleTy::PerInstrOrderCost,
103+
RuleTy::ForVectorization}),
104+
cl::values(clEnumValN(RuleTy::PerLoopCacheAnalysis, "cache",
105+
"Prioritize loop cache cost"),
106+
clEnumValN(RuleTy::PerInstrOrderCost, "instorder",
107+
"Prioritize the IVs order of each instruction"),
108+
clEnumValN(RuleTy::ForVectorization, "vectorize",
109+
"Prioritize vectorization")));
91110

92111
#ifndef NDEBUG
112+
static bool noDuplicateRules(ArrayRef<RuleTy> Rules) {
113+
SmallSet<RuleTy, 4> Set;
114+
for (RuleTy Rule : Rules)
115+
if (!Set.insert(Rule).second)
116+
return false;
117+
return true;
118+
}
119+
93120
static void printDepMatrix(CharMatrix &DepMatrix) {
94121
for (auto &Row : DepMatrix) {
95122
for (auto D : Row)
@@ -1204,26 +1231,9 @@ bool LoopInterchangeProfitability::isProfitable(
12041231
// second highest priority rule (isProfitablePerInstrOrderCost by default).
12051232
// Likewise, only if that rule also fails to analyze the profitability, the last
12061233
// rule (isProfitableForVectorization by default) will decide.
1207-
enum class RuleTy {
1208-
PerLoopCacheAnalysis,
1209-
PerInstrOrderCost,
1210-
ForVectorization,
1211-
};
1212-
1213-
// We prefer cache cost to vectorization by default.
1214-
RuleTy RuleOrder[3] = {RuleTy::PerLoopCacheAnalysis,
1215-
RuleTy::PerInstrOrderCost, RuleTy::ForVectorization};
1216-
1217-
// If we prefer vectorization to cache cost, change the order of application
1218-
// of each rule.
1219-
if (PrioritizeVectorization) {
1220-
RuleOrder[0] = RuleTy::ForVectorization;
1221-
RuleOrder[1] = RuleTy::PerLoopCacheAnalysis;
1222-
RuleOrder[2] = RuleTy::PerInstrOrderCost;
1223-
}
1224-
1234+
assert(noDuplicateRules(Profitabilities) && "Detect duplicate rules");
12251235
std::optional<bool> shouldInterchange;
1226-
for (RuleTy RT : RuleOrder) {
1236+
for (RuleTy RT : Profitabilities) {
12271237
switch (RT) {
12281238
case RuleTy::PerLoopCacheAnalysis:
12291239
shouldInterchange = isProfitablePerLoopCacheAnalysis(CostMap, CC);

llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-zve64f.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ body: |
2424
; CHECK-NEXT: renamable $v8 = PseudoVLE64_V_M1 undef renamable $v8, [[COPY1]], 1, 6 /* e64 */, 2 /* tu, ma */, implicit $vl, implicit $vtype :: (load unknown-size, align 8)
2525
; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 8, 208 /* e32, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
2626
; CHECK-NEXT: renamable $v9 = PseudoVLE32_V_M1 undef renamable $v9, [[COPY]], 8, 5 /* e32 */, 2 /* tu, ma */, implicit $vl, implicit $vtype :: (load unknown-size, align 4)
27-
; CHECK-NEXT: INLINEASM &"# use $0 $1 $2 $3", 1 /* sideeffect attdialect */, 3145737 /* reguse:VR */, killed renamable $v10, 3145737 /* reguse:VR */, killed renamable $v11, 3145737 /* reguse:VR */, killed renamable $v8, 3145737 /* reguse:VR */, killed renamable $v9
27+
; CHECK-NEXT: INLINEASM &"# use $0 $1 $2 $3", 1 /* sideeffect attdialect */, 3997705 /* reguse:VR */, killed renamable $v10, 3997705 /* reguse:VR */, killed renamable $v11, 3997705 /* reguse:VR */, killed renamable $v8, 3997705 /* reguse:VR */, killed renamable $v9
2828
; CHECK-NEXT: PseudoRET
2929
%3:gpr = COPY $x12
3030
%2:gpr = COPY $x11
@@ -34,7 +34,7 @@ body: |
3434
renamable $v11 = PseudoVMV_S_X undef renamable $v11, %1, 8, 5 /* e32 */
3535
renamable $v8 = PseudoVLE64_V_M1 undef renamable $v8, %2, 1, 6 /* e64 */, 2 /* tu, ma */ :: (load unknown-size, align 8)
3636
renamable $v9 = PseudoVLE32_V_M1 undef renamable $v9, %3, 8, 5 /* e32 */, 2 /* tu, ma */ :: (load unknown-size, align 4)
37-
INLINEASM &"# use $0 $1 $2 $3", 1 /* sideeffect attdialect */, 3145737 /* reguse:VR */, killed renamable $v10, 3145737 /* reguse:VR */, killed renamable $v11, 3145737 /* reguse:VR */, killed renamable $v8, 3145737 /* reguse:VR */, killed renamable $v9
37+
INLINEASM &"# use $0 $1 $2 $3", 1 /* sideeffect attdialect */, 3997705 /* reguse:VR */, killed renamable $v10, 3997705 /* reguse:VR */, killed renamable $v11, 3997705 /* reguse:VR */, killed renamable $v8, 3997705 /* reguse:VR */, killed renamable $v9
3838
PseudoRET
3939
4040
...

0 commit comments

Comments
 (0)