@@ -43,25 +43,22 @@ DisableParallelDSP("disable-arm-parallel-dsp", cl::Hidden, cl::init(false),
43
43
cl::desc(" Disable the ARM Parallel DSP pass" ));
44
44
45
45
namespace {
46
- struct OpChain ;
47
46
struct MulCandidate ;
48
47
class Reduction ;
49
48
50
- using MulCandList = SmallVector<std::unique_ptr<MulCandidate>, 8 >;
51
- using ReductionList = SmallVector<Reduction, 8 >;
52
- using MemInstList = SmallVector<LoadInst*, 8 >;
53
- using PMACPair = std::pair<MulCandidate*,MulCandidate*>;
54
- using PMACPairList = SmallVector<PMACPair, 8 >;
49
+ using MulCandList = SmallVector<std::unique_ptr<MulCandidate>, 8 >;
50
+ using MemInstList = SmallVectorImpl<LoadInst*>;
51
+ using MulPairList = SmallVector<std::pair<MulCandidate*, MulCandidate*>, 8 >;
55
52
56
53
// 'MulCandidate' holds the multiplication instructions that are candidates
57
54
// for parallel execution.
58
55
struct MulCandidate {
59
56
Instruction *Root;
60
- MemInstList VecLd; // Container for loads to widen.
61
57
Value* LHS;
62
58
Value* RHS;
63
59
bool Exchange = false ;
64
60
bool ReadOnly = true ;
61
+ SmallVector<LoadInst*, 2 > VecLd; // Container for loads to widen.
65
62
66
63
MulCandidate (Instruction *I, Value *lhs, Value *rhs) :
67
64
Root (I), LHS(lhs), RHS(rhs) { }
@@ -81,7 +78,7 @@ namespace {
81
78
Instruction *Root = nullptr ;
82
79
Value *Acc = nullptr ;
83
80
MulCandList Muls;
84
- PMACPairList MulPairs;
81
+ MulPairList MulPairs;
85
82
SmallPtrSet<Instruction*, 4 > Adds;
86
83
87
84
public:
@@ -135,7 +132,7 @@ namespace {
135
132
136
133
/// Return the MulCandidates, rooted at mul instructions, that have been
137
134
/// paired for parallel execution.
138
- PMACPairList &getMulPairs () { return MulPairs; }
135
+ MulPairList &getMulPairs () { return MulPairs; }
139
136
140
137
/// To finalise, replace the uses of the root with the intrinsic call.
141
138
void UpdateRoot (Instruction *SMLAD) {
@@ -175,8 +172,7 @@ namespace {
175
172
bool RecordMemoryOps (BasicBlock *BB);
176
173
void InsertParallelMACs (Reduction &Reduction);
177
174
bool AreSequentialLoads (LoadInst *Ld0, LoadInst *Ld1, MemInstList &VecMem);
178
- LoadInst* CreateWideLoad (SmallVectorImpl<LoadInst*> &Loads,
179
- IntegerType *LoadTy);
175
+ LoadInst* CreateWideLoad (MemInstList &Loads, IntegerType *LoadTy);
180
176
bool CreateParallelPairs (Reduction &R);
181
177
182
178
/// Try to match and generate: SMLAD, SMLADX - Signed Multiply Accumulate
@@ -349,7 +345,6 @@ bool ARMParallelDSP::RecordMemoryOps(BasicBlock *BB) {
349
345
InstSet &WritesBefore = RAWDeps[Dominated];
350
346
351
347
for (auto Before : WritesBefore) {
352
-
353
348
// We can't move the second load backward, past a write, to merge
354
349
// with the first load.
355
350
if (DT->dominates (Dominator, Before))
@@ -648,7 +643,7 @@ void ARMParallelDSP::InsertParallelMACs(Reduction &R) {
648
643
R.UpdateRoot (cast<Instruction>(Acc));
649
644
}
650
645
651
- LoadInst* ARMParallelDSP::CreateWideLoad (SmallVectorImpl<LoadInst*> &Loads,
646
+ LoadInst* ARMParallelDSP::CreateWideLoad (MemInstList &Loads,
652
647
IntegerType *LoadTy) {
653
648
assert (Loads.size () == 2 && " currently only support widening two loads" );
654
649
0 commit comments