@@ -16906,9 +16906,9 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
   return true;
 }
 
-bool getDeinterleave2Values(Value *DI,
-                            SmallVectorImpl<Instruction *> &DeinterleavedValues,
-                            SmallVectorImpl<Instruction *> &DeadInsts) {
+bool getDeinterleave2Values(
+    Value *DI, SmallVectorImpl<Instruction *> &DeinterleavedValues,
+    SmallVectorImpl<Instruction *> &DeInterleaveDeadInsts) {
   if (!DI->hasNUses(2))
     return false;
   auto *Extr1 = dyn_cast<ExtractValueInst>(*(DI->user_begin()));
@@ -16930,8 +16930,9 @@ bool getDeinterleave2Values(Value *DI,
     return false;
   }
   // DeinterleavedValues will be replace by output of ld2
-  DeadInsts.insert(DeadInsts.end(), DeinterleavedValues.begin(),
-                   DeinterleavedValues.end());
+  DeInterleaveDeadInsts.insert(DeInterleaveDeadInsts.end(),
+                               DeinterleavedValues.begin(),
+                               DeinterleavedValues.end());
   return true;
 }
 
@@ -16952,9 +16953,9 @@ vector.deinterleave4 intrinsic. When true is returned, `DeinterleavedValues`
 vector is populated with the results such an intrinsic would return: (i.e. {A,
 B, C, D } = vector.deinterleave4(...))
 */
-bool getDeinterleave4Values(Value *DI,
-                            SmallVectorImpl<Instruction *> &DeinterleavedValues,
-                            SmallVectorImpl<Instruction *> &DeadInsts) {
+bool getDeinterleave4Values(
+    Value *DI, SmallVectorImpl<Instruction *> &DeinterleavedValues,
+    SmallVectorImpl<Instruction *> &DeInterleaveDeadInsts) {
   if (!DI->hasNUses(2))
     return false;
   auto *Extr1 = dyn_cast<ExtractValueInst>(*(DI->user_begin()));
@@ -17008,22 +17009,23 @@ bool getDeinterleave4Values(Value *DI,
 
   // These Values will not be used anymore,
   // DI4 will be created instead of nested DI1 and DI2
-  DeadInsts.insert(DeadInsts.end(), DeinterleavedValues.begin(),
-                   DeinterleavedValues.end());
-  DeadInsts.push_back(cast<Instruction>(DI1));
-  DeadInsts.push_back(cast<Instruction>(Extr1));
-  DeadInsts.push_back(cast<Instruction>(DI2));
-  DeadInsts.push_back(cast<Instruction>(Extr2));
+  DeInterleaveDeadInsts.insert(DeInterleaveDeadInsts.end(),
+                               DeinterleavedValues.begin(),
+                               DeinterleavedValues.end());
+  DeInterleaveDeadInsts.push_back(cast<Instruction>(DI1));
+  DeInterleaveDeadInsts.push_back(cast<Instruction>(Extr1));
+  DeInterleaveDeadInsts.push_back(cast<Instruction>(DI2));
+  DeInterleaveDeadInsts.push_back(cast<Instruction>(Extr2));
 
   return true;
 }
 
-bool getDeinterleavedValues(Value *DI,
-                            SmallVectorImpl<Instruction *> &DeinterleavedValues,
-                            SmallVectorImpl<Instruction *> &DeadInsts) {
-  if (getDeinterleave4Values(DI, DeinterleavedValues, DeadInsts))
+bool getDeinterleavedValues(
+    Value *DI, SmallVectorImpl<Instruction *> &DeinterleavedValues,
+    SmallVectorImpl<Instruction *> &DeInterleaveDeadInsts) {
+  if (getDeinterleave4Values(DI, DeinterleavedValues, DeInterleaveDeadInsts))
     return true;
-  return getDeinterleave2Values(DI, DeinterleavedValues, DeadInsts);
+  return getDeinterleave2Values(DI, DeinterleavedValues, DeInterleaveDeadInsts);
 }
 
 bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
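Taken together, the matchers above now share one contract: every instruction that becomes dead once the pattern is lowered is recorded in a caller-provided scratch list (DeInterleaveDeadInsts) rather than in the pass-level DeadInsts. A condensed sketch of how the lowering hook below consumes that contract, using only names that already appear in this patch; it illustrates the ordering and is not a verbatim copy of lowerDeinterleaveIntrinsicToLoad:

    SmallVector<Instruction *, 4> DeinterleavedValues;
    SmallVector<Instruction *, 4> DeInterleaveDeadInsts;
    // The matcher only fills the local scratch list.
    if (!getDeinterleavedValues(DI, DeinterleavedValues, DeInterleaveDeadInsts))
      return false; // pass-level DeadInsts was never touched
    if (!isLegalInterleavedAccessType(VTy, DL, UseScalable))
      return false; // nothing to clear on this bail-out either
    // ... emit ld2/ld4 and replaceAllUsesWith on DeinterleavedValues ...
    DeadInsts.insert(DeadInsts.end(), DeInterleaveDeadInsts.begin(),
                     DeInterleaveDeadInsts.end()); // publish only on success
    return true;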
@@ -17034,9 +17036,9 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
     return false;
 
   SmallVector<Instruction *, 4> DeinterleavedValues;
-  const DataLayout &DL = DI->getModule()->getDataLayout();
+  SmallVector<Instruction *, 4> DeInterleaveDeadInsts;
 
-  if (!getDeinterleavedValues(DI, DeinterleavedValues, DeadInsts)) {
+  if (!getDeinterleavedValues(DI, DeinterleavedValues, DeInterleaveDeadInsts)) {
     LLVM_DEBUG(dbgs() << "Matching ld2 and ld4 patterns failed\n");
     return false;
   }
@@ -17045,18 +17047,15 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
          "Currently supported Factor is 2 or 4 only");
   VectorType *VTy = cast<VectorType>(DeinterleavedValues[0]->getType());
 
+  const DataLayout &DL = DI->getModule()->getDataLayout();
   bool UseScalable;
-  if (!isLegalInterleavedAccessType(VTy, DL, UseScalable)) {
-    DeadInsts.clear();
+  if (!isLegalInterleavedAccessType(VTy, DL, UseScalable))
     return false;
-  }
 
   // TODO: Add support for using SVE instructions with fixed types later, using
   // the code from lowerInterleavedLoad to obtain the correct container type.
-  if (UseScalable && !VTy->isScalableTy()) {
-    DeadInsts.clear();
+  if (UseScalable && !VTy->isScalableTy())
     return false;
-  }
 
   unsigned NumLoads = getNumInterleavedAccesses(VTy, DL, UseScalable);
   VectorType *LdTy =
@@ -17094,7 +17093,7 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
       }
       LLVM_DEBUG(dbgs() << "LdN4 res: "; LdN->dump());
     }
-    // Replcae output of deinterleave2 intrinsic by output of ldN2/ldN4
+    // Replace output of deinterleave2 intrinsic by output of ldN2/ldN4
     for (unsigned J = 0; J < Factor; ++J)
      DeinterleavedValues[J]->replaceAllUsesWith(ExtractedLdValues[J]);
   } else {
@@ -17103,12 +17102,14 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
       Result = Builder.CreateCall(LdNFunc, {Pred, BaseAddr}, "ldN");
     else
       Result = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
-    // Replcae output of deinterleave2 intrinsic by output of ldN2/ldN4
+    // Replace output of deinterleave2 intrinsic by output of ldN2/ldN4
     for (unsigned I = 0; I < DeinterleavedValues.size(); I++) {
       Value *NewExtract = Builder.CreateExtractValue(Result, I);
       DeinterleavedValues[I]->replaceAllUsesWith(NewExtract);
     }
   }
+  DeadInsts.insert(DeadInsts.end(), DeInterleaveDeadInsts.begin(),
+                   DeInterleaveDeadInsts.end());
   return true;
 }
 
@@ -17126,9 +17127,9 @@ vector.interleave4 intrinsic. When true is returned, `ValuesToInterleave` vector
 is populated with the inputs such an intrinsic would take: (i.e.
 vector.interleave4(A, B, C, D)).
 */
-bool getValuesToInterleave(Value *II,
-                           SmallVectorImpl<Value *> &ValuesToInterleave,
-                           SmallVectorImpl<Instruction *> &DeadInsts) {
+bool getValuesToInterleave(
+    Value *II, SmallVectorImpl<Value *> &ValuesToInterleave,
+    SmallVectorImpl<Instruction *> &InterleaveDeadInsts) {
   Value *A, *B, *C, *D;
   // Try to match interleave of Factor 4
   if (match(II, m_Interleave2(m_Interleave2(m_Value(A), m_Value(C)),
@@ -17138,11 +17139,10 @@ bool getValuesToInterleave(Value *II,
     ValuesToInterleave.push_back(C);
     ValuesToInterleave.push_back(D);
     // intermediate II will not be needed anymore
-    Value *II1, *II2;
-    assert(match(II, m_Interleave2(m_Value(II1), m_Value(II2))) &&
-           "II tree is expected");
-    DeadInsts.push_back(cast<Instruction>(II1));
-    DeadInsts.push_back(cast<Instruction>(II2));
+    InterleaveDeadInsts.push_back(
+        cast<Instruction>(cast<Instruction>(II)->getOperand(0)));
+    InterleaveDeadInsts.push_back(
+        cast<Instruction>(cast<Instruction>(II)->getOperand(1)));
     return true;
   }
 
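Note on the hunk above: once the factor-4 pattern m_Interleave2(m_Interleave2(A, C), m_Interleave2(B, D)) has matched, the two intermediate interleave2 instructions are by construction the operands of II itself, so the old assert-and-rematch round trip is redundant. A short sketch of that correspondence (the Outer/Inner names are illustrative only, not from the patch):

    // II == interleave2(interleave2(A, C), interleave2(B, D))
    auto *Outer = cast<Instruction>(II);
    auto *Inner0 = cast<Instruction>(Outer->getOperand(0)); // interleave2(A, C)
    auto *Inner1 = cast<Instruction>(Outer->getOperand(1)); // interleave2(B, D)
    InterleaveDeadInsts.push_back(Inner0);
    InterleaveDeadInsts.push_back(Inner1);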
@@ -17164,7 +17164,8 @@ bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
     return false;
 
   SmallVector<Value *, 4> ValuesToInterleave;
-  if (!getValuesToInterleave(II, ValuesToInterleave, DeadInsts)) {
+  SmallVector<Instruction *, 4> InterleaveDeadInsts;
+  if (!getValuesToInterleave(II, ValuesToInterleave, InterleaveDeadInsts)) {
     LLVM_DEBUG(dbgs() << "Matching st2 and st4 patterns failed\n");
     return false;
   }
@@ -17175,17 +17176,13 @@ bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
   const DataLayout &DL = II->getModule()->getDataLayout();
 
   bool UseScalable;
-  if (!isLegalInterleavedAccessType(VTy, DL, UseScalable)) {
-    DeadInsts.clear();
+  if (!isLegalInterleavedAccessType(VTy, DL, UseScalable))
     return false;
-  }
 
   // TODO: Add support for using SVE instructions with fixed types later, using
   // the code from lowerInterleavedStore to obtain the correct container type.
-  if (UseScalable && !VTy->isScalableTy()) {
-    DeadInsts.clear();
+  if (UseScalable && !VTy->isScalableTy())
     return false;
-  }
 
   unsigned NumStores = getNumInterleavedAccesses(VTy, DL, UseScalable);
 
@@ -17226,7 +17223,8 @@ bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
     }
     Builder.CreateCall(StNFunc, ValuesToInterleave);
   }
-
+  DeadInsts.insert(DeadInsts.end(), InterleaveDeadInsts.begin(),
+                   InterleaveDeadInsts.end());
   return true;
 }
 
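The store path ends symmetrically to the load path: the local InterleaveDeadInsts list is appended to the caller's DeadInsts only after the st2/st4 call has actually been emitted, so every earlier return false leaves DeadInsts untouched. A condensed view of the new tail, again using only names from the hunks above:

    Builder.CreateCall(StNFunc, ValuesToInterleave);
    // Publish the dead intermediate interleaves only on success.
    DeadInsts.insert(DeadInsts.end(), InterleaveDeadInsts.begin(),
                     InterleaveDeadInsts.end());
    return true;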