Commit 7b13365

Resolve review comments.

- Remove dead instructions of the interleave tree.
- Code improvements.

Change-Id: I2020bf850c987829bf6e2255425eacee9361a980
1 parent 85d159b commit 7b13365

11 files changed: 311 additions, 200 deletions


llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 4 additions & 2 deletions
@@ -3156,7 +3156,8 @@ class TargetLoweringBase {
  /// \p DI is the deinterleave intrinsic.
  /// \p LI is the accompanying load instruction
  virtual bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
-                                                LoadInst *LI) const {
+                                                LoadInst *LI,
+                                                SmallVectorImpl<Instruction *> &DeadInsts) const {
    return false;
  }

@@ -3167,7 +3168,8 @@ class TargetLoweringBase {
  /// \p II is the interleave intrinsic.
  /// \p SI is the accompanying store instruction
  virtual bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
-                                               StoreInst *SI) const {
+                                               StoreInst *SI,
+                                               SmallVectorImpl<Instruction *> &DeadInstructions) const {
    return false;
  }

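The new out-parameter moves deletion of matched instructions out of the target hooks: an override reports what became dead, and InterleavedAccessPass erases the whole batch later. A minimal sketch of how an override might honor that contract (hypothetical function and variable names; only the hook signature above comes from this patch):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

// Sketch of the contract: on a successful match, queue every instruction the
// match consumed into DeadInsts instead of erasing it here; the pass erases
// the batch (together with DI and LI) once the function has been processed.
static bool lowerDeinterleaveSketch(IntrinsicInst *DI, LoadInst *LI,
                                    SmallVectorImpl<Instruction *> &DeadInsts) {
  SmallVector<Instruction *, 4> Matched;
  for (User *U : DI->users())
    if (auto *EV = dyn_cast<ExtractValueInst>(U))
      Matched.push_back(EV);
  if (Matched.size() != 2)
    return false; // Pattern not recognized; leave DeadInsts untouched.
  // ... emit the target-specific load and replaceAllUsesWith here ...
  DeadInsts.append(Matched.begin(), Matched.end());
  return true;
}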
llvm/lib/CodeGen/InterleavedAccessPass.cpp

Lines changed: 5 additions & 3 deletions
@@ -489,7 +489,7 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
  LLVM_DEBUG(dbgs() << "IA: Found a deinterleave intrinsic: " << *DI << "\n");

  // Try and match this with target specific intrinsics.
-  if (!TLI->lowerDeinterleaveIntrinsicToLoad(DI, LI))
+  if (!TLI->lowerDeinterleaveIntrinsicToLoad(DI, LI, DeadInsts))
    return false;

  // We now have a target-specific load, so delete the old one.

@@ -510,13 +510,15 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(

  LLVM_DEBUG(dbgs() << "IA: Found an interleave intrinsic: " << *II << "\n");

+  SmallVector<Instruction *, 32> InterleaveDeadInsts;
  // Try and match this with target specific intrinsics.
-  if (!TLI->lowerInterleaveIntrinsicToStore(II, SI))
+  if (!TLI->lowerInterleaveIntrinsicToStore(II, SI, InterleaveDeadInsts))
    return false;

  // We now have a target-specific store, so delete the old one.
  DeadInsts.push_back(SI);
  DeadInsts.push_back(II);
+  DeadInsts.insert(DeadInsts.end(), InterleaveDeadInsts.begin(), InterleaveDeadInsts.end());
  return true;
}

@@ -537,7 +539,7 @@ bool InterleavedAccessImpl::runOnFunction(Function &F) {
        // with a factor of 2.
        if (II->getIntrinsicID() == Intrinsic::vector_deinterleave2)
          Changed |= lowerDeinterleaveIntrinsic(II, DeadInsts);
-        if (II->getIntrinsicID() == Intrinsic::vector_interleave2)
+        else if (II->getIntrinsicID() == Intrinsic::vector_interleave2)
          Changed |= lowerInterleaveIntrinsic(II, DeadInsts);
      }
    }

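Both lowering paths now feed a single pass-level DeadInsts list, so cleanup happens once per function instead of inside each target hook. A minimal sketch of that deferred erase, assuming entries are queued users-first so each instruction has no remaining uses by the time it is erased (the actual cleanup loop in runOnFunction is outside this diff):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Deferred cleanup sketch: erase queued instructions in order. The queue is
// assumed to list users before the values they use, so every erase operates
// on an instruction whose uses are already gone.
static void eraseQueuedInstructions(SmallVectorImpl<Instruction *> &DeadInsts) {
  for (Instruction *I : DeadInsts)
    I->eraseFromParent();
  DeadInsts.clear();
}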
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 65 additions & 62 deletions
@@ -16907,7 +16907,8 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
}

bool getDeinterleave2Values(
-    Value *DI, SmallVectorImpl<Instruction *> &DeinterleavedValues) {
+    Value *DI, SmallVectorImpl<Instruction *> &DeinterleavedValues,
+    SmallVectorImpl<Instruction *> &DeadInsts) {
  if (!DI->hasNUses(2))
    return false;
  auto *Extr1 = dyn_cast<ExtractValueInst>(*(DI->user_begin()));

@@ -16928,13 +16929,13 @@ bool getDeinterleave2Values(
    LLVM_DEBUG(dbgs() << "matching deinterleave2 failed\n");
    return false;
  }
+  // DeinterleavedValues will be replace by output of ld2
+  DeadInsts.insert(DeadInsts.end(), DeinterleavedValues.begin(), DeinterleavedValues.end());
  return true;
}

/*
-Diagram for DI tree.
-          [LOAD]
-             |
+DeinterleaveIntrinsic tree:
           [DI]
          /    \
  [Extr<0>]  [Extr<1>]

@@ -16944,23 +16945,22 @@ Diagram for DI tree.
[Extr<0>][Extr<1>] [Extr<0>][Extr<1>]
    |        |         |        |
roots:  A        C         B        D
-roots in correct order of DI4: A B C D.
-If there is a pattern matches the deinterleave tree above, then we can construct
-DI4 out of that pattern. This function tries to match the deinterleave tree
-pattern, and fetch the tree roots, so that in further steps they can be replaced
-by the output of DI4.
+roots in correct order of DI4 will be: A B C D.
+Returns true if `DI` is the top of an IR tree that represents a theoretical vector.deinterleave4 intrinsic.
+When true is returned, `DeinterleavedValues` vector is populated with the results such an intrinsic would return:
+(i.e. {A, B, C, D } = vector.deinterleave4(...))
*/
bool getDeinterleave4Values(Value *DI,
                            SmallVectorImpl<Instruction *> &DeinterleavedValues,
-                            SmallVectorImpl<Instruction *> &DeadInstructions) {
+                            SmallVectorImpl<Instruction *> &DeadInsts) {
  if (!DI->hasNUses(2))
    return false;
  auto *Extr1 = dyn_cast<ExtractValueInst>(*(DI->user_begin()));
  auto *Extr2 = dyn_cast<ExtractValueInst>(*(++DI->user_begin()));
  if (!Extr1 || !Extr2)
    return false;

-  if (!Extr1->hasNUses(1) || !Extr2->hasNUses(1))
+  if (!Extr1->hasOneUse() || !Extr2->hasOneUse())
    return false;
  auto *DI1 = *(Extr1->user_begin());
  auto *DI2 = *(Extr2->user_begin());

@@ -16972,8 +16972,7 @@ bool getDeinterleave4Values(Value *DI,
  auto *C = dyn_cast<ExtractValueInst>(*(++DI1->user_begin()));
  auto *B = dyn_cast<ExtractValueInst>(*(DI2->user_begin()));
  auto *D = dyn_cast<ExtractValueInst>(*(++DI2->user_begin()));
-  // Make sure that the A,B,C,D are instructions of ExtractValue,
-  // before getting the extract index
+  // Make sure that the A,B,C and D are ExtractValue instructions before getting the extract index
  if (!A || !B || !C || !D)
    return false;

@@ -17004,35 +17003,35 @@ bool getDeinterleave4Values(Value *DI,
    return false;
  }

-  // These Values will not be used anymre,
+  // These Values will not be used anymore,
  // DI4 will be created instead of nested DI1 and DI2
-  DeadInstructions.push_back(cast<Instruction>(DI1));
-  DeadInstructions.push_back(cast<Instruction>(Extr1));
-  DeadInstructions.push_back(cast<Instruction>(DI2));
-  DeadInstructions.push_back(cast<Instruction>(Extr2));
+  DeadInsts.insert(DeadInsts.end(), DeinterleavedValues.begin(), DeinterleavedValues.end());
+  DeadInsts.push_back(cast<Instruction>(DI1));
+  DeadInsts.push_back(cast<Instruction>(Extr1));
+  DeadInsts.push_back(cast<Instruction>(DI2));
+  DeadInsts.push_back(cast<Instruction>(Extr2));

  return true;
}

bool getDeinterleavedValues(Value *DI,
                            SmallVectorImpl<Instruction *> &DeinterleavedValues,
-                            SmallVectorImpl<Instruction *> &DeadInstructions) {
-  if (getDeinterleave4Values(DI, DeinterleavedValues, DeadInstructions))
+                            SmallVectorImpl<Instruction *> &DeadInsts) {
+  if (getDeinterleave4Values(DI, DeinterleavedValues, DeadInsts))
    return true;
-  return getDeinterleave2Values(DI, DeinterleavedValues);
+  return getDeinterleave2Values(DI, DeinterleavedValues, DeadInsts);
}

bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
-    IntrinsicInst *DI, LoadInst *LI) const {
+    IntrinsicInst *DI, LoadInst *LI, SmallVectorImpl<Instruction *> &DeadInsts) const {
  // Only deinterleave2 supported at present.
  if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2)
    return false;

  SmallVector<Instruction *, 4> DeinterleavedValues;
-  SmallVector<Instruction *, 4> DeadInstructions;
  const DataLayout &DL = DI->getModule()->getDataLayout();

-  if (!getDeinterleavedValues(DI, DeinterleavedValues, DeadInstructions)) {
+  if (!getDeinterleavedValues(DI, DeinterleavedValues, DeadInsts)) {
    LLVM_DEBUG(dbgs() << "Matching ld2 and ld4 patterns failed\n");
    return false;
  }

@@ -17042,13 +17041,17 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
  VectorType *VTy = cast<VectorType>(DeinterleavedValues[0]->getType());

  bool UseScalable;
-  if (!isLegalInterleavedAccessType(VTy, DL, UseScalable))
+  if (!isLegalInterleavedAccessType(VTy, DL, UseScalable)) {
+    DeadInsts.clear();
    return false;
+  }

  // TODO: Add support for using SVE instructions with fixed types later, using
  // the code from lowerInterleavedLoad to obtain the correct container type.
-  if (UseScalable && !VTy->isScalableTy())
+  if (UseScalable && !VTy->isScalableTy()) {
+    DeadInsts.clear();
    return false;
+  }

  unsigned NumLoads = getNumInterleavedAccesses(VTy, DL, UseScalable);
  VectorType *LdTy =

@@ -17066,10 +17069,9 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
      Builder.CreateVectorSplat(LdTy->getElementCount(), Builder.getTrue());

  Value *BaseAddr = LI->getPointerOperand();
-  Value *Result;
  if (NumLoads > 1) {
-    // Create multiple legal small ldN instead of a wide one.
-    SmallVector<Value *, 4> WideValues(Factor, (PoisonValue::get(VTy)));
+    // Create multiple legal small ldN.
+    SmallVector<Value *, 4> ExtractedLdValues(Factor, PoisonValue::get(VTy));
    for (unsigned I = 0; I < NumLoads; ++I) {
      Value *Offset = Builder.getInt64(I * Factor);

@@ -17082,53 +17084,45 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
      Value *Idx =
          Builder.getInt64(I * LdTy->getElementCount().getKnownMinValue());
      for (unsigned J = 0; J < Factor; ++J) {
-        WideValues[J] = Builder.CreateInsertVector(
-            VTy, WideValues[J], Builder.CreateExtractValue(LdN, J), Idx);
+        ExtractedLdValues[J] = Builder.CreateInsertVector(
+            VTy, ExtractedLdValues[J], Builder.CreateExtractValue(LdN, J), Idx);
      }
+      LLVM_DEBUG(dbgs() << "LdN4 res: "; LdN->dump());
    }
-    if (Factor == 2)
-      Result = PoisonValue::get(StructType::get(VTy, VTy));
-    else
-      Result = PoisonValue::get(StructType::get(VTy, VTy, VTy, VTy));
-    // Construct the wide result out of the small results.
-    for (unsigned J = 0; J < Factor; ++J) {
-      Result = Builder.CreateInsertValue(Result, WideValues[J], J);
-    }
+    // Replcae output of deinterleave2 intrinsic by output of ldN2/ldN4
+    for (unsigned J = 0; J < Factor; ++J)
+      DeinterleavedValues[J]->replaceAllUsesWith(ExtractedLdValues[J]);
  } else {
+    Value *Result;
    if (UseScalable)
      Result = Builder.CreateCall(LdNFunc, {Pred, BaseAddr}, "ldN");
    else
      Result = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
+    // Replcae output of deinterleave2 intrinsic by output of ldN2/ldN4
+    for (unsigned I = 0; I < DeinterleavedValues.size(); I++) {
+      Value *NewExtract = Builder.CreateExtractValue(Result, I);
+      DeinterleavedValues[I]->replaceAllUsesWith(NewExtract);
+    }
  }
-  // Itereate over old deinterleaved values to replace it by
-  // the new values.
-  for (unsigned I = 0; I < DeinterleavedValues.size(); I++) {
-    Value *NewExtract = Builder.CreateExtractValue(Result, I);
-    DeinterleavedValues[I]->replaceAllUsesWith(NewExtract);
-    cast<Instruction>(DeinterleavedValues[I])->eraseFromParent();
-  }
-  for (auto &dead : DeadInstructions)
-    dead->eraseFromParent();
  return true;
}

/*
-Diagram for Interleave tree.
+InterleaveIntrinsic tree.
          A    C          B    D
           \  /            \  /
-     [Interleave]     [Interleave]
+         [II]             [II]
               \          /
-              [Interleave]
-                   |
-                [Store]
+                 [II]
+
values in correct order of interleave4: A B C D.
-If there is a pattern matches the interleave tree above, then we can construct
-Interleave4 out of that pattern. This function tries to match the interleave
-tree pattern, and fetch the values that we want to interleave, so that in
-further steps they can be replaced by the output of Inteleave4.
+Returns true if `II` is the root of an IR tree that represents a theoretical vector.interleave4 intrinsic.
+When true is returned, `ValuesToInterleave` vector is populated with the inputs such an intrinsic would take:
+(i.e. vector.interleave4(A, B, C, D)).
*/
bool getValuesToInterleave(Value *II,
-                           SmallVectorImpl<Value *> &ValuesToInterleave) {
+                           SmallVectorImpl<Value *> &ValuesToInterleave,
+                           SmallVectorImpl<Instruction *> &DeadInsts) {
  Value *A, *B, *C, *D;
  // Try to match interleave of Factor 4
  if (match(II, m_Interleave2(m_Interleave2(m_Value(A), m_Value(C)),

@@ -17137,6 +17131,11 @@ bool getValuesToInterleave(Value *II,
    ValuesToInterleave.push_back(B);
    ValuesToInterleave.push_back(C);
    ValuesToInterleave.push_back(D);
+    // intermediate II will not be needed anymore
+    Value *II1, *II2;
+    assert(match(II, m_Interleave2(m_Value(II1), m_Value(II2))) && "II tree is expected");
+    DeadInsts.push_back(cast<Instruction>(II1));
+    DeadInsts.push_back(cast<Instruction>(II2));
    return true;
  }

@@ -17151,13 +17150,13 @@ bool getValuesToInterleave(Value *II,
}

bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
-    IntrinsicInst *II, StoreInst *SI) const {
+    IntrinsicInst *II, StoreInst *SI, SmallVectorImpl<Instruction *> &DeadInsts) const {
  // Only interleave2 supported at present.
  if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
    return false;

  SmallVector<Value *, 4> ValuesToInterleave;
-  if (!getValuesToInterleave(II, ValuesToInterleave)) {
+  if (!getValuesToInterleave(II, ValuesToInterleave, DeadInsts)) {
    LLVM_DEBUG(dbgs() << "Matching st2 and st4 patterns failed\n");
    return false;
  }

@@ -17168,13 +17167,17 @@ bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
  const DataLayout &DL = II->getModule()->getDataLayout();

  bool UseScalable;
-  if (!isLegalInterleavedAccessType(VTy, DL, UseScalable))
+  if (!isLegalInterleavedAccessType(VTy, DL, UseScalable)) {
+    DeadInsts.clear();
    return false;
+  }

  // TODO: Add support for using SVE instructions with fixed types later, using
  // the code from lowerInterleavedStore to obtain the correct container type.
-  if (UseScalable && !VTy->isScalableTy())
+  if (UseScalable && !VTy->isScalableTy()) {
+    DeadInsts.clear();
    return false;
+  }

  unsigned NumStores = getNumInterleavedAccesses(VTy, DL, UseScalable);

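For reference, the factor-4 shape that getValuesToInterleave recognizes is a two-level nest of vector.interleave2 calls, and the two inner calls are what the new code queues as dead. A condensed sketch of that match (illustrative, not copied from the patch; it spells out the generic m_Intrinsic matcher where the patch uses the equivalent m_Interleave2 shorthand):

#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Match a factor-4 interleave written as nested vector.interleave2 calls:
//   Root = interleave2(interleave2(A, C), interleave2(B, D))
// On success the leaves come back in interleave4 operand order A, B, C, D;
// the two inner interleave2 calls become dead once Root is lowered to stN.
static bool matchInterleave4(Value *Root, Value *&A, Value *&B, Value *&C,
                             Value *&D) {
  auto Lo = m_Intrinsic<Intrinsic::vector_interleave2>(m_Value(A), m_Value(C));
  auto Hi = m_Intrinsic<Intrinsic::vector_interleave2>(m_Value(B), m_Value(D));
  return match(Root, m_Intrinsic<Intrinsic::vector_interleave2>(Lo, Hi));
}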
llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 4 additions & 2 deletions
@@ -704,10 +704,12 @@ class AArch64TargetLowering : public TargetLowering {
                             unsigned Factor) const override;

  bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
-                                        LoadInst *LI) const override;
+                                        LoadInst *LI,
+                                        SmallVectorImpl<Instruction *> &DeadInsts) const override;

  bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
-                                       StoreInst *SI) const override;
+                                       StoreInst *SI,
+                                       SmallVectorImpl<Instruction *> &DeadInsts) const override;

  bool isLegalAddImmediate(int64_t) const override;
  bool isLegalAddScalableImmediate(int64_t) const override;

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 4 additions & 2 deletions
@@ -21776,7 +21776,8 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
}

bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
-                                                           LoadInst *LI) const {
+                                                           LoadInst *LI,
+                                                           SmallVectorImpl<Instruction *> &DeadInsts) const {
  assert(LI->isSimple());
  IRBuilder<> Builder(LI);

@@ -21826,7 +21827,8 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
}

bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
-                                                          StoreInst *SI) const {
+                                                          StoreInst *SI,
+                                                          SmallVectorImpl<Instruction *> &DeadInstructions) const {
  assert(SI->isSimple());
  IRBuilder<> Builder(SI);

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 4 additions & 2 deletions
@@ -877,10 +877,12 @@ class RISCVTargetLowering : public TargetLowering {
                             unsigned Factor) const override;

  bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II,
-                                        LoadInst *LI) const override;
+                                        LoadInst *LI,
+                                        SmallVectorImpl<Instruction *> &DeadInsts) const override;

  bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
-                                       StoreInst *SI) const override;
+                                       StoreInst *SI,
+                                       SmallVectorImpl<Instruction *> &DeadInsts) const override;

  bool supportKCFIBundles() const override { return true; }
