Skip to content

Commit 571ecd3

Browse files
committed
[IA]: Construct (de)interleave4 out of (de)interleave2
- The InterleavedAccess pass is updated to spot load/store (de)interleave4-like sequences and to emit the equivalent sve.ld4 or sve.st4 intrinsics on targets that support SV. - Tests are added for targets that support SV. Change-Id: I76ef31080ddd72b182c1a3b1752a6178dc78ea84
1 parent 08a6494 commit 571ecd3

File tree

8 files changed

+296
-30
lines changed

8 files changed

+296
-30
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@
5757
#include <cstdint>
5858
#include <iterator>
5959
#include <map>
60+
#include <queue>
61+
#include <stack>
6062
#include <string>
6163
#include <utility>
6264
#include <vector>
@@ -3156,6 +3158,7 @@ class TargetLoweringBase {
31563158
/// \p DI is the deinterleave intrinsic.
31573159
/// \p LI is the accompanying load instruction
31583160
/// \p LeafNodes are the leaf values of the deinterleave2 tree rooted at \p DI,
/// as collected by the InterleavedAccess pass: extractvalue results that are
/// not themselves fed into another deinterleave2. The list may be empty.
virtual bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
3161+
SmallVector<Value *> &LeafNodes,
31593162
LoadInst *LI) const {
31603163
// Default implementation performs no lowering; the InterleavedAccess pass
// treats a false return as "not lowered" and keeps the original instructions.
return false;
31613164
}
@@ -3167,6 +3170,7 @@ class TargetLoweringBase {
31673170
/// \p II is the interleave intrinsic.
31683171
/// \p SI is the accompanying store instruction
31693172
/// \p LeafNodes are the values to be interleaved, as collected by the
/// InterleavedAccess pass: the operands found while walking the interleave2
/// tree rooted at \p II that are not themselves interleave2 intrinsics.
virtual bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
3173+
SmallVector<Value *> &LeafNodes,
31703174
StoreInst *SI) const {
31713175
// Default implementation performs no lowering; the InterleavedAccess pass
// treats a false return as "not lowered" and keeps the original instructions.
return false;
31723176
}

llvm/lib/CodeGen/InterleavedAccessPass.cpp

Lines changed: 77 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
#include "llvm/Target/TargetMachine.h"
7171
#include "llvm/Transforms/Utils/Local.h"
7272
#include <cassert>
73+
#include <queue>
7374
#include <utility>
7475

7576
using namespace llvm;
@@ -488,12 +489,57 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
488489

489490
LLVM_DEBUG(dbgs() << "IA: Found a deinterleave intrinsic: " << *DI << "\n");
490491

492+
std::stack<IntrinsicInst *> DeinterleaveTreeQueue;
493+
SmallVector<Value *> TempLeafNodes, LeafNodes;
494+
std::map<IntrinsicInst *, bool> mp;
495+
SmallVector<Instruction *> TempDeadInsts;
496+
497+
DeinterleaveTreeQueue.push(DI);
498+
while (!DeinterleaveTreeQueue.empty()) {
499+
auto CurrentDI = DeinterleaveTreeQueue.top();
500+
DeinterleaveTreeQueue.pop();
501+
TempDeadInsts.push_back(CurrentDI);
502+
// iterate over extract users of deinterleave
503+
for (auto UserExtract : CurrentDI->users()) {
504+
Instruction *Extract = dyn_cast<Instruction>(UserExtract);
505+
if (!Extract || Extract->getOpcode() != Instruction::ExtractValue)
506+
continue;
507+
bool IsLeaf = true;
508+
// iterate over deinterleave users of extract
509+
for (auto UserDI : UserExtract->users()) {
510+
IntrinsicInst *Child_DI = dyn_cast<IntrinsicInst>(UserDI);
511+
if (!Child_DI || Child_DI->getIntrinsicID() !=
512+
Intrinsic::vector_deinterleave2)
513+
continue;
514+
IsLeaf = false;
515+
if (mp.count(Child_DI) == 0) {
516+
DeinterleaveTreeQueue.push(Child_DI);
517+
}
518+
continue;
519+
}
520+
if (IsLeaf) {
521+
TempLeafNodes.push_back(UserExtract);
522+
TempDeadInsts.push_back(Extract);
523+
} else {
524+
TempDeadInsts.push_back(Extract);
525+
}
526+
}
527+
}
528+
// sort the deinterleaved nodes in the order that
529+
// they will be extracted from the target-specific intrinsic.
530+
for (unsigned I = 1; I < TempLeafNodes.size(); I += 2)
531+
LeafNodes.push_back(TempLeafNodes[I]);
532+
533+
for (unsigned I = 0; I < TempLeafNodes.size(); I += 2)
534+
LeafNodes.push_back(TempLeafNodes[I]);
535+
491536
// Try and match this with target specific intrinsics.
492-
if (!TLI->lowerDeinterleaveIntrinsicToLoad(DI, LI))
537+
if (!TLI->lowerDeinterleaveIntrinsicToLoad(DI, LeafNodes, LI))
493538
return false;
494539

495540
// We now have a target-specific load, so delete the old one.
496-
DeadInsts.push_back(DI);
541+
DeadInsts.insert(DeadInsts.end(), TempDeadInsts.rbegin(),
542+
TempDeadInsts.rend());
497543
DeadInsts.push_back(LI);
498544
return true;
499545
}
@@ -509,14 +555,38 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
509555
return false;
510556

511557
LLVM_DEBUG(dbgs() << "IA: Found an interleave intrinsic: " << *II << "\n");
512-
558+
std::queue<IntrinsicInst *> IeinterleaveTreeQueue;
559+
SmallVector<Value *> TempLeafNodes, LeafNodes;
560+
SmallVector<Instruction *> TempDeadInsts;
561+
562+
IeinterleaveTreeQueue.push(II);
563+
while (!IeinterleaveTreeQueue.empty()) {
564+
auto node = IeinterleaveTreeQueue.front();
565+
TempDeadInsts.push_back(node);
566+
IeinterleaveTreeQueue.pop();
567+
for (unsigned i = 0; i < 2; i++) {
568+
auto op = node->getOperand(i);
569+
if (auto CurrentII = dyn_cast<IntrinsicInst>(op)) {
570+
if (CurrentII->getIntrinsicID() !=
571+
Intrinsic::vector_interleave2)
572+
continue;
573+
IeinterleaveTreeQueue.push(CurrentII);
574+
continue;
575+
}
576+
TempLeafNodes.push_back(op);
577+
}
578+
}
579+
for (unsigned I = 0; I < TempLeafNodes.size(); I += 2)
580+
LeafNodes.push_back(TempLeafNodes[I]);
581+
for (unsigned I = 1; I < TempLeafNodes.size(); I += 2)
582+
LeafNodes.push_back(TempLeafNodes[I]);
513583
// Try and match this with target specific intrinsics.
514-
if (!TLI->lowerInterleaveIntrinsicToStore(II, SI))
584+
if (!TLI->lowerInterleaveIntrinsicToStore(II, LeafNodes, SI))
515585
return false;
516586

517587
// We now have a target-specific store, so delete the old one.
518588
DeadInsts.push_back(SI);
519-
DeadInsts.push_back(II);
589+
DeadInsts.insert(DeadInsts.end(), TempDeadInsts.begin(), TempDeadInsts.end());
520590
return true;
521591
}
522592

@@ -537,7 +607,8 @@ bool InterleavedAccessImpl::runOnFunction(Function &F) {
537607
// with a factor of 2.
538608
if (II->getIntrinsicID() == Intrinsic::vector_deinterleave2)
539609
Changed |= lowerDeinterleaveIntrinsic(II, DeadInsts);
540-
if (II->getIntrinsicID() == Intrinsic::vector_interleave2)
610+
611+
else if (II->getIntrinsicID() == Intrinsic::vector_interleave2)
541612
Changed |= lowerInterleaveIntrinsic(II, DeadInsts);
542613
}
543614
}

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16907,16 +16907,17 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
1690716907
}
1690816908

1690916909
bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
16910-
IntrinsicInst *DI, LoadInst *LI) const {
16910+
IntrinsicInst *DI, SmallVector<Value *> &LeafNodes, LoadInst *LI) const {
1691116911
// Only deinterleave2 supported at present.
1691216912
if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2)
1691316913
return false;
1691416914

16915-
// Only a factor of 2 supported at present.
16916-
const unsigned Factor = 2;
16915+
const unsigned Factor = std::max(2, (int)LeafNodes.size());
1691716916

16918-
VectorType *VTy = cast<VectorType>(DI->getType()->getContainedType(0));
16919-
const DataLayout &DL = DI->getDataLayout();
16917+
VectorType *VTy = (LeafNodes.size() > 0)
16918+
? cast<VectorType>(LeafNodes.front()->getType())
16919+
: cast<VectorType>(DI->getType()->getContainedType(0));
16920+
const DataLayout &DL = DI->getModule()->getDataLayout();
1692016921
bool UseScalable;
1692116922
if (!isLegalInterleavedAccessType(VTy, DL, UseScalable))
1692216923
return false;
@@ -16971,9 +16972,19 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
1697116972
Result = Builder.CreateInsertValue(Result, Left, 0);
1697216973
Result = Builder.CreateInsertValue(Result, Right, 1);
1697316974
} else {
16974-
if (UseScalable)
16975+
if (UseScalable) {
1697516976
Result = Builder.CreateCall(LdNFunc, {Pred, BaseAddr}, "ldN");
16976-
else
16977+
if (Factor == 2) {
16978+
DI->replaceAllUsesWith(Result);
16979+
return true;
16980+
}
16981+
for (unsigned I = 0; I < LeafNodes.size(); I++) {
16982+
llvm::Value *CurrentExtract = LeafNodes[I];
16983+
Value *Newextrct = Builder.CreateExtractValue(Result, I);
16984+
CurrentExtract->replaceAllUsesWith(Newextrct);
16985+
}
16986+
return true;
16987+
} else
1697716988
Result = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
1697816989
}
1697916990

@@ -16982,16 +16993,16 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
1698216993
}
1698316994

1698416995
bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
16985-
IntrinsicInst *II, StoreInst *SI) const {
16996+
IntrinsicInst *II, SmallVector<Value *> &LeafNodes, StoreInst *SI) const {
1698616997
// Only interleave2 supported at present.
1698716998
if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
1698816999
return false;
1698917000

16990-
// Only a factor of 2 supported at present.
16991-
const unsigned Factor = 2;
17001+
// leaf nodes are the nodes that will be interleaved
17002+
const unsigned Factor = LeafNodes.size();
1699217003

16993-
VectorType *VTy = cast<VectorType>(II->getOperand(0)->getType());
16994-
const DataLayout &DL = II->getDataLayout();
17004+
VectorType *VTy = cast<VectorType>(LeafNodes.front()->getType());
17005+
const DataLayout &DL = II->getModule()->getDataLayout();
1699517006
bool UseScalable;
1699617007
if (!isLegalInterleavedAccessType(VTy, DL, UseScalable))
1699717008
return false;
@@ -17035,9 +17046,12 @@ bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
1703517046
R = Builder.CreateExtractVector(StTy, II->getOperand(1), Idx);
1703617047
}
1703717048

17038-
if (UseScalable)
17039-
Builder.CreateCall(StNFunc, {L, R, Pred, Address});
17040-
else
17049+
if (UseScalable) {
17050+
SmallVector<Value *> Args(LeafNodes);
17051+
Args.push_back(Pred);
17052+
Args.push_back(Address);
17053+
Builder.CreateCall(StNFunc, Args);
17054+
} else
1704117055
Builder.CreateCall(StNFunc, {L, R, Address});
1704217056
}
1704317057

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -704,9 +704,11 @@ class AArch64TargetLowering : public TargetLowering {
704704
unsigned Factor) const override;
705705

706706
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
707+
SmallVector<Value *> &LeafNodes,
707708
LoadInst *LI) const override;
708709

709710
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
711+
SmallVector<Value *> &LeafNodes,
710712
StoreInst *SI) const override;
711713

712714
bool isLegalAddImmediate(int64_t) const override;

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21775,19 +21775,22 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
2177521775
return true;
2177621776
}
2177721777

21778-
bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
21779-
LoadInst *LI) const {
21778+
bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
21779+
IntrinsicInst *DI, SmallVector<Value *> &LeafNodes, LoadInst *LI) const {
2178021780
assert(LI->isSimple());
2178121781
IRBuilder<> Builder(LI);
2178221782

2178321783
// Only deinterleave2 supported at present.
2178421784
if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2)
2178521785
return false;
2178621786

21787-
unsigned Factor = 2;
21787+
unsigned Factor = std::max(2, (int)LeafNodes.size());
2178821788

2178921789
VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
21790-
VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
21790+
VectorType *ResVTy =
21791+
(LeafNodes.size() > 0)
21792+
? cast<VectorType>(LeafNodes.front()->getType())
21793+
: cast<VectorType>(DI->getType()->getContainedType(0));
2179121794

2179221795
if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
2179321796
LI->getPointerAddressSpace(),
@@ -21815,6 +21818,19 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
2181521818
{ResVTy, XLenTy});
2181621819
VL = Constant::getAllOnesValue(XLenTy);
2181721820
Ops.append(Factor, PoisonValue::get(ResVTy));
21821+
Ops.append({LI->getPointerOperand(), VL});
21822+
Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
21823+
//-----------
21824+
if (Factor == 2) {
21825+
DI->replaceAllUsesWith(Vlseg);
21826+
return true;
21827+
}
21828+
for (unsigned I = 0; I < LeafNodes.size(); I++) {
21829+
auto CurrentExtract = LeafNodes[I];
21830+
Value *NewExtract = Builder.CreateExtractValue(Vlseg, I);
21831+
CurrentExtract->replaceAllUsesWith(NewExtract);
21832+
}
21833+
return true;
2181821834
}
2181921835

2182021836
Ops.append({LI->getPointerOperand(), VL});
@@ -21825,19 +21841,19 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
2182521841
return true;
2182621842
}
2182721843

21828-
bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
21829-
StoreInst *SI) const {
21844+
bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
21845+
IntrinsicInst *II, SmallVector<Value *> &LeafNodes, StoreInst *SI) const {
2183021846
assert(SI->isSimple());
2183121847
IRBuilder<> Builder(SI);
2183221848

2183321849
// Only interleave2 supported at present.
2183421850
if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
2183521851
return false;
2183621852

21837-
unsigned Factor = 2;
21853+
unsigned Factor = LeafNodes.size();
2183821854

2183921855
VectorType *VTy = cast<VectorType>(II->getType());
21840-
VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());
21856+
VectorType *InVTy = cast<VectorType>(LeafNodes.front()->getType());
2184121857

2184221858
if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
2184321859
SI->getPointerAddressSpace(),
@@ -21863,6 +21879,11 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
2186321879
VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
2186421880
{InVTy, XLenTy});
2186521881
VL = Constant::getAllOnesValue(XLenTy);
21882+
SmallVector<Value *> Args(LeafNodes);
21883+
Args.push_back(SI->getPointerOperand());
21884+
Args.push_back(VL);
21885+
Builder.CreateCall(VssegNFunc, Args);
21886+
return true;
2186621887
}
2186721888

2186821889
Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -876,10 +876,12 @@ class RISCVTargetLowering : public TargetLowering {
876876
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
877877
unsigned Factor) const override;
878878

879-
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II,
879+
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
880+
SmallVector<Value *> &LeafNodes,
880881
LoadInst *LI) const override;
881882

882883
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
884+
SmallVector<Value *> &LeafNodes,
883885
StoreInst *SI) const override;
884886

885887
bool supportKCFIBundles() const override { return true; }

0 commit comments

Comments
 (0)