Skip to content

Commit 393ec7f

Browse files
committed
[IA]: Construct (de)interleave4 out of (de)interleave2
- The InterleavedAccess pass is updated to spot load/store (de)interleave4-like sequences and to emit the equivalent sve.ld4 or sve.st4 intrinsics on targets that support SV. - Tests are added for targets that support SV. Change-Id: I76ef31080ddd72b182c1a3b1752a6178dc78ea84
1 parent 4547f27 commit 393ec7f

File tree

8 files changed

+294
-28
lines changed

8 files changed

+294
-28
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@
5656
#include <cstdint>
5757
#include <iterator>
5858
#include <map>
59+
#include <queue>
60+
#include <stack>
5961
#include <string>
6062
#include <utility>
6163
#include <vector>
@@ -3158,6 +3160,7 @@ class TargetLoweringBase {
31583160
/// \p DI is the deinterleave intrinsic.
31593161
/// \p LI is the accompanying load instruction
31603162
virtual bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
3163+
SmallVector<Value *> &LeafNodes,
31613164
LoadInst *LI) const {
31623165
return false;
31633166
}
@@ -3169,6 +3172,7 @@ class TargetLoweringBase {
31693172
/// \p II is the interleave intrinsic.
31703173
/// \p SI is the accompanying store instruction
31713174
virtual bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
3175+
SmallVector<Value *> &LeafNodes,
31723176
StoreInst *SI) const {
31733177
return false;
31743178
}

llvm/lib/CodeGen/InterleavedAccessPass.cpp

Lines changed: 77 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
#include "llvm/Target/TargetMachine.h"
7171
#include "llvm/Transforms/Utils/Local.h"
7272
#include <cassert>
73+
#include <queue>
7374
#include <utility>
7475

7576
using namespace llvm;
@@ -488,12 +489,57 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
488489

489490
LLVM_DEBUG(dbgs() << "IA: Found a deinterleave intrinsic: " << *DI << "\n");
490491

492+
std::stack<IntrinsicInst *> DeinterleaveTreeQueue;
493+
SmallVector<Value *> TempLeafNodes, LeafNodes;
494+
std::map<IntrinsicInst *, bool> mp;
495+
SmallVector<Instruction *> TempDeadInsts;
496+
497+
DeinterleaveTreeQueue.push(DI);
498+
while (!DeinterleaveTreeQueue.empty()) {
499+
auto CurrentDI = DeinterleaveTreeQueue.top();
500+
DeinterleaveTreeQueue.pop();
501+
TempDeadInsts.push_back(CurrentDI);
502+
// iterate over extract users of deinterleave
503+
for (auto UserExtract : CurrentDI->users()) {
504+
Instruction *Extract = dyn_cast<Instruction>(UserExtract);
505+
if (!Extract || Extract->getOpcode() != Instruction::ExtractValue)
506+
continue;
507+
bool IsLeaf = true;
508+
// iterate over deinterleave users of extract
509+
for (auto UserDI : UserExtract->users()) {
510+
IntrinsicInst *Child_DI = dyn_cast<IntrinsicInst>(UserDI);
511+
if (!Child_DI || Child_DI->getIntrinsicID() !=
512+
Intrinsic::experimental_vector_deinterleave2)
513+
continue;
514+
IsLeaf = false;
515+
if (mp.count(Child_DI) == 0) {
516+
DeinterleaveTreeQueue.push(Child_DI);
517+
}
518+
continue;
519+
}
520+
if (IsLeaf) {
521+
TempLeafNodes.push_back(UserExtract);
522+
TempDeadInsts.push_back(Extract);
523+
} else {
524+
TempDeadInsts.push_back(Extract);
525+
}
526+
}
527+
}
528+
// sort the deinterleaved nodes in the order that
529+
// they will be extracted from the target-specific intrinsic.
530+
for (unsigned I = 1; I < TempLeafNodes.size(); I += 2)
531+
LeafNodes.push_back(TempLeafNodes[I]);
532+
533+
for (unsigned I = 0; I < TempLeafNodes.size(); I += 2)
534+
LeafNodes.push_back(TempLeafNodes[I]);
535+
491536
// Try and match this with target specific intrinsics.
492-
if (!TLI->lowerDeinterleaveIntrinsicToLoad(DI, LI))
537+
if (!TLI->lowerDeinterleaveIntrinsicToLoad(DI, LeafNodes, LI))
493538
return false;
494539

495540
// We now have a target-specific load, so delete the old one.
496-
DeadInsts.push_back(DI);
541+
DeadInsts.insert(DeadInsts.end(), TempDeadInsts.rbegin(),
542+
TempDeadInsts.rend());
497543
DeadInsts.push_back(LI);
498544
return true;
499545
}
@@ -509,14 +555,38 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
509555
return false;
510556

511557
LLVM_DEBUG(dbgs() << "IA: Found an interleave intrinsic: " << *II << "\n");
512-
558+
std::queue<IntrinsicInst *> IeinterleaveTreeQueue;
559+
SmallVector<Value *> TempLeafNodes, LeafNodes;
560+
SmallVector<Instruction *> TempDeadInsts;
561+
562+
IeinterleaveTreeQueue.push(II);
563+
while (!IeinterleaveTreeQueue.empty()) {
564+
auto node = IeinterleaveTreeQueue.front();
565+
TempDeadInsts.push_back(node);
566+
IeinterleaveTreeQueue.pop();
567+
for (unsigned i = 0; i < 2; i++) {
568+
auto op = node->getOperand(i);
569+
if (auto CurrentII = dyn_cast<IntrinsicInst>(op)) {
570+
if (CurrentII->getIntrinsicID() !=
571+
Intrinsic::experimental_vector_interleave2)
572+
continue;
573+
IeinterleaveTreeQueue.push(CurrentII);
574+
continue;
575+
}
576+
TempLeafNodes.push_back(op);
577+
}
578+
}
579+
for (unsigned I = 0; I < TempLeafNodes.size(); I += 2)
580+
LeafNodes.push_back(TempLeafNodes[I]);
581+
for (unsigned I = 1; I < TempLeafNodes.size(); I += 2)
582+
LeafNodes.push_back(TempLeafNodes[I]);
513583
// Try and match this with target specific intrinsics.
514-
if (!TLI->lowerInterleaveIntrinsicToStore(II, SI))
584+
if (!TLI->lowerInterleaveIntrinsicToStore(II, LeafNodes, SI))
515585
return false;
516586

517587
// We now have a target-specific store, so delete the old one.
518588
DeadInsts.push_back(SI);
519-
DeadInsts.push_back(II);
589+
DeadInsts.insert(DeadInsts.end(), TempDeadInsts.begin(), TempDeadInsts.end());
520590
return true;
521591
}
522592

@@ -537,7 +607,8 @@ bool InterleavedAccessImpl::runOnFunction(Function &F) {
537607
// with a factor of 2.
538608
if (II->getIntrinsicID() == Intrinsic::vector_deinterleave2)
539609
Changed |= lowerDeinterleaveIntrinsic(II, DeadInsts);
540-
if (II->getIntrinsicID() == Intrinsic::vector_interleave2)
610+
611+
else if (II->getIntrinsicID() == Intrinsic::vector_interleave2)
541612
Changed |= lowerInterleaveIntrinsic(II, DeadInsts);
542613
}
543614
}

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16586,15 +16586,16 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
1658616586
}
1658716587

1658816588
bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
16589-
IntrinsicInst *DI, LoadInst *LI) const {
16589+
IntrinsicInst *DI, SmallVector<Value *> &LeafNodes, LoadInst *LI) const {
1659016590
// Only deinterleave2 supported at present.
1659116591
if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2)
1659216592
return false;
1659316593

16594-
// Only a factor of 2 supported at present.
16595-
const unsigned Factor = 2;
16594+
const unsigned Factor = std::max(2, (int)LeafNodes.size());
1659616595

16597-
VectorType *VTy = cast<VectorType>(DI->getType()->getContainedType(0));
16596+
VectorType *VTy = (LeafNodes.size() > 0)
16597+
? cast<VectorType>(LeafNodes.front()->getType())
16598+
: cast<VectorType>(DI->getType()->getContainedType(0));
1659816599
const DataLayout &DL = DI->getModule()->getDataLayout();
1659916600
bool UseScalable;
1660016601
if (!isLegalInterleavedAccessType(VTy, DL, UseScalable))
@@ -16650,9 +16651,19 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
1665016651
Result = Builder.CreateInsertValue(Result, Left, 0);
1665116652
Result = Builder.CreateInsertValue(Result, Right, 1);
1665216653
} else {
16653-
if (UseScalable)
16654+
if (UseScalable) {
1665416655
Result = Builder.CreateCall(LdNFunc, {Pred, BaseAddr}, "ldN");
16655-
else
16656+
if (Factor == 2) {
16657+
DI->replaceAllUsesWith(Result);
16658+
return true;
16659+
}
16660+
for (unsigned I = 0; I < LeafNodes.size(); I++) {
16661+
llvm::Value *CurrentExtract = LeafNodes[I];
16662+
Value *Newextrct = Builder.CreateExtractValue(Result, I);
16663+
CurrentExtract->replaceAllUsesWith(Newextrct);
16664+
}
16665+
return true;
16666+
} else
1665616667
Result = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
1665716668
}
1665816669

@@ -16661,15 +16672,15 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
1666116672
}
1666216673

1666316674
bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
16664-
IntrinsicInst *II, StoreInst *SI) const {
16675+
IntrinsicInst *II, SmallVector<Value *> &LeafNodes, StoreInst *SI) const {
1666516676
// Only interleave2 supported at present.
1666616677
if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
1666716678
return false;
1666816679

16669-
// Only a factor of 2 supported at present.
16670-
const unsigned Factor = 2;
16680+
// leaf nodes are the nodes that will be interleaved
16681+
const unsigned Factor = LeafNodes.size();
1667116682

16672-
VectorType *VTy = cast<VectorType>(II->getOperand(0)->getType());
16683+
VectorType *VTy = cast<VectorType>(LeafNodes.front()->getType());
1667316684
const DataLayout &DL = II->getModule()->getDataLayout();
1667416685
bool UseScalable;
1667516686
if (!isLegalInterleavedAccessType(VTy, DL, UseScalable))
@@ -16714,9 +16725,12 @@ bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
1671416725
R = Builder.CreateExtractVector(StTy, II->getOperand(1), Idx);
1671516726
}
1671616727

16717-
if (UseScalable)
16718-
Builder.CreateCall(StNFunc, {L, R, Pred, Address});
16719-
else
16728+
if (UseScalable) {
16729+
SmallVector<Value *> Args(LeafNodes);
16730+
Args.push_back(Pred);
16731+
Args.push_back(Address);
16732+
Builder.CreateCall(StNFunc, Args);
16733+
} else
1672016734
Builder.CreateCall(StNFunc, {L, R, Address});
1672116735
}
1672216736

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -696,9 +696,11 @@ class AArch64TargetLowering : public TargetLowering {
696696
unsigned Factor) const override;
697697

698698
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
699+
SmallVector<Value *> &LeafNodes,
699700
LoadInst *LI) const override;
700701

701702
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
703+
SmallVector<Value *> &LeafNodes,
702704
StoreInst *SI) const override;
703705

704706
bool isLegalAddImmediate(int64_t) const override;

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21550,19 +21550,22 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
2155021550
return true;
2155121551
}
2155221552

21553-
bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
21554-
LoadInst *LI) const {
21553+
bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
21554+
IntrinsicInst *DI, SmallVector<Value *> &LeafNodes, LoadInst *LI) const {
2155521555
assert(LI->isSimple());
2155621556
IRBuilder<> Builder(LI);
2155721557

2155821558
// Only deinterleave2 supported at present.
2155921559
if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2)
2156021560
return false;
2156121561

21562-
unsigned Factor = 2;
21562+
unsigned Factor = std::max(2, (int)LeafNodes.size());
2156321563

2156421564
VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
21565-
VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
21565+
VectorType *ResVTy =
21566+
(LeafNodes.size() > 0)
21567+
? cast<VectorType>(LeafNodes.front()->getType())
21568+
: cast<VectorType>(DI->getType()->getContainedType(0));
2156621569

2156721570
if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
2156821571
LI->getPointerAddressSpace(),
@@ -21590,6 +21593,19 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
2159021593
{ResVTy, XLenTy});
2159121594
VL = Constant::getAllOnesValue(XLenTy);
2159221595
Ops.append(Factor, PoisonValue::get(ResVTy));
21596+
Ops.append({LI->getPointerOperand(), VL});
21597+
Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
21598+
//-----------
21599+
if (Factor == 2) {
21600+
DI->replaceAllUsesWith(Vlseg);
21601+
return true;
21602+
}
21603+
for (unsigned I = 0; I < LeafNodes.size(); I++) {
21604+
auto CurrentExtract = LeafNodes[I];
21605+
Value *NewExtract = Builder.CreateExtractValue(Vlseg, I);
21606+
CurrentExtract->replaceAllUsesWith(NewExtract);
21607+
}
21608+
return true;
2159321609
}
2159421610

2159521611
Ops.append({LI->getPointerOperand(), VL});
@@ -21600,19 +21616,19 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
2160021616
return true;
2160121617
}
2160221618

21603-
bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
21604-
StoreInst *SI) const {
21619+
bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
21620+
IntrinsicInst *II, SmallVector<Value *> &LeafNodes, StoreInst *SI) const {
2160521621
assert(SI->isSimple());
2160621622
IRBuilder<> Builder(SI);
2160721623

2160821624
// Only interleave2 supported at present.
2160921625
if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
2161021626
return false;
2161121627

21612-
unsigned Factor = 2;
21628+
unsigned Factor = LeafNodes.size();
2161321629

2161421630
VectorType *VTy = cast<VectorType>(II->getType());
21615-
VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());
21631+
VectorType *InVTy = cast<VectorType>(LeafNodes.front()->getType());
2161621632

2161721633
if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
2161821634
SI->getPointerAddressSpace(),
@@ -21638,6 +21654,11 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
2163821654
VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
2163921655
{InVTy, XLenTy});
2164021656
VL = Constant::getAllOnesValue(XLenTy);
21657+
SmallVector<Value *> Args(LeafNodes);
21658+
Args.push_back(SI->getPointerOperand());
21659+
Args.push_back(VL);
21660+
Builder.CreateCall(VssegNFunc, Args);
21661+
return true;
2164121662
}
2164221663

2164321664
Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -875,10 +875,12 @@ class RISCVTargetLowering : public TargetLowering {
875875
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
876876
unsigned Factor) const override;
877877

878-
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II,
878+
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
879+
SmallVector<Value *> &LeafNodes,
879880
LoadInst *LI) const override;
880881

881882
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
883+
SmallVector<Value *> &LeafNodes,
882884
StoreInst *SI) const override;
883885

884886
bool supportKCFIBundles() const override { return true; }

0 commit comments

Comments
 (0)