Skip to content

Commit 7c19cad

Browse files
committed
[IA]: Construct (de)interleave4 out of (de)interleave2
- The InterleavedAccess pass is updated to spot load/store (de)interleave4-like sequences and emit the equivalent sve.ld4 or sve.st4 intrinsics on targets that support SV. - Tests are added for targets that support SV. Change-Id: I76ef31080ddd72b182c1a3b1752a6178dc78ea84
1 parent 8e0a4a8 commit 7c19cad

File tree

8 files changed

+294
-28
lines changed

8 files changed

+294
-28
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@
5656
#include <cstdint>
5757
#include <iterator>
5858
#include <map>
59+
#include <queue>
60+
#include <stack>
5961
#include <string>
6062
#include <utility>
6163
#include <vector>
@@ -3145,6 +3147,7 @@ class TargetLoweringBase {
31453147
/// \p DI is the deinterleave intrinsic.
31463148
/// \p LI is the accompanying load instruction
31473149
virtual bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
3150+
SmallVector<Value *> &LeafNodes,
31483151
LoadInst *LI) const {
31493152
return false;
31503153
}
@@ -3156,6 +3159,7 @@ class TargetLoweringBase {
31563159
/// \p II is the interleave intrinsic.
31573160
/// \p SI is the accompanying store instruction
31583161
virtual bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
3162+
SmallVector<Value *> &LeafNodes,
31593163
StoreInst *SI) const {
31603164
return false;
31613165
}

llvm/lib/CodeGen/InterleavedAccessPass.cpp

Lines changed: 77 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
#include "llvm/Target/TargetMachine.h"
7171
#include "llvm/Transforms/Utils/Local.h"
7272
#include <cassert>
73+
#include <queue>
7374
#include <utility>
7475

7576
using namespace llvm;
@@ -510,12 +511,57 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
510511

511512
LLVM_DEBUG(dbgs() << "IA: Found a deinterleave intrinsic: " << *DI << "\n");
512513

514+
std::stack<IntrinsicInst *> DeinterleaveTreeQueue;
515+
SmallVector<Value *> TempLeafNodes, LeafNodes;
516+
std::map<IntrinsicInst *, bool> mp;
517+
SmallVector<Instruction *> TempDeadInsts;
518+
519+
DeinterleaveTreeQueue.push(DI);
520+
while (!DeinterleaveTreeQueue.empty()) {
521+
auto CurrentDI = DeinterleaveTreeQueue.top();
522+
DeinterleaveTreeQueue.pop();
523+
TempDeadInsts.push_back(CurrentDI);
524+
// iterate over extract users of deinterleave
525+
for (auto UserExtract : CurrentDI->users()) {
526+
Instruction *Extract = dyn_cast<Instruction>(UserExtract);
527+
if (!Extract || Extract->getOpcode() != Instruction::ExtractValue)
528+
continue;
529+
bool IsLeaf = true;
530+
// iterate over deinterleave users of extract
531+
for (auto UserDI : UserExtract->users()) {
532+
IntrinsicInst *Child_DI = dyn_cast<IntrinsicInst>(UserDI);
533+
if (!Child_DI || Child_DI->getIntrinsicID() !=
534+
Intrinsic::experimental_vector_deinterleave2)
535+
continue;
536+
IsLeaf = false;
537+
if (mp.count(Child_DI) == 0) {
538+
DeinterleaveTreeQueue.push(Child_DI);
539+
}
540+
continue;
541+
}
542+
if (IsLeaf) {
543+
TempLeafNodes.push_back(UserExtract);
544+
TempDeadInsts.push_back(Extract);
545+
} else {
546+
TempDeadInsts.push_back(Extract);
547+
}
548+
}
549+
}
550+
// sort the deinterleaved nodes in the order that
551+
// they will be extracted from the target-specific intrinsic.
552+
for (unsigned I = 1; I < TempLeafNodes.size(); I += 2)
553+
LeafNodes.push_back(TempLeafNodes[I]);
554+
555+
for (unsigned I = 0; I < TempLeafNodes.size(); I += 2)
556+
LeafNodes.push_back(TempLeafNodes[I]);
557+
513558
// Try and match this with target specific intrinsics.
514-
if (!TLI->lowerDeinterleaveIntrinsicToLoad(DI, LI))
559+
if (!TLI->lowerDeinterleaveIntrinsicToLoad(DI, LeafNodes, LI))
515560
return false;
516561

517562
// We now have a target-specific load, so delete the old one.
518-
DeadInsts.push_back(DI);
563+
DeadInsts.insert(DeadInsts.end(), TempDeadInsts.rbegin(),
564+
TempDeadInsts.rend());
519565
DeadInsts.push_back(LI);
520566
return true;
521567
}
@@ -531,14 +577,38 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
531577
return false;
532578

533579
LLVM_DEBUG(dbgs() << "IA: Found an interleave intrinsic: " << *II << "\n");
534-
580+
std::queue<IntrinsicInst *> IeinterleaveTreeQueue;
581+
SmallVector<Value *> TempLeafNodes, LeafNodes;
582+
SmallVector<Instruction *> TempDeadInsts;
583+
584+
IeinterleaveTreeQueue.push(II);
585+
while (!IeinterleaveTreeQueue.empty()) {
586+
auto node = IeinterleaveTreeQueue.front();
587+
TempDeadInsts.push_back(node);
588+
IeinterleaveTreeQueue.pop();
589+
for (unsigned i = 0; i < 2; i++) {
590+
auto op = node->getOperand(i);
591+
if (auto CurrentII = dyn_cast<IntrinsicInst>(op)) {
592+
if (CurrentII->getIntrinsicID() !=
593+
Intrinsic::experimental_vector_interleave2)
594+
continue;
595+
IeinterleaveTreeQueue.push(CurrentII);
596+
continue;
597+
}
598+
TempLeafNodes.push_back(op);
599+
}
600+
}
601+
for (unsigned I = 0; I < TempLeafNodes.size(); I += 2)
602+
LeafNodes.push_back(TempLeafNodes[I]);
603+
for (unsigned I = 1; I < TempLeafNodes.size(); I += 2)
604+
LeafNodes.push_back(TempLeafNodes[I]);
535605
// Try and match this with target specific intrinsics.
536-
if (!TLI->lowerInterleaveIntrinsicToStore(II, SI))
606+
if (!TLI->lowerInterleaveIntrinsicToStore(II, LeafNodes, SI))
537607
return false;
538608

539609
// We now have a target-specific store, so delete the old one.
540610
DeadInsts.push_back(SI);
541-
DeadInsts.push_back(II);
611+
DeadInsts.insert(DeadInsts.end(), TempDeadInsts.begin(), TempDeadInsts.end());
542612
return true;
543613
}
544614

@@ -559,7 +629,8 @@ bool InterleavedAccessImpl::runOnFunction(Function &F) {
559629
// with a factor of 2.
560630
if (II->getIntrinsicID() == Intrinsic::experimental_vector_deinterleave2)
561631
Changed |= lowerDeinterleaveIntrinsic(II, DeadInsts);
562-
if (II->getIntrinsicID() == Intrinsic::experimental_vector_interleave2)
632+
else if (II->getIntrinsicID() ==
633+
Intrinsic::experimental_vector_interleave2)
563634
Changed |= lowerInterleaveIntrinsic(II, DeadInsts);
564635
}
565636
}

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16345,15 +16345,16 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
1634516345
}
1634616346

1634716347
bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
16348-
IntrinsicInst *DI, LoadInst *LI) const {
16348+
IntrinsicInst *DI, SmallVector<Value *> &LeafNodes, LoadInst *LI) const {
1634916349
// Only deinterleave2 supported at present.
1635016350
if (DI->getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2)
1635116351
return false;
1635216352

16353-
// Only a factor of 2 supported at present.
16354-
const unsigned Factor = 2;
16353+
const unsigned Factor = std::max(2, (int)LeafNodes.size());
1635516354

16356-
VectorType *VTy = cast<VectorType>(DI->getType()->getContainedType(0));
16355+
VectorType *VTy = (LeafNodes.size() > 0)
16356+
? cast<VectorType>(LeafNodes.front()->getType())
16357+
: cast<VectorType>(DI->getType()->getContainedType(0));
1635716358
const DataLayout &DL = DI->getModule()->getDataLayout();
1635816359
bool UseScalable;
1635916360
if (!isLegalInterleavedAccessType(VTy, DL, UseScalable))
@@ -16409,9 +16410,19 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
1640916410
Result = Builder.CreateInsertValue(Result, Left, 0);
1641016411
Result = Builder.CreateInsertValue(Result, Right, 1);
1641116412
} else {
16412-
if (UseScalable)
16413+
if (UseScalable) {
1641316414
Result = Builder.CreateCall(LdNFunc, {Pred, BaseAddr}, "ldN");
16414-
else
16415+
if (Factor == 2) {
16416+
DI->replaceAllUsesWith(Result);
16417+
return true;
16418+
}
16419+
for (unsigned I = 0; I < LeafNodes.size(); I++) {
16420+
llvm::Value *CurrentExtract = LeafNodes[I];
16421+
Value *Newextrct = Builder.CreateExtractValue(Result, I);
16422+
CurrentExtract->replaceAllUsesWith(Newextrct);
16423+
}
16424+
return true;
16425+
} else
1641516426
Result = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
1641616427
}
1641716428

@@ -16420,15 +16431,15 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
1642016431
}
1642116432

1642216433
bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
16423-
IntrinsicInst *II, StoreInst *SI) const {
16434+
IntrinsicInst *II, SmallVector<Value *> &LeafNodes, StoreInst *SI) const {
1642416435
// Only interleave2 supported at present.
1642516436
if (II->getIntrinsicID() != Intrinsic::experimental_vector_interleave2)
1642616437
return false;
1642716438

16428-
// Only a factor of 2 supported at present.
16429-
const unsigned Factor = 2;
16439+
// leaf nodes are the nodes that will be interleaved
16440+
const unsigned Factor = LeafNodes.size();
1643016441

16431-
VectorType *VTy = cast<VectorType>(II->getOperand(0)->getType());
16442+
VectorType *VTy = cast<VectorType>(LeafNodes.front()->getType());
1643216443
const DataLayout &DL = II->getModule()->getDataLayout();
1643316444
bool UseScalable;
1643416445
if (!isLegalInterleavedAccessType(VTy, DL, UseScalable))
@@ -16473,9 +16484,12 @@ bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
1647316484
R = Builder.CreateExtractVector(StTy, II->getOperand(1), Idx);
1647416485
}
1647516486

16476-
if (UseScalable)
16477-
Builder.CreateCall(StNFunc, {L, R, Pred, Address});
16478-
else
16487+
if (UseScalable) {
16488+
SmallVector<Value *> Args(LeafNodes);
16489+
Args.push_back(Pred);
16490+
Args.push_back(Address);
16491+
Builder.CreateCall(StNFunc, Args);
16492+
} else
1647916493
Builder.CreateCall(StNFunc, {L, R, Address});
1648016494
}
1648116495

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -683,9 +683,11 @@ class AArch64TargetLowering : public TargetLowering {
683683
unsigned Factor) const override;
684684

685685
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
686+
SmallVector<Value *> &LeafNodes,
686687
LoadInst *LI) const override;
687688

688689
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
690+
SmallVector<Value *> &LeafNodes,
689691
StoreInst *SI) const override;
690692

691693
bool isLegalAddImmediate(int64_t) const override;

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21024,19 +21024,22 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
2102421024
return true;
2102521025
}
2102621026

21027-
bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
21028-
LoadInst *LI) const {
21027+
bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
21028+
IntrinsicInst *DI, SmallVector<Value *> &LeafNodes, LoadInst *LI) const {
2102921029
assert(LI->isSimple());
2103021030
IRBuilder<> Builder(LI);
2103121031

2103221032
// Only deinterleave2 supported at present.
2103321033
if (DI->getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2)
2103421034
return false;
2103521035

21036-
unsigned Factor = 2;
21036+
unsigned Factor = std::max(2, (int)LeafNodes.size());
2103721037

2103821038
VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
21039-
VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
21039+
VectorType *ResVTy =
21040+
(LeafNodes.size() > 0)
21041+
? cast<VectorType>(LeafNodes.front()->getType())
21042+
: cast<VectorType>(DI->getType()->getContainedType(0));
2104021043

2104121044
if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
2104221045
LI->getPointerAddressSpace(),
@@ -21064,6 +21067,19 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
2106421067
{ResVTy, XLenTy});
2106521068
VL = Constant::getAllOnesValue(XLenTy);
2106621069
Ops.append(Factor, PoisonValue::get(ResVTy));
21070+
Ops.append({LI->getPointerOperand(), VL});
21071+
Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
21072+
//-----------
21073+
if (Factor == 2) {
21074+
DI->replaceAllUsesWith(Vlseg);
21075+
return true;
21076+
}
21077+
for (unsigned I = 0; I < LeafNodes.size(); I++) {
21078+
auto CurrentExtract = LeafNodes[I];
21079+
Value *NewExtract = Builder.CreateExtractValue(Vlseg, I);
21080+
CurrentExtract->replaceAllUsesWith(NewExtract);
21081+
}
21082+
return true;
2106721083
}
2106821084

2106921085
Ops.append({LI->getPointerOperand(), VL});
@@ -21074,19 +21090,19 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
2107421090
return true;
2107521091
}
2107621092

21077-
bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
21078-
StoreInst *SI) const {
21093+
bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
21094+
IntrinsicInst *II, SmallVector<Value *> &LeafNodes, StoreInst *SI) const {
2107921095
assert(SI->isSimple());
2108021096
IRBuilder<> Builder(SI);
2108121097

2108221098
// Only interleave2 supported at present.
2108321099
if (II->getIntrinsicID() != Intrinsic::experimental_vector_interleave2)
2108421100
return false;
2108521101

21086-
unsigned Factor = 2;
21102+
unsigned Factor = LeafNodes.size();
2108721103

2108821104
VectorType *VTy = cast<VectorType>(II->getType());
21089-
VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());
21105+
VectorType *InVTy = cast<VectorType>(LeafNodes.front()->getType());
2109021106

2109121107
if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
2109221108
SI->getPointerAddressSpace(),
@@ -21112,6 +21128,11 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
2111221128
VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
2111321129
{InVTy, XLenTy});
2111421130
VL = Constant::getAllOnesValue(XLenTy);
21131+
SmallVector<Value *> Args(LeafNodes);
21132+
Args.push_back(SI->getPointerOperand());
21133+
Args.push_back(VL);
21134+
Builder.CreateCall(VssegNFunc, Args);
21135+
return true;
2111521136
}
2111621137

2111721138
Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -855,10 +855,12 @@ class RISCVTargetLowering : public TargetLowering {
855855
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
856856
unsigned Factor) const override;
857857

858-
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II,
858+
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
859+
SmallVector<Value *> &LeafNodes,
859860
LoadInst *LI) const override;
860861

861862
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
863+
SmallVector<Value *> &LeafNodes,
862864
StoreInst *SI) const override;
863865

864866
bool supportKCFIBundles() const override { return true; }

0 commit comments

Comments
 (0)