Skip to content

Commit 196e018

Browse files
committed
[IA]: Construct (de)interleave4 out of (de)interleave2
- The InterleavedAccess pass is updated to spot load/store (de)interleave4-like sequences and emit the equivalent sve.ld4 or sve.st4 intrinsics through targets that support SV.
- Tests are added for targets that support SV.

Change-Id: I76ef31080ddd72b182c1a3b1752a6178dc78ea84
1 parent da7bc85 commit 196e018

File tree

8 files changed

+294
-28
lines changed

8 files changed

+294
-28
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@
5656
#include <cstdint>
5757
#include <iterator>
5858
#include <map>
59+
#include <queue>
60+
#include <stack>
5961
#include <string>
6062
#include <utility>
6163
#include <vector>
@@ -3157,6 +3159,7 @@ class TargetLoweringBase {
31573159
/// \p DI is the deinterleave intrinsic.
31583160
/// \p LI is the accompanying load instruction
31593161
virtual bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
3162+
SmallVector<Value *> &LeafNodes,
31603163
LoadInst *LI) const {
31613164
return false;
31623165
}
@@ -3168,6 +3171,7 @@ class TargetLoweringBase {
31683171
/// \p II is the interleave intrinsic.
31693172
/// \p SI is the accompanying store instruction
31703173
virtual bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
3174+
SmallVector<Value *> &LeafNodes,
31713175
StoreInst *SI) const {
31723176
return false;
31733177
}

llvm/lib/CodeGen/InterleavedAccessPass.cpp

Lines changed: 77 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
#include "llvm/Target/TargetMachine.h"
7171
#include "llvm/Transforms/Utils/Local.h"
7272
#include <cassert>
73+
#include <queue>
7374
#include <utility>
7475

7576
using namespace llvm;
@@ -488,12 +489,57 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
488489

489490
LLVM_DEBUG(dbgs() << "IA: Found a deinterleave intrinsic: " << *DI << "\n");
490491

492+
std::stack<IntrinsicInst *> DeinterleaveTreeQueue;
493+
SmallVector<Value *> TempLeafNodes, LeafNodes;
494+
std::map<IntrinsicInst *, bool> mp;
495+
SmallVector<Instruction *> TempDeadInsts;
496+
497+
DeinterleaveTreeQueue.push(DI);
498+
while (!DeinterleaveTreeQueue.empty()) {
499+
auto CurrentDI = DeinterleaveTreeQueue.top();
500+
DeinterleaveTreeQueue.pop();
501+
TempDeadInsts.push_back(CurrentDI);
502+
// iterate over extract users of deinterleave
503+
for (auto UserExtract : CurrentDI->users()) {
504+
Instruction *Extract = dyn_cast<Instruction>(UserExtract);
505+
if (!Extract || Extract->getOpcode() != Instruction::ExtractValue)
506+
continue;
507+
bool IsLeaf = true;
508+
// iterate over deinterleave users of extract
509+
for (auto UserDI : UserExtract->users()) {
510+
IntrinsicInst *Child_DI = dyn_cast<IntrinsicInst>(UserDI);
511+
if (!Child_DI || Child_DI->getIntrinsicID() !=
512+
Intrinsic::experimental_vector_deinterleave2)
513+
continue;
514+
IsLeaf = false;
515+
if (mp.count(Child_DI) == 0) {
516+
DeinterleaveTreeQueue.push(Child_DI);
517+
}
518+
continue;
519+
}
520+
if (IsLeaf) {
521+
TempLeafNodes.push_back(UserExtract);
522+
TempDeadInsts.push_back(Extract);
523+
} else {
524+
TempDeadInsts.push_back(Extract);
525+
}
526+
}
527+
}
528+
// sort the deinterleaved nodes in the order that
529+
// they will be extracted from the target-specific intrinsic.
530+
for (unsigned I = 1; I < TempLeafNodes.size(); I += 2)
531+
LeafNodes.push_back(TempLeafNodes[I]);
532+
533+
for (unsigned I = 0; I < TempLeafNodes.size(); I += 2)
534+
LeafNodes.push_back(TempLeafNodes[I]);
535+
491536
// Try and match this with target specific intrinsics.
492-
if (!TLI->lowerDeinterleaveIntrinsicToLoad(DI, LI))
537+
if (!TLI->lowerDeinterleaveIntrinsicToLoad(DI, LeafNodes, LI))
493538
return false;
494539

495540
// We now have a target-specific load, so delete the old one.
496-
DeadInsts.push_back(DI);
541+
DeadInsts.insert(DeadInsts.end(), TempDeadInsts.rbegin(),
542+
TempDeadInsts.rend());
497543
DeadInsts.push_back(LI);
498544
return true;
499545
}
@@ -509,14 +555,38 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
509555
return false;
510556

511557
LLVM_DEBUG(dbgs() << "IA: Found an interleave intrinsic: " << *II << "\n");
512-
558+
std::queue<IntrinsicInst *> IeinterleaveTreeQueue;
559+
SmallVector<Value *> TempLeafNodes, LeafNodes;
560+
SmallVector<Instruction *> TempDeadInsts;
561+
562+
IeinterleaveTreeQueue.push(II);
563+
while (!IeinterleaveTreeQueue.empty()) {
564+
auto node = IeinterleaveTreeQueue.front();
565+
TempDeadInsts.push_back(node);
566+
IeinterleaveTreeQueue.pop();
567+
for (unsigned i = 0; i < 2; i++) {
568+
auto op = node->getOperand(i);
569+
if (auto CurrentII = dyn_cast<IntrinsicInst>(op)) {
570+
if (CurrentII->getIntrinsicID() !=
571+
Intrinsic::experimental_vector_interleave2)
572+
continue;
573+
IeinterleaveTreeQueue.push(CurrentII);
574+
continue;
575+
}
576+
TempLeafNodes.push_back(op);
577+
}
578+
}
579+
for (unsigned I = 0; I < TempLeafNodes.size(); I += 2)
580+
LeafNodes.push_back(TempLeafNodes[I]);
581+
for (unsigned I = 1; I < TempLeafNodes.size(); I += 2)
582+
LeafNodes.push_back(TempLeafNodes[I]);
513583
// Try and match this with target specific intrinsics.
514-
if (!TLI->lowerInterleaveIntrinsicToStore(II, SI))
584+
if (!TLI->lowerInterleaveIntrinsicToStore(II, LeafNodes, SI))
515585
return false;
516586

517587
// We now have a target-specific store, so delete the old one.
518588
DeadInsts.push_back(SI);
519-
DeadInsts.push_back(II);
589+
DeadInsts.insert(DeadInsts.end(), TempDeadInsts.begin(), TempDeadInsts.end());
520590
return true;
521591
}
522592

@@ -537,7 +607,8 @@ bool InterleavedAccessImpl::runOnFunction(Function &F) {
537607
// with a factor of 2.
538608
if (II->getIntrinsicID() == Intrinsic::vector_deinterleave2)
539609
Changed |= lowerDeinterleaveIntrinsic(II, DeadInsts);
540-
if (II->getIntrinsicID() == Intrinsic::vector_interleave2)
610+
611+
else if (II->getIntrinsicID() == Intrinsic::vector_interleave2)
541612
Changed |= lowerInterleaveIntrinsic(II, DeadInsts);
542613
}
543614
}

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16442,15 +16442,16 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
1644216442
}
1644316443

1644416444
bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
16445-
IntrinsicInst *DI, LoadInst *LI) const {
16445+
IntrinsicInst *DI, SmallVector<Value *> &LeafNodes, LoadInst *LI) const {
1644616446
// Only deinterleave2 supported at present.
1644716447
if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2)
1644816448
return false;
1644916449

16450-
// Only a factor of 2 supported at present.
16451-
const unsigned Factor = 2;
16450+
const unsigned Factor = std::max(2, (int)LeafNodes.size());
1645216451

16453-
VectorType *VTy = cast<VectorType>(DI->getType()->getContainedType(0));
16452+
VectorType *VTy = (LeafNodes.size() > 0)
16453+
? cast<VectorType>(LeafNodes.front()->getType())
16454+
: cast<VectorType>(DI->getType()->getContainedType(0));
1645416455
const DataLayout &DL = DI->getModule()->getDataLayout();
1645516456
bool UseScalable;
1645616457
if (!isLegalInterleavedAccessType(VTy, DL, UseScalable))
@@ -16506,9 +16507,19 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
1650616507
Result = Builder.CreateInsertValue(Result, Left, 0);
1650716508
Result = Builder.CreateInsertValue(Result, Right, 1);
1650816509
} else {
16509-
if (UseScalable)
16510+
if (UseScalable) {
1651016511
Result = Builder.CreateCall(LdNFunc, {Pred, BaseAddr}, "ldN");
16511-
else
16512+
if (Factor == 2) {
16513+
DI->replaceAllUsesWith(Result);
16514+
return true;
16515+
}
16516+
for (unsigned I = 0; I < LeafNodes.size(); I++) {
16517+
llvm::Value *CurrentExtract = LeafNodes[I];
16518+
Value *Newextrct = Builder.CreateExtractValue(Result, I);
16519+
CurrentExtract->replaceAllUsesWith(Newextrct);
16520+
}
16521+
return true;
16522+
} else
1651216523
Result = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
1651316524
}
1651416525

@@ -16517,15 +16528,15 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
1651716528
}
1651816529

1651916530
bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
16520-
IntrinsicInst *II, StoreInst *SI) const {
16531+
IntrinsicInst *II, SmallVector<Value *> &LeafNodes, StoreInst *SI) const {
1652116532
// Only interleave2 supported at present.
1652216533
if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
1652316534
return false;
1652416535

16525-
// Only a factor of 2 supported at present.
16526-
const unsigned Factor = 2;
16536+
// leaf nodes are the nodes that will be interleaved
16537+
const unsigned Factor = LeafNodes.size();
1652716538

16528-
VectorType *VTy = cast<VectorType>(II->getOperand(0)->getType());
16539+
VectorType *VTy = cast<VectorType>(LeafNodes.front()->getType());
1652916540
const DataLayout &DL = II->getModule()->getDataLayout();
1653016541
bool UseScalable;
1653116542
if (!isLegalInterleavedAccessType(VTy, DL, UseScalable))
@@ -16570,9 +16581,12 @@ bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
1657016581
R = Builder.CreateExtractVector(StTy, II->getOperand(1), Idx);
1657116582
}
1657216583

16573-
if (UseScalable)
16574-
Builder.CreateCall(StNFunc, {L, R, Pred, Address});
16575-
else
16584+
if (UseScalable) {
16585+
SmallVector<Value *> Args(LeafNodes);
16586+
Args.push_back(Pred);
16587+
Args.push_back(Address);
16588+
Builder.CreateCall(StNFunc, Args);
16589+
} else
1657616590
Builder.CreateCall(StNFunc, {L, R, Address});
1657716591
}
1657816592

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -686,9 +686,11 @@ class AArch64TargetLowering : public TargetLowering {
686686
unsigned Factor) const override;
687687

688688
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
689+
SmallVector<Value *> &LeafNodes,
689690
LoadInst *LI) const override;
690691

691692
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
693+
SmallVector<Value *> &LeafNodes,
692694
StoreInst *SI) const override;
693695

694696
bool isLegalAddImmediate(int64_t) const override;

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21231,19 +21231,22 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
2123121231
return true;
2123221232
}
2123321233

21234-
bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
21235-
LoadInst *LI) const {
21234+
bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
21235+
IntrinsicInst *DI, SmallVector<Value *> &LeafNodes, LoadInst *LI) const {
2123621236
assert(LI->isSimple());
2123721237
IRBuilder<> Builder(LI);
2123821238

2123921239
// Only deinterleave2 supported at present.
2124021240
if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2)
2124121241
return false;
2124221242

21243-
unsigned Factor = 2;
21243+
unsigned Factor = std::max(2, (int)LeafNodes.size());
2124421244

2124521245
VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
21246-
VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
21246+
VectorType *ResVTy =
21247+
(LeafNodes.size() > 0)
21248+
? cast<VectorType>(LeafNodes.front()->getType())
21249+
: cast<VectorType>(DI->getType()->getContainedType(0));
2124721250

2124821251
if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
2124921252
LI->getPointerAddressSpace(),
@@ -21271,6 +21274,19 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
2127121274
{ResVTy, XLenTy});
2127221275
VL = Constant::getAllOnesValue(XLenTy);
2127321276
Ops.append(Factor, PoisonValue::get(ResVTy));
21277+
Ops.append({LI->getPointerOperand(), VL});
21278+
Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
21279+
//-----------
21280+
if (Factor == 2) {
21281+
DI->replaceAllUsesWith(Vlseg);
21282+
return true;
21283+
}
21284+
for (unsigned I = 0; I < LeafNodes.size(); I++) {
21285+
auto CurrentExtract = LeafNodes[I];
21286+
Value *NewExtract = Builder.CreateExtractValue(Vlseg, I);
21287+
CurrentExtract->replaceAllUsesWith(NewExtract);
21288+
}
21289+
return true;
2127421290
}
2127521291

2127621292
Ops.append({LI->getPointerOperand(), VL});
@@ -21281,19 +21297,19 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
2128121297
return true;
2128221298
}
2128321299

21284-
bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
21285-
StoreInst *SI) const {
21300+
bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
21301+
IntrinsicInst *II, SmallVector<Value *> &LeafNodes, StoreInst *SI) const {
2128621302
assert(SI->isSimple());
2128721303
IRBuilder<> Builder(SI);
2128821304

2128921305
// Only interleave2 supported at present.
2129021306
if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
2129121307
return false;
2129221308

21293-
unsigned Factor = 2;
21309+
unsigned Factor = LeafNodes.size();
2129421310

2129521311
VectorType *VTy = cast<VectorType>(II->getType());
21296-
VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());
21312+
VectorType *InVTy = cast<VectorType>(LeafNodes.front()->getType());
2129721313

2129821314
if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
2129921315
SI->getPointerAddressSpace(),
@@ -21319,6 +21335,11 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
2131921335
VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
2132021336
{InVTy, XLenTy});
2132121337
VL = Constant::getAllOnesValue(XLenTy);
21338+
SmallVector<Value *> Args(LeafNodes);
21339+
Args.push_back(SI->getPointerOperand());
21340+
Args.push_back(VL);
21341+
Builder.CreateCall(VssegNFunc, Args);
21342+
return true;
2132221343
}
2132321344

2132421345
Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -867,10 +867,12 @@ class RISCVTargetLowering : public TargetLowering {
867867
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
868868
unsigned Factor) const override;
869869

870-
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II,
870+
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
871+
SmallVector<Value *> &LeafNodes,
871872
LoadInst *LI) const override;
872873

873874
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
875+
SmallVector<Value *> &LeafNodes,
874876
StoreInst *SI) const override;
875877

876878
bool supportKCFIBundles() const override { return true; }

0 commit comments

Comments
 (0)