Skip to content

Commit d8ba622

Browse files
committed
[AArch64][GlobalISel] Selection support for vector DUP[X]lane instructions.
In future, we'd like to use the perfect-shuffle mechanism to deal with these shuffle permutations. For now, this improves performance by avoiding the super-expensive const-pool load + tbl instruction. Differential Revision: https://reviews.llvm.org/D84866
1 parent ebaa8b1 commit d8ba622

File tree

6 files changed

+197
-25
lines changed

6 files changed

+197
-25
lines changed

llvm/include/llvm/CodeGen/GlobalISel/Utils.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,5 +213,9 @@ LLT getLCMType(LLT OrigTy, LLT TargetTy);
213213
LLVM_READNONE
214214
LLT getGCDType(LLT OrigTy, LLT TargetTy);
215215

216+
/// \returns The splat index of a G_SHUFFLE_VECTOR \p MI when \p MI is a splat.
217+
/// If \p MI is not a splat, returns None.
218+
/// Undefined (negative) mask elements are ignored when deciding whether the
/// mask is a splat; a mask consisting only of undefined elements is reported
/// as a splat of index 0.
Optional<int> getSplatIndex(MachineInstr &MI);
219+
216220
} // End namespace llvm.
217221
#endif

llvm/lib/CodeGen/GlobalISel/Utils.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -604,3 +604,24 @@ LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) {
604604
unsigned GCD = greatestCommonDivisor(OrigSize, TargetSize);
605605
return LLT::scalar(GCD);
606606
}
607+
608+
// Determine whether the shuffle mask of the G_SHUFFLE_VECTOR \p MI selects a
// single element for every defined lane, and if so return that mask value.
//
// Returns:
//   - 0 when every mask element is undefined (negative),
//   - the common mask value when all defined elements agree,
//   - None when two defined elements differ.
//
// The returned value is the raw mask element; this function does not check
// which source operand it refers to.
Optional<int> llvm::getSplatIndex(MachineInstr &MI) {
609+
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
610+
"Only G_SHUFFLE_VECTOR can have a splat index!");
611+
// The shuffle mask is carried by operand 3 of the instruction.
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
612+
// Note: despite its name, this is an iterator to the first non-negative
// (i.e. defined) mask element, not a numeric index.
auto FirstDefinedIdx = find_if(Mask, [](int Elt) { return Elt >= 0; });
613+
614+
// If all elements are undefined, this shuffle can be considered a splat.
615+
// Return 0 for better potential for callers to simplify.
616+
if (FirstDefinedIdx == Mask.end())
617+
return 0;
618+
619+
// Make sure all remaining elements are either undef or the same
620+
// as the first non-undef value.
621+
int SplatValue = *FirstDefinedIdx;
622+
// Only elements after the first defined one need checking; everything
// before it is undefined by construction.
if (any_of(make_range(std::next(FirstDefinedIdx), Mask.end()),
623+
[&SplatValue](int Elt) { return Elt >= 0 && Elt != SplatValue; }))
624+
return None;
625+
626+
return SplatValue;
627+
}

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,8 @@ class AArch64InstructionSelector : public InstructionSelector {
133133
bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
134134
bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
135135

136+
bool tryOptShuffleDupLane(MachineInstr &I, LLT DstTy, LLT SrcTy,
137+
ArrayRef<int> Mask, MachineRegisterInfo &MRI) const;
136138
bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
137139
bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
138140
bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
@@ -4306,6 +4308,67 @@ MachineInstr *AArch64InstructionSelector::tryOptArithShiftedCompare(
43064308
return &*CmpMI;
43074309
}
43084310

4311+
// Try to select the G_SHUFFLE_VECTOR \p I as a single DUPv*lane instruction
// when its mask is a splat of one lane of the first source vector.
//
// \p DstTy and \p SrcTy are the result type and the type of the first source
// vector; they must be equal for the optimization to apply.
// NOTE(review): \p Mask and \p MRI are not referenced in this body; the mask
// is re-read from \p I by getSplatIndex().
//
// Returns true if \p I was replaced by a DUP instruction and erased; returns
// false, leaving \p I untouched, otherwise.
bool AArch64InstructionSelector::tryOptShuffleDupLane(
4312+
MachineInstr &I, LLT DstTy, LLT SrcTy, ArrayRef<int> Mask,
4313+
MachineRegisterInfo &MRI) const {
4314+
assert(I.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
4315+
4316+
// We assume that scalar->vector splats have been handled in the
4317+
// post-legalizer combiner to G_DUP. However splats of a source vector's
4318+
// lane don't fit that pattern, detect it here:
4319+
// %res = G_SHUFFLE_VECTOR %src:<n x ty>, undef, <n x i32> splat(lane-idx)
4320+
// =>
4321+
// %res = DUPv[N][Ty]lane %src, lane-idx
4322+
// FIXME: this case should be covered by re-implementing the perfect shuffle
4323+
// codegen mechanism.
4324+
4325+
// Bail out if the mask is not a splat.
auto LaneIdx = getSplatIndex(I);
4326+
if (!LaneIdx)
4327+
return false;
4328+
4329+
// The lane idx should be within the first source vector.
4330+
if (*LaneIdx >= SrcTy.getNumElements())
4331+
return false;
4332+
4333+
// The DUP result is written directly to the shuffle's destination register,
// so only handle shuffles whose result type matches the source type.
if (DstTy != SrcTy)
4334+
return false;
4335+
4336+
LLT ScalarTy = SrcTy.getElementType();
4337+
unsigned ScalarSize = ScalarTy.getSizeInBits();
4338+
4339+
// Pick the DUP-lane opcode from (element count, element size). Only the four
// combinations below (each 128 bits in total) are handled; anything else
// leaves Opc at 0 and is rejected.
unsigned Opc = 0;
4340+
switch (SrcTy.getNumElements()) {
4341+
case 2:
4342+
if (ScalarSize == 64)
4343+
Opc = AArch64::DUPv2i64lane;
4344+
break;
4345+
case 4:
4346+
if (ScalarSize == 32)
4347+
Opc = AArch64::DUPv4i32lane;
4348+
break;
4349+
case 8:
4350+
if (ScalarSize == 16)
4351+
Opc = AArch64::DUPv8i16lane;
4352+
break;
4353+
case 16:
4354+
if (ScalarSize == 8)
4355+
Opc = AArch64::DUPv16i8lane;
4356+
break;
4357+
default:
4358+
break;
4359+
}
4360+
if (!Opc)
4361+
return false;
4362+
4363+
// Build "dst = DUP src1, lane" in place of the shuffle: operand 0 of I is the
// destination, operand 1 is the first source vector.
MachineIRBuilder MIB(I);
4364+
auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()},
4365+
{I.getOperand(1).getReg()})
4366+
.addImm(*LaneIdx);
4367+
// NOTE(review): the result of constrainSelectedInstRegOperands is ignored
// here — confirm that constraining cannot fail for these operands.
constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
4368+
I.eraseFromParent();
4369+
return true;
4370+
}
4371+
43094372
bool AArch64InstructionSelector::selectShuffleVector(
43104373
MachineInstr &I, MachineRegisterInfo &MRI) const {
43114374
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
@@ -4327,6 +4390,9 @@ bool AArch64InstructionSelector::selectShuffleVector(
43274390
return false;
43284391
}
43294392

4393+
if (tryOptShuffleDupLane(I, DstTy, Src1Ty, Mask, MRI))
4394+
return true;
4395+
43304396
unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
43314397

43324398
SmallVector<Constant *, 64> CstIdxs;

llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
2121
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
2222
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
23+
#include "llvm/CodeGen/GlobalISel/Utils.h"
2324
#include "llvm/CodeGen/MachineDominators.h"
2425
#include "llvm/CodeGen/MachineFunctionPass.h"
2526
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -43,29 +44,6 @@ struct ShuffleVectorPseudo {
4344
ShuffleVectorPseudo() {}
4445
};
4546

46-
/// \returns The splat index of a G_SHUFFLE_VECTOR \p MI when \p MI is a splat.
47-
/// If \p MI is not a splat, returns None.
48-
static Optional<int> getSplatIndex(MachineInstr &MI) {
49-
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
50-
"Only G_SHUFFLE_VECTOR can have a splat index!");
51-
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
52-
auto FirstDefinedIdx = find_if(Mask, [](int Elt) { return Elt >= 0; });
53-
54-
// If all elements are undefined, this shuffle can be considered a splat.
55-
// Return 0 for better potential for callers to simplify.
56-
if (FirstDefinedIdx == Mask.end())
57-
return 0;
58-
59-
// Make sure all remaining elements are either undef or the same
60-
// as the first non-undef value.
61-
int SplatValue = *FirstDefinedIdx;
62-
if (any_of(make_range(std::next(FirstDefinedIdx), Mask.end()),
63-
[&SplatValue](int Elt) { return Elt >= 0 && Elt != SplatValue; }))
64-
return None;
65-
66-
return SplatValue;
67-
}
68-
6947
/// Check if a vector shuffle corresponds to a REV instruction with the
7048
/// specified blocksize.
7149
static bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts,
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
3+
...
4+
---
5+
name: duplane_v16i8
6+
alignment: 4
7+
legalized: true
8+
regBankSelected: true
9+
tracksRegLiveness: true
10+
liveins:
11+
- { reg: '$q0' }
12+
body: |
13+
bb.1:
14+
liveins: $q0
15+
16+
; CHECK-LABEL: name: duplane_v16i8
17+
; CHECK: liveins: $q0
18+
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
19+
; CHECK: [[DUPv16i8lane:%[0-9]+]]:fpr128 = DUPv16i8lane [[COPY]], 0
20+
; CHECK: $q0 = COPY [[DUPv16i8lane]]
21+
; CHECK: RET_ReallyLR implicit $q0
22+
%0:fpr(<16 x s8>) = COPY $q0
23+
%2:fpr(<16 x s8>) = G_IMPLICIT_DEF
24+
%1:fpr(<16 x s8>) = G_SHUFFLE_VECTOR %0(<16 x s8>), %2, shufflemask(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
25+
$q0 = COPY %1(<16 x s8>)
26+
RET_ReallyLR implicit $q0
27+
28+
...
29+
---
30+
name: duplane_v8i16
31+
alignment: 4
32+
legalized: true
33+
regBankSelected: true
34+
tracksRegLiveness: true
35+
liveins:
36+
- { reg: '$q0' }
37+
body: |
38+
bb.1:
39+
liveins: $q0
40+
41+
; CHECK-LABEL: name: duplane_v8i16
42+
; CHECK: liveins: $q0
43+
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
44+
; CHECK: [[DUPv8i16lane:%[0-9]+]]:fpr128 = DUPv8i16lane [[COPY]], 0
45+
; CHECK: $q0 = COPY [[DUPv8i16lane]]
46+
; CHECK: RET_ReallyLR implicit $q0
47+
%0:fpr(<8 x s16>) = COPY $q0
48+
%2:fpr(<8 x s16>) = G_IMPLICIT_DEF
49+
%1:fpr(<8 x s16>) = G_SHUFFLE_VECTOR %0(<8 x s16>), %2, shufflemask(0, 0, 0, 0, 0, 0, 0, 0)
50+
$q0 = COPY %1(<8 x s16>)
51+
RET_ReallyLR implicit $q0
52+
53+
...
54+
---
55+
name: duplane_v4f32
56+
alignment: 4
57+
legalized: true
58+
regBankSelected: true
59+
tracksRegLiveness: true
60+
liveins:
61+
- { reg: '$q0' }
62+
body: |
63+
bb.1:
64+
liveins: $q0
65+
66+
; CHECK-LABEL: name: duplane_v4f32
67+
; CHECK: liveins: $q0
68+
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
69+
; CHECK: [[DUPv4i32lane:%[0-9]+]]:fpr128 = DUPv4i32lane [[COPY]], 0
70+
; CHECK: $q0 = COPY [[DUPv4i32lane]]
71+
; CHECK: RET_ReallyLR implicit $q0
72+
%0:fpr(<4 x s32>) = COPY $q0
73+
%2:fpr(<4 x s32>) = G_IMPLICIT_DEF
74+
%1:fpr(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %2, shufflemask(0, 0, 0, 0)
75+
$q0 = COPY %1(<4 x s32>)
76+
RET_ReallyLR implicit $q0
77+
78+
...
79+
---
80+
name: duplane_v2i64
81+
alignment: 4
82+
legalized: true
83+
regBankSelected: true
84+
tracksRegLiveness: true
85+
liveins:
86+
- { reg: '$q0' }
87+
body: |
88+
bb.1:
89+
liveins: $q0
90+
91+
; CHECK-LABEL: name: duplane_v2i64
92+
; CHECK: liveins: $q0
93+
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
94+
; CHECK: [[DUPv2i64lane:%[0-9]+]]:fpr128 = DUPv2i64lane [[COPY]], 0
95+
; CHECK: $q0 = COPY [[DUPv2i64lane]]
96+
; CHECK: RET_ReallyLR implicit $q0
97+
%0:fpr(<2 x s64>) = COPY $q0
98+
%2:fpr(<2 x s64>) = G_IMPLICIT_DEF
99+
%1:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %2, shufflemask(0, 0)
100+
$q0 = COPY %1(<2 x s64>)
101+
RET_ReallyLR implicit $q0
102+
103+
...

llvm/test/CodeGen/AArch64/GlobalISel/select-shuffle-vector.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ body: |
140140
141141
; CHECK-LABEL: name: shuffle_v2i64
142142
; CHECK: constants:
143-
; CHECK: value: '<16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>'
143+
; CHECK: value: '<16 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>'
144144
; CHECK: alignment: 16
145145
; CHECK: isTargetSpecific: false
146146
; CHECK: liveins: $q0, $q1
@@ -154,7 +154,7 @@ body: |
154154
; CHECK: RET_ReallyLR implicit $q0
155155
%0:fpr(<2 x s64>) = COPY $q0
156156
%1:fpr(<2 x s64>) = COPY $q1
157-
%2:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %1, shufflemask(0, 0)
157+
%2:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %1, shufflemask(1, 0)
158158
$q0 = COPY %2(<2 x s64>)
159159
RET_ReallyLR implicit $q0
160160

0 commit comments

Comments
 (0)