Skip to content

Commit eb89053

Browse files
committed
[AMDGPU] Vectorize i8 Shuffles
1 parent bc56bcd commit eb89053

File tree

4 files changed

+236
-284
lines changed

4 files changed

+236
-284
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1154,14 +1154,15 @@ InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
11541154

11551155
Kind = improveShuffleKindFromMask(Kind, Mask, VT, Index, SubTp);
11561156

1157-
// Larger vector widths may require additional instructions, but are
1158-
// typically cheaper than scalarized versions.
1159-
unsigned NumVectorElts = cast<FixedVectorType>(VT)->getNumElements();
1157+
unsigned ScalarSize = DL.getTypeSizeInBits(VT->getElementType());
11601158
if (ST->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
1161-
DL.getTypeSizeInBits(VT->getElementType()) == 16) {
1162-
bool HasVOP3P = ST->hasVOP3PInsts();
1159+
(ScalarSize == 16 || ScalarSize == 8)) {
1160+
// Larger vector widths may require additional instructions, but are
1161+
// typically cheaper than scalarized versions.
1162+
unsigned NumVectorElts = cast<FixedVectorType>(VT)->getNumElements();
11631163
unsigned RequestedElts =
11641164
count_if(Mask, [](int MaskElt) { return MaskElt != -1; });
1165+
unsigned EltsPerReg = 32 / ScalarSize;
11651166
if (RequestedElts == 0)
11661167
return 0;
11671168
switch (Kind) {
@@ -1170,9 +1171,9 @@ InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
11701171
case TTI::SK_PermuteSingleSrc: {
11711172
// With op_sel, VOP3P instructions can freely access the low half or high
11721173
// half of a register, so any swizzle of two elements is free.
1173-
if (HasVOP3P && NumVectorElts == 2)
1174+
if (ST->hasVOP3PInsts() && ScalarSize == 16 && NumVectorElts == 2)
11741175
return 0;
1175-
unsigned NumPerms = alignTo(RequestedElts, 2) / 2;
1176+
unsigned NumPerms = alignTo(RequestedElts, EltsPerReg) / EltsPerReg;
11761177
// SK_Broadcast just reuses the same mask
11771178
unsigned NumPermMasks = Kind == TTI::SK_Broadcast ? 1 : NumPerms;
11781179
return NumPerms + NumPermMasks;
@@ -1184,12 +1185,12 @@ InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
11841185
return 0;
11851186
// Insert/extract subvectors only require shifts / extract code to get the
11861187
// relevant bits
1187-
return alignTo(RequestedElts, 2) / 2;
1188+
return alignTo(RequestedElts, EltsPerReg) / EltsPerReg;
11881189
}
11891190
case TTI::SK_PermuteTwoSrc:
11901191
case TTI::SK_Splice:
11911192
case TTI::SK_Select: {
1192-
unsigned NumPerms = alignTo(RequestedElts, 2) / 2;
1193+
unsigned NumPerms = alignTo(RequestedElts, EltsPerReg) / EltsPerReg;
11931194
// SK_Select just reuses the same mask
11941195
unsigned NumPermMasks = Kind == TTI::SK_Select ? 1 : NumPerms;
11951196
return NumPerms + NumPermMasks;

0 commit comments

Comments
 (0)