Skip to content

Commit 1262897

Browse files
committed
[AMDGPU] Prefer vectorized i8 shuffles
Change-Id: I0ff7db2e87cd3940ab61629489e64223ead27ee6
1 parent 85abef2 commit 1262897

File tree

3 files changed

+130
-133
lines changed

3 files changed

+130
-133
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1162,12 +1162,21 @@ InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
11621162
if (IsExtractSubvector)
11631163
Kind = TTI::SK_PermuteSingleSrc;
11641164

1165-
if (ST->hasVOP3PInsts()) {
1166-
if (cast<FixedVectorType>(VT)->getNumElements() == 2 &&
1167-
DL.getTypeSizeInBits(VT->getElementType()) == 16) {
1165+
bool IsGFX8Plus = ST->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
1166+
1167+
FixedVectorType *FixedVT = dyn_cast<FixedVectorType>(VT);
1168+
if (FixedVT) {
1169+
unsigned NumElts = FixedVT->getNumElements();
1170+
unsigned ScalarSizeInBits = DL.getTypeSizeInBits(VT->getElementType());
1171+
1172+
if (IsGFX8Plus && ScalarSizeInBits == 8) {
1173+
// On GFX8 and later, i8 vector shuffles can be emitted as v_perm
// instructions; the cost below charges one per group of up to four elements.
1174+
return (NumElts + 3) / 4;
1175+
}
1176+
1177+
if (ST->hasVOP3PInsts() && NumElts == 2 && ScalarSizeInBits == 16) {
11681178
// With op_sel, VOP3P instructions can freely access the low half or high
11691179
// half of a register, so any swizzle is free.
1170-
11711180
switch (Kind) {
11721181
case TTI::SK_Broadcast:
11731182
case TTI::SK_Reverse:

0 commit comments

Comments
 (0)