Skip to content

Commit ada24ae

Browse files
committed
Revert 2ac85d8 "[VectorCombine] foldBitcastShuf - add support for binary shuffles"
This breaks some tests in other subprojects; it will be recommitted with a fix later.
1 parent 98c6bc5 commit ada24ae

File tree

2 files changed

+11
-45
lines changed

2 files changed

+11
-45
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -684,10 +684,10 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) {
684684
/// destination type followed by shuffle. This can enable further transforms by
685685
/// moving bitcasts or shuffles together.
686686
bool VectorCombine::foldBitcastShuffle(Instruction &I) {
687-
Value *V0, *V1;
687+
Value *V0;
688688
ArrayRef<int> Mask;
689689
if (!match(&I, m_BitCast(m_OneUse(
690-
m_Shuffle(m_Value(V0), m_Value(V1), m_Mask(Mask))))))
690+
m_Shuffle(m_Value(V0), m_Undef(), m_Mask(Mask))))))
691691
return false;
692692

693693
// 1) Do not fold bitcast shuffle for scalable type. First, shuffle cost for
@@ -728,33 +728,28 @@ bool VectorCombine::foldBitcastShuffle(Instruction &I) {
728728
FixedVectorType::get(DestTy->getScalarType(), NumSrcElts);
729729
auto *OldShuffleTy =
730730
FixedVectorType::get(SrcTy->getScalarType(), Mask.size());
731-
bool IsUnary = isa<UndefValue>(V1);
732-
unsigned NumOps = IsUnary ? 1 : 2;
733731

734732
// The new shuffle must not cost more than the old shuffle.
735733
TargetTransformInfo::TargetCostKind CK =
736734
TargetTransformInfo::TCK_RecipThroughput;
737735
TargetTransformInfo::ShuffleKind SK =
738-
IsUnary ? TargetTransformInfo::SK_PermuteSingleSrc
739-
: TargetTransformInfo::SK_PermuteTwoSrc;
736+
TargetTransformInfo::SK_PermuteSingleSrc;
740737

741738
InstructionCost DestCost =
742739
TTI.getShuffleCost(SK, NewShuffleTy, NewMask, CK) +
743-
(NumOps * TTI.getCastInstrCost(Instruction::BitCast, NewShuffleTy, SrcTy,
744-
TargetTransformInfo::CastContextHint::None,
745-
CK));
740+
TTI.getCastInstrCost(Instruction::BitCast, NewShuffleTy, SrcTy,
741+
TargetTransformInfo::CastContextHint::None, CK);
746742
InstructionCost SrcCost =
747743
TTI.getShuffleCost(SK, SrcTy, Mask, CK) +
748744
TTI.getCastInstrCost(Instruction::BitCast, DestTy, OldShuffleTy,
749745
TargetTransformInfo::CastContextHint::None, CK);
750746
if (DestCost > SrcCost || !DestCost.isValid())
751747
return false;
752748

753-
// bitcast (shuf V0, V1, MaskC) --> shuf (bitcast V0), (bitcast V1), MaskC'
749+
// bitcast (shuf V0, MaskC) --> shuf (bitcast V0), MaskC'
754750
++NumShufOfBitcast;
755-
Value *CastV0 = Builder.CreateBitCast(V0, NewShuffleTy);
756-
Value *CastV1 = Builder.CreateBitCast(V1, NewShuffleTy);
757-
Value *Shuf = Builder.CreateShuffleVector(CastV0, CastV1, NewMask);
751+
Value *CastV = Builder.CreateBitCast(V0, NewShuffleTy);
752+
Value *Shuf = Builder.CreateShuffleVector(CastV, NewMask);
758753
replaceValue(I, *Shuf);
759754
return true;
760755
}

llvm/test/Transforms/PhaseOrdering/X86/pr67803.ll

Lines changed: 3 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK
3-
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK
4-
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX512
2+
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s
3+
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s
4+
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s
55

66
define <4 x i64> @PR67803(<4 x i64> %x, <4 x i64> %y, <4 x i64> %a, <4 x i64> %b) {
77
; CHECK-LABEL: @PR67803(
@@ -35,35 +35,6 @@ define <4 x i64> @PR67803(<4 x i64> %x, <4 x i64> %y, <4 x i64> %a, <4 x i64> %b
3535
; CHECK-NEXT: [[SHUFFLE_I23:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> [[TMP20]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3636
; CHECK-NEXT: ret <4 x i64> [[SHUFFLE_I23]]
3737
;
38-
; AVX512-LABEL: @PR67803(
39-
; AVX512-NEXT: entry:
40-
; AVX512-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[X:%.*]] to <8 x i32>
41-
; AVX512-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[Y:%.*]] to <8 x i32>
42-
; AVX512-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i32> [[TMP0]], [[TMP1]]
43-
; AVX512-NEXT: [[CMP_I21:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
44-
; AVX512-NEXT: [[SEXT_I22:%.*]] = sext <4 x i1> [[CMP_I21]] to <4 x i32>
45-
; AVX512-NEXT: [[CMP_I:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
46-
; AVX512-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
47-
; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[SEXT_I22]], <4 x i32> [[SEXT_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
48-
; AVX512-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[A:%.*]] to <32 x i8>
49-
; AVX512-NEXT: [[TMP5:%.*]] = shufflevector <32 x i8> [[TMP4]], <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
50-
; AVX512-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[B:%.*]] to <32 x i8>
51-
; AVX512-NEXT: [[TMP7:%.*]] = shufflevector <32 x i8> [[TMP6]], <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
52-
; AVX512-NEXT: [[TMP8:%.*]] = bitcast <8 x i32> [[TMP3]] to <32 x i8>
53-
; AVX512-NEXT: [[TMP9:%.*]] = shufflevector <32 x i8> [[TMP8]], <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
54-
; AVX512-NEXT: [[TMP10:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[TMP5]], <16 x i8> [[TMP7]], <16 x i8> [[TMP9]])
55-
; AVX512-NEXT: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
56-
; AVX512-NEXT: [[TMP12:%.*]] = bitcast <4 x i64> [[A]] to <32 x i8>
57-
; AVX512-NEXT: [[TMP13:%.*]] = shufflevector <32 x i8> [[TMP12]], <32 x i8> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
58-
; AVX512-NEXT: [[TMP14:%.*]] = bitcast <4 x i64> [[B]] to <32 x i8>
59-
; AVX512-NEXT: [[TMP15:%.*]] = shufflevector <32 x i8> [[TMP14]], <32 x i8> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
60-
; AVX512-NEXT: [[TMP16:%.*]] = bitcast <8 x i32> [[TMP3]] to <32 x i8>
61-
; AVX512-NEXT: [[TMP17:%.*]] = shufflevector <32 x i8> [[TMP16]], <32 x i8> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
62-
; AVX512-NEXT: [[TMP18:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[TMP13]], <16 x i8> [[TMP15]], <16 x i8> [[TMP17]])
63-
; AVX512-NEXT: [[TMP19:%.*]] = bitcast <16 x i8> [[TMP18]] to <2 x i64>
64-
; AVX512-NEXT: [[SHUFFLE_I23:%.*]] = shufflevector <2 x i64> [[TMP11]], <2 x i64> [[TMP19]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
65-
; AVX512-NEXT: ret <4 x i64> [[SHUFFLE_I23]]
66-
;
6738
entry:
6839
%0 = bitcast <4 x i64> %x to <8 x i32>
6940
%extract = shufflevector <8 x i32> %0, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>

0 commit comments

Comments
 (0)