-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[VectorCombine] foldShuffleOfShuffles - allow fold with only single shuffle operand. #119354
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…huffle operand. foldShuffleOfShuffles already handles "shuffle (shuffle x, undef), (shuffle y, undef)" patterns, this patch relaxes the requirement so it can handle cases where only a single operand is a shuffle (and the other can be any other value and will be kept in place). Fixes llvm#86068
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-transforms Author: Simon Pilgrim (RKSimon) ChangesfoldShuffleOfShuffles already handles "shuffle (shuffle x, undef), (shuffle y, undef)" patterns, this patch relaxes the requirement so it can handle cases where only a single operand is a shuffle (and the other can be any other value and will be kept in place). Fixes #86068 Patch is 21.82 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/119354.diff 4 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index ebbd05e6d47afc..033900de55278c 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1706,21 +1706,30 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
return true;
}
-/// Try to convert "shuffle (shuffle x, undef), (shuffle y, undef)"
+/// Try to convert any of:
+/// "shuffle (shuffle x, undef), (shuffle y, undef)"
+/// "shuffle (shuffle x, undef), y"
+/// "shuffle x, (shuffle y, undef)"
/// into "shuffle x, y".
bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
- Value *V0, *V1;
- UndefValue *U0, *U1;
- ArrayRef<int> OuterMask, InnerMask0, InnerMask1;
+ ArrayRef<int> OuterMask;
+ Value *OuterV0, *OuterV1;
if (!match(&I,
- m_Shuffle(
- m_Shuffle(m_Value(V0), m_UndefValue(U0), m_Mask(InnerMask0)),
- m_Shuffle(m_Value(V1), m_UndefValue(U1), m_Mask(InnerMask1)),
- m_Mask(OuterMask))))
+ m_Shuffle(m_Value(OuterV0), m_Value(OuterV1), m_Mask(OuterMask))))
return false;
- auto *ShufI0 = dyn_cast<Instruction>(I.getOperand(0));
- auto *ShufI1 = dyn_cast<Instruction>(I.getOperand(1));
+ ArrayRef<int> InnerMask0, InnerMask1;
+ Value *V0 = nullptr, *V1 = nullptr;
+ UndefValue *U0 = nullptr, *U1 = nullptr;
+ bool Match0 = match(
+ OuterV0, m_Shuffle(m_Value(V0), m_UndefValue(U0), m_Mask(InnerMask0)));
+ bool Match1 = match(
+ OuterV1, m_Shuffle(m_Value(V1), m_UndefValue(U1), m_Mask(InnerMask1)));
+ if (!Match0 && !Match1)
+ return false;
+
+ V0 = Match0 ? V0 : OuterV0;
+ V1 = Match1 ? V1 : OuterV1;
auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
auto *ShuffleSrcTy = dyn_cast<FixedVectorType>(V0->getType());
auto *ShuffleImmTy = dyn_cast<FixedVectorType>(I.getOperand(0)->getType());
@@ -1732,9 +1741,9 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
unsigned NumImmElts = ShuffleImmTy->getNumElements();
// Bail if either inner masks reference a RHS undef arg.
- if ((!isa<PoisonValue>(U0) &&
+ if ((Match0 && !isa<PoisonValue>(U0) &&
any_of(InnerMask0, [&](int M) { return M >= (int)NumSrcElts; })) ||
- (!isa<PoisonValue>(U1) &&
+ (Match1 && !isa<PoisonValue>(U1) &&
any_of(InnerMask1, [&](int M) { return M >= (int)NumSrcElts; })))
return false;
@@ -1742,12 +1751,15 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
SmallVector<int, 16> NewMask(OuterMask);
for (int &M : NewMask) {
if (0 <= M && M < (int)NumImmElts) {
- M = (InnerMask0[M] >= (int)NumSrcElts) ? PoisonMaskElem : InnerMask0[M];
+ if (Match0)
+ M = (InnerMask0[M] >= (int)NumSrcElts) ? PoisonMaskElem : InnerMask0[M];
} else if (M >= (int)NumImmElts) {
- if (InnerMask1[M - NumImmElts] >= (int)NumSrcElts)
- M = PoisonMaskElem;
- else
- M = InnerMask1[M - NumImmElts] + (V0 == V1 ? 0 : NumSrcElts);
+ if (Match1) {
+ if (InnerMask1[M - NumImmElts] >= (int)NumSrcElts)
+ M = PoisonMaskElem;
+ else
+ M = InnerMask1[M - NumImmElts] + (V0 == V1 ? 0 : NumSrcElts);
+ }
}
}
@@ -1758,23 +1770,30 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
}
// Try to merge the shuffles if the new shuffle is not costly.
- InstructionCost InnerCost0 =
- TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, ShuffleSrcTy,
- InnerMask0, CostKind, 0, nullptr, {V0, U0}, ShufI0);
- InstructionCost InnerCost1 =
- TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, ShuffleSrcTy,
- InnerMask1, CostKind, 0, nullptr, {V1, U1}, ShufI1);
- InstructionCost OuterCost =
- TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, ShuffleImmTy,
- OuterMask, CostKind, 0, nullptr, {ShufI0, ShufI1}, &I);
+ InstructionCost InnerCost0 = 0;
+ if (Match0)
+ InnerCost0 = TTI.getShuffleCost(
+ TargetTransformInfo::SK_PermuteSingleSrc, ShuffleSrcTy, InnerMask0,
+ CostKind, 0, nullptr, {V0, U0}, cast<ShuffleVectorInst>(OuterV0));
+
+ InstructionCost InnerCost1 = 0;
+ if (Match1)
+ InnerCost1 = TTI.getShuffleCost(
+ TargetTransformInfo::SK_PermuteSingleSrc, ShuffleSrcTy, InnerMask1,
+ CostKind, 0, nullptr, {V1, U1}, cast<ShuffleVectorInst>(OuterV1));
+
+ InstructionCost OuterCost = TTI.getShuffleCost(
+ TargetTransformInfo::SK_PermuteTwoSrc, ShuffleImmTy, OuterMask, CostKind,
+ 0, nullptr, {OuterV0, OuterV1}, &I);
+
InstructionCost OldCost = InnerCost0 + InnerCost1 + OuterCost;
InstructionCost NewCost =
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, ShuffleSrcTy,
NewMask, CostKind, 0, nullptr, {V0, V1});
- if (!ShufI0->hasOneUse())
+ if (!OuterV0->hasOneUse())
NewCost += InnerCost0;
- if (!ShufI1->hasOneUse())
+ if (!OuterV1->hasOneUse())
NewCost += InnerCost1;
LLVM_DEBUG(dbgs() << "Found a shuffle feeding two shuffles: " << I
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/shuffle-inseltpoison.ll b/llvm/test/Transforms/PhaseOrdering/X86/shuffle-inseltpoison.ll
index 719d6df77cb7d5..9b03433e00cbc8 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/shuffle-inseltpoison.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/shuffle-inseltpoison.ll
@@ -159,13 +159,11 @@ define <4 x i32> @shuffle_8_add_32_masks_are_eq_and_can_be_converted_up(<16 x i8
}
; shuffle<8 x i16>( bitcast<8 x i16>( shuffle<4 x i32>(v)))
-; TODO: Squash shuffles and widen type?
define <8 x i16> @shuffle_32_bitcast_16_shuffle_16_can_be_converted_up(<4 x i32> %v1) {
; CHECK-LABEL: @shuffle_32_bitcast_16_shuffle_16_can_be_converted_up(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V1:%.*]] to <8 x i16>
-; CHECK-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 2, i32 3, i32 0, i32 1>
-; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x i16> [[BC1]], <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 2, i32 3, i32 0, i32 1>
+; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
; CHECK-NEXT: ret <8 x i16> [[SHUFFLE2]]
;
%shuffle1 = shufflevector <4 x i32> %v1, <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
@@ -175,13 +173,11 @@ define <8 x i16> @shuffle_32_bitcast_16_shuffle_16_can_be_converted_up(<4 x i32>
}
; shuffle<8 x i16>( bitcast<8 x i16>( shuffle<4 x i32>(v)))
-; TODO: Squash shuffles?
define <8 x i16> @shuffle_32_bitcast_16_shuffle_16_can_not_be_converted_up(<4 x i32> %v1) {
; CHECK-LABEL: @shuffle_32_bitcast_16_shuffle_16_can_not_be_converted_up(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V1:%.*]] to <8 x i16>
-; CHECK-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 2, i32 3, i32 0, i32 1>
-; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x i16> [[BC1]], <8 x i16> poison, <8 x i32> <i32 5, i32 4, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 3, i32 2, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: ret <8 x i16> [[SHUFFLE2]]
;
%shuffle1 = shufflevector <4 x i32> %v1, <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
@@ -191,13 +187,11 @@ define <8 x i16> @shuffle_32_bitcast_16_shuffle_16_can_not_be_converted_up(<4 x
}
; shuffle<16 x i8>( bitcast<16 x i8>( shuffle<4 x i32>(v)))
-; TODO: Squash shuffles and widen type?
define <16 x i8> @shuffle_32_bitcast_8_shuffle_8_can_be_converted_up(<4 x i32> %v1) {
; CHECK-LABEL: @shuffle_32_bitcast_8_shuffle_8_can_be_converted_up(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V1:%.*]] to <16 x i8>
-; CHECK-NEXT: [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <16 x i8> [[BC1]], <16 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT: ret <16 x i8> [[SHUFFLE2]]
;
%shuffle1 = shufflevector <4 x i32> %v1, <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
@@ -207,13 +201,11 @@ define <16 x i8> @shuffle_32_bitcast_8_shuffle_8_can_be_converted_up(<4 x i32> %
}
; shuffle<16 x i8>( bitcast<16 x i8>( shuffle<4 x i32>(v)))
-; TODO: Squash shuffles?
define <16 x i8> @shuffle_32_bitcast_8_shuffle_8_can_not_be_converted_up(<4 x i32> %v1) {
; CHECK-LABEL: @shuffle_32_bitcast_8_shuffle_8_can_not_be_converted_up(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V1:%.*]] to <16 x i8>
-; CHECK-NEXT: [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <16 x i8> [[BC1]], <16 x i8> poison, <16 x i32> <i32 5, i32 4, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 13, i32 12, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <16 x i8> [[SHUFFLE2]]
;
%shuffle1 = shufflevector <4 x i32> %v1, <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
@@ -223,13 +215,11 @@ define <16 x i8> @shuffle_32_bitcast_8_shuffle_8_can_not_be_converted_up(<4 x i3
}
; shuffle<4 x i32>( bitcast<4 x i32>( shuffle<16 x i8>(v)))
-; TODO: squash shuffles?
define <4 x i32> @shuffle_8_bitcast_32_shuffle_32_can_be_converted_up(<16 x i8> %v1) {
; CHECK-LABEL: @shuffle_8_bitcast_32_shuffle_32_can_be_converted_up(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[V1:%.*]] to <4 x i32>
-; CHECK-NEXT: [[BC1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
-; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x i32> [[BC1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
+; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT: ret <4 x i32> [[SHUFFLE2]]
;
%shuffle1 = shufflevector <16 x i8> %v1, <16 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
@@ -239,13 +229,11 @@ define <4 x i32> @shuffle_8_bitcast_32_shuffle_32_can_be_converted_up(<16 x i8>
}
; shuffle<4 x i32>( bitcast<4 x i32>( shuffle<8 x i16>(v)))
-; TODO: squash shuffles?
define <4 x i32> @shuffle_16_bitcast_32_shuffle_32_can_be_converted_up(<8 x i16> %v1) {
; CHECK-LABEL: @shuffle_16_bitcast_32_shuffle_32_can_be_converted_up(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V1:%.*]] to <4 x i32>
-; CHECK-NEXT: [[BC1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
-; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x i32> [[BC1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
+; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT: ret <4 x i32> [[SHUFFLE2]]
;
%shuffle1 = shufflevector <8 x i16> %v1, <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 2, i32 3, i32 0, i32 1>
@@ -287,13 +275,11 @@ define <4 x i32> @shuffle_16_bitcast_32_shuffle_32_can_not_be_converted_up(<8 x
}
; shuffle<8 x i16>( bitcast<8 x i16>( shuffle<16 x i8>(v)))
-; TODO: squash shuffles and widen type?
define <8 x i16> @shuffle_8_bitcast_16_shuffle_16_can__be_converted_up(<16 x i8> %v1) {
; CHECK-LABEL: @shuffle_8_bitcast_16_shuffle_16_can__be_converted_up(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[V1:%.*]] to <8 x i16>
-; CHECK-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 2, i32 3, i32 0, i32 1>
-; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x i16> [[BC1]], <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 2, i32 3, i32 0, i32 1>
+; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
; CHECK-NEXT: ret <8 x i16> [[SHUFFLE2]]
;
%shuffle1 = shufflevector <16 x i8> %v1, <16 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/shuffle.ll b/llvm/test/Transforms/PhaseOrdering/X86/shuffle.ll
index 930ce6a7b7d677..29464fec8e7a2e 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/shuffle.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/shuffle.ll
@@ -159,13 +159,11 @@ define <4 x i32> @shuffle_8_add_32_masks_are_eq_and_can_be_converted_up(<16 x i8
}
; shuffle<8 x i16>( bitcast<8 x i16>( shuffle<4 x i32>(v)))
-; TODO: Squash shuffles and widen type?
define <8 x i16> @shuffle_32_bitcast_16_shuffle_16_can_be_converted_up(<4 x i32> %v1) {
; CHECK-LABEL: @shuffle_32_bitcast_16_shuffle_16_can_be_converted_up(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V1:%.*]] to <8 x i16>
-; CHECK-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 2, i32 3, i32 0, i32 1>
-; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x i16> [[BC1]], <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 2, i32 3, i32 0, i32 1>
+; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
; CHECK-NEXT: ret <8 x i16> [[SHUFFLE2]]
;
%shuffle1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
@@ -175,13 +173,11 @@ define <8 x i16> @shuffle_32_bitcast_16_shuffle_16_can_be_converted_up(<4 x i32>
}
; shuffle<8 x i16>( bitcast<8 x i16>( shuffle<4 x i32>(v)))
-; TODO: Squash shuffles?
define <8 x i16> @shuffle_32_bitcast_16_shuffle_16_can_not_be_converted_up(<4 x i32> %v1) {
; CHECK-LABEL: @shuffle_32_bitcast_16_shuffle_16_can_not_be_converted_up(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V1:%.*]] to <8 x i16>
-; CHECK-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 2, i32 3, i32 0, i32 1>
-; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x i16> [[BC1]], <8 x i16> poison, <8 x i32> <i32 5, i32 4, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 3, i32 2, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: ret <8 x i16> [[SHUFFLE2]]
;
%shuffle1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
@@ -191,13 +187,11 @@ define <8 x i16> @shuffle_32_bitcast_16_shuffle_16_can_not_be_converted_up(<4 x
}
; shuffle<16 x i8>( bitcast<16 x i8>( shuffle<4 x i32>(v)))
-; TODO: Squash shuffles and widen type?
define <16 x i8> @shuffle_32_bitcast_8_shuffle_8_can_be_converted_up(<4 x i32> %v1) {
; CHECK-LABEL: @shuffle_32_bitcast_8_shuffle_8_can_be_converted_up(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V1:%.*]] to <16 x i8>
-; CHECK-NEXT: [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <16 x i8> [[BC1]], <16 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT: ret <16 x i8> [[SHUFFLE2]]
;
%shuffle1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
@@ -207,13 +201,11 @@ define <16 x i8> @shuffle_32_bitcast_8_shuffle_8_can_be_converted_up(<4 x i32> %
}
; shuffle<16 x i8>( bitcast<16 x i8>( shuffle<4 x i32>(v)))
-; TODO: Squash shuffles?
define <16 x i8> @shuffle_32_bitcast_8_shuffle_8_can_not_be_converted_up(<4 x i32> %v1) {
; CHECK-LABEL: @shuffle_32_bitcast_8_shuffle_8_can_not_be_converted_up(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V1:%.*]] to <16 x i8>
-; CHECK-NEXT: [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <16 x i8> [[BC1]], <16 x i8> poison, <16 x i32> <i32 5, i32 4, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 13, i32 12, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <16 x i8> [[SHUFFLE2]]
;
%shuffle1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
@@ -223,13 +215,11 @@ define <16 x i8> @shuffle_32_bitcast_8_shuffle_8_can_not_be_converted_up(<4 x i3
}
; shuffle<4 x i32>( bitcast<4 x i32>( shuffle<16 x i8>(v)))
-; TODO: squash shuffles?
define <4 x i32> @shuffle_8_bitcast_32_shuffle_32_can_be_converted_up(<16 x i8> %v1) {
; CHECK-LABEL: @shuffle_8_bitcast_32_shuffle_32_can_be_converted_up(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[V1:%.*]] to <4 x i32>
-; CHECK-NEXT: [[BC1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
-; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x i32> [[BC1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
+; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT: ret <4 x i32> [[SHUFFLE2]]
;
%shuffle1 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
@@ -239,13 +229,11 @@ define <4 x i32> @shuffle_8_bitcast_32_shuffle_32_can_be_converted_up(<16 x i8>
}
; shuffle<4 x i32>( bitcast<4 x i32>( shuffle<8 x i16>(v)))
-; TODO: squash shuffles?
define <4 x i32> @shuffle_16_bitcast_32_shuffle_32_can_be_converted_up(<8 x i16> %v1) {
; CHECK-LABEL: @shuffle_16_bitcast_32_shuffle_32_can_be_converted_up(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V1:%.*]] to <4 x i32>
-; CHECK-NEXT: [[BC1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
-; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x i32> [[BC1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
+; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT: ret <4 x i32> [[SHUFFLE2]]
;
%shuffle1 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 2, i32 3, i32 0, i32 1>
@@ -287,13 +275,11 @@ define <4 x i32> @shuffle_16_bitcast_32_shuffle_32_can_not_be_converted_up(<8 x
}
; shuffle<8 x i16>( bitcast<8 x i16>( shuffle<16 x i8>(v)))
-; TODO: squash shuffles and widen type?
define <8 x i16> @shuffle_8_bitcast_16_shuffle_16_can__be_converted_up(<16 x i8> %v1) {
; CHECK-LABEL: @shuffle_8_bitcast_16_...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Thank you!
foldShuffleOfShuffles already handles "shuffle (shuffle x, undef), (shuffle y, undef)" patterns, this patch relaxes the requirement so it can handle cases where only a single operand is a shuffle (and the other can be any other value and will be kept in place).
Fixes #86068