Skip to content

Commit 827dd80

Browse files
committed
[VectorCombine] foldShuffleOfBinops - add support for length-changing shuffles
Refactor to be closer to foldShuffleOfCastops
1 parent ca9a44e commit 827dd80

File tree

3 files changed

+110
-66
lines changed

3 files changed

+110
-66
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 56 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1394,60 +1394,90 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
13941394
return true;
13951395
}
13961396

1397-
/// Try to convert "shuffle (binop), (binop)" with a shared binop operand into
1398-
/// "binop (shuffle), (shuffle)".
1397+
/// Try to convert "shuffle (binop), (binop)" into "binop (shuffle), (shuffle)".
13991398
bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
1400-
auto *VecTy = cast<FixedVectorType>(I.getType());
14011399
BinaryOperator *B0, *B1;
1402-
ArrayRef<int> Mask;
1400+
ArrayRef<int> OldMask;
14031401
if (!match(&I, m_Shuffle(m_OneUse(m_BinOp(B0)), m_OneUse(m_BinOp(B1)),
1404-
m_Mask(Mask))) ||
1405-
B0->getOpcode() != B1->getOpcode() || B0->getType() != VecTy)
1402+
m_Mask(OldMask))))
14061403
return false;
14071404

14081405
// Don't introduce poison into div/rem.
1409-
if (any_of(Mask, [](int M) { return M == PoisonMaskElem; }) &&
1406+
if (any_of(OldMask, [](int M) { return M == PoisonMaskElem; }) &&
14101407
B0->isIntDivRem())
14111408
return false;
14121409

1413-
// Try to replace a binop with a shuffle if the shuffle is not costly.
1414-
// The new shuffle will choose from a single, common operand, so it may be
1415-
// cheaper than the existing two-operand shuffle.
1416-
SmallVector<int> UnaryMask = createUnaryMask(Mask, Mask.size());
1410+
// TODO: Add support for addlike etc.
14171411
Instruction::BinaryOps Opcode = B0->getOpcode();
1418-
InstructionCost BinopCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
1419-
InstructionCost ShufCost = TTI.getShuffleCost(
1420-
TargetTransformInfo::SK_PermuteSingleSrc, VecTy, UnaryMask);
1421-
if (ShufCost > BinopCost)
1412+
if (Opcode != B1->getOpcode())
1413+
return false;
1414+
1415+
auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
1416+
auto *BinOpTy = dyn_cast<FixedVectorType>(B0->getType());
1417+
if (!ShuffleDstTy || !BinOpTy)
14221418
return false;
14231419

1420+
unsigned NumSrcElts = BinOpTy->getNumElements();
1421+
14241422
// If we have something like "add X, Y" and "add Z, X", swap ops to match.
14251423
Value *X = B0->getOperand(0), *Y = B0->getOperand(1);
14261424
Value *Z = B1->getOperand(0), *W = B1->getOperand(1);
14271425
if (BinaryOperator::isCommutative(Opcode) && X != Z && Y != W)
14281426
std::swap(X, Y);
14291427

1430-
Value *Shuf0, *Shuf1;
1428+
auto ConvertToUnary = [NumSrcElts](int &M) {
1429+
if (M >= (int)NumSrcElts)
1430+
M -= NumSrcElts;
1431+
};
1432+
1433+
SmallVector<int> NewMask0(OldMask.begin(), OldMask.end());
1434+
TargetTransformInfo::ShuffleKind SK0 = TargetTransformInfo::SK_PermuteTwoSrc;
14311435
if (X == Z) {
1432-
// shuf (bo X, Y), (bo X, W) --> bo (shuf X), (shuf Y, W)
1433-
Shuf0 = Builder.CreateShuffleVector(X, UnaryMask);
1434-
Shuf1 = Builder.CreateShuffleVector(Y, W, Mask);
1435-
} else if (Y == W) {
1436-
// shuf (bo X, Y), (bo Z, Y) --> bo (shuf X, Z), (shuf Y)
1437-
Shuf0 = Builder.CreateShuffleVector(X, Z, Mask);
1438-
Shuf1 = Builder.CreateShuffleVector(Y, UnaryMask);
1439-
} else {
1440-
return false;
1436+
llvm::for_each(NewMask0, ConvertToUnary);
1437+
SK0 = TargetTransformInfo::SK_PermuteSingleSrc;
1438+
Z = PoisonValue::get(BinOpTy);
1439+
}
1440+
1441+
SmallVector<int> NewMask1(OldMask.begin(), OldMask.end());
1442+
TargetTransformInfo::ShuffleKind SK1 = TargetTransformInfo::SK_PermuteTwoSrc;
1443+
if (Y == W) {
1444+
llvm::for_each(NewMask1, ConvertToUnary);
1445+
SK1 = TargetTransformInfo::SK_PermuteSingleSrc;
1446+
W = PoisonValue::get(BinOpTy);
14411447
}
14421448

1449+
// Try to replace a binop with a shuffle if the shuffle is not costly.
1450+
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
1451+
1452+
InstructionCost OldCost =
1453+
TTI.getArithmeticInstrCost(B0->getOpcode(), BinOpTy) +
1454+
TTI.getArithmeticInstrCost(B1->getOpcode(), BinOpTy) +
1455+
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, BinOpTy,
1456+
OldMask, CostKind, 0, nullptr, std::nullopt, &I);
1457+
1458+
InstructionCost NewCost =
1459+
TTI.getShuffleCost(SK0, BinOpTy, NewMask0, CostKind) +
1460+
TTI.getShuffleCost(SK1, BinOpTy, NewMask1, CostKind) +
1461+
TTI.getArithmeticInstrCost(Opcode, ShuffleDstTy);
1462+
1463+
LLVM_DEBUG(dbgs() << "Found a shuffle feeding two binops: " << I
1464+
<< "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
1465+
<< "\n");
1466+
if (NewCost > OldCost)
1467+
return false;
1468+
1469+
Value *Shuf0 = Builder.CreateShuffleVector(X, Z, NewMask0);
1470+
Value *Shuf1 = Builder.CreateShuffleVector(Y, W, NewMask1);
14431471
Value *NewBO = Builder.CreateBinOp(Opcode, Shuf0, Shuf1);
1472+
14441473
// Intersect flags from the old binops.
14451474
if (auto *NewInst = dyn_cast<Instruction>(NewBO)) {
14461475
NewInst->copyIRFlags(B0);
14471476
NewInst->andIRFlags(B1);
14481477
}
14491478

1450-
// TODO: Add Shuf0/Shuf1 to WorkList?
1479+
Worklist.pushValue(Shuf0);
1480+
Worklist.pushValue(Shuf1);
14511481
replaceValue(I, *NewBO);
14521482
return true;
14531483
}

llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -564,31 +564,31 @@ define void @v8f64interleave(i64 %0, ptr %1, ptr %x, double %z) {
564564
; CHECK-NEXT: [[STRIDED_VEC40:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> <i32 5, i32 13>
565565
; CHECK-NEXT: [[STRIDED_VEC41:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> <i32 6, i32 14>
566566
; CHECK-NEXT: [[STRIDED_VEC42:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> <i32 7, i32 15>
567-
; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <2 x double> [[STRIDED_VEC35]], [[TMP2]]
568-
; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <2 x double> [[STRIDED_VEC27]], [[BROADCAST_SPLAT]]
569-
; CHECK-NEXT: [[TMP6:%.*]] = fadd fast <2 x double> [[STRIDED_VEC36]], [[TMP5]]
570-
; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <2 x double> [[STRIDED_VEC28]], [[BROADCAST_SPLAT]]
571-
; CHECK-NEXT: [[TMP8:%.*]] = fadd fast <2 x double> [[STRIDED_VEC37]], [[TMP7]]
572-
; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <2 x double> [[STRIDED_VEC29]], [[BROADCAST_SPLAT]]
573-
; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[STRIDED_VEC38]], [[TMP9]]
574-
; CHECK-NEXT: [[TMP11:%.*]] = fmul fast <2 x double> [[STRIDED_VEC30]], [[BROADCAST_SPLAT]]
575-
; CHECK-NEXT: [[TMP12:%.*]] = fadd fast <2 x double> [[STRIDED_VEC39]], [[TMP11]]
576-
; CHECK-NEXT: [[TMP13:%.*]] = fmul fast <2 x double> [[STRIDED_VEC31]], [[BROADCAST_SPLAT]]
577-
; CHECK-NEXT: [[TMP14:%.*]] = fadd fast <2 x double> [[STRIDED_VEC40]], [[TMP13]]
578-
; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <2 x double> [[STRIDED_VEC32]], [[BROADCAST_SPLAT]]
579-
; CHECK-NEXT: [[TMP16:%.*]] = fadd fast <2 x double> [[STRIDED_VEC41]], [[TMP15]]
567+
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x double> [[STRIDED_VEC27]], [[BROADCAST_SPLAT]]
568+
; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <2 x double> [[STRIDED_VEC28]], [[BROADCAST_SPLAT]]
569+
; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <2 x double> [[STRIDED_VEC29]], [[BROADCAST_SPLAT]]
570+
; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <2 x double> [[STRIDED_VEC30]], [[BROADCAST_SPLAT]]
571+
; CHECK-NEXT: [[TMP10:%.*]] = fmul fast <2 x double> [[STRIDED_VEC31]], [[BROADCAST_SPLAT]]
572+
; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <2 x double> [[STRIDED_VEC32]], [[BROADCAST_SPLAT]]
580573
; CHECK-NEXT: [[TMP17:%.*]] = or disjoint i64 [[TMP0]], 7
581-
; CHECK-NEXT: [[TMP18:%.*]] = fmul fast <2 x double> [[STRIDED_VEC33]], [[BROADCAST_SPLAT]]
574+
; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <2 x double> [[STRIDED_VEC33]], [[BROADCAST_SPLAT]]
582575
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP17]]
583-
; CHECK-NEXT: [[TMP20:%.*]] = fadd fast <2 x double> [[STRIDED_VEC42]], [[TMP18]]
584576
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i64 -56
585-
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
586-
; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
587-
; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <2 x double> [[TMP12]], <2 x double> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
588-
; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <2 x double> [[TMP16]], <2 x double> [[TMP20]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
589-
; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x double> [[TMP22]], <4 x double> [[TMP23]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
590-
; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x double> [[TMP24]], <4 x double> [[TMP25]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
591-
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[TMP26]], <8 x double> [[TMP27]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
577+
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[STRIDED_VEC36]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
578+
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[STRIDED_VEC35]], <2 x double> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
579+
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[STRIDED_VEC38]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
580+
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x double> [[STRIDED_VEC37]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
581+
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[STRIDED_VEC40]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
582+
; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x double> [[STRIDED_VEC39]], <2 x double> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
583+
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> [[STRIDED_VEC42]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
584+
; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <2 x double> [[STRIDED_VEC41]], <2 x double> [[TMP12]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
585+
; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <4 x double> [[TMP13]], <4 x double> [[TMP16]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
586+
; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <4 x double> [[TMP14]], <4 x double> [[TMP18]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
587+
; CHECK-NEXT: [[TMP24:%.*]] = fadd fast <8 x double> [[TMP31]], [[TMP32]]
588+
; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x double> [[TMP23]], <4 x double> [[TMP20]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
589+
; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x double> [[TMP22]], <4 x double> [[TMP28]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
590+
; CHECK-NEXT: [[TMP27:%.*]] = fadd fast <8 x double> [[TMP25]], [[TMP26]]
591+
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[TMP24]], <8 x double> [[TMP27]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
592592
; CHECK-NEXT: store <16 x double> [[INTERLEAVED_VEC]], ptr [[TMP21]], align 8
593593
; CHECK-NEXT: ret void
594594
;

llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll

Lines changed: 32 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -84,14 +84,14 @@ define <4 x i32> @shuf_shl_v4i32_xx(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
8484
ret <4 x i32> %r
8585
}
8686

87-
; negative test - common operand, but not commutable
87+
; common operand, but not commutable (expensive vector shift)
8888

8989
define <4 x i32> @shuf_shl_v4i32_xx_swap(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
9090
; CHECK-LABEL: define <4 x i32> @shuf_shl_v4i32_xx_swap(
9191
; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
92-
; CHECK-NEXT: [[B0:%.*]] = shl <4 x i32> [[X]], [[Y]]
93-
; CHECK-NEXT: [[B1:%.*]] = shl <4 x i32> [[Z]], [[X]]
94-
; CHECK-NEXT: [[R1:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> <i32 3, i32 2, i32 2, i32 5>
92+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> [[Z]], <4 x i32> <i32 3, i32 2, i32 2, i32 5>
93+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 3, i32 2, i32 2, i32 5>
94+
; CHECK-NEXT: [[R1:%.*]] = shl <4 x i32> [[TMP1]], [[TMP2]]
9595
; CHECK-NEXT: ret <4 x i32> [[R1]]
9696
;
9797
%b0 = shl <4 x i32> %x, %y
@@ -116,15 +116,22 @@ define <2 x i64> @shuf_sub_add_v2i64_yy(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z
116116
ret <2 x i64> %r
117117
}
118118

119-
; negative test - type change via shuffle
119+
; widen vector (SSE - cheaper fmul vs AVX - cheaper shuffle)
120120

121121
define <8 x float> @shuf_fmul_v4f32_xx_type(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
122-
; CHECK-LABEL: define <8 x float> @shuf_fmul_v4f32_xx_type(
123-
; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
124-
; CHECK-NEXT: [[B0:%.*]] = fmul <4 x float> [[X]], [[Y]]
125-
; CHECK-NEXT: [[B1:%.*]] = fmul <4 x float> [[Z]], [[X]]
126-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[B0]], <4 x float> [[B1]], <8 x i32> <i32 0, i32 3, i32 4, i32 7, i32 0, i32 1, i32 1, i32 6>
127-
; CHECK-NEXT: ret <8 x float> [[R]]
122+
; SSE-LABEL: define <8 x float> @shuf_fmul_v4f32_xx_type(
123+
; SSE-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
124+
; SSE-NEXT: [[B0:%.*]] = fmul <4 x float> [[X]], [[Y]]
125+
; SSE-NEXT: [[B1:%.*]] = fmul <4 x float> [[Z]], [[X]]
126+
; SSE-NEXT: [[R:%.*]] = shufflevector <4 x float> [[B0]], <4 x float> [[B1]], <8 x i32> <i32 0, i32 3, i32 4, i32 7, i32 0, i32 1, i32 1, i32 6>
127+
; SSE-NEXT: ret <8 x float> [[R]]
128+
;
129+
; AVX-LABEL: define <8 x float> @shuf_fmul_v4f32_xx_type(
130+
; AVX-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
131+
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> [[Z]], <8 x i32> <i32 0, i32 3, i32 4, i32 7, i32 0, i32 1, i32 1, i32 6>
132+
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <8 x i32> <i32 0, i32 3, i32 0, i32 3, i32 0, i32 1, i32 1, i32 2>
133+
; AVX-NEXT: [[R:%.*]] = fmul <8 x float> [[TMP1]], [[TMP2]]
134+
; AVX-NEXT: ret <8 x float> [[R]]
128135
;
129136
%b0 = fmul <4 x float> %x, %y
130137
%b1 = fmul <4 x float> %z, %x
@@ -168,15 +175,22 @@ define <4 x i32> @shuf_mul_v4i32_yy_use2(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
168175
ret <4 x i32> %r
169176
}
170177

171-
; negative test - must have matching operand
178+
; no matching operand required (SSE - cheaper shuffle vs AVX - cheaper fadd)
172179

173180
define <4 x float> @shuf_fadd_v4f32_no_common_op(<4 x float> %x, <4 x float> %y, <4 x float> %z, <4 x float> %w) {
174-
; CHECK-LABEL: define <4 x float> @shuf_fadd_v4f32_no_common_op(
175-
; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]], <4 x float> [[W:%.*]]) #[[ATTR0]] {
176-
; CHECK-NEXT: [[B0:%.*]] = fadd <4 x float> [[X]], [[Y]]
177-
; CHECK-NEXT: [[B1:%.*]] = fadd <4 x float> [[Z]], [[W]]
178-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[B0]], <4 x float> [[B1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
179-
; CHECK-NEXT: ret <4 x float> [[R]]
181+
; SSE-LABEL: define <4 x float> @shuf_fadd_v4f32_no_common_op(
182+
; SSE-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]], <4 x float> [[W:%.*]]) #[[ATTR0]] {
183+
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> [[Z]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
184+
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[W]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
185+
; SSE-NEXT: [[R:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
186+
; SSE-NEXT: ret <4 x float> [[R]]
187+
;
188+
; AVX-LABEL: define <4 x float> @shuf_fadd_v4f32_no_common_op(
189+
; AVX-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]], <4 x float> [[W:%.*]]) #[[ATTR0]] {
190+
; AVX-NEXT: [[B0:%.*]] = fadd <4 x float> [[X]], [[Y]]
191+
; AVX-NEXT: [[B1:%.*]] = fadd <4 x float> [[Z]], [[W]]
192+
; AVX-NEXT: [[R:%.*]] = shufflevector <4 x float> [[B0]], <4 x float> [[B1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
193+
; AVX-NEXT: ret <4 x float> [[R]]
180194
;
181195
%b0 = fadd <4 x float> %x, %y
182196
%b1 = fadd <4 x float> %z, %w

0 commit comments

Comments
 (0)