Skip to content

Commit 59a67ea

Browse files
committed
[SLP]Improve costs in computeExtractCost() to avoid crash after D158449.
The cost computation needs to consider the length of the original source vector of the extractelement instructions, not the number of matched scalars. This fixes 2 issues: 1) it improves the cost estimation; 2) it fixes crashes after D158449.
1 parent 6bbccc0 commit 59a67ea

14 files changed

+484
-476
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 58 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7084,30 +7084,58 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
70847084
/// extracted values from \p VL.
70857085
InstructionCost computeExtractCost(ArrayRef<Value *> VL, ArrayRef<int> Mask,
70867086
TTI::ShuffleKind ShuffleKind) {
7087-
auto *VecTy = FixedVectorType::get(VL.front()->getType(), VL.size());
7087+
unsigned NumElts = 0;
7088+
for (Value *V : VL) {
7089+
auto *EE = dyn_cast<ExtractElementInst>(V);
7090+
if (!EE)
7091+
continue;
7092+
auto *VecTy = cast<FixedVectorType>(EE->getVectorOperandType());
7093+
NumElts = std::max(NumElts, VecTy->getNumElements());
7094+
}
7095+
assert(NumElts > 1 &&
7096+
"Expected at least 2-element fixed length vector(s).");
7097+
auto *VecTy = FixedVectorType::get(VL.front()->getType(), NumElts);
70887098
unsigned NumOfParts = TTI.getNumberOfParts(VecTy);
7089-
7090-
if (ShuffleKind != TargetTransformInfo::SK_PermuteSingleSrc ||
7091-
!NumOfParts || VecTy->getNumElements() < NumOfParts)
7099+
if (!NumOfParts || NumElts < NumOfParts)
7100+
return TTI.getShuffleCost(ShuffleKind, VecTy, Mask);
7101+
unsigned EltsPerVector = PowerOf2Ceil(divideCeil(NumElts, NumOfParts));
7102+
int ValNum = -1;
7103+
int ValIdx = -1;
7104+
// Check that if trying to permute 2 input vectors (which may result in
7105+
// several vector registers), each per-register subvector is the result of
7106+
// the permutation of 2 single registers.
7107+
if (ShuffleKind != TargetTransformInfo::SK_PermuteSingleSrc &&
7108+
!all_of(enumerate(Mask), [&](auto &&Arg) {
7109+
if (Arg.value() == PoisonMaskElem)
7110+
return true;
7111+
int CurValNum = (Arg.value() % NumElts) / EltsPerVector;
7112+
int CurValIdx = Arg.index() / EltsPerVector;
7113+
if (ValIdx != CurValIdx) {
7114+
ValIdx = CurValIdx;
7115+
ValNum = CurValNum;
7116+
return true;
7117+
}
7118+
return CurValNum == ValNum;
7119+
}))
70927120
return TTI.getShuffleCost(ShuffleKind, VecTy, Mask);
70937121

7094-
bool AllConsecutive = true;
7095-
unsigned EltsPerVector = VecTy->getNumElements() / NumOfParts;
7096-
unsigned Idx = -1;
70977122
InstructionCost Cost = 0;
70987123

70997124
// Process extracts in blocks of EltsPerVector to check if the source vector
71007125
// operand can be re-used directly. If not, add the cost of creating a
71017126
// shuffle to extract the values into a vector register.
7127+
auto *RegisterVecTy =
7128+
FixedVectorType::get(VL.front()->getType(), EltsPerVector);
71027129
SmallVector<int> RegMask(EltsPerVector, PoisonMaskElem);
7103-
for (auto *V : VL) {
7104-
++Idx;
7105-
7130+
TTI::ShuffleKind RegisterSK = TargetTransformInfo::SK_PermuteSingleSrc;
7131+
Value *VecBase = nullptr;
7132+
bool IsIdentity = true;
7133+
for (auto [Idx, V] : enumerate(VL)) {
71067134
// Reached the start of a new vector register.
71077135
if (Idx % EltsPerVector == 0) {
71087136
RegMask.assign(EltsPerVector, PoisonMaskElem);
7109-
AllConsecutive = true;
7110-
continue;
7137+
RegisterSK = TargetTransformInfo::SK_PermuteSingleSrc;
7138+
VecBase = nullptr;
71117139
}
71127140

71137141
// Need to exclude undefs from analysis.
@@ -7117,14 +7145,26 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
71177145
// Check all extracts for a vector register on the target directly
71187146
// extract values in order.
71197147
unsigned CurrentIdx = *getExtractIndex(cast<Instruction>(V));
7120-
if (!isa<UndefValue>(VL[Idx - 1]) && Mask[Idx - 1] != PoisonMaskElem) {
7121-
unsigned PrevIdx = *getExtractIndex(cast<Instruction>(VL[Idx - 1]));
7122-
AllConsecutive &= PrevIdx + 1 == CurrentIdx &&
7123-
CurrentIdx % EltsPerVector == Idx % EltsPerVector;
7148+
unsigned PrevIdx = CurrentIdx;
7149+
if (Idx % EltsPerVector != 0 && !isa<UndefValue>(VL[Idx - 1]) &&
7150+
Mask[Idx - 1] != PoisonMaskElem)
7151+
PrevIdx = *getExtractIndex(cast<Instruction>(VL[Idx - 1])) + 1;
7152+
if (!VecBase) {
7153+
VecBase = cast<ExtractElementInst>(V)->getVectorOperand();
7154+
RegMask[Idx % EltsPerVector] = CurrentIdx % EltsPerVector;
7155+
IsIdentity = CurrentIdx % EltsPerVector == Idx % EltsPerVector;
7156+
} else if (VecBase != cast<ExtractElementInst>(V)->getVectorOperand()) {
7157+
IsIdentity = false;
7158+
RegisterSK = TargetTransformInfo::SK_PermuteTwoSrc;
7159+
RegMask[Idx % EltsPerVector] =
7160+
CurrentIdx % EltsPerVector + EltsPerVector;
7161+
} else {
7162+
IsIdentity &= PrevIdx == CurrentIdx &&
7163+
CurrentIdx % EltsPerVector == Idx % EltsPerVector;
71247164
RegMask[Idx % EltsPerVector] = CurrentIdx % EltsPerVector;
71257165
}
71267166

7127-
if (AllConsecutive)
7167+
if (IsIdentity)
71287168
continue;
71297169

71307170
// Skip all indices, except for the last index per vector block.
@@ -7134,10 +7174,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
71347174
// If we have a series of extracts which are not consecutive and hence
71357175
// cannot re-use the source vector register directly, compute the shuffle
71367176
// cost to extract the vector with EltsPerVector elements.
7137-
Cost += TTI.getShuffleCost(
7138-
TargetTransformInfo::SK_PermuteSingleSrc,
7139-
FixedVectorType::get(VecTy->getElementType(), EltsPerVector),
7140-
RegMask);
7177+
Cost += TTI.getShuffleCost(RegisterSK, RegisterVecTy, RegMask);
71417178
}
71427179
return Cost;
71437180
}

llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -291,14 +291,19 @@ define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) {
291291
;
292292
; GFX8-LABEL: @uadd_sat_v4i16(
293293
; GFX8-NEXT: bb:
294-
; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
295-
; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
294+
; GFX8-NEXT: [[ARG0_2:%.*]] = extractelement <4 x i16> [[ARG0:%.*]], i64 2
295+
; GFX8-NEXT: [[ARG0_3:%.*]] = extractelement <4 x i16> [[ARG0]], i64 3
296+
; GFX8-NEXT: [[ARG1_2:%.*]] = extractelement <4 x i16> [[ARG1:%.*]], i64 2
297+
; GFX8-NEXT: [[ARG1_3:%.*]] = extractelement <4 x i16> [[ARG1]], i64 3
298+
; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
299+
; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
296300
; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
297-
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
298-
; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
299-
; GFX8-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
300-
; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
301-
; GFX8-NEXT: ret <4 x i16> [[INS_31]]
301+
; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
302+
; GFX8-NEXT: [[ADD_3:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_3]], i16 [[ARG1_3]])
303+
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
304+
; GFX8-NEXT: [[INS_2:%.*]] = insertelement <4 x i16> [[TMP3]], i16 [[ADD_2]], i64 2
305+
; GFX8-NEXT: [[INS_3:%.*]] = insertelement <4 x i16> [[INS_2]], i16 [[ADD_3]], i64 3
306+
; GFX8-NEXT: ret <4 x i16> [[INS_3]]
302307
;
303308
bb:
304309
%arg0.0 = extractelement <4 x i16> %arg0, i64 0

llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -291,14 +291,19 @@ define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) {
291291
;
292292
; GFX8-LABEL: @uadd_sat_v4i16(
293293
; GFX8-NEXT: bb:
294-
; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
295-
; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
294+
; GFX8-NEXT: [[ARG0_2:%.*]] = extractelement <4 x i16> [[ARG0:%.*]], i64 2
295+
; GFX8-NEXT: [[ARG0_3:%.*]] = extractelement <4 x i16> [[ARG0]], i64 3
296+
; GFX8-NEXT: [[ARG1_2:%.*]] = extractelement <4 x i16> [[ARG1:%.*]], i64 2
297+
; GFX8-NEXT: [[ARG1_3:%.*]] = extractelement <4 x i16> [[ARG1]], i64 3
298+
; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
299+
; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
296300
; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
297-
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
298-
; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
299-
; GFX8-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
300-
; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
301-
; GFX8-NEXT: ret <4 x i16> [[INS_31]]
301+
; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
302+
; GFX8-NEXT: [[ADD_3:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_3]], i16 [[ARG1_3]])
303+
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
304+
; GFX8-NEXT: [[INS_2:%.*]] = insertelement <4 x i16> [[TMP3]], i16 [[ADD_2]], i64 2
305+
; GFX8-NEXT: [[INS_3:%.*]] = insertelement <4 x i16> [[INS_2]], i16 [[ADD_3]], i64 3
306+
; GFX8-NEXT: ret <4 x i16> [[INS_3]]
302307
;
303308
bb:
304309
%arg0.0 = extractelement <4 x i16> %arg0, i64 0

llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,15 @@
44
define <2 x i16> @uadd_sat_v9i16_combine_vi16(<9 x i16> %arg0, <9 x i16> %arg1) {
55
; CHECK-LABEL: @uadd_sat_v9i16_combine_vi16(
66
; CHECK-NEXT: bb:
7-
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x i16> [[ARG0:%.*]], <9 x i16> poison, <2 x i32> <i32 poison, i32 8>
8-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <9 x i16> [[ARG1:%.*]], <9 x i16> poison, <2 x i32> <i32 7, i32 8>
9-
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
10-
; CHECK-NEXT: ret <2 x i16> [[TMP2]]
7+
; CHECK-NEXT: [[ARG0_1:%.*]] = extractelement <9 x i16> undef, i64 7
8+
; CHECK-NEXT: [[ARG0_2:%.*]] = extractelement <9 x i16> [[ARG0:%.*]], i64 8
9+
; CHECK-NEXT: [[ARG1_1:%.*]] = extractelement <9 x i16> [[ARG1:%.*]], i64 7
10+
; CHECK-NEXT: [[ARG1_2:%.*]] = extractelement <9 x i16> [[ARG1]], i64 8
11+
; CHECK-NEXT: [[ADD_1:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
12+
; CHECK-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
13+
; CHECK-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> undef, i16 [[ADD_1]], i64 0
14+
; CHECK-NEXT: [[INS_2:%.*]] = insertelement <2 x i16> [[INS_1]], i16 [[ADD_2]], i64 1
15+
; CHECK-NEXT: ret <2 x i16> [[INS_2]]
1116
;
1217
bb:
1318
%arg0.1 = extractelement <9 x i16> undef, i64 7

llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,23 @@
44
define <4 x half> @phis(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) {
55
; CHECK-LABEL: @phis(
66
; CHECK-NEXT: entry:
7-
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
8-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
7+
; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x half> [[IN1:%.*]], i64 2
8+
; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x half> [[IN1]], i64 3
9+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
910
; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[BB1:%.*]], label [[BB0:%.*]]
1011
; CHECK: bb0:
11-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x half> [[IN2:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
12-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
12+
; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x half> [[IN2:%.*]], i64 2
13+
; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x half> [[IN2]], i64 3
14+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
1315
; CHECK-NEXT: br label [[BB1]]
1416
; CHECK: bb1:
15-
; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2]], [[BB0]] ]
16-
; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x half> [ [[TMP1]], [[ENTRY]] ], [ [[TMP3]], [[BB0]] ]
17-
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
18-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
19-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
20-
; CHECK-NEXT: ret <4 x half> [[TMP8]]
17+
; CHECK-NEXT: [[C2:%.*]] = phi half [ [[A2]], [[ENTRY:%.*]] ], [ [[B2]], [[BB0]] ]
18+
; CHECK-NEXT: [[C3:%.*]] = phi half [ [[A3]], [[ENTRY]] ], [ [[B3]], [[BB0]] ]
19+
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY]] ], [ [[TMP1]], [[BB0]] ]
20+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x half> [[TMP2]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
21+
; CHECK-NEXT: [[O2:%.*]] = insertelement <4 x half> [[TMP3]], half [[C2]], i64 2
22+
; CHECK-NEXT: [[O3:%.*]] = insertelement <4 x half> [[O2]], half [[C3]], i64 3
23+
; CHECK-NEXT: ret <4 x half> [[O3]]
2124
;
2225
entry:
2326
%a0 = extractelement <4 x half> %in1, i64 0
@@ -49,20 +52,23 @@ bb1:
4952
define <4 x half> @phis_reverse(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) {
5053
; CHECK-LABEL: @phis_reverse(
5154
; CHECK-NEXT: entry:
52-
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1:%.*]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
53-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
55+
; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x half> [[IN1:%.*]], i64 2
56+
; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x half> [[IN1]], i64 3
57+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
5458
; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[BB1:%.*]], label [[BB0:%.*]]
5559
; CHECK: bb0:
56-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x half> [[IN2:%.*]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
57-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
60+
; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x half> [[IN2:%.*]], i64 2
61+
; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x half> [[IN2]], i64 3
62+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
5863
; CHECK-NEXT: br label [[BB1]]
5964
; CHECK: bb1:
60-
; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2]], [[BB0]] ]
61-
; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x half> [ [[TMP1]], [[ENTRY]] ], [ [[TMP3]], [[BB0]] ]
62-
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
63-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
64-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x half> [[TMP6]], <4 x half> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
65-
; CHECK-NEXT: ret <4 x half> [[TMP8]]
65+
; CHECK-NEXT: [[C3:%.*]] = phi half [ [[A3]], [[ENTRY:%.*]] ], [ [[B3]], [[BB0]] ]
66+
; CHECK-NEXT: [[C2:%.*]] = phi half [ [[A2]], [[ENTRY]] ], [ [[B2]], [[BB0]] ]
67+
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY]] ], [ [[TMP1]], [[BB0]] ]
68+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x half> [[TMP2]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
69+
; CHECK-NEXT: [[O2:%.*]] = insertelement <4 x half> [[TMP3]], half [[C2]], i64 2
70+
; CHECK-NEXT: [[O3:%.*]] = insertelement <4 x half> [[O2]], half [[C3]], i64 3
71+
; CHECK-NEXT: ret <4 x half> [[O3]]
6672
;
6773
entry:
6874
%a0 = extractelement <4 x half> %in1, i64 0

0 commit comments

Comments
 (0)