Skip to content

Commit 51b88fa

Browse files
committed
[VPlan] Explicit pack
1 parent c455f4a commit 51b88fa

20 files changed

+526
-471
lines changed

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 3 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -360,17 +360,9 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
360360
// resulting vectors are stored in State, we will only generate the
361361
// insertelements once.
362362
Value *VectorValue = nullptr;
363-
if (IsSingleScalar) {
364-
VectorValue = GetBroadcastInstrs(ScalarValue);
365-
set(Def, VectorValue);
366-
} else {
367-
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
368-
// Initialize packing with insertelements to start from poison.
369-
VectorValue = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF));
370-
for (unsigned Lane = 0; Lane < VF.getFixedValue(); ++Lane)
371-
VectorValue = packScalarIntoVectorizedValue(Def, VectorValue, Lane);
372-
set(Def, VectorValue);
373-
}
363+
assert(IsSingleScalar && "replicates must be packed explicitly");
364+
VectorValue = GetBroadcastInstrs(ScalarValue);
365+
set(Def, VectorValue);
374366
Builder.restoreIP(OldIP);
375367
return VectorValue;
376368
}

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -970,6 +970,8 @@ class VPInstruction : public VPRecipeWithIRFlags,
970970
// Creates a step vector starting from 0 to VF with a step of 1.
971971
StepVector,
972972

973+
Pack,
974+
973975
};
974976

975977
private:
@@ -979,14 +981,6 @@ class VPInstruction : public VPRecipeWithIRFlags,
979981
/// An optional name that can be used for the generated IR instruction.
980982
const std::string Name;
981983

982-
/// Returns true if this VPInstruction generates scalar values for all lanes.
983-
/// Most VPInstructions generate a single value per part, either vector or
984-
/// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
985-
/// values per all lanes, stemming from an original ingredient. This method
986-
/// identifies the (rare) cases of VPInstructions that do so as well, w/o an
987-
/// underlying ingredient.
988-
bool doesGeneratePerAllLanes() const;
989-
990984
/// Returns true if we can generate a scalar for the first lane only if
991985
/// needed.
992986
bool canGenerateScalarForFirstLane() const;
@@ -1080,6 +1074,14 @@ class VPInstruction : public VPRecipeWithIRFlags,
10801074
/// result is also a single scalar.
10811075
bool isSingleScalar() const;
10821076

1077+
/// Returns true if this VPInstruction generates scalar values for all lanes.
1078+
/// Most VPInstructions generate a single value per part, either vector or
1079+
/// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1080+
/// values per all lanes, stemming from an original ingredient. This method
1081+
/// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1082+
/// underlying ingredient.
1083+
bool doesGeneratePerAllLanes() const;
1084+
10831085
/// Returns the symbolic name assigned to the VPInstruction.
10841086
StringRef getName() const { return Name; }
10851087
};

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -100,6 +100,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
100100
case VPInstruction::ExplicitVectorLength:
101101
return Type::getIntNTy(Ctx, 32);
102102
case Instruction::PHI:
103+
case VPInstruction::Pack:
103104
// Infer the type of first operand only, as other operands of header phi's
104105
// may lead to infinite recursion.
105106
return inferScalarType(R->getOperand(0));
@@ -440,6 +441,9 @@ SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
440441
if (!VPBB->getParent())
441442
break;
442443
for (VPRecipeBase &R : *VPBB) {
444+
if (isa<VPInstruction>(&R) &&
445+
cast<VPInstruction>(&R)->getOpcode() == VPInstruction::Pack)
446+
continue;
443447
Idx2Recipe.push_back(&R);
444448

445449
// Save the end location of each USE.

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 18 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -766,6 +766,15 @@ Value *VPInstruction::generate(VPTransformState &State) {
766766
return Builder.CreateCountTrailingZeroElems(Builder.getInt64Ty(), Mask,
767767
true, Name);
768768
}
769+
case VPInstruction::Pack: {
770+
assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
771+
Value *WideValue = PoisonValue::get(
772+
toVectorizedTy(State.TypeAnalysis.inferScalarType(this), State.VF));
773+
for (unsigned Lane = 0; Lane < State.VF.getFixedValue(); ++Lane)
774+
WideValue =
775+
State.packScalarIntoVectorizedValue(getOperand(0), WideValue, Lane);
776+
return WideValue;
777+
}
769778
default:
770779
llvm_unreachable("Unsupported opcode for instruction");
771780
}
@@ -894,10 +903,11 @@ void VPInstruction::execute(VPTransformState &State) {
894903
if (!hasResult())
895904
return;
896905
assert(GeneratedValue && "generate must produce a value");
897-
assert(
898-
(GeneratedValue->getType()->isVectorTy() == !GeneratesPerFirstLaneOnly ||
899-
State.VF.isScalar()) &&
900-
"scalar value but not only first lane defined");
906+
assert(((GeneratedValue->getType()->isVectorTy() ||
907+
GeneratedValue->getType()->isStructTy()) ==
908+
!GeneratesPerFirstLaneOnly ||
909+
State.VF.isScalar()) &&
910+
"scalar value but not only first lane defined");
901911
State.set(this, GeneratedValue,
902912
/*IsScalar*/ GeneratesPerFirstLaneOnly);
903913
}
@@ -923,6 +933,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
923933
case VPInstruction::WideIVStep:
924934
case VPInstruction::StepVector:
925935
case VPInstruction::ReductionStartVector:
936+
case VPInstruction::Pack:
926937
return false;
927938
default:
928939
return true;
@@ -1063,6 +1074,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
10631074
case VPInstruction::ReductionStartVector:
10641075
O << "reduction-start-vector";
10651076
break;
1077+
case VPInstruction::Pack:
1078+
O << "pack-into-vector";
1079+
break;
10661080
default:
10671081
O << Instruction::getOpcodeName(getOpcode());
10681082
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1895,6 +1895,34 @@ static void removeBranchOnConst(VPlan &Plan) {
18951895
}
18961896
}
18971897

1898+
/// Make vector packing of per-lane scalar results explicit in \p Plan.
///
/// Walks the VPBasicBlocks reached by a shallow depth-first traversal from the
/// entry of the vector loop region. For each recipe that defines one scalar
/// per lane (a VPReplicateRecipe that is not single-scalar, or a VPInstruction
/// for which doesGeneratePerAllLanes() holds) and that has at least one user
/// which does not use its scalars, a VPInstruction::Pack taking the recipe as
/// its only operand is inserted directly after it. Users that do not use the
/// scalars are then redirected to the Pack, except for the Pack itself and for
/// ExtractLastElement / ExtractPenultimateElement VPInstructions, which keep
/// referring to the original definition.
static void materializePack(VPlan &Plan) {
  // True if \p R defines a separate scalar value for every lane.
  auto DefinesScalarPerLane = [](VPRecipeBase &R) {
    if (auto *Replicate = dyn_cast<VPReplicateRecipe>(&R))
      if (!Replicate->isSingleScalar())
        return true;
    auto *VPI = dyn_cast<VPInstruction>(&R);
    return VPI && VPI->doesGeneratePerAllLanes();
  };

  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
    // Early-increment iteration: a new recipe is inserted right after the
    // one currently being visited.
    for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
      if (!DefinesScalarPerLane(R))
        continue;

      auto *ScalarDef = cast<VPSingleDefRecipe>(&R);
      auto ConsumesScalars = [ScalarDef](VPUser *U) {
        return U->usesScalars(ScalarDef);
      };
      // Nothing to pack when every user is content with the scalars.
      if (all_of(ScalarDef->users(), ConsumesScalars))
        continue;

      auto *PackR = new VPInstruction(VPInstruction::Pack, {ScalarDef});
      PackR->insertAfter(ScalarDef);
      ScalarDef->replaceUsesWithIf(
          PackR, [PackR, ScalarDef](VPUser &U, unsigned) {
            // The Pack must keep its operand, and scalar users keep reading
            // the per-lane values directly.
            if (&U == PackR || U.usesScalars(ScalarDef))
              return false;
            auto *UserVPI = dyn_cast<VPInstruction>(&U);
            if (!UserVPI)
              return true;
            // The extract-last/penultimate instructions are left on the
            // original definition.
            unsigned Opcode = UserVPI->getOpcode();
            return Opcode != VPInstruction::ExtractLastElement &&
                   Opcode != VPInstruction::ExtractPenultimateElement;
          });
    }
  }
}
1925+
18981926
void VPlanTransforms::optimize(VPlan &Plan) {
18991927
runPass(removeRedundantCanonicalIVs, Plan);
19001928
runPass(removeRedundantInductionCasts, Plan);
@@ -1912,6 +1940,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
19121940
runPass(createAndOptimizeReplicateRegions, Plan);
19131941
runPass(mergeBlocksIntoPredecessors, Plan);
19141942
runPass(licm, Plan);
1943+
runPass(materializePack, Plan);
19151944
}
19161945

19171946
// Add a VPActiveLaneMaskPHIRecipe and related recipes to \p Plan and replace

llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,10 +20,10 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) {
2020
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
2121
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR_START_1:%.*]], i64 [[TMP0]]
2222
; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP1]]
23-
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
24-
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x ptr> [[TMP4]], ptr [[NEXT_GEP1]], i32 1
2523
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP2]]
2624
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP3]]
25+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
26+
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x ptr> [[TMP4]], ptr [[NEXT_GEP1]], i32 1
2727
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP2]], i32 0
2828
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x ptr> [[TMP6]], ptr [[NEXT_GEP3]], i32 1
2929
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <2 x ptr> [[TMP5]], zeroinitializer

llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -109,10 +109,10 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
109109
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = load double, ptr [[P2:%.*]], align 8
110110
; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3:[0-9]+]]
111111
; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3]]
112-
; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
113-
; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[TMP6]], i32 1
114112
; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3]]
115113
; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3]]
114+
; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
115+
; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[TMP6]], i32 1
116116
; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = insertelement <2 x double> poison, double [[TMP9]], i32 0
117117
; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[TMP10]], i32 1
118118
; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = fcmp ule <2 x double> [[TMP8]], zeroinitializer

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -306,22 +306,6 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) {
306306
; CHECK-INTERLEAVED-NEXT: [[TMP82:%.*]] = load i16, ptr [[TMP50]], align 2
307307
; CHECK-INTERLEAVED-NEXT: [[TMP83:%.*]] = load i16, ptr [[TMP51]], align 2
308308
; CHECK-INTERLEAVED-NEXT: [[TMP84:%.*]] = load i16, ptr [[TMP52]], align 2
309-
; CHECK-INTERLEAVED-NEXT: [[TMP85:%.*]] = insertelement <16 x i16> poison, i16 [[TMP69]], i32 0
310-
; CHECK-INTERLEAVED-NEXT: [[TMP86:%.*]] = insertelement <16 x i16> [[TMP85]], i16 [[TMP70]], i32 1
311-
; CHECK-INTERLEAVED-NEXT: [[TMP87:%.*]] = insertelement <16 x i16> [[TMP86]], i16 [[TMP71]], i32 2
312-
; CHECK-INTERLEAVED-NEXT: [[TMP88:%.*]] = insertelement <16 x i16> [[TMP87]], i16 [[TMP72]], i32 3
313-
; CHECK-INTERLEAVED-NEXT: [[TMP89:%.*]] = insertelement <16 x i16> [[TMP88]], i16 [[TMP73]], i32 4
314-
; CHECK-INTERLEAVED-NEXT: [[TMP90:%.*]] = insertelement <16 x i16> [[TMP89]], i16 [[TMP74]], i32 5
315-
; CHECK-INTERLEAVED-NEXT: [[TMP91:%.*]] = insertelement <16 x i16> [[TMP90]], i16 [[TMP75]], i32 6
316-
; CHECK-INTERLEAVED-NEXT: [[TMP92:%.*]] = insertelement <16 x i16> [[TMP91]], i16 [[TMP76]], i32 7
317-
; CHECK-INTERLEAVED-NEXT: [[TMP93:%.*]] = insertelement <16 x i16> [[TMP92]], i16 [[TMP77]], i32 8
318-
; CHECK-INTERLEAVED-NEXT: [[TMP94:%.*]] = insertelement <16 x i16> [[TMP93]], i16 [[TMP78]], i32 9
319-
; CHECK-INTERLEAVED-NEXT: [[TMP95:%.*]] = insertelement <16 x i16> [[TMP94]], i16 [[TMP79]], i32 10
320-
; CHECK-INTERLEAVED-NEXT: [[TMP96:%.*]] = insertelement <16 x i16> [[TMP95]], i16 [[TMP80]], i32 11
321-
; CHECK-INTERLEAVED-NEXT: [[TMP97:%.*]] = insertelement <16 x i16> [[TMP96]], i16 [[TMP81]], i32 12
322-
; CHECK-INTERLEAVED-NEXT: [[TMP98:%.*]] = insertelement <16 x i16> [[TMP97]], i16 [[TMP82]], i32 13
323-
; CHECK-INTERLEAVED-NEXT: [[TMP99:%.*]] = insertelement <16 x i16> [[TMP98]], i16 [[TMP83]], i32 14
324-
; CHECK-INTERLEAVED-NEXT: [[TMP100:%.*]] = insertelement <16 x i16> [[TMP99]], i16 [[TMP84]], i32 15
325309
; CHECK-INTERLEAVED-NEXT: [[TMP101:%.*]] = load i16, ptr [[TMP53]], align 2
326310
; CHECK-INTERLEAVED-NEXT: [[TMP102:%.*]] = load i16, ptr [[TMP54]], align 2
327311
; CHECK-INTERLEAVED-NEXT: [[TMP103:%.*]] = load i16, ptr [[TMP55]], align 2
@@ -338,6 +322,22 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) {
338322
; CHECK-INTERLEAVED-NEXT: [[TMP114:%.*]] = load i16, ptr [[TMP66]], align 2
339323
; CHECK-INTERLEAVED-NEXT: [[TMP115:%.*]] = load i16, ptr [[TMP67]], align 2
340324
; CHECK-INTERLEAVED-NEXT: [[TMP116:%.*]] = load i16, ptr [[TMP68]], align 2
325+
; CHECK-INTERLEAVED-NEXT: [[TMP85:%.*]] = insertelement <16 x i16> poison, i16 [[TMP69]], i32 0
326+
; CHECK-INTERLEAVED-NEXT: [[TMP86:%.*]] = insertelement <16 x i16> [[TMP85]], i16 [[TMP70]], i32 1
327+
; CHECK-INTERLEAVED-NEXT: [[TMP87:%.*]] = insertelement <16 x i16> [[TMP86]], i16 [[TMP71]], i32 2
328+
; CHECK-INTERLEAVED-NEXT: [[TMP88:%.*]] = insertelement <16 x i16> [[TMP87]], i16 [[TMP72]], i32 3
329+
; CHECK-INTERLEAVED-NEXT: [[TMP89:%.*]] = insertelement <16 x i16> [[TMP88]], i16 [[TMP73]], i32 4
330+
; CHECK-INTERLEAVED-NEXT: [[TMP90:%.*]] = insertelement <16 x i16> [[TMP89]], i16 [[TMP74]], i32 5
331+
; CHECK-INTERLEAVED-NEXT: [[TMP91:%.*]] = insertelement <16 x i16> [[TMP90]], i16 [[TMP75]], i32 6
332+
; CHECK-INTERLEAVED-NEXT: [[TMP92:%.*]] = insertelement <16 x i16> [[TMP91]], i16 [[TMP76]], i32 7
333+
; CHECK-INTERLEAVED-NEXT: [[TMP93:%.*]] = insertelement <16 x i16> [[TMP92]], i16 [[TMP77]], i32 8
334+
; CHECK-INTERLEAVED-NEXT: [[TMP94:%.*]] = insertelement <16 x i16> [[TMP93]], i16 [[TMP78]], i32 9
335+
; CHECK-INTERLEAVED-NEXT: [[TMP95:%.*]] = insertelement <16 x i16> [[TMP94]], i16 [[TMP79]], i32 10
336+
; CHECK-INTERLEAVED-NEXT: [[TMP96:%.*]] = insertelement <16 x i16> [[TMP95]], i16 [[TMP80]], i32 11
337+
; CHECK-INTERLEAVED-NEXT: [[TMP97:%.*]] = insertelement <16 x i16> [[TMP96]], i16 [[TMP81]], i32 12
338+
; CHECK-INTERLEAVED-NEXT: [[TMP98:%.*]] = insertelement <16 x i16> [[TMP97]], i16 [[TMP82]], i32 13
339+
; CHECK-INTERLEAVED-NEXT: [[TMP99:%.*]] = insertelement <16 x i16> [[TMP98]], i16 [[TMP83]], i32 14
340+
; CHECK-INTERLEAVED-NEXT: [[TMP100:%.*]] = insertelement <16 x i16> [[TMP99]], i16 [[TMP84]], i32 15
341341
; CHECK-INTERLEAVED-NEXT: [[TMP117:%.*]] = insertelement <16 x i16> poison, i16 [[TMP101]], i32 0
342342
; CHECK-INTERLEAVED-NEXT: [[TMP118:%.*]] = insertelement <16 x i16> [[TMP117]], i16 [[TMP102]], i32 1
343343
; CHECK-INTERLEAVED-NEXT: [[TMP119:%.*]] = insertelement <16 x i16> [[TMP118]], i16 [[TMP103]], i32 2

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -655,22 +655,6 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 {
655655
; CHECK-INTERLEAVED-NEXT: [[TMP82:%.*]] = load i16, ptr [[TMP52]], align 2
656656
; CHECK-INTERLEAVED-NEXT: [[TMP83:%.*]] = load i16, ptr [[TMP53]], align 2
657657
; CHECK-INTERLEAVED-NEXT: [[TMP84:%.*]] = load i16, ptr [[TMP54]], align 2
658-
; CHECK-INTERLEAVED-NEXT: [[TMP85:%.*]] = insertelement <16 x i16> poison, i16 [[TMP69]], i32 0
659-
; CHECK-INTERLEAVED-NEXT: [[TMP86:%.*]] = insertelement <16 x i16> [[TMP85]], i16 [[TMP70]], i32 1
660-
; CHECK-INTERLEAVED-NEXT: [[TMP87:%.*]] = insertelement <16 x i16> [[TMP86]], i16 [[TMP71]], i32 2
661-
; CHECK-INTERLEAVED-NEXT: [[TMP88:%.*]] = insertelement <16 x i16> [[TMP87]], i16 [[TMP72]], i32 3
662-
; CHECK-INTERLEAVED-NEXT: [[TMP89:%.*]] = insertelement <16 x i16> [[TMP88]], i16 [[TMP73]], i32 4
663-
; CHECK-INTERLEAVED-NEXT: [[TMP90:%.*]] = insertelement <16 x i16> [[TMP89]], i16 [[TMP74]], i32 5
664-
; CHECK-INTERLEAVED-NEXT: [[TMP91:%.*]] = insertelement <16 x i16> [[TMP90]], i16 [[TMP75]], i32 6
665-
; CHECK-INTERLEAVED-NEXT: [[TMP92:%.*]] = insertelement <16 x i16> [[TMP91]], i16 [[TMP76]], i32 7
666-
; CHECK-INTERLEAVED-NEXT: [[TMP93:%.*]] = insertelement <16 x i16> [[TMP92]], i16 [[TMP77]], i32 8
667-
; CHECK-INTERLEAVED-NEXT: [[TMP94:%.*]] = insertelement <16 x i16> [[TMP93]], i16 [[TMP78]], i32 9
668-
; CHECK-INTERLEAVED-NEXT: [[TMP95:%.*]] = insertelement <16 x i16> [[TMP94]], i16 [[TMP79]], i32 10
669-
; CHECK-INTERLEAVED-NEXT: [[TMP96:%.*]] = insertelement <16 x i16> [[TMP95]], i16 [[TMP80]], i32 11
670-
; CHECK-INTERLEAVED-NEXT: [[TMP97:%.*]] = insertelement <16 x i16> [[TMP96]], i16 [[TMP81]], i32 12
671-
; CHECK-INTERLEAVED-NEXT: [[TMP98:%.*]] = insertelement <16 x i16> [[TMP97]], i16 [[TMP82]], i32 13
672-
; CHECK-INTERLEAVED-NEXT: [[TMP99:%.*]] = insertelement <16 x i16> [[TMP98]], i16 [[TMP83]], i32 14
673-
; CHECK-INTERLEAVED-NEXT: [[TMP100:%.*]] = insertelement <16 x i16> [[TMP99]], i16 [[TMP84]], i32 15
674658
; CHECK-INTERLEAVED-NEXT: [[TMP101:%.*]] = load i16, ptr [[TMP55]], align 2
675659
; CHECK-INTERLEAVED-NEXT: [[TMP102:%.*]] = load i16, ptr [[TMP56]], align 2
676660
; CHECK-INTERLEAVED-NEXT: [[TMP103:%.*]] = load i16, ptr [[TMP57]], align 2
@@ -687,6 +671,22 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 {
687671
; CHECK-INTERLEAVED-NEXT: [[TMP114:%.*]] = load i16, ptr [[TMP68]], align 2
688672
; CHECK-INTERLEAVED-NEXT: [[TMP115:%.*]] = load i16, ptr [[TMP139]], align 2
689673
; CHECK-INTERLEAVED-NEXT: [[TMP116:%.*]] = load i16, ptr [[TMP140]], align 2
674+
; CHECK-INTERLEAVED-NEXT: [[TMP85:%.*]] = insertelement <16 x i16> poison, i16 [[TMP69]], i32 0
675+
; CHECK-INTERLEAVED-NEXT: [[TMP86:%.*]] = insertelement <16 x i16> [[TMP85]], i16 [[TMP70]], i32 1
676+
; CHECK-INTERLEAVED-NEXT: [[TMP87:%.*]] = insertelement <16 x i16> [[TMP86]], i16 [[TMP71]], i32 2
677+
; CHECK-INTERLEAVED-NEXT: [[TMP88:%.*]] = insertelement <16 x i16> [[TMP87]], i16 [[TMP72]], i32 3
678+
; CHECK-INTERLEAVED-NEXT: [[TMP89:%.*]] = insertelement <16 x i16> [[TMP88]], i16 [[TMP73]], i32 4
679+
; CHECK-INTERLEAVED-NEXT: [[TMP90:%.*]] = insertelement <16 x i16> [[TMP89]], i16 [[TMP74]], i32 5
680+
; CHECK-INTERLEAVED-NEXT: [[TMP91:%.*]] = insertelement <16 x i16> [[TMP90]], i16 [[TMP75]], i32 6
681+
; CHECK-INTERLEAVED-NEXT: [[TMP92:%.*]] = insertelement <16 x i16> [[TMP91]], i16 [[TMP76]], i32 7
682+
; CHECK-INTERLEAVED-NEXT: [[TMP93:%.*]] = insertelement <16 x i16> [[TMP92]], i16 [[TMP77]], i32 8
683+
; CHECK-INTERLEAVED-NEXT: [[TMP94:%.*]] = insertelement <16 x i16> [[TMP93]], i16 [[TMP78]], i32 9
684+
; CHECK-INTERLEAVED-NEXT: [[TMP95:%.*]] = insertelement <16 x i16> [[TMP94]], i16 [[TMP79]], i32 10
685+
; CHECK-INTERLEAVED-NEXT: [[TMP96:%.*]] = insertelement <16 x i16> [[TMP95]], i16 [[TMP80]], i32 11
686+
; CHECK-INTERLEAVED-NEXT: [[TMP97:%.*]] = insertelement <16 x i16> [[TMP96]], i16 [[TMP81]], i32 12
687+
; CHECK-INTERLEAVED-NEXT: [[TMP98:%.*]] = insertelement <16 x i16> [[TMP97]], i16 [[TMP82]], i32 13
688+
; CHECK-INTERLEAVED-NEXT: [[TMP99:%.*]] = insertelement <16 x i16> [[TMP98]], i16 [[TMP83]], i32 14
689+
; CHECK-INTERLEAVED-NEXT: [[TMP100:%.*]] = insertelement <16 x i16> [[TMP99]], i16 [[TMP84]], i32 15
690690
; CHECK-INTERLEAVED-NEXT: [[TMP117:%.*]] = insertelement <16 x i16> poison, i16 [[TMP101]], i32 0
691691
; CHECK-INTERLEAVED-NEXT: [[TMP118:%.*]] = insertelement <16 x i16> [[TMP117]], i16 [[TMP102]], i32 1
692692
; CHECK-INTERLEAVED-NEXT: [[TMP119:%.*]] = insertelement <16 x i16> [[TMP118]], i16 [[TMP103]], i32 2

0 commit comments

Comments
 (0)