Skip to content

Commit 177de6d

Browse files
committed
[LV][EVL] Support call instruction with EVL-vectorization
1 parent fcf02bc commit 177de6d

File tree

14 files changed

+115
-31
lines changed

14 files changed

+115
-31
lines changed

llvm/include/llvm/Analysis/VectorUtils.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,12 @@ bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx);
160160
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
161161
const TargetLibraryInfo *TLI);
162162

163+
/// Returns VP intrinsic ID for call.
164+
/// For the input call instruction, it finds the corresponding VP intrinsic and
165+
/// returns its intrinsic ID; if none is found, it returns not_intrinsic.
166+
Intrinsic::ID getVPIntrinsicIDForCall(const CallInst *CI,
167+
const TargetLibraryInfo *TLI);
168+
163169
/// Given a vector and an element number, see if the scalar value is
164170
/// already around as a register, for example if it were inserted then extracted
165171
/// from the vector.

llvm/include/llvm/IR/VectorBuilder.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,11 @@ class VectorBuilder {
9999
const Twine &Name = Twine());
100100

101101
/// Emit a VP reduction intrinsic call for recurrence kind.
102-
/// \param RdxID The intrinsic ID of llvm.vector.reduce.*
102+
/// \param RdxID The ID of the intrinsic to emit in its VP form.
103103
/// \param ValTy The type of operand which the reduction operation is
104104
/// performed.
105105
/// \param VecOpArray The operand list.
106-
Value *createSimpleReduction(Intrinsic::ID RdxID, Type *ValTy,
106+
Value *createSimpleIntrinsic(Intrinsic::ID RdxID, Type *ValTy,
107107
ArrayRef<Value *> VecOpArray,
108108
const Twine &Name = Twine());
109109
};

llvm/lib/Analysis/VectorUtils.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,13 @@ Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
169169
return Intrinsic::not_intrinsic;
170170
}
171171

172+
Intrinsic::ID llvm::getVPIntrinsicIDForCall(const CallInst *CI,
173+
const TargetLibraryInfo *TLI) {
174+
Intrinsic::ID ID = getIntrinsicForCallSite(*CI, TLI);
175+
176+
return VPIntrinsic::getForIntrinsic(ID);
177+
}
178+
172179
/// Given a vector and an element number, see if the scalar value is
173180
/// already around as a register, for example if it were inserted then extracted
174181
/// from the vector.

llvm/lib/IR/VectorBuilder.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,13 +60,12 @@ Value *VectorBuilder::createVectorInstruction(unsigned Opcode, Type *ReturnTy,
6060
return createVectorInstructionImpl(VPID, ReturnTy, InstOpArray, Name);
6161
}
6262

63-
Value *VectorBuilder::createSimpleReduction(Intrinsic::ID RdxID,
64-
Type *ValTy,
63+
Value *VectorBuilder::createSimpleIntrinsic(Intrinsic::ID ID, Type *ValTy,
6564
ArrayRef<Value *> InstOpArray,
6665
const Twine &Name) {
67-
auto VPID = VPIntrinsic::getForIntrinsic(RdxID);
68-
assert(VPReductionIntrinsic::isVPReduction(VPID) &&
69-
"No VPIntrinsic for this reduction");
66+
auto VPID = VPIntrinsic::getForIntrinsic(ID);
67+
assert(VPIntrinsic::isVPIntrinsic(VPID) &&
68+
"No VPIntrinsic for this Intrinsic");
7069
return createVectorInstructionImpl(VPID, ValTy, InstOpArray, Name);
7170
}
7271

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1075,6 +1075,14 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
10751075
return getArithmeticInstrCost(*FOp, ICA.getReturnType(), CostKind);
10761076
break;
10771077
}
1078+
// TODO: Need to push a new patch.
1079+
case Intrinsic::vp_smax:
1080+
case Intrinsic::vp_smin:
1081+
case Intrinsic::vp_umax:
1082+
case Intrinsic::vp_umin: {
1083+
// return LT.first;
1084+
return 1;
1085+
}
10781086
// vp int cast ops.
10791087
case Intrinsic::vp_trunc:
10801088
case Intrinsic::vp_zext:

llvm/lib/Transforms/Utils/LoopUtils.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1300,7 +1300,7 @@ Value *llvm::createSimpleReduction(VectorBuilder &VBuilder, Value *Src,
13001300
Type *SrcEltTy = SrcTy->getElementType();
13011301
Value *Iden = getRecurrenceIdentity(Kind, SrcEltTy, Desc.getFastMathFlags());
13021302
Value *Ops[] = {Iden, Src};
1303-
return VBuilder.createSimpleReduction(Id, SrcTy, Ops);
1303+
return VBuilder.createSimpleIntrinsic(Id, SrcTy, Ops);
13041304
}
13051305

13061306
Value *llvm::createReduction(IRBuilderBase &B,
@@ -1343,7 +1343,7 @@ Value *llvm::createOrderedReduction(VectorBuilder &VBuilder,
13431343
Intrinsic::ID Id = getReductionIntrinsicID(RecurKind::FAdd);
13441344
auto *SrcTy = cast<VectorType>(Src->getType());
13451345
Value *Ops[] = {Start, Src};
1346-
return VBuilder.createSimpleReduction(Id, SrcTy, Ops);
1346+
return VBuilder.createSimpleIntrinsic(Id, SrcTy, Ops);
13471347
}
13481348

13491349
void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue,

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8349,7 +8349,6 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
83498349
return nullptr;
83508350

83518351
SmallVector<VPValue *, 4> Ops(Operands.take_front(CI->arg_size()));
8352-
83538352
// Is it beneficial to perform intrinsic call compared to lib call?
83548353
bool ShouldUseVectorIntrinsic =
83558354
ID && LoopVectorizationPlanner::getDecisionAndClampRange(
@@ -8690,7 +8689,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
86908689
// TODO: try to put it close to addActiveLaneMask().
86918690
// Discard the plan if it is not EVL-compatible
86928691
if (CM.foldTailWithEVL() &&
8693-
!VPlanTransforms::tryAddExplicitVectorLength(*Plan))
8692+
!VPlanTransforms::tryAddExplicitVectorLength(*Plan, *TLI))
86948693
break;
86958694
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
86968695
VPlans.push_back(std::move(Plan));

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1678,9 +1678,24 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
16781678
/// Returns true if the intrinsic may write to memory.
16791679
bool mayWriteToMemory() const { return MayWriteToMemory; }
16801680

1681+
operand_range arg_operands() {
1682+
unsigned argNum = VPIntrinsic::isVPIntrinsic(VectorIntrinsicID)
1683+
? getNumOperands() - 1
1684+
: getNumOperands();
1685+
return make_range(op_begin(), op_begin() + argNum);
1686+
}
1687+
1688+
const_operand_range arg_operands() const {
1689+
unsigned argNum = VPIntrinsic::isVPIntrinsic(VectorIntrinsicID)
1690+
? getNumOperands() - 1
1691+
: getNumOperands();
1692+
return make_range(op_begin(), op_begin() + argNum);
1693+
}
1694+
16811695
/// Returns true if the intrinsic may have side-effects.
16821696
bool mayHaveSideEffects() const { return MayHaveSideEffects; }
16831697

1698+
bool onlyFirstLaneUsed(const VPValue *Op) const override;
16841699
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
16851700
/// Print the recipe.
16861701
void print(raw_ostream &O, const Twine &Indent,

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
6161
case Instruction::ICmp:
6262
case VPInstruction::ActiveLaneMask:
6363
return inferScalarType(R->getOperand(1));
64+
case VPInstruction::ExplicitVectorLength:
65+
return Type::getIntNTy(Ctx, 32);
6466
case VPInstruction::FirstOrderRecurrenceSplice:
6567
case VPInstruction::Not:
6668
return SetResultTyFromOp();

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -962,17 +962,21 @@ void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
962962
void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
963963
assert(State.VF.isVector() && "not widening");
964964
State.setDebugLocFrom(getDebugLoc());
965-
965+
Intrinsic::ID FuncID =
966+
VPIntrinsic::isVPIntrinsic(VectorIntrinsicID)
967+
? VPIntrinsic::getFunctionalIntrinsicIDForVP(VectorIntrinsicID)
968+
.value()
969+
: VectorIntrinsicID;
966970
SmallVector<Type *, 2> TysForDecl;
967971
// Add return type if intrinsic is overloaded on it.
968-
if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1))
972+
if (isVectorIntrinsicWithOverloadTypeAtArg(FuncID, -1))
969973
TysForDecl.push_back(VectorType::get(getResultType(), State.VF));
970974
SmallVector<Value *, 4> Args;
971-
for (const auto &I : enumerate(operands())) {
975+
for (const auto &I : enumerate(arg_operands())) {
972976
// Some intrinsics have a scalar argument - don't replace it with a
973977
// vector.
974978
Value *Arg;
975-
if (isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
979+
if (isVectorIntrinsicWithScalarOpAtArg(FuncID, I.index()))
976980
Arg = State.get(I.value(), VPLane(0));
977981
else
978982
Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
@@ -981,18 +985,34 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
981985
Args.push_back(Arg);
982986
}
983987

984-
// Use vector version of the intrinsic.
985-
Module *M = State.Builder.GetInsertBlock()->getModule();
986-
Function *VectorF =
987-
Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
988-
assert(VectorF && "Can't retrieve vector intrinsic.");
989-
988+
CallInst *V = nullptr;
990989
auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
991990
SmallVector<OperandBundleDef, 1> OpBundles;
992991
if (CI)
993992
CI->getOperandBundlesAsDefs(OpBundles);
994993

995-
CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
994+
if (VPIntrinsic::isVPIntrinsic(VectorIntrinsicID)) {
995+
// Use the vector-predicated (VP) version of the intrinsic.
996+
IRBuilderBase &BuilderIR = State.Builder;
997+
VectorBuilder VBuilder(BuilderIR);
998+
Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
999+
// VPValue EVL = getOperand(getNumOperands() - 1);
1000+
VBuilder.setMask(Mask).setEVL(
1001+
State.get(getOperand(getNumOperands() - 1), /*NeedsScalar=*/true));
1002+
auto *TyReturn = VectorType::get(getResultType(), State.VF);
1003+
Value *VPInst = VBuilder.createSimpleIntrinsic(VectorIntrinsicID, TyReturn,
1004+
Args, "vp.call");
1005+
if (VPInst) {
1006+
V = cast<CallInst>(VPInst);
1007+
}
1008+
} else {
1009+
// Use vector version of the intrinsic.
1010+
Module *M = State.Builder.GetInsertBlock()->getModule();
1011+
Function *VectorF =
1012+
Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
1013+
assert(VectorF && "Can't retrieve vector intrinsic.");
1014+
V = State.Builder.CreateCall(VectorF, Args, OpBundles);
1015+
}
9961016

9971017
setFlags(V);
9981018

@@ -1011,7 +1031,7 @@ InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
10111031
// clear Arguments.
10121032
// TODO: Rework TTI interface to be independent of concrete IR values.
10131033
SmallVector<const Value *> Arguments;
1014-
for (const auto &[Idx, Op] : enumerate(operands())) {
1034+
for (const auto &[Idx, Op] : enumerate(arg_operands())) {
10151035
auto *V = Op->getUnderlyingValue();
10161036
if (!V) {
10171037
if (auto *UI = dyn_cast_or_null<CallBase>(getUnderlyingValue())) {
@@ -1042,6 +1062,14 @@ StringRef VPWidenIntrinsicRecipe::getIntrinsicName() const {
10421062
return Intrinsic::getBaseName(VectorIntrinsicID);
10431063
}
10441064

1065+
bool VPWidenIntrinsicRecipe::onlyFirstLaneUsed(const VPValue *Op) const {
1066+
assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
1067+
// Vector predication intrinsics only demand the first lane of the last
1068+
// operand (the EVL operand).
1069+
return VPIntrinsic::isVPIntrinsic(VectorIntrinsicID) &&
1070+
Op == getOperand(getNumOperands() - 1);
1071+
}
1072+
10451073
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
10461074
void VPWidenIntrinsicRecipe::print(raw_ostream &O, const Twine &Indent,
10471075
VPSlotTracker &SlotTracker) const {

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1351,8 +1351,10 @@ void VPlanTransforms::addActiveLaneMask(
13511351
}
13521352

13531353
/// Replace recipes with their EVL variants.
1354-
static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
1354+
static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL,
1355+
const TargetLibraryInfo &TLI) {
13551356
SmallVector<VPValue *> HeaderMasks = collectAllHeaderMasks(Plan);
1357+
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
13561358
for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
13571359
for (VPUser *U : collectUsersRecursively(HeaderMask)) {
13581360
auto *CurRecipe = dyn_cast<VPRecipeBase>(U);
@@ -1380,6 +1382,18 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
13801382
return nullptr;
13811383
return new VPWidenEVLRecipe(*W, EVL);
13821384
})
1385+
.Case<VPWidenIntrinsicRecipe>(
1386+
[&](VPWidenIntrinsicRecipe *CInst) -> VPRecipeBase * {
1387+
auto *CI = cast<CallInst>(CInst->getUnderlyingInstr());
1388+
// VPValue *NewMask = GetNewMask(CInst->getMask());
1389+
SmallVector<VPValue *> Ops(CInst->operands());
1390+
Ops.push_back(&EVL);
1391+
Intrinsic::ID VPID = getVPIntrinsicIDForCall(CI, &TLI);
1392+
if (VPID == Intrinsic::not_intrinsic)
1393+
return nullptr;
1394+
return new VPWidenIntrinsicRecipe(
1395+
*CI, VPID, Ops, CI->getType(), CI->getDebugLoc());
1396+
})
13831397
.Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
13841398
VPValue *NewMask = GetNewMask(Red->getCondOp());
13851399
return new VPReductionEVLRecipe(*Red, EVL, NewMask);
@@ -1430,7 +1444,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
14301444
/// %NextEVLIV = add IVSize (cast i32 %VPEVVL to IVSize), %EVLPhi
14311445
/// ...
14321446
///
1433-
bool VPlanTransforms::tryAddExplicitVectorLength(VPlan &Plan) {
1447+
bool VPlanTransforms::tryAddExplicitVectorLength(VPlan &Plan,
1448+
const TargetLibraryInfo &TLI) {
14341449
VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
14351450
// The transform updates all users of inductions to work based on EVL, instead
14361451
// of the VF directly. At the moment, widened inductions cannot be updated, so
@@ -1482,7 +1497,7 @@ bool VPlanTransforms::tryAddExplicitVectorLength(VPlan &Plan) {
14821497
NextEVLIV->insertBefore(CanonicalIVIncrement);
14831498
EVLPhi->addOperand(NextEVLIV);
14841499

1485-
transformRecipestoEVLRecipes(Plan, *VPEVL);
1500+
transformRecipestoEVLRecipes(Plan, *VPEVL, TLI);
14861501

14871502
// Replace all uses of VPCanonicalIVPHIRecipe by
14881503
// VPEVLBasedIVPHIRecipe except for the canonical IV increment.

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,8 @@ struct VPlanTransforms {
108108
/// VPCanonicalIVPHIRecipe is only used to control the loop after
109109
/// this transformation.
110110
/// \returns true if the transformation succeeds, or false if it doesn't.
111-
static bool tryAddExplicitVectorLength(VPlan &Plan);
111+
static bool tryAddExplicitVectorLength(VPlan &Plan,
112+
const TargetLibraryInfo &TLI);
112113

113114
// For each Interleave Group in \p InterleaveGroups replace the Recipes
114115
// widening its memory instructions with a single VPInterleaveRecipe at its

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,10 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
138138
};
139139
for (const VPUser *U : EVL.users()) {
140140
if (!TypeSwitch<const VPUser *, bool>(U)
141+
.Case<VPWidenIntrinsicRecipe>(
142+
[&](const VPWidenIntrinsicRecipe *S) {
143+
return VerifyEVLUse(*S, S->getNumOperands() - 1);
144+
})
141145
.Case<VPWidenStoreEVLRecipe>([&](const VPWidenStoreEVLRecipe *S) {
142146
return VerifyEVLUse(*S, 2);
143147
})

llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ define void @vp_smax(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
2727
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
2828
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
2929
; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
30-
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[SMAX:%.+]]> = call llvm.smax(ir<[[LD1]]>, ir<[[LD2]]>)
30+
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[SMAX:%.+]]> = call llvm.vp.smax(ir<[[LD1]]>, ir<[[LD2]]>, vp<[[EVL]]>)
3131
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
3232
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
3333
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[SMAX]]>, vp<[[EVL]]>
@@ -80,7 +80,7 @@ define void @vp_smin(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
8080
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
8181
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
8282
; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
83-
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[SMIN:%.+]]> = call llvm.smin(ir<[[LD1]]>, ir<[[LD2]]>)
83+
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[SMIN:%.+]]> = call llvm.vp.smin(ir<[[LD1]]>, ir<[[LD2]]>, vp<[[EVL]]>)
8484
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
8585
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
8686
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[SMIN]]>, vp<[[EVL]]>
@@ -133,7 +133,7 @@ define void @vp_umax(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
133133
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
134134
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
135135
; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
136-
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[UMAX:%.+]]> = call llvm.umax(ir<[[LD1]]>, ir<[[LD2]]>)
136+
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[UMAX:%.+]]> = call llvm.vp.umax(ir<[[LD1]]>, ir<[[LD2]]>, vp<[[EVL]]>)
137137
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
138138
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
139139
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[UMAX]]>, vp<[[EVL]]>
@@ -186,7 +186,7 @@ define void @vp_umin(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
186186
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
187187
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
188188
; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
189-
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[UMIN:%.+]]> = call llvm.umin(ir<[[LD1]]>, ir<[[LD2]]>)
189+
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[UMIN:%.+]]> = call llvm.vp.umin(ir<[[LD1]]>, ir<[[LD2]]>, vp<[[EVL]]>)
190190
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
191191
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
192192
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[UMIN]]>, vp<[[EVL]]>

0 commit comments

Comments
 (0)