Skip to content

Commit 9b24ba5

Browse files
committed
[VPlan][LoopVectorize] Truncate min/max intrinsic ops
This adds support for intrinsics that are understood by DemandedBits. Fixes #87407.
1 parent 82383d5 commit 9b24ba5

File tree

9 files changed

+1087
-26
lines changed

9 files changed

+1087
-26
lines changed

llvm/lib/Analysis/VectorUtils.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,14 @@ llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
611611
!InstructionSet.count(I))
612612
continue;
613613

614+
// Byteswaps require at least 16 bits
615+
if (const auto *II = dyn_cast<IntrinsicInst>(I)) {
616+
if (II->getIntrinsicID() == Intrinsic::bswap) {
617+
DBits[Leader] |= 0xFFFF;
618+
DBits[I] |= 0xFFFF;
619+
}
620+
}
621+
614622
// Unsafe casts terminate a chain unsuccessfully. We can't do anything
615623
// useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
616624
// transform anything that relies on them.
@@ -687,6 +695,30 @@ llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
687695
isa<ShlOperator, LShrOperator, AShrOperator>(U.getUser()) &&
688696
U.getOperandNo() == 1)
689697
return CI->uge(MinBW);
698+
// Ignore the call pointer when considering intrinsics that
699+
// DemandedBits understands.
700+
if (U->getType()->isPointerTy() && isa<CallInst>(U.getUser()) &&
701+
dyn_cast<CallInst>(U.getUser())->getCalledFunction() ==
702+
dyn_cast<Function>(U)) {
703+
if (const auto *II = dyn_cast<IntrinsicInst>(U.getUser())) {
704+
// Only ignore cases that DemandedBits understands.
705+
switch (II->getIntrinsicID()) {
706+
default:
707+
break;
708+
case Intrinsic::umax:
709+
case Intrinsic::umin:
710+
case Intrinsic::smax:
711+
case Intrinsic::smin:
712+
case Intrinsic::fshl:
713+
case Intrinsic::fshr:
714+
case Intrinsic::cttz:
715+
case Intrinsic::ctlz:
716+
case Intrinsic::bitreverse:
717+
case Intrinsic::bswap:
718+
return false;
719+
}
720+
}
721+
}
690722
uint64_t BW = bit_width(DB.getDemandedBits(&U).getZExtValue());
691723
return bit_ceil(BW) > MinBW;
692724
}))

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8284,7 +8284,7 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
82848284
Range);
82858285
if (ShouldUseVectorIntrinsic)
82868286
return new VPWidenCallRecipe(CI, make_range(Ops.begin(), Ops.end()), ID,
8287-
CI->getDebugLoc());
8287+
CI->getType(), CI->getDebugLoc());
82888288

82898289
Function *Variant = nullptr;
82908290
std::optional<unsigned> MaskPos;
@@ -8337,8 +8337,8 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
83378337
}
83388338

83398339
return new VPWidenCallRecipe(CI, make_range(Ops.begin(), Ops.end()),
8340-
Intrinsic::not_intrinsic, CI->getDebugLoc(),
8341-
Variant);
8340+
Intrinsic::not_intrinsic, CI->getType(),
8341+
CI->getDebugLoc(), Variant);
83428342
}
83438343

83448344
return nullptr;

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1455,14 +1455,17 @@ class VPWidenCallRecipe : public VPSingleDefRecipe {
14551455
/// chosen vectorized variant, so there will be a different vplan for each
14561456
/// VF with a valid variant.
14571457
Function *Variant;
1458+
/// Result type of the call.
1459+
Type *ResultTy;
14581460

14591461
public:
14601462
template <typename IterT>
14611463
VPWidenCallRecipe(Value *UV, iterator_range<IterT> CallArguments,
1462-
Intrinsic::ID VectorIntrinsicID, DebugLoc DL = {},
1463-
Function *Variant = nullptr)
1464+
Intrinsic::ID VectorIntrinsicID, Type *ResultTy,
1465+
DebugLoc DL = {}, Function *Variant = nullptr)
14641466
: VPSingleDefRecipe(VPDef::VPWidenCallSC, CallArguments, UV, DL),
1465-
VectorIntrinsicID(VectorIntrinsicID), Variant(Variant) {
1467+
VectorIntrinsicID(VectorIntrinsicID), Variant(Variant),
1468+
ResultTy(ResultTy) {
14661469
assert(
14671470
isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
14681471
"last operand must be the called function");
@@ -1472,7 +1475,7 @@ class VPWidenCallRecipe : public VPSingleDefRecipe {
14721475

14731476
VPWidenCallRecipe *clone() override {
14741477
return new VPWidenCallRecipe(getUnderlyingValue(), operands(),
1475-
VectorIntrinsicID, getDebugLoc(), Variant);
1478+
VectorIntrinsicID, ResultTy, getDebugLoc(), Variant);
14761479
}
14771480

14781481
VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
@@ -1496,6 +1499,11 @@ class VPWidenCallRecipe : public VPSingleDefRecipe {
14961499
void print(raw_ostream &O, const Twine &Indent,
14971500
VPSlotTracker &SlotTracker) const override;
14981501
#endif
1502+
1503+
/// Returns the result type of the call.
1504+
Type *getResultType() const { return ResultTy; }
1505+
1506+
/// Replaces the stored result type, i.e. the type later returned by getResultType().
void setResultType(Type *newResTy) { ResultTy = newResTy; }
14991507
};
15001508

15011509
/// A recipe for widening select instructions.

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -110,11 +110,6 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenRecipe *R) {
110110
llvm_unreachable("Unhandled opcode!");
111111
}
112112

113-
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenCallRecipe *R) {
114-
auto &CI = *cast<CallInst>(R->getUnderlyingInstr());
115-
return CI.getType();
116-
}
117-
118113
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R) {
119114
assert((isa<VPWidenLoadRecipe>(R) || isa<VPWidenLoadEVLRecipe>(R)) &&
120115
"Store recipes should not define any values");
@@ -238,7 +233,7 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
238233
return inferScalarType(R->getOperand(0));
239234
})
240235
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
241-
VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
236+
VPWidenMemoryRecipe, VPWidenSelectRecipe>(
242237
[this](const auto *R) { return inferScalarTypeForRecipe(R); })
243238
.Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {
244239
// TODO: Use info from interleave group.
@@ -248,6 +243,8 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
248243
[](const VPWidenCastRecipe *R) { return R->getResultType(); })
249244
.Case<VPScalarCastRecipe>(
250245
[](const VPScalarCastRecipe *R) { return R->getResultType(); })
246+
.Case<VPWidenCallRecipe>(
247+
[](const VPWidenCallRecipe *R) { return R->getResultType(); })
251248
.Case<VPExpandSCEVRecipe>([](const VPExpandSCEVRecipe *R) {
252249
return R->getSCEV()->getType();
253250
});

llvm/lib/Transforms/Vectorize/VPlanAnalysis.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ class VPTypeAnalysis {
4343

4444
Type *inferScalarTypeForRecipe(const VPBlendRecipe *R);
4545
Type *inferScalarTypeForRecipe(const VPInstruction *R);
46-
Type *inferScalarTypeForRecipe(const VPWidenCallRecipe *R);
4746
Type *inferScalarTypeForRecipe(const VPWidenRecipe *R);
4847
Type *inferScalarTypeForRecipe(const VPWidenIntOrFpInductionRecipe *R);
4948
Type *inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R);

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -723,8 +723,8 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
723723
// Add return type if intrinsic is overloaded on it.
724724
if (UseIntrinsic &&
725725
isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1))
726-
TysForDecl.push_back(VectorType::get(
727-
CalledScalarFn->getReturnType()->getScalarType(), State.VF));
726+
TysForDecl.push_back(
727+
VectorType::get(getResultType()->getScalarType(), State.VF));
728728
SmallVector<Value *, 4> Args;
729729
for (const auto &I : enumerate(arg_operands())) {
730730
// Some intrinsics have a scalar argument - don't replace it with a
@@ -780,14 +780,14 @@ void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
780780
VPSlotTracker &SlotTracker) const {
781781
O << Indent << "WIDEN-CALL ";
782782

783-
Function *CalledFn = getCalledScalarFunction();
784-
if (CalledFn->getReturnType()->isVoidTy())
783+
if (getResultType()->isVoidTy())
785784
O << "void ";
786785
else {
787786
printAsOperand(O, SlotTracker);
788787
O << " = ";
789788
}
790789

790+
Function *CalledFn = getCalledScalarFunction();
791791
O << "call @" << CalledFn->getName() << "(";
792792
interleaveComma(arg_operands(), O, [&O, &SlotTracker](VPValue *Op) {
793793
Op->printAsOperand(O, SlotTracker);

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,10 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
7474
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
7575
NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands());
7676
} else if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
77-
NewRecipe = new VPWidenCallRecipe(
78-
CI, Ingredient.operands(), getVectorIntrinsicIDForCall(CI, &TLI),
79-
CI->getDebugLoc());
77+
NewRecipe =
78+
new VPWidenCallRecipe(CI, Ingredient.operands(),
79+
getVectorIntrinsicIDForCall(CI, &TLI),
80+
CI->getType(), CI->getDebugLoc());
8081
} else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) {
8182
NewRecipe = new VPWidenSelectRecipe(*SI, Ingredient.operands());
8283
} else if (auto *CI = dyn_cast<CastInst>(Inst)) {
@@ -971,8 +972,8 @@ void VPlanTransforms::truncateToMinimalBitwidths(
971972
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
972973
vp_depth_first_deep(Plan.getVectorLoopRegion()))) {
973974
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
974-
if (!isa<VPWidenRecipe, VPWidenCastRecipe, VPReplicateRecipe,
975-
VPWidenSelectRecipe, VPWidenLoadRecipe>(&R))
975+
if (!isa<VPWidenRecipe, VPWidenCallRecipe, VPWidenCastRecipe,
976+
VPReplicateRecipe, VPWidenSelectRecipe, VPWidenLoadRecipe>(&R))
976977
continue;
977978

978979
VPValue *ResultVPV = R.getVPSingleValue();
@@ -1078,6 +1079,12 @@ void VPlanTransforms::truncateToMinimalBitwidths(
10781079
}
10791080
}
10801081

1082+
// If this was a WIDEN-CALL (intrinsic) then we need to update the return
1083+
// type so it's compatible with the new args.
1084+
if (isa<VPWidenCallRecipe>(&R)) {
1085+
auto *callInsn = dyn_cast<VPWidenCallRecipe>(&R);
1086+
callInsn->setResultType(NewResTy);
1087+
}
10811088
}
10821089
}
10831090

0 commit comments

Comments
 (0)