Skip to content

Commit 14d8a3f

Browse files
committed
[VPlan][LoopVectorize] Truncate min/max intrinsic ops
This adds support for intrinsics that are understood by DemandedBits. Fixes #87407.
1 parent f9a0b46 commit 14d8a3f

File tree

9 files changed

+1089
-28
lines changed

9 files changed

+1089
-28
lines changed

llvm/lib/Analysis/VectorUtils.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,14 @@ llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
611611
!InstructionSet.count(I))
612612
continue;
613613

614+
// Byteswaps require at least 16 bits
615+
if (const auto *II = dyn_cast<IntrinsicInst>(I)) {
616+
if (II->getIntrinsicID() == Intrinsic::bswap) {
617+
DBits[Leader] |= 0xFFFF;
618+
DBits[I] |= 0xFFFF;
619+
}
620+
}
621+
614622
// Unsafe casts terminate a chain unsuccessfully. We can't do anything
615623
// useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
616624
// transform anything that relies on them.
@@ -687,6 +695,30 @@ llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
687695
isa<ShlOperator, LShrOperator, AShrOperator>(U.getUser()) &&
688696
U.getOperandNo() == 1)
689697
return CI->uge(MinBW);
698+
// Ignore the call pointer when considering intrinsics that
699+
// DemandedBits understands.
700+
if (U->getType()->isPointerTy() && isa<CallInst>(U.getUser()) &&
701+
dyn_cast<CallInst>(U.getUser())->getCalledFunction() ==
702+
dyn_cast<Function>(U)) {
703+
if (const auto *II = dyn_cast<IntrinsicInst>(U.getUser())) {
704+
// Only ignore cases that DemandedBits understands.
705+
switch (II->getIntrinsicID()) {
706+
default:
707+
break;
708+
case Intrinsic::umax:
709+
case Intrinsic::umin:
710+
case Intrinsic::smax:
711+
case Intrinsic::smin:
712+
case Intrinsic::fshl:
713+
case Intrinsic::fshr:
714+
case Intrinsic::cttz:
715+
case Intrinsic::ctlz:
716+
case Intrinsic::bitreverse:
717+
case Intrinsic::bswap:
718+
return false;
719+
}
720+
}
721+
}
690722
uint64_t BW = bit_width(DB.getDemandedBits(&U).getZExtValue());
691723
return bit_ceil(BW) > MinBW;
692724
}))

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8276,7 +8276,7 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
82768276
Range);
82778277
if (ShouldUseVectorIntrinsic)
82788278
return new VPWidenCallRecipe(*CI, make_range(Ops.begin(), Ops.end()), ID,
8279-
CI->getDebugLoc());
8279+
CI->getType(), CI->getDebugLoc());
82808280

82818281
Function *Variant = nullptr;
82828282
std::optional<unsigned> MaskPos;
@@ -8329,8 +8329,8 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
83298329
}
83308330

83318331
return new VPWidenCallRecipe(*CI, make_range(Ops.begin(), Ops.end()),
8332-
Intrinsic::not_intrinsic, CI->getDebugLoc(),
8333-
Variant);
8332+
Intrinsic::not_intrinsic, CI->getType(),
8333+
CI->getDebugLoc(), Variant);
83348334
}
83358335

83368336
return nullptr;

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1455,21 +1455,24 @@ class VPWidenCallRecipe : public VPSingleDefRecipe {
14551455
/// chosen vectorized variant, so there will be a different vplan for each
14561456
/// VF with a valid variant.
14571457
Function *Variant;
1458+
/// Result type for the call.
1459+
Type *ResultTy;
14581460

14591461
public:
14601462
template <typename IterT>
14611463
VPWidenCallRecipe(CallInst &I, iterator_range<IterT> CallArguments,
1462-
Intrinsic::ID VectorIntrinsicID, DebugLoc DL = {},
1463-
Function *Variant = nullptr)
1464+
Intrinsic::ID VectorIntrinsicID, Type *ResultTy,
1465+
DebugLoc DL = {}, Function *Variant = nullptr)
14641466
: VPSingleDefRecipe(VPDef::VPWidenCallSC, CallArguments, &I, DL),
1465-
VectorIntrinsicID(VectorIntrinsicID), Variant(Variant) {}
1467+
VectorIntrinsicID(VectorIntrinsicID), Variant(Variant),
1468+
ResultTy(ResultTy) {}
14661469

14671470
~VPWidenCallRecipe() override = default;
14681471

14691472
VPWidenCallRecipe *clone() override {
14701473
return new VPWidenCallRecipe(*cast<CallInst>(getUnderlyingInstr()),
1471-
operands(), VectorIntrinsicID, getDebugLoc(),
1472-
Variant);
1474+
operands(), VectorIntrinsicID, ResultTy,
1475+
getDebugLoc(), Variant);
14731476
}
14741477

14751478
VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
@@ -1482,6 +1485,11 @@ class VPWidenCallRecipe : public VPSingleDefRecipe {
14821485
void print(raw_ostream &O, const Twine &Indent,
14831486
VPSlotTracker &SlotTracker) const override;
14841487
#endif
1488+
1489+
/// Returns the result type of the call.
1490+
Type *getResultType() const { return ResultTy; }
1491+
1492+
void setResultType(Type *newResTy) { ResultTy = newResTy; }
14851493
};
14861494

14871495
/// A recipe for widening select instructions.

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -110,11 +110,6 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenRecipe *R) {
110110
llvm_unreachable("Unhandled opcode!");
111111
}
112112

113-
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenCallRecipe *R) {
114-
auto &CI = *cast<CallInst>(R->getUnderlyingInstr());
115-
return CI.getType();
116-
}
117-
118113
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R) {
119114
assert((isa<VPWidenLoadRecipe>(R) || isa<VPWidenLoadEVLRecipe>(R)) &&
120115
"Store recipes should not define any values");
@@ -238,7 +233,7 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
238233
return inferScalarType(R->getOperand(0));
239234
})
240235
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
241-
VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
236+
VPWidenMemoryRecipe, VPWidenSelectRecipe>(
242237
[this](const auto *R) { return inferScalarTypeForRecipe(R); })
243238
.Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {
244239
// TODO: Use info from interleave group.
@@ -248,6 +243,8 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
248243
[](const VPWidenCastRecipe *R) { return R->getResultType(); })
249244
.Case<VPScalarCastRecipe>(
250245
[](const VPScalarCastRecipe *R) { return R->getResultType(); })
246+
.Case<VPWidenCallRecipe>(
247+
[](const VPWidenCallRecipe *R) { return R->getResultType(); })
251248
.Case<VPExpandSCEVRecipe>([](const VPExpandSCEVRecipe *R) {
252249
return R->getSCEV()->getType();
253250
});

llvm/lib/Transforms/Vectorize/VPlanAnalysis.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ class VPTypeAnalysis {
4343

4444
Type *inferScalarTypeForRecipe(const VPBlendRecipe *R);
4545
Type *inferScalarTypeForRecipe(const VPInstruction *R);
46-
Type *inferScalarTypeForRecipe(const VPWidenCallRecipe *R);
4746
Type *inferScalarTypeForRecipe(const VPWidenRecipe *R);
4847
Type *inferScalarTypeForRecipe(const VPWidenIntOrFpInductionRecipe *R);
4948
Type *inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R);

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -717,7 +717,7 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
717717
if (UseIntrinsic &&
718718
isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1))
719719
TysForDecl.push_back(
720-
VectorType::get(CI.getType()->getScalarType(), State.VF));
720+
VectorType::get(getResultType()->getScalarType(), State.VF));
721721
SmallVector<Value *, 4> Args;
722722
for (const auto &I : enumerate(operands())) {
723723
// Some intrinsics have a scalar argument - don't replace it with a
@@ -770,14 +770,14 @@ void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
770770
VPSlotTracker &SlotTracker) const {
771771
O << Indent << "WIDEN-CALL ";
772772

773-
auto *CI = cast<CallInst>(getUnderlyingInstr());
774-
if (CI->getType()->isVoidTy())
773+
if (getResultType()->isVoidTy())
775774
O << "void ";
776775
else {
777776
printAsOperand(O, SlotTracker);
778777
O << " = ";
779778
}
780779

780+
auto *CI = cast<CallInst>(getUnderlyingInstr());
781781
O << "call @" << CI->getCalledFunction()->getName() << "(";
782782
printOperands(O, SlotTracker);
783783
O << ")";

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,10 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
7474
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
7575
NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands());
7676
} else if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
77-
NewRecipe = new VPWidenCallRecipe(
78-
*CI, drop_end(Ingredient.operands()),
79-
getVectorIntrinsicIDForCall(CI, &TLI), CI->getDebugLoc());
77+
NewRecipe =
78+
new VPWidenCallRecipe(*CI, drop_end(Ingredient.operands()),
79+
getVectorIntrinsicIDForCall(CI, &TLI),
80+
CI->getType(), CI->getDebugLoc());
8081
} else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) {
8182
NewRecipe = new VPWidenSelectRecipe(*SI, Ingredient.operands());
8283
} else if (auto *CI = dyn_cast<CastInst>(Inst)) {
@@ -975,8 +976,8 @@ void VPlanTransforms::truncateToMinimalBitwidths(
975976
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
976977
vp_depth_first_deep(Plan.getVectorLoopRegion()))) {
977978
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
978-
if (!isa<VPWidenRecipe, VPWidenCastRecipe, VPReplicateRecipe,
979-
VPWidenSelectRecipe, VPWidenLoadRecipe>(&R))
979+
if (!isa<VPWidenRecipe, VPWidenCallRecipe, VPWidenCastRecipe,
980+
VPReplicateRecipe, VPWidenSelectRecipe, VPWidenLoadRecipe>(&R))
980981
continue;
981982

982983
VPValue *ResultVPV = R.getVPSingleValue();
@@ -1082,6 +1083,12 @@ void VPlanTransforms::truncateToMinimalBitwidths(
10821083
}
10831084
}
10841085

1086+
// If this was a WIDEN-CALL (intrinsic) then we need to update the return
1087+
// type so it's compatible with the new args.
1088+
if (isa<VPWidenCallRecipe>(&R)) {
1089+
auto *callInsn = dyn_cast<VPWidenCallRecipe>(&R);
1090+
callInsn->setResultType(NewResTy);
1091+
}
10851092
}
10861093
}
10871094

0 commit comments

Comments
 (0)