@@ -7056,16 +7056,19 @@ bool BoUpSLP::areAllUsersVectorized(
7056
7056
7057
7057
static std::pair<InstructionCost, InstructionCost>
7058
7058
getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
7059
- TargetTransformInfo *TTI, TargetLibraryInfo *TLI,
7060
- ArrayRef<Type *> ArgTys) {
7059
+ TargetTransformInfo *TTI, TargetLibraryInfo *TLI) {
7061
7060
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
7062
7061
7063
7062
// Calculate the cost of the scalar and vector calls.
7063
+ SmallVector<Type *, 4> VecTys;
7064
+ for (Use &Arg : CI->args())
7065
+ VecTys.push_back(
7066
+ FixedVectorType::get(Arg->getType(), VecTy->getNumElements()));
7064
7067
FastMathFlags FMF;
7065
7068
if (auto *FPCI = dyn_cast<FPMathOperator>(CI))
7066
7069
FMF = FPCI->getFastMathFlags();
7067
7070
SmallVector<const Value *> Arguments(CI->args());
7068
- IntrinsicCostAttributes CostAttrs(ID, VecTy, Arguments, ArgTys , FMF,
7071
+ IntrinsicCostAttributes CostAttrs(ID, VecTy, Arguments, VecTys , FMF,
7069
7072
dyn_cast<IntrinsicInst>(CI));
7070
7073
auto IntrinsicCost =
7071
7074
TTI->getIntrinsicInstrCost(CostAttrs, TTI::TCK_RecipThroughput);
@@ -7078,8 +7081,8 @@ getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
7078
7081
if (!CI->isNoBuiltin() && VecFunc) {
7079
7082
// Calculate the cost of the vector library call.
7080
7083
// If the corresponding vector call is cheaper, return its cost.
7081
- LibCost =
7082
- TTI->getCallInstrCost(nullptr, VecTy, ArgTys, TTI::TCK_RecipThroughput);
7084
+ LibCost = TTI->getCallInstrCost(nullptr, VecTy, VecTys,
7085
+ TTI::TCK_RecipThroughput);
7083
7086
}
7084
7087
return {IntrinsicCost, LibCost};
7085
7088
}
@@ -8505,30 +8508,6 @@ TTI::CastContextHint BoUpSLP::getCastContextHint(const TreeEntry &TE) const {
8505
8508
return TTI::CastContextHint::None;
8506
8509
}
8507
8510
8508
- /// Builds the arguments types vector for the given call instruction with the
8509
- /// given \p ID for the specified vector factor.
8510
- static SmallVector<Type *> buildIntrinsicArgTypes(const CallInst *CI,
8511
- const Intrinsic::ID ID,
8512
- const unsigned VF,
8513
- unsigned MinBW) {
8514
- SmallVector<Type *> ArgTys;
8515
- for (auto [Idx, Arg] : enumerate(CI->args())) {
8516
- if (ID != Intrinsic::not_intrinsic) {
8517
- if (isVectorIntrinsicWithScalarOpAtArg(ID, Idx)) {
8518
- ArgTys.push_back(Arg->getType());
8519
- continue;
8520
- }
8521
- if (MinBW > 0) {
8522
- ArgTys.push_back(FixedVectorType::get(
8523
- IntegerType::get(CI->getContext(), MinBW), VF));
8524
- continue;
8525
- }
8526
- }
8527
- ArgTys.push_back(FixedVectorType::get(Arg->getType(), VF));
8528
- }
8529
- return ArgTys;
8530
- }
8531
-
8532
8511
InstructionCost
8533
8512
BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
8534
8513
SmallPtrSetImpl<Value *> &CheckedExtracts) {
@@ -9095,11 +9074,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
9095
9074
};
9096
9075
auto GetVectorCost = [=](InstructionCost CommonCost) {
9097
9076
auto *CI = cast<CallInst>(VL0);
9098
- Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
9099
- SmallVector<Type *> ArgTys =
9100
- buildIntrinsicArgTypes(CI, ID, VecTy->getNumElements(),
9101
- It != MinBWs.end() ? It->second.first : 0);
9102
- auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI, ArgTys);
9077
+ auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI);
9103
9078
return std::min(VecCallCosts.first, VecCallCosts.second) + CommonCost;
9104
9079
};
9105
9080
return GetCostDiff(GetScalarCost, GetVectorCost);
@@ -12571,10 +12546,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
12571
12546
12572
12547
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
12573
12548
12574
- SmallVector<Type *> ArgTys =
12575
- buildIntrinsicArgTypes(CI, ID, VecTy->getNumElements(),
12576
- It != MinBWs.end() ? It->second.first : 0);
12577
- auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI, ArgTys);
12549
+ auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI);
12578
12550
bool UseIntrinsic = ID != Intrinsic::not_intrinsic &&
12579
12551
VecCallCosts.first <= VecCallCosts.second;
12580
12552
@@ -12583,20 +12555,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
12583
12555
SmallVector<Type *, 2> TysForDecl;
12584
12556
// Add return type if intrinsic is overloaded on it.
12585
12557
if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, -1))
12586
- TysForDecl.push_back(VecTy);
12558
+ TysForDecl.push_back(
12559
+ FixedVectorType::get(CI->getType(), E->Scalars.size()));
12587
12560
auto *CEI = cast<CallInst>(VL0);
12588
12561
for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
12589
12562
ValueList OpVL;
12590
12563
// Some intrinsics have scalar arguments. This argument should not be
12591
12564
// vectorized.
12592
12565
if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(ID, I)) {
12593
12566
ScalarArg = CEI->getArgOperand(I);
12594
- // If we decided to reduce the bitwidth of the abs intrinsic, its second argument
12595
- // must be set to false (do not return poison if the value is signed min).
12596
- if (ID == Intrinsic::abs && It != MinBWs.end() &&
12597
- It->second.first < DL->getTypeSizeInBits(CEI->getType()))
12598
- ScalarArg = Builder.getFalse();
12599
- OpVecs.push_back(ScalarArg);
12567
+ OpVecs.push_back(CEI->getArgOperand(I));
12600
12568
if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
12601
12569
TysForDecl.push_back(ScalarArg->getType());
12602
12570
continue;
@@ -12609,13 +12577,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
12609
12577
}
12610
12578
ScalarArg = CEI->getArgOperand(I);
12611
12579
if (cast<VectorType>(OpVec->getType())->getElementType() !=
12612
- ScalarArg->getType() &&
12613
- It == MinBWs.end()) {
12580
+ ScalarArg->getType()) {
12614
12581
auto *CastTy = FixedVectorType::get(ScalarArg->getType(),
12615
12582
VecTy->getNumElements());
12616
12583
OpVec = Builder.CreateIntCast(OpVec, CastTy, GetOperandSignedness(I));
12617
- } else if (It != MinBWs.end()) {
12618
- OpVec = Builder.CreateIntCast(OpVec, VecTy, GetOperandSignedness(I));
12619
12584
}
12620
12585
LLVM_DEBUG(dbgs() << "SLP: OpVec[" << I << "]: " << *OpVec << "\n");
12621
12586
OpVecs.push_back(OpVec);
@@ -14359,45 +14324,6 @@ bool BoUpSLP::collectValuesToDemote(
14359
14324
return TryProcessInstruction(I, *ITE, BitWidth, Ops);
14360
14325
}
14361
14326
14362
- case Instruction::Call: {
14363
- auto *IC = dyn_cast<IntrinsicInst>(I);
14364
- if (!IC)
14365
- break;
14366
- Intrinsic::ID ID = getVectorIntrinsicIDForCall(IC, TLI);
14367
- if (ID != Intrinsic::abs && ID != Intrinsic::smin &&
14368
- ID != Intrinsic::smax && ID != Intrinsic::umin && ID != Intrinsic::umax)
14369
- break;
14370
- SmallVector<Value *> Operands(1, I->getOperand(0));
14371
- End = 1;
14372
- if (ID != Intrinsic::abs) {
14373
- Operands.push_back(I->getOperand(1));
14374
- End = 2;
14375
- }
14376
- InstructionCost BestCost =
14377
- std::numeric_limits<InstructionCost::CostType>::max();
14378
- unsigned BestBitWidth = BitWidth;
14379
- unsigned VF = ITE->Scalars.size();
14380
- // Choose the best bitwidth based on cost estimations.
14381
- auto Checker = [&](unsigned BitWidth, unsigned) {
14382
- unsigned MinBW = PowerOf2Ceil(BitWidth);
14383
- SmallVector<Type *> ArgTys = buildIntrinsicArgTypes(IC, ID, VF, MinBW);
14384
- auto VecCallCosts = getVectorCallCosts(
14385
- IC,
14386
- FixedVectorType::get(IntegerType::get(IC->getContext(), MinBW), VF),
14387
- TTI, TLI, ArgTys);
14388
- InstructionCost Cost = std::min(VecCallCosts.first, VecCallCosts.second);
14389
- if (Cost < BestCost) {
14390
- BestCost = Cost;
14391
- BestBitWidth = BitWidth;
14392
- }
14393
- return false;
14394
- };
14395
- [[maybe_unused]] bool NeedToExit;
14396
- (void)AttemptCheckBitwidth(Checker, NeedToExit);
14397
- BitWidth = BestBitWidth;
14398
- return TryProcessInstruction(I, *ITE, BitWidth, Operands);
14399
- }
14400
-
14401
14327
// Otherwise, conservatively give up.
14402
14328
default:
14403
14329
break;
0 commit comments