Skip to content

Commit 1bdc03a

Browse files
committed
Rebase to update the motivating case, add checks to prevent test failure
1 parent e0b89f2 commit 1bdc03a

File tree

2 files changed

+49
-94
lines changed

2 files changed

+49
-94
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "llvm/CodeGen/BasicTTIImpl.h"
1919
#include "llvm/CodeGen/CostTable.h"
2020
#include "llvm/CodeGen/TargetLowering.h"
21+
#include "llvm/IR/Constants.h"
2122
#include "llvm/IR/IntrinsicInst.h"
2223
#include "llvm/IR/Intrinsics.h"
2324
#include "llvm/IR/IntrinsicsAArch64.h"
@@ -3575,15 +3576,35 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
35753576
// If the information about individual scalars being vectorized is
35763577
// available, this yields better cost estimation.
35773578
if (auto *VTy = dyn_cast<FixedVectorType>(Ty); VTy && !Args.empty()) {
3579+
assert(Args.size() % 2 == 0 && "Args size should be even");
35783580
InstructionCost InsertExtractCost =
35793581
ST->getVectorInsertExtractBaseCost();
3580-
Cost = (3 * InsertExtractCost) * VTy->getNumElements();
3581-
for (int i = 0, Sz = Args.size(); i < Sz; i += 2) {
3582-
Cost += getArithmeticInstrCost(
3583-
Opcode, VTy->getScalarType(), CostKind,
3584-
TTI::getOperandInfo(Args[i]), TTI::getOperandInfo(Args[i + 1]));
3582+
// The cost of a single sdiv is being queried through the cost model.
3583+
// FIXME: Remove the isa checks once PR 122236 lands.
3584+
if (Args.size() == 2 &&
3585+
!(isa<ConstantVector>(Args[1]) ||
3586+
isa<ConstantDataVector>(Args[1]) ||
3587+
isa<ConstantExpr>(Args[1])) &&
3588+
none_of(Args, IsaPred<UndefValue, PoisonValue>)) {
3589+
unsigned NElts = VTy->getNumElements();
3590+
// Compute the per-element cost.
3591+
Cost = getArithmeticInstrCost(Opcode, VTy->getScalarType(),
3592+
CostKind, Op1Info.getNoProps(),
3593+
Op2Info.getNoProps());
3594+
Cost += 3 * InsertExtractCost;
3595+
Cost *= NElts;
3596+
return Cost;
3597+
} else if (Args.size() > 2) // vectorization cost is inquired
3598+
{
3599+
Cost = (3 * InsertExtractCost) * VTy->getNumElements();
3600+
for (int i = 0, Sz = Args.size(); i < Sz; i += 2) {
3601+
Cost +=
3602+
getArithmeticInstrCost(Opcode, VTy->getScalarType(), CostKind,
3603+
TTI::getOperandInfo(Args[i]),
3604+
TTI::getOperandInfo(Args[i + 1]));
3605+
}
3606+
return Cost;
35853607
}
3586-
return Cost;
35873608
}
35883609

35893610
// If one of the operands is a uniform constant then the cost for each

llvm/test/Transforms/SLPVectorizer/AArch64/div.ll

Lines changed: 22 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -553,35 +553,13 @@ define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const_pow2(<4 x i32> %a)
553553
}
554554

555555
define <2 x i32> @sdiv_v2i32_unknown_divisor(<2 x i32> %a, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z)
556-
; NO-SVE-LABEL: define <2 x i32> @sdiv_v2i32_unknown_divisor(
557-
; NO-SVE-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
558-
; NO-SVE-NEXT: [[A0:%.*]] = extractelement <2 x i32> [[A]], i64 0
559-
; NO-SVE-NEXT: [[A1:%.*]] = extractelement <2 x i32> [[A]], i64 1
560-
; NO-SVE-NEXT: [[X0:%.*]] = extractelement <2 x i32> [[X]], i64 0
561-
; NO-SVE-NEXT: [[X1:%.*]] = extractelement <2 x i32> [[X]], i64 1
562-
; NO-SVE-NEXT: [[TMP1:%.*]] = sdiv i32 [[A0]], [[X0]]
563-
; NO-SVE-NEXT: [[TMP2:%.*]] = sdiv i32 [[A1]], [[X1]]
564-
; NO-SVE-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[X0]]
565-
; NO-SVE-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], [[X1]]
566-
; NO-SVE-NEXT: [[Y0:%.*]] = extractelement <2 x i32> [[Y]], i64 0
567-
; NO-SVE-NEXT: [[Y1:%.*]] = extractelement <2 x i32> [[Y]], i64 1
568-
; NO-SVE-NEXT: [[TMP5:%.*]] = sub i32 [[TMP3]], [[Y0]]
569-
; NO-SVE-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[Y1]]
570-
; NO-SVE-NEXT: [[Z0:%.*]] = extractelement <2 x i32> [[Z]], i64 0
571-
; NO-SVE-NEXT: [[Z1:%.*]] = extractelement <2 x i32> [[Z]], i64 1
572-
; NO-SVE-NEXT: [[TMP7:%.*]] = mul i32 [[TMP5]], [[Z0]]
573-
; NO-SVE-NEXT: [[TMP8:%.*]] = mul i32 [[TMP6]], [[Z1]]
574-
; NO-SVE-NEXT: [[RES0:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
575-
; NO-SVE-NEXT: [[RES1:%.*]] = insertelement <2 x i32> [[RES0]], i32 [[TMP8]], i32 1
576-
; NO-SVE-NEXT: ret <2 x i32> [[RES1]]
577-
;
578-
; SVE-LABEL: define <2 x i32> @sdiv_v2i32_unknown_divisor(
579-
; SVE-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
580-
; SVE-NEXT: [[TMP2:%.*]] = sdiv <2 x i32> [[A]], [[X]]
581-
; SVE-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[X]]
582-
; SVE-NEXT: [[TMP4:%.*]] = sub <2 x i32> [[TMP3]], [[Y]]
583-
; SVE-NEXT: [[TMP5:%.*]] = mul <2 x i32> [[TMP4]], [[Z]]
584-
; SVE-NEXT: ret <2 x i32> [[TMP5]]
556+
; CHECK-LABEL: define <2 x i32> @sdiv_v2i32_unknown_divisor(
557+
; CHECK-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
558+
; CHECK-NEXT: [[TMP1:%.*]] = sdiv <2 x i32> [[A]], [[X]]
559+
; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], [[X]]
560+
; CHECK-NEXT: [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], [[Y]]
561+
; CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i32> [[TMP3]], [[Z]]
562+
; CHECK-NEXT: ret <2 x i32> [[TMP4]]
585563
;
586564
{
587565
%a0 = extractelement <2 x i32> %a, i64 0
@@ -607,35 +585,13 @@ define <2 x i32> @sdiv_v2i32_unknown_divisor(<2 x i32> %a, <2 x i32> %x, <2 x i3
607585

608586
; computes (a/const + x - y) * z
609587
define <2 x i32> @sdiv_v2i32_const_divisor(<2 x i32> %a, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z)
610-
; NO-SVE-LABEL: define <2 x i32> @sdiv_v2i32_const_divisor(
611-
; NO-SVE-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
612-
; NO-SVE-NEXT: [[A0:%.*]] = extractelement <2 x i32> [[A]], i64 0
613-
; NO-SVE-NEXT: [[A1:%.*]] = extractelement <2 x i32> [[A]], i64 1
614-
; NO-SVE-NEXT: [[TMP1:%.*]] = sdiv i32 [[A0]], 2
615-
; NO-SVE-NEXT: [[TMP2:%.*]] = sdiv i32 [[A1]], 4
616-
; NO-SVE-NEXT: [[X0:%.*]] = extractelement <2 x i32> [[X]], i64 0
617-
; NO-SVE-NEXT: [[X1:%.*]] = extractelement <2 x i32> [[X]], i64 1
618-
; NO-SVE-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[X0]]
619-
; NO-SVE-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], [[X1]]
620-
; NO-SVE-NEXT: [[Y0:%.*]] = extractelement <2 x i32> [[Y]], i64 0
621-
; NO-SVE-NEXT: [[Y1:%.*]] = extractelement <2 x i32> [[Y]], i64 1
622-
; NO-SVE-NEXT: [[TMP5:%.*]] = sub i32 [[TMP3]], [[Y0]]
623-
; NO-SVE-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[Y1]]
624-
; NO-SVE-NEXT: [[Z0:%.*]] = extractelement <2 x i32> [[Z]], i64 0
625-
; NO-SVE-NEXT: [[Z1:%.*]] = extractelement <2 x i32> [[Z]], i64 1
626-
; NO-SVE-NEXT: [[TMP7:%.*]] = mul i32 [[TMP5]], [[Z0]]
627-
; NO-SVE-NEXT: [[TMP8:%.*]] = mul i32 [[TMP6]], [[Z1]]
628-
; NO-SVE-NEXT: [[RES0:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
629-
; NO-SVE-NEXT: [[RES1:%.*]] = insertelement <2 x i32> [[RES0]], i32 [[TMP8]], i32 1
630-
; NO-SVE-NEXT: ret <2 x i32> [[RES1]]
631-
;
632-
; SVE-LABEL: define <2 x i32> @sdiv_v2i32_const_divisor(
633-
; SVE-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
634-
; SVE-NEXT: [[TMP1:%.*]] = sdiv <2 x i32> [[A]], <i32 2, i32 4>
635-
; SVE-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], [[X]]
636-
; SVE-NEXT: [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], [[Y]]
637-
; SVE-NEXT: [[TMP4:%.*]] = mul <2 x i32> [[TMP3]], [[Z]]
638-
; SVE-NEXT: ret <2 x i32> [[TMP4]]
588+
; CHECK-LABEL: define <2 x i32> @sdiv_v2i32_const_divisor(
589+
; CHECK-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
590+
; CHECK-NEXT: [[TMP1:%.*]] = sdiv <2 x i32> [[A]], <i32 2, i32 4>
591+
; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], [[X]]
592+
; CHECK-NEXT: [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], [[Y]]
593+
; CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i32> [[TMP3]], [[Z]]
594+
; CHECK-NEXT: ret <2 x i32> [[TMP4]]
639595
;
640596
{
641597
%a0 = extractelement <2 x i32> %a, i64 0
@@ -660,36 +616,14 @@ define <2 x i32> @sdiv_v2i32_const_divisor(<2 x i32> %a, <2 x i32> %x, <2 x i32>
660616
}
661617

662618
define <2 x i32> @sdiv_v2i32_Op1_unknown_Op2_const(<2 x i32> %a, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z)
663-
; NO-SVE-LABEL: define <2 x i32> @sdiv_v2i32_Op1_unknown_Op2_const(
664-
; NO-SVE-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
665-
; NO-SVE-NEXT: [[A0:%.*]] = extractelement <2 x i32> [[A]], i64 0
666-
; NO-SVE-NEXT: [[A1:%.*]] = extractelement <2 x i32> [[A]], i64 1
667-
; NO-SVE-NEXT: [[TMP1:%.*]] = sdiv i32 [[A0]], [[A0]]
668-
; NO-SVE-NEXT: [[TMP2:%.*]] = sdiv i32 [[A1]], 4
669-
; NO-SVE-NEXT: [[X0:%.*]] = extractelement <2 x i32> [[X]], i64 0
670-
; NO-SVE-NEXT: [[X1:%.*]] = extractelement <2 x i32> [[X]], i64 1
671-
; NO-SVE-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[X0]]
672-
; NO-SVE-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], [[X1]]
673-
; NO-SVE-NEXT: [[Y0:%.*]] = extractelement <2 x i32> [[Y]], i64 0
674-
; NO-SVE-NEXT: [[Y1:%.*]] = extractelement <2 x i32> [[Y]], i64 1
675-
; NO-SVE-NEXT: [[TMP5:%.*]] = sub i32 [[TMP3]], [[Y0]]
676-
; NO-SVE-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[Y1]]
677-
; NO-SVE-NEXT: [[Z0:%.*]] = extractelement <2 x i32> [[Z]], i64 0
678-
; NO-SVE-NEXT: [[Z1:%.*]] = extractelement <2 x i32> [[Z]], i64 1
679-
; NO-SVE-NEXT: [[TMP7:%.*]] = mul i32 [[TMP5]], [[Z0]]
680-
; NO-SVE-NEXT: [[TMP8:%.*]] = mul i32 [[TMP6]], [[Z1]]
681-
; NO-SVE-NEXT: [[RES0:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
682-
; NO-SVE-NEXT: [[RES1:%.*]] = insertelement <2 x i32> [[RES0]], i32 [[TMP8]], i32 1
683-
; NO-SVE-NEXT: ret <2 x i32> [[RES1]]
684-
;
685-
; SVE-LABEL: define <2 x i32> @sdiv_v2i32_Op1_unknown_Op2_const(
686-
; SVE-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
687-
; SVE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> <i32 poison, i32 4>, <2 x i32> <i32 0, i32 3>
688-
; SVE-NEXT: [[TMP2:%.*]] = sdiv <2 x i32> [[A]], [[TMP1]]
689-
; SVE-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[X]]
690-
; SVE-NEXT: [[TMP4:%.*]] = sub <2 x i32> [[TMP3]], [[Y]]
691-
; SVE-NEXT: [[TMP5:%.*]] = mul <2 x i32> [[TMP4]], [[Z]]
692-
; SVE-NEXT: ret <2 x i32> [[TMP5]]
619+
; CHECK-LABEL: define <2 x i32> @sdiv_v2i32_Op1_unknown_Op2_const(
620+
; CHECK-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
621+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> <i32 poison, i32 4>, <2 x i32> <i32 0, i32 3>
622+
; CHECK-NEXT: [[TMP2:%.*]] = sdiv <2 x i32> [[A]], [[TMP1]]
623+
; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[X]]
624+
; CHECK-NEXT: [[TMP4:%.*]] = sub <2 x i32> [[TMP3]], [[Y]]
625+
; CHECK-NEXT: [[TMP5:%.*]] = mul <2 x i32> [[TMP4]], [[Z]]
626+
; CHECK-NEXT: ret <2 x i32> [[TMP5]]
693627
;
694628
{
695629
%a0 = extractelement <2 x i32> %a, i64 0

0 commit comments

Comments
 (0)