Skip to content

Commit 6ca9685

Browse files
DevM-uk authored and SamTebbs33 committed
[AArch64] Consider histcnt smaller than i32 in the cost model
This PR updates the AArch64 cost model to account for the cheaper cost of histograms whose elements are smaller than i32, reflecting the improvements from llvm#101017 and llvm#103037. Work by Max Beck-Jones (@DevM-uk).
1 parent d6832a6 commit 6ca9685

File tree

2 files changed

+26
-20
lines changed

2 files changed

+26
-20
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 17 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -517,25 +517,31 @@ static bool isUnpackedVectorVT(EVT VecVT) {
517517
static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) {
518518
Type *BucketPtrsTy = ICA.getArgTypes()[0]; // Type of vector of pointers
519519
Type *EltTy = ICA.getArgTypes()[1]; // Type of bucket elements
520+
unsigned TotalHistCnts = 1;
520521

521-
// Only allow (32b and 64b) integers or pointers for now...
522+
// Only allow (up to 64b) integers or pointers
522523
if ((!EltTy->isIntegerTy() && !EltTy->isPointerTy()) ||
523-
(EltTy->getScalarSizeInBits() != 32 &&
524-
EltTy->getScalarSizeInBits() != 64))
524+
EltTy->getScalarSizeInBits() > 64)
525525
return InstructionCost::getInvalid();
526526

527-
// FIXME: Hacky check for legal vector types. We can promote smaller types
528-
// but we cannot legalize vectors via splitting for histcnt.
529527
// FIXME: We should be able to generate histcnt for fixed-length vectors
530528
// using ptrue with a specific VL.
531-
if (VectorType *VTy = dyn_cast<VectorType>(BucketPtrsTy))
532-
if ((VTy->getElementCount().getKnownMinValue() != 2 &&
533-
VTy->getElementCount().getKnownMinValue() != 4) ||
534-
VTy->getPrimitiveSizeInBits().getKnownMinValue() > 128 ||
535-
!VTy->isScalableTy())
529+
if (VectorType *VTy = dyn_cast<VectorType>(BucketPtrsTy)) {
530+
unsigned EC = VTy->getElementCount().getKnownMinValue();
531+
if (!isPowerOf2_64(EC) || !VTy->isScalableTy())
536532
return InstructionCost::getInvalid();
537533

538-
return InstructionCost(BaseHistCntCost);
534+
bool Element64b = EltTy->isIntegerTy(64);
535+
536+
if (EC == 2 || (!Element64b && EC == 4))
537+
return InstructionCost(BaseHistCntCost);
538+
539+
unsigned NaturalVectorWidth = Element64b ? AArch64::SVEBitsPerBlock / 64
540+
: AArch64::SVEBitsPerBlock / 32;
541+
TotalHistCnts = EC / NaturalVectorWidth;
542+
}
543+
544+
return InstructionCost(BaseHistCntCost * TotalHistCnts);
539545
}
540546

541547
InstructionCost

llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -971,26 +971,26 @@ define void @histogram_nxv4i32(<vscale x 4 x ptr> %buckets, <vscale x 4 x i1> %m
971971
ret void
972972
}
973973

974-
define void @histogram_nxv8i16(<vscale x 8 x ptr> %buckets, <vscale x 8 x i1> %mask) {
974+
define void @histogram_nxv8i16(<vscale x 8 x ptr> %buckets, <vscale x 8 x i1> %mask) #3 {
975975
; CHECK-LABEL: 'histogram_nxv8i16'
976-
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
976+
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
977977
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
978978
;
979979
; TYPE_BASED_ONLY-LABEL: 'histogram_nxv8i16'
980-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
980+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
981981
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
982982
;
983983
call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
984984
ret void
985985
}
986986

987-
define void @histogram_nxv16i8(<vscale x 16 x ptr> %buckets, <vscale x 16 x i1> %mask) {
987+
define void @histogram_nxv16i8(<vscale x 16 x ptr> %buckets, <vscale x 16 x i1> %mask) #3 {
988988
; CHECK-LABEL: 'histogram_nxv16i8'
989-
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
989+
; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
990990
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
991991
;
992992
; TYPE_BASED_ONLY-LABEL: 'histogram_nxv16i8'
993-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
993+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
994994
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
995995
;
996996
call void @llvm.experimental.vector.histogram.add.nxv16p0.i64(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
@@ -1049,13 +1049,13 @@ define void @histogram_v16i8(<16 x ptr> %buckets, <16 x i1> %mask) {
10491049
ret void
10501050
}
10511051

1052-
define void @histogram_nxv4i64(<vscale x 4 x ptr> %buckets, <vscale x 4 x i1> %mask) {
1052+
define void @histogram_nxv4i64(<vscale x 4 x ptr> %buckets, <vscale x 4 x i1> %mask) #3 {
10531053
; CHECK-LABEL: 'histogram_nxv4i64'
1054-
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
1054+
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
10551055
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
10561056
;
10571057
; TYPE_BASED_ONLY-LABEL: 'histogram_nxv4i64'
1058-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
1058+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
10591059
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
10601060
;
10611061
call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)

0 commit comments

Comments
 (0)