Skip to content

Commit def587d

Browse files
SamTebbs33 DevM-uk
authored and committed
[AArch64] Consider histcnt smaller than i32 in the cost model (llvm#108521)
This PR updates the AArch64 cost model to account for the cheaper cost of histograms with elements narrower than i32 (<i32), reflecting the improvements from llvm#101017 and llvm#103037. Work by Max Beck-Jones (@DevM-uk). Co-authored-by: DevM-uk <[email protected]>
1 parent 36a7d56 commit def587d

File tree

2 files changed

+27
-21
lines changed

2 files changed

+27
-21
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -517,25 +517,31 @@ static bool isUnpackedVectorVT(EVT VecVT) {
517517
static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) {
518518
Type *BucketPtrsTy = ICA.getArgTypes()[0]; // Type of vector of pointers
519519
Type *EltTy = ICA.getArgTypes()[1]; // Type of bucket elements
520+
unsigned TotalHistCnts = 1;
520521

521-
// Only allow (32b and 64b) integers or pointers for now...
522-
if ((!EltTy->isIntegerTy() && !EltTy->isPointerTy()) ||
523-
(EltTy->getScalarSizeInBits() != 32 &&
524-
EltTy->getScalarSizeInBits() != 64))
522+
unsigned EltSize = EltTy->getScalarSizeInBits();
523+
// Only allow (up to 64b) integers or pointers
524+
if ((!EltTy->isIntegerTy() && !EltTy->isPointerTy()) || EltSize > 64)
525525
return InstructionCost::getInvalid();
526526

527-
// FIXME: Hacky check for legal vector types. We can promote smaller types
528-
// but we cannot legalize vectors via splitting for histcnt.
529527
// FIXME: We should be able to generate histcnt for fixed-length vectors
530528
// using ptrue with a specific VL.
531-
if (VectorType *VTy = dyn_cast<VectorType>(BucketPtrsTy))
532-
if ((VTy->getElementCount().getKnownMinValue() != 2 &&
533-
VTy->getElementCount().getKnownMinValue() != 4) ||
534-
VTy->getPrimitiveSizeInBits().getKnownMinValue() > 128 ||
535-
!VTy->isScalableTy())
529+
if (VectorType *VTy = dyn_cast<VectorType>(BucketPtrsTy)) {
530+
unsigned EC = VTy->getElementCount().getKnownMinValue();
531+
if (!isPowerOf2_64(EC) || !VTy->isScalableTy())
536532
return InstructionCost::getInvalid();
537533

538-
return InstructionCost(BaseHistCntCost);
534+
// HistCnt only supports 32b and 64b element types
535+
unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;
536+
537+
if (EC == 2 || (LegalEltSize == 32 && EC == 4))
538+
return InstructionCost(BaseHistCntCost);
539+
540+
unsigned NaturalVectorWidth = AArch64::SVEBitsPerBlock / LegalEltSize;
541+
TotalHistCnts = EC / NaturalVectorWidth;
542+
}
543+
544+
return InstructionCost(BaseHistCntCost * TotalHistCnts);
539545
}
540546

541547
InstructionCost

llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -971,26 +971,26 @@ define void @histogram_nxv4i32(<vscale x 4 x ptr> %buckets, <vscale x 4 x i1> %m
971971
ret void
972972
}
973973

974-
define void @histogram_nxv8i16(<vscale x 8 x ptr> %buckets, <vscale x 8 x i1> %mask) {
974+
define void @histogram_nxv8i16(<vscale x 8 x ptr> %buckets, <vscale x 8 x i1> %mask) #3 {
975975
; CHECK-LABEL: 'histogram_nxv8i16'
976-
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
976+
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
977977
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
978978
;
979979
; TYPE_BASED_ONLY-LABEL: 'histogram_nxv8i16'
980-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
980+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
981981
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
982982
;
983983
call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
984984
ret void
985985
}
986986

987-
define void @histogram_nxv16i8(<vscale x 16 x ptr> %buckets, <vscale x 16 x i1> %mask) {
987+
define void @histogram_nxv16i8(<vscale x 16 x ptr> %buckets, <vscale x 16 x i1> %mask) #3 {
988988
; CHECK-LABEL: 'histogram_nxv16i8'
989-
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
989+
; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
990990
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
991991
;
992992
; TYPE_BASED_ONLY-LABEL: 'histogram_nxv16i8'
993-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
993+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
994994
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
995995
;
996996
call void @llvm.experimental.vector.histogram.add.nxv16p0.i64(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
@@ -1049,13 +1049,13 @@ define void @histogram_v16i8(<16 x ptr> %buckets, <16 x i1> %mask) {
10491049
ret void
10501050
}
10511051

1052-
define void @histogram_nxv4i64(<vscale x 4 x ptr> %buckets, <vscale x 4 x i1> %mask) {
1052+
define void @histogram_nxv4i64(<vscale x 4 x ptr> %buckets, <vscale x 4 x i1> %mask) #3 {
10531053
; CHECK-LABEL: 'histogram_nxv4i64'
1054-
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
1054+
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
10551055
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
10561056
;
10571057
; TYPE_BASED_ONLY-LABEL: 'histogram_nxv4i64'
1058-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
1058+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
10591059
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
10601060
;
10611061
call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)

0 commit comments

Comments
 (0)