Skip to content

Commit 4cef24a

Browse files
committed
[ARM] Improve reduction integer min/max costs
This adds some basic smin/smax/umin/umax reduction costs for MVE/NEON, similar to the existing Add reduction costs. They follow the same style as Add reductions, but include a higher cost as the costs tend to be dependent on the element size for vminv/vmaxv. These costs may not be precise, but will be more in line than the default, which extracts each element.
1 parent 62e90db commit 4cef24a

File tree

5 files changed

+124
-108
lines changed

5 files changed

+124
-108
lines changed

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1826,6 +1826,22 @@ ARMTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
18261826
(NumElts - 1) * getIntrinsicInstrCost(ICA, CostKind);
18271827
}
18281828

1829+
if (IID == Intrinsic::smin || IID == Intrinsic::smax ||
1830+
IID == Intrinsic::umin || IID == Intrinsic::umax) {
1831+
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
1832+
1833+
// All costs are the same for u/s min/max. These lower to vminv, which are
1834+
// given a slightly higher cost as they tend to take multiple cycles for
1835+
// smaller type sizes.
1836+
static const CostTblEntry CostTblAdd[]{
1837+
{ISD::SMIN, MVT::v16i8, 4},
1838+
{ISD::SMIN, MVT::v8i16, 3},
1839+
{ISD::SMIN, MVT::v4i32, 2},
1840+
};
1841+
if (const auto *Entry = CostTableLookup(CostTblAdd, ISD::SMIN, LT.second))
1842+
return Entry->Cost * ST->getMVEVectorCostFactor(CostKind) * LT.first;
1843+
}
1844+
18291845
return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind);
18301846
}
18311847

llvm/test/Analysis/CostModel/ARM/reduce-smax.ll

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -49,18 +49,18 @@ define i32 @reduce_i32(i32 %arg) {
4949
;
5050
; NEON-LABEL: 'reduce_i32'
5151
; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
52-
; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
53-
; NEON-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
54-
; NEON-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef)
55-
; NEON-NEXT: Cost Model: Found an estimated cost of 237 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
52+
; NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
53+
; NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
54+
; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef)
55+
; NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
5656
; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
5757
;
5858
; MVE-LABEL: 'reduce_i32'
5959
; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
60-
; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
61-
; MVE-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
62-
; MVE-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef)
63-
; MVE-NEXT: Cost Model: Found an estimated cost of 712 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
60+
; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
61+
; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
62+
; MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef)
63+
; MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
6464
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
6565
;
6666
%V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
@@ -84,19 +84,19 @@ define i32 @reduce_i16(i32 %arg) {
8484
; NEON-LABEL: 'reduce_i16'
8585
; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
8686
; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
87-
; NEON-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
88-
; NEON-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
89-
; NEON-NEXT: Cost Model: Found an estimated cost of 303 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
90-
; NEON-NEXT: Cost Model: Found an estimated cost of 503 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
87+
; NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
88+
; NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
89+
; NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
90+
; NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
9191
; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
9292
;
9393
; MVE-LABEL: 'reduce_i16'
9494
; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
95-
; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
96-
; MVE-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
97-
; MVE-NEXT: Cost Model: Found an estimated cost of 532 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
98-
; MVE-NEXT: Cost Model: Found an estimated cost of 860 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
99-
; MVE-NEXT: Cost Model: Found an estimated cost of 1516 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
95+
; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
96+
; MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
97+
; MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
98+
; MVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
99+
; MVE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
100100
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
101101
;
102102
%V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
@@ -123,20 +123,20 @@ define i32 @reduce_i8(i32 %arg) {
123123
; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef)
124124
; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
125125
; NEON-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
126-
; NEON-NEXT: Cost Model: Found an estimated cost of 395 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
127-
; NEON-NEXT: Cost Model: Found an estimated cost of 493 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
128-
; NEON-NEXT: Cost Model: Found an estimated cost of 689 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
129-
; NEON-NEXT: Cost Model: Found an estimated cost of 1081 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
126+
; NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
127+
; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
128+
; NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
129+
; NEON-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
130130
; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
131131
;
132132
; MVE-LABEL: 'reduce_i8'
133133
; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef)
134-
; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
135-
; MVE-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
136-
; MVE-NEXT: Cost Model: Found an estimated cost of 1044 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
137-
; MVE-NEXT: Cost Model: Found an estimated cost of 1304 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
138-
; MVE-NEXT: Cost Model: Found an estimated cost of 1952 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
139-
; MVE-NEXT: Cost Model: Found an estimated cost of 3248 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
134+
; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
135+
; MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
136+
; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
137+
; MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
138+
; MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
139+
; MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
140140
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
141141
;
142142
%V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef)

llvm/test/Analysis/CostModel/ARM/reduce-smin.ll

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -49,18 +49,18 @@ define i32 @reduce_i32(i32 %arg) {
4949
;
5050
; NEON-LABEL: 'reduce_i32'
5151
; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
52-
; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
53-
; NEON-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
54-
; NEON-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef)
55-
; NEON-NEXT: Cost Model: Found an estimated cost of 237 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
52+
; NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
53+
; NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
54+
; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef)
55+
; NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
5656
; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
5757
;
5858
; MVE-LABEL: 'reduce_i32'
5959
; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
60-
; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
61-
; MVE-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
62-
; MVE-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef)
63-
; MVE-NEXT: Cost Model: Found an estimated cost of 712 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
60+
; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
61+
; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
62+
; MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef)
63+
; MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
6464
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
6565
;
6666
%V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
@@ -84,19 +84,19 @@ define i32 @reduce_i16(i32 %arg) {
8484
; NEON-LABEL: 'reduce_i16'
8585
; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
8686
; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
87-
; NEON-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
88-
; NEON-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
89-
; NEON-NEXT: Cost Model: Found an estimated cost of 303 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
90-
; NEON-NEXT: Cost Model: Found an estimated cost of 503 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
87+
; NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
88+
; NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
89+
; NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
90+
; NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
9191
; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
9292
;
9393
; MVE-LABEL: 'reduce_i16'
9494
; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
95-
; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
96-
; MVE-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
97-
; MVE-NEXT: Cost Model: Found an estimated cost of 532 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
98-
; MVE-NEXT: Cost Model: Found an estimated cost of 860 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
99-
; MVE-NEXT: Cost Model: Found an estimated cost of 1516 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
95+
; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
96+
; MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
97+
; MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
98+
; MVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
99+
; MVE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
100100
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
101101
;
102102
%V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
@@ -123,20 +123,20 @@ define i32 @reduce_i8(i32 %arg) {
123123
; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef)
124124
; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
125125
; NEON-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
126-
; NEON-NEXT: Cost Model: Found an estimated cost of 395 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
127-
; NEON-NEXT: Cost Model: Found an estimated cost of 493 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
128-
; NEON-NEXT: Cost Model: Found an estimated cost of 689 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
129-
; NEON-NEXT: Cost Model: Found an estimated cost of 1081 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
126+
; NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
127+
; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
128+
; NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
129+
; NEON-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
130130
; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
131131
;
132132
; MVE-LABEL: 'reduce_i8'
133133
; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef)
134-
; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
135-
; MVE-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
136-
; MVE-NEXT: Cost Model: Found an estimated cost of 1044 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
137-
; MVE-NEXT: Cost Model: Found an estimated cost of 1304 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
138-
; MVE-NEXT: Cost Model: Found an estimated cost of 1952 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
139-
; MVE-NEXT: Cost Model: Found an estimated cost of 3248 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
134+
; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
135+
; MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
136+
; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
137+
; MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
138+
; MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
139+
; MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
140140
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
141141
;
142142
%V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef)

0 commit comments

Comments
 (0)