Skip to content

Commit 536095a

Browse files
committed
[RISCV] Refine costs for i1 reductions
Our actual lowering for i1 reductions uses ctpop, possibly combined with a vector negate and possibly followed by a logic op. I believe ctpop to be low cost on all reasonable hardware. The default costing implementation here was returning quite inconsistent costs: the and/or reductions were returning very high costs (because we seem to think moving into scalar registers is very expensive?), and the others were returning lower but still too high costs (because of the assumed tree reduce strategy). While we should probably improve the generic costing strategy for i1 vectors, let's start by fixing the immediate problem. Differential Revision: https://reviews.llvm.org/D127511
1 parent f7bb691 commit 536095a

File tree

7 files changed

+105
-109
lines changed

7 files changed

+105
-109
lines changed

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -323,10 +323,15 @@ RISCVTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
323323
if (Ty->getScalarSizeInBits() > ST->getELEN())
324324
return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
325325

326+
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
327+
if (Ty->getElementType()->isIntegerTy(1))
328+
// vcpop sequences, see vreduction-mask.ll. umax, smin actually only
329+
// cost 2, but we don't have enough info here so we slightly over cost.
330+
return (LT.first - 1) + 3;
331+
326332
// IR Reduction is composed by two vmv and one rvv reduction instruction.
327333
InstructionCost BaseCost = 2;
328334
unsigned VL = cast<FixedVectorType>(Ty)->getNumElements();
329-
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
330335
return (LT.first - 1) + BaseCost + Log2_32_Ceil(VL);
331336
}
332337

@@ -338,10 +343,6 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *VTy,
338343
if (!isa<FixedVectorType>(VTy))
339344
return BaseT::getArithmeticReductionCost(Opcode, VTy, FMF, CostKind);
340345

341-
// FIXME: Do not support i1 and/or reduction now.
342-
if (VTy->getElementType()->isIntegerTy(1))
343-
return BaseT::getArithmeticReductionCost(Opcode, VTy, FMF, CostKind);
344-
345346
if (!ST->useRVVForFixedLengthVectors())
346347
return BaseT::getArithmeticReductionCost(Opcode, VTy, FMF, CostKind);
347348

@@ -356,11 +357,14 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *VTy,
356357
ISD != ISD::FADD)
357358
return BaseT::getArithmeticReductionCost(Opcode, VTy, FMF, CostKind);
358359

360+
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VTy);
361+
if (VTy->getElementType()->isIntegerTy(1))
362+
// vcpop sequences, see vreduction-mask.ll
363+
return (LT.first - 1) + (ISD == ISD::AND ? 3 : 2);
364+
359365
// IR Reduction is composed by two vmv and one rvv reduction instruction.
360366
InstructionCost BaseCost = 2;
361367
unsigned VL = cast<FixedVectorType>(VTy)->getNumElements();
362-
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VTy);
363-
364368
if (TTI::requiresOrderedReduction(FMF))
365369
return (LT.first - 1) + BaseCost + VL;
366370
return (LT.first - 1) + BaseCost + Log2_32_Ceil(VL);

llvm/test/Analysis/CostModel/RISCV/reduce-add.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,14 @@
44

55
define i32 @reduce_i1(i32 %arg) {
66
; CHECK-LABEL: 'reduce_i1'
7-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef)
8-
; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i1 @llvm.vector.reduce.add.v2i1(<2 x i1> undef)
9-
; CHECK-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V4 = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> undef)
10-
; CHECK-NEXT: Cost Model: Found an estimated cost of 121 for instruction: %V8 = call i1 @llvm.vector.reduce.add.v8i1(<8 x i1> undef)
11-
; CHECK-NEXT: Cost Model: Found an estimated cost of 321 for instruction: %V16 = call i1 @llvm.vector.reduce.add.v16i1(<16 x i1> undef)
12-
; CHECK-NEXT: Cost Model: Found an estimated cost of 801 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
13-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1921 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
14-
; CHECK-NEXT: Cost Model: Found an estimated cost of 4481 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
7+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef)
8+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.add.v2i1(<2 x i1> undef)
9+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> undef)
10+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.add.v8i1(<8 x i1> undef)
11+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.add.v16i1(<16 x i1> undef)
12+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
13+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
14+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
1515
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
1616
;
1717
%V1 = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef)

llvm/test/Analysis/CostModel/RISCV/reduce-and.ll

Lines changed: 15 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,21 @@
11
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2-
; RUN: opt < %s -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK,RISCV32
3-
; RUN: opt < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK,RISCV64
2+
; RUN: opt < %s -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
3+
; RUN: opt < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
44

55
define i32 @reduce_i1(i32 %arg) {
6-
; RISCV32-LABEL: 'reduce_i1'
7-
; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
8-
; RISCV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
9-
; RISCV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
10-
; RISCV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
11-
; RISCV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> undef)
12-
; RISCV32-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
13-
; RISCV32-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
14-
; RISCV32-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
15-
; RISCV32-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
16-
; RISCV32-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
17-
; RISCV32-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
18-
; RISCV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
19-
;
20-
; RISCV64-LABEL: 'reduce_i1'
21-
; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
22-
; RISCV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
23-
; RISCV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
24-
; RISCV64-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
25-
; RISCV64-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> undef)
26-
; RISCV64-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
27-
; RISCV64-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
28-
; RISCV64-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
29-
; RISCV64-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
30-
; RISCV64-NEXT: Cost Model: Found an estimated cost of 520 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
31-
; RISCV64-NEXT: Cost Model: Found an estimated cost of 1040 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
32-
; RISCV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
6+
; CHECK-LABEL: 'reduce_i1'
7+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
8+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
9+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
10+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
11+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> undef)
12+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
13+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
14+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
15+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
16+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
17+
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
18+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
3319
;
3420
%V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
3521
%V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)

llvm/test/Analysis/CostModel/RISCV/reduce-max.ll

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,16 @@
44

55
define i32 @reduce_umin_i1(i32 %arg) {
66
; CHECK-LABEL: 'reduce_umin_i1'
7-
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.umax.v1i1(<1 x i1> undef)
7+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.umax.v1i1(<1 x i1> undef)
88
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.umax.v2i1(<2 x i1> undef)
9-
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i1 @llvm.vector.reduce.umax.v4i1(<4 x i1> undef)
10-
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i1 @llvm.vector.reduce.umax.v8i1(<8 x i1> undef)
11-
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i1 @llvm.vector.reduce.umax.v16i1(<16 x i1> undef)
12-
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i1 @llvm.vector.reduce.umax.v32i1(<32 x i1> undef)
13-
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.vector.reduce.umax.v64i1(<64 x i1> undef)
14-
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i1 @llvm.vector.reduce.umax.v128i1(<128 x i1> undef)
9+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.umax.v4i1(<4 x i1> undef)
10+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.umax.v8i1(<8 x i1> undef)
11+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i1 @llvm.vector.reduce.umax.v16i1(<16 x i1> undef)
12+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.umax.v32i1(<32 x i1> undef)
13+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.umax.v64i1(<64 x i1> undef)
14+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.umax.v128i1(<128 x i1> undef)
1515
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
16+
;
1617
%V1 = call i1 @llvm.vector.reduce.umax.v1i1(<1 x i1> undef)
1718
%V2 = call i1 @llvm.vector.reduce.umax.v2i1(<2 x i1> undef)
1819
%V4 = call i1 @llvm.vector.reduce.umax.v4i1(<4 x i1> undef)
@@ -35,6 +36,7 @@ define i32 @reduce_umax_i8(i32 %arg) {
3536
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
3637
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
3738
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
39+
;
3840
%V1 = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef)
3941
%V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
4042
%V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
@@ -57,6 +59,7 @@ define i32 @reduce_umax_i16(i32 %arg) {
5759
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
5860
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef)
5961
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
62+
;
6063
%V1 = call i16 @llvm.vector.reduce.umax.v1i16(<1 x i16> undef)
6164
%V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
6265
%V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
@@ -79,6 +82,7 @@ define i32 @reduce_umax_i32(i32 %arg) {
7982
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> undef)
8083
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef)
8184
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
85+
;
8286
%V1 = call i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> undef)
8387
%V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
8488
%V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
@@ -101,6 +105,7 @@ define i32 @reduce_umax_i64(i32 %arg) {
101105
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef)
102106
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef)
103107
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
108+
;
104109
%V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
105110
%V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
106111
%V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
@@ -114,15 +119,16 @@ define i32 @reduce_umax_i64(i32 %arg) {
114119

115120
define i32 @reduce_smin_i1(i32 %arg) {
116121
; CHECK-LABEL: 'reduce_smin_i1'
117-
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
122+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
118123
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.smax.v2i1(<2 x i1> undef)
119-
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i1 @llvm.vector.reduce.smax.v4i1(<4 x i1> undef)
120-
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i1 @llvm.vector.reduce.smax.v8i1(<8 x i1> undef)
121-
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i1 @llvm.vector.reduce.smax.v16i1(<16 x i1> undef)
122-
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> undef)
123-
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.vector.reduce.smax.v64i1(<64 x i1> undef)
124-
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i1 @llvm.vector.reduce.smax.v128i1(<128 x i1> undef)
124+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.smax.v4i1(<4 x i1> undef)
125+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.smax.v8i1(<8 x i1> undef)
126+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i1 @llvm.vector.reduce.smax.v16i1(<16 x i1> undef)
127+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> undef)
128+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.smax.v64i1(<64 x i1> undef)
129+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.smax.v128i1(<128 x i1> undef)
125130
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
131+
;
126132
%V1 = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
127133
%V2 = call i1 @llvm.vector.reduce.smax.v2i1(<2 x i1> undef)
128134
%V4 = call i1 @llvm.vector.reduce.smax.v4i1(<4 x i1> undef)
@@ -145,6 +151,7 @@ define i32 @reduce_smax_i8(i32 %arg) {
145151
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
146152
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
147153
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
154+
;
148155
%V1 = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef)
149156
%V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef)
150157
%V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
@@ -167,6 +174,7 @@ define i32 @reduce_smax_i16(i32 %arg) {
167174
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
168175
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef)
169176
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
177+
;
170178
%V1 = call i16 @llvm.vector.reduce.smax.v1i16(<1 x i16> undef)
171179
%V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
172180
%V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
@@ -189,6 +197,7 @@ define i32 @reduce_smax_i32(i32 %arg) {
189197
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef)
190198
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef)
191199
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
200+
;
192201
%V1 = call i32 @llvm.vector.reduce.smax.v1i32(<1 x i32> undef)
193202
%V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
194203
%V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
@@ -211,6 +220,7 @@ define i32 @reduce_smax_i64(i32 %arg) {
211220
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef)
212221
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef)
213222
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
223+
;
214224
%V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
215225
%V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
216226
%V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)

0 commit comments

Comments
 (0)