Skip to content

Commit bcbdf7a

Browse files
committed
[RISCV][TTI/SLP] Add test coverage for select of constants costing
Provides coverage for an upcoming change which accounts for the cost of materializing the vector constants in the vector select.
1 parent 3675761 commit bcbdf7a

File tree

2 files changed

+80
-0
lines changed

2 files changed

+80
-0
lines changed

llvm/test/Analysis/CostModel/RISCV/rvv-select.ll

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,3 +390,28 @@ define void @select() {
390390

391391
ret void
392392
}
393+
394+
; Cost-model coverage for selects whose true/false operands are constant
; vectors. The CHECK lines record the costs currently reported: 3 for each
; <2 x i64> select with a scalar i1 condition, and 1 for the <4 x i32> select
; with a per-lane <4 x i1> condition. Per the commit message, this is baseline
; coverage for an upcoming change that accounts for the cost of materializing
; the vector constants.
define void @select_of_constants() {
395+
; CHECK-LABEL: 'select_of_constants'
396+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = select i1 undef, <2 x i64> <i64 128, i64 128>, <2 x i64> zeroinitializer
397+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = select i1 undef, <2 x i64> <i64 128, i64 127>, <2 x i64> zeroinitializer
398+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = select i1 undef, <2 x i64> <i64 0, i64 1>, <2 x i64> zeroinitializer
399+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = select i1 undef, <2 x i64> <i64 128, i64 533>, <2 x i64> <i64 0, i64 573>
400+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = select <4 x i1> undef, <4 x i32> <i32 524288, i32 262144, i32 131072, i32 65536>, <4 x i32> zeroinitializer
401+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
402+
;
403+
; Splat constants: the true operand is a splat of 128, the false operand is
; all zeros.
404+
select i1 undef, <2 x i64> <i64 128, i64 128>, <2 x i64> zeroinitializer
405+
; LHS is a VID pattern: the lanes of <128, 127> and <0, 1> differ by a fixed
; step; the false operand is all zeros in both cases.
406+
select i1 undef, <2 x i64> <i64 128, i64 127>, <2 x i64> zeroinitializer
407+
select i1 undef, <2 x i64> <i64 0, i64 1>, <2 x i64> zeroinitializer
408+
; 2x general (expensive) constants: both operands are non-splat constant
; vectors.
409+
select i1 undef, <2 x i64> <i64 128, i64 533>, <2 x i64> <i64 0, i64 573>
410+
411+
; powers of two (still expensive): lanes are 2^19, 2^18, 2^17 and 2^16, and
; the condition is a per-lane <4 x i1> rather than a scalar i1.
412+
select <4 x i1> undef, <4 x i32> <i32 524288, i32 262144, i32 131072, i32 65536>, <4 x i32> zeroinitializer
413+
414+
ret void
415+
}
416+
417+
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux -mattr=+v < %s | FileCheck %s
3+
4+
; Scalar input: each of the four i16 arguments is compared for equality with
; 1, each comparison selects between the same constant 65536 (2^16) and 0, and
; the four results are OR-ed together.
; Expected output (CHECK lines): the SLP vectorizer builds a <4 x i16> vector
; from the arguments, does one vector compare, one <4 x i32> select of a splat
; constant against zeroinitializer, and reduces with llvm.vector.reduce.or.
define i32 @pow2_zero_constant_shift(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i16 zeroext %d) {
5+
; CHECK-LABEL: define i32 @pow2_zero_constant_shift(
6+
; CHECK-SAME: i16 zeroext [[A:%.*]], i16 zeroext [[B:%.*]], i16 zeroext [[C:%.*]], i16 zeroext [[D:%.*]]) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i32 0
8+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[B]], i32 1
9+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[C]], i32 2
10+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i16> [[TMP3]], i16 [[D]], i32 3
11+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i16> [[TMP4]], <i16 1, i16 1, i16 1, i16 1>
12+
; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> <i32 65536, i32 65536, i32 65536, i32 65536>, <4 x i32> zeroinitializer
13+
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP6]])
14+
; CHECK-NEXT: ret i32 [[TMP7]]
15+
;
16+
%t39.i0 = icmp eq i16 %a, 1
17+
%t39.i1 = icmp eq i16 %b, 1
18+
%t39.i2 = icmp eq i16 %c, 1
19+
%t39.i3 = icmp eq i16 %d, 1
20+
%t40.i0 = select i1 %t39.i0, i32 65536, i32 0
21+
%t40.i1 = select i1 %t39.i1, i32 65536, i32 0
22+
%t40.i2 = select i1 %t39.i2, i32 65536, i32 0
23+
%t40.i3 = select i1 %t39.i3, i32 65536, i32 0
24+
%or.rdx0 = or i32 %t40.i0, %t40.i1
25+
%or.rdx1 = or i32 %t40.i2, %t40.i3
26+
%or.rdx2 = or i32 %or.rdx0, %or.rdx1
27+
ret i32 %or.rdx2
28+
}
29+
30+
; Same shape as @pow2_zero_constant_shift, except that each lane selects a
; different power of two (524288 = 2^19, 262144 = 2^18, 131072 = 2^17,
; 65536 = 2^16) against 0, so the vectorized select's true operand is a
; non-splat constant vector.
; The CHECK lines record the current behavior: the code IS vectorized into a
; <4 x i32> select followed by llvm.vector.reduce.or.
; TODO: This case is unprofitable, and we should not be vectorizing this.
31+
define i32 @pow2_zero_variable_shift(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i16 zeroext %d) {
32+
; CHECK-LABEL: define i32 @pow2_zero_variable_shift(
33+
; CHECK-SAME: i16 zeroext [[A:%.*]], i16 zeroext [[B:%.*]], i16 zeroext [[C:%.*]], i16 zeroext [[D:%.*]]) #[[ATTR0]] {
34+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i32 0
35+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[B]], i32 1
36+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[C]], i32 2
37+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i16> [[TMP3]], i16 [[D]], i32 3
38+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i16> [[TMP4]], <i16 1, i16 1, i16 1, i16 1>
39+
; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> <i32 524288, i32 262144, i32 131072, i32 65536>, <4 x i32> zeroinitializer
40+
; CHECK-NEXT: [[OR_RDX2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP6]])
41+
; CHECK-NEXT: ret i32 [[OR_RDX2]]
42+
;
43+
%t39.i0 = icmp eq i16 %a, 1
44+
%t39.i1 = icmp eq i16 %b, 1
45+
%t39.i2 = icmp eq i16 %c, 1
46+
%t39.i3 = icmp eq i16 %d, 1
47+
%t40.i0 = select i1 %t39.i0, i32 524288, i32 0
48+
%t40.i1 = select i1 %t39.i1, i32 262144, i32 0
49+
%t40.i2 = select i1 %t39.i2, i32 131072, i32 0
50+
%t40.i3 = select i1 %t39.i3, i32 65536, i32 0
51+
%or.rdx0 = or i32 %t40.i0, %t40.i1
52+
%or.rdx1 = or i32 %t40.i2, %t40.i3
53+
%or.rdx2 = or i32 %or.rdx0, %or.rdx1
54+
ret i32 %or.rdx2
55+
}

0 commit comments

Comments
 (0)