Commit 4b7770e

[VectorCombine] Add test coverage for reduce(trunc(X)) -> trunc(reduce(X)) folds

1 parent 2ed2a3a commit 4b7770e

1 file changed: 121 additions & 0 deletions
@@ -0,0 +1,121 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -S --passes=vector-combine -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,X64
; RUN: opt < %s -S --passes=vector-combine -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,X64
; RUN: opt < %s -S --passes=vector-combine -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,X64
; RUN: opt < %s -S --passes=vector-combine -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX512

;
; Fold reduce(trunc(X)) -> trunc(reduce(X)) if more cost efficient
;
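
; Editorial sketch (not part of the autogenerated checks): the fold would
; rewrite the first test below from
;   %tr  = trunc <8 x i64> %a0 to <8 x i32>
;   %red = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %tr)
; into a scalar truncation of the widened reduction:
;   %red.wide = tail call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a0)
;   %red      = trunc i64 %red.wide to i32
; (%red.wide is an illustrative name; @llvm.vector.reduce.add.v8i64 is the
; same reduction intrinsic at the wider element type.)
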
; TODO: Cheap AVX512 v8i64 -> v8i32 truncation
define i32 @reduce_add_trunc_v8i64_i32(<8 x i64> %a0) {
; CHECK-LABEL: @reduce_add_trunc_v8i64_i32(
; CHECK-NEXT:    [[TR:%.*]] = trunc <8 x i64> [[A0:%.*]] to <8 x i32>
; CHECK-NEXT:    [[RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TR]])
; CHECK-NEXT:    ret i32 [[RED]]
;
  %tr = trunc <8 x i64> %a0 to <8 x i32>
  %red = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %tr)
  ret i32 %red
}
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)

; TODO: No legal vXi8 multiplication so vXi16 is always cheaper
define i8 @reduce_mul_trunc_v16i16_i8(<16 x i16> %a0) {
; CHECK-LABEL: @reduce_mul_trunc_v16i16_i8(
; CHECK-NEXT:    [[TR:%.*]] = trunc <16 x i16> [[A0:%.*]] to <16 x i8>
; CHECK-NEXT:    [[RED:%.*]] = tail call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> [[TR]])
; CHECK-NEXT:    ret i8 [[RED]]
;
  %tr = trunc <16 x i16> %a0 to <16 x i8>
  %red = tail call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %tr)
  ret i8 %red
}
declare i8 @llvm.vector.reduce.mul.v16i8(<16 x i8>)

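; Editorial sketch of the fold the TODO above anticipates: x86 has no legal
; vXi8 multiply, so performing the reduction at i16 and truncating the scalar
; result should always be cheaper:
;   %red.wide = tail call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> %a0)
;   %red      = trunc i16 %red.wide to i8
; (%red.wide is an illustrative name.)
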
define i8 @reduce_or_trunc_v8i32_i8(<8 x i32> %a0) {
; CHECK-LABEL: @reduce_or_trunc_v8i32_i8(
; CHECK-NEXT:    [[TR:%.*]] = trunc <8 x i32> [[A0:%.*]] to <8 x i8>
; CHECK-NEXT:    [[RED:%.*]] = tail call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[TR]])
; CHECK-NEXT:    ret i8 [[RED]]
;
  %tr = trunc <8 x i32> %a0 to <8 x i8>
  %red = tail call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %tr)
  ret i8 %red
}
declare i8 @llvm.vector.reduce.or.v8i8(<8 x i8>)

define i8 @reduce_xor_trunc_v16i64_i8(<16 x i64> %a0) {
; CHECK-LABEL: @reduce_xor_trunc_v16i64_i8(
; CHECK-NEXT:    [[TR:%.*]] = trunc <16 x i64> [[A0:%.*]] to <16 x i8>
; CHECK-NEXT:    [[RED:%.*]] = tail call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> [[TR]])
; CHECK-NEXT:    ret i8 [[RED]]
;
  %tr = trunc <16 x i64> %a0 to <16 x i8>
  %red = tail call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %tr)
  ret i8 %red
}
declare i8 @llvm.vector.reduce.xor.v16i8(<16 x i8>)

; Truncation source has other uses - OK to truncate reduction
define i16 @reduce_and_trunc_v16i64_i16(<16 x i64> %a0) {
; CHECK-LABEL: @reduce_and_trunc_v16i64_i16(
; CHECK-NEXT:    [[TR:%.*]] = trunc <16 x i64> [[A0:%.*]] to <16 x i16>
; CHECK-NEXT:    [[RED:%.*]] = tail call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> [[TR]])
; CHECK-NEXT:    call void @use_v16i64(<16 x i64> [[A0]])
; CHECK-NEXT:    ret i16 [[RED]]
;
  %tr = trunc <16 x i64> %a0 to <16 x i16>
  %red = tail call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %tr)
  call void @use_v16i64(<16 x i64> %a0)
  ret i16 %red
}
declare i16 @llvm.vector.reduce.and.v16i16(<16 x i16>)

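; Editorial note: extra uses of the source vector do not block the fold;
; after rewriting, %a0 feeds the widened reduction directly, so it stays live
; for @use_v16i64 either way. Illustrative folded form:
;   %red.wide = tail call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> %a0)
;   %red      = trunc i64 %red.wide to i16
;   call void @use_v16i64(<16 x i64> %a0)
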
; Negative Test: vXi16 multiply is much cheaper than vXi64
define i16 @reduce_mul_trunc_v8i64_i16(<8 x i64> %a0) {
; CHECK-LABEL: @reduce_mul_trunc_v8i64_i16(
; CHECK-NEXT:    [[TR:%.*]] = trunc <8 x i64> [[A0:%.*]] to <8 x i16>
; CHECK-NEXT:    [[RED:%.*]] = tail call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> [[TR]])
; CHECK-NEXT:    ret i16 [[RED]]
;
  %tr = trunc <8 x i64> %a0 to <8 x i16>
  %red = tail call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %tr)
  ret i16 %red
}
declare i16 @llvm.vector.reduce.mul.v8i16(<8 x i16>)

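; Editorial sketch of the rewrite the cost model should reject here: 64-bit
; vector multiplies legalize expensively on x86, so
;   %red.wide = tail call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> %a0)
;   %red      = trunc i64 %red.wide to i16
; would be slower than keeping the v8i16 multiply reduction below.
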
; Negative Test: min/max reductions can't use pre-truncated types.
define i8 @reduce_smin_trunc_v16i16_i8(<16 x i16> %a0) {
; CHECK-LABEL: @reduce_smin_trunc_v16i16_i8(
; CHECK-NEXT:    [[TR:%.*]] = trunc <16 x i16> [[A0:%.*]] to <16 x i8>
; CHECK-NEXT:    [[RED:%.*]] = tail call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> [[TR]])
; CHECK-NEXT:    ret i8 [[RED]]
;
  %tr = trunc <16 x i16> %a0 to <16 x i8>
  %red = tail call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %tr)
  ret i8 %red
}
declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>)

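; Editorial counterexample for the note above: truncation does not distribute
; over min/max the way it does over add/mul/and/or/xor. With i16 inputs
; X = <256, 1>:
;   trunc(smin(256, 1))        = trunc(1)   = 1  (as i8)
;   smin(trunc(256), trunc(1)) = smin(0, 1) = 0  (as i8)
; so reduce.smin(trunc(X)) != trunc(reduce.smin(X)) in general.
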
; Negative Test: Truncation has other uses.
define i16 @reduce_and_trunc_v16i64_i16_multiuse(<16 x i64> %a0) {
; CHECK-LABEL: @reduce_and_trunc_v16i64_i16_multiuse(
; CHECK-NEXT:    [[TR:%.*]] = trunc <16 x i64> [[A0:%.*]] to <16 x i16>
; CHECK-NEXT:    [[RED:%.*]] = tail call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> [[TR]])
; CHECK-NEXT:    call void @use_v16i16(<16 x i16> [[TR]])
; CHECK-NEXT:    ret i16 [[RED]]
;
  %tr = trunc <16 x i64> %a0 to <16 x i16>
  %red = tail call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %tr)
  call void @use_v16i16(<16 x i16> %tr)
  ret i16 %red
}

declare void @use_v16i64(<16 x i64>)
declare void @use_v16i16(<16 x i16>)
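
; Editorial note: unlike reduce_and_trunc_v16i64_i16 above, here the trunc
; itself has another use (@use_v16i16), so the <16 x i64> -> <16 x i16>
; truncation cannot be removed; folding would merely add a widened reduction
; on top of it, which the cost model should reject.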

;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; AVX512: {{.*}}
; X64: {{.*}}
