
Commit ee5e122

[VectorCombine] Add test coverage for reduce(trunc(X)) -> trunc(reduce(X)) folds
1 parent 9cc98e3 commit ee5e122

1 file changed: +89 -0 lines changed

@@ -0,0 +1,89 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -S --passes=vector-combine -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,X64
; RUN: opt < %s -S --passes=vector-combine -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,X64
; RUN: opt < %s -S --passes=vector-combine -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,X64
; RUN: opt < %s -S --passes=vector-combine -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX512

;
; Fold reduce(trunc(X)) -> trunc(reduce(X)) if more cost efficient
;

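; For example, a profitable fold would rewrite the first test below from:
;   %tr  = trunc <8 x i64> %a0 to <8 x i32>
;   %red = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %tr)
; into a reduction at the wider source type followed by a scalar truncate
; (a sketch of the intended output; the CHECK lines in this file still show
; the unfolded form, since the fold itself is not implemented yet):
;   %red = tail call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a0)
;   %tr  = trunc i64 %red to i32
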
; TODO: Cheap AVX512 v8i64 -> v8i32 truncation
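; (on AVX512 the v8i64 -> v8i32 truncation is a single vpmovqd, so the cost
; model has to weigh this fold differently there than on plain x86-64)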
define i32 @reduce_add_trunc_v8i64_i32(<8 x i64> %a0) {
; CHECK-LABEL: @reduce_add_trunc_v8i64_i32(
; CHECK-NEXT:    [[TR:%.*]] = trunc <8 x i64> [[A0:%.*]] to <8 x i32>
; CHECK-NEXT:    [[RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TR]])
; CHECK-NEXT:    ret i32 [[RED]]
;
  %tr = trunc <8 x i64> %a0 to <8 x i32>
  %red = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %tr)
  ret i32 %red
}
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)

; TODO: No legal vXi8 multiplication so vXi16 is always cheaper
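; (x86 has no vXi8 multiply instruction; pmullw works on i16 lanes, so an
; i8 multiply is widened to i16 during lowering anyway)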
define i8 @reduce_mul_trunc_v16i16_i8(<16 x i16> %a0) {
; CHECK-LABEL: @reduce_mul_trunc_v16i16_i8(
; CHECK-NEXT:    [[TR:%.*]] = trunc <16 x i16> [[A0:%.*]] to <16 x i8>
; CHECK-NEXT:    [[RED:%.*]] = tail call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> [[TR]])
; CHECK-NEXT:    ret i8 [[RED]]
;
  %tr = trunc <16 x i16> %a0 to <16 x i8>
  %red = tail call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %tr)
  ret i8 %red
}
declare i8 @llvm.vector.reduce.mul.v16i8(<16 x i8>)

define i8 @reduce_or_trunc_v8i32_i8(<8 x i32> %a0) {
; CHECK-LABEL: @reduce_or_trunc_v8i32_i8(
; CHECK-NEXT:    [[TR:%.*]] = trunc <8 x i32> [[A0:%.*]] to <8 x i8>
; CHECK-NEXT:    [[RED:%.*]] = tail call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[TR]])
; CHECK-NEXT:    ret i8 [[RED]]
;
  %tr = trunc <8 x i32> %a0 to <8 x i8>
  %red = tail call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %tr)
  ret i8 %red
}
declare i8 @llvm.vector.reduce.or.v8i8(<8 x i8>)

define i8 @reduce_xor_trunc_v16i64_i8(<16 x i64> %a0) {
; CHECK-LABEL: @reduce_xor_trunc_v16i64_i8(
; CHECK-NEXT:    [[TR:%.*]] = trunc <16 x i64> [[A0:%.*]] to <16 x i8>
; CHECK-NEXT:    [[RED:%.*]] = tail call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> [[TR]])
; CHECK-NEXT:    ret i8 [[RED]]
;
  %tr = trunc <16 x i64> %a0 to <16 x i8>
  %red = tail call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %tr)
  ret i8 %red
}
declare i8 @llvm.vector.reduce.xor.v16i8(<16 x i8>)

; Negative Test: vXi16 multiply is much cheaper than vXi64
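; (folding here would widen the multiplies to i64, and a vXi64 multiply is
; expensive on x86: pmullq needs AVX512DQ and is expanded otherwise)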
define i16 @reduce_mul_trunc_v8i64_i16(<8 x i64> %a0) {
; CHECK-LABEL: @reduce_mul_trunc_v8i64_i16(
; CHECK-NEXT:    [[TR:%.*]] = trunc <8 x i64> [[A0:%.*]] to <8 x i16>
; CHECK-NEXT:    [[RED:%.*]] = tail call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> [[TR]])
; CHECK-NEXT:    ret i16 [[RED]]
;
  %tr = trunc <8 x i64> %a0 to <8 x i16>
  %red = tail call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %tr)
  ret i16 %red
}
declare i16 @llvm.vector.reduce.mul.v8i16(<8 x i16>)

; Negative Test: min/max reductions can't use pre-truncated types.
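; For example (illustrative values only): smin over <i16 256, i16 1> is 1,
; which truncates to i8 1, but truncating first gives smin over <i8 0, i8 1>,
; which is 0.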
define i8 @reduce_smin_trunc_v16i16_i8(<16 x i16> %a0) {
; CHECK-LABEL: @reduce_smin_trunc_v16i16_i8(
; CHECK-NEXT:    [[TR:%.*]] = trunc <16 x i16> [[A0:%.*]] to <16 x i8>
; CHECK-NEXT:    [[RED:%.*]] = tail call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> [[TR]])
; CHECK-NEXT:    ret i8 [[RED]]
;
  %tr = trunc <16 x i16> %a0 to <16 x i8>
  %red = tail call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %tr)
  ret i8 %red
}
declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>)

;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; AVX512: {{.*}}
; X64: {{.*}}
