8
8
; Fold reduce(trunc(X)) -> trunc(reduce(X)) if more cost efficient
9
9
;
10
10
11
- ; TODO: Cheap AVX512 v8i64 -> v8i32 truncation
11
+ ; Cheap AVX512 v8i64 -> v8i32 truncation
12
12
define i32 @reduce_add_trunc_v8i64_i32 (<8 x i64 > %a0 ) {
13
- ; CHECK-LABEL: @reduce_add_trunc_v8i64_i32(
14
- ; CHECK-NEXT: [[TR:%.*]] = trunc <8 x i64> [[A0:%.*]] to <8 x i32>
15
- ; CHECK-NEXT: [[RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TR]])
16
- ; CHECK-NEXT: ret i32 [[RED]]
13
+ ; X64-LABEL: @reduce_add_trunc_v8i64_i32(
14
+ ; X64-NEXT: [[TMP1:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[A0:%.*]])
15
+ ; X64-NEXT: [[RED:%.*]] = trunc i64 [[TMP1]] to i32
16
+ ; X64-NEXT: ret i32 [[RED]]
17
+ ;
18
+ ; AVX512-LABEL: @reduce_add_trunc_v8i64_i32(
19
+ ; AVX512-NEXT: [[TR:%.*]] = trunc <8 x i64> [[A0:%.*]] to <8 x i32>
20
+ ; AVX512-NEXT: [[RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TR]])
21
+ ; AVX512-NEXT: ret i32 [[RED]]
17
22
;
18
23
%tr = trunc <8 x i64 > %a0 to <8 x i32 >
19
24
%red = tail call i32 @llvm.vector.reduce.add.v8i32 (<8 x i32 > %tr )
20
25
ret i32 %red
21
26
}
22
27
declare i32 @llvm.vector.reduce.add.v8i32 (<8 x i32 >)
23
28
24
- ; TODO: No legal vXi8 multiplication so vXi16 is always cheaper
29
+ ; No legal vXi8 multiplication so vXi16 is always cheaper
25
30
define i8 @reduce_mul_trunc_v16i16_i8 (<16 x i16 > %a0 ) {
26
31
; CHECK-LABEL: @reduce_mul_trunc_v16i16_i8(
27
- ; CHECK-NEXT: [[TR:%.*]] = trunc <16 x i16> [[A0:%.*]] to <16 x i8>
28
- ; CHECK-NEXT: [[RED:%.*]] = tail call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> [[TR]])
32
+ ; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> [[A0:%.*]])
33
+ ; CHECK-NEXT: [[RED:%.*]] = trunc i16 [[TMP1]] to i8
29
34
; CHECK-NEXT: ret i8 [[RED]]
30
35
;
31
36
%tr = trunc <16 x i16 > %a0 to <16 x i8 >
@@ -36,8 +41,8 @@ declare i8 @llvm.vector.reduce.mul.v16i8(<16 x i8>)
36
41
37
42
define i8 @reduce_or_trunc_v8i32_i8 (<8 x i32 > %a0 ) {
38
43
; CHECK-LABEL: @reduce_or_trunc_v8i32_i8(
39
- ; CHECK-NEXT: [[TR:%.*]] = trunc <8 x i32> [[A0:%.*]] to <8 x i8>
40
- ; CHECK-NEXT: [[RED:%.*]] = tail call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[TR]])
44
+ ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[A0:%.*]])
45
+ ; CHECK-NEXT: [[RED:%.*]] = trunc i32 [[TMP1]] to i8
41
46
; CHECK-NEXT: ret i8 [[RED]]
42
47
;
43
48
%tr = trunc <8 x i32 > %a0 to <8 x i8 >
@@ -48,8 +53,8 @@ declare i32 @llvm.vector.reduce.or.v8i8(<8 x i8>)
48
53
49
54
define i8 @reduce_xor_trunc_v16i64_i8 (<16 x i64 > %a0 ) {
50
55
; CHECK-LABEL: @reduce_xor_trunc_v16i64_i8(
51
- ; CHECK-NEXT: [[TR:%.*]] = trunc <16 x i64> [[A0:%.*]] to <16 x i8>
52
- ; CHECK-NEXT: [[RED:%.*]] = tail call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> [[TR]])
56
+ ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> [[A0:%.*]])
57
+ ; CHECK-NEXT: [[RED:%.*]] = trunc i64 [[TMP1]] to i8
53
58
; CHECK-NEXT: ret i8 [[RED]]
54
59
;
55
60
%tr = trunc <16 x i64 > %a0 to <16 x i8 >
@@ -61,8 +66,8 @@ declare i8 @llvm.vector.reduce.xor.v16i8(<16 x i8>)
61
66
; Truncation source has other uses - OK to truncate reduction
62
67
define i16 @reduce_and_trunc_v16i64_i16 (<16 x i64 > %a0 ) {
63
68
; CHECK-LABEL: @reduce_and_trunc_v16i64_i16(
64
- ; CHECK-NEXT: [[TR:%.*]] = trunc <16 x i64> [[A0:%.*]] to <16 x i16>
65
- ; CHECK-NEXT: [[RED:%.*]] = tail call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> [[TR]])
69
+ ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> [[A0:%.*]])
70
+ ; CHECK-NEXT: [[RED:%.*]] = trunc i64 [[TMP1]] to i16
66
71
; CHECK-NEXT: call void @use_v16i64(<16 x i64> [[A0]])
67
72
; CHECK-NEXT: ret i16 [[RED]]
68
73
;
@@ -116,6 +121,3 @@ define i16 @reduce_and_trunc_v16i64_i16_multiuse(<16 x i64> %a0) {
116
121
declare void @use_v16i64 (<16 x i64 >)
117
122
declare void @use_v16i16 (<16 x i16 >)
118
123
119
- ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
120
- ; AVX512: {{.*}}
121
- ; X64: {{.*}}
0 commit comments