1
1
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2
- ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK
3
- ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK
2
+ ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE
3
+ ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX
4
4
5
5
declare void @use (float )
6
6
7
+ ; TODO: The insert is costed as free, so creating a shuffle appears to be a loss.
8
+
7
9
define <4 x float > @ext0_v4f32 (<4 x float > %x , <4 x float > %y ) {
8
10
; CHECK-LABEL: @ext0_v4f32(
9
11
; CHECK-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0
@@ -21,9 +23,8 @@ define <4 x float> @ext0_v4f32(<4 x float> %x, <4 x float> %y) {
21
23
22
24
define <4 x float > @ext2_v4f32 (<4 x float > %x , <4 x float > %y ) {
23
25
; CHECK-LABEL: @ext2_v4f32(
24
- ; CHECK-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 2
25
- ; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
26
- ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 2
26
+ ; CHECK-NEXT: [[TMP1:%.*]] = fneg <4 x float> [[X:%.*]]
27
+ ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
27
28
; CHECK-NEXT: ret <4 x float> [[R]]
28
29
;
29
30
%e = extractelement <4 x float > %x , i32 2
@@ -36,9 +37,8 @@ define <4 x float> @ext2_v4f32(<4 x float> %x, <4 x float> %y) {
36
37
37
38
define <2 x double > @ext1_v2f64 (<2 x double > %x , <2 x double > %y ) {
38
39
; CHECK-LABEL: @ext1_v2f64(
39
- ; CHECK-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
40
- ; CHECK-NEXT: [[N:%.*]] = fneg nsz double [[E]]
41
- ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x double> [[Y:%.*]], double [[N]], i32 1
40
+ ; CHECK-NEXT: [[TMP1:%.*]] = fneg nsz <2 x double> [[X:%.*]]
41
+ ; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x double> [[Y:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 3>
42
42
; CHECK-NEXT: ret <2 x double> [[R]]
43
43
;
44
44
%e = extractelement <2 x double > %x , i32 1
@@ -47,26 +47,43 @@ define <2 x double> @ext1_v2f64(<2 x double> %x, <2 x double> %y) {
47
47
ret <2 x double > %r
48
48
}
49
49
50
+ ; The vector fneg would cost twice as much as the scalar op with SSE,
51
+ ; so we don't transform there (the shuffle would also be more expensive).
52
+
50
53
define <8 x float > @ext7_v8f32 (<8 x float > %x , <8 x float > %y ) {
51
- ; CHECK-LABEL: @ext7_v8f32(
52
- ; CHECK-NEXT: [[E:%.*]] = extractelement <8 x float> [[X:%.*]], i32 7
53
- ; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
54
- ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 7
55
- ; CHECK-NEXT: ret <8 x float> [[R]]
54
+ ; SSE-LABEL: @ext7_v8f32(
55
+ ; SSE-NEXT: [[E:%.*]] = extractelement <8 x float> [[X:%.*]], i32 7
56
+ ; SSE-NEXT: [[N:%.*]] = fneg float [[E]]
57
+ ; SSE-NEXT: [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 7
58
+ ; SSE-NEXT: ret <8 x float> [[R]]
59
+ ;
60
+ ; AVX-LABEL: @ext7_v8f32(
61
+ ; AVX-NEXT: [[TMP1:%.*]] = fneg <8 x float> [[X:%.*]]
62
+ ; AVX-NEXT: [[R:%.*]] = shufflevector <8 x float> [[Y:%.*]], <8 x float> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
63
+ ; AVX-NEXT: ret <8 x float> [[R]]
56
64
;
57
65
%e = extractelement <8 x float > %x , i32 7
58
66
%n = fneg float %e
59
67
%r = insertelement <8 x float > %y , float %n , i32 7
60
68
ret <8 x float > %r
61
69
}
62
70
71
+ ; Same as above with an extra use of the extracted element.
72
+
63
73
define <8 x float > @ext7_v8f32_use1 (<8 x float > %x , <8 x float > %y ) {
64
- ; CHECK-LABEL: @ext7_v8f32_use1(
65
- ; CHECK-NEXT: [[E:%.*]] = extractelement <8 x float> [[X:%.*]], i32 5
66
- ; CHECK-NEXT: call void @use(float [[E]])
67
- ; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
68
- ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 5
69
- ; CHECK-NEXT: ret <8 x float> [[R]]
74
+ ; SSE-LABEL: @ext7_v8f32_use1(
75
+ ; SSE-NEXT: [[E:%.*]] = extractelement <8 x float> [[X:%.*]], i32 5
76
+ ; SSE-NEXT: call void @use(float [[E]])
77
+ ; SSE-NEXT: [[N:%.*]] = fneg float [[E]]
78
+ ; SSE-NEXT: [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 5
79
+ ; SSE-NEXT: ret <8 x float> [[R]]
80
+ ;
81
+ ; AVX-LABEL: @ext7_v8f32_use1(
82
+ ; AVX-NEXT: [[E:%.*]] = extractelement <8 x float> [[X:%.*]], i32 5
83
+ ; AVX-NEXT: call void @use(float [[E]])
84
+ ; AVX-NEXT: [[TMP1:%.*]] = fneg <8 x float> [[X]]
85
+ ; AVX-NEXT: [[R:%.*]] = shufflevector <8 x float> [[Y:%.*]], <8 x float> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 13, i32 6, i32 7>
86
+ ; AVX-NEXT: ret <8 x float> [[R]]
70
87
;
71
88
%e = extractelement <8 x float > %x , i32 5
72
89
call void @use (float %e )
@@ -75,6 +92,8 @@ define <8 x float> @ext7_v8f32_use1(<8 x float> %x, <8 x float> %y) {
75
92
ret <8 x float > %r
76
93
}
77
94
95
+ ; Negative test - the transform is likely not profitable if the fneg has another use.
96
+
78
97
define <8 x float > @ext7_v8f32_use2 (<8 x float > %x , <8 x float > %y ) {
79
98
; CHECK-LABEL: @ext7_v8f32_use2(
80
99
; CHECK-NEXT: [[E:%.*]] = extractelement <8 x float> [[X:%.*]], i32 3
@@ -90,6 +109,8 @@ define <8 x float> @ext7_v8f32_use2(<8 x float> %x, <8 x float> %y) {
90
109
ret <8 x float > %r
91
110
}
92
111
112
+ ; Negative test - can't convert variable index to a shuffle.
113
+
93
114
define <2 x double > @ext_index_var_v2f64 (<2 x double > %x , <2 x double > %y , i32 %index ) {
94
115
; CHECK-LABEL: @ext_index_var_v2f64(
95
116
; CHECK-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 [[INDEX:%.*]]
@@ -103,6 +124,9 @@ define <2 x double> @ext_index_var_v2f64(<2 x double> %x, <2 x double> %y, i32 %
103
124
ret <2 x double > %r
104
125
}
105
126
127
+ ; Negative test - require same extract/insert index for simple shuffle.
128
+ ; TODO: We could handle this by adjusting the cost calculation.
129
+
106
130
define <2 x double > @ext1_v2f64_ins0 (<2 x double > %x , <2 x double > %y ) {
107
131
; CHECK-LABEL: @ext1_v2f64_ins0(
108
132
; CHECK-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
@@ -116,6 +140,8 @@ define <2 x double> @ext1_v2f64_ins0(<2 x double> %x, <2 x double> %y) {
116
140
ret <2 x double > %r
117
141
}
118
142
143
+ ; Negative test - avoid changing poison ops
144
+
119
145
define <4 x float > @ext12_v4f32 (<4 x float > %x , <4 x float > %y ) {
120
146
; CHECK-LABEL: @ext12_v4f32(
121
147
; CHECK-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 12
0 commit comments