@@ -134,15 +134,14 @@ define <2 x i64> @ins1_ins1_urem(i64 %x, i64 %y) {
134
134
ret <2 x i64 > %r
135
135
}
136
136
137
- ; Negative test
138
- ; TODO: extra use can be accounted for in cost calculation.
137
+ ; Extra use is accounted for in cost calculation.
139
138
140
139
define <4 x i32 > @ins0_ins0_xor (i32 %x , i32 %y ) {
141
140
; CHECK-LABEL: @ins0_ins0_xor(
142
141
; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
143
142
; CHECK-NEXT: call void @use(<4 x i32> [[I0]])
144
- ; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x i32> undef, i32 [[Y:%.*]], i32 0
145
- ; CHECK-NEXT: [[R:%.*]] = xor <4 x i32> [[I0]], [[I1]]
143
+ ; CHECK-NEXT: [[R_SCALAR:%.*]] = xor i32 [[X]], [[Y:%.*]]
144
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[R_SCALAR]], i64 0
146
145
; CHECK-NEXT: ret <4 x i32> [[R]]
147
146
;
148
147
%i0 = insertelement <4 x i32 > undef , i32 %x , i32 0
@@ -152,12 +151,14 @@ define <4 x i32> @ins0_ins0_xor(i32 %x, i32 %y) {
152
151
ret <4 x i32 > %r
153
152
}
154
153
154
+ ; Extra use is accounted for in cost calculation.
155
+
155
156
define <4 x float > @ins1_ins1_fmul (float %x , float %y ) {
156
157
; CHECK-LABEL: @ins1_ins1_fmul(
157
- ; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 1
158
158
; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x float> undef, float [[Y:%.*]], i32 1
159
159
; CHECK-NEXT: call void @usef(<4 x float> [[I1]])
160
- ; CHECK-NEXT: [[R:%.*]] = fmul <4 x float> [[I0]], [[I1]]
160
+ ; CHECK-NEXT: [[R_SCALAR:%.*]] = fmul float [[X:%.*]], [[Y]]
161
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[R_SCALAR]], i64 1
161
162
; CHECK-NEXT: ret <4 x float> [[R]]
162
163
;
163
164
%i0 = insertelement <4 x float > undef , float %x , i32 1
@@ -167,6 +168,8 @@ define <4 x float> @ins1_ins1_fmul(float %x, float %y) {
167
168
ret <4 x float > %r
168
169
}
169
170
171
+ ; If the scalar binop is not cheaper than the vector binop, extra uses can prevent the transform.
172
+
170
173
define <4 x float > @ins2_ins2_fsub (float %x , float %y ) {
171
174
; CHECK-LABEL: @ins2_ins2_fsub(
172
175
; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 2
@@ -184,14 +187,25 @@ define <4 x float> @ins2_ins2_fsub(float %x, float %y) {
184
187
ret <4 x float > %r
185
188
}
186
189
190
+ ; It may be worth scalarizing an expensive binop even if both inserts have extra uses.
191
+
187
192
define <4 x float > @ins3_ins3_fdiv (float %x , float %y ) {
188
- ; CHECK-LABEL: @ins3_ins3_fdiv(
189
- ; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 3
190
- ; CHECK-NEXT: call void @usef(<4 x float> [[I0]])
191
- ; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x float> undef, float [[Y:%.*]], i32 3
192
- ; CHECK-NEXT: call void @usef(<4 x float> [[I1]])
193
- ; CHECK-NEXT: [[R:%.*]] = fdiv <4 x float> [[I0]], [[I1]]
194
- ; CHECK-NEXT: ret <4 x float> [[R]]
193
+ ; SSE-LABEL: @ins3_ins3_fdiv(
194
+ ; SSE-NEXT: [[I0:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 3
195
+ ; SSE-NEXT: call void @usef(<4 x float> [[I0]])
196
+ ; SSE-NEXT: [[I1:%.*]] = insertelement <4 x float> undef, float [[Y:%.*]], i32 3
197
+ ; SSE-NEXT: call void @usef(<4 x float> [[I1]])
198
+ ; SSE-NEXT: [[R_SCALAR:%.*]] = fdiv float [[X]], [[Y]]
199
+ ; SSE-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[R_SCALAR]], i64 3
200
+ ; SSE-NEXT: ret <4 x float> [[R]]
201
+ ;
202
+ ; AVX-LABEL: @ins3_ins3_fdiv(
203
+ ; AVX-NEXT: [[I0:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 3
204
+ ; AVX-NEXT: call void @usef(<4 x float> [[I0]])
205
+ ; AVX-NEXT: [[I1:%.*]] = insertelement <4 x float> undef, float [[Y:%.*]], i32 3
206
+ ; AVX-NEXT: call void @usef(<4 x float> [[I1]])
207
+ ; AVX-NEXT: [[R:%.*]] = fdiv <4 x float> [[I0]], [[I1]]
208
+ ; AVX-NEXT: ret <4 x float> [[R]]
195
209
;
196
210
%i0 = insertelement <4 x float > undef , float %x , i32 3
197
211
call void @usef (<4 x float > %i0 )
0 commit comments