@@ -206,6 +206,147 @@ define <4 x float> @fneg(<4 x float> %Q) nounwind {
206
206
}
207
207
208
208
; store(fneg(load())) - convert scalar to integer
209
; Test: negate a half value in place. The IR loads a half from %ptr, applies
; fneg, and stores the result back through the same pointer.
; The expectations below record that, for all four run prefixes, the negate is
; carried out on a float: the half is extended through a libcall
; (__gnu_h2f_ieee on the SSE1 runs, __extendhfsf2 on the SSE2 runs), xor'ed
; with a constant-pool operand, and truncated back through a second libcall
; (__gnu_f2h_ieee / __truncsfhf2) before the 16-bit store.
; NOTE(review): the constant-pool operand is presumably the sign-bit mask;
; confirm against the emitted constants.
; NOTE(review): the assertion lines look autogenerated; regenerate them with
; the project's check-update script instead of editing them by hand.
+ define void @fneg_int_rmw_half (ptr %ptr ) nounwind {
210
+ ; X86-SSE1-LABEL: fneg_int_rmw_half:
211
+ ; X86-SSE1: # %bb.0:
212
+ ; X86-SSE1-NEXT: pushl %esi
213
+ ; X86-SSE1-NEXT: subl $8, %esp
214
+ ; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi
215
+ ; X86-SSE1-NEXT: movzwl (%esi), %eax
216
+ ; X86-SSE1-NEXT: movl %eax, (%esp)
217
+ ; X86-SSE1-NEXT: calll __gnu_h2f_ieee
218
+ ; X86-SSE1-NEXT: fstps {{[0-9]+}}(%esp)
219
+ ; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
220
+ ; X86-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
221
+ ; X86-SSE1-NEXT: movss %xmm0, (%esp)
222
+ ; X86-SSE1-NEXT: calll __gnu_f2h_ieee
223
+ ; X86-SSE1-NEXT: movw %ax, (%esi)
224
+ ; X86-SSE1-NEXT: addl $8, %esp
225
+ ; X86-SSE1-NEXT: popl %esi
226
+ ; X86-SSE1-NEXT: retl
227
+ ;
228
+ ; X86-SSE2-LABEL: fneg_int_rmw_half:
229
+ ; X86-SSE2: # %bb.0:
230
+ ; X86-SSE2-NEXT: pushl %esi
231
+ ; X86-SSE2-NEXT: subl $8, %esp
232
+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
233
+ ; X86-SSE2-NEXT: pinsrw $0, (%esi), %xmm0
234
+ ; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax
235
+ ; X86-SSE2-NEXT: movw %ax, (%esp)
236
+ ; X86-SSE2-NEXT: calll __extendhfsf2
237
+ ; X86-SSE2-NEXT: fstps {{[0-9]+}}(%esp)
238
+ ; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
239
+ ; X86-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
240
+ ; X86-SSE2-NEXT: movd %xmm0, (%esp)
241
+ ; X86-SSE2-NEXT: calll __truncsfhf2
242
+ ; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax
243
+ ; X86-SSE2-NEXT: movw %ax, (%esi)
244
+ ; X86-SSE2-NEXT: addl $8, %esp
245
+ ; X86-SSE2-NEXT: popl %esi
246
+ ; X86-SSE2-NEXT: retl
247
+ ;
248
+ ; X64-SSE1-LABEL: fneg_int_rmw_half:
249
+ ; X64-SSE1: # %bb.0:
250
+ ; X64-SSE1-NEXT: pushq %rbx
251
+ ; X64-SSE1-NEXT: movq %rdi, %rbx
252
+ ; X64-SSE1-NEXT: movzwl (%rdi), %edi
253
+ ; X64-SSE1-NEXT: callq __gnu_h2f_ieee@PLT
254
+ ; X64-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
255
+ ; X64-SSE1-NEXT: callq __gnu_f2h_ieee@PLT
256
+ ; X64-SSE1-NEXT: movw %ax, (%rbx)
257
+ ; X64-SSE1-NEXT: popq %rbx
258
+ ; X64-SSE1-NEXT: retq
259
+ ;
260
+ ; X64-SSE2-LABEL: fneg_int_rmw_half:
261
+ ; X64-SSE2: # %bb.0:
262
+ ; X64-SSE2-NEXT: pushq %rbx
263
+ ; X64-SSE2-NEXT: movq %rdi, %rbx
264
+ ; X64-SSE2-NEXT: pinsrw $0, (%rdi), %xmm0
265
+ ; X64-SSE2-NEXT: callq __extendhfsf2@PLT
266
+ ; X64-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
267
+ ; X64-SSE2-NEXT: callq __truncsfhf2@PLT
268
+ ; X64-SSE2-NEXT: pextrw $0, %xmm0, %eax
269
+ ; X64-SSE2-NEXT: movw %ax, (%rbx)
270
+ ; X64-SSE2-NEXT: popq %rbx
271
+ ; X64-SSE2-NEXT: retq
272
; Read-modify-write on one address: the load and the store both use %ptr.
+ %1 = load half , ptr %ptr
273
+ %2 = fneg half %1
274
+ store half %2 , ptr %ptr
275
+ ret void
276
+ }
277
+
278
; Test: negate a bfloat read from one pointer and write it to another. The IR
; loads a bfloat from %src, applies fneg, and stores the result to %dst.
; The expectations below record that, for all four run prefixes, the 16-bit
; value is widened inline (movzwl followed by shll $16), xor'ed with a
; constant-pool operand in an XMM register, and narrowed again through the
; __truncsfbf2 libcall before the 16-bit store.
; NOTE(review): the constant-pool operand is presumably the sign-bit mask;
; confirm against the emitted constants.
; NOTE(review): the assertion lines look autogenerated; regenerate them with
; the project's check-update script instead of editing them by hand.
+ define void @fneg_int_bfloat (ptr %src , ptr %dst ) nounwind {
279
+ ; X86-SSE1-LABEL: fneg_int_bfloat:
280
+ ; X86-SSE1: # %bb.0:
281
+ ; X86-SSE1-NEXT: pushl %esi
282
+ ; X86-SSE1-NEXT: subl $8, %esp
283
+ ; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi
284
+ ; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
285
+ ; X86-SSE1-NEXT: movzwl (%eax), %eax
286
+ ; X86-SSE1-NEXT: shll $16, %eax
287
+ ; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp)
288
+ ; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
289
+ ; X86-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
290
+ ; X86-SSE1-NEXT: movss %xmm0, (%esp)
291
+ ; X86-SSE1-NEXT: calll __truncsfbf2
292
+ ; X86-SSE1-NEXT: movw %ax, (%esi)
293
+ ; X86-SSE1-NEXT: addl $8, %esp
294
+ ; X86-SSE1-NEXT: popl %esi
295
+ ; X86-SSE1-NEXT: retl
296
+ ;
297
+ ; X86-SSE2-LABEL: fneg_int_bfloat:
298
+ ; X86-SSE2: # %bb.0:
299
+ ; X86-SSE2-NEXT: pushl %esi
300
+ ; X86-SSE2-NEXT: pushl %eax
301
+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
302
+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
303
+ ; X86-SSE2-NEXT: movzwl (%eax), %eax
304
+ ; X86-SSE2-NEXT: shll $16, %eax
305
+ ; X86-SSE2-NEXT: movd %eax, %xmm0
306
+ ; X86-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
307
+ ; X86-SSE2-NEXT: movd %xmm0, (%esp)
308
+ ; X86-SSE2-NEXT: calll __truncsfbf2
309
+ ; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax
310
+ ; X86-SSE2-NEXT: movw %ax, (%esi)
311
+ ; X86-SSE2-NEXT: addl $4, %esp
312
+ ; X86-SSE2-NEXT: popl %esi
313
+ ; X86-SSE2-NEXT: retl
314
+ ;
315
+ ; X64-SSE1-LABEL: fneg_int_bfloat:
316
+ ; X64-SSE1: # %bb.0:
317
+ ; X64-SSE1-NEXT: pushq %rbx
318
+ ; X64-SSE1-NEXT: subq $16, %rsp
319
+ ; X64-SSE1-NEXT: movq %rsi, %rbx
320
+ ; X64-SSE1-NEXT: movzwl (%rdi), %eax
321
+ ; X64-SSE1-NEXT: shll $16, %eax
322
+ ; X64-SSE1-NEXT: movl %eax, {{[0-9]+}}(%rsp)
323
+ ; X64-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
324
+ ; X64-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
325
+ ; X64-SSE1-NEXT: callq __truncsfbf2@PLT
326
+ ; X64-SSE1-NEXT: movw %ax, (%rbx)
327
+ ; X64-SSE1-NEXT: addq $16, %rsp
328
+ ; X64-SSE1-NEXT: popq %rbx
329
+ ; X64-SSE1-NEXT: retq
330
+ ;
331
+ ; X64-SSE2-LABEL: fneg_int_bfloat:
332
+ ; X64-SSE2: # %bb.0:
333
+ ; X64-SSE2-NEXT: pushq %rbx
334
+ ; X64-SSE2-NEXT: movq %rsi, %rbx
335
+ ; X64-SSE2-NEXT: movzwl (%rdi), %eax
336
+ ; X64-SSE2-NEXT: shll $16, %eax
337
+ ; X64-SSE2-NEXT: movd %eax, %xmm0
338
+ ; X64-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
339
+ ; X64-SSE2-NEXT: callq __truncsfbf2@PLT
340
+ ; X64-SSE2-NEXT: pextrw $0, %xmm0, %eax
341
+ ; X64-SSE2-NEXT: movw %ax, (%rbx)
342
+ ; X64-SSE2-NEXT: popq %rbx
343
+ ; X64-SSE2-NEXT: retq
344
; The load reads %src and the store writes %dst, two separate arguments.
+ %1 = load bfloat, ptr %src
345
+ %2 = fneg bfloat %1
346
+ store bfloat %2 , ptr %dst
347
+ ret void
348
+ }
349
+
209
350
define void @fneg_int_rmw_f32 (ptr %ptr ) {
210
351
; X86-SSE-LABEL: fneg_int_rmw_f32:
211
352
; X86-SSE: # %bb.0:
0 commit comments