@@ -130,14 +130,10 @@ define void @frem_v16f32(<16 x float> %a0, <16 x float> %a1, ptr%p3) nounwind {
130
130
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
131
131
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
132
132
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
133
- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
134
- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
135
- ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
136
- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
137
- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
133
+ ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 32-byte Reload
134
+ ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
135
+ ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 32-byte Reload
138
136
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
139
- ; CHECK-NEXT: vmovaps %xmm2, %xmm0
140
- ; CHECK-NEXT: vzeroupper
141
137
; CHECK-NEXT: callq fmodf@PLT
142
138
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
143
139
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
@@ -195,14 +191,10 @@ define void @frem_v16f32(<16 x float> %a0, <16 x float> %a1, ptr%p3) nounwind {
195
191
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
196
192
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
197
193
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
198
- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
199
- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
200
- ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
201
- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
202
- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
194
+ ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 32-byte Reload
195
+ ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
196
+ ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 32-byte Reload
203
197
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
204
- ; CHECK-NEXT: vmovaps %xmm2, %xmm0
205
- ; CHECK-NEXT: vzeroupper
206
198
; CHECK-NEXT: callq fmodf@PLT
207
199
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
208
200
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
@@ -280,14 +272,10 @@ define void @frem_v8f32(<8 x float> %a0, <8 x float> %a1, ptr%p3) nounwind {
280
272
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
281
273
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
282
274
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
283
- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
284
- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
285
- ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
286
- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
287
- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
275
+ ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 32-byte Reload
276
+ ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
277
+ ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 32-byte Reload
288
278
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
289
- ; CHECK-NEXT: vmovaps %xmm2, %xmm0
290
- ; CHECK-NEXT: vzeroupper
291
279
; CHECK-NEXT: callq fmodf@PLT
292
280
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
293
281
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
@@ -389,14 +377,10 @@ define void @frem_v8f64(<8 x double> %a0, <8 x double> %a1, ptr%p3) nounwind {
389
377
; CHECK-NEXT: vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
390
378
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
391
379
; CHECK-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
392
- ; CHECK-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
393
- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
394
- ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
395
- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
396
- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
380
+ ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 32-byte Reload
381
+ ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
382
+ ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 32-byte Reload
397
383
; CHECK-NEXT: vmovaps %xmm1, (%rsp) # 16-byte Spill
398
- ; CHECK-NEXT: vmovaps %xmm2, %xmm0
399
- ; CHECK-NEXT: vzeroupper
400
384
; CHECK-NEXT: callq fmod@PLT
401
385
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
402
386
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
@@ -422,14 +406,10 @@ define void @frem_v8f64(<8 x double> %a0, <8 x double> %a1, ptr%p3) nounwind {
422
406
; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload
423
407
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
424
408
; CHECK-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
425
- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
426
- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
427
- ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
428
- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
429
- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
409
+ ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 32-byte Reload
410
+ ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
411
+ ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 32-byte Reload
430
412
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
431
- ; CHECK-NEXT: vmovaps %xmm2, %xmm0
432
- ; CHECK-NEXT: vzeroupper
433
413
; CHECK-NEXT: callq fmod@PLT
434
414
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
435
415
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
@@ -475,14 +455,10 @@ define void @frem_v4f64(<4 x double> %a0, <4 x double> %a1, ptr%p3) nounwind {
475
455
; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload
476
456
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
477
457
; CHECK-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
478
- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
479
- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
480
- ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
481
- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
482
- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
458
+ ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 32-byte Reload
459
+ ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
460
+ ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 32-byte Reload
483
461
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
484
- ; CHECK-NEXT: vmovaps %xmm2, %xmm0
485
- ; CHECK-NEXT: vzeroupper
486
462
; CHECK-NEXT: callq fmod@PLT
487
463
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
488
464
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
@@ -545,11 +521,9 @@ define void @frem_v32f16(<32 x half> %a0, <32 x half> %a1, ptr%p3) nounwind {
545
521
; CHECK-NEXT: vzeroupper
546
522
; CHECK-NEXT: callq __extendhfsf2@PLT
547
523
; CHECK-NEXT: vmovd %xmm0, (%rsp) # 4-byte Folded Spill
548
- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
549
- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
524
+ ; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 32-byte Reload
550
525
; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
551
526
; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
552
- ; CHECK-NEXT: vzeroupper
553
527
; CHECK-NEXT: callq __extendhfsf2@PLT
554
528
; CHECK-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload
555
529
; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero
@@ -773,18 +747,15 @@ define void @frem_v32f16(<32 x half> %a0, <32 x half> %a1, ptr%p3) nounwind {
773
747
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
774
748
; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
775
749
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
776
- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
777
- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
750
+ ; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 32-byte Reload
778
751
; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
779
752
; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
780
753
; CHECK-NEXT: vzeroupper
781
754
; CHECK-NEXT: callq __extendhfsf2@PLT
782
755
; CHECK-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
783
- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
784
- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
756
+ ; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 32-byte Reload
785
757
; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
786
758
; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
787
- ; CHECK-NEXT: vzeroupper
788
759
; CHECK-NEXT: callq __extendhfsf2@PLT
789
760
; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
790
761
; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero
@@ -1033,11 +1004,9 @@ define void @frem_v16f16(<16 x half> %a0, <16 x half> %a1, ptr%p3) nounwind {
1033
1004
; CHECK-NEXT: vzeroupper
1034
1005
; CHECK-NEXT: callq __extendhfsf2@PLT
1035
1006
; CHECK-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
1036
- ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
1037
- ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
1007
+ ; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 32-byte Reload
1038
1008
; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1039
1009
; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1040
- ; CHECK-NEXT: vzeroupper
1041
1010
; CHECK-NEXT: callq __extendhfsf2@PLT
1042
1011
; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
1043
1012
; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero
0 commit comments