@@ -114,26 +114,12 @@ define void @store_half(half* %fptr, half %v) {
114
114
}
115
115
116
116
define void @store_float (float * %fptr , float %v ) {
117
- ; X86-SSE-LABEL: store_float:
118
- ; X86-SSE: # %bb.0:
119
- ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
120
- ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
121
- ; X86-SSE-NEXT: movss %xmm0, (%eax)
122
- ; X86-SSE-NEXT: retl
123
- ;
124
- ; X86-AVX-LABEL: store_float:
125
- ; X86-AVX: # %bb.0:
126
- ; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
127
- ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
128
- ; X86-AVX-NEXT: vmovss %xmm0, (%eax)
129
- ; X86-AVX-NEXT: retl
130
- ;
131
- ; X86-NOSSE-LABEL: store_float:
132
- ; X86-NOSSE: # %bb.0:
133
- ; X86-NOSSE-NEXT: flds {{[0-9]+}}(%esp)
134
- ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
135
- ; X86-NOSSE-NEXT: fstps (%eax)
136
- ; X86-NOSSE-NEXT: retl
117
+ ; X86-LABEL: store_float:
118
+ ; X86: # %bb.0:
119
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
120
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
121
+ ; X86-NEXT: movl %ecx, (%eax)
122
+ ; X86-NEXT: retl
137
123
;
138
124
; X64-SSE-LABEL: store_float:
139
125
; X64-SSE: # %bb.0:
@@ -176,16 +162,16 @@ define void @store_double(double* %fptr, double %v) {
176
162
;
177
163
; X86-SSE2-LABEL: store_double:
178
164
; X86-SSE2: # %bb.0:
179
- ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
180
165
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
181
- ; X86-SSE2-NEXT: movsd %xmm0, (%eax)
166
+ ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
167
+ ; X86-SSE2-NEXT: movlps %xmm0, (%eax)
182
168
; X86-SSE2-NEXT: retl
183
169
;
184
170
; X86-AVX-LABEL: store_double:
185
171
; X86-AVX: # %bb.0:
186
- ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
187
172
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
188
- ; X86-AVX-NEXT: vmovsd %xmm0, (%eax)
173
+ ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
174
+ ; X86-AVX-NEXT: vmovlps %xmm0, (%eax)
189
175
; X86-AVX-NEXT: retl
190
176
;
191
177
; X86-NOSSE-LABEL: store_double:
@@ -290,12 +276,26 @@ define void @store_fp128(fp128* %fptr, fp128 %v) {
290
276
;
291
277
; X64-SSE-LABEL: store_fp128:
292
278
; X64-SSE: # %bb.0:
293
- ; X64-SSE-NEXT: movaps %xmm0, (%rdi)
279
+ ; X64-SSE-NEXT: subq $24, %rsp
280
+ ; X64-SSE-NEXT: .cfi_def_cfa_offset 32
281
+ ; X64-SSE-NEXT: movaps %xmm0, (%rsp)
282
+ ; X64-SSE-NEXT: movq (%rsp), %rsi
283
+ ; X64-SSE-NEXT: movq {{[0-9]+}}(%rsp), %rdx
284
+ ; X64-SSE-NEXT: callq __sync_lock_test_and_set_16
285
+ ; X64-SSE-NEXT: addq $24, %rsp
286
+ ; X64-SSE-NEXT: .cfi_def_cfa_offset 8
294
287
; X64-SSE-NEXT: retq
295
288
;
296
289
; X64-AVX-LABEL: store_fp128:
297
290
; X64-AVX: # %bb.0:
298
- ; X64-AVX-NEXT: vmovaps %xmm0, (%rdi)
291
+ ; X64-AVX-NEXT: subq $24, %rsp
292
+ ; X64-AVX-NEXT: .cfi_def_cfa_offset 32
293
+ ; X64-AVX-NEXT: vmovaps %xmm0, (%rsp)
294
+ ; X64-AVX-NEXT: movq (%rsp), %rsi
295
+ ; X64-AVX-NEXT: movq {{[0-9]+}}(%rsp), %rdx
296
+ ; X64-AVX-NEXT: callq __sync_lock_test_and_set_16
297
+ ; X64-AVX-NEXT: addq $24, %rsp
298
+ ; X64-AVX-NEXT: .cfi_def_cfa_offset 8
299
299
; X64-AVX-NEXT: retq
300
300
store atomic fp128 %v , fp128 * %fptr unordered , align 16
301
301
ret void
@@ -383,11 +383,53 @@ define half @load_half(half* %fptr) {
383
383
}
384
384
385
385
define float @load_float (float * %fptr ) {
386
- ; X86-LABEL: load_float:
387
- ; X86: # %bb.0:
388
- ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
389
- ; X86-NEXT: flds (%eax)
390
- ; X86-NEXT: retl
386
+ ; X86-SSE1-LABEL: load_float:
387
+ ; X86-SSE1: # %bb.0:
388
+ ; X86-SSE1-NEXT: pushl %eax
389
+ ; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
390
+ ; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
391
+ ; X86-SSE1-NEXT: movl (%eax), %eax
392
+ ; X86-SSE1-NEXT: movl %eax, (%esp)
393
+ ; X86-SSE1-NEXT: flds (%esp)
394
+ ; X86-SSE1-NEXT: popl %eax
395
+ ; X86-SSE1-NEXT: .cfi_def_cfa_offset 4
396
+ ; X86-SSE1-NEXT: retl
397
+ ;
398
+ ; X86-SSE2-LABEL: load_float:
399
+ ; X86-SSE2: # %bb.0:
400
+ ; X86-SSE2-NEXT: pushl %eax
401
+ ; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
402
+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
403
+ ; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
404
+ ; X86-SSE2-NEXT: movss %xmm0, (%esp)
405
+ ; X86-SSE2-NEXT: flds (%esp)
406
+ ; X86-SSE2-NEXT: popl %eax
407
+ ; X86-SSE2-NEXT: .cfi_def_cfa_offset 4
408
+ ; X86-SSE2-NEXT: retl
409
+ ;
410
+ ; X86-AVX-LABEL: load_float:
411
+ ; X86-AVX: # %bb.0:
412
+ ; X86-AVX-NEXT: pushl %eax
413
+ ; X86-AVX-NEXT: .cfi_def_cfa_offset 8
414
+ ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
415
+ ; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
416
+ ; X86-AVX-NEXT: vmovss %xmm0, (%esp)
417
+ ; X86-AVX-NEXT: flds (%esp)
418
+ ; X86-AVX-NEXT: popl %eax
419
+ ; X86-AVX-NEXT: .cfi_def_cfa_offset 4
420
+ ; X86-AVX-NEXT: retl
421
+ ;
422
+ ; X86-NOSSE-LABEL: load_float:
423
+ ; X86-NOSSE: # %bb.0:
424
+ ; X86-NOSSE-NEXT: pushl %eax
425
+ ; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
426
+ ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
427
+ ; X86-NOSSE-NEXT: movl (%eax), %eax
428
+ ; X86-NOSSE-NEXT: movl %eax, (%esp)
429
+ ; X86-NOSSE-NEXT: flds (%esp)
430
+ ; X86-NOSSE-NEXT: popl %eax
431
+ ; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
432
+ ; X86-NOSSE-NEXT: retl
391
433
;
392
434
; X64-SSE-LABEL: load_float:
393
435
; X64-SSE: # %bb.0:
@@ -403,11 +445,61 @@ define float @load_float(float* %fptr) {
403
445
}
404
446
405
447
define double @load_double (double * %fptr ) {
406
- ; X86-LABEL: load_double:
407
- ; X86: # %bb.0:
408
- ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
409
- ; X86-NEXT: fldl (%eax)
410
- ; X86-NEXT: retl
448
+ ; X86-SSE1-LABEL: load_double:
449
+ ; X86-SSE1: # %bb.0:
450
+ ; X86-SSE1-NEXT: subl $20, %esp
451
+ ; X86-SSE1-NEXT: .cfi_def_cfa_offset 24
452
+ ; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
453
+ ; X86-SSE1-NEXT: fildll (%eax)
454
+ ; X86-SSE1-NEXT: fistpll {{[0-9]+}}(%esp)
455
+ ; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
456
+ ; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
457
+ ; X86-SSE1-NEXT: movl %ecx, {{[0-9]+}}(%esp)
458
+ ; X86-SSE1-NEXT: movl %eax, (%esp)
459
+ ; X86-SSE1-NEXT: fldl (%esp)
460
+ ; X86-SSE1-NEXT: addl $20, %esp
461
+ ; X86-SSE1-NEXT: .cfi_def_cfa_offset 4
462
+ ; X86-SSE1-NEXT: retl
463
+ ;
464
+ ; X86-SSE2-LABEL: load_double:
465
+ ; X86-SSE2: # %bb.0:
466
+ ; X86-SSE2-NEXT: subl $12, %esp
467
+ ; X86-SSE2-NEXT: .cfi_def_cfa_offset 16
468
+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
469
+ ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
470
+ ; X86-SSE2-NEXT: movlps %xmm0, (%esp)
471
+ ; X86-SSE2-NEXT: fldl (%esp)
472
+ ; X86-SSE2-NEXT: addl $12, %esp
473
+ ; X86-SSE2-NEXT: .cfi_def_cfa_offset 4
474
+ ; X86-SSE2-NEXT: retl
475
+ ;
476
+ ; X86-AVX-LABEL: load_double:
477
+ ; X86-AVX: # %bb.0:
478
+ ; X86-AVX-NEXT: subl $12, %esp
479
+ ; X86-AVX-NEXT: .cfi_def_cfa_offset 16
480
+ ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
481
+ ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
482
+ ; X86-AVX-NEXT: vmovlps %xmm0, (%esp)
483
+ ; X86-AVX-NEXT: fldl (%esp)
484
+ ; X86-AVX-NEXT: addl $12, %esp
485
+ ; X86-AVX-NEXT: .cfi_def_cfa_offset 4
486
+ ; X86-AVX-NEXT: retl
487
+ ;
488
+ ; X86-NOSSE-LABEL: load_double:
489
+ ; X86-NOSSE: # %bb.0:
490
+ ; X86-NOSSE-NEXT: subl $20, %esp
491
+ ; X86-NOSSE-NEXT: .cfi_def_cfa_offset 24
492
+ ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
493
+ ; X86-NOSSE-NEXT: fildll (%eax)
494
+ ; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
495
+ ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
496
+ ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
497
+ ; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
498
+ ; X86-NOSSE-NEXT: movl %eax, (%esp)
499
+ ; X86-NOSSE-NEXT: fldl (%esp)
500
+ ; X86-NOSSE-NEXT: addl $20, %esp
501
+ ; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
502
+ ; X86-NOSSE-NEXT: retl
411
503
;
412
504
; X64-SSE-LABEL: load_double:
413
505
; X64-SSE: # %bb.0:
@@ -465,10 +557,10 @@ define fp128 @load_fp128(fp128* %fptr) {
465
557
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
466
558
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx
467
559
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi
468
- ; X86-SSE-NEXT: movl %edi, 12(%esi)
469
- ; X86-SSE-NEXT: movl %edx, 8(%esi)
470
- ; X86-SSE-NEXT: movl %ecx, 4(%esi)
560
+ ; X86-SSE-NEXT: movl %edi, 8(%esi)
561
+ ; X86-SSE-NEXT: movl %edx, 12(%esi)
471
562
; X86-SSE-NEXT: movl %eax, (%esi)
563
+ ; X86-SSE-NEXT: movl %ecx, 4(%esi)
472
564
; X86-SSE-NEXT: movl %esi, %eax
473
565
; X86-SSE-NEXT: addl $20, %esp
474
566
; X86-SSE-NEXT: .cfi_def_cfa_offset 12
@@ -546,10 +638,10 @@ define fp128 @load_fp128(fp128* %fptr) {
546
638
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
547
639
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
548
640
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edi
549
- ; X86-NOSSE-NEXT: movl %edi, 12(%esi)
550
- ; X86-NOSSE-NEXT: movl %edx, 8(%esi)
551
- ; X86-NOSSE-NEXT: movl %ecx, 4(%esi)
641
+ ; X86-NOSSE-NEXT: movl %edi, 8(%esi)
642
+ ; X86-NOSSE-NEXT: movl %edx, 12(%esi)
552
643
; X86-NOSSE-NEXT: movl %eax, (%esi)
644
+ ; X86-NOSSE-NEXT: movl %ecx, 4(%esi)
553
645
; X86-NOSSE-NEXT: movl %esi, %eax
554
646
; X86-NOSSE-NEXT: addl $20, %esp
555
647
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 12
@@ -561,12 +653,34 @@ define fp128 @load_fp128(fp128* %fptr) {
561
653
;
562
654
; X64-SSE-LABEL: load_fp128:
563
655
; X64-SSE: # %bb.0:
564
- ; X64-SSE-NEXT: movaps (%rdi), %xmm0
656
+ ; X64-SSE-NEXT: subq $24, %rsp
657
+ ; X64-SSE-NEXT: .cfi_def_cfa_offset 32
658
+ ; X64-SSE-NEXT: xorl %esi, %esi
659
+ ; X64-SSE-NEXT: xorl %edx, %edx
660
+ ; X64-SSE-NEXT: xorl %ecx, %ecx
661
+ ; X64-SSE-NEXT: xorl %r8d, %r8d
662
+ ; X64-SSE-NEXT: callq __sync_val_compare_and_swap_16
663
+ ; X64-SSE-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
664
+ ; X64-SSE-NEXT: movq %rax, (%rsp)
665
+ ; X64-SSE-NEXT: movaps (%rsp), %xmm0
666
+ ; X64-SSE-NEXT: addq $24, %rsp
667
+ ; X64-SSE-NEXT: .cfi_def_cfa_offset 8
565
668
; X64-SSE-NEXT: retq
566
669
;
567
670
; X64-AVX-LABEL: load_fp128:
568
671
; X64-AVX: # %bb.0:
569
- ; X64-AVX-NEXT: vmovaps (%rdi), %xmm0
672
+ ; X64-AVX-NEXT: subq $24, %rsp
673
+ ; X64-AVX-NEXT: .cfi_def_cfa_offset 32
674
+ ; X64-AVX-NEXT: xorl %esi, %esi
675
+ ; X64-AVX-NEXT: xorl %edx, %edx
676
+ ; X64-AVX-NEXT: xorl %ecx, %ecx
677
+ ; X64-AVX-NEXT: xorl %r8d, %r8d
678
+ ; X64-AVX-NEXT: callq __sync_val_compare_and_swap_16
679
+ ; X64-AVX-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
680
+ ; X64-AVX-NEXT: movq %rax, (%rsp)
681
+ ; X64-AVX-NEXT: vmovaps (%rsp), %xmm0
682
+ ; X64-AVX-NEXT: addq $24, %rsp
683
+ ; X64-AVX-NEXT: .cfi_def_cfa_offset 8
570
684
; X64-AVX-NEXT: retq
571
685
%v = load atomic fp128 , fp128 * %fptr unordered , align 16
572
686
ret fp128 %v
0 commit comments