Skip to content

Commit 027aa27

Browse files
committed
[X86/Atomics] (Semantically) revert G246098, switch back to the old atomic example
When writing an email for a follow-up proposal, I realized one of the diffs in the committed change was incorrect. Digging into it revealed that the fix is complicated enough to require some thought, so reverting in the meantime. The problem is visible in this diff (from the revert): ; X64-SSE-LABEL: store_fp128: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movaps %xmm0, (%rdi) +; X64-SSE-NEXT: subq $24, %rsp +; X64-SSE-NEXT: .cfi_def_cfa_offset 32 +; X64-SSE-NEXT: movaps %xmm0, (%rsp) +; X64-SSE-NEXT: movq (%rsp), %rsi +; X64-SSE-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; X64-SSE-NEXT: callq __sync_lock_test_and_set_16 +; X64-SSE-NEXT: addq $24, %rsp +; X64-SSE-NEXT: .cfi_def_cfa_offset 8 ; X64-SSE-NEXT: retq store atomic fp128 %v, fp128* %fptr unordered, align 16 ret void The problem here is threefold: 1) x86-64 doesn't guarantee atomicity of anything larger than 8 bytes. Some platforms observably break this guarantee, others don't, but the codegen isn't considering this, so it's wrong on at least some platforms. 2) When I started to track down the problem, I discovered that DAGCombiner had stripped the atomicity off the store entirely. This comes down to idiomatic usage of DAG.getStore passing all MMO components separately as opposed to just passing the MMO. 3) On x86 (not -64), there are cases where 8-byte atomicity is supported, but only for floating point operations. This would seem to imply that operation typing matters for correctness, and DAGCombine happily folds away bitcasts. I'm not 100% sure there's a problem here, but I'm not entirely sure there isn't either. I plan on returning to each issue in turn; sorry for the churn here.
1 parent 0a220de commit 027aa27

File tree

4 files changed

+168
-47
lines changed

4 files changed

+168
-47
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ static cl::opt<bool> MulConstantOptimization(
8787
cl::Hidden);
8888

8989
static cl::opt<bool> ExperimentalUnorderedISEL(
90-
"x86-experimental-unordered-atomic-isel", cl::init(true),
90+
"x86-experimental-unordered-atomic-isel", cl::init(false),
9191
cl::desc("Use LoadSDNode and StoreSDNode instead of "
9292
"AtomicSDNode for unordered atomic loads and "
9393
"stores respectively."),

llvm/test/CodeGen/X86/atomic-non-integer-fp128.ll

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,14 @@ define void @store_fp128(fp128* %fptr, fp128 %v) {
2121
;
2222
; X64-SSE-LABEL: store_fp128:
2323
; X64-SSE: # %bb.0:
24-
; X64-SSE-NEXT: movaps %xmm0, (%rdi)
24+
; X64-SSE-NEXT: subq $24, %rsp
25+
; X64-SSE-NEXT: .cfi_def_cfa_offset 32
26+
; X64-SSE-NEXT: movaps %xmm0, (%rsp)
27+
; X64-SSE-NEXT: movq (%rsp), %rsi
28+
; X64-SSE-NEXT: movq {{[0-9]+}}(%rsp), %rdx
29+
; X64-SSE-NEXT: callq __sync_lock_test_and_set_16
30+
; X64-SSE-NEXT: addq $24, %rsp
31+
; X64-SSE-NEXT: .cfi_def_cfa_offset 8
2532
; X64-SSE-NEXT: retq
2633
store atomic fp128 %v, fp128* %fptr unordered, align 16
2734
ret void

llvm/test/CodeGen/X86/atomic-non-integer.ll

Lines changed: 158 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -114,26 +114,12 @@ define void @store_half(half* %fptr, half %v) {
114114
}
115115

116116
define void @store_float(float* %fptr, float %v) {
117-
; X86-SSE-LABEL: store_float:
118-
; X86-SSE: # %bb.0:
119-
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
120-
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
121-
; X86-SSE-NEXT: movss %xmm0, (%eax)
122-
; X86-SSE-NEXT: retl
123-
;
124-
; X86-AVX-LABEL: store_float:
125-
; X86-AVX: # %bb.0:
126-
; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
127-
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
128-
; X86-AVX-NEXT: vmovss %xmm0, (%eax)
129-
; X86-AVX-NEXT: retl
130-
;
131-
; X86-NOSSE-LABEL: store_float:
132-
; X86-NOSSE: # %bb.0:
133-
; X86-NOSSE-NEXT: flds {{[0-9]+}}(%esp)
134-
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
135-
; X86-NOSSE-NEXT: fstps (%eax)
136-
; X86-NOSSE-NEXT: retl
117+
; X86-LABEL: store_float:
118+
; X86: # %bb.0:
119+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
120+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
121+
; X86-NEXT: movl %ecx, (%eax)
122+
; X86-NEXT: retl
137123
;
138124
; X64-SSE-LABEL: store_float:
139125
; X64-SSE: # %bb.0:
@@ -176,16 +162,16 @@ define void @store_double(double* %fptr, double %v) {
176162
;
177163
; X86-SSE2-LABEL: store_double:
178164
; X86-SSE2: # %bb.0:
179-
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
180165
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
181-
; X86-SSE2-NEXT: movsd %xmm0, (%eax)
166+
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
167+
; X86-SSE2-NEXT: movlps %xmm0, (%eax)
182168
; X86-SSE2-NEXT: retl
183169
;
184170
; X86-AVX-LABEL: store_double:
185171
; X86-AVX: # %bb.0:
186-
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
187172
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
188-
; X86-AVX-NEXT: vmovsd %xmm0, (%eax)
173+
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
174+
; X86-AVX-NEXT: vmovlps %xmm0, (%eax)
189175
; X86-AVX-NEXT: retl
190176
;
191177
; X86-NOSSE-LABEL: store_double:
@@ -290,12 +276,26 @@ define void @store_fp128(fp128* %fptr, fp128 %v) {
290276
;
291277
; X64-SSE-LABEL: store_fp128:
292278
; X64-SSE: # %bb.0:
293-
; X64-SSE-NEXT: movaps %xmm0, (%rdi)
279+
; X64-SSE-NEXT: subq $24, %rsp
280+
; X64-SSE-NEXT: .cfi_def_cfa_offset 32
281+
; X64-SSE-NEXT: movaps %xmm0, (%rsp)
282+
; X64-SSE-NEXT: movq (%rsp), %rsi
283+
; X64-SSE-NEXT: movq {{[0-9]+}}(%rsp), %rdx
284+
; X64-SSE-NEXT: callq __sync_lock_test_and_set_16
285+
; X64-SSE-NEXT: addq $24, %rsp
286+
; X64-SSE-NEXT: .cfi_def_cfa_offset 8
294287
; X64-SSE-NEXT: retq
295288
;
296289
; X64-AVX-LABEL: store_fp128:
297290
; X64-AVX: # %bb.0:
298-
; X64-AVX-NEXT: vmovaps %xmm0, (%rdi)
291+
; X64-AVX-NEXT: subq $24, %rsp
292+
; X64-AVX-NEXT: .cfi_def_cfa_offset 32
293+
; X64-AVX-NEXT: vmovaps %xmm0, (%rsp)
294+
; X64-AVX-NEXT: movq (%rsp), %rsi
295+
; X64-AVX-NEXT: movq {{[0-9]+}}(%rsp), %rdx
296+
; X64-AVX-NEXT: callq __sync_lock_test_and_set_16
297+
; X64-AVX-NEXT: addq $24, %rsp
298+
; X64-AVX-NEXT: .cfi_def_cfa_offset 8
299299
; X64-AVX-NEXT: retq
300300
store atomic fp128 %v, fp128* %fptr unordered, align 16
301301
ret void
@@ -383,11 +383,53 @@ define half @load_half(half* %fptr) {
383383
}
384384

385385
define float @load_float(float* %fptr) {
386-
; X86-LABEL: load_float:
387-
; X86: # %bb.0:
388-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
389-
; X86-NEXT: flds (%eax)
390-
; X86-NEXT: retl
386+
; X86-SSE1-LABEL: load_float:
387+
; X86-SSE1: # %bb.0:
388+
; X86-SSE1-NEXT: pushl %eax
389+
; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
390+
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
391+
; X86-SSE1-NEXT: movl (%eax), %eax
392+
; X86-SSE1-NEXT: movl %eax, (%esp)
393+
; X86-SSE1-NEXT: flds (%esp)
394+
; X86-SSE1-NEXT: popl %eax
395+
; X86-SSE1-NEXT: .cfi_def_cfa_offset 4
396+
; X86-SSE1-NEXT: retl
397+
;
398+
; X86-SSE2-LABEL: load_float:
399+
; X86-SSE2: # %bb.0:
400+
; X86-SSE2-NEXT: pushl %eax
401+
; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
402+
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
403+
; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
404+
; X86-SSE2-NEXT: movss %xmm0, (%esp)
405+
; X86-SSE2-NEXT: flds (%esp)
406+
; X86-SSE2-NEXT: popl %eax
407+
; X86-SSE2-NEXT: .cfi_def_cfa_offset 4
408+
; X86-SSE2-NEXT: retl
409+
;
410+
; X86-AVX-LABEL: load_float:
411+
; X86-AVX: # %bb.0:
412+
; X86-AVX-NEXT: pushl %eax
413+
; X86-AVX-NEXT: .cfi_def_cfa_offset 8
414+
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
415+
; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
416+
; X86-AVX-NEXT: vmovss %xmm0, (%esp)
417+
; X86-AVX-NEXT: flds (%esp)
418+
; X86-AVX-NEXT: popl %eax
419+
; X86-AVX-NEXT: .cfi_def_cfa_offset 4
420+
; X86-AVX-NEXT: retl
421+
;
422+
; X86-NOSSE-LABEL: load_float:
423+
; X86-NOSSE: # %bb.0:
424+
; X86-NOSSE-NEXT: pushl %eax
425+
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
426+
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
427+
; X86-NOSSE-NEXT: movl (%eax), %eax
428+
; X86-NOSSE-NEXT: movl %eax, (%esp)
429+
; X86-NOSSE-NEXT: flds (%esp)
430+
; X86-NOSSE-NEXT: popl %eax
431+
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
432+
; X86-NOSSE-NEXT: retl
391433
;
392434
; X64-SSE-LABEL: load_float:
393435
; X64-SSE: # %bb.0:
@@ -403,11 +445,61 @@ define float @load_float(float* %fptr) {
403445
}
404446

405447
define double @load_double(double* %fptr) {
406-
; X86-LABEL: load_double:
407-
; X86: # %bb.0:
408-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
409-
; X86-NEXT: fldl (%eax)
410-
; X86-NEXT: retl
448+
; X86-SSE1-LABEL: load_double:
449+
; X86-SSE1: # %bb.0:
450+
; X86-SSE1-NEXT: subl $20, %esp
451+
; X86-SSE1-NEXT: .cfi_def_cfa_offset 24
452+
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
453+
; X86-SSE1-NEXT: fildll (%eax)
454+
; X86-SSE1-NEXT: fistpll {{[0-9]+}}(%esp)
455+
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
456+
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
457+
; X86-SSE1-NEXT: movl %ecx, {{[0-9]+}}(%esp)
458+
; X86-SSE1-NEXT: movl %eax, (%esp)
459+
; X86-SSE1-NEXT: fldl (%esp)
460+
; X86-SSE1-NEXT: addl $20, %esp
461+
; X86-SSE1-NEXT: .cfi_def_cfa_offset 4
462+
; X86-SSE1-NEXT: retl
463+
;
464+
; X86-SSE2-LABEL: load_double:
465+
; X86-SSE2: # %bb.0:
466+
; X86-SSE2-NEXT: subl $12, %esp
467+
; X86-SSE2-NEXT: .cfi_def_cfa_offset 16
468+
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
469+
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
470+
; X86-SSE2-NEXT: movlps %xmm0, (%esp)
471+
; X86-SSE2-NEXT: fldl (%esp)
472+
; X86-SSE2-NEXT: addl $12, %esp
473+
; X86-SSE2-NEXT: .cfi_def_cfa_offset 4
474+
; X86-SSE2-NEXT: retl
475+
;
476+
; X86-AVX-LABEL: load_double:
477+
; X86-AVX: # %bb.0:
478+
; X86-AVX-NEXT: subl $12, %esp
479+
; X86-AVX-NEXT: .cfi_def_cfa_offset 16
480+
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
481+
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
482+
; X86-AVX-NEXT: vmovlps %xmm0, (%esp)
483+
; X86-AVX-NEXT: fldl (%esp)
484+
; X86-AVX-NEXT: addl $12, %esp
485+
; X86-AVX-NEXT: .cfi_def_cfa_offset 4
486+
; X86-AVX-NEXT: retl
487+
;
488+
; X86-NOSSE-LABEL: load_double:
489+
; X86-NOSSE: # %bb.0:
490+
; X86-NOSSE-NEXT: subl $20, %esp
491+
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 24
492+
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
493+
; X86-NOSSE-NEXT: fildll (%eax)
494+
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
495+
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
496+
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
497+
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
498+
; X86-NOSSE-NEXT: movl %eax, (%esp)
499+
; X86-NOSSE-NEXT: fldl (%esp)
500+
; X86-NOSSE-NEXT: addl $20, %esp
501+
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
502+
; X86-NOSSE-NEXT: retl
411503
;
412504
; X64-SSE-LABEL: load_double:
413505
; X64-SSE: # %bb.0:
@@ -465,10 +557,10 @@ define fp128 @load_fp128(fp128* %fptr) {
465557
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
466558
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx
467559
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi
468-
; X86-SSE-NEXT: movl %edi, 12(%esi)
469-
; X86-SSE-NEXT: movl %edx, 8(%esi)
470-
; X86-SSE-NEXT: movl %ecx, 4(%esi)
560+
; X86-SSE-NEXT: movl %edi, 8(%esi)
561+
; X86-SSE-NEXT: movl %edx, 12(%esi)
471562
; X86-SSE-NEXT: movl %eax, (%esi)
563+
; X86-SSE-NEXT: movl %ecx, 4(%esi)
472564
; X86-SSE-NEXT: movl %esi, %eax
473565
; X86-SSE-NEXT: addl $20, %esp
474566
; X86-SSE-NEXT: .cfi_def_cfa_offset 12
@@ -546,10 +638,10 @@ define fp128 @load_fp128(fp128* %fptr) {
546638
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
547639
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
548640
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edi
549-
; X86-NOSSE-NEXT: movl %edi, 12(%esi)
550-
; X86-NOSSE-NEXT: movl %edx, 8(%esi)
551-
; X86-NOSSE-NEXT: movl %ecx, 4(%esi)
641+
; X86-NOSSE-NEXT: movl %edi, 8(%esi)
642+
; X86-NOSSE-NEXT: movl %edx, 12(%esi)
552643
; X86-NOSSE-NEXT: movl %eax, (%esi)
644+
; X86-NOSSE-NEXT: movl %ecx, 4(%esi)
553645
; X86-NOSSE-NEXT: movl %esi, %eax
554646
; X86-NOSSE-NEXT: addl $20, %esp
555647
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 12
@@ -561,12 +653,34 @@ define fp128 @load_fp128(fp128* %fptr) {
561653
;
562654
; X64-SSE-LABEL: load_fp128:
563655
; X64-SSE: # %bb.0:
564-
; X64-SSE-NEXT: movaps (%rdi), %xmm0
656+
; X64-SSE-NEXT: subq $24, %rsp
657+
; X64-SSE-NEXT: .cfi_def_cfa_offset 32
658+
; X64-SSE-NEXT: xorl %esi, %esi
659+
; X64-SSE-NEXT: xorl %edx, %edx
660+
; X64-SSE-NEXT: xorl %ecx, %ecx
661+
; X64-SSE-NEXT: xorl %r8d, %r8d
662+
; X64-SSE-NEXT: callq __sync_val_compare_and_swap_16
663+
; X64-SSE-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
664+
; X64-SSE-NEXT: movq %rax, (%rsp)
665+
; X64-SSE-NEXT: movaps (%rsp), %xmm0
666+
; X64-SSE-NEXT: addq $24, %rsp
667+
; X64-SSE-NEXT: .cfi_def_cfa_offset 8
565668
; X64-SSE-NEXT: retq
566669
;
567670
; X64-AVX-LABEL: load_fp128:
568671
; X64-AVX: # %bb.0:
569-
; X64-AVX-NEXT: vmovaps (%rdi), %xmm0
672+
; X64-AVX-NEXT: subq $24, %rsp
673+
; X64-AVX-NEXT: .cfi_def_cfa_offset 32
674+
; X64-AVX-NEXT: xorl %esi, %esi
675+
; X64-AVX-NEXT: xorl %edx, %edx
676+
; X64-AVX-NEXT: xorl %ecx, %ecx
677+
; X64-AVX-NEXT: xorl %r8d, %r8d
678+
; X64-AVX-NEXT: callq __sync_val_compare_and_swap_16
679+
; X64-AVX-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
680+
; X64-AVX-NEXT: movq %rax, (%rsp)
681+
; X64-AVX-NEXT: vmovaps (%rsp), %xmm0
682+
; X64-AVX-NEXT: addq $24, %rsp
683+
; X64-AVX-NEXT: .cfi_def_cfa_offset 8
570684
; X64-AVX-NEXT: retq
571685
%v = load atomic fp128, fp128* %fptr unordered, align 16
572686
ret fp128 %v

llvm/test/CodeGen/X86/combineIncDecVector-crash.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@ define void @TestvMeth(i32 %0, i64 %1) gc "statepoint-example" !prof !1 {
1919
; CHECK-NEXT: callq newarray
2020
; CHECK-NEXT: .Ltmp0:
2121
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
22+
; CHECK-NEXT: addss (%rax), %xmm0
2223
; CHECK-NEXT: movdqu (%rax), %xmm1
2324
; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
2425
; CHECK-NEXT: psubd %xmm2, %xmm1
2526
; CHECK-NEXT: movdqu %xmm1, (%rax)
26-
; CHECK-NEXT: addss {{.*}}(%rip), %xmm0
2727
; CHECK-NEXT: movss %xmm0, (%rax)
2828
bci_0:
2929
%token418 = call token (i64, i32, i8 * (i64, i32, i32, i32)*, i32,

0 commit comments

Comments
 (0)