Commit e79ef93

[X86] Rearrange a few atomics tests. NFC.
1 parent c60ac50 commit e79ef93

6 files changed, +407 -928 lines changed

Lines changed: 49 additions & 0 deletions
@@ -0,0 +1,49 @@
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=corei7 -mattr=-cx16 | FileCheck %s
+; RUN: llc < %s -mtriple=i386-linux-gnu -verify-machineinstrs -mattr=cx16 | FileCheck -check-prefix=CHECK %s
+
+;; Verify that 128-bit atomics emit a libcall without cx16
+;; available.
+;;
+;; We test 32-bit mode with -mattr=cx16, because it should have no
+;; effect for 32-bit mode.
+
+; CHECK-LABEL: test:
+define void @test(ptr %a) nounwind {
+entry:
+; CHECK: __sync_val_compare_and_swap_16
+%0 = cmpxchg ptr %a, i128 1, i128 1 seq_cst seq_cst
+; CHECK: __sync_lock_test_and_set_16
+%1 = atomicrmw xchg ptr %a, i128 1 seq_cst
+; CHECK: __sync_fetch_and_add_16
+%2 = atomicrmw add ptr %a, i128 1 seq_cst
+; CHECK: __sync_fetch_and_sub_16
+%3 = atomicrmw sub ptr %a, i128 1 seq_cst
+; CHECK: __sync_fetch_and_and_16
+%4 = atomicrmw and ptr %a, i128 1 seq_cst
+; CHECK: __sync_fetch_and_nand_16
+%5 = atomicrmw nand ptr %a, i128 1 seq_cst
+; CHECK: __sync_fetch_and_or_16
+%6 = atomicrmw or ptr %a, i128 1 seq_cst
+; CHECK: __sync_fetch_and_xor_16
+%7 = atomicrmw xor ptr %a, i128 1 seq_cst
+; CHECK: __sync_val_compare_and_swap_16
+%8 = load atomic i128, ptr %a seq_cst, align 16
+; CHECK: __sync_lock_test_and_set_16
+store atomic i128 %8, ptr %a seq_cst, align 16
+ret void
+}
+
+; CHECK-LABEL: test_fp:
+define void @test_fp(fp128* %a) nounwind {
+entry:
+; CHECK: __sync_lock_test_and_set_16
+%0 = atomicrmw xchg fp128* %a, fp128 0xL00000000000000004000900000000000 seq_cst
+; Currently fails to compile:
+; %1 = atomicrmw fadd fp128* %a, fp128 0xL00000000000000004000900000000000 seq_cst
+; %2 = atomicrmw fsub fp128* %a, fp128 0xL00000000000000004000900000000000 seq_cst
+; CHECK: __sync_val_compare_and_swap_16
+%1 = load atomic fp128, fp128* %a seq_cst, align 16
+; CHECK: __sync_lock_test_and_set_16
+store atomic fp128 %1, fp128* %a seq_cst, align 16
+ret void
+}
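
Below is a minimal standalone sketch, not part of this commit, of the lowering the new file above pins down (the function name cas128 and its arguments are illustrative): with cx16 disabled, a 16-byte cmpxchg is expected to become a __sync_* libcall rather than an inline lock cmpxchg16b.

; RUN: llc < %s -mtriple=x86_64-- -mattr=-cx16 | FileCheck %s

; Without cmpxchg16b available, the 128-bit compare-and-swap should be
; lowered to a call to the __sync_val_compare_and_swap_16 runtime routine.
; CHECK-LABEL: cas128:
; CHECK: __sync_val_compare_and_swap_16
define i128 @cas128(ptr %p, i128 %old, i128 %new) nounwind {
entry:
  %pair = cmpxchg ptr %p, i128 %old, i128 %new seq_cst seq_cst
  %val = extractvalue { i128, i1 } %pair, 0
  ret i128 %val
}
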
Lines changed: 128 additions & 24 deletions
@@ -1,35 +1,139 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=-sse | FileCheck %s --check-prefix=X64-NOSSE
-; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs | FileCheck %s --check-prefix=X64-SSE
+; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=sse2,cx16 | FileCheck %s --check-prefixes=X64-SSE
+; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=avx,cx16 | FileCheck %s --check-prefixes=X64-AVX
+; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=avx512f,cx16 | FileCheck %s --check-prefixes=X64-AVX
 
-; Note: This test is testing that the lowering for atomics matches what we
-; currently emit for non-atomics + the atomic restriction. The presence of
-; particular lowering detail in these tests should not be read as requiring
-; that detail for correctness unless it's related to the atomicity itself.
-; (Specifically, there were reviewer questions about the lowering for halfs
-; and their calling convention which remain unresolved.)
+; Codegen of fp128 without cx16 is tested in atomic-nocx16.ll
 
 define void @store_fp128(ptr %fptr, fp128 %v) {
-; X64-NOSSE-LABEL: store_fp128:
-; X64-NOSSE: # %bb.0:
-; X64-NOSSE-NEXT: pushq %rax
-; X64-NOSSE-NEXT: .cfi_def_cfa_offset 16
-; X64-NOSSE-NEXT: callq __sync_lock_test_and_set_16@PLT
-; X64-NOSSE-NEXT: popq %rax
-; X64-NOSSE-NEXT: .cfi_def_cfa_offset 8
-; X64-NOSSE-NEXT: retq
-;
 ; X64-SSE-LABEL: store_fp128:
 ; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: subq $24, %rsp
-; X64-SSE-NEXT: .cfi_def_cfa_offset 32
-; X64-SSE-NEXT: movaps %xmm0, (%rsp)
-; X64-SSE-NEXT: movq (%rsp), %rsi
-; X64-SSE-NEXT: movq {{[0-9]+}}(%rsp), %rdx
-; X64-SSE-NEXT: callq __sync_lock_test_and_set_16@PLT
-; X64-SSE-NEXT: addq $24, %rsp
+; X64-SSE-NEXT: pushq %rbx
+; X64-SSE-NEXT: .cfi_def_cfa_offset 16
+; X64-SSE-NEXT: .cfi_offset %rbx, -16
+; X64-SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rbx
+; X64-SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
+; X64-SSE-NEXT: movq (%rdi), %rax
+; X64-SSE-NEXT: movq 8(%rdi), %rdx
+; X64-SSE-NEXT: .p2align 4, 0x90
+; X64-SSE-NEXT: .LBB0_1: # %atomicrmw.start
+; X64-SSE-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-SSE-NEXT: lock cmpxchg16b (%rdi)
+; X64-SSE-NEXT: jne .LBB0_1
+; X64-SSE-NEXT: # %bb.2: # %atomicrmw.end
+; X64-SSE-NEXT: popq %rbx
 ; X64-SSE-NEXT: .cfi_def_cfa_offset 8
 ; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: store_fp128:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: pushq %rbx
+; X64-AVX-NEXT: .cfi_def_cfa_offset 16
+; X64-AVX-NEXT: .cfi_offset %rbx, -16
+; X64-AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rbx
+; X64-AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
+; X64-AVX-NEXT: movq (%rdi), %rax
+; X64-AVX-NEXT: movq 8(%rdi), %rdx
+; X64-AVX-NEXT: .p2align 4, 0x90
+; X64-AVX-NEXT: .LBB0_1: # %atomicrmw.start
+; X64-AVX-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-AVX-NEXT: lock cmpxchg16b (%rdi)
+; X64-AVX-NEXT: jne .LBB0_1
+; X64-AVX-NEXT: # %bb.2: # %atomicrmw.end
+; X64-AVX-NEXT: popq %rbx
+; X64-AVX-NEXT: .cfi_def_cfa_offset 8
+; X64-AVX-NEXT: retq
 store atomic fp128 %v, ptr %fptr unordered, align 16
 ret void
 }
+
+define fp128 @load_fp128(ptr %fptr) {
+; X64-SSE-LABEL: load_fp128:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rbx
+; X64-SSE-NEXT: .cfi_def_cfa_offset 16
+; X64-SSE-NEXT: .cfi_offset %rbx, -16
+; X64-SSE-NEXT: xorl %eax, %eax
+; X64-SSE-NEXT: xorl %edx, %edx
+; X64-SSE-NEXT: xorl %ecx, %ecx
+; X64-SSE-NEXT: xorl %ebx, %ebx
+; X64-SSE-NEXT: lock cmpxchg16b (%rdi)
+; X64-SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
+; X64-SSE-NEXT: popq %rbx
+; X64-SSE-NEXT: .cfi_def_cfa_offset 8
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: load_fp128:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: pushq %rbx
+; X64-AVX-NEXT: .cfi_def_cfa_offset 16
+; X64-AVX-NEXT: .cfi_offset %rbx, -16
+; X64-AVX-NEXT: xorl %eax, %eax
+; X64-AVX-NEXT: xorl %edx, %edx
+; X64-AVX-NEXT: xorl %ecx, %ecx
+; X64-AVX-NEXT: xorl %ebx, %ebx
+; X64-AVX-NEXT: lock cmpxchg16b (%rdi)
+; X64-AVX-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
+; X64-AVX-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; X64-AVX-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
+; X64-AVX-NEXT: popq %rbx
+; X64-AVX-NEXT: .cfi_def_cfa_offset 8
+; X64-AVX-NEXT: retq
+%v = load atomic fp128, ptr %fptr unordered, align 16
+ret fp128 %v
+}
+
+define fp128 @exchange_fp128(ptr %fptr, fp128 %x) {
+; X64-SSE-LABEL: exchange_fp128:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rbx
+; X64-SSE-NEXT: .cfi_def_cfa_offset 16
+; X64-SSE-NEXT: .cfi_offset %rbx, -16
+; X64-SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rbx
+; X64-SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
+; X64-SSE-NEXT: movq (%rdi), %rax
+; X64-SSE-NEXT: movq 8(%rdi), %rdx
+; X64-SSE-NEXT: .p2align 4, 0x90
+; X64-SSE-NEXT: .LBB2_1: # %atomicrmw.start
+; X64-SSE-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-SSE-NEXT: lock cmpxchg16b (%rdi)
+; X64-SSE-NEXT: jne .LBB2_1
+; X64-SSE-NEXT: # %bb.2: # %atomicrmw.end
+; X64-SSE-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
+; X64-SSE-NEXT: popq %rbx
+; X64-SSE-NEXT: .cfi_def_cfa_offset 8
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: exchange_fp128:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: pushq %rbx
+; X64-AVX-NEXT: .cfi_def_cfa_offset 16
+; X64-AVX-NEXT: .cfi_offset %rbx, -16
+; X64-AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rbx
+; X64-AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
+; X64-AVX-NEXT: movq (%rdi), %rax
+; X64-AVX-NEXT: movq 8(%rdi), %rdx
+; X64-AVX-NEXT: .p2align 4, 0x90
+; X64-AVX-NEXT: .LBB2_1: # %atomicrmw.start
+; X64-AVX-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-AVX-NEXT: lock cmpxchg16b (%rdi)
+; X64-AVX-NEXT: jne .LBB2_1
+; X64-AVX-NEXT: # %bb.2: # %atomicrmw.end
+; X64-AVX-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; X64-AVX-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
+; X64-AVX-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
+; X64-AVX-NEXT: popq %rbx
+; X64-AVX-NEXT: .cfi_def_cfa_offset 8
+; X64-AVX-NEXT: retq
+%v = atomicrmw xchg ptr %fptr, fp128 %x monotonic, align 16
+ret fp128 %v
+}
+
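
As a rough companion sketch, also not from the commit: with cx16 enabled and no AVX, a 16-byte atomic store should be expanded inline into a lock cmpxchg16b loop rather than a __sync_lock_test_and_set_16 call. The function name store_i128 is illustrative, and it is an assumption that the i128 case expands the same way as the fp128 cases checked above.

; RUN: llc < %s -mtriple=x86_64-linux-generic -mattr=sse2,cx16 | FileCheck %s

; The atomic store is assumed to be expanded to a compare-exchange loop on
; the 16-byte location, mirroring the store_fp128 expansion checked above.
; CHECK-LABEL: store_i128:
; CHECK: lock cmpxchg16b
define void @store_i128(ptr %p, i128 %v) nounwind {
entry:
  store atomic i128 %v, ptr %p unordered, align 16
  ret void
}
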