Skip to content

Commit bf9b6b7

Browse files
JonPsson1
authored and committed
[SystemZ] Improve support for 16 byte atomic int/fp types and operations.
- Clang FE now has MaxAtomicPromoteWidth and MaxAtomicInlineWidth with a value of 128. It now produces IR instead of calls to __atomic instrinsics for 16 bytes as well. FP loads are first loaded as i128 and then casted to fp128. - Atomic __int128 (and long double) variables are aligned to 16 bytes (like gcc 14). - AtomicExpand pass now expands also 16 byte operations. - tests for __atomic builtins for all integer widths, with test for i128 in both align=8 and align=16 cases. - Resulting behavior of __atomic_is_lock_free / __atomic_always_lock_free / __c11_atomic_is_lock_free is tested in gnu-atomic_is_lock_free.c - shouldExpandAtomicRMWInIR() was already returning true for any FP type. Now that the backend is acepting 16 byte atomics, 16 byte FP atomicrmw:s now also get expanded by AtomicExpand. The default (and used) shouldCastAtomicRMWIInIR() says that if the type is FP, it is casted to integer (see atomicrmw-xchg-07.ll). - TODO: AtomicExpand pass handles with this patch expansion of i128 atomicrmw:s. As a next step smaller integer types should also be possible to handle this way instead of in backend. Original patch rebased. Remove the regalloc handling for CDSG loops. Tests improved.
1 parent 521b468 commit bf9b6b7

13 files changed

+2030
-62
lines changed

clang/lib/Basic/Targets/SystemZ.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo {
6060
resetDataLayout("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64"
6161
"-v128:64-a:8:16-n32:64");
6262
}
63-
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
63+
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 128;
6464
HasStrictFP = true;
6565
}
6666

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
// RUN: %clang_cc1 -triple s390x-linux-gnu -O3 -emit-llvm %s -o - | FileCheck %s
2+
//
3+
// Test alignment of 128 bit Atomic int/fp types, as well as loading
4+
// from memory with a simple addition. The fp128 is loaded as i128 and
5+
// then casted.
6+
7+
// CHECK: @Atomic_int128 = {{.*}} i128 0, align 16
8+
// CHECK: @Atomic_fp128 = {{.*}} fp128 0xL00000000000000000000000000000000, align 16
9+
10+
// CHECK-LABEL: @f1
11+
// CHECK: %atomic-load = load atomic i128, ptr @Atomic_int128 seq_cst, align 16
12+
// CHECK-NEXT: %add = add nsw i128 %atomic-load, 1
13+
// CHECK-NEXT: store i128 %add, ptr %agg.result, align 8
14+
// CHECK-NEXT: ret void
15+
16+
// CHECK-LABEL: @f2
17+
// CHECK: %atomic-load = load atomic i128, ptr @Atomic_fp128 seq_cst, align 16
18+
// CHECK-NEXT: %0 = bitcast i128 %atomic-load to fp128
19+
// CHECK-NEXT: %add = fadd fp128 %0, 0xL00000000000000003FFF000000000000
20+
// CHECK-NEXT: store fp128 %add, ptr %agg.result, align 8
21+
// CHECK-NEXT: ret void
22+
23+
24+
#include <stdatomic.h>
25+
26+
// 16-byte atomic globals; per the CHECK lines above, both get "align 16".
_Atomic __int128 Atomic_int128;
27+
_Atomic long double Atomic_fp128;
28+
29+
// Implicit seq_cst atomic read of Atomic_int128, plus 1; the CHECKs above
// expect an inline 'load atomic i128 ... seq_cst, align 16'.
__int128 f1() {
30+
return Atomic_int128 + 1;
31+
}
32+
33+
// Atomic read of the long double: loaded as i128 and then bitcast to fp128
// (see the CHECKs above), then 1.0 is added.
long double f2() {
34+
return Atomic_fp128 + 1.0;
35+
}
Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
// RUN: %clang_cc1 -triple s390x-linux-gnu -O1 -emit-llvm %s -o - | FileCheck %s
3+
//
4+
// Test GNU atomic builtins for __int128 aligned to 16 bytes, which should be
5+
// expanded to LLVM I/R by the front end.
6+
7+
#include <stdatomic.h>
8+
#include <stdint.h>
9+
10+
// Globals used by the tests below, explicitly 16-byte aligned so the
// __atomic builtins on __int128 can be expanded to inline IR (see file header).
__int128 Ptr __attribute__((aligned(16)));
11+
__int128 Ret __attribute__((aligned(16)));
12+
__int128 Val __attribute__((aligned(16)));
13+
__int128 Exp __attribute__((aligned(16)));
14+
__int128 Des __attribute__((aligned(16)));
15+
16+
// CHECK-LABEL: @f1(
17+
// CHECK-NEXT: entry:
18+
// CHECK-NEXT: [[TMP0:%.*]] = load atomic i128, ptr @Ptr seq_cst, align 16
19+
// CHECK-NEXT: store i128 [[TMP0]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2:![0-9]+]]
20+
// CHECK-NEXT: ret void
21+
//
22+
// __atomic_load_n: expands to 'load atomic i128 ... seq_cst, align 16' (CHECKs above).
__int128 f1() {
23+
return __atomic_load_n(&Ptr, memory_order_seq_cst);
24+
}
25+
26+
// CHECK-LABEL: @f2(
27+
// CHECK-NEXT: entry:
28+
// CHECK-NEXT: [[TMP0:%.*]] = load atomic i128, ptr @Ptr seq_cst, align 16
29+
// CHECK-NEXT: store i128 [[TMP0]], ptr @Ret, align 16
30+
// CHECK-NEXT: store i128 [[TMP0]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
31+
// CHECK-NEXT: ret void
32+
//
33+
// __atomic_load variant that copies the loaded value into Ret, then returns Ret.
__int128 f2() {
34+
__atomic_load(&Ptr, &Ret, memory_order_seq_cst);
35+
return Ret;
36+
}
37+
38+
// CHECK-LABEL: @f3(
39+
// CHECK-NEXT: entry:
40+
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
41+
// CHECK-NEXT: store atomic i128 [[TMP0]], ptr @Ptr seq_cst, align 16
42+
// CHECK-NEXT: ret void
43+
//
44+
// __atomic_store_n: expands to 'store atomic i128 ... seq_cst, align 16'.
void f3() {
45+
__atomic_store_n(&Ptr, Val, memory_order_seq_cst);
46+
}
47+
48+
// CHECK-LABEL: @f4(
49+
// CHECK-NEXT: entry:
50+
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16
51+
// CHECK-NEXT: store atomic i128 [[TMP0]], ptr @Ptr seq_cst, align 16
52+
// CHECK-NEXT: ret void
53+
//
54+
// __atomic_store variant taking the value by pointer; same inline atomic store.
void f4() {
55+
__atomic_store(&Ptr, &Val, memory_order_seq_cst);
56+
}
57+
58+
// CHECK-LABEL: @f5(
59+
// CHECK-NEXT: entry:
60+
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
61+
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
62+
// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
63+
// CHECK-NEXT: ret void
64+
//
65+
// __atomic_exchange_n: expands to 'atomicrmw xchg ... seq_cst, align 16'.
__int128 f5() {
66+
return __atomic_exchange_n(&Ptr, Val, memory_order_seq_cst);
67+
}
68+
69+
// CHECK-LABEL: @f6(
70+
// CHECK-NEXT: entry:
71+
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16
72+
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
73+
// CHECK-NEXT: store i128 [[TMP1]], ptr @Ret, align 16
74+
// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
75+
// CHECK-NEXT: ret void
76+
//
77+
// __atomic_exchange variant that stores the old value into Ret, then returns Ret.
__int128 f6() {
78+
__atomic_exchange(&Ptr, &Val, &Ret, memory_order_seq_cst);
79+
return Ret;
80+
}
81+
82+
// CHECK-LABEL: @f7(
83+
// CHECK-NEXT: entry:
84+
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Des, align 16, !tbaa [[TBAA2]]
85+
// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Exp, align 16
86+
// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP1]], i128 [[TMP0]] seq_cst seq_cst, align 16
87+
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 1
88+
// CHECK-NEXT: br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
89+
// CHECK: cmpxchg.store_expected:
90+
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0
91+
// CHECK-NEXT: store i128 [[TMP4]], ptr @Exp, align 16
92+
// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
93+
// CHECK: cmpxchg.continue:
94+
// CHECK-NEXT: ret i1 [[TMP3]]
95+
//
96+
// __atomic_compare_exchange_n (strong, weak=0): expands to an i128 cmpxchg;
// on failure the observed value is written back to Exp (CHECKs above).
_Bool f7() {
97+
return __atomic_compare_exchange_n(&Ptr, &Exp, Des, 0,
98+
memory_order_seq_cst, memory_order_seq_cst);
99+
}
100+
101+
// CHECK-LABEL: @f8(
102+
// CHECK-NEXT: entry:
103+
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Exp, align 16
104+
// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Des, align 16
105+
// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP0]], i128 [[TMP1]] seq_cst seq_cst, align 16
106+
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 1
107+
// CHECK-NEXT: br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
108+
// CHECK: cmpxchg.store_expected:
109+
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0
110+
// CHECK-NEXT: store i128 [[TMP4]], ptr @Exp, align 16
111+
// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
112+
// CHECK: cmpxchg.continue:
113+
// CHECK-NEXT: ret i1 [[TMP3]]
114+
//
115+
// __atomic_compare_exchange variant with the desired value passed by pointer;
// also expands to an i128 cmpxchg with write-back of the old value to Exp.
_Bool f8() {
116+
return __atomic_compare_exchange(&Ptr, &Exp, &Des, 0,
117+
memory_order_seq_cst, memory_order_seq_cst);
118+
}
119+
120+
// CHECK-LABEL: @f9(
121+
// CHECK-NEXT: entry:
122+
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
123+
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
124+
// CHECK-NEXT: [[TMP2:%.*]] = add i128 [[TMP1]], [[TMP0]]
125+
// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
126+
// CHECK-NEXT: ret void
127+
//
128+
// __atomic_add_fetch: atomicrmw add; the new value is recomputed as old + Val.
__int128 f9() {
129+
return __atomic_add_fetch(&Ptr, Val, memory_order_seq_cst);
130+
}
131+
132+
// CHECK-LABEL: @f10(
133+
// CHECK-NEXT: entry:
134+
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
135+
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw sub ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
136+
// CHECK-NEXT: [[TMP2:%.*]] = sub i128 [[TMP1]], [[TMP0]]
137+
// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
138+
// CHECK-NEXT: ret void
139+
//
140+
// __atomic_sub_fetch: atomicrmw sub; the new value is recomputed as old - Val.
__int128 f10() {
141+
return __atomic_sub_fetch(&Ptr, Val, memory_order_seq_cst);
142+
}
143+
144+
// CHECK-LABEL: @f11(
145+
// CHECK-NEXT: entry:
146+
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
147+
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw and ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
148+
// CHECK-NEXT: [[TMP2:%.*]] = and i128 [[TMP1]], [[TMP0]]
149+
// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
150+
// CHECK-NEXT: ret void
151+
//
152+
// __atomic_and_fetch: atomicrmw and; the new value is recomputed as old & Val.
__int128 f11() {
153+
return __atomic_and_fetch(&Ptr, Val, memory_order_seq_cst);
154+
}
155+
156+
// CHECK-LABEL: @f12(
157+
// CHECK-NEXT: entry:
158+
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
159+
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xor ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
160+
// CHECK-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], [[TMP0]]
161+
// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
162+
// CHECK-NEXT: ret void
163+
//
164+
// __atomic_xor_fetch: atomicrmw xor; the new value is recomputed as old ^ Val.
__int128 f12() {
165+
return __atomic_xor_fetch(&Ptr, Val, memory_order_seq_cst);
166+
}
167+
168+
// CHECK-LABEL: @f13(
169+
// CHECK-NEXT: entry:
170+
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
171+
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw or ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
172+
// CHECK-NEXT: [[TMP2:%.*]] = or i128 [[TMP1]], [[TMP0]]
173+
// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
174+
// CHECK-NEXT: ret void
175+
//
176+
// __atomic_or_fetch: atomicrmw or; the new value is recomputed as old | Val.
__int128 f13() {
177+
return __atomic_or_fetch(&Ptr, Val, memory_order_seq_cst);
178+
}
179+
180+
// CHECK-LABEL: @f14(
181+
// CHECK-NEXT: entry:
182+
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
183+
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw nand ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
184+
// CHECK-NEXT: [[TMP2:%.*]] = and i128 [[TMP1]], [[TMP0]]
185+
// CHECK-NEXT: [[TMP3:%.*]] = xor i128 [[TMP2]], -1
186+
// CHECK-NEXT: store i128 [[TMP3]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
187+
// CHECK-NEXT: ret void
188+
//
189+
// __atomic_nand_fetch: atomicrmw nand; the new value is recomputed as
// ~(old & Val) — the and+xor -1 pair in the CHECKs above.
__int128 f14() {
190+
return __atomic_nand_fetch(&Ptr, Val, memory_order_seq_cst);
191+
}
192+
193+
// CHECK-LABEL: @f15(
194+
// CHECK-NEXT: entry:
195+
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
196+
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
197+
// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
198+
// CHECK-NEXT: ret void
199+
//
200+
// __atomic_fetch_add: returns the OLD value straight from atomicrmw add.
__int128 f15() {
201+
return __atomic_fetch_add(&Ptr, Val, memory_order_seq_cst);
202+
}
203+
204+
// CHECK-LABEL: @f16(
205+
// CHECK-NEXT: entry:
206+
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
207+
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw sub ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
208+
// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
209+
// CHECK-NEXT: ret void
210+
//
211+
// __atomic_fetch_sub: returns the OLD value straight from atomicrmw sub.
__int128 f16() {
212+
return __atomic_fetch_sub(&Ptr, Val, memory_order_seq_cst);
213+
}
214+
215+
// CHECK-LABEL: @f17(
216+
// CHECK-NEXT: entry:
217+
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
218+
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw and ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
219+
// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
220+
// CHECK-NEXT: ret void
221+
//
222+
// __atomic_fetch_and: returns the OLD value straight from atomicrmw and.
__int128 f17() {
223+
return __atomic_fetch_and(&Ptr, Val, memory_order_seq_cst);
224+
}
225+
226+
// CHECK-LABEL: @f18(
227+
// CHECK-NEXT: entry:
228+
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
229+
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xor ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
230+
// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
231+
// CHECK-NEXT: ret void
232+
//
233+
// __atomic_fetch_xor: returns the OLD value straight from atomicrmw xor.
__int128 f18() {
234+
return __atomic_fetch_xor(&Ptr, Val, memory_order_seq_cst);
235+
}
236+
237+
// CHECK-LABEL: @f19(
238+
// CHECK-NEXT: entry:
239+
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
240+
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw or ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
241+
// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
242+
// CHECK-NEXT: ret void
243+
//
244+
// __atomic_fetch_or: returns the OLD value straight from atomicrmw or.
__int128 f19() {
245+
return __atomic_fetch_or(&Ptr, Val, memory_order_seq_cst);
246+
}
247+
248+
// CHECK-LABEL: @f20(
249+
// CHECK-NEXT: entry:
250+
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
251+
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw nand ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
252+
// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
253+
// CHECK-NEXT: ret void
254+
//
255+
// __atomic_fetch_nand: returns the OLD value straight from atomicrmw nand.
__int128 f20() {
256+
return __atomic_fetch_nand(&Ptr, Val, memory_order_seq_cst);
257+
}

0 commit comments

Comments
 (0)