3
3
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=atomic-expand %s | FileCheck -check-prefixes=GCN,GFX9 %s
4
4
5
5
define float @test_atomicrmw_fmax_f32_flat (ptr %ptr , float %value ) {
6
- ; GFX7-LABEL: @test_atomicrmw_fmax_f32_flat(
7
- ; GFX7-NEXT: [[RES:%.*]] = atomicrmw fmax ptr [[PTR:%.*]], float [[VALUE:%.*]] seq_cst, align 4
8
- ; GFX7-NEXT: ret float [[RES]]
9
- ;
10
- ; GFX9-LABEL: @test_atomicrmw_fmax_f32_flat(
11
- ; GFX9-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4
12
- ; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
13
- ; GFX9: atomicrmw.start:
14
- ; GFX9-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
15
- ; GFX9-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE:%.*]])
16
- ; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
17
- ; GFX9-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
18
- ; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
19
- ; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
20
- ; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
21
- ; GFX9-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
22
- ; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
23
- ; GFX9: atomicrmw.end:
24
- ; GFX9-NEXT: ret float [[TMP6]]
6
+ ; GCN-LABEL: @test_atomicrmw_fmax_f32_flat(
7
+ ; GCN-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4
8
+ ; GCN-NEXT: br label [[ATOMICRMW_START:%.*]]
9
+ ; GCN: atomicrmw.start:
10
+ ; GCN-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
11
+ ; GCN-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE:%.*]])
12
+ ; GCN-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
13
+ ; GCN-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
14
+ ; GCN-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
15
+ ; GCN-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
16
+ ; GCN-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
17
+ ; GCN-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
18
+ ; GCN-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
19
+ ; GCN: atomicrmw.end:
20
+ ; GCN-NEXT: ret float [[TMP6]]
25
21
;
26
22
%res = atomicrmw fmax ptr %ptr , float %value seq_cst
27
23
ret float %res
28
24
}
29
25
30
26
define float @test_atomicrmw_fmax_f32_global (ptr addrspace (1 ) %ptr , float %value ) {
31
- ; GFX7-LABEL: @test_atomicrmw_fmax_f32_global(
32
- ; GFX7-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]] seq_cst, align 4
33
- ; GFX7-NEXT: ret float [[RES]]
34
- ;
35
- ; GFX9-LABEL: @test_atomicrmw_fmax_f32_global(
36
- ; GFX9-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
37
- ; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
38
- ; GFX9: atomicrmw.start:
39
- ; GFX9-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
40
- ; GFX9-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE:%.*]])
41
- ; GFX9-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
42
- ; GFX9-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
43
- ; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
44
- ; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
45
- ; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
46
- ; GFX9-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
47
- ; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
48
- ; GFX9: atomicrmw.end:
49
- ; GFX9-NEXT: ret float [[TMP6]]
27
+ ; GCN-LABEL: @test_atomicrmw_fmax_f32_global(
28
+ ; GCN-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4
29
+ ; GCN-NEXT: br label [[ATOMICRMW_START:%.*]]
30
+ ; GCN: atomicrmw.start:
31
+ ; GCN-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
32
+ ; GCN-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE:%.*]])
33
+ ; GCN-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
34
+ ; GCN-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
35
+ ; GCN-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
36
+ ; GCN-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
37
+ ; GCN-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
38
+ ; GCN-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
39
+ ; GCN-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
40
+ ; GCN: atomicrmw.end:
41
+ ; GCN-NEXT: ret float [[TMP6]]
50
42
;
51
43
%res = atomicrmw fmax ptr addrspace (1 ) %ptr , float %value seq_cst
52
44
ret float %res
@@ -195,50 +187,42 @@ define half @test_atomicrmw_fmax_f16_local(ptr addrspace(3) %ptr, half %value) {
195
187
}
196
188
197
189
define double @test_atomicrmw_fmax_f64_flat (ptr %ptr , double %value ) {
198
- ; GFX7-LABEL: @test_atomicrmw_fmax_f64_flat(
199
- ; GFX7-NEXT: [[RES:%.*]] = atomicrmw fmax ptr [[PTR:%.*]], double [[VALUE:%.*]] seq_cst, align 8
200
- ; GFX7-NEXT: ret double [[RES]]
201
- ;
202
- ; GFX9-LABEL: @test_atomicrmw_fmax_f64_flat(
203
- ; GFX9-NEXT: [[TMP1:%.*]] = load double, ptr [[PTR:%.*]], align 8
204
- ; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
205
- ; GFX9: atomicrmw.start:
206
- ; GFX9-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
207
- ; GFX9-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE:%.*]])
208
- ; GFX9-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
209
- ; GFX9-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
210
- ; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
211
- ; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
212
- ; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
213
- ; GFX9-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
214
- ; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
215
- ; GFX9: atomicrmw.end:
216
- ; GFX9-NEXT: ret double [[TMP6]]
190
+ ; GCN-LABEL: @test_atomicrmw_fmax_f64_flat(
191
+ ; GCN-NEXT: [[TMP1:%.*]] = load double, ptr [[PTR:%.*]], align 8
192
+ ; GCN-NEXT: br label [[ATOMICRMW_START:%.*]]
193
+ ; GCN: atomicrmw.start:
194
+ ; GCN-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
195
+ ; GCN-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE:%.*]])
196
+ ; GCN-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
197
+ ; GCN-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
198
+ ; GCN-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
199
+ ; GCN-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
200
+ ; GCN-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
201
+ ; GCN-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
202
+ ; GCN-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
203
+ ; GCN: atomicrmw.end:
204
+ ; GCN-NEXT: ret double [[TMP6]]
217
205
;
218
206
%res = atomicrmw fmax ptr %ptr , double %value seq_cst
219
207
ret double %res
220
208
}
221
209
222
210
define double @test_atomicrmw_fmax_f64_global (ptr addrspace (1 ) %ptr , double %value ) {
223
- ; GFX7-LABEL: @test_atomicrmw_fmax_f64_global(
224
- ; GFX7-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]] seq_cst, align 8
225
- ; GFX7-NEXT: ret double [[RES]]
226
- ;
227
- ; GFX9-LABEL: @test_atomicrmw_fmax_f64_global(
228
- ; GFX9-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR:%.*]], align 8
229
- ; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
230
- ; GFX9: atomicrmw.start:
231
- ; GFX9-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
232
- ; GFX9-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE:%.*]])
233
- ; GFX9-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
234
- ; GFX9-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
235
- ; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
236
- ; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
237
- ; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
238
- ; GFX9-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
239
- ; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
240
- ; GFX9: atomicrmw.end:
241
- ; GFX9-NEXT: ret double [[TMP6]]
211
+ ; GCN-LABEL: @test_atomicrmw_fmax_f64_global(
212
+ ; GCN-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR:%.*]], align 8
213
+ ; GCN-NEXT: br label [[ATOMICRMW_START:%.*]]
214
+ ; GCN: atomicrmw.start:
215
+ ; GCN-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
216
+ ; GCN-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE:%.*]])
217
+ ; GCN-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
218
+ ; GCN-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
219
+ ; GCN-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
220
+ ; GCN-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
221
+ ; GCN-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
222
+ ; GCN-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
223
+ ; GCN-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
224
+ ; GCN: atomicrmw.end:
225
+ ; GCN-NEXT: ret double [[TMP6]]
242
226
;
243
227
%res = atomicrmw fmax ptr addrspace (1 ) %ptr , double %value seq_cst
244
228
ret double %res
@@ -254,26 +238,25 @@ define double @test_atomicrmw_fmax_f64_local(ptr addrspace(3) %ptr, double %valu
254
238
}
255
239
256
240
define double @test_atomicrmw_fmax_f64_global_strictfp (ptr addrspace (1 ) %ptr , double %value ) strictfp {
257
- ; GFX7-LABEL: @test_atomicrmw_fmax_f64_global_strictfp(
258
- ; GFX7-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]] seq_cst, align 8
259
- ; GFX7-NEXT: ret double [[RES]]
260
- ;
261
- ; GFX9-LABEL: @test_atomicrmw_fmax_f64_global_strictfp(
262
- ; GFX9-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR:%.*]], align 8
263
- ; GFX9-NEXT: br label [[ATOMICRMW_START:%.*]]
264
- ; GFX9: atomicrmw.start:
265
- ; GFX9-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
266
- ; GFX9-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[LOADED]], double [[VALUE:%.*]], metadata !"fpexcept.strict") #[[ATTR4:[0-9]+]]
267
- ; GFX9-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
268
- ; GFX9-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
269
- ; GFX9-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
270
- ; GFX9-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
271
- ; GFX9-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
272
- ; GFX9-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
273
- ; GFX9-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
274
- ; GFX9: atomicrmw.end:
275
- ; GFX9-NEXT: ret double [[TMP6]]
241
+ ; GCN-LABEL: @test_atomicrmw_fmax_f64_global_strictfp(
242
+ ; GCN-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR:%.*]], align 8
243
+ ; GCN-NEXT: br label [[ATOMICRMW_START:%.*]]
244
+ ; GCN: atomicrmw.start:
245
+ ; GCN-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
246
+ ; GCN-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[LOADED]], double [[VALUE:%.*]], metadata !"fpexcept.strict") #[[ATTR4:[0-9]+]]
247
+ ; GCN-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
248
+ ; GCN-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
249
+ ; GCN-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
250
+ ; GCN-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
251
+ ; GCN-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
252
+ ; GCN-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
253
+ ; GCN-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
254
+ ; GCN: atomicrmw.end:
255
+ ; GCN-NEXT: ret double [[TMP6]]
276
256
;
277
257
%res = atomicrmw fmax ptr addrspace (1 ) %ptr , double %value seq_cst
278
258
ret double %res
279
259
}
260
+ ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
261
+ ; GFX7: {{.*}}
262
+ ; GFX9: {{.*}}
0 commit comments