Skip to content

Commit 74aa1ba

Browse files
committed
Use monotonic ordering (instead of seq_cst) for the atomicrmw fadd emitted by the AMDGPU atomic fadd builtins
1 parent: e7e1699 · commit: 74aa1ba

File tree

6 files changed: 16 additions (+16) and 16 deletions (-16)

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19290,7 +19290,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1929019290
// The global/flat cases need to use agent scope to consistently produce
1929119291
// the native instruction instead of a cmpxchg expansion.
1929219292
SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
19293-
AO = AtomicOrdering::SequentiallyConsistent;
19293+
AO = AtomicOrdering::Monotonic;
1929419294

1929519295
// The v2bf16 builtin uses i16 instead of a natural bfloat type.
1929619296
if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16) {

clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ void test_s_wait_event_export_ready() {
4949
}
5050

5151
// CHECK-LABEL: @test_global_add_f32
52-
// CHECK: = atomicrmw fadd ptr addrspace(1) %addr, float %x syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
52+
// CHECK: = atomicrmw fadd ptr addrspace(1) %addr, float %x syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
5353
void test_global_add_f32(float *rtn, global float *addr, float x) {
5454
*rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x);
5555
}

clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ typedef short __attribute__((ext_vector_type(2))) short2;
1111

1212
// CHECK-LABEL: test_local_add_2bf16
1313
// CHECK: [[BC0:%.+]] = bitcast <2 x i16> {{.+}} to <2 x bfloat>
14-
// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") seq_cst, align 4
14+
// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") monotonic, align 4
1515
// CHECK-NEXT: bitcast <2 x bfloat> [[RMW]] to <2 x i16>
1616

1717
// GFX12-LABEL: test_local_add_2bf16
@@ -22,7 +22,7 @@ short2 test_local_add_2bf16(__local short2 *addr, short2 x) {
2222

2323
// CHECK-LABEL: test_local_add_2bf16_noret
2424
// CHECK: [[BC0:%.+]] = bitcast <2 x i16> {{.+}} to <2 x bfloat>
25-
// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") seq_cst, align 4
25+
// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") monotonic, align 4
2626
// CHECK-NEXT: bitcast <2 x bfloat> [[RMW]] to <2 x i16>
2727

2828
// GFX12-LABEL: test_local_add_2bf16_noret
@@ -32,15 +32,15 @@ void test_local_add_2bf16_noret(__local short2 *addr, short2 x) {
3232
}
3333

3434
// CHECK-LABEL: test_local_add_2f16
35-
// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x half> %{{.+}} seq_cst, align 4
35+
// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x half> %{{.+}} monotonic, align 4
3636
// GFX12-LABEL: test_local_add_2f16
3737
// GFX12: ds_pk_add_rtn_f16
3838
half2 test_local_add_2f16(__local half2 *addr, half2 x) {
3939
return __builtin_amdgcn_ds_atomic_fadd_v2f16(addr, x);
4040
}
4141

4242
// CHECK-LABEL: test_local_add_2f16_noret
43-
// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x half> %{{.+}} seq_cst, align 4
43+
// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x half> %{{.+}} monotonic, align 4
4444
// GFX12-LABEL: test_local_add_2f16_noret
4545
// GFX12: ds_pk_add_f16
4646
void test_local_add_2f16_noret(__local half2 *addr, half2 x) {

clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx8.cl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
// REQUIRES: amdgpu-registered-target
77

88
// CHECK-LABEL: test_fadd_local
9-
// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, float %{{.+}} seq_cst, align 4
9+
// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, float %{{.+}} monotonic, align 4
1010
// GFX8-LABEL: test_fadd_local$local:
1111
// GFX8: ds_add_rtn_f32 v2, v0, v1
1212
// GFX8: s_endpgm
@@ -16,7 +16,7 @@ kernel void test_fadd_local(__local float *ptr, float val){
1616
}
1717

1818
// CHECK-LABEL: test_fadd_local_volatile
19-
// CHECK: = atomicrmw volatile fadd ptr addrspace(3) %{{.+}}, float %{{.+}} seq_cst, align 4
19+
// CHECK: = atomicrmw volatile fadd ptr addrspace(3) %{{.+}}, float %{{.+}} monotonic, align 4
2020
kernel void test_fadd_local_volatile(volatile __local float *ptr, float val){
2121
volatile float *res;
2222
*res = __builtin_amdgcn_ds_atomic_fadd_f32(ptr, val);

clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
typedef half __attribute__((ext_vector_type(2))) half2;
1010

1111
// CHECK-LABEL: test_global_add_f64
12-
// CHECK: = atomicrmw fadd ptr addrspace(1) %{{.+}}, double %{{.+}} syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
12+
// CHECK: = atomicrmw fadd ptr addrspace(1) %{{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
1313
// GFX90A-LABEL: test_global_add_f64$local:
1414
// GFX90A: global_atomic_add_f64
1515
void test_global_add_f64(__global double *addr, double x) {
@@ -99,7 +99,7 @@ void test_flat_global_max_f64(__global double *addr, double x){
9999
}
100100

101101
// CHECK-LABEL: test_ds_add_local_f64
102-
// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, double %{{.+}} seq_cst, align 8
102+
// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, double %{{.+}} monotonic, align 8
103103
// GFX90A: test_ds_add_local_f64$local
104104
// GFX90A: ds_add_rtn_f64
105105
void test_ds_add_local_f64(__local double *addr, double x){
@@ -108,7 +108,7 @@ void test_ds_add_local_f64(__local double *addr, double x){
108108
}
109109

110110
// CHECK-LABEL: test_ds_addf_local_f32
111-
// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, float %{{.+}} seq_cst, align 4
111+
// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, float %{{.+}} monotonic, align 4
112112
// GFX90A-LABEL: test_ds_addf_local_f32$local
113113
// GFX90A: ds_add_rtn_f32
114114
void test_ds_addf_local_f32(__local float *addr, float x){
@@ -117,7 +117,7 @@ void test_ds_addf_local_f32(__local float *addr, float x){
117117
}
118118

119119
// CHECK-LABEL: @test_global_add_f32
120-
// CHECK: = atomicrmw fadd ptr addrspace(1) %{{.+}}, float %{{.+}} syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
120+
// CHECK: = atomicrmw fadd ptr addrspace(1) %{{.+}}, float %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
121121
void test_global_add_f32(float *rtn, global float *addr, float x) {
122122
*rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x);
123123
}

clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ short2 test_global_add_2bf16(__global short2 *addr, short2 x) {
4444
// CHECK-LABEL: test_local_add_2bf16
4545

4646
// CHECK: [[BC0:%.+]] = bitcast <2 x i16> {{.+}} to <2 x bfloat>
47-
// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") seq_cst, align 4{{$}}
47+
// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") monotonic, align 4{{$}}
4848
// CHECK-NEXT: bitcast <2 x bfloat> [[RMW]] to <2 x i16>
4949

5050
// GFX940-LABEL: test_local_add_2bf16
@@ -54,23 +54,23 @@ short2 test_local_add_2bf16(__local short2 *addr, short2 x) {
5454
}
5555

5656
// CHECK-LABEL: test_local_add_2f16
57-
// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x half> %{{.+}} seq_cst, align 4
57+
// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x half> %{{.+}} monotonic, align 4
5858
// GFX940-LABEL: test_local_add_2f16
5959
// GFX940: ds_pk_add_rtn_f16
6060
half2 test_local_add_2f16(__local half2 *addr, half2 x) {
6161
return __builtin_amdgcn_ds_atomic_fadd_v2f16(addr, x);
6262
}
6363

6464
// CHECK-LABEL: test_local_add_2f16_noret
65-
// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x half> %{{.+}} seq_cst, align 4
65+
// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x half> %{{.+}} monotonic, align 4
6666
// GFX940-LABEL: test_local_add_2f16_noret
6767
// GFX940: ds_pk_add_f16
6868
void test_local_add_2f16_noret(__local half2 *addr, half2 x) {
6969
__builtin_amdgcn_ds_atomic_fadd_v2f16(addr, x);
7070
}
7171

7272
// CHECK-LABEL: @test_global_add_f32
73-
// CHECK: = atomicrmw fadd ptr addrspace(1) %{{.+}}, float %{{.+}} syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
73+
// CHECK: = atomicrmw fadd ptr addrspace(1) %{{.+}}, float %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
7474
void test_global_add_f32(float *rtn, global float *addr, float x) {
7575
*rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x);
7676
}

0 commit comments

Comments
 (0)