@@ -11,7 +11,7 @@ typedef short __attribute__((ext_vector_type(2))) short2;
11
11
12
12
// CHECK-LABEL: test_local_add_2bf16
13
13
// CHECK: [[BC0:%.+]] = bitcast <2 x i16> {{.+}} to <2 x bfloat>
14
- // CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") monotonic, align 4
14
+ // CHECK-NEXT: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") monotonic, align 4
15
15
// CHECK-NEXT: bitcast <2 x bfloat> [[RMW]] to <2 x i16>
16
16
17
17
// GFX12-LABEL: test_local_add_2bf16
@@ -48,7 +48,7 @@ void test_local_add_2f16_noret(__local half2 *addr, half2 x) {
48
48
}
49
49
50
50
// CHECK-LABEL: test_flat_add_2f16
51
- // CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x half> %{{.+}} syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
51
+ // CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x half> %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
52
52
53
53
// GFX12-LABEL: test_flat_add_2f16
54
54
// GFX12: flat_atomic_pk_add_f16
@@ -57,15 +57,18 @@ half2 test_flat_add_2f16(__generic half2 *addr, half2 x) {
57
57
}
58
58
59
59
// CHECK-LABEL: test_flat_add_2bf16
60
- // CHECK: call <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0(ptr %{{.*}}, <2 x i16> %{{.*}})
60
+ // CHECK: [[BC:%.+]] = bitcast <2 x i16> %{{.+}} to <2 x bfloat>
61
+ // CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x bfloat> [[BC]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
62
+ // CHECK: bitcast <2 x bfloat> [[RMW]] to <2 x i16>
63
+
61
64
// GFX12-LABEL: test_flat_add_2bf16
62
65
// GFX12: flat_atomic_pk_add_bf16
63
66
short2 test_flat_add_2bf16(__generic short2 *addr, short2 x) {
64
67
return __builtin_amdgcn_flat_atomic_fadd_v2bf16(addr, x);
65
68
}
66
69
67
70
// CHECK-LABEL: test_global_add_half2
68
- // CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x half> %{{.+}} syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
71
+ // CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x half> %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
69
72
70
73
// GFX12-LABEL: test_global_add_half2
71
74
// GFX12: global_atomic_pk_add_f16 v2, v[0:1], v2, off th:TH_ATOMIC_RETURN
@@ -75,7 +78,7 @@ void test_global_add_half2(__global half2 *addr, half2 x) {
75
78
}
76
79
77
80
// CHECK-LABEL: test_global_add_half2_noret
78
- // CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x half> %{{.+}} syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
81
+ // CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x half> %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
79
82
80
83
// GFX12-LABEL: test_global_add_half2_noret
81
84
// GFX12: global_atomic_pk_add_f16 v[0:1], v2, off
@@ -84,7 +87,11 @@ void test_global_add_half2_noret(__global half2 *addr, half2 x) {
84
87
}
85
88
86
89
// CHECK-LABEL: test_global_add_2bf16
87
- // CHECK: call <2 x i16> @llvm.amdgcn.global.atomic.fadd.v2bf16.p1(ptr addrspace(1) %{{.*}}, <2 x i16> %{{.*}})
90
+ // CHECK: [[BC:%.+]] = bitcast <2 x i16> %{{.+}} to <2 x bfloat>
91
+ // CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x bfloat> [[BC]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
92
+ // CHECK: bitcast <2 x bfloat> [[RMW]] to <2 x i16>
93
+
94
+
88
95
// GFX12-LABEL: test_global_add_2bf16
89
96
// GFX12: global_atomic_pk_add_bf16 v2, v[0:1], v2, off th:TH_ATOMIC_RETURN
90
97
void test_global_add_2bf16(__global short2 *addr, short2 x) {
@@ -93,7 +100,10 @@ void test_global_add_2bf16(__global short2 *addr, short2 x) {
93
100
}
94
101
95
102
// CHECK-LABEL: test_global_add_2bf16_noret
96
- // CHECK: call <2 x i16> @llvm.amdgcn.global.atomic.fadd.v2bf16.p1(ptr addrspace(1) %{{.*}}, <2 x i16> %{{.*}})
103
+ // CHECK: [[BC:%.+]] = bitcast <2 x i16> %{{.+}} to <2 x bfloat>
104
+ // CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x bfloat> [[BC]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
105
+ // CHECK: bitcast <2 x bfloat> [[RMW]] to <2 x i16>
106
+
97
107
// GFX12-LABEL: test_global_add_2bf16_noret
98
108
// GFX12: global_atomic_pk_add_bf16 v[0:1], v2, off
99
109
void test_global_add_2bf16_noret(__global short2 *addr, short2 x) {
0 commit comments