Skip to content

Commit 9d3d399

Browse files
committed
clang/AMDGPU: Emit atomicrmw from {global|flat}_atomic_fadd_v2f16 builtins
1 parent 89d0046 commit 9d3d399

File tree

4 files changed: 15 additions and 19 deletions

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 6 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -18633,22 +18633,15 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1863318633
Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
1863418634
return Builder.CreateCall(F, { Src0, Builder.getFalse() });
1863518635
}
18636-
case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
1863718636
case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
1863818637
case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
1863918638
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
1864018639
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
1864118640
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
18642-
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
18643-
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: {
18641+
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: {
1864418642
Intrinsic::ID IID;
1864518643
llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
1864618644
switch (BuiltinID) {
18647-
case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
18648-
ArgTy = llvm::FixedVectorType::get(
18649-
llvm::Type::getHalfTy(getLLVMContext()), 2);
18650-
IID = Intrinsic::amdgcn_global_atomic_fadd;
18651-
break;
1865218645
case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
1865318646
IID = Intrinsic::amdgcn_global_atomic_fmin;
1865418647
break;
@@ -18668,11 +18661,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1866818661
ArgTy = llvm::Type::getFloatTy(getLLVMContext());
1866918662
IID = Intrinsic::amdgcn_flat_atomic_fadd;
1867018663
break;
18671-
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
18672-
ArgTy = llvm::FixedVectorType::get(
18673-
llvm::Type::getHalfTy(getLLVMContext()), 2);
18674-
IID = Intrinsic::amdgcn_flat_atomic_fadd;
18675-
break;
1867618664
}
1867718665
llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
1867818666
llvm::Value *Val = EmitScalarExpr(E->getArg(1));
@@ -19065,7 +19053,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1906519053
case AMDGPU::BI__builtin_amdgcn_ds_fminf:
1906619054
case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
1906719055
case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
19068-
case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: {
19056+
case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
19057+
case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
19058+
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: {
1906919059
llvm::AtomicRMWInst::BinOp BinOp;
1907019060
switch (BuiltinID) {
1907119061
case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
@@ -19083,6 +19073,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1908319073
case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
1908419074
case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
1908519075
case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
19076+
case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
19077+
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
1908619078
BinOp = llvm::AtomicRMWInst::FAdd;
1908719079
break;
1908819080
case AMDGPU::BI__builtin_amdgcn_ds_fminf:

clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,8 @@ void test_local_add_2f16_noret(__local half2 *addr, half2 x) {
4848
}
4949

5050
// CHECK-LABEL: test_flat_add_2f16
51-
// CHECK: call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr %{{.*}}, <2 x half> %{{.*}})
51+
// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x half> %{{.+}} syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
52+
5253
// GFX12-LABEL: test_flat_add_2f16
5354
// GFX12: flat_atomic_pk_add_f16
5455
half2 test_flat_add_2f16(__generic half2 *addr, half2 x) {
@@ -64,7 +65,8 @@ short2 test_flat_add_2bf16(__generic short2 *addr, short2 x) {
6465
}
6566

6667
// CHECK-LABEL: test_global_add_half2
67-
// CHECK: call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %{{.*}}, <2 x half> %{{.*}})
68+
// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x half> %{{.+}} syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
69+
6870
// GFX12-LABEL: test_global_add_half2
6971
// GFX12: global_atomic_pk_add_f16 v2, v[0:1], v2, off th:TH_ATOMIC_RETURN
7072
void test_global_add_half2(__global half2 *addr, half2 x) {
@@ -73,7 +75,8 @@ void test_global_add_half2(__global half2 *addr, half2 x) {
7375
}
7476

7577
// CHECK-LABEL: test_global_add_half2_noret
76-
// CHECK: call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %{{.*}}, <2 x half> %{{.*}})
78+
// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x half> %{{.+}} syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
79+
7780
// GFX12-LABEL: test_global_add_half2_noret
7881
// GFX12: global_atomic_pk_add_f16 v[0:1], v2, off
7982
void test_global_add_half2_noret(__global half2 *addr, half2 x) {

clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ void test_global_add_f64(__global double *addr, double x) {
1818
}
1919

2020
// CHECK-LABEL: test_global_add_half2
21-
// CHECK: call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %{{.*}}, <2 x half> %{{.*}})
21+
// CHECK: = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x half> %{{.+}} syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
2222
// GFX90A-LABEL: test_global_add_half2
2323
// GFX90A: global_atomic_pk_add_f16 v2, v[0:1], v2, off glc
2424
void test_global_add_half2(__global half2 *addr, half2 x) {

clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,8 @@ half2 test_flat_add_f32(__generic float *addr, float x) {
1818
}
1919

2020
// CHECK-LABEL: test_flat_add_2f16
21-
// CHECK: call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr %{{.*}}, <2 x half> %{{.*}})
21+
// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x half> %{{.+}} syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
22+
2223
// GFX940-LABEL: test_flat_add_2f16
2324
// GFX940: flat_atomic_pk_add_f16
2425
half2 test_flat_add_2f16(__generic half2 *addr, half2 x) {

0 commit comments

Comments
 (0)