@@ -18946,22 +18946,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
18946
18946
CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()});
18947
18947
return Builder.CreateCall(F, {Addr, Val});
18948
18948
}
18949
- case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
18950
- case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
18951
- Intrinsic::ID IID;
18952
- switch (BuiltinID) {
18953
- case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
18954
- IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16;
18955
- break;
18956
- case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
18957
- IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16;
18958
- break;
18959
- }
18960
- llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18961
- llvm::Value *Val = EmitScalarExpr(E->getArg(1));
18962
- llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()});
18963
- return Builder.CreateCall(F, {Addr, Val});
18964
- }
18965
18949
case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
18966
18950
case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
18967
18951
case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
@@ -19343,7 +19327,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
19343
19327
case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
19344
19328
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
19345
19329
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
19346
- case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: {
19330
+ case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
19331
+ case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
19332
+ case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
19347
19333
llvm::AtomicRMWInst::BinOp BinOp;
19348
19334
switch (BuiltinID) {
19349
19335
case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
@@ -19365,6 +19351,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
19365
19351
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
19366
19352
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
19367
19353
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
19354
+ case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
19355
+ case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
19368
19356
BinOp = llvm::AtomicRMWInst::FAdd;
19369
19357
break;
19370
19358
case AMDGPU::BI__builtin_amdgcn_ds_fminf:
@@ -19409,7 +19397,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
19409
19397
AO = AtomicOrdering::Monotonic;
19410
19398
19411
19399
// The v2bf16 builtins use i16 instead of a natural bfloat type.
19412
- if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16) {
19400
+ if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
19401
+ BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
19402
+ BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
19413
19403
llvm::Type *V2BF16Ty = FixedVectorType::get(
19414
19404
llvm::Type::getBFloatTy(Builder.getContext()), 2);
19415
19405
Val = Builder.CreateBitCast(Val, V2BF16Ty);
0 commit comments