@@ -18955,22 +18955,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
18955
18955
CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()});
18956
18956
return Builder.CreateCall(F, {Addr, Val});
18957
18957
}
18958
- case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
18959
- case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
18960
- Intrinsic::ID IID;
18961
- switch (BuiltinID) {
18962
- case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
18963
- IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16;
18964
- break;
18965
- case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
18966
- IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16;
18967
- break;
18968
- }
18969
- llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18970
- llvm::Value *Val = EmitScalarExpr(E->getArg(1));
18971
- llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()});
18972
- return Builder.CreateCall(F, {Addr, Val});
18973
- }
18974
18958
case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
18975
18959
case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
18976
18960
case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
@@ -19352,7 +19336,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
19352
19336
case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
19353
19337
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
19354
19338
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
19355
- case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: {
19339
+ case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
19340
+ case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
19341
+ case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
19356
19342
llvm::AtomicRMWInst::BinOp BinOp;
19357
19343
switch (BuiltinID) {
19358
19344
case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
@@ -19374,6 +19360,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
19374
19360
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
19375
19361
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
19376
19362
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
19363
+ case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
19364
+ case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
19377
19365
BinOp = llvm::AtomicRMWInst::FAdd;
19378
19366
break;
19379
19367
case AMDGPU::BI__builtin_amdgcn_ds_fminf:
@@ -19418,7 +19406,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
19418
19406
AO = AtomicOrdering::Monotonic;
19419
19407
19420
19408
// The v2bf16 builtins use i16 instead of a natural bfloat type.
19421
- if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16) {
19409
+ if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
19410
+ BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
19411
+ BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
19422
19412
llvm::Type *V2BF16Ty = FixedVectorType::get(
19423
19413
llvm::Type::getBFloatTy(Builder.getContext()), 2);
19424
19414
Val = Builder.CreateBitCast(Val, V2BF16Ty);
0 commit comments