@@ -18681,22 +18681,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
18681
18681
CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()});
18682
18682
return Builder.CreateCall(F, {Addr, Val});
18683
18683
}
18684
- case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
18685
- case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
18686
- Intrinsic::ID IID;
18687
- switch (BuiltinID) {
18688
- case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
18689
- IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16;
18690
- break;
18691
- case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
18692
- IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16;
18693
- break;
18694
- }
18695
- llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18696
- llvm::Value *Val = EmitScalarExpr(E->getArg(1));
18697
- llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()});
18698
- return Builder.CreateCall(F, {Addr, Val});
18699
- }
18700
18684
case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
18701
18685
case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
18702
18686
case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
@@ -19068,7 +19052,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
19068
19052
case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
19069
19053
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
19070
19054
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
19071
- case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: {
19055
+ case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
19056
+ case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
19057
+ case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
19072
19058
llvm::AtomicRMWInst::BinOp BinOp;
19073
19059
switch (BuiltinID) {
19074
19060
case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
@@ -19090,6 +19076,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
19090
19076
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
19091
19077
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
19092
19078
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
19079
+ case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
19080
+ case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
19093
19081
BinOp = llvm::AtomicRMWInst::FAdd;
19094
19082
break;
19095
19083
}
@@ -19126,7 +19114,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
19126
19114
AO = AtomicOrdering::SequentiallyConsistent;
19127
19115
19128
19116
// The v2bf16 builtin uses i16 instead of a natural bfloat type.
19129
- if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16) {
19117
+ if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
19118
+ BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
19119
+ BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
19130
19120
llvm::Type *V2BF16Ty = FixedVectorType::get(
19131
19121
llvm::Type::getBFloatTy(Builder.getContext()), 2);
19132
19122
Val = Builder.CreateBitCast(Val, V2BF16Ty);
0 commit comments