Skip to content

[AMDGPU] Allow overload of __builtin_amdgcn_mov/update_dpp #112447

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Oct 21, 2024
Merged
4 changes: 2 additions & 2 deletions clang/include/clang/Basic/BuiltinsAMDGPU.def
Original file line number Diff line number Diff line change
Expand Up @@ -224,8 +224,8 @@ TARGET_BUILTIN(__builtin_amdgcn_frexp_exph, "sh", "nc", "16-bit-insts")
TARGET_BUILTIN(__builtin_amdgcn_fracth, "hh", "nc", "16-bit-insts")
TARGET_BUILTIN(__builtin_amdgcn_classh, "bhi", "nc", "16-bit-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_memrealtime, "WUi", "n", "s-memrealtime")
TARGET_BUILTIN(__builtin_amdgcn_mov_dpp, "iiIiIiIiIb", "nc", "dpp")
TARGET_BUILTIN(__builtin_amdgcn_update_dpp, "iiiIiIiIiIb", "nc", "dpp")
TARGET_BUILTIN(__builtin_amdgcn_mov_dpp, "iiIiIiIiIb", "nct", "dpp")
TARGET_BUILTIN(__builtin_amdgcn_update_dpp, "iiiIiIiIiIb", "nct", "dpp")
TARGET_BUILTIN(__builtin_amdgcn_s_dcache_wb, "v", "n", "gfx8-insts")
TARGET_BUILTIN(__builtin_amdgcn_perm, "UiUiUiUi", "nc", "gfx8-insts")

Expand Down
31 changes: 24 additions & 7 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19037,15 +19037,32 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
ASTContext::GetBuiltinTypeError Error;
getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
assert(Error == ASTContext::GE_None && "Should not codegen an error");
llvm::Type *DataTy = ConvertType(E->getArg(0)->getType());
unsigned Size = DataTy->getPrimitiveSizeInBits();
llvm::Type *IntTy =
llvm::IntegerType::get(Builder.getContext(), std::max(Size, 32u));
Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, IntTy);
assert(E->getNumArgs() == 5 || E->getNumArgs() == 6);
bool InsertOld = E->getNumArgs() == 5;
if (InsertOld)
Args.push_back(llvm::PoisonValue::get(IntTy));
for (unsigned I = 0; I != E->getNumArgs(); ++I) {
Args.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, I, E));
llvm::Value *V = EmitScalarOrConstFoldImmArg(ICEArguments, I, E);
if (I <= !InsertOld && Size < 32) {
if (!DataTy->isIntegerTy())
V = Builder.CreateBitCast(
V, llvm::IntegerType::get(Builder.getContext(), Size));
V = Builder.CreateZExtOrBitCast(V, IntTy);
}
llvm::Type *ExpTy =
F->getFunctionType()->getFunctionParamType(I + InsertOld);
Args.push_back(Builder.CreateTruncOrBitCast(V, ExpTy));
}
assert(Args.size() == 5 || Args.size() == 6);
if (Args.size() == 5)
Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType()));
Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
return Builder.CreateCall(F, Args);
Value *V = Builder.CreateCall(F, Args);
if (Size < 32 && !DataTy->isIntegerTy())
V = Builder.CreateTrunc(
V, llvm::IntegerType::get(Builder.getContext(), Size));
return Builder.CreateTruncOrBitCast(V, DataTy);
}
case AMDGPU::BI__builtin_amdgcn_permlane16:
case AMDGPU::BI__builtin_amdgcn_permlanex16:
Expand Down
38 changes: 38 additions & 0 deletions clang/lib/Sema/SemaAMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,44 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
OrderIndex = 0;
ScopeIndex = 1;
break;
case AMDGPU::BI__builtin_amdgcn_mov_dpp: {
if (SemaRef.checkArgCountRange(TheCall, 5, 5))
return true;
Expr *ValArg = TheCall->getArg(0);
QualType Ty = ValArg->getType();
// TODO: Vectors can also be supported.
if (!Ty->isArithmeticType() || Ty->isAnyComplexType()) {
SemaRef.Diag(ValArg->getBeginLoc(),
diag::err_typecheck_cond_expect_int_float)
<< Ty << ValArg->getSourceRange();
return true;
}
return false;
}
case AMDGPU::BI__builtin_amdgcn_update_dpp: {
if (SemaRef.checkArgCountRange(TheCall, 6, 6))
return true;
Expr *Args[2];
QualType ArgTys[2];
for (unsigned I = 0; I != 2; ++I) {
Args[I] = TheCall->getArg(I);
ArgTys[I] = Args[I]->getType();
// TODO: Vectors can also be supported.
if (!ArgTys[I]->isArithmeticType() || ArgTys[I]->isAnyComplexType()) {
SemaRef.Diag(Args[I]->getBeginLoc(),
diag::err_typecheck_cond_expect_int_float)
<< ArgTys[I] << Args[I]->getSourceRange();
return true;
}
}
if (ArgTys[0] != ArgTys[1]) {
SemaRef.Diag(Args[1]->getBeginLoc(),
diag::err_typecheck_call_different_arg_types)
<< ArgTys[0] << ArgTys[1];
return true;
}
return false;
}
default:
return false;
}
Expand Down
110 changes: 106 additions & 4 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
Original file line number Diff line number Diff line change
Expand Up @@ -102,20 +102,122 @@ void test_s_dcache_wb()
__builtin_amdgcn_s_dcache_wb();
}

// CHECK-LABEL: @test_mov_dpp
// CHECK-LABEL: @test_mov_dpp_int
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %src, i32 0, i32 0, i32 0, i1 false)
void test_mov_dpp(global int* out, int src)
void test_mov_dpp_int(global int* out, int src)
{
*out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false);
}

// CHECK-LABEL: @test_update_dpp
// CHECK-LABEL: @test_mov_dpp_long
// CHECK: %0 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 poison, i64 %x, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i64 %0,
void test_mov_dpp_long(long x, global long *p) {
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_mov_dpp_float
// CHECK: %0 = bitcast float %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i32 %1,
void test_mov_dpp_float(float x, global float *p) {
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_mov_dpp_double
// CHECK: %0 = bitcast double %x to i64
// CHECK-NEXT: %1 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 poison, i64 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i64 %1,
void test_mov_dpp_double(double x, global double *p) {
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_mov_dpp_short
// CHECK: %0 = zext i16 %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %2 = trunc i32 %1 to i16
// CHECK-NEXT: store i16 %2,
void test_mov_dpp_short(short x, global short *p) {
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_mov_dpp_char
// CHECK: %0 = zext i8 %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %2 = trunc i32 %1 to i8
// CHECK-NEXT: store i8 %2,
void test_mov_dpp_char(char x, global char *p) {
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_mov_dpp_half
// CHECK: %0 = load i16,
// CHECK: %1 = zext i16 %0 to i32
// CHECK-NEXT: %2 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %1, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %3 = trunc i32 %2 to i16
// CHECK-NEXT: store i16 %3,
void test_mov_dpp_half(half *x, global half *p) {
*p = __builtin_amdgcn_mov_dpp(*x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_update_dpp_int
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %arg1, i32 %arg2, i32 0, i32 0, i32 0, i1 false)
void test_update_dpp(global int* out, int arg1, int arg2)
void test_update_dpp_int(global int* out, int arg1, int arg2)
{
*out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0, false);
}

// CHECK-LABEL: @test_update_dpp_long
// CHECK: %0 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 %x, i64 %x, i32 257, i32 15, i32 15, i1 false)
// CHECk-NEXT: store i64 %0,
void test_update_dpp_long(long x, global long *p) {
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_update_dpp_float
// CHECK: %0 = bitcast float %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i32 %1,
void test_update_dpp_float(float x, global float *p) {
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_update_dpp_double
// CHECK: %0 = bitcast double %x to i64
// CHECK-NEXT: %1 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 %0, i64 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i64 %1,
void test_update_dpp_double(double x, global double *p) {
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_update_dpp_short
// CHECK: %0 = zext i16 %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %2 = trunc i32 %1 to i16
// CHECK-NEXT: store i16 %2,
void test_update_dpp_short(short x, global short *p) {
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_update_dpp_char
// CHECK: %0 = zext i8 %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %2 = trunc i32 %1 to i8
// CHECK-NEXT: store i8 %2,
void test_update_dpp_char(char x, global char *p) {
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_update_dpp_half
// CHECK: %0 = load i16,
// CHECK: %1 = zext i16 %0 to i32
// CHECK-NEXT: %2 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %1, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %3 = trunc i32 %2 to i16
// CHECK-NEXT: store i16 %3,
void test_update_dpp_half(half *x, global half *p) {
*p = __builtin_amdgcn_update_dpp(*x, *x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_ds_fadd
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
// CHECK: atomicrmw volatile fadd ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
Expand Down
50 changes: 50 additions & 0 deletions clang/test/SemaOpenCL/builtins-amdgcn-error-gfx9.cl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,57 @@

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

typedef int int2 __attribute__((ext_vector_type(2)));

struct S {
int x;
};

void test_gfx9_fmed3h(global half *out, half a, half b, half c)
{
*out = __builtin_amdgcn_fmed3h(a, b, c); // expected-error {{'__builtin_amdgcn_fmed3h' needs target feature gfx9-insts}}
}

void test_mov_dpp(global int* out, int src, int i, int2 i2, struct S s, float _Complex fc)
{
*out = __builtin_amdgcn_mov_dpp(src, i, 0, 0, false); // expected-error{{argument to '__builtin_amdgcn_mov_dpp' must be a constant integer}}
*out = __builtin_amdgcn_mov_dpp(src, 0, i, 0, false); // expected-error{{argument to '__builtin_amdgcn_mov_dpp' must be a constant integer}}
*out = __builtin_amdgcn_mov_dpp(src, 0, 0, i, false); // expected-error{{argument to '__builtin_amdgcn_mov_dpp' must be a constant integer}}
*out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, i); // expected-error{{argument to '__builtin_amdgcn_mov_dpp' must be a constant integer}}
*out = __builtin_amdgcn_mov_dpp(src, 0.1, 0, 0, false); // expected-error{{argument to '__builtin_amdgcn_mov_dpp' must be a constant integer}}
*out = __builtin_amdgcn_mov_dpp(src, 0, 0.1, 0, false); // expected-error{{argument to '__builtin_amdgcn_mov_dpp' must be a constant integer}}
*out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0.1, false); // expected-error{{argument to '__builtin_amdgcn_mov_dpp' must be a constant integer}}
*out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, 0.1); // expected-error{{argument to '__builtin_amdgcn_mov_dpp' must be a constant integer}}
*out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0); // expected-error{{too few arguments to function call, expected 5, have 4}}
*out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false, 1); // expected-error{{too many arguments to function call, expected at most 5, have 6}}
*out = __builtin_amdgcn_mov_dpp(out, 0, 0, 0, false); // expected-error{{used type '__global int *__private' where integer or floating point type is required}}
*out = __builtin_amdgcn_mov_dpp("aa", 0, 0, 0, false); // expected-error{{used type '__constant char[3]' where integer or floating point type is required}}
*out = __builtin_amdgcn_mov_dpp(i2, 0, 0, 0, false); // expected-error{{used type '__private int2' (vector of 2 'int' values) where integer or floating point type is required}}
*out = __builtin_amdgcn_mov_dpp(s, 0, 0, 0, false); // expected-error{{used type '__private struct S' where integer or floating point type is required}}
*out = __builtin_amdgcn_mov_dpp(fc, 0, 0, 0, false); // expected-error{{used type '__private _Complex float' where integer or floating point type is required}}
}

void test_update_dpp(global int* out, int arg1, int arg2, int i, int2 i2, long l, struct S s, float _Complex fc)
{
*out = __builtin_amdgcn_update_dpp(arg1, arg2, i, 0, 0, false); // expected-error{{argument to '__builtin_amdgcn_update_dpp' must be a constant integer}}
*out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, i, 0, false); // expected-error{{argument to '__builtin_amdgcn_update_dpp' must be a constant integer}}
*out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, i, false); // expected-error{{argument to '__builtin_amdgcn_update_dpp' must be a constant integer}}
*out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0, i); // expected-error{{argument to '__builtin_amdgcn_update_dpp' must be a constant integer}}
*out = __builtin_amdgcn_update_dpp(arg1, arg2, 0.1, 0, 0, false); // expected-error{{argument to '__builtin_amdgcn_update_dpp' must be a constant integer}}
*out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0.1, 0, false); // expected-error{{argument to '__builtin_amdgcn_update_dpp' must be a constant integer}}
*out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0.1, false); // expected-error{{argument to '__builtin_amdgcn_update_dpp' must be a constant integer}}
*out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0, 0.1); // expected-error{{argument to '__builtin_amdgcn_update_dpp' must be a constant integer}}
*out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0); // expected-error{{too few arguments to function call, expected 6, have 5}}
*out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0, false, 1); // expected-error{{too many arguments to function call, expected at most 6, have 7}}
*out = __builtin_amdgcn_update_dpp(out, arg2, 0, 0, 0, false); // expected-error{{used type '__global int *__private' where integer or floating point type is required}}
*out = __builtin_amdgcn_update_dpp(arg1, out, 0, 0, 0, false); // expected-error{{used type '__global int *__private' where integer or floating point type is required}}
*out = __builtin_amdgcn_update_dpp("aa", arg2, 0, 0, 0, false); // expected-error{{used type '__constant char[3]' where integer or floating point type is required}}
*out = __builtin_amdgcn_update_dpp(arg1, "aa", 0, 0, 0, false); // expected-error{{used type '__constant char[3]' where integer or floating point type is required}}
*out = __builtin_amdgcn_update_dpp(i2, arg2, 0, 0, 0, false); // expected-error{{used type '__private int2' (vector of 2 'int' values) where integer or floating point type is required}}
*out = __builtin_amdgcn_update_dpp(arg1, i2, 0, 0, 0, false); // expected-error{{used type '__private int2' (vector of 2 'int' values) where integer or floating point type is required}}
*out = __builtin_amdgcn_update_dpp(s, arg2, 0, 0, 0, false); // expected-error{{used type '__private struct S' where integer or floating point type is required}}
*out = __builtin_amdgcn_update_dpp(arg1, s, 0, 0, 0, false); // expected-error{{used type '__private struct S' where integer or floating point type is required}}
*out = __builtin_amdgcn_update_dpp(fc, arg2, 0, 0, 0, false); // expected-error{{used type '__private _Complex float' where integer or floating point type is required}}
*out = __builtin_amdgcn_update_dpp(arg1, fc, 0, 0, 0, false); // expected-error{{used type '__private _Complex float' where integer or floating point type is required}}
*out = __builtin_amdgcn_update_dpp(i, l, 0, 0, 0, false); // expected-error{{arguments are of different types ('__private int' vs '__private long')}}
}