-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU] Allow overload of __builtin_amdgcn_mov_dpp8 #113610
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Apply the same handling to __builtin_amdgcn_mov_dpp8 as for __builtin_amdgcn_mov_dpp.
@llvm/pr-subscribers-clang-codegen @llvm/pr-subscribers-clang Author: Stanislav Mekhanoshin (rampitec). Changes: The same handling as for __builtin_amdgcn_mov_dpp. Full diff: https://github.com/llvm/llvm-project/pull/113610.diff 6 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index e887213aa945e6..29001e32085151 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -282,7 +282,7 @@ TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_bf8_bf8, "fUiUif", "nc", "dot11-insts")
//===----------------------------------------------------------------------===//
TARGET_BUILTIN(__builtin_amdgcn_permlane16, "UiUiUiUiUiIbIb", "nc", "gfx10-insts")
TARGET_BUILTIN(__builtin_amdgcn_permlanex16, "UiUiUiUiUiIbIb", "nc", "gfx10-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mov_dpp8, "UiUiIUi", "nc", "gfx10-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mov_dpp8, "UiUiIUi", "nct", "gfx10-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_ttracedata_imm, "vIs", "n", "gfx10-insts")
//===----------------------------------------------------------------------===//
diff --git a/clang/include/clang/Sema/SemaAMDGPU.h b/clang/include/clang/Sema/SemaAMDGPU.h
index 3fdb39202610d0..7da7a42ede99d9 100644
--- a/clang/include/clang/Sema/SemaAMDGPU.h
+++ b/clang/include/clang/Sema/SemaAMDGPU.h
@@ -26,6 +26,9 @@ class SemaAMDGPU : public SemaBase {
bool CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
+ bool CheckMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs,
+ unsigned NumDataArgs);
+
/// Create an AMDGPUWavesPerEUAttr attribute.
AMDGPUFlatWorkGroupSizeAttr *
CreateAMDGPUFlatWorkGroupSizeAttr(const AttributeCommonInfo &CI, Expr *Min,
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 3f28b7f26c36fe..a6435e0a19d72c 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19033,8 +19033,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
return emitBuiltinWithOneOverloadedType<2>(*this, E,
Intrinsic::amdgcn_ds_swizzle);
case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
- return emitBuiltinWithOneOverloadedType<2>(*this, E,
- Intrinsic::amdgcn_mov_dpp8);
case AMDGPU::BI__builtin_amdgcn_mov_dpp:
case AMDGPU::BI__builtin_amdgcn_update_dpp: {
llvm::SmallVector<llvm::Value *, 6> Args;
@@ -19048,14 +19046,20 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
unsigned Size = DataTy->getPrimitiveSizeInBits();
llvm::Type *IntTy =
llvm::IntegerType::get(Builder.getContext(), std::max(Size, 32u));
- Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, IntTy);
- assert(E->getNumArgs() == 5 || E->getNumArgs() == 6);
- bool InsertOld = E->getNumArgs() == 5;
+ Function *F =
+ CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp8
+ ? Intrinsic::amdgcn_mov_dpp8
+ : Intrinsic::amdgcn_update_dpp,
+ IntTy);
+ assert(E->getNumArgs() == 5 || E->getNumArgs() == 6 ||
+ E->getNumArgs() == 2);
+ bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp;
if (InsertOld)
Args.push_back(llvm::PoisonValue::get(IntTy));
for (unsigned I = 0; I != E->getNumArgs(); ++I) {
llvm::Value *V = EmitScalarOrConstFoldImmArg(ICEArguments, I, E);
- if (I <= !InsertOld && Size < 32) {
+ if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 2 : 1) &&
+ Size < 32) {
if (!DataTy->isIntegerTy())
V = Builder.CreateBitCast(
V, llvm::IntegerType::get(Builder.getContext(), Size));
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index f59654c14f08fb..dc411cfb11066c 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -63,49 +63,12 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
OrderIndex = 0;
ScopeIndex = 1;
break;
- case AMDGPU::BI__builtin_amdgcn_mov_dpp: {
- if (SemaRef.checkArgCountRange(TheCall, 5, 5))
- return true;
- Expr *ValArg = TheCall->getArg(0);
- QualType Ty = ValArg->getType();
- // TODO: Vectors can also be supported.
- if (!Ty->isArithmeticType() || Ty->isAnyComplexType()) {
- SemaRef.Diag(ValArg->getBeginLoc(),
- diag::err_typecheck_cond_expect_int_float)
- << Ty << ValArg->getSourceRange();
- return true;
- }
- return false;
- }
+ case AMDGPU::BI__builtin_amdgcn_mov_dpp:
+ return CheckMovDPPFunctionCall(TheCall, 5, 1);
+ case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
+ return CheckMovDPPFunctionCall(TheCall, 2, 1);
case AMDGPU::BI__builtin_amdgcn_update_dpp: {
- if (SemaRef.checkArgCountRange(TheCall, 6, 6))
- return true;
- Expr *Args[2];
- QualType ArgTys[2];
- for (unsigned I = 0; I != 2; ++I) {
- Args[I] = TheCall->getArg(I);
- ArgTys[I] = Args[I]->getType();
- // TODO: Vectors can also be supported.
- if (!ArgTys[I]->isArithmeticType() || ArgTys[I]->isAnyComplexType()) {
- SemaRef.Diag(Args[I]->getBeginLoc(),
- diag::err_typecheck_cond_expect_int_float)
- << ArgTys[I] << Args[I]->getSourceRange();
- return true;
- }
- }
- if (getASTContext().hasSameUnqualifiedType(ArgTys[0], ArgTys[1]))
- return false;
- if (((ArgTys[0]->isUnsignedIntegerType() &&
- ArgTys[1]->isSignedIntegerType()) ||
- (ArgTys[0]->isSignedIntegerType() &&
- ArgTys[1]->isUnsignedIntegerType())) &&
- getASTContext().getTypeSize(ArgTys[0]) ==
- getASTContext().getTypeSize(ArgTys[1]))
- return false;
- SemaRef.Diag(Args[1]->getBeginLoc(),
- diag::err_typecheck_call_different_arg_types)
- << ArgTys[0] << ArgTys[1];
- return true;
+ return CheckMovDPPFunctionCall(TheCall, 6, 2);
}
default:
return false;
@@ -152,6 +115,44 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
return false;
}
+bool SemaAMDGPU::CheckMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs,
+ unsigned NumDataArgs) {
+ assert(NumDataArgs <= 2);
+ if (SemaRef.checkArgCountRange(TheCall, NumArgs, NumArgs))
+ return true;
+ Expr *Args[2];
+ QualType ArgTys[2];
+ for (unsigned I = 0; I != NumDataArgs; ++I) {
+ Args[I] = TheCall->getArg(I);
+ ArgTys[I] = Args[I]->getType();
+ // TODO: Vectors can also be supported.
+ if (!ArgTys[I]->isArithmeticType() || ArgTys[I]->isAnyComplexType()) {
+ SemaRef.Diag(Args[I]->getBeginLoc(),
+ diag::err_typecheck_cond_expect_int_float)
+ << ArgTys[I] << Args[I]->getSourceRange();
+ return true;
+ }
+ }
+ if (NumDataArgs < 2)
+ return false;
+
+ if (getASTContext().hasSameUnqualifiedType(ArgTys[0], ArgTys[1]))
+ return false;
+
+ if (((ArgTys[0]->isUnsignedIntegerType() &&
+ ArgTys[1]->isSignedIntegerType()) ||
+ (ArgTys[0]->isSignedIntegerType() &&
+ ArgTys[1]->isUnsignedIntegerType())) &&
+ getASTContext().getTypeSize(ArgTys[0]) ==
+ getASTContext().getTypeSize(ArgTys[1]))
+ return false;
+
+ SemaRef.Diag(Args[1]->getBeginLoc(),
+ diag::err_typecheck_call_different_arg_types)
+ << ArgTys[0] << ArgTys[1];
+ return true;
+}
+
static bool
checkAMDGPUFlatWorkGroupSizeArguments(Sema &S, Expr *MinExpr, Expr *MaxExpr,
const AMDGPUFlatWorkGroupSizeAttr &Attr) {
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
index 3cf1056cf4f48b..a4054cba236dd2 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
@@ -4,6 +4,8 @@
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
typedef unsigned int uint;
typedef unsigned long ulong;
@@ -19,12 +21,64 @@ void test_permlanex16(global uint* out, uint a, uint b, uint c, uint d) {
*out = __builtin_amdgcn_permlanex16(a, b, c, d, 0, 0);
}
-// CHECK-LABEL: @test_mov_dpp8(
-// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %a, i32 1)
-void test_mov_dpp8(global uint* out, uint a) {
+// CHECK-LABEL: @test_mov_dpp8_uint(
+// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %a, i32 1)
+// CHECK-NEXT: store i32 %0,
+void test_mov_dpp8_uint(global uint* out, uint a) {
+ *out = __builtin_amdgcn_mov_dpp8(a, 1);
+}
+
+// CHECK-LABEL: @test_mov_dpp8_long(
+// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.mov.dpp8.i64(i64 %a, i32 1)
+// CHECK-NEXT: store i64 %0,
+void test_mov_dpp8_long(global long* out, long a) {
*out = __builtin_amdgcn_mov_dpp8(a, 1);
}
+// CHECK-LABEL: @test_mov_dpp8_float(
+// CHECK: %0 = bitcast float %a to i32
+// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %0, i32 1)
+// CHECK-NEXT: store i32 %1,
+void test_mov_dpp8_float(global float* out, float a) {
+ *out = __builtin_amdgcn_mov_dpp8(a, 1);
+}
+
+// CHECK-LABEL: @test_mov_dpp8_double
+// CHECK: %0 = bitcast double %x to i64
+// CHECK-NEXT: %1 = tail call{{.*}} i64 @llvm.amdgcn.mov.dpp8.i64(i64 %0, i32 1)
+// CHECK-NEXT: store i64 %1,
+void test_mov_dpp8_double(double x, global double *p) {
+ *p = __builtin_amdgcn_mov_dpp8(x, 1);
+}
+
+// CHECK-LABEL: @test_mov_dpp8_short
+// CHECK: %0 = zext i16 %x to i32
+// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %0, i32 1)
+// CHECK-NEXT: %2 = trunc i32 %1 to i16
+// CHECK-NEXT: store i16 %2,
+void test_mov_dpp8_short(short x, global short *p) {
+ *p = __builtin_amdgcn_mov_dpp8(x, 1);
+}
+
+// CHECK-LABEL: @test_mov_dpp8_char
+// CHECK: %0 = zext i8 %x to i32
+// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %0, i32 1)
+// CHECK-NEXT: %2 = trunc i32 %1 to i8
+// CHECK-NEXT: store i8 %2,
+void test_mov_dpp8_char(char x, global char *p) {
+ *p = __builtin_amdgcn_mov_dpp8(x, 1);
+}
+
+// CHECK-LABEL: @test_mov_dpp8_half
+// CHECK: %0 = load i16,
+// CHECK: %1 = zext i16 %0 to i32
+// CHECK-NEXT: %2 = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %1, i32 1)
+// CHECK-NEXT: %3 = trunc i32 %2 to i16
+// CHECK-NEXT: store i16 %3,
+void test_mov_dpp8_half(half *x, global half *p) {
+ *p = __builtin_amdgcn_mov_dpp8(*x, 1);
+}
+
// CHECK-LABEL: @test_s_memtime
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.s.memtime()
void test_s_memtime(global ulong* out)
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10.cl
index 02c8dc8c1339ee..daae017142c790 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10.cl
@@ -5,11 +5,30 @@
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -verify -S -o - %s
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx908 -verify -S -o - %s
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
typedef unsigned int uint;
+typedef int int2 __attribute__((ext_vector_type(2)));
+struct S {
+ int x;
+};
void test(global uint* out, uint a, uint b, uint c, uint d) {
*out = __builtin_amdgcn_permlane16(a, b, c, d, 1, 1); // expected-error {{'__builtin_amdgcn_permlane16' needs target feature gfx10-insts}}
*out = __builtin_amdgcn_permlanex16(a, b, c, d, 1, 1); // expected-error {{'__builtin_amdgcn_permlanex16' needs target feature gfx10-insts}}
*out = __builtin_amdgcn_mov_dpp8(a, 1); // expected-error {{'__builtin_amdgcn_mov_dpp8' needs target feature gfx10-insts}}
}
+
+void test_mov_dpp8(global int* out, int src, int i, int2 i2, struct S s, float _Complex fc)
+{
+ *out = __builtin_amdgcn_mov_dpp8(src, i); // expected-error{{argument to '__builtin_amdgcn_mov_dpp8' must be a constant integer}}
+ *out = __builtin_amdgcn_mov_dpp8(src, 0.1); // expected-error{{argument to '__builtin_amdgcn_mov_dpp8' must be a constant integer}}
+ *out = __builtin_amdgcn_mov_dpp8(src); // expected-error{{too few arguments to function call, expected 2, have 1}}
+ *out = __builtin_amdgcn_mov_dpp8(src, 0, 0); // expected-error{{too many arguments to function call, expected at most 2, have 3}}
+ *out = __builtin_amdgcn_mov_dpp8(out, 0); // expected-error{{used type '__global int *__private' where integer or floating point type is required}}
+ *out = __builtin_amdgcn_mov_dpp8("aa", 0); // expected-error{{used type '__constant char[3]' where integer or floating point type is required}}
+ *out = __builtin_amdgcn_mov_dpp8(i2, 0); // expected-error{{used type '__private int2' (vector of 2 'int' values) where integer or floating point type is required}}
+ *out = __builtin_amdgcn_mov_dpp8(s, 0); // expected-error{{used type '__private struct S' where integer or floating point type is required}}
+ *out = __builtin_amdgcn_mov_dpp8(fc, 0); // expected-error{{used type '__private _Complex float' where integer or floating point type is required}}
+}
|
@llvm/pr-subscribers-backend-amdgpu Author: Stanislav Mekhanoshin (rampitec). Changes: The same handling as for __builtin_amdgcn_mov_dpp. Full diff: https://github.com/llvm/llvm-project/pull/113610.diff 6 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index e887213aa945e6..29001e32085151 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -282,7 +282,7 @@ TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_bf8_bf8, "fUiUif", "nc", "dot11-insts")
//===----------------------------------------------------------------------===//
TARGET_BUILTIN(__builtin_amdgcn_permlane16, "UiUiUiUiUiIbIb", "nc", "gfx10-insts")
TARGET_BUILTIN(__builtin_amdgcn_permlanex16, "UiUiUiUiUiIbIb", "nc", "gfx10-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mov_dpp8, "UiUiIUi", "nc", "gfx10-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mov_dpp8, "UiUiIUi", "nct", "gfx10-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_ttracedata_imm, "vIs", "n", "gfx10-insts")
//===----------------------------------------------------------------------===//
diff --git a/clang/include/clang/Sema/SemaAMDGPU.h b/clang/include/clang/Sema/SemaAMDGPU.h
index 3fdb39202610d0..7da7a42ede99d9 100644
--- a/clang/include/clang/Sema/SemaAMDGPU.h
+++ b/clang/include/clang/Sema/SemaAMDGPU.h
@@ -26,6 +26,9 @@ class SemaAMDGPU : public SemaBase {
bool CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
+ bool CheckMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs,
+ unsigned NumDataArgs);
+
/// Create an AMDGPUWavesPerEUAttr attribute.
AMDGPUFlatWorkGroupSizeAttr *
CreateAMDGPUFlatWorkGroupSizeAttr(const AttributeCommonInfo &CI, Expr *Min,
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 3f28b7f26c36fe..a6435e0a19d72c 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19033,8 +19033,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
return emitBuiltinWithOneOverloadedType<2>(*this, E,
Intrinsic::amdgcn_ds_swizzle);
case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
- return emitBuiltinWithOneOverloadedType<2>(*this, E,
- Intrinsic::amdgcn_mov_dpp8);
case AMDGPU::BI__builtin_amdgcn_mov_dpp:
case AMDGPU::BI__builtin_amdgcn_update_dpp: {
llvm::SmallVector<llvm::Value *, 6> Args;
@@ -19048,14 +19046,20 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
unsigned Size = DataTy->getPrimitiveSizeInBits();
llvm::Type *IntTy =
llvm::IntegerType::get(Builder.getContext(), std::max(Size, 32u));
- Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, IntTy);
- assert(E->getNumArgs() == 5 || E->getNumArgs() == 6);
- bool InsertOld = E->getNumArgs() == 5;
+ Function *F =
+ CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp8
+ ? Intrinsic::amdgcn_mov_dpp8
+ : Intrinsic::amdgcn_update_dpp,
+ IntTy);
+ assert(E->getNumArgs() == 5 || E->getNumArgs() == 6 ||
+ E->getNumArgs() == 2);
+ bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp;
if (InsertOld)
Args.push_back(llvm::PoisonValue::get(IntTy));
for (unsigned I = 0; I != E->getNumArgs(); ++I) {
llvm::Value *V = EmitScalarOrConstFoldImmArg(ICEArguments, I, E);
- if (I <= !InsertOld && Size < 32) {
+ if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 2 : 1) &&
+ Size < 32) {
if (!DataTy->isIntegerTy())
V = Builder.CreateBitCast(
V, llvm::IntegerType::get(Builder.getContext(), Size));
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index f59654c14f08fb..dc411cfb11066c 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -63,49 +63,12 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
OrderIndex = 0;
ScopeIndex = 1;
break;
- case AMDGPU::BI__builtin_amdgcn_mov_dpp: {
- if (SemaRef.checkArgCountRange(TheCall, 5, 5))
- return true;
- Expr *ValArg = TheCall->getArg(0);
- QualType Ty = ValArg->getType();
- // TODO: Vectors can also be supported.
- if (!Ty->isArithmeticType() || Ty->isAnyComplexType()) {
- SemaRef.Diag(ValArg->getBeginLoc(),
- diag::err_typecheck_cond_expect_int_float)
- << Ty << ValArg->getSourceRange();
- return true;
- }
- return false;
- }
+ case AMDGPU::BI__builtin_amdgcn_mov_dpp:
+ return CheckMovDPPFunctionCall(TheCall, 5, 1);
+ case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
+ return CheckMovDPPFunctionCall(TheCall, 2, 1);
case AMDGPU::BI__builtin_amdgcn_update_dpp: {
- if (SemaRef.checkArgCountRange(TheCall, 6, 6))
- return true;
- Expr *Args[2];
- QualType ArgTys[2];
- for (unsigned I = 0; I != 2; ++I) {
- Args[I] = TheCall->getArg(I);
- ArgTys[I] = Args[I]->getType();
- // TODO: Vectors can also be supported.
- if (!ArgTys[I]->isArithmeticType() || ArgTys[I]->isAnyComplexType()) {
- SemaRef.Diag(Args[I]->getBeginLoc(),
- diag::err_typecheck_cond_expect_int_float)
- << ArgTys[I] << Args[I]->getSourceRange();
- return true;
- }
- }
- if (getASTContext().hasSameUnqualifiedType(ArgTys[0], ArgTys[1]))
- return false;
- if (((ArgTys[0]->isUnsignedIntegerType() &&
- ArgTys[1]->isSignedIntegerType()) ||
- (ArgTys[0]->isSignedIntegerType() &&
- ArgTys[1]->isUnsignedIntegerType())) &&
- getASTContext().getTypeSize(ArgTys[0]) ==
- getASTContext().getTypeSize(ArgTys[1]))
- return false;
- SemaRef.Diag(Args[1]->getBeginLoc(),
- diag::err_typecheck_call_different_arg_types)
- << ArgTys[0] << ArgTys[1];
- return true;
+ return CheckMovDPPFunctionCall(TheCall, 6, 2);
}
default:
return false;
@@ -152,6 +115,44 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
return false;
}
+bool SemaAMDGPU::CheckMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs,
+ unsigned NumDataArgs) {
+ assert(NumDataArgs <= 2);
+ if (SemaRef.checkArgCountRange(TheCall, NumArgs, NumArgs))
+ return true;
+ Expr *Args[2];
+ QualType ArgTys[2];
+ for (unsigned I = 0; I != NumDataArgs; ++I) {
+ Args[I] = TheCall->getArg(I);
+ ArgTys[I] = Args[I]->getType();
+ // TODO: Vectors can also be supported.
+ if (!ArgTys[I]->isArithmeticType() || ArgTys[I]->isAnyComplexType()) {
+ SemaRef.Diag(Args[I]->getBeginLoc(),
+ diag::err_typecheck_cond_expect_int_float)
+ << ArgTys[I] << Args[I]->getSourceRange();
+ return true;
+ }
+ }
+ if (NumDataArgs < 2)
+ return false;
+
+ if (getASTContext().hasSameUnqualifiedType(ArgTys[0], ArgTys[1]))
+ return false;
+
+ if (((ArgTys[0]->isUnsignedIntegerType() &&
+ ArgTys[1]->isSignedIntegerType()) ||
+ (ArgTys[0]->isSignedIntegerType() &&
+ ArgTys[1]->isUnsignedIntegerType())) &&
+ getASTContext().getTypeSize(ArgTys[0]) ==
+ getASTContext().getTypeSize(ArgTys[1]))
+ return false;
+
+ SemaRef.Diag(Args[1]->getBeginLoc(),
+ diag::err_typecheck_call_different_arg_types)
+ << ArgTys[0] << ArgTys[1];
+ return true;
+}
+
static bool
checkAMDGPUFlatWorkGroupSizeArguments(Sema &S, Expr *MinExpr, Expr *MaxExpr,
const AMDGPUFlatWorkGroupSizeAttr &Attr) {
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
index 3cf1056cf4f48b..a4054cba236dd2 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
@@ -4,6 +4,8 @@
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
typedef unsigned int uint;
typedef unsigned long ulong;
@@ -19,12 +21,64 @@ void test_permlanex16(global uint* out, uint a, uint b, uint c, uint d) {
*out = __builtin_amdgcn_permlanex16(a, b, c, d, 0, 0);
}
-// CHECK-LABEL: @test_mov_dpp8(
-// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %a, i32 1)
-void test_mov_dpp8(global uint* out, uint a) {
+// CHECK-LABEL: @test_mov_dpp8_uint(
+// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %a, i32 1)
+// CHECK-NEXT: store i32 %0,
+void test_mov_dpp8_uint(global uint* out, uint a) {
+ *out = __builtin_amdgcn_mov_dpp8(a, 1);
+}
+
+// CHECK-LABEL: @test_mov_dpp8_long(
+// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.mov.dpp8.i64(i64 %a, i32 1)
+// CHECK-NEXT: store i64 %0,
+void test_mov_dpp8_long(global long* out, long a) {
*out = __builtin_amdgcn_mov_dpp8(a, 1);
}
+// CHECK-LABEL: @test_mov_dpp8_float(
+// CHECK: %0 = bitcast float %a to i32
+// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %0, i32 1)
+// CHECK-NEXT: store i32 %1,
+void test_mov_dpp8_float(global float* out, float a) {
+ *out = __builtin_amdgcn_mov_dpp8(a, 1);
+}
+
+// CHECK-LABEL: @test_mov_dpp8_double
+// CHECK: %0 = bitcast double %x to i64
+// CHECK-NEXT: %1 = tail call{{.*}} i64 @llvm.amdgcn.mov.dpp8.i64(i64 %0, i32 1)
+// CHECK-NEXT: store i64 %1,
+void test_mov_dpp8_double(double x, global double *p) {
+ *p = __builtin_amdgcn_mov_dpp8(x, 1);
+}
+
+// CHECK-LABEL: @test_mov_dpp8_short
+// CHECK: %0 = zext i16 %x to i32
+// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %0, i32 1)
+// CHECK-NEXT: %2 = trunc i32 %1 to i16
+// CHECK-NEXT: store i16 %2,
+void test_mov_dpp8_short(short x, global short *p) {
+ *p = __builtin_amdgcn_mov_dpp8(x, 1);
+}
+
+// CHECK-LABEL: @test_mov_dpp8_char
+// CHECK: %0 = zext i8 %x to i32
+// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %0, i32 1)
+// CHECK-NEXT: %2 = trunc i32 %1 to i8
+// CHECK-NEXT: store i8 %2,
+void test_mov_dpp8_char(char x, global char *p) {
+ *p = __builtin_amdgcn_mov_dpp8(x, 1);
+}
+
+// CHECK-LABEL: @test_mov_dpp8_half
+// CHECK: %0 = load i16,
+// CHECK: %1 = zext i16 %0 to i32
+// CHECK-NEXT: %2 = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %1, i32 1)
+// CHECK-NEXT: %3 = trunc i32 %2 to i16
+// CHECK-NEXT: store i16 %3,
+void test_mov_dpp8_half(half *x, global half *p) {
+ *p = __builtin_amdgcn_mov_dpp8(*x, 1);
+}
+
// CHECK-LABEL: @test_s_memtime
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.s.memtime()
void test_s_memtime(global ulong* out)
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10.cl
index 02c8dc8c1339ee..daae017142c790 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10.cl
@@ -5,11 +5,30 @@
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -verify -S -o - %s
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx908 -verify -S -o - %s
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
typedef unsigned int uint;
+typedef int int2 __attribute__((ext_vector_type(2)));
+struct S {
+ int x;
+};
void test(global uint* out, uint a, uint b, uint c, uint d) {
*out = __builtin_amdgcn_permlane16(a, b, c, d, 1, 1); // expected-error {{'__builtin_amdgcn_permlane16' needs target feature gfx10-insts}}
*out = __builtin_amdgcn_permlanex16(a, b, c, d, 1, 1); // expected-error {{'__builtin_amdgcn_permlanex16' needs target feature gfx10-insts}}
*out = __builtin_amdgcn_mov_dpp8(a, 1); // expected-error {{'__builtin_amdgcn_mov_dpp8' needs target feature gfx10-insts}}
}
+
+void test_mov_dpp8(global int* out, int src, int i, int2 i2, struct S s, float _Complex fc)
+{
+ *out = __builtin_amdgcn_mov_dpp8(src, i); // expected-error{{argument to '__builtin_amdgcn_mov_dpp8' must be a constant integer}}
+ *out = __builtin_amdgcn_mov_dpp8(src, 0.1); // expected-error{{argument to '__builtin_amdgcn_mov_dpp8' must be a constant integer}}
+ *out = __builtin_amdgcn_mov_dpp8(src); // expected-error{{too few arguments to function call, expected 2, have 1}}
+ *out = __builtin_amdgcn_mov_dpp8(src, 0, 0); // expected-error{{too many arguments to function call, expected at most 2, have 3}}
+ *out = __builtin_amdgcn_mov_dpp8(out, 0); // expected-error{{used type '__global int *__private' where integer or floating point type is required}}
+ *out = __builtin_amdgcn_mov_dpp8("aa", 0); // expected-error{{used type '__constant char[3]' where integer or floating point type is required}}
+ *out = __builtin_amdgcn_mov_dpp8(i2, 0); // expected-error{{used type '__private int2' (vector of 2 'int' values) where integer or floating point type is required}}
+ *out = __builtin_amdgcn_mov_dpp8(s, 0); // expected-error{{used type '__private struct S' where integer or floating point type is required}}
+ *out = __builtin_amdgcn_mov_dpp8(fc, 0); // expected-error{{used type '__private _Complex float' where integer or floating point type is required}}
+}
|
It does not really work without #113500, though. |
The same handling as for __builtin_amdgcn_mov_dpp.
The same handling as for __builtin_amdgcn_mov_dpp.
The same handling as for __builtin_amdgcn_mov_dpp.