Skip to content

Commit e540965

Browse files
committed
[AMDGPU] Added __builtin_amdgcn_ds_bvh_stack_rtn
Differential Revision: https://reviews.llvm.org/D133966
1 parent 55e6078 commit e540965

File tree

4 files changed: 41 additions and 0 deletions.

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,8 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64, "V4iIbV2iIbV2iV4iIb",
281281
TARGET_BUILTIN(__builtin_amdgcn_s_sendmsg_rtn, "UiUIi", "n", "gfx11-insts")
282282
TARGET_BUILTIN(__builtin_amdgcn_s_sendmsg_rtnl, "UWiUIi", "n", "gfx11-insts")
283283

284+
TARGET_BUILTIN(__builtin_amdgcn_ds_bvh_stack_rtn, "V2UiUiUiV4UiIi", "n", "gfx11-insts")
285+
284286
//===----------------------------------------------------------------------===//
285287
// Special builtins.
286288
//===----------------------------------------------------------------------===//

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16897,6 +16897,21 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1689716897
RayInverseDir, TextureDescr});
1689816898
}
1689916899

16900+
case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: {
16901+
SmallVector<Value *, 4> Args;
16902+
for (int i = 0, e = E->getNumArgs(); i != e; ++i)
16903+
Args.push_back(EmitScalarExpr(E->getArg(i)));
16904+
16905+
Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn);
16906+
Value *Call = Builder.CreateCall(F, Args);
16907+
Value *Rtn = Builder.CreateExtractValue(Call, 0);
16908+
Value *A = Builder.CreateExtractValue(Call, 1);
16909+
llvm::Type *RetTy = ConvertType(E->getType());
16910+
Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
16911+
(uint64_t)0);
16912+
return Builder.CreateInsertElement(I0, A, 1);
16913+
}
16914+
1690016915
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
1690116916
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
1690216917
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:

(newly added test file; its path was not captured on this page)

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// REQUIRES: amdgpu-registered-target
2+
3+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -verify -S -emit-llvm -o - %s
4+
5+
typedef unsigned int uint;
6+
typedef uint uint2 __attribute__((ext_vector_type(2)));
7+
typedef uint uint4 __attribute__((ext_vector_type(4)));
8+
9+
kernel void builtins_amdgcn_bvh_err(global uint2* out, uint addr, uint data, uint4 data1, uint offset) {
10+
*out = __builtin_amdgcn_ds_bvh_stack_rtn(addr, data, data1, offset); // expected-error {{'__builtin_amdgcn_ds_bvh_stack_rtn' must be a constant integer}}
11+
}

clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66

77
typedef unsigned int uint;
88
typedef unsigned long ulong;
9+
typedef uint uint2 __attribute__((ext_vector_type(2)));
10+
typedef uint uint4 __attribute__((ext_vector_type(4)));
911

1012
// CHECK-LABEL: @test_s_sendmsg_rtn(
1113
// CHECK: call i32 @llvm.amdgcn.s.sendmsg.rtn.i32(i32 0)
@@ -18,3 +20,14 @@ void test_s_sendmsg_rtn(global uint* out) {
1820
void test_s_sendmsg_rtnl(global ulong* out) {
1921
*out = __builtin_amdgcn_s_sendmsg_rtnl(0);
2022
}
23+
24+
// CHECK-LABEL: @test_ds_bvh_stack_rtn(
25+
// CHECK: %0 = tail call { i32, i32 } @llvm.amdgcn.ds.bvh.stack.rtn(i32 %addr, i32 %data, <4 x i32> %data1, i32 128)
26+
// CHECK: %1 = extractvalue { i32, i32 } %0, 0
27+
// CHECK: %2 = extractvalue { i32, i32 } %0, 1
28+
// CHECK: %3 = insertelement <2 x i32> poison, i32 %1, i64 0
29+
// CHECK: %4 = insertelement <2 x i32> %3, i32 %2, i64 1
30+
void test_ds_bvh_stack_rtn(global uint2* out, uint addr, uint data, uint4 data1)
31+
{
32+
*out = __builtin_amdgcn_ds_bvh_stack_rtn(addr, data, data1, 128);
33+
}

0 commit comments

Comments
 (0)