Skip to content

Commit fb17bf6

Browse files
committed
[AMDGPU] Change enqueue kernel handle type
Currently the handle type is a global pointer, which holds 8 bytes. We need a larger type that holds 16 bytes, so change it to [2 x i64]. Differential Revision: https://reviews.llvm.org/D48094 llvm-svn: 334625
1 parent 4bf9b56 commit fb17bf6

File tree

2 files changed

+7
-6
lines changed

2 files changed

+7
-6
lines changed

llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include "llvm/ADT/DenseSet.h"
3737
#include "llvm/ADT/StringRef.h"
3838
#include "llvm/IR/Constants.h"
39+
#include "llvm/IR/DerivedTypes.h"
3940
#include "llvm/IR/Instructions.h"
4041
#include "llvm/IR/Mangler.h"
4142
#include "llvm/IR/Module.h"
@@ -116,7 +117,7 @@ bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) {
116117
}
117118
LLVM_DEBUG(dbgs() << "found enqueued kernel: " << F.getName() << '\n');
118119
auto RuntimeHandle = (F.getName() + ".runtime_handle").str();
119-
auto T = Type::getInt8Ty(C)->getPointerTo(AMDGPUAS::GLOBAL_ADDRESS);
120+
auto T = ArrayType::get(Type::getInt64Ty(C), 2);
120121
auto *GV = new GlobalVariable(
121122
M, T,
122123
/*IsConstant=*/false, GlobalValue::ExternalLinkage,

llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
; RUN: opt -data-layout=A5 -amdgpu-lower-enqueued-block -S < %s | FileCheck %s
22

3-
; CHECK: @__test_block_invoke_kernel.runtime_handle = addrspace(1) global i8 addrspace(1)* null
4-
; CHECK: @__test_block_invoke_2_kernel.runtime_handle = addrspace(1) global i8 addrspace(1)* null
5-
; CHECK: @__amdgpu_enqueued_kernel.runtime_handle = addrspace(1) global i8 addrspace(1)* null
6-
; CHECK: @__amdgpu_enqueued_kernel.1.runtime_handle = addrspace(1) global i8 addrspace(1)* null
3+
; CHECK: @__test_block_invoke_kernel.runtime_handle = addrspace(1) global [2 x i64] zeroinitializer
4+
; CHECK: @__test_block_invoke_2_kernel.runtime_handle = addrspace(1) global [2 x i64] zeroinitializer
5+
; CHECK: @__amdgpu_enqueued_kernel.runtime_handle = addrspace(1) global [2 x i64] zeroinitializer
6+
; CHECK: @__amdgpu_enqueued_kernel.1.runtime_handle = addrspace(1) global [2 x i64] zeroinitializer
77

88
%struct.ndrange_t = type { i32 }
99
%opencl.queue_t = type opaque
@@ -84,7 +84,7 @@ entry:
8484
; CHECK-LABEL: define amdgpu_kernel void @inlined_caller
8585
; CHECK-SAME: #[[AT_CALLER]]
8686
; CHECK-NOT: @__test_block_invoke_kernel
87-
; CHECK: load i64, i64 addrspace(1)* bitcast (i8 addrspace(1)* addrspace(1)* @__test_block_invoke_kernel.runtime_handle to i64 addrspace(1)*)
87+
; CHECK: load i64, i64 addrspace(1)* getelementptr inbounds ([2 x i64], [2 x i64] addrspace(1)* @__test_block_invoke_kernel.runtime_handle, i32 0, i32 0)
8888
define amdgpu_kernel void @inlined_caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
8989
!kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
9090
entry:

0 commit comments

Comments
 (0)