Skip to content

Commit 6d05831

Browse files
Enable .ptr .global .align attributes for kernel arguments for CUDA (#114874)
Emit .ptr, state-space (.global, .shared, .const, .local), and .align attributes for kernel pointer arguments in CUDA (previously handled only for OpenCL). This gives the PTX consumer more vectorization opportunities, because it can now see the pointer alignments. If no alignment is explicitly specified, .align 1 is emitted to match the LLVM IR semantics for a pointer parameter without an align attribute. PTX ISA doc: https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#kernel-parameter-attribute-ptr (kernel parameter attribute .ptr). This is a rework of the original patch proposed in #79646. --------- Co-authored-by: Vandana <[email protected]>
1 parent 3d47473 commit 6d05831

File tree

3 files changed

+63
-24
lines changed

3 files changed

+63
-24
lines changed

llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1600,30 +1600,27 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
16001600

16011601
if (isKernelFunc) {
16021602
if (PTy) {
1603-
// Special handling for pointer arguments to kernel
1604-
O << "\t.param .u" << PTySizeInBits << " ";
1605-
1606-
if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
1607-
NVPTX::CUDA) {
1608-
int addrSpace = PTy->getAddressSpace();
1609-
switch (addrSpace) {
1610-
default:
1611-
O << ".ptr ";
1612-
break;
1613-
case ADDRESS_SPACE_CONST:
1614-
O << ".ptr .const ";
1615-
break;
1616-
case ADDRESS_SPACE_SHARED:
1617-
O << ".ptr .shared ";
1618-
break;
1619-
case ADDRESS_SPACE_GLOBAL:
1620-
O << ".ptr .global ";
1621-
break;
1622-
}
1623-
Align ParamAlign = I->getParamAlign().valueOrOne();
1624-
O << ".align " << ParamAlign.value() << " ";
1603+
O << "\t.param .u" << PTySizeInBits << " .ptr";
1604+
1605+
switch (PTy->getAddressSpace()) {
1606+
default:
1607+
break;
1608+
case ADDRESS_SPACE_GLOBAL:
1609+
O << " .global";
1610+
break;
1611+
case ADDRESS_SPACE_SHARED:
1612+
O << " .shared";
1613+
break;
1614+
case ADDRESS_SPACE_CONST:
1615+
O << " .const";
1616+
break;
1617+
case ADDRESS_SPACE_LOCAL:
1618+
O << " .local";
1619+
break;
16251620
}
1626-
O << TLI->getParamName(F, paramIndex);
1621+
1622+
O << " .align " << I->getParamAlign().valueOrOne().value();
1623+
O << " " << TLI->getParamName(F, paramIndex);
16271624
continue;
16281625
}
16291626

llvm/test/CodeGen/NVPTX/i1-param.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ target triple = "nvptx-nvidia-cuda"
88

99
; CHECK: .entry foo
1010
; CHECK: .param .u8 foo_param_0
11-
; CHECK: .param .u64 foo_param_1
11+
; CHECK: .param .u64 .ptr .align 1 foo_param_1
1212
define void @foo(i1 %p, ptr %out) {
1313
%val = zext i1 %p to i32
1414
store i32 %val, ptr %out
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 | FileCheck %s
2+
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_60 | %ptxas-verify %}
3+
4+
%struct.Large = type { [16 x double] }
5+
6+
; CHECK-LABEL: .entry func_align(
7+
; CHECK: .param .u64 .ptr .align 1 func_align_param_0
8+
; CHECK: .param .u64 .ptr .align 2 func_align_param_1
9+
; CHECK: .param .u64 .ptr .global .align 4 func_align_param_2
10+
; CHECK: .param .u64 .ptr .shared .align 8 func_align_param_3
11+
; CHECK: .param .u64 .ptr .const .align 16 func_align_param_4
12+
; CHECK: .param .u64 .ptr .local .align 32 func_align_param_5
13+
define void @func_align(ptr nocapture readonly align 1 %input,
14+
ptr nocapture align 2 %out,
15+
ptr addrspace(1) align 4 %global,
16+
ptr addrspace(3) align 8 %shared,
17+
ptr addrspace(4) align 16 %const,
18+
ptr addrspace(5) align 32 %local) {
19+
entry:
20+
ret void
21+
}
22+
23+
; CHECK-LABEL: .entry func_noalign(
24+
; CHECK: .param .u64 .ptr .align 1 func_noalign_param_0
25+
; CHECK: .param .u64 .ptr .align 1 func_noalign_param_1
26+
; CHECK: .param .u64 .ptr .global .align 1 func_noalign_param_2
27+
; CHECK: .param .u64 .ptr .shared .align 1 func_noalign_param_3
28+
; CHECK: .param .u64 .ptr .const .align 1 func_noalign_param_4
29+
; CHECK: .param .u64 .ptr .local .align 1 func_noalign_param_5
30+
define void @func_noalign(ptr nocapture readonly %input,
31+
ptr nocapture %out,
32+
ptr addrspace(1) %global,
33+
ptr addrspace(3) %shared,
34+
ptr addrspace(4) %const,
35+
ptr addrspace(5) %local) {
36+
entry:
37+
ret void
38+
}
39+
40+
!nvvm.annotations = !{!0, !1}
41+
!0 = !{ptr @func_align, !"kernel", i32 1}
42+
!1 = !{ptr @func_noalign, !"kernel", i32 1}

0 commit comments

Comments
 (0)