Skip to content

Commit 7daa65a

Browse files
authored
Reland "[NVPTX] Use .common linkage for common globals" (#86824)
Switch from `.weak` to `.common` linkage for common global variables where possible.

The `.common` linkage is described in [PTX ISA 11.6.4. Linking Directives: .common](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#linking-directives-common):

> Declares identifier to be globally visible but “common”.
>
> Common symbols are similar to globally visible symbols. However multiple object files may declare the same common symbol and they may have different types and sizes and references to a symbol get resolved against a common symbol with the largest size.
>
> Only one object file can initialize a common symbol and that must have the largest size among all other definitions of that common symbol from different object files.
>
> .common linking directive can be used only on variables with .global storage. It cannot be used on function symbols or on symbols with opaque type.

I've updated the logic and tests to only use `.common` for PTX 5.0 or greater and verified that the new tests now pass with `ptxas`.
1 parent fe893c9 commit 7daa65a

File tree

3 files changed

+44
-10
lines changed

3 files changed

+44
-10
lines changed

llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,14 +1022,16 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
10221022
const DataLayout &DL = getDataLayout();
10231023

10241024
// GlobalVariables are always constant pointers themselves.
1025-
PointerType *PTy = GVar->getType();
10261025
Type *ETy = GVar->getValueType();
10271026

10281027
if (GVar->hasExternalLinkage()) {
10291028
if (GVar->hasInitializer())
10301029
O << ".visible ";
10311030
else
10321031
O << ".extern ";
1032+
} else if (STI.getPTXVersion() >= 50 && GVar->hasCommonLinkage() &&
1033+
GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) {
1034+
O << ".common ";
10331035
} else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
10341036
GVar->hasAvailableExternallyLinkage() ||
10351037
GVar->hasCommonLinkage()) {
@@ -1141,7 +1143,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
11411143
}
11421144

11431145
O << ".";
1144-
emitPTXAddressSpace(PTy->getAddressSpace(), O);
1146+
emitPTXAddressSpace(GVar->getAddressSpace(), O);
11451147

11461148
if (isManaged(*GVar)) {
11471149
if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
@@ -1170,8 +1172,8 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
11701172
// Ptx allows variable initilization only for constant and global state
11711173
// spaces.
11721174
if (GVar->hasInitializer()) {
1173-
if ((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1174-
(PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) {
1175+
if ((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1176+
(GVar->getAddressSpace() == ADDRESS_SPACE_CONST)) {
11751177
const Constant *Initializer = GVar->getInitializer();
11761178
// 'undef' is treated as there is no value specified.
11771179
if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
@@ -1186,7 +1188,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
11861188
!isa<UndefValue>(GVar->getInitializer())) {
11871189
report_fatal_error("initial value of '" + GVar->getName() +
11881190
"' is not allowed in addrspace(" +
1189-
Twine(PTy->getAddressSpace()) + ")");
1191+
Twine(GVar->getAddressSpace()) + ")");
11901192
}
11911193
}
11921194
}
@@ -1205,8 +1207,8 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
12051207
ElementSize = DL.getTypeStoreSize(ETy);
12061208
// Ptx allows variable initilization only for constant and
12071209
// global state spaces.
1208-
if (((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1209-
(PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
1210+
if (((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1211+
(GVar->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
12101212
GVar->hasInitializer()) {
12111213
const Constant *Initializer = GVar->getInitializer();
12121214
if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -mattr=+ptx43 | FileCheck %s --check-prefixes CHECK,PTX43
2+
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -mattr=+ptx50 | FileCheck %s --check-prefixes CHECK,PTX50
3+
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=+ptx43 | %ptxas-verify %}
4+
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=+ptx50 | %ptxas-verify %}
5+
6+
; PTX43: .weak .global .align 4 .u32 g
7+
; PTX50: .common .global .align 4 .u32 g
8+
@g = common addrspace(1) global i32 0, align 4
9+
10+
; CHECK: .weak .const .align 4 .u32 c
11+
@c = common addrspace(4) global i32 0, align 4
12+
13+
; CHECK: .weak .shared .align 4 .u32 s
14+
@s = common addrspace(3) global i32 0, align 4
15+
16+
define i32 @f1() {
17+
%1 = load i32, ptr addrspace(1) @g
18+
ret i32 %1
19+
}
20+
21+
define i32 @f4() {
22+
%1 = load i32, ptr addrspace(4) @c
23+
ret i32 %1
24+
}
25+
26+
define i32 @f3() {
27+
%1 = load i32, ptr addrspace(3) @s
28+
ret i32 %1
29+
}

llvm/test/CodeGen/NVPTX/weak-global.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1-
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
2-
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
1+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=+ptx43 | FileCheck %s --check-prefix PTX43
2+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=+ptx50 | FileCheck %s --check-prefix PTX50
3+
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=+ptx43 | %ptxas-verify %}
4+
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=+ptx50 | %ptxas-verify %}
35

4-
; CHECK: .weak .global .align 4 .u32 g
6+
; PTX43: .weak .global .align 4 .u32 g
7+
; PTX50: .common .global .align 4 .u32 g
58
@g = common addrspace(1) global i32 zeroinitializer
69

710
define i32 @func0() {

0 commit comments

Comments
 (0)