Skip to content

Commit d6a68be

Browse files
authored
[NVPTX] Add support for Shared Cluster Memory address space [1/2] (#135444)
Adds support for the new Shared Cluster Memory address space (SHARED_CLUSTER, addrspace 7). See https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#distributed-shared-memory for details. This change: (1) updates the address space structures and datalayout to contain the new space; (2) adds new intrinsics that use this new address space; (3) updates the NVPTX alias analysis. The existing intrinsics are updated in #136768.
1 parent afda4c2 commit d6a68be

File tree

17 files changed

+549
-40
lines changed

17 files changed

+549
-40
lines changed

clang/lib/Basic/Targets/NVPTX.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,10 +71,11 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
7171

7272
if (TargetPointerWidth == 32)
7373
resetDataLayout(
74-
"e-p:32:32-p6:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
74+
"e-p:32:32-p6:32:32-p7:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
7575
else if (Opts.NVPTXUseShortPointers)
76-
resetDataLayout("e-p3:32:32-p4:32:32-p5:32:32-p6:32:32-i64:64-i128:128-v16:"
77-
"16-v32:32-n16:32:64");
76+
resetDataLayout(
77+
"e-p3:32:32-p4:32:32-p5:32:32-p6:32:32-p7:32:32-i64:64-i128:128-v16:"
78+
"16-v32:32-n16:32:64");
7879
else
7980
resetDataLayout("e-p6:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
8081

clang/test/CodeGen/target-data.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@
160160

161161
// RUN: %clang_cc1 -triple nvptx-unknown -o - -emit-llvm %s | \
162162
// RUN: FileCheck %s -check-prefix=NVPTX
163-
// NVPTX: target datalayout = "e-p:32:32-p6:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"
163+
// NVPTX: target datalayout = "e-p:32:32-p6:32:32-p7:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"
164164

165165
// RUN: %clang_cc1 -triple nvptx64-unknown -o - -emit-llvm %s | \
166166
// RUN: FileCheck %s -check-prefix=NVPTX64

llvm/include/llvm/Support/NVPTXAddrSpace.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ enum AddressSpace : unsigned {
2525
ADDRESS_SPACE_CONST = 4,
2626
ADDRESS_SPACE_LOCAL = 5,
2727
ADDRESS_SPACE_TENSOR = 6,
28+
ADDRESS_SPACE_SHARED_CLUSTER = 7,
2829

2930
ADDRESS_SPACE_PARAM = 101,
3031
};

llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,7 @@ void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
285285
case NVPTX::AddressSpace::Global:
286286
case NVPTX::AddressSpace::Const:
287287
case NVPTX::AddressSpace::Shared:
288+
case NVPTX::AddressSpace::SharedCluster:
288289
case NVPTX::AddressSpace::Param:
289290
case NVPTX::AddressSpace::Local:
290291
O << "." << A;

llvm/lib/Target/NVPTX/NVPTX.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@ enum AddressSpace : AddressSpaceUnderlyingType {
176176
Shared = 3,
177177
Const = 4,
178178
Local = 5,
179+
SharedCluster = 7,
179180

180181
// NVPTX Backend Private:
181182
Param = 101

llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,12 @@ static AliasResult::Kind getAliasResult(unsigned AS1, unsigned AS2) {
8686
// TODO: cvta.param is not yet supported. We need to change aliasing
8787
// rules once it is added.
8888

89+
// Distributed shared memory aliases with shared memory.
90+
if (((AS1 == ADDRESS_SPACE_SHARED) &&
91+
(AS2 == ADDRESS_SPACE_SHARED_CLUSTER)) ||
92+
((AS1 == ADDRESS_SPACE_SHARED_CLUSTER) && (AS2 == ADDRESS_SPACE_SHARED)))
93+
return AliasResult::MayAlias;
94+
8995
return (AS1 == AS2 ? AliasResult::MayAlias : AliasResult::NoAlias);
9096
}
9197

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -513,6 +513,8 @@ static std::optional<unsigned> convertAS(unsigned AS) {
513513
return NVPTX::AddressSpace::Global;
514514
case llvm::ADDRESS_SPACE_SHARED:
515515
return NVPTX::AddressSpace::Shared;
516+
case llvm::ADDRESS_SPACE_SHARED_CLUSTER:
517+
return NVPTX::AddressSpace::SharedCluster;
516518
case llvm::ADDRESS_SPACE_GENERIC:
517519
return NVPTX::AddressSpace::Generic;
518520
case llvm::ADDRESS_SPACE_PARAM:
@@ -658,7 +660,8 @@ getOperationOrderings(MemSDNode *N, const NVPTXSubtarget *Subtarget) {
658660
bool AddrGenericOrGlobalOrShared =
659661
(CodeAddrSpace == NVPTX::AddressSpace::Generic ||
660662
CodeAddrSpace == NVPTX::AddressSpace::Global ||
661-
CodeAddrSpace == NVPTX::AddressSpace::Shared);
663+
CodeAddrSpace == NVPTX::AddressSpace::Shared ||
664+
CodeAddrSpace == NVPTX::AddressSpace::SharedCluster);
662665
if (!AddrGenericOrGlobalOrShared)
663666
return NVPTX::Ordering::NotAtomic;
664667

@@ -979,6 +982,12 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
979982
case ADDRESS_SPACE_SHARED:
980983
Opc = TM.is64Bit() ? NVPTX::cvta_shared_64 : NVPTX::cvta_shared;
981984
break;
985+
case ADDRESS_SPACE_SHARED_CLUSTER:
986+
if (!TM.is64Bit())
987+
report_fatal_error(
988+
"Shared cluster address space is only supported in 64-bit mode");
989+
Opc = NVPTX::cvta_shared_cluster_64;
990+
break;
982991
case ADDRESS_SPACE_CONST:
983992
Opc = TM.is64Bit() ? NVPTX::cvta_const_64 : NVPTX::cvta_const;
984993
break;
@@ -1004,6 +1013,12 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
10041013
case ADDRESS_SPACE_SHARED:
10051014
Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_64 : NVPTX::cvta_to_shared;
10061015
break;
1016+
case ADDRESS_SPACE_SHARED_CLUSTER:
1017+
if (!TM.is64Bit())
1018+
report_fatal_error(
1019+
"Shared cluster address space is only supported in 64-bit mode");
1020+
Opc = NVPTX::cvta_to_shared_cluster_64;
1021+
break;
10071022
case ADDRESS_SPACE_CONST:
10081023
Opc = TM.is64Bit() ? NVPTX::cvta_to_const_64 : NVPTX::cvta_to_const;
10091024
break;

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3043,8 +3043,27 @@ SDValue NVPTXTargetLowering::LowerADDRSPACECAST(SDValue Op,
30433043
unsigned SrcAS = N->getSrcAddressSpace();
30443044
unsigned DestAS = N->getDestAddressSpace();
30453045
if (SrcAS != llvm::ADDRESS_SPACE_GENERIC &&
3046-
DestAS != llvm::ADDRESS_SPACE_GENERIC)
3046+
DestAS != llvm::ADDRESS_SPACE_GENERIC) {
3047+
// Shared and SharedCluster can be converted to each other through generic
3048+
// space
3049+
if ((SrcAS == llvm::ADDRESS_SPACE_SHARED &&
3050+
DestAS == llvm::ADDRESS_SPACE_SHARED_CLUSTER) ||
3051+
(SrcAS == llvm::ADDRESS_SPACE_SHARED_CLUSTER &&
3052+
DestAS == llvm::ADDRESS_SPACE_SHARED)) {
3053+
SDLoc DL(Op.getNode());
3054+
const MVT GenerictVT =
3055+
getPointerTy(DAG.getDataLayout(), ADDRESS_SPACE_GENERIC);
3056+
SDValue GenericConversion = DAG.getAddrSpaceCast(
3057+
DL, GenerictVT, Op.getOperand(0), SrcAS, ADDRESS_SPACE_GENERIC);
3058+
SDValue SharedClusterConversion =
3059+
DAG.getAddrSpaceCast(DL, Op.getValueType(), GenericConversion,
3060+
ADDRESS_SPACE_GENERIC, DestAS);
3061+
return SharedClusterConversion;
3062+
}
3063+
30473064
return DAG.getUNDEF(Op.getValueType());
3065+
}
3066+
30483067
return Op;
30493068
}
30503069

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ def hasAtomBitwise64 : Predicate<"Subtarget->hasAtomBitwise64()">;
137137
def hasAtomMinMax64 : Predicate<"Subtarget->hasAtomMinMax64()">;
138138
def hasVote : Predicate<"Subtarget->hasVote()">;
139139
def hasDouble : Predicate<"Subtarget->hasDouble()">;
140+
def hasClusters : Predicate<"Subtarget->hasClusters()">;
140141
def hasLDG : Predicate<"Subtarget->hasLDG()">;
141142
def hasLDU : Predicate<"Subtarget->hasLDU()">;
142143
def hasPTXASUnreachableBug : Predicate<"Subtarget->hasPTXASUnreachableBug()">;

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ def AS_match {
3333
code shared = [{
3434
return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
3535
}];
36+
code shared_cluster = [{
37+
return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED_CLUSTER);
38+
}];
3639
code global = [{
3740
return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
3841
}];
@@ -2039,10 +2042,11 @@ class ATOMIC_GLOBAL_CHK <dag frag>
20392042
: PatFrag<!setdagop(frag, ops), frag, AS_match.global>;
20402043
class ATOMIC_SHARED_CHK <dag frag>
20412044
: PatFrag<!setdagop(frag, ops), frag, AS_match.shared>;
2045+
class ATOMIC_SHARED_CLUSTER_CHK <dag frag>
2046+
: PatFrag<!setdagop(frag, ops), frag, AS_match.shared_cluster>;
20422047
class ATOMIC_GENERIC_CHK <dag frag>
20432048
: PatFrag<!setdagop(frag, ops), frag, AS_match.generic>;
20442049

2045-
20462050
multiclass F_ATOMIC_2<RegTyInfo t, string sem_str, string as_str, string op_str,
20472051
SDPatternOperator op, list<Predicate> preds> {
20482052
defvar asm_str = "atom" # sem_str # as_str # "." # op_str # " \t$dst, [$addr], $b;";
@@ -2094,13 +2098,15 @@ multiclass F_ATOMIC_2_AS<RegTyInfo t, SDPatternOperator frag, string op_str, lis
20942098
defvar frag_pat = (frag node:$a, node:$b);
20952099
defm _G : F_ATOMIC_2<t, "", ".global", op_str, ATOMIC_GLOBAL_CHK<frag_pat>, preds>;
20962100
defm _S : F_ATOMIC_2<t, "", ".shared", op_str, ATOMIC_SHARED_CHK<frag_pat>, preds>;
2101+
defm _S_C : F_ATOMIC_2<t, "", ".shared::cluster", op_str, ATOMIC_SHARED_CLUSTER_CHK<frag_pat>, !listconcat([hasClusters], preds)>;
20972102
defm _GEN : F_ATOMIC_2<t, "", "", op_str, ATOMIC_GENERIC_CHK<frag_pat>, preds>;
20982103
}
20992104

21002105
multiclass F_ATOMIC_3_AS<RegTyInfo t, SDPatternOperator frag, string sem_str, string op_str, list<Predicate> preds = []> {
21012106
defvar frag_pat = (frag node:$a, node:$b, node:$c);
21022107
defm _G : F_ATOMIC_3<t, sem_str, ".global", op_str, ATOMIC_GLOBAL_CHK<frag_pat>, preds>;
21032108
defm _S : F_ATOMIC_3<t, sem_str, ".shared", op_str, ATOMIC_SHARED_CHK<frag_pat>, preds>;
2109+
defm _S_C : F_ATOMIC_3<t, sem_str, ".shared::cluster", op_str, ATOMIC_SHARED_CLUSTER_CHK<frag_pat>, !listconcat([hasClusters], preds)>;
21042110
defm _GEN : F_ATOMIC_3<t, sem_str, "", op_str, ATOMIC_GENERIC_CHK<frag_pat>, preds>;
21052111
}
21062112

@@ -2381,25 +2387,32 @@ def INT_PTX_LDG_G_v4i32_ELE : VLDG_G_ELE_V4<"u32", Int32Regs>;
23812387
def INT_PTX_LDG_G_v4f32_ELE : VLDG_G_ELE_V4<"f32", Float32Regs>;
23822388

23832389

2384-
multiclass NG_TO_G<string Str> {
2385-
def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
2386-
"cvta." # Str # ".u32 \t$result, $src;", []>;
2387-
def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
2388-
"cvta." # Str # ".u64 \t$result, $src;", []>;
2390+
multiclass NG_TO_G<string Str, bit Supports32 = 1, list<Predicate> Preds = []> {
2391+
if Supports32 then
2392+
def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
2393+
"cvta." # Str # ".u32 \t$result, $src;", []>, Requires<Preds>;
2394+
2395+
def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
2396+
"cvta." # Str # ".u64 \t$result, $src;", []>, Requires<Preds>;
23892397
}
23902398

2391-
multiclass G_TO_NG<string Str> {
2392-
def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
2393-
"cvta.to." # Str # ".u32 \t$result, $src;", []>;
2394-
def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
2395-
"cvta.to." # Str # ".u64 \t$result, $src;", []>;
2399+
multiclass G_TO_NG<string Str, bit Supports32 = 1, list<Predicate> Preds = []> {
2400+
if Supports32 then
2401+
def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
2402+
"cvta.to." # Str # ".u32 \t$result, $src;", []>, Requires<Preds>;
2403+
2404+
def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
2405+
"cvta.to." # Str # ".u64 \t$result, $src;", []>, Requires<Preds>;
23962406
}
23972407

23982408
foreach space = ["local", "shared", "global", "const", "param"] in {
23992409
defm cvta_#space : NG_TO_G<space>;
24002410
defm cvta_to_#space : G_TO_NG<space>;
24012411
}
24022412

2413+
defm cvta_shared_cluster : NG_TO_G<"shared::cluster", false, [hasClusters]>;
2414+
defm cvta_to_shared_cluster : G_TO_NG<"shared::cluster", false, [hasClusters]>;
2415+
24032416
def : Pat<(int_nvvm_ptr_param_to_gen i32:$src),
24042417
(cvta_param $src)>;
24052418

llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -117,13 +117,15 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXTarget() {
117117
static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
118118
std::string Ret = "e";
119119

120-
if (!is64Bit)
121-
Ret += "-p:32:32";
122-
else if (UseShortPointers)
123-
Ret += "-p3:32:32-p4:32:32-p5:32:32";
124-
125120
// Tensor Memory (addrspace:6) is always 32-bits.
126-
Ret += "-p6:32:32";
121+
// Distributed Shared Memory (addrspace:7) follows shared memory
122+
// (addrspace:3).
123+
if (!is64Bit)
124+
Ret += "-p:32:32-p6:32:32-p7:32:32";
125+
else if (UseShortPointers) {
126+
Ret += "-p3:32:32-p4:32:32-p5:32:32-p6:32:32-p7:32:32";
127+
} else
128+
Ret += "-p6:32:32";
127129

128130
Ret += "-i64:64-i128:128-v16:16-v32:32-n16:32:64";
129131

@@ -280,8 +282,10 @@ NVPTXTargetMachine::getPredicatedAddrSpace(const Value *V) const {
280282
case Intrinsic::nvvm_isspacep_local:
281283
return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_LOCAL);
282284
case Intrinsic::nvvm_isspacep_shared:
283-
case Intrinsic::nvvm_isspacep_shared_cluster:
284285
return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_SHARED);
286+
case Intrinsic::nvvm_isspacep_shared_cluster:
287+
return std::make_pair(II->getArgOperand(0),
288+
llvm::ADDRESS_SPACE_SHARED_CLUSTER);
285289
default:
286290
break;
287291
}

llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -424,12 +424,13 @@ static std::optional<bool> evaluateIsSpace(Intrinsic::ID IID, unsigned AS) {
424424
case Intrinsic::nvvm_isspacep_local:
425425
return AS == NVPTXAS::ADDRESS_SPACE_LOCAL;
426426
case Intrinsic::nvvm_isspacep_shared:
427+
// If shared cluster this can't be evaluated at compile time.
428+
if (AS == NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER)
429+
return std::nullopt;
427430
return AS == NVPTXAS::ADDRESS_SPACE_SHARED;
428431
case Intrinsic::nvvm_isspacep_shared_cluster:
429-
// We can't tell shared from shared_cluster at compile time from AS alone,
430-
// but it can't be either is AS is not shared.
431-
return AS == NVPTXAS::ADDRESS_SPACE_SHARED ? std::nullopt
432-
: std::optional{false};
432+
return AS == NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER ||
433+
AS == NVPTXAS::ADDRESS_SPACE_SHARED;
433434
case Intrinsic::nvvm_isspacep_const:
434435
return AS == NVPTXAS::ADDRESS_SPACE_CONST;
435436
default:

llvm/lib/Target/NVPTX/NVPTXUtilities.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,8 @@ inline std::string AddressSpaceToString(AddressSpace A) {
168168
return "const";
169169
case AddressSpace::Shared:
170170
return "shared";
171+
case AddressSpace::SharedCluster:
172+
return "shared::cluster";
171173
case AddressSpace::Param:
172174
return "param";
173175
case AddressSpace::Local:

0 commit comments

Comments
 (0)