Skip to content

Commit 4844c92

Browse files
committed
initial implementation
1 parent 2b002d6 commit 4844c92

File tree

23 files changed

+558
-112
lines changed

23 files changed

+558
-112
lines changed

clang/lib/Basic/Targets/NVPTX.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,10 +71,11 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
7171

7272
if (TargetPointerWidth == 32)
7373
resetDataLayout(
74-
"e-p:32:32-p6:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
74+
"e-p:32:32-p6:32:32-p7:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
7575
else if (Opts.NVPTXUseShortPointers)
76-
resetDataLayout("e-p3:32:32-p4:32:32-p5:32:32-p6:32:32-i64:64-i128:128-v16:"
77-
"16-v32:32-n16:32:64");
76+
resetDataLayout(
77+
"e-p3:32:32-p4:32:32-p5:32:32-p6:32:32-p7:32:32-i64:64-i128:128-v16:"
78+
"16-v32:32-n16:32:64");
7879
else
7980
resetDataLayout("e-p6:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
8081

clang/test/CodeGen/target-data.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@
160160

161161
// RUN: %clang_cc1 -triple nvptx-unknown -o - -emit-llvm %s | \
162162
// RUN: FileCheck %s -check-prefix=NVPTX
163-
// NVPTX: target datalayout = "e-p:32:32-p6:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"
163+
// NVPTX: target datalayout = "e-p:32:32-p6:32:32-p7:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"
164164

165165
// RUN: %clang_cc1 -triple nvptx64-unknown -o - -emit-llvm %s | \
166166
// RUN: FileCheck %s -check-prefix=NVPTX64

clang/test/CodeGenCUDA/builtins-sm90.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ __attribute__((global)) void kernel(long *out, void *ptr, unsigned u) {
5050
auto * sptr = (__attribute__((address_space(3))) void *)ptr;
5151
// CHECK: call ptr @llvm.nvvm.mapa(ptr %{{.*}}, i32 %{{.*}})
5252
out[i++] = (long) __nvvm_mapa(ptr, u);
53-
// CHECK: call ptr addrspace(3) @llvm.nvvm.mapa.shared.cluster(ptr addrspace(3) %{{.*}}, i32 %{{.*}})
53+
// CHECK: call ptr addrspace(7) @llvm.nvvm.mapa.shared.cluster(ptr addrspace(3) %{{.*}}, i32 %{{.*}})
5454
out[i++] = (long) __nvvm_mapa_shared_cluster(sptr, u);
5555
// CHECK: call i32 @llvm.nvvm.getctarank(ptr {{.*}})
5656
out[i++] = __nvvm_getctarank(ptr);

llvm/docs/NVPTXUsage.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -552,7 +552,7 @@ Syntax:
552552

553553
.. code-block:: llvm
554554
555-
declare void @llvm.nvvm.cp.async.bulk.global.to.shared.cluster(ptr addrspace(3) %dst, ptr addrspace(3) %mbar, ptr addrspace(1) %src, i32 %size, i16 %mc, i64 %ch, i1 %flag_mc, i1 %flag_ch)
555+
declare void @llvm.nvvm.cp.async.bulk.global.to.shared.cluster(ptr addrspace(7) %dst, ptr addrspace(3) %mbar, ptr addrspace(1) %src, i32 %size, i16 %mc, i64 %ch, i1 %flag_mc, i1 %flag_ch)
556556
557557
Overview:
558558
"""""""""
@@ -616,7 +616,7 @@ Syntax:
616616

617617
.. code-block:: llvm
618618
619-
declare void @llvm.nvvm.cp.async.bulk.shared.cta.to.cluster(ptr addrspace(3) %dst, ptr addrspace(3) %mbar, ptr addrspace(3) %src, i32 %size)
619+
declare void @llvm.nvvm.cp.async.bulk.shared.cta.to.cluster(ptr addrspace(7) %dst, ptr addrspace(3) %mbar, ptr addrspace(3) %src, i32 %size)
620620
621621
Overview:
622622
"""""""""
@@ -771,7 +771,7 @@ Syntax:
771771

772772
.. code-block:: llvm
773773
774-
declare void @llvm.nvvm.cp.async.bulk.tensor.g2s.tile.1d(ptr addrspace(3) %dst, ptr addrspace(3) %bar, ptr %tensor_map, i32 %d0, i16 %mc, i64 %ch, i1 %flag_mc, i1 %flag_ch)
774+
declare void @llvm.nvvm.cp.async.bulk.tensor.g2s.tile.1d(ptr addrspace(7) %dst, ptr addrspace(3) %bar, ptr %tensor_map, i32 %d0, i16 %mc, i64 %ch, i1 %flag_mc, i1 %flag_ch)
775775
declare void @llvm.nvvm.cp.async.bulk.tensor.g2s.tile.2d(..., i32 %d0, i32 %d1, ...)
776776
declare void @llvm.nvvm.cp.async.bulk.tensor.g2s.tile.3d(..., i32 %d0, i32 %d1, i32 %d2, ...)
777777
declare void @llvm.nvvm.cp.async.bulk.tensor.g2s.tile.4d(..., i32 %d0, i32 %d1, i32 %d2, i32 %d3, ...)

llvm/include/llvm/IR/IntrinsicsNVVM.td

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ def llvm_global_ptr_ty : LLVMQualPointerType<1>; // (global)ptr
131131
def llvm_shared_ptr_ty : LLVMQualPointerType<3>; // (shared)ptr
132132
def llvm_local_ptr_ty : LLVMQualPointerType<5>; // (local)ptr
133133
def llvm_tmem_ptr_ty : LLVMQualPointerType<6>; // (tensor memory)ptr
134+
def llvm_dshared_ptr_ty : LLVMQualPointerType<7>; // (dshared)ptr
134135

135136
//
136137
// MISC
@@ -691,15 +692,15 @@ class CP_ASYNC_BULK_TENSOR_G2S_INTR<int dim, string mode> {
691692
list<LLVMType> Im2ColOffsetsTy = !listsplat(llvm_i16_ty, NumIm2ColOffsets);
692693
list<LLVMType> TensorDimsTy = !listsplat(llvm_i32_ty, dim);
693694
list<LLVMType> ArgsTy = !listconcat(
694-
[llvm_shared_ptr_ty, // dst_smem_ptr
695-
llvm_shared_ptr_ty, // mbarrier_smem_ptr
696-
llvm_ptr_ty], // tensormap_ptr
697-
TensorDimsTy, // actual tensor dims
698-
Im2ColOffsetsTy, // im2col offsets
699-
[llvm_i16_ty, // cta_mask
700-
llvm_i64_ty, // cache_hint
701-
llvm_i1_ty, // Flag for cta_mask
702-
llvm_i1_ty] // Flag for cache_hint
695+
[llvm_dshared_ptr_ty, // dst_smem_ptr
696+
llvm_shared_ptr_ty, // mbarrier_smem_ptr
697+
llvm_ptr_ty], // tensormap_ptr
698+
TensorDimsTy, // actual tensor dims
699+
Im2ColOffsetsTy, // im2col offsets
700+
[llvm_i16_ty, // cta_mask
701+
llvm_i64_ty, // cache_hint
702+
llvm_i1_ty, // Flag for cta_mask
703+
llvm_i1_ty] // Flag for cache_hint
703704
);
704705

705706
int TempFlagsStartIdx = !add(dim, 5);
@@ -5118,7 +5119,7 @@ def int_nvvm_mapa
51185119
[IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
51195120
"llvm.nvvm.mapa">;
51205121
def int_nvvm_mapa_shared_cluster
5121-
: DefaultAttrsIntrinsic<[llvm_shared_ptr_ty], [llvm_shared_ptr_ty, llvm_i32_ty],
5122+
: DefaultAttrsIntrinsic<[llvm_dshared_ptr_ty], [llvm_shared_ptr_ty, llvm_i32_ty],
51225123
[IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
51235124
"llvm.nvvm.mapa.shared.cluster">;
51245125
def int_nvvm_getctarank
@@ -5218,14 +5219,14 @@ def int_nvvm_discard_L2 : DefaultAttrsIntrinsic<[],
52185219
// From Global to Shared Cluster
52195220
def int_nvvm_cp_async_bulk_global_to_shared_cluster
52205221
: DefaultAttrsIntrinsic<[],
5221-
[llvm_shared_ptr_ty, // dst_smem_ptr
5222-
llvm_shared_ptr_ty, // mbarrier_ptr
5223-
llvm_global_ptr_ty, // src_gmem_ptr
5224-
llvm_i32_ty, // copy_size
5225-
llvm_i16_ty, // cta_mask
5226-
llvm_i64_ty, // cache_hint
5227-
llvm_i1_ty, // Flag for cta_mask
5228-
llvm_i1_ty], // Flag for cache_hint
5222+
[llvm_dshared_ptr_ty, // dst_dsmem_ptr
5223+
llvm_shared_ptr_ty, // mbarrier_ptr
5224+
llvm_global_ptr_ty, // src_gmem_ptr
5225+
llvm_i32_ty, // copy_size
5226+
llvm_i16_ty, // cta_mask
5227+
llvm_i64_ty, // cache_hint
5228+
llvm_i1_ty, // Flag for cta_mask
5229+
llvm_i1_ty], // Flag for cache_hint
52295230
[IntrConvergent, IntrArgMemOnly,
52305231
WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>,
52315232
NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,
@@ -5235,10 +5236,10 @@ def int_nvvm_cp_async_bulk_global_to_shared_cluster
52355236
// From Shared CTA to Shared Cluster
52365237
def int_nvvm_cp_async_bulk_shared_cta_to_cluster
52375238
: DefaultAttrsIntrinsic<[],
5238-
[llvm_shared_ptr_ty, // dst_smem_ptr
5239-
llvm_shared_ptr_ty, // mbarrier_ptr
5240-
llvm_shared_ptr_ty, // src_smem_ptr
5241-
llvm_i32_ty], // copy_size
5239+
[llvm_dshared_ptr_ty, // dst_dsmem_ptr
5240+
llvm_shared_ptr_ty, // mbarrier_ptr
5241+
llvm_shared_ptr_ty, // src_smem_ptr
5242+
llvm_i32_ty], // copy_size
52425243
[IntrConvergent, IntrArgMemOnly,
52435244
WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>,
52445245
NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,

llvm/include/llvm/Support/NVPTXAddrSpace.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ enum AddressSpace : unsigned {
2525
ADDRESS_SPACE_CONST = 4,
2626
ADDRESS_SPACE_LOCAL = 5,
2727
ADDRESS_SPACE_TENSOR = 6,
28+
ADDRESS_SPACE_DSHARED = 7,
2829

2930
ADDRESS_SPACE_PARAM = 101,
3031
};

llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
#include "llvm/Support/AMDGPUAddrSpace.h"
4747
#include "llvm/Support/CommandLine.h"
4848
#include "llvm/Support/ErrorHandling.h"
49+
#include "llvm/Support/NVPTXAddrSpace.h"
4950
#include "llvm/Support/Regex.h"
5051
#include "llvm/TargetParser/Triple.h"
5152
#include <cstdint>
@@ -938,6 +939,47 @@ static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
938939
return false; // No other 'arm.*', 'aarch64.*'.
939940
}
940941

942+
static Intrinsic::ID shouldUpgradeNVPTXDSharedIntrinsic(Function *F,
943+
StringRef Name) {
944+
if (Name.consume_front("mapa.shared.cluster"))
945+
if (F->getReturnType()->getPointerAddressSpace() ==
946+
NVPTXAS::ADDRESS_SPACE_SHARED)
947+
return Intrinsic::nvvm_mapa_shared_cluster;
948+
949+
if (Name.consume_front("cp.async.bulk.")) {
950+
Intrinsic::ID ID =
951+
StringSwitch<Intrinsic::ID>(Name)
952+
.Case("global.to.shared.cluster",
953+
Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
954+
.Case("shared.cta.to.cluster",
955+
Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
956+
.Case("tensor.g2s.im2col.3d",
957+
Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
958+
.Case("tensor.g2s.im2col.4d",
959+
Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
960+
.Case("tensor.g2s.im2col.5d",
961+
Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
962+
.Case("tensor.g2s.tile.1d",
963+
Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
964+
.Case("tensor.g2s.tile.2d",
965+
Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
966+
.Case("tensor.g2s.tile.3d",
967+
Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
968+
.Case("tensor.g2s.tile.4d",
969+
Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
970+
.Case("tensor.g2s.tile.5d",
971+
Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
972+
.Default(Intrinsic::not_intrinsic);
973+
974+
if (ID != Intrinsic::not_intrinsic)
975+
if (F->getArg(0)->getType()->getPointerAddressSpace() ==
976+
NVPTXAS::ADDRESS_SPACE_SHARED)
977+
return ID;
978+
}
979+
980+
return Intrinsic::not_intrinsic;
981+
}
982+
941983
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
942984
if (Name.consume_front("fma.rn."))
943985
return StringSwitch<Intrinsic::ID>(Name)
@@ -1278,6 +1320,14 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
12781320
}
12791321
}
12801322

1323+
// Upgrade Distributed Shared Memory Intrinsics
1324+
Intrinsic::ID IID = shouldUpgradeNVPTXDSharedIntrinsic(F, Name);
1325+
if (IID != Intrinsic::not_intrinsic) {
1326+
rename(F);
1327+
NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1328+
return true;
1329+
}
1330+
12811331
// The following nvvm intrinsics correspond exactly to an LLVM idiom, but
12821332
// not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
12831333
//
@@ -4718,6 +4768,43 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
47184768
CI->eraseFromParent();
47194769
return;
47204770
}
4771+
case Intrinsic::nvvm_mapa_shared_cluster: {
4772+
// Create a new call with the correct address space.
4773+
NewCall =
4774+
Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
4775+
Value *Res = NewCall;
4776+
Res = Builder.CreateAddrSpaceCast(
4777+
Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_GENERIC));
4778+
Res = Builder.CreateAddrSpaceCast(
4779+
Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
4780+
NewCall->takeName(CI);
4781+
CI->replaceAllUsesWith(Res);
4782+
CI->eraseFromParent();
4783+
return;
4784+
}
4785+
case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
4786+
case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster:
4787+
case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
4788+
case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
4789+
case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
4790+
case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
4791+
case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
4792+
case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
4793+
case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
4794+
case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
4795+
4796+
SmallVector<Value *, 4> Args(CI->args());
4797+
Args[0] = Builder.CreateAddrSpaceCast(
4798+
Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_GENERIC));
4799+
Args[0] = Builder.CreateAddrSpaceCast(
4800+
Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_DSHARED));
4801+
4802+
NewCall = Builder.CreateCall(NewFn, Args);
4803+
NewCall->takeName(CI);
4804+
CI->replaceAllUsesWith(NewCall);
4805+
CI->eraseFromParent();
4806+
return;
4807+
}
47214808
case Intrinsic::riscv_sha256sig0:
47224809
case Intrinsic::riscv_sha256sig1:
47234810
case Intrinsic::riscv_sha256sum0:

llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,7 @@ void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
285285
case NVPTX::AddressSpace::Global:
286286
case NVPTX::AddressSpace::Const:
287287
case NVPTX::AddressSpace::Shared:
288+
case NVPTX::AddressSpace::Dshared:
288289
case NVPTX::AddressSpace::Param:
289290
case NVPTX::AddressSpace::Local:
290291
O << "." << A;

llvm/lib/Target/NVPTX/NVPTX.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@ enum AddressSpace : AddressSpaceUnderlyingType {
176176
Shared = 3,
177177
Const = 4,
178178
Local = 5,
179+
Dshared = 7,
179180

180181
// NVPTX Backend Private:
181182
Param = 101

llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,11 @@ static AliasResult::Kind getAliasResult(unsigned AS1, unsigned AS2) {
8686
// TODO: cvta.param is not yet supported. We need to change aliasing
8787
// rules once it is added.
8888

89+
// Distributed shared memory aliases with shared memory.
90+
if (((AS1 == ADDRESS_SPACE_SHARED) && (AS2 == ADDRESS_SPACE_DSHARED)) ||
91+
((AS1 == ADDRESS_SPACE_DSHARED) && (AS2 == ADDRESS_SPACE_SHARED)))
92+
return AliasResult::MayAlias;
93+
8994
return (AS1 == AS2 ? AliasResult::MayAlias : AliasResult::NoAlias);
9095
}
9196

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -513,6 +513,8 @@ static std::optional<unsigned> convertAS(unsigned AS) {
513513
return NVPTX::AddressSpace::Global;
514514
case llvm::ADDRESS_SPACE_SHARED:
515515
return NVPTX::AddressSpace::Shared;
516+
case llvm::ADDRESS_SPACE_DSHARED:
517+
return NVPTX::AddressSpace::Dshared;
516518
case llvm::ADDRESS_SPACE_GENERIC:
517519
return NVPTX::AddressSpace::Generic;
518520
case llvm::ADDRESS_SPACE_PARAM:
@@ -658,7 +660,8 @@ getOperationOrderings(MemSDNode *N, const NVPTXSubtarget *Subtarget) {
658660
bool AddrGenericOrGlobalOrShared =
659661
(CodeAddrSpace == NVPTX::AddressSpace::Generic ||
660662
CodeAddrSpace == NVPTX::AddressSpace::Global ||
661-
CodeAddrSpace == NVPTX::AddressSpace::Shared);
663+
CodeAddrSpace == NVPTX::AddressSpace::Shared ||
664+
CodeAddrSpace == NVPTX::AddressSpace::Dshared);
662665
if (!AddrGenericOrGlobalOrShared)
663666
return NVPTX::Ordering::NotAtomic;
664667

@@ -979,6 +982,9 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
979982
case ADDRESS_SPACE_SHARED:
980983
Opc = TM.is64Bit() ? NVPTX::cvta_shared_64 : NVPTX::cvta_shared;
981984
break;
985+
case ADDRESS_SPACE_DSHARED:
986+
Opc = TM.is64Bit() ? NVPTX::cvta_dshared_64 : NVPTX::cvta_dshared;
987+
break;
982988
case ADDRESS_SPACE_CONST:
983989
Opc = TM.is64Bit() ? NVPTX::cvta_const_64 : NVPTX::cvta_const;
984990
break;
@@ -1001,6 +1007,9 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
10011007
case ADDRESS_SPACE_SHARED:
10021008
Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_64 : NVPTX::cvta_to_shared;
10031009
break;
1010+
case ADDRESS_SPACE_DSHARED:
1011+
Opc = TM.is64Bit() ? NVPTX::cvta_to_dshared_64 : NVPTX::cvta_to_dshared;
1012+
break;
10041013
case ADDRESS_SPACE_CONST:
10051014
Opc = TM.is64Bit() ? NVPTX::cvta_to_const_64 : NVPTX::cvta_to_const;
10061015
break;

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ def hasAtomBitwise64 : Predicate<"Subtarget->hasAtomBitwise64()">;
137137
def hasAtomMinMax64 : Predicate<"Subtarget->hasAtomMinMax64()">;
138138
def hasVote : Predicate<"Subtarget->hasVote()">;
139139
def hasDouble : Predicate<"Subtarget->hasDouble()">;
140+
def hasClusters : Predicate<"Subtarget->hasClusters()">;
140141
def hasLDG : Predicate<"Subtarget->hasLDG()">;
141142
def hasLDU : Predicate<"Subtarget->hasLDU()">;
142143
def hasPTXASUnreachableBug : Predicate<"Subtarget->hasPTXASUnreachableBug()">;

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ def AS_match {
3333
code shared = [{
3434
return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
3535
}];
36+
code dshared = [{
37+
return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_DSHARED);
38+
}];
3639
code global = [{
3740
return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
3841
}];
@@ -2039,10 +2042,11 @@ class ATOMIC_GLOBAL_CHK <dag frag>
20392042
: PatFrag<!setdagop(frag, ops), frag, AS_match.global>;
20402043
class ATOMIC_SHARED_CHK <dag frag>
20412044
: PatFrag<!setdagop(frag, ops), frag, AS_match.shared>;
2045+
class ATOMIC_DSHARED_CHK <dag frag>
2046+
: PatFrag<!setdagop(frag, ops), frag, AS_match.dshared>;
20422047
class ATOMIC_GENERIC_CHK <dag frag>
20432048
: PatFrag<!setdagop(frag, ops), frag, AS_match.generic>;
20442049

2045-
20462050
multiclass F_ATOMIC_2<RegTyInfo t, string sem_str, string as_str, string op_str,
20472051
SDPatternOperator op, list<Predicate> preds> {
20482052
defvar asm_str = "atom" # sem_str # as_str # "." # op_str # " \t$dst, [$addr], $b;";
@@ -2094,13 +2098,15 @@ multiclass F_ATOMIC_2_AS<RegTyInfo t, SDPatternOperator frag, string op_str, lis
20942098
defvar frag_pat = (frag node:$a, node:$b);
20952099
defm _G : F_ATOMIC_2<t, "", ".global", op_str, ATOMIC_GLOBAL_CHK<frag_pat>, preds>;
20962100
defm _S : F_ATOMIC_2<t, "", ".shared", op_str, ATOMIC_SHARED_CHK<frag_pat>, preds>;
2101+
defm _DS : F_ATOMIC_2<t, "", ".shared::cluster", op_str, ATOMIC_DSHARED_CHK<frag_pat>, !listconcat([hasSM<80>], preds)>;
20972102
defm _GEN : F_ATOMIC_2<t, "", "", op_str, ATOMIC_GENERIC_CHK<frag_pat>, preds>;
20982103
}
20992104

21002105
multiclass F_ATOMIC_3_AS<RegTyInfo t, SDPatternOperator frag, string sem_str, string op_str, list<Predicate> preds = []> {
21012106
defvar frag_pat = (frag node:$a, node:$b, node:$c);
21022107
defm _G : F_ATOMIC_3<t, sem_str, ".global", op_str, ATOMIC_GLOBAL_CHK<frag_pat>, preds>;
21032108
defm _S : F_ATOMIC_3<t, sem_str, ".shared", op_str, ATOMIC_SHARED_CHK<frag_pat>, preds>;
2109+
defm _DS : F_ATOMIC_3<t, sem_str, ".shared::cluster", op_str, ATOMIC_DSHARED_CHK<frag_pat>, !listconcat([hasSM<80>], preds)>;
21042110
defm _GEN : F_ATOMIC_3<t, sem_str, "", op_str, ATOMIC_GENERIC_CHK<frag_pat>, preds>;
21052111
}
21062112

@@ -2397,11 +2403,13 @@ multiclass G_TO_NG<string Str> {
23972403

23982404
defm cvta_local : NG_TO_G<"local">;
23992405
defm cvta_shared : NG_TO_G<"shared">;
2406+
defm cvta_dshared : NG_TO_G<"shared::cluster">;
24002407
defm cvta_global : NG_TO_G<"global">;
24012408
defm cvta_const : NG_TO_G<"const">;
24022409

24032410
defm cvta_to_local : G_TO_NG<"local">;
24042411
defm cvta_to_shared : G_TO_NG<"shared">;
2412+
defm cvta_to_dshared : G_TO_NG<"shared::cluster">;
24052413
defm cvta_to_global : G_TO_NG<"global">;
24062414
defm cvta_to_const : G_TO_NG<"const">;
24072415

0 commit comments

Comments
 (0)