Skip to content

Commit 3d4964f

Browse files
committed
[NVPTX] add new sm90-specific intrinsics.
Differential Revision: https://reviews.llvm.org/D151009
1 parent ffb635c commit 3d4964f

File tree

4 files changed

+289
-94
lines changed

4 files changed

+289
-94
lines changed

llvm/include/llvm/IR/IntrinsicsNVVM.td

Lines changed: 52 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1569,25 +1569,29 @@ def int_nvvm_reflect :
15691569

15701570
// isspacep.{const, global, local, shared}
15711571
def int_nvvm_isspacep_const
1572-
: DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
1572+
: DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
15731573
[IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
15741574
"llvm.nvvm.isspacep.const">,
15751575
ClangBuiltin<"__nvvm_isspacep_const">;
15761576
def int_nvvm_isspacep_global
1577-
: DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
1577+
: DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
15781578
[IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
15791579
"llvm.nvvm.isspacep.global">,
15801580
ClangBuiltin<"__nvvm_isspacep_global">;
15811581
def int_nvvm_isspacep_local
1582-
: DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
1582+
: DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
15831583
[IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
15841584
"llvm.nvvm.isspacep.local">,
15851585
ClangBuiltin<"__nvvm_isspacep_local">;
15861586
def int_nvvm_isspacep_shared
1587-
: DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
1587+
: DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
15881588
[IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
15891589
"llvm.nvvm.isspacep.shared">,
15901590
ClangBuiltin<"__nvvm_isspacep_shared">;
1591+
// Intrinsic "llvm.nvvm.isspacep.shared.cluster": takes one pointer operand
// and yields an i1. Attributes match the other isspacep.* intrinsics above,
// but unlike them this definition has no ClangBuiltin attached.
def int_nvvm_isspacep_shared_cluster
1592+
: DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
1593+
[IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
1594+
"llvm.nvvm.isspacep.shared.cluster">;
15911595

15921596
// Environment register read
15931597
def int_nvvm_read_ptx_sreg_envreg0
@@ -4341,30 +4345,29 @@ def int_nvvm_swap_lo_hi_b64
43414345

43424346

43434347
// Accessing special registers.
4348+
4349+
// Base class for special-register read intrinsics: no operands, one i32
// result, marked IntrNoMem / IntrSpeculatable / NoUndef on the return value.
// No ClangBuiltin is attached here ("NB" presumably stands for "no builtin").
class PTXReadSRegIntrinsicNB_r32
4350+
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>;
4351+
// Same intrinsic shape as PTXReadSRegIntrinsicNB_r32, plus a ClangBuiltin
// whose name is "__nvvm_read_ptx_sreg_" followed by `name`.
class PTXReadSRegIntrinsic_r32<string name>
4352+
: PTXReadSRegIntrinsicNB_r32, ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
4353+
43444354
// Defines four i32 special-register read intrinsics, one per component
// suffix (_x, _y, _z, _w). Each is a PTXReadSRegIntrinsic_r32, so each gets
// the clang builtin "__nvvm_read_ptx_sreg_" # regname # suffix.
multiclass PTXReadSRegIntrinsic_v4i32<string regname> {
43454355
// FIXME: Do we need the 128-bit integer type version?
43464356
// def _r64 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem, IntrSpeculatable]>;
43474357

43484358
// FIXME: Enable this once v4i32 support is enabled in back-end.
43494359
// def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem, IntrSpeculatable]>;
4360+
// The suffix string doubles as the record-name suffix and the builtin-name suffix.
foreach suffix = ["_x", "_y", "_z", "_w"] in
4361+
def suffix : PTXReadSRegIntrinsic_r32<regname # suffix>;
4362+
}
43504363

4351-
def _x : DefaultAttrsIntrinsic<[llvm_i32_ty], [],
4352-
[IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>,
4353-
ClangBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_x">;
4354-
def _y : DefaultAttrsIntrinsic<[llvm_i32_ty], [],
4355-
[IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>,
4356-
ClangBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_y">;
4357-
def _z : DefaultAttrsIntrinsic<[llvm_i32_ty], [],
4358-
[IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>,
4359-
ClangBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_z">;
4360-
def _w : DefaultAttrsIntrinsic<[llvm_i32_ty], [],
4361-
[IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>,
4362-
ClangBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_w">;
4364+
// Same, but without automatic clang builtins. It will be used for
4365+
// registers that require particular GPU or PTX version.
4366+
multiclass PTXReadSRegIntrinsicNB_v4i32 {
4367+
// Emits the _x/_y/_z/_w records; each is a plain PTXReadSRegIntrinsicNB_r32
// (i32 result, no operands), so no register-name parameter is needed.
foreach suffix = ["_x", "_y", "_z", "_w"] in
4368+
def suffix : PTXReadSRegIntrinsicNB_r32;
43634369
}
43644370

4365-
class PTXReadSRegIntrinsic_r32<string name>
4366-
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>,
4367-
ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
43684371
class PTXReadSRegIntrinsic_r64<string name>
43694372
: DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>,
43704373
ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
@@ -4413,6 +4416,15 @@ def int_nvvm_read_ptx_sreg_pm3 : PTXReadNCSRegIntrinsic_r32<"pm3">;
44134416

44144417
def int_nvvm_read_ptx_sreg_warpsize : PTXReadSRegIntrinsic_r32<"warpsize">;
44154418

4419+
// sm90+, PTX7.8+
4420+
// Four-component registers: each defm expands to the _x/_y/_z/_w intrinsics
// via PTXReadSRegIntrinsicNB_v4i32, i.e. without clang builtins.
defm int_nvvm_read_ptx_sreg_clusterid : PTXReadSRegIntrinsicNB_v4i32;
4421+
defm int_nvvm_read_ptx_sreg_nclusterid : PTXReadSRegIntrinsicNB_v4i32;
4422+
defm int_nvvm_read_ptx_sreg_cluster_ctaid : PTXReadSRegIntrinsicNB_v4i32;
4423+
defm int_nvvm_read_ptx_sreg_cluster_nctaid : PTXReadSRegIntrinsicNB_v4i32;
4424+
4425+
// Scalar registers: a single i32 intrinsic each, also without a clang builtin.
def int_nvvm_read_ptx_sreg_cluster_ctarank : PTXReadSRegIntrinsicNB_r32;
4426+
def int_nvvm_read_ptx_sreg_cluster_nctarank : PTXReadSRegIntrinsicNB_r32;
4427+
44164428
//
44174429
// SHUFFLE
44184430
//
@@ -4661,4 +4673,25 @@ foreach transposed = [0, 1] in {
46614673
}
46624674
}
46634675

4676+
// Cluster-related intrinsics. All are IntrNoMem + IntrSpeculatable and none
// has a ClangBuiltin attached.
//
// llvm.nvvm.mapa: (ptr, i32) -> ptr.
def int_nvvm_mapa
4677+
: DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty],
4678+
[IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
4679+
"llvm.nvvm.mapa">;
4680+
// llvm.nvvm.mapa.shared.cluster: same shape as llvm.nvvm.mapa, but both the
// pointer operand and the result use llvm_shared_i8ptr_ty.
def int_nvvm_mapa_shared_cluster
4681+
: DefaultAttrsIntrinsic<[llvm_shared_i8ptr_ty], [llvm_shared_i8ptr_ty, llvm_i32_ty],
4682+
[IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
4683+
"llvm.nvvm.mapa.shared.cluster">;
4684+
// llvm.nvvm.getctarank: ptr -> i32.
def int_nvvm_getctarank
4685+
: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_ptr_ty],
4686+
[IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
4687+
"llvm.nvvm.getctarank">;
4688+
// llvm.nvvm.getctarank.shared.cluster: llvm_shared_i8ptr_ty operand -> i32.
def int_nvvm_getctarank_shared_cluster
4689+
: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_shared_i8ptr_ty],
4690+
[IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
4691+
"llvm.nvvm.getctarank.shared.cluster">;
4692+
// llvm.nvvm.is_explicit_cluster: no operands -> i1; the result is NoUndef.
def int_nvvm_is_explicit_cluster
4693+
: DefaultAttrsIntrinsic<[llvm_i1_ty], [],
4694+
[IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
4695+
"llvm.nvvm.is_explicit_cluster">;
4696+
46644697
} // let TargetPrefix = "nvvm"

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 97 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -2480,41 +2480,24 @@ def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
24802480

24812481
// isspacep
24822482

2483-
def ISSPACEP_CONST_32
2484-
: NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2485-
"isspacep.const \t$d, $a;",
2486-
[(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
2487-
Requires<[hasPTX<31>]>;
2488-
def ISSPACEP_CONST_64
2489-
: NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2490-
"isspacep.const \t$d, $a;",
2491-
[(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
2492-
Requires<[hasPTX<31>]>;
2493-
def ISSPACEP_GLOBAL_32
2494-
: NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2495-
"isspacep.global \t$d, $a;",
2496-
[(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
2497-
def ISSPACEP_GLOBAL_64
2498-
: NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2499-
"isspacep.global \t$d, $a;",
2500-
[(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
2501-
def ISSPACEP_LOCAL_32
2502-
: NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2503-
"isspacep.local \t$d, $a;",
2504-
[(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
2505-
def ISSPACEP_LOCAL_64
2506-
: NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2507-
"isspacep.local \t$d, $a;",
2508-
[(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
2509-
def ISSPACEP_SHARED_32
2510-
: NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2511-
"isspacep.shared \t$d, $a;",
2512-
[(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
2513-
def ISSPACEP_SHARED_64
2514-
: NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2515-
"isspacep.shared \t$d, $a;",
2516-
[(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
2517-
2483+
// Generates the 32-bit and 64-bit address variants of one isspacep
// instruction.
//   suffix - text appended after "isspacep." in the emitted assembly.
//   Intr   - intrinsic matched by the selection pattern.
//   Preds  - predicates handed to Requires<>; defaults to the empty list.
// Both variants write an Int1Regs result; they differ only in the register
// class of the address operand $a.
multiclass ISSPACEP<string suffix, Intrinsic Intr, list<Predicate> Preds = []> {
2484+
def _32: NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2485+
"isspacep." # suffix # "\t$d, $a;",
2486+
[(set Int1Regs:$d, (Intr Int32Regs:$a))]>,
2487+
Requires<Preds>;
2488+
def _64: NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2489+
"isspacep." # suffix # "\t$d, $a;",
2490+
[(set Int1Regs:$d, (Intr Int64Regs:$a))]>,
2491+
Requires<Preds>;
2492+
}
2493+
2494+
// Instantiations of ISSPACEP, one per address space. Only the const and
// shared::cluster variants pass predicates; the others use the default [].
// NOTE(review): the resulting record names are isspace_<space>_{32,64},
// whereas the hand-written defs removed above were ISSPACEP_<SPACE>_{32,64}
// -- confirm nothing else refers to the old names.
defm isspace_const : ISSPACEP<"const", int_nvvm_isspacep_const, [hasPTX<31>]>;
2495+
defm isspace_global : ISSPACEP<"global", int_nvvm_isspacep_global>;
2496+
defm isspace_local : ISSPACEP<"local", int_nvvm_isspacep_local>;
2497+
defm isspace_shared : ISSPACEP<"shared", int_nvvm_isspacep_shared>;
2498+
defm isspace_shared_cluster : ISSPACEP<"shared::cluster",
2499+
int_nvvm_isspacep_shared_cluster,
2500+
[hasPTX<78>, hasSM<90>]>;
25182501

25192502
// Special register reads
25202503
def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
@@ -6213,61 +6196,58 @@ def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
62136196
// Read Special Registers
62146197
//-----------------------------------
62156198

6216-
class PTX_READ_SREG_R64<string regname, Intrinsic intop>
6199+
class PTX_READ_SREG_R64<string regname, Intrinsic intop, list<Predicate> Preds=[]>
62176200
: NVPTXInst<(outs Int64Regs:$d), (ins),
62186201
!strconcat("mov.u64 \t$d, %", regname, ";"),
6219-
[(set Int64Regs:$d, (intop))]>;
6202+
[(set Int64Regs:$d, (intop))]>,
6203+
Requires<Preds>;
62206204

6221-
class PTX_READ_SREG_R32<string regname, Intrinsic intop>
6205+
class PTX_READ_SREG_R32<string regname, Intrinsic intop, list<Predicate> Preds=[]>
62226206
: NVPTXInst<(outs Int32Regs:$d), (ins),
62236207
!strconcat("mov.u32 \t$d, %", regname, ";"),
6224-
[(set Int32Regs:$d, (intop))]>;
6208+
[(set Int32Regs:$d, (intop))]>,
6209+
Requires<Preds>;
6210+
6211+
// Generates the four per-component read instructions (_x, _y, _z, _w) for a
// special register.
//   regname - register base name; used both in the assembly operand
//             ("<regname>.<suffix>") and to look up the matching intrinsic
//             record "int_nvvm_read_ptx_sreg_<regname>_<suffix>".
//   Preds   - predicates forwarded to PTX_READ_SREG_R32; defaults to [].
multiclass PTX_READ_SREG_R32V4<string regname, list<Predicate> Preds=[]> {
6212+
foreach suffix = ["x", "y", "z", "w"] in {
6213+
defvar reg = regname # "." # suffix;
6214+
// The intrinsic is resolved by name, so a record with exactly this name must exist.
defvar intr = !cast<Intrinsic>("int_nvvm_read_ptx_sreg_" # regname # "_" # suffix);
6215+
def "_"#suffix : PTX_READ_SREG_R32<reg, intr, Preds>;
6216+
}
6217+
}
62256218

62266219
// TODO Add read vector-version of special registers
62276220

6228-
def INT_PTX_SREG_TID_X :
6229-
PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
6230-
def INT_PTX_SREG_TID_Y :
6231-
PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
6232-
def INT_PTX_SREG_TID_Z :
6233-
PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
6234-
def INT_PTX_SREG_TID_W :
6235-
PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;
6236-
6237-
def INT_PTX_SREG_NTID_X :
6238-
PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
6239-
def INT_PTX_SREG_NTID_Y :
6240-
PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
6241-
def INT_PTX_SREG_NTID_Z :
6242-
PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
6243-
def INT_PTX_SREG_NTID_W :
6244-
PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
6221+
// Four-component registers with no extra predicates. These replace the
// individual INT_PTX_SREG_{TID,NTID,CTAID,NCTAID}_{X,Y,Z,W} defs removed in
// this diff; the suffix produced by the multiclass is lower-case (_x.._w).
defm INT_PTX_SREG_TID : PTX_READ_SREG_R32V4<"tid">;
6222+
defm INT_PTX_SREG_NTID : PTX_READ_SREG_R32V4<"ntid">;
6223+
defm INT_PTX_SREG_CTAID : PTX_READ_SREG_R32V4<"ctaid">;
6224+
defm INT_PTX_SREG_NCTAID: PTX_READ_SREG_R32V4<"nctaid">;
6225+
6226+
// Cluster registers: every variant below requires hasSM<90> and hasPTX<78>.
defm INT_PTX_SREG_CLUSTERID :
6227+
PTX_READ_SREG_R32V4<"clusterid", [hasSM<90>, hasPTX<78>]>;
6228+
defm INT_PTX_SREG_NCLUSTERID :
6229+
PTX_READ_SREG_R32V4<"nclusterid", [hasSM<90>, hasPTX<78>]>;
6230+
defm INT_PTX_SREG_CLUSTER_CTAID :
6231+
PTX_READ_SREG_R32V4<"cluster_ctaid", [hasSM<90>, hasPTX<78>]>;
6232+
defm INT_PTX_SREG_CLUSTER_NCTAID:
6233+
PTX_READ_SREG_R32V4<"cluster_nctaid", [hasSM<90>, hasPTX<78>]>;
6234+
6235+
// Scalar cluster registers, read through PTX_READ_SREG_R32 directly.
def INT_PTX_SREG_CLUSTER_CTARANK :
6236+
PTX_READ_SREG_R32<"cluster_ctarank",
6237+
int_nvvm_read_ptx_sreg_cluster_ctarank,
6238+
[hasSM<90>, hasPTX<78>]>;
6239+
def INT_PTX_SREG_CLUSTER_NCTARANK:
6240+
PTX_READ_SREG_R32<"cluster_nctarank",
6241+
int_nvvm_read_ptx_sreg_cluster_nctarank,
6242+
[hasSM<90>, hasPTX<78>]>;
6243+
62456244

62466245
def INT_PTX_SREG_LANEID :
62476246
PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
62486247
def INT_PTX_SREG_WARPID :
62496248
PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
62506249
def INT_PTX_SREG_NWARPID :
62516250
PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
6252-
6253-
def INT_PTX_SREG_CTAID_X :
6254-
PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
6255-
def INT_PTX_SREG_CTAID_Y :
6256-
PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
6257-
def INT_PTX_SREG_CTAID_Z :
6258-
PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
6259-
def INT_PTX_SREG_CTAID_W :
6260-
PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;
6261-
6262-
def INT_PTX_SREG_NCTAID_X :
6263-
PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
6264-
def INT_PTX_SREG_NCTAID_Y :
6265-
PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
6266-
def INT_PTX_SREG_NCTAID_Z :
6267-
PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
6268-
def INT_PTX_SREG_NCTAID_W :
6269-
PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
6270-
62716251
def INT_PTX_SREG_SMID :
62726252
PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
62736253
def INT_PTX_SREG_NSMID :
@@ -6704,3 +6684,45 @@ class MMA_PAT<WMMA_INSTR wi>
67046684
// Build intrinsic->instruction patterns for all MMA instructions.
67056685
foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in
67066686
def : MMA_PAT<mma>;
6687+
6688+
// Generates the four selection variants of a mapa instruction.
//   suffix - text inserted between "mapa" and the ".u32"/".u64" type.
//   Intr   - intrinsic matched by the selection patterns.
// _32/_64 choose the register class shared by the address operand $a and the
// result $d; the trailing "i" variants take $b as an i32 immediate instead of
// an Int32Regs register. All variants require hasSM<90> and hasPTX<78>.
multiclass MAPA<string suffix, Intrinsic Intr> {
6689+
def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, Int32Regs:$b),
6690+
"mapa" # suffix # ".u32\t$d, $a, $b;",
6691+
[(set Int32Regs:$d, (Intr Int32Regs:$a, Int32Regs:$b))]>,
6692+
Requires<[hasSM<90>, hasPTX<78>]>;
6693+
def _32i: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, i32imm:$b),
6694+
"mapa" # suffix # ".u32\t$d, $a, $b;",
6695+
[(set Int32Regs:$d, (Intr Int32Regs:$a, imm:$b))]>,
6696+
Requires<[hasSM<90>, hasPTX<78>]>;
6697+
def _64: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, Int32Regs:$b),
6698+
"mapa" # suffix # ".u64\t$d, $a, $b;",
6699+
[(set Int64Regs:$d, (Intr Int64Regs:$a, Int32Regs:$b))]>,
6700+
Requires<[hasSM<90>, hasPTX<78>]>;
6701+
def _64i: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, i32imm:$b),
6702+
"mapa" # suffix # ".u64\t$d, $a, $b;",
6703+
[(set Int64Regs:$d, (Intr Int64Regs:$a, imm:$b))]>,
6704+
Requires<[hasSM<90>, hasPTX<78>]>;
6705+
}
6706+
6707+
// Instantiate MAPA twice: with an empty suffix for int_nvvm_mapa, and with
// ".shared::cluster" for int_nvvm_mapa_shared_cluster.
defm mapa : MAPA<"", int_nvvm_mapa>;
6708+
defm mapa_shared_cluster : MAPA<".shared::cluster", int_nvvm_mapa_shared_cluster>;
6709+
6710+
6711+
// Generates the 32-bit and 64-bit address variants of a getctarank
// instruction.
//   suffix - text inserted between "getctarank" and the ".u32"/".u64" type.
//   Intr   - intrinsic matched by the selection pattern.
// The result $d is Int32Regs in both variants; only the register class of the
// address operand $a and the emitted type suffix differ. Both variants
// require hasSM<90> and hasPTX<78>.
multiclass GETCTARANK<string suffix, Intrinsic Intr> {
6712+
def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
6713+
"getctarank" # suffix # ".u32\t$d, $a;",
6714+
[(set Int32Regs:$d, (Intr Int32Regs:$a))]>,
6715+
Requires<[hasSM<90>, hasPTX<78>]>;
6716+
def _64: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
6717+
"getctarank" # suffix # ".u64\t$d, $a;",
6718+
[(set Int32Regs:$d, (Intr Int64Regs:$a))]>,
6719+
Requires<[hasSM<90>, hasPTX<78>]>;
6720+
}
6721+
6722+
// Instantiate GETCTARANK twice: with an empty suffix for int_nvvm_getctarank,
// and with ".shared::cluster" for int_nvvm_getctarank_shared_cluster.
defm getctarank : GETCTARANK<"", int_nvvm_getctarank>;
6723+
defm getctarank_shared_cluster : GETCTARANK<".shared::cluster", int_nvvm_getctarank_shared_cluster>;
6724+
6725+
// Selects int_nvvm_is_explicit_cluster: takes no operands and moves the
// %is_explicit_cluster register into an Int1Regs result with mov.pred.
// Requires hasSM<90> and hasPTX<78>.
def is_explicit_cluster: NVPTXInst<(outs Int1Regs:$d), (ins),
6726+
"mov.pred\t$d, %is_explicit_cluster;",
6727+
[(set Int1Regs:$d, (int_nvvm_is_explicit_cluster))]>,
6728+
Requires<[hasSM<90>, hasPTX<78>]>;

llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,7 @@ NVPTXTargetMachine::getPredicatedAddrSpace(const Value *V) const {
291291
case Intrinsic::nvvm_isspacep_local:
292292
return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_LOCAL);
293293
case Intrinsic::nvvm_isspacep_shared:
294+
case Intrinsic::nvvm_isspacep_shared_cluster:
294295
return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_SHARED);
295296
default:
296297
break;

0 commit comments

Comments
 (0)