@@ -2480,41 +2480,24 @@ def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
2480
2480
2481
2481
// isspacep
2482
2482
2483
- def ISSPACEP_CONST_32
2484
- : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2485
- "isspacep.const \t$d, $a;",
2486
- [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
2487
- Requires<[hasPTX<31>]>;
2488
- def ISSPACEP_CONST_64
2489
- : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2490
- "isspacep.const \t$d, $a;",
2491
- [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
2492
- Requires<[hasPTX<31>]>;
2493
- def ISSPACEP_GLOBAL_32
2494
- : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2495
- "isspacep.global \t$d, $a;",
2496
- [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
2497
- def ISSPACEP_GLOBAL_64
2498
- : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2499
- "isspacep.global \t$d, $a;",
2500
- [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
2501
- def ISSPACEP_LOCAL_32
2502
- : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2503
- "isspacep.local \t$d, $a;",
2504
- [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
2505
- def ISSPACEP_LOCAL_64
2506
- : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2507
- "isspacep.local \t$d, $a;",
2508
- [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
2509
- def ISSPACEP_SHARED_32
2510
- : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2511
- "isspacep.shared \t$d, $a;",
2512
- [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
2513
- def ISSPACEP_SHARED_64
2514
- : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2515
- "isspacep.shared \t$d, $a;",
2516
- [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
2517
-
2483
// ISSPACEP: emits the two address-width variants of an `isspacep.<suffix>`
// instruction.
//   suffix - text appended after "isspacep." in the assembly string.
//   Intr   - intrinsic matched by the selection pattern.
//   Preds  - predicates attached through Requires<>; defaults to the empty list.
// `_32` takes the address operand $a in Int32Regs, `_64` takes it in Int64Regs;
// both write their result $d to Int1Regs.
+ multiclass ISSPACEP<string suffix, Intrinsic Intr, list<Predicate> Preds = []> {
2484
+ def _32: NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2485
+ "isspacep." # suffix # "\t$d, $a;",
2486
+ [(set Int1Regs:$d, (Intr Int32Regs:$a))]>,
2487
+ Requires<Preds>;
2488
+ def _64: NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2489
+ "isspacep." # suffix # "\t$d, $a;",
2490
+ [(set Int1Regs:$d, (Intr Int64Regs:$a))]>,
2491
+ Requires<Preds>;
2492
+ }
2493
+
2494
// One ISSPACEP instantiation per suffix: const, global, local, shared and
// shared::cluster. Only `const` (hasPTX<31>) and `shared::cluster`
// (hasPTX<78>, hasSM<90>) pass predicates; the others use the empty default.
// NOTE(review): the record prefix is `isspace_` while the multiclass and the
// emitted mnemonic are spelled `isspacep` -- confirm the missing `p` is intended.
+ defm isspace_const : ISSPACEP<"const", int_nvvm_isspacep_const, [hasPTX<31>]>;
2495
+ defm isspace_global : ISSPACEP<"global", int_nvvm_isspacep_global>;
2496
+ defm isspace_local : ISSPACEP<"local", int_nvvm_isspacep_local>;
2497
+ defm isspace_shared : ISSPACEP<"shared", int_nvvm_isspacep_shared>;
2498
+ defm isspace_shared_cluster : ISSPACEP<"shared::cluster",
2499
+ int_nvvm_isspacep_shared_cluster,
2500
+ [hasPTX<78>, hasSM<90>]>;
2518
2501
2519
2502
// Special register reads
2520
2503
def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
@@ -6213,61 +6196,58 @@ def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
6213
6196
// Read Special Registers
6214
6197
//-----------------------------------
6215
6198
6216
- class PTX_READ_SREG_R64<string regname, Intrinsic intop>
6199
// PTX_READ_SREG_R64: instruction with no input operands that writes
// Int64Regs:$d using the assembly string `mov.u64 \t$d, %<regname>;`.
//   regname - text placed after '%' in the assembly string.
//   intop   - intrinsic (called with no arguments) matched by the pattern.
//   Preds   - predicates attached through Requires<>; defaults to the empty list.
+ class PTX_READ_SREG_R64<string regname, Intrinsic intop, list<Predicate> Preds=[] >
6217
6200
: NVPTXInst<(outs Int64Regs:$d), (ins),
6218
6201
!strconcat("mov.u64 \t$d, %", regname, ";"),
6219
- [(set Int64Regs:$d, (intop))]>;
6202
+ [(set Int64Regs:$d, (intop))]>,
6203
+ Requires<Preds>;
6220
6204
6221
- class PTX_READ_SREG_R32<string regname, Intrinsic intop>
6205
// PTX_READ_SREG_R32: instruction with no input operands that writes
// Int32Regs:$d using the assembly string `mov.u32 \t$d, %<regname>;`.
//   regname - text placed after '%' in the assembly string.
//   intop   - intrinsic (called with no arguments) matched by the pattern.
//   Preds   - predicates attached through Requires<>; defaults to the empty list.
+ class PTX_READ_SREG_R32<string regname, Intrinsic intop, list<Predicate> Preds=[] >
6222
6206
: NVPTXInst<(outs Int32Regs:$d), (ins),
6223
6207
!strconcat("mov.u32 \t$d, %", regname, ";"),
6224
- [(set Int32Regs:$d, (intop))]>;
6208
+ [(set Int32Regs:$d, (intop))]>,
6209
+ Requires<Preds>;
6210
+
6211
// PTX_READ_SREG_R32V4: for each suffix in x, y, z, w defines a record named
// `_<suffix>` that derives from PTX_READ_SREG_R32.
//   regname - base name; the register string passed on is `<regname>.<suffix>`.
//   Preds   - forwarded unchanged to PTX_READ_SREG_R32; defaults to empty.
// The intrinsic is not a parameter: it is looked up by name with !cast as
// `int_nvvm_read_ptx_sreg_<regname>_<suffix>`, so a record with that exact
// name has to exist for every suffix.
+ multiclass PTX_READ_SREG_R32V4<string regname, list<Predicate> Preds=[]> {
6212
+ foreach suffix = ["x", "y", "z", "w"] in {
6213
+ defvar reg = regname # "." # suffix;
6214
+ defvar intr = !cast<Intrinsic>("int_nvvm_read_ptx_sreg_" # regname # "_" # suffix);
6215
+ def "_"#suffix : PTX_READ_SREG_R32<reg, intr, Preds>;
6216
+ }
6217
+ }
6225
6218
6226
6219
// TODO: Add vector versions of the special-register reads.
6227
6220
6228
- def INT_PTX_SREG_TID_X :
6229
- PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
6230
- def INT_PTX_SREG_TID_Y :
6231
- PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
6232
- def INT_PTX_SREG_TID_Z :
6233
- PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
6234
- def INT_PTX_SREG_TID_W :
6235
- PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;
6236
-
6237
- def INT_PTX_SREG_NTID_X :
6238
- PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
6239
- def INT_PTX_SREG_NTID_Y :
6240
- PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
6241
- def INT_PTX_SREG_NTID_Z :
6242
- PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
6243
- def INT_PTX_SREG_NTID_W :
6244
- PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
6221
// x/y/z/w special-register reads for tid, ntid, ctaid and nctaid, generated
// through PTX_READ_SREG_R32V4 with no predicates passed.
+ defm INT_PTX_SREG_TID : PTX_READ_SREG_R32V4<"tid">;
6222
+ defm INT_PTX_SREG_NTID : PTX_READ_SREG_R32V4<"ntid">;
6223
+ defm INT_PTX_SREG_CTAID : PTX_READ_SREG_R32V4<"ctaid">;
6224
+ defm INT_PTX_SREG_NCTAID: PTX_READ_SREG_R32V4<"nctaid">;
6225
+
6226
// x/y/z/w special-register reads for clusterid, nclusterid, cluster_ctaid and
// cluster_nctaid. Each instantiation passes [hasSM<90>, hasPTX<78>] as its
// predicate list.
+ defm INT_PTX_SREG_CLUSTERID :
6227
+ PTX_READ_SREG_R32V4<"clusterid", [hasSM<90>, hasPTX<78>]>;
6228
+ defm INT_PTX_SREG_NCLUSTERID :
6229
+ PTX_READ_SREG_R32V4<"nclusterid", [hasSM<90>, hasPTX<78>]>;
6230
+ defm INT_PTX_SREG_CLUSTER_CTAID :
6231
+ PTX_READ_SREG_R32V4<"cluster_ctaid", [hasSM<90>, hasPTX<78>]>;
6232
+ defm INT_PTX_SREG_CLUSTER_NCTAID:
6233
+ PTX_READ_SREG_R32V4<"cluster_nctaid", [hasSM<90>, hasPTX<78>]>;
6234
+
6235
// Scalar 32-bit special-register reads for cluster_ctarank and
// cluster_nctarank. Both use PTX_READ_SREG_R32 directly (no x/y/z/w
// expansion) and pass [hasSM<90>, hasPTX<78>] as their predicate list.
+ def INT_PTX_SREG_CLUSTER_CTARANK :
6236
+ PTX_READ_SREG_R32<"cluster_ctarank",
6237
+ int_nvvm_read_ptx_sreg_cluster_ctarank,
6238
+ [hasSM<90>, hasPTX<78>]>;
6239
+ def INT_PTX_SREG_CLUSTER_NCTARANK:
6240
+ PTX_READ_SREG_R32<"cluster_nctarank",
6241
+ int_nvvm_read_ptx_sreg_cluster_nctarank,
6242
+ [hasSM<90>, hasPTX<78>]>;
6243
+
6245
6244
6246
6245
// Scalar 32-bit special-register reads for laneid, warpid and nwarpid; each
// pairs the register name with its int_nvvm_read_ptx_sreg_* intrinsic and
// passes no predicates.
def INT_PTX_SREG_LANEID :
6247
6246
PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
6248
6247
def INT_PTX_SREG_WARPID :
6249
6248
PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
6250
6249
def INT_PTX_SREG_NWARPID :
6251
6250
PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
6252
-
6253
- def INT_PTX_SREG_CTAID_X :
6254
- PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
6255
- def INT_PTX_SREG_CTAID_Y :
6256
- PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
6257
- def INT_PTX_SREG_CTAID_Z :
6258
- PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
6259
- def INT_PTX_SREG_CTAID_W :
6260
- PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;
6261
-
6262
- def INT_PTX_SREG_NCTAID_X :
6263
- PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
6264
- def INT_PTX_SREG_NCTAID_Y :
6265
- PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
6266
- def INT_PTX_SREG_NCTAID_Z :
6267
- PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
6268
- def INT_PTX_SREG_NCTAID_W :
6269
- PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
6270
-
6271
6251
// Scalar 32-bit special-register read for smid, matched from
// int_nvvm_read_ptx_sreg_smid; no predicates are passed.
def INT_PTX_SREG_SMID :
6272
6252
PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
6273
6253
def INT_PTX_SREG_NSMID :
@@ -6704,3 +6684,45 @@ class MMA_PAT<WMMA_INSTR wi>
6704
6684
// Build intrinsic->instruction patterns for all MMA instructions.
6705
6685
// Concatenates the MMAs, WMMAs, MMA_LDSTs and LDMATRIXs lists and emits one
// anonymous MMA_PAT record for every element.
foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in
6706
6686
def : MMA_PAT<mma>;
6687
+
6688
// MAPA: emits four variants of a `mapa<suffix>` instruction.
//   suffix - text inserted between "mapa" and the `.u32`/`.u64` type.
//   Intr   - intrinsic matched by the selection patterns.
// Variants:
//   _32  : $a and $d in Int32Regs, $b in Int32Regs.
//   _32i : $a and $d in Int32Regs, $b is an i32imm immediate.
//   _64  : $a and $d in Int64Regs, $b in Int32Regs.
//   _64i : $a and $d in Int64Regs, $b is an i32imm immediate.
// Every variant is gated by hasSM<90> and hasPTX<78>.
+ multiclass MAPA<string suffix, Intrinsic Intr> {
6689
+ def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, Int32Regs:$b),
6690
+ "mapa" # suffix # ".u32\t$d, $a, $b;",
6691
+ [(set Int32Regs:$d, (Intr Int32Regs:$a, Int32Regs:$b))]>,
6692
+ Requires<[hasSM<90>, hasPTX<78>]>;
6693
+ def _32i: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, i32imm:$b),
6694
+ "mapa" # suffix # ".u32\t$d, $a, $b;",
6695
+ [(set Int32Regs:$d, (Intr Int32Regs:$a, imm:$b))]>,
6696
+ Requires<[hasSM<90>, hasPTX<78>]>;
6697
+ def _64: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, Int32Regs:$b),
6698
+ "mapa" # suffix # ".u64\t$d, $a, $b;",
6699
+ [(set Int64Regs:$d, (Intr Int64Regs:$a, Int32Regs:$b))]>,
6700
+ Requires<[hasSM<90>, hasPTX<78>]>;
6701
+ def _64i: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, i32imm:$b),
6702
+ "mapa" # suffix # ".u64\t$d, $a, $b;",
6703
+ [(set Int64Regs:$d, (Intr Int64Regs:$a, imm:$b))]>,
6704
+ Requires<[hasSM<90>, hasPTX<78>]>;
6705
+ }
6706
+
6707
// Two MAPA instantiations: one with an empty suffix bound to int_nvvm_mapa,
// and one with the `.shared::cluster` suffix bound to
// int_nvvm_mapa_shared_cluster.
+ defm mapa : MAPA<"", int_nvvm_mapa>;
6708
+ defm mapa_shared_cluster : MAPA<".shared::cluster", int_nvvm_mapa_shared_cluster>;
6709
+
6710
+
6711
// GETCTARANK: emits the two address-width variants of a `getctarank<suffix>`
// instruction.
//   suffix - text inserted between "getctarank" and the `.u32`/`.u64` type.
//   Intr   - intrinsic matched by the selection patterns.
// `_32` takes $a in Int32Regs and `_64` takes $a in Int64Regs; the result $d
// is in Int32Regs for both. Both are gated by hasSM<90> and hasPTX<78>.
+ multiclass GETCTARANK<string suffix, Intrinsic Intr> {
6712
+ def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
6713
+ "getctarank" # suffix # ".u32\t$d, $a;",
6714
+ [(set Int32Regs:$d, (Intr Int32Regs:$a))]>,
6715
+ Requires<[hasSM<90>, hasPTX<78>]>;
6716
+ def _64: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
6717
+ "getctarank" # suffix # ".u64\t$d, $a;",
6718
+ [(set Int32Regs:$d, (Intr Int64Regs:$a))]>,
6719
+ Requires<[hasSM<90>, hasPTX<78>]>;
6720
+ }
6721
+
6722
// Two GETCTARANK instantiations: one with an empty suffix bound to
// int_nvvm_getctarank, and one with the `.shared::cluster` suffix bound to
// int_nvvm_getctarank_shared_cluster.
+ defm getctarank : GETCTARANK<"", int_nvvm_getctarank>;
6723
+ defm getctarank_shared_cluster : GETCTARANK<".shared::cluster", int_nvvm_getctarank_shared_cluster>;
6724
+
6725
// is_explicit_cluster: instruction with no input operands that writes
// Int1Regs:$d using `mov.pred\t$d, %is_explicit_cluster;`. It is selected
// for int_nvvm_is_explicit_cluster and gated by hasSM<90> and hasPTX<78>.
+ def is_explicit_cluster: NVPTXInst<(outs Int1Regs:$d), (ins),
6726
+ "mov.pred\t$d, %is_explicit_cluster;",
6727
+ [(set Int1Regs:$d, (int_nvvm_is_explicit_cluster))]>,
6728
+ Requires<[hasSM<90>, hasPTX<78>]>;
0 commit comments