@@ -214,15 +214,15 @@ def NVVM_ClusterDimZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ncluster
214
214
//===----------------------------------------------------------------------===//
215
215
// CTA index and range within Cluster
216
216
def NVVM_BlockInClusterIdXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.cluster.ctaid.x", [NVVMRequiresSM<90>]>;
217
- def NVVM_BlockInClusterIdYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.cluster.ctaid.y">;
218
- def NVVM_BlockInClusterIdZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.cluster.ctaid.z">;
219
- def NVVM_ClusterDimBlocksXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.cluster.nctaid.x">;
220
- def NVVM_ClusterDimBlocksYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.cluster.nctaid.y">;
217
+ def NVVM_BlockInClusterIdYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.cluster.ctaid.y", [NVVMRequiresSM<90>] >;
218
+ def NVVM_BlockInClusterIdZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.cluster.ctaid.z", [NVVMRequiresSM<90>] >;
219
+ def NVVM_ClusterDimBlocksXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.cluster.nctaid.x", [NVVMRequiresSM<90>] >;
220
+ def NVVM_ClusterDimBlocksYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.cluster.nctaid.y", [NVVMRequiresSM<90>] >;
221
221
def NVVM_ClusterDimBlocksZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.cluster.nctaid.z">;
222
222
223
223
//===----------------------------------------------------------------------===//
224
224
// CTA index and across Cluster dimensions
225
- def NVVM_ClusterId : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.cluster.ctarank">;
225
+ def NVVM_ClusterId : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.cluster.ctarank", [NVVMRequiresSM<90>] >;
226
226
def NVVM_ClusterDim : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.cluster.nctarank">;
227
227
228
228
//===----------------------------------------------------------------------===//
@@ -323,7 +323,7 @@ def NVVM_MBarrierInitOp : NVVM_PTXBuilder_Op<"mbarrier.init">,
323
323
}
324
324
325
325
/// mbarrier.init instruction with shared pointer type
326
- def NVVM_MBarrierInitSharedOp : NVVM_PTXBuilder_Op<"mbarrier.init.shared">,
326
+ def NVVM_MBarrierInitSharedOp : NVVM_PTXBuilder_Op<"mbarrier.init.shared", [NVVMRequiresSM<80>, DeclareOpInterfaceMethods<BasicPtxBuilderOpInterface>] >,
327
327
Arguments<(ins LLVM_PointerShared:$addr, I32:$count, PtxPredicate:$predicate)> {
328
328
string llvmBuilder = [{
329
329
createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_init_shared, {$addr, $count});
@@ -545,7 +545,7 @@ def NVVM_ClusterArriveOp : NVVM_Op<"cluster.arrive"> {
545
545
let assemblyFormat = "attr-dict";
546
546
}
547
547
548
- def NVVM_ClusterArriveRelaxedOp : NVVM_Op<"cluster.arrive.relaxed"> {
548
+ def NVVM_ClusterArriveRelaxedOp : NVVM_Op<"cluster.arrive.relaxed", [NVVMRequiresSM<90>] > {
549
549
let arguments = (ins OptionalAttr<UnitAttr>:$aligned);
550
550
551
551
let summary = "Cluster Barrier Relaxed Arrive Op";
@@ -571,7 +571,7 @@ def NVVM_ClusterArriveRelaxedOp : NVVM_Op<"cluster.arrive.relaxed"> {
571
571
let assemblyFormat = "attr-dict";
572
572
}
573
573
574
- def NVVM_ClusterWaitOp : NVVM_Op<"cluster.wait"> {
574
+ def NVVM_ClusterWaitOp : NVVM_Op<"cluster.wait", [NVVMRequiresSM<90>] > {
575
575
let arguments = (ins OptionalAttr<UnitAttr>:$aligned);
576
576
577
577
let summary = "Cluster Barrier Wait Op";
@@ -776,7 +776,7 @@ def ShflKind : I32EnumAttr<"ShflKind", "NVVM shuffle kind",
776
776
def ShflKindAttr : EnumAttr<NVVM_Dialect, ShflKind, "shfl_kind">;
777
777
778
778
def NVVM_ShflOp :
779
- NVVM_Op<"shfl.sync">,
779
+ NVVM_Op<"shfl.sync", [NVVMRequiresSM<30>] >,
780
780
Results<(outs LLVM_Type:$res)>,
781
781
Arguments<(ins I32:$thread_mask,
782
782
LLVM_Type:$val,
@@ -1880,7 +1880,7 @@ def NVVM_CpAsyncBulkCommitGroupOp : NVVM_Op<"cp.async.bulk.commit.group">,
1880
1880
}];
1881
1881
}
1882
1882
1883
- def NVVM_CpAsyncBulkWaitGroupOp : NVVM_Op<"cp.async.bulk.wait_group">,
1883
+ def NVVM_CpAsyncBulkWaitGroupOp : NVVM_Op<"cp.async.bulk.wait_group", [NVVMRequiresSM<90>] >,
1884
1884
Arguments<(ins
1885
1885
ConfinedAttr<I32Attr, [IntMinValue<0>]>:$group,
1886
1886
OptionalAttr<UnitAttr>:$read)> {
@@ -1910,7 +1910,7 @@ def NVVM_CpAsyncBulkWaitGroupOp : NVVM_Op<"cp.async.bulk.wait_group">,
1910
1910
def NVVM_CpAsyncBulkTensorGlobalToSharedClusterOp :
1911
1911
NVVM_Op<"cp.async.bulk.tensor.shared.cluster.global",
1912
1912
[DeclareOpInterfaceMethods<BasicPtxBuilderOpInterface>,
1913
- AttrSizedOperandSegments]>,
1913
+ AttrSizedOperandSegments, NVVMRequiresSM<90> ]>,
1914
1914
Arguments<(ins LLVM_PointerShared:$dstMem,
1915
1915
LLVM_AnyPointer:$tmaDescriptor,
1916
1916
Variadic<I32>:$coordinates,
@@ -2347,8 +2347,7 @@ def NVVM_CpAsyncBulkSharedCTAToGlobalOp :
2347
2347
// NVVM Wgmma Ops
2348
2348
//===----------------------------------------------------------------------===//
2349
2349
2350
- def NVVM_WgmmaFenceAlignedOp : NVVM_Op<"wgmma.fence.aligned",
2351
- [NVVMRequiresSM<90, /*ArchAccelerated*/"true">]> {
2350
+ def NVVM_WgmmaFenceAlignedOp : NVVM_Op<"wgmma.fence.aligned", [NVVMRequiresSM90a]> {
2352
2351
let arguments = (ins);
2353
2352
let description = [{
2354
2353
Enforce an ordering of register accesses between warpgroup level matrix
@@ -2362,8 +2361,7 @@ def NVVM_WgmmaFenceAlignedOp : NVVM_Op<"wgmma.fence.aligned",
2362
2361
}];
2363
2362
}
2364
2363
2365
- def NVVM_WgmmaGroupSyncAlignedOp : NVVM_Op<"wgmma.commit.group.sync.aligned",
2366
- [NVVMRequiresSM<90, /*ArchAccelerated*/"true">]> {
2364
+ def NVVM_WgmmaGroupSyncAlignedOp : NVVM_Op<"wgmma.commit.group.sync.aligned", [NVVMRequiresSM90a]> {
2367
2365
let assemblyFormat = "attr-dict";
2368
2366
let description = [{
2369
2367
Commits all prior uncommitted warpgroup level matrix multiplication operations.
@@ -2375,7 +2373,7 @@ def NVVM_WgmmaGroupSyncAlignedOp : NVVM_Op<"wgmma.commit.group.sync.aligned",
2375
2373
}];
2376
2374
}
2377
2375
2378
- def NVVM_WgmmaWaitGroupSyncOp : NVVM_Op<"wgmma.wait.group.sync.aligned"> {
2376
+ def NVVM_WgmmaWaitGroupSyncOp : NVVM_Op<"wgmma.wait.group.sync.aligned", [NVVMRequiresSM90a]> {
2379
2377
let arguments = (ins I64Attr:$group);
2380
2378
let assemblyFormat = "attr-dict $group";
2381
2379
let description = [{
@@ -2571,7 +2569,7 @@ def NVVM_GriddepcontrolLaunchDependentsOp
2571
2569
2572
2570
def NVVM_MapaOp: NVVM_Op<"mapa",
2573
2571
[TypesMatchWith<"`res` and `a` should have the same type",
2574
- "a", "res", "$_self">]> {
2572
+ "a", "res", "$_self">, NVVMRequiresSM<90> ]> {
2575
2573
let results = (outs AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$res);
2576
2574
let arguments = (ins AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$a, I32:$b);
2577
2575
@@ -2662,7 +2660,7 @@ def Tcgen05WaitKindAttr :
2662
2660
let assemblyFormat = "`<` $value `>`";
2663
2661
}
2664
2662
2665
- def NVVM_Tcgen05AllocOp : NVVM_Op<"tcgen05.alloc"> {
2663
+ def NVVM_Tcgen05AllocOp : NVVM_Op<"tcgen05.alloc", [NVVMRequiresSM<100, "true", "false">] > {
2666
2664
let summary = "Tcgen05 alloc operation";
2667
2665
let description = [{
2668
2666
The `tcgen05.alloc` Op allocates tensor core memory for
@@ -2692,7 +2690,7 @@ def NVVM_Tcgen05AllocOp : NVVM_Op<"tcgen05.alloc"> {
2692
2690
}];
2693
2691
}
2694
2692
2695
- def NVVM_Tcgen05DeallocOp : NVVM_Op<"tcgen05.dealloc"> {
2693
+ def NVVM_Tcgen05DeallocOp : NVVM_Op<"tcgen05.dealloc", [NVVMRequiresSM<100, "true", "false">] > {
2696
2694
let summary = "Tcgen05 dealloc operation";
2697
2695
let description = [{
2698
2696
The `tcgen05.dealloc` Op de-allocates the tensor core memory
@@ -2720,7 +2718,7 @@ def NVVM_Tcgen05DeallocOp : NVVM_Op<"tcgen05.dealloc"> {
2720
2718
}];
2721
2719
}
2722
2720
2723
- def NVVM_Tcgen05RelinquishAllocPermitOp : NVVM_Op<"tcgen05.relinquish_alloc_permit"> {
2721
+ def NVVM_Tcgen05RelinquishAllocPermitOp : NVVM_Op<"tcgen05.relinquish_alloc_permit", [NVVMRequiresSM<100, "true", "false">] > {
2724
2722
let summary = "Tcgen05 Op to relinquish the right to allocate";
2725
2723
let description = [{
2726
2724
The `tcgen05.relinquish_alloc_permit` Op specifies that the CTA
@@ -2743,7 +2741,7 @@ def NVVM_Tcgen05RelinquishAllocPermitOp : NVVM_Op<"tcgen05.relinquish_alloc_perm
2743
2741
}];
2744
2742
}
2745
2743
2746
- def NVVM_Tcgen05FenceOp : NVVM_Op<"tcgen05.fence"> {
2744
+ def NVVM_Tcgen05FenceOp : NVVM_Op<"tcgen05.fence", [NVVMRequiresSM<100, "true", "false">] > {
2747
2745
let summary = "Tcgen05 fence operations";
2748
2746
let description = [{
2749
2747
The `tcgen05.fence<before>` orders all prior async tcgen05 operations
@@ -2765,7 +2763,7 @@ def NVVM_Tcgen05FenceOp : NVVM_Op<"tcgen05.fence"> {
2765
2763
}];
2766
2764
}
2767
2765
2768
- def NVVM_Tcgen05WaitOp : NVVM_Op<"tcgen05.wait"> {
2766
+ def NVVM_Tcgen05WaitOp : NVVM_Op<"tcgen05.wait", [NVVMRequiresSM<100, "true", "false">] > {
2769
2767
let summary = "Tcgen05 wait operations";
2770
2768
let description = [{
2771
2769
The `tcgen05.wait<load>` causes the executing thread to block until
@@ -2787,7 +2785,7 @@ def NVVM_Tcgen05WaitOp : NVVM_Op<"tcgen05.wait"> {
2787
2785
}];
2788
2786
}
2789
2787
2790
- def NVVM_Tcgen05CommitOp : NVVM_Op<"tcgen05.commit"> {
2788
+ def NVVM_Tcgen05CommitOp : NVVM_Op<"tcgen05.commit", [NVVMRequiresSM<100, "true", "false">] > {
2791
2789
let summary = "Tcgen05 commit operations";
2792
2790
let description = [{
2793
2791
The `tcgen05.commit` makes the mbarrier object, specified by
@@ -2825,7 +2823,7 @@ def NVVM_Tcgen05CommitOp : NVVM_Op<"tcgen05.commit"> {
2825
2823
}];
2826
2824
}
2827
2825
2828
- def NVVM_Tcgen05ShiftOp : NVVM_Op<"tcgen05.shift"> {
2826
+ def NVVM_Tcgen05ShiftOp : NVVM_Op<"tcgen05.shift", [NVVMRequiresSM<100, "true", "false">] > {
2829
2827
let summary = "Tcgen05 shift operation";
2830
2828
let description = [{
2831
2829
The `tcgen05.shift` is an asynchronous instruction which initiates
@@ -2891,7 +2889,7 @@ def Tcgen05CpSrcFormatAttr : EnumAttr<NVVM_Dialect, Tcgen05CpSrcFormat, "tcgen05
2891
2889
let assemblyFormat = "`<` $value `>`";
2892
2890
}
2893
2891
2894
- def NVVM_Tcgen05CpOp : NVVM_Op<"tcgen05.cp"> {
2892
+ def NVVM_Tcgen05CpOp : NVVM_Op<"tcgen05.cp", [NVVMRequiresSM<100, "true", "false">] > {
2895
2893
let summary = "Tcgen05 copy operation";
2896
2894
let description = [{
2897
2895
Instruction tcgen05.cp initiates an asynchronous copy operation from
@@ -2961,7 +2959,7 @@ def Tcgen05LdStShapeAttr: EnumAttr<NVVM_Dialect, Tcgen05LdStShape, "tcgen05_ldst
2961
2959
// NVVM tcgen05.ld Op
2962
2960
//===----------------------------------------------------------------------===//
2963
2961
2964
- def NVVM_Tcgen05LdOp : NVVM_Op<"tcgen05.ld"> {
2962
+ def NVVM_Tcgen05LdOp : NVVM_Op<"tcgen05.ld", [NVVMRequiresSM<100, "true", "false">] > {
2965
2963
let summary = "tensor memory load instructions";
2966
2964
let arguments = (ins
2967
2965
// Attributes
@@ -3051,7 +3049,7 @@ def NVVM_Tcgen05LdOp : NVVM_Op<"tcgen05.ld"> {
3051
3049
// NVVM tcgen05.st Op
3052
3050
//===----------------------------------------------------------------------===//
3053
3051
3054
- def NVVM_Tcgen05StOp : NVVM_Op<"tcgen05.st"> {
3052
+ def NVVM_Tcgen05StOp : NVVM_Op<"tcgen05.st", [NVVMRequiresSM<100, "true", "false">] > {
3055
3053
let summary = "tensor memory store instructions";
3056
3054
let arguments = (ins
3057
3055
// Attributes
0 commit comments