@@ -514,7 +514,7 @@ def NVVM_SyncWarpOp :
514
514
let assemblyFormat = "$mask attr-dict `:` type($mask)";
515
515
}
516
516
517
- // https://docs.nvidia.com/cuda/parallel-thread-execution/#id62
517
+
518
518
def LoadCacheModifierCA : I32EnumAttrCase<"CA", 0, "ca">;
519
519
def LoadCacheModifierCG : I32EnumAttrCase<"CG", 1, "cg">;
520
520
def LoadCacheModifierCS : I32EnumAttrCase<"CS", 2, "cs">;
@@ -528,6 +528,11 @@ def LoadCacheModifierKind : I32EnumAttr<"LoadCacheModifierKind",
528
528
LoadCacheModifierLU, LoadCacheModifierCV]> {
529
529
let genSpecializedAttr = 0;
530
530
let cppNamespace = "::mlir::NVVM";
531
+ let description = [{
532
+ Enum attribute of the different kinds of cache operators for load instructions.
533
+
534
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#id62)
535
+ }];
531
536
}
532
537
533
538
def LoadCacheModifierAttr : EnumAttr<NVVM_Dialect, LoadCacheModifierKind, "load_cache_modifier">;
@@ -1436,8 +1441,8 @@ def NVVM_WgmmaFenceAlignedOp : NVVM_Op<"wgmma.fence.aligned",
1436
1441
let description = [{
1437
1442
Enforce an ordering of register accesses between warpgroup level matrix
1438
1443
multiplication and other operations.
1439
- See for more information:
1440
- https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#asynchronous-warpgroup-level-matrix-instructions-wgmma-fence
1444
+
1445
+ [For more information, see PTX ISA]( https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#asynchronous-warpgroup-level-matrix-instructions-wgmma-fence)
1441
1446
}];
1442
1447
let assemblyFormat = "attr-dict";
1443
1448
let extraClassDefinition = [{
@@ -1451,8 +1456,8 @@ def NVVM_WgmmaGroupSyncAlignedOp : NVVM_Op<"wgmma.commit.group.sync.aligned",
1451
1456
let assemblyFormat = "attr-dict";
1452
1457
let description = [{
1453
1458
Commits all prior uncommitted warpgroup level matrix multiplication operations.
1454
- See for more information:
1455
- https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#asynchronous-warpgroup-level-matrix-instructions-wgmma-commit-group
1459
+
1460
+ [For more information, see PTX ISA]( https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#asynchronous-warpgroup-level-matrix-instructions-wgmma-commit-group)
1456
1461
}];
1457
1462
let extraClassDefinition = [{
1458
1463
std::string $cppClass::getPtx() { return std::string("wgmma.commit_group.sync.aligned;"); }
@@ -1465,8 +1470,8 @@ def NVVM_WgmmaWaitGroupSyncOp : NVVM_Op<"wgmma.wait.group.sync.aligned",
1465
1470
let assemblyFormat = "attr-dict $group";
1466
1471
let description = [{
1467
1472
Signal the completion of a preceding warpgroup operation.
1468
- See for more information:
1469
- https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#asynchronous-warpgroup-level-matrix-instructions-wgmma-wait-group
1473
+
1474
+ [For more information, see PTX ISA]( https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#asynchronous-warpgroup-level-matrix-instructions-wgmma-wait-group)
1470
1475
}];
1471
1476
let extraClassDefinition = [{
1472
1477
std::string $cppClass::getPtx() { return std::string("wgmma.wait_group.sync.aligned %0;"); }
@@ -1603,8 +1608,8 @@ def NVVM_WgmmaMmaAsyncOp : NVVM_Op<"wgmma.mma_async",
1603
1608
|--------------|--------------|------------|--------------|---------------|
1604
1609
```
1605
1610
1606
- See for more information:
1607
- https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#asynchronous-warpgroup-level-matrix-instructions
1611
+
1612
+ [For more information, see PTX ISA]( https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#asynchronous-warpgroup-level-matrix-instructions)
1608
1613
}];
1609
1614
1610
1615
let hasVerifier = 1;
0 commit comments