@@ -68,7 +68,7 @@ def GPU_ClusterDimOp : GPU_IndexOp<"cluster_dim"> {
68
68
69
69
def GPU_ClusterIdOp : GPU_IndexOp<"cluster_id"> {
70
70
let description = [{
71
- Returns the cluster id, i.e. the index of the current cluster within the
71
+ Returns the cluster id, i.e. the index of the current cluster within the
72
72
grid along the x, y, or z `dimension`.
73
73
74
74
Example:
@@ -462,23 +462,23 @@ def GPU_GPUFuncOp : GPU_Op<"func", [
462
462
def GPU_DynamicSharedMemoryOp : GPU_Op<"dynamic_shared_memory", [Pure]>
463
463
{
464
464
let summary = "Get the memref for dynamic shared memory";
465
-
465
+
466
466
let description = [{
467
- This operation provides a memref pointer to the start of dynamic shared
467
+ This operation provides a memref pointer to the start of dynamic shared
468
468
memory, often referred to as workgroup memory. It's important to note that
469
- this dynamic shared memory needs to be allocated at kernel launch. One can
470
- conveniently utilize `the dynamic_shared_memory_size` parameter of
469
+ this dynamic shared memory needs to be allocated at kernel launch. One can
470
+ conveniently utilize the `dynamic_shared_memory_size` parameter of
471
471
`gpu.launch` for this purpose.
472
-
473
- Examples:
474
- ```mlir
472
+
473
+ Examples:
474
+ ```mlir
475
475
%0 = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<workgroup>>
476
- %1 = memref.view %0[%c8192][] : memref<?xi8, #gpu.address_space<workgroup>>
476
+ %1 = memref.view %0[%c8192][] : memref<?xi8, #gpu.address_space<workgroup>>
477
477
to memref<32x64xf32, #gpu.address_space<workgroup>>
478
- %2 = memref.view %0[%c16384][] : memref<?xi8, #gpu.address_space<workgroup>>
478
+ %2 = memref.view %0[%c16384][] : memref<?xi8, #gpu.address_space<workgroup>>
479
479
to memref<32x64xf32, #gpu.address_space<workgroup>>
480
480
```
481
- }];
481
+ }];
482
482
let arguments = (ins);
483
483
let results = (outs Arg<MemRefRankOf<[I8], [1]>>:$resultMemref);
484
484
let assemblyFormat = [{ attr-dict `:` type($resultMemref) }];
@@ -493,11 +493,11 @@ def GPU_LaunchFuncOp :GPU_Op<"launch_func", [
493
493
"blockSizeY", "blockSizeZ"]>]>,
494
494
Arguments<(ins Variadic<GPU_AsyncToken>:$asyncDependencies,
495
495
SymbolRefAttr:$kernel,
496
- LaunchIndx:$gridSizeX,
497
- LaunchIndx:$gridSizeY,
496
+ LaunchIndx:$gridSizeX,
497
+ LaunchIndx:$gridSizeY,
498
498
LaunchIndx:$gridSizeZ,
499
- LaunchIndx:$blockSizeX,
500
- LaunchIndx:$blockSizeY,
499
+ LaunchIndx:$blockSizeX,
500
+ LaunchIndx:$blockSizeY,
501
501
LaunchIndx:$blockSizeZ,
502
502
Optional<LaunchIndx>:$clusterSizeX,
503
503
Optional<LaunchIndx>:$clusterSizeY,
@@ -539,10 +539,10 @@ def GPU_LaunchFuncOp :GPU_Op<"launch_func", [
539
539
The remaining operands if present are passed as arguments to the kernel
540
540
function.
541
541
542
- The `gpu.launch_func` also supports kernel launching with clusters if
543
- supported by the target architecture. The cluster size can be set by
544
- `clusterSizeX`, `clusterSizeY`, and `clusterSizeZ` arguments. When these
545
- arguments are present, the Op launches a kernel that clusters the given
542
+ The `gpu.launch_func` also supports kernel launching with clusters if
543
+ supported by the target architecture. The cluster size can be set by
544
+ `clusterSizeX`, `clusterSizeY`, and `clusterSizeZ` arguments. When these
545
+ arguments are present, the Op launches a kernel that clusters the given
546
546
thread blocks. This feature is exclusive to certain architectures.
547
547
548
548
Example:
@@ -593,7 +593,7 @@ def GPU_LaunchFuncOp :GPU_Op<"launch_func", [
593
593
async // (Optional) Don't block host, return token.
594
594
[%t0] // (Optional) Execute only after %t0 has completed.
595
595
@kernels::@kernel_1 // Kernel function.
596
- clusters in (%cst, %cst, %cst) // (Optional) Cluster size only for support architectures.
596
+ clusters in (%cst, %cst, %cst) // (Optional) Cluster size only for supported architectures.
597
597
blocks in (%cst, %cst, %cst) // Grid size.
598
598
threads in (%cst, %cst, %cst) // Block size.
599
599
dynamic_shared_memory_size %s // (Optional) Amount of dynamic shared
@@ -659,7 +659,7 @@ def GPU_LaunchFuncOp :GPU_Op<"launch_func", [
659
659
let assemblyFormat = [{
660
660
custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)
661
661
(`<` $asyncObject^ `:` type($asyncObject) `>`)?
662
- $kernel
662
+ $kernel
663
663
( `clusters` `in` ` ` `(` $clusterSizeX^ `,` $clusterSizeY `,` $clusterSizeZ `)` )?
664
664
`blocks` `in` ` ` `(` $gridSizeX `,` $gridSizeY `,` $gridSizeZ `)`
665
665
`threads` `in` ` ` `(` $blockSizeX `,` $blockSizeY `,` $blockSizeZ `)`
0 commit comments