Skip to content

Commit bd6568c

Browse files
[MLIR][GPU] Add gpu.cluster_dim_blocks and gpu.cluster_block_id Ops (#95245)
This commit adds the `gpu.cluster_dim_blocks` and `gpu.cluster_block_id` ops, which represent the number of blocks per cluster and the block id inside a cluster, respectively. It also fixes the description of the `gpu.cluster_dim` op and updates the `cga_cluster.mlir` test file to use `gpu.cluster_dim_blocks`.
Co-authored-by: pradeepku <[email protected]>
Co-authored-by: Guray Ozen <[email protected]>
1 parent d66084b commit bd6568c

File tree

5 files changed

+50
-7
lines changed

5 files changed

+50
-7
lines changed

mlir/include/mlir/Dialect/GPU/IR/GPUOps.td

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ class GPU_IndexOp<string mnemonic, list<Trait> traits = []> :
7070

7171
def GPU_ClusterDimOp : GPU_IndexOp<"cluster_dim"> {
7272
let description = [{
73-
Returns the number of thread blocks in the cluster along
73+
Returns the number of cluster identifiers per grid along
7474
the x, y, or z `dimension`.
7575

7676
Example:
@@ -81,6 +81,19 @@ def GPU_ClusterDimOp : GPU_IndexOp<"cluster_dim"> {
8181
}];
8282
}
8383

84+
def GPU_ClusterDimBlocksOp : GPU_IndexOp<"cluster_dim_blocks"> {
85+
let description = [{
86+
Returns the number of thread blocks in the cluster along
87+
the x, y, or z `dimension`.
88+
89+
Example:
90+
91+
```mlir
92+
%cDimBlocksX = gpu.cluster_dim_blocks x
93+
```
94+
}];
95+
}
96+
8497
def GPU_ClusterIdOp : GPU_IndexOp<"cluster_id"> {
8598
let description = [{
8699
Returns the cluster id, i.e. the index of the current cluster within the
@@ -94,6 +107,18 @@ def GPU_ClusterIdOp : GPU_IndexOp<"cluster_id"> {
94107
}];
95108
}
96109

110+
def GPU_ClusterBlockIdOp : GPU_IndexOp<"cluster_block_id"> {
111+
let description = [{
112+
Returns the block id within the cluster along the x, y, or z `dimension`.
113+
114+
Example:
115+
116+
```mlir
117+
%cBlockIdY = gpu.cluster_block_id y
118+
```
119+
}];
120+
}
121+
97122
def GPU_BlockDimOp : GPU_IndexOp<"block_dim"> {
98123
let description = [{
99124
Returns the number of threads in the thread block (aka the block size) along

mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -160,9 +160,9 @@ def NVVM_ClusterDimZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.nclusterid.z">;
160160
def NVVM_BlockInClusterIdXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.ctaid.x">;
161161
def NVVM_BlockInClusterIdYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.ctaid.y">;
162162
def NVVM_BlockInClusterIdZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.ctaid.z">;
163-
def NVVM_GridInClusterDimXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.nctaid.x">;
164-
def NVVM_GridInClusterDimYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.nctaid.y">;
165-
def NVVM_GridInClusterDimZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.nctaid.z">;
163+
def NVVM_ClusterDimBlocksXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.nctaid.x">;
164+
def NVVM_ClusterDimBlocksYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.nctaid.y">;
165+
def NVVM_ClusterDimBlocksZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.nctaid.z">;
166166

167167
//===----------------------------------------------------------------------===//
168168
// CTA index and across Cluster dimensions

mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,8 +342,14 @@ void mlir::populateGpuToNVVMConversionPatterns(LLVMTypeConverter &converter,
342342
NVVM::BlockDimYOp, NVVM::BlockDimZOp>,
343343
GPUIndexIntrinsicOpLowering<gpu::ClusterIdOp, NVVM::ClusterIdXOp,
344344
NVVM::ClusterIdYOp, NVVM::ClusterIdZOp>,
345+
GPUIndexIntrinsicOpLowering<
346+
gpu::ClusterBlockIdOp, NVVM::BlockInClusterIdXOp,
347+
NVVM::BlockInClusterIdYOp, NVVM::BlockInClusterIdZOp>,
345348
GPUIndexIntrinsicOpLowering<gpu::ClusterDimOp, NVVM::ClusterDimXOp,
346349
NVVM::ClusterDimYOp, NVVM::ClusterDimZOp>,
350+
GPUIndexIntrinsicOpLowering<
351+
gpu::ClusterDimBlocksOp, NVVM::ClusterDimBlocksXOp,
352+
NVVM::ClusterDimBlocksYOp, NVVM::ClusterDimBlocksZOp>,
347353
GPUIndexIntrinsicOpLowering<gpu::BlockIdOp, NVVM::BlockIdXOp,
348354
NVVM::BlockIdYOp, NVVM::BlockIdZOp>,
349355
GPUIndexIntrinsicOpLowering<gpu::GridDimOp, NVVM::GridDimXOp,

mlir/lib/Dialect/GPU/IR/InferIntRangeInterfaceImpls.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,12 @@ static std::optional<uint64_t> getKnownLaunchDim(Op op, LaunchDims type) {
8686

8787
void ClusterDimOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
8888
SetIntRangeFn setResultRange) {
89+
uint64_t max = APInt::getMaxValue(64).getZExtValue();
90+
setResultRange(getResult(), getIndexRange(1, max));
91+
}
92+
93+
void ClusterDimBlocksOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
94+
SetIntRangeFn setResultRange) {
8995
setResultRange(getResult(), getIndexRange(1, kMaxClusterDim));
9096
}
9197

@@ -95,6 +101,12 @@ void ClusterIdOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
95101
setResultRange(getResult(), getIndexRange(0, max - 1ULL));
96102
}
97103

104+
void ClusterBlockIdOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
105+
SetIntRangeFn setResultRange) {
106+
uint64_t max = kMaxClusterDim;
107+
setResultRange(getResult(), getIndexRange(0, max - 1ULL));
108+
}
109+
98110
void BlockDimOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
99111
SetIntRangeFn setResultRange) {
100112
std::optional<uint64_t> knownVal =

mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@ module attributes {gpu.container_module} {
2222
%cidX = gpu.cluster_id x
2323
%cidY = gpu.cluster_id y
2424
%cidZ = gpu.cluster_id z
25-
%cdimX = gpu.cluster_dim x
26-
%cdimY = gpu.cluster_dim y
27-
%cdimZ = gpu.cluster_dim z
25+
%cdimX = gpu.cluster_dim_blocks x
26+
%cdimY = gpu.cluster_dim_blocks y
27+
%cdimZ = gpu.cluster_dim_blocks z
2828
%bidX = gpu.block_id x
2929
%bidY = gpu.block_id y
3030
%bidZ = gpu.block_id z

0 commit comments

Comments
 (0)