Skip to content

Commit a8b5115

Browse files
authored
[MLIR][NVGPU] Fix the cga_cluster.mlir test (#112191)
This patch fixes the sm90 cluster test by: * Fixing a typo in LowerGpuOpsToNVVMOps, where one of the ClusterDim Op conversion patterns should actually be for the ClusterDimBlocks Op. This addresses the compilation error for this test. * Changing the grid-size from (2,2,1) to (4,4,1). This passes the scf-if check against the threshold of 3 below and actually generates the required prints from the GPU. Signed-off-by: Durgadoss R <[email protected]>
1 parent ddb64e6 commit a8b5115

File tree

2 files changed

+4
-3
lines changed

2 files changed

+4
-3
lines changed

mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -373,8 +373,9 @@ void mlir::populateGpuToNVVMConversionPatterns(
373373
NVVM::BlockInClusterIdYOp, NVVM::BlockInClusterIdZOp>>(
374374
converter, IndexKind::Other, IntrType::Id);
375375
patterns.add<gpu::index_lowering::OpLowering<
376-
gpu::ClusterDimOp, NVVM::ClusterDimXOp, NVVM::ClusterDimYOp,
377-
NVVM::ClusterDimZOp>>(converter, IndexKind::Other, IntrType::Dim);
376+
gpu::ClusterDimBlocksOp, NVVM::ClusterDimBlocksXOp,
377+
NVVM::ClusterDimBlocksYOp, NVVM::ClusterDimBlocksZOp>>(
378+
converter, IndexKind::Other, IntrType::Dim);
378379
patterns.add<gpu::index_lowering::OpLowering<
379380
gpu::BlockIdOp, NVVM::BlockIdXOp, NVVM::BlockIdYOp, NVVM::BlockIdZOp>>(
380381
converter, IndexKind::Grid, IntrType::Id);

mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ module attributes {gpu.container_module} {
1818
return
1919
}
2020
gpu.module @gpumodule {
21-
gpu.func @kernel_cluster() kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 2, 2, 1>} {
21+
gpu.func @kernel_cluster() kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 4, 4, 1>} {
2222
%cidX = gpu.cluster_id x
2323
%cidY = gpu.cluster_id y
2424
%cidZ = gpu.cluster_id z

0 commit comments

Comments
 (0)