Skip to content

Commit 641e05d

Browse files
authored
[mlir][gpu] Support dynamic_shared_memory Op with vector dialect (#74475)
`gpu.dynamic_shared_memory` currently does not get lowered when it is used with vector dialect. The reason is that vector-to-llvm conversion is not included in gpu-to-nvvm. This PR includes that and adds a test.
1 parent ea4ce16 commit 641e05d

File tree

2 files changed

+22
-0
lines changed

2 files changed

+22
-0
lines changed

mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
 #include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
 #include "mlir/Conversion/LLVMCommon/TypeConverter.h"
 #include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
+#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
 #include "mlir/Dialect/ControlFlow/IR/ControlFlow.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
@@ -282,6 +283,7 @@ struct LowerGpuOpsToNVVMOpsPass
     populateFinalizeMemRefToLLVMConversionPatterns(converter, llvmPatterns);
     populateGpuToNVVMConversionPatterns(converter, llvmPatterns);
     populateGpuWMMAToNVVMConversionPatterns(converter, llvmPatterns);
+    populateVectorToLLVMConversionPatterns(converter, llvmPatterns);
     if (this->hasRedux)
       populateGpuSubgroupReduceOpLoweringPattern(converter, llvmPatterns);
     LLVMConversionTarget target(getContext());

mlir/test/Conversion/GPUCommon/lower-memory-space-attrs.mlir

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,26 @@ gpu.module @kernel {

 // -----

+gpu.module @kernel {
+  gpu.func @dynamic_shmem_with_vector(%arg1: memref<1xf32>) {
+    %0 = arith.constant 0 : index
+    %1 = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<workgroup>>
+    %2 = memref.view %1[%0][] : memref<?xi8, #gpu.address_space<workgroup>> to memref<1xf32, #gpu.address_space<workgroup>>
+    %3 = vector.load %2[%0] : memref<1xf32, #gpu.address_space<workgroup>>, vector<1xf32>
+    vector.store %3, %arg1[%0] : memref<1xf32>, vector<1xf32>
+    gpu.return
+  }
+}
+
+// ROCDL: llvm.mlir.global internal @__dynamic_shmem__0() {addr_space = 3 : i32} : !llvm.array<0 x i8>
+// NVVM: llvm.mlir.global internal @__dynamic_shmem__0() {addr_space = 3 : i32, alignment = 16 : i64} : !llvm.array<0 x i8>
+// CHECK-LABEL: llvm.func @dynamic_shmem_with_vector
+// CHECK: llvm.mlir.addressof @__dynamic_shmem__0 : !llvm.ptr<3>
+// CHECK: llvm.load %{{.*}} {alignment = 4 : i64} : !llvm.ptr<3> -> vector<1xf32>
+// CHECK: llvm.store
+
+// -----
+
 gpu.module @kernel {
   gpu.func @dynamic_shmem(%arg0: f32) {
     %0 = arith.constant 0 : index

0 commit comments

Comments
 (0)