
Commit 3a03da3

[mlir][nvgpu] Add address space attribute converter in nvgpu-to-nvvm pass (#74075)
The GPU dialect has `#gpu.address_space<workgroup>` for shared memory (NVVM address space 3). However, when the IR mixes the NVGPU and GPU dialects, the `nvgpu-to-nvvm` pass fails due to the missing attribute conversion. This PR adds `populateGpuMemorySpaceAttributeConversions` to the nvgpu-to-nvvm lowering, so `#gpu.address_space<workgroup>` can be used with the `nvgpu-to-nvvm` pass.
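For context, here is an editorial sketch (not part of the commit) of the attribute-to-address-space mapping the new converter installs; the numeric values are the `NVVM::NVVMMemorySpace` enumerators used in the diffs below, with the private space falling back to the generic space 0:

    // Mapping installed by populateGpuMemorySpaceAttributeConversions in this patch:
    //   #gpu.address_space<global>    -> 1  (NVVM::NVVMMemorySpace::kGlobalMemorySpace)
    //   #gpu.address_space<workgroup> -> 3  (NVVM::NVVMMemorySpace::kSharedMemorySpace)
    //   #gpu.address_space<private>   -> 0  (generic/default address space)
    //
    // So a shared-memory buffer written with the GPU dialect attribute, e.g.
    //   memref<128xf32, #gpu.address_space<workgroup>>
    // now lowers the same way as the integer-annotated form memref<128xf32, 3>.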
1 parent 0c568c2

3 files changed (+44, -0 lines)
mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp

Lines changed: 15 additions & 0 deletions
@@ -405,6 +405,21 @@ struct ConvertNVGPUToNVVMPass
     RewritePatternSet patterns(&getContext());
     LLVMTypeConverter converter(&getContext(), options);
     IRRewriter rewriter(&getContext());
+    populateGpuMemorySpaceAttributeConversions(
+        converter, [](gpu::AddressSpace space) -> unsigned {
+          switch (space) {
+          case gpu::AddressSpace::Global:
+            return static_cast<unsigned>(
+                NVVM::NVVMMemorySpace::kGlobalMemorySpace);
+          case gpu::AddressSpace::Workgroup:
+            return static_cast<unsigned>(
+                NVVM::NVVMMemorySpace::kSharedMemorySpace);
+          case gpu::AddressSpace::Private:
+            return 0;
+          }
+          llvm_unreachable("unknown address space enum value");
+          return 0;
+        });
     /// device-side async tokens cannot be materialized in nvvm. We just
     /// convert them to a dummy i32 type in order to easily drop them during
     /// conversion.
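A brief editorial note on what this buys at the type level, sketched under the assumption of the default descriptor-based memref lowering (not the bare-pointer calling convention):

    // Before: the converter had no rule for the GPU dialect attribute, so types such as
    //   memref<128xf32, #gpu.address_space<workgroup>>
    // failed to convert inside this pass.
    // After: workgroup maps to 3, so the memref converts like memref<128xf32, 3> and
    // its descriptor pointers land in LLVM address space 3, roughly:
    //   !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>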

mlir/lib/Dialect/NVGPU/TransformOps/NVGPUTransformOps.cpp

Lines changed: 16 additions & 0 deletions
@@ -9,6 +9,7 @@
 #include "mlir/Dialect/NVGPU/TransformOps/NVGPUTransformOps.h"
 
 #include "mlir/Analysis/SliceAnalysis.h"
+#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
 #include "mlir/Conversion/LLVMCommon/TypeConverter.h"
 #include "mlir/Conversion/NVGPUToNVVM/NVGPUToNVVM.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
@@ -51,6 +52,21 @@ void transform::ApplyNVGPUToNVVMConversionPatternsOp::populatePatterns(
   /// device-side async tokens cannot be materialized in nvvm. We just
   /// convert them to a dummy i32 type in order to easily drop them during
   /// conversion.
+  populateGpuMemorySpaceAttributeConversions(
+      llvmTypeConverter, [](gpu::AddressSpace space) -> unsigned {
+        switch (space) {
+        case gpu::AddressSpace::Global:
+          return static_cast<unsigned>(
+              NVVM::NVVMMemorySpace::kGlobalMemorySpace);
+        case gpu::AddressSpace::Workgroup:
+          return static_cast<unsigned>(
+              NVVM::NVVMMemorySpace::kSharedMemorySpace);
+        case gpu::AddressSpace::Private:
+          return 0;
+        }
+        llvm_unreachable("unknown address space enum value");
+        return 0;
+      });
   llvmTypeConverter.addConversion(
       [&](nvgpu::DeviceAsyncTokenType type) -> Type {
         return llvmTypeConverter.convertType(

mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir

Lines changed: 13 additions & 0 deletions
@@ -666,6 +666,19 @@ func.func @async_tma_load(%tensorMap1d: !tensorMap1d, %tensorMap2d: !tensorMap2d
   func.return
 }
 
+// CHECK-LABEL: func @async_tma_load
+!tensorMap1dgpuspace = !nvgpu.tensormap.descriptor<tensor = memref<128xf32, #gpu.address_space<workgroup>>, swizzle=none, l2promo = none, oob = nan, interleave = none>
+func.func @async_tma_load_gpu_address_space(%tensorMap1d: !tensorMap1dgpuspace,
+                                            %buffer1d: memref<128xf32, #gpu.address_space<workgroup>>,
+                                            %mbarrier: !mbarrier) {
+  %c0 = arith.constant 0 : index
+  %crd0 = arith.constant 0 : index
+  %crd1 = arith.constant 0 : index
+  // CHECK: nvvm.cp.async.bulk.tensor.shared.cluster.global %{{.*}}, %{{.*}}, %{{.*}} box[%{{.*}}]
+  nvgpu.tma.async.load %tensorMap1d[%crd0], %mbarrier[%c0] to %buffer1d : !tensorMap1dgpuspace, !mbarrier -> memref<128xf32,#gpu.address_space<workgroup>>
+  func.return
+}
+
 // CHECK-LABEL: func @async_tma_load_pred
 func.func @async_tma_load_pred(%tensorMap1d: !tensorMap1d, %tensorMap2d: !tensorMap2d, %tensorMap3d: !tensorMap3d, %tensorMap4d: !tensorMap4d, %tensorMap5d: !tensorMap5d,
                                %buffer1d: memref<128xf32,3>,
