Skip to content

Commit 4cba595

Browse files
authored
[mlir][ROCDL] Set the LLVM data layout when lowering to ROCDL LLVM (#74501)
In order to ensure operations lower correctly (especially memref.addrspacecast, which relies on the data layout being set correctly when dealing with dynamic memrefs) and to prevent compilation issues later down the line, set the `llvm.data_layout` attribute on GPU modules when lowering their contents to a ROCDL / AMDGPU target. If there's a good way to test the embedded string to prevent it from going out of sync with the LLVM TargetMachine, I'd appreciate hearing about it. (Or, alternatively, if there's a place I could factor the string out to.)
1 parent d612d59 commit 4cba595

File tree

2 files changed

+22
-0
lines changed

2 files changed

+22
-0
lines changed

mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,11 @@ Value getLaneId(ConversionPatternRewriter &rewriter, Location loc,
7575
ValueRange{minus1, mbcntLo});
7676
return laneId;
7777
}
78+
/// LLVM data layout string used as the default for AMDGCN (ROCDL) targets.
///
/// The GPU-to-ROCDL lowering pass attaches this string as the
/// `llvm.data_layout` attribute of a `gpu.module` that does not already carry
/// one; a layout supplied on the module is left as-is.
///
/// NOTE(review): the string is hand-written here rather than queried from
/// LLVM, so it presumably has to be updated manually if the AMDGPU
/// TargetMachine's layout changes -- confirm against LLVM when touching it.
/// The lit test gpu-to-rocdl.mlir checks for this exact string.
static constexpr StringLiteral amdgcnDataLayout =
79+
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
80+
"-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:"
81+
"128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-"
82+
"G1-ni:7:8";
7883

7984
namespace {
8085
struct GPULaneIdOpToROCDL : ConvertOpToLLVMPattern<gpu::LaneIdOp> {
@@ -212,6 +217,12 @@ struct LowerGpuOpsToROCDLOpsPass
212217
gpu::GPUModuleOp m = getOperation();
213218
MLIRContext *ctx = m.getContext();
214219

220+
auto llvmDataLayout = m->getAttrOfType<StringAttr>(
221+
LLVM::LLVMDialect::getDataLayoutAttrName());
222+
if (!llvmDataLayout) {
223+
llvmDataLayout = StringAttr::get(ctx, amdgcnDataLayout);
224+
m->setAttr(LLVM::LLVMDialect::getDataLayoutAttrName(), llvmDataLayout);
225+
}
215226
// Request C wrapper emission.
216227
for (auto func : m.getOps<func::FuncOp>()) {
217228
func->setAttr(LLVM::LLVMDialect::getEmitCWrapperAttrName(),
@@ -227,6 +238,7 @@ struct LowerGpuOpsToROCDLOpsPass
227238
/// Customize the bitwidth used for the device side index computations.
228239
LowerToLLVMOptions options(
229240
ctx, DataLayout(cast<DataLayoutOpInterface>(m.getOperation())));
241+
options.dataLayout = llvm::DataLayout(llvmDataLayout.getValue());
230242
if (indexBitwidth != kDeriveIndexBitwidthFromDataLayout)
231243
options.overrideIndexBitwidth(indexBitwidth);
232244

mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
// RUN: mlir-opt %s -convert-gpu-to-rocdl -split-input-file | FileCheck %s
22
// RUN: mlir-opt %s -convert-gpu-to-rocdl='index-bitwidth=32' -split-input-file | FileCheck --check-prefix=CHECK32 %s
33

4+
// CHECK-LABEL: @test_module
5+
// CHECK-SAME: llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
46
gpu.module @test_module {
57
// CHECK-LABEL: func @gpu_index_ops()
68
// CHECK32-LABEL: func @gpu_index_ops()
@@ -628,3 +630,11 @@ gpu.module @test_module {
628630
func.return %shfl, %shfli : f32, f32
629631
}
630632
}
633+
634+
// -----
635+
636+
// CHECK-LABEL: @test_custom_data_layout
637+
// CHECK-SAME: llvm.data_layout = "e"
638+
gpu.module @test_custom_data_layout attributes {llvm.data_layout = "e"} {
639+
640+
}

0 commit comments

Comments
 (0)