Skip to content

Commit cdd2698

Browse files
committed
Add populateGpuPromoteShuffleToAMDGPUPatterns to convert-gpu-to-rocdl
1 parent 1b500ef commit cdd2698

File tree

2 files changed

+4
-1
lines changed

2 files changed

+4
-1
lines changed

mlir/include/mlir/Conversion/Passes.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -601,6 +601,7 @@ def ConvertGpuOpsToROCDLOps : Pass<"convert-gpu-to-rocdl", "gpu::GPUModuleOp"> {
601601
let constructor = "mlir::createLowerGpuOpsToROCDLOpsPass()";
602602
let dependentDialects = [
603603
"ROCDL::ROCDLDialect",
604+
"amdgpu::AMDGPUDialect",
604605
"cf::ControlFlowDialect",
605606
"memref::MemRefDialect",
606607
];
@@ -1415,7 +1416,7 @@ def ConvertVectorToLLVMPass : Pass<"convert-vector-to-llvm"> {
14151416
"bool", /*default=*/"false",
14161417
"Use the preferred alignment of a vector type in load/store "
14171418
"operations instead of the alignment of the element type of the "
1418-
"memref. This flag is intended for use with hardware which requires"
1419+
"memref. This flag is intended for use with hardware which requires "
14191420
"vector alignment, or in application contexts where it is known all "
14201421
"vector access are naturally aligned. ">,
14211422
Option<"amx", "enable-amx",

mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
2828
#include "mlir/Conversion/MathToLLVM/MathToLLVM.h"
2929
#include "mlir/Conversion/MathToROCDL/MathToROCDL.h"
30+
#include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h"
3031
#include "mlir/Dialect/ControlFlow/IR/ControlFlow.h"
3132
#include "mlir/Dialect/Func/IR/FuncOps.h"
3233
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
@@ -319,6 +320,7 @@ struct LowerGpuOpsToROCDLOpsPass final
319320
RewritePatternSet patterns(ctx);
320321
populateGpuRewritePatterns(patterns);
321322
arith::populateExpandBFloat16Patterns(patterns);
323+
populateGpuPromoteShuffleToAMDGPUPatterns(patterns);
322324
(void)applyPatternsGreedily(m, std::move(patterns));
323325
}
324326

0 commit comments

Comments
 (0)