-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[mlir][rocdl] Add AMDGPU-specific cf.assert
lowering
#121067
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[mlir][rocdl] Add AMDGPU-specific cf.assert
lowering
#121067
Conversation
@llvm/pr-subscribers-mlir-gpu @llvm/pr-subscribers-mlir Author: Maksim Levental (makslevental) ChangesThis commit adds an AMD-specific lowering of Note, the reason for lowering to Full diff: https://github.com/llvm/llvm-project/pull/121067.diff 2 Files Affected:
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index aaf00e51f49416..2b0d16a5defb1f 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -31,6 +31,7 @@
#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlow.h"
+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Transforms/Passes.h"
@@ -195,6 +196,75 @@ struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern<gpu::ShuffleOp> {
}
};
+/// Based on
+/// mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp#AssertOpToAssertfailLowering
+/// Lowering of cf.assert into a conditional llvm.intr.trap plus gpu.printf with
+/// the metadata (filename, fileline, assert msg).
+struct AssertOpToBuiltinTrapLowering
+ : public ConvertOpToLLVMPattern<cf::AssertOp> {
+ using ConvertOpToLLVMPattern<cf::AssertOp>::ConvertOpToLLVMPattern;
+
+ LogicalResult
+ matchAndRewrite(cf::AssertOp assertOp, cf::AssertOpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ Location loc = assertOp.getLoc();
+
+ // Split blocks and insert conditional branch.
+ // ^before:
+ // ...
+ // cf.cond_br %condition, ^after, ^assert
+ // ^assert:
+ // cf.assert
+ // cf.br ^after
+ // ^after:
+ // ...
+ Block *beforeBlock = assertOp->getBlock();
+ Block *assertBlock =
+ rewriter.splitBlock(beforeBlock, assertOp->getIterator());
+ Block *afterBlock =
+ rewriter.splitBlock(assertBlock, ++assertOp->getIterator());
+ rewriter.setInsertionPointToEnd(beforeBlock);
+ rewriter.create<cf::CondBranchOp>(loc, adaptor.getArg(), afterBlock,
+ assertBlock);
+ rewriter.setInsertionPointToEnd(assertBlock);
+ rewriter.create<cf::BranchOp>(loc, afterBlock);
+
+ // Continue cf.assert lowering.
+ rewriter.setInsertionPoint(assertOp);
+
+ // Populate file name, file number and function name from the location of
+ // the AssertOp.
+ StringRef fileName = "(unknown)";
+ StringRef funcName = "(unknown)";
+ int32_t fileLine = 0;
+ if (auto fileLineColLoc = dyn_cast<FileLineColRange>(loc)) {
+ fileName = fileLineColLoc.getFilename().strref();
+ fileLine = fileLineColLoc.getStartLine();
+ } else if (auto nameLoc = dyn_cast<NameLoc>(loc)) {
+ funcName = nameLoc.getName().strref();
+ if (auto fileLineColLoc =
+ dyn_cast<FileLineColRange>(nameLoc.getChildLoc())) {
+ fileName = fileLineColLoc.getFilename().strref();
+ fileLine = fileLineColLoc.getStartLine();
+ }
+ }
+
+ Value assertLine =
+ rewriter.create<LLVM::ConstantOp>(loc, rewriter.getI32Type(), fileLine);
+ // interpolate the fmt str AOT because current gpu.printf lowering doesn't
+ // handle %s
+ llvm::Twine fmtStr = fileName + ":%u: " + funcName +
+ " Device-side assertion `" + assertOp.getMsg() +
+ "' failed.\n";
+ rewriter.create<gpu::PrintfOp>(assertOp.getLoc(),
+ rewriter.getStringAttr(fmtStr),
+ ValueRange{assertLine});
+ rewriter.replaceOpWithNewOp<LLVM::Trap>(assertOp);
+
+ return success();
+ }
+};
+
/// Import the GPU Ops to ROCDL Patterns.
#include "GPUToROCDL.cpp.inc"
@@ -297,7 +367,7 @@ struct LowerGpuOpsToROCDLOpsPass
populateVectorToLLVMConversionPatterns(converter, llvmPatterns);
populateMathToLLVMConversionPatterns(converter, llvmPatterns);
cf::populateControlFlowToLLVMConversionPatterns(converter, llvmPatterns);
- cf::populateAssertToLLVMConversionPattern(converter, llvmPatterns);
+ llvmPatterns.add<AssertOpToBuiltinTrapLowering>(converter);
populateFuncToLLVMConversionPatterns(converter, llvmPatterns);
populateFinalizeMemRefToLLVMConversionPatterns(converter, llvmPatterns);
populateGpuToROCDLConversionPatterns(converter, llvmPatterns, runtime);
diff --git a/mlir/test/Integration/GPU/ROCM/assert.mlir b/mlir/test/Integration/GPU/ROCM/assert.mlir
new file mode 100644
index 00000000000000..0c292d1b02473e
--- /dev/null
+++ b/mlir/test/Integration/GPU/ROCM/assert.mlir
@@ -0,0 +1,37 @@
+// RUN: mlir-opt %s -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \
+// RUN: | mlir-cpu-runner \
+// RUN: --shared-libs=%mlir_cuda_runtime \
+// RUN: --shared-libs=%mlir_runner_utils \
+// RUN: --entry-point-result=void 2>&1 \
+// RUN: | FileCheck %s
+
+// CHECK-DAG: thread 0: print after passing assertion
+// CHECK-DAG: thread 1: print after passing assertion
+// CHECK-DAG: mlir/test/Integration/GPU/ROCM/assert.mlir:{{.*}}: (unknown) Device-side assertion `failing assertion' failed.
+// CHECK-DAG: mlir/test/Integration/GPU/ROCM/assert.mlir:{{.*}}: (unknown) Device-side assertion `failing assertion' failed.
+// CHECK-NOT: print after failing assertion
+
+module attributes {gpu.container_module} {
+gpu.module @kernels {
+gpu.func @test_assert(%c0: i1, %c1: i1) kernel {
+ %0 = gpu.thread_id x
+ cf.assert %c1, "passing assertion"
+ gpu.printf "thread %lld: print after passing assertion\n" %0 : index
+ cf.assert %c0, "failing assertion"
+ gpu.printf "thread %lld: print after failing assertion\n" %0 : index
+ gpu.return
+}
+}
+
+func.func @main() {
+ %c2 = arith.constant 2 : index
+ %c1 = arith.constant 1 : index
+ %c0_i1 = arith.constant 0 : i1
+ %c1_i1 = arith.constant 1 : i1
+ gpu.launch_func @kernels::@test_assert
+ blocks in (%c1, %c1, %c1)
+ threads in (%c2, %c1, %c1)
+ args(%c0_i1 : i1, %c1_i1 : i1)
+ return
+}
+}
|
a3ba62c
to
b8afb04
Compare
b8afb04
to
79c96cb
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
- Approved
- Wrt OpenCL ... I hope legalization didn't get broken, but in the OpenCL flow, pryntf should lower to ...
printf()
, which the compiler will handle. Or at least that's my recollection of how that goes from staring at the AMDGPU backend ~a year ago
It's complicated, there's like two different flows that either lower |
Re OpenCL and the linked test: we don't have an integration test for OpenCL's lowering because we don't have a way to execute it |
8865986
to
86f4eff
Compare
86f4eff
to
dbd81f3
Compare
llvm:users/matthias-springer/gpu_assert was deleted when I merged my PR. Looks like this cannot be reopened, I think you have to create a new PR. Or maybe you can change the target branch and then reopen. |
This commit adds an AMD-specific lowering of
cf.assert
tollvm.intr.trap
. It depends on (and is therefore merging into) #120431.Note, the reason for lowering to
llvm.intr.trap
instead of__assertfail
/__assert_fail
is because these are header only functions in HIP.Note also, this currently doesn't work with
runtime=OpenCL
because thegpu.printf
isn't legalized on that path (ever? adjacent printf.mlir doesn't lower forOpenCL
either...).cc @danzimm @SamGinzburg