
Commit 22a32f7
[mlir][gpu] Add dump-ptx option
When targeting NVIDIA GPUs, it is often important to inspect the generated PTX, but there is currently no simple way to do so. This change adds a dump-ptx option to the gpu-to-cubin pass. It can be used like `gpu-to-cubin{chip=sm_90 features=+ptx80 dump-ptx}`.

Reviewed By: nicolasvasilache

Differential Revision: https://reviews.llvm.org/D155166
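
For example, the option can be exercised end to end with a pipeline like the one in the test added below (a minimal sketch mirroring the test's RUN lines; input.mlir is a placeholder file, and the PTX is printed to the debug stream, i.e. stderr):

  mlir-opt input.mlir \
    | mlir-opt -gpu-kernel-outlining \
    | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{chip=sm_90 features=+ptx80 dump-ptx}))'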

3 files changed (+32, -5 lines)

mlir/include/mlir/Dialect/GPU/Transforms/Passes.h

Lines changed: 5 additions & 1 deletion
@@ -117,6 +117,9 @@ class SerializeToBlobPass : public OperationPass<gpu::GPUModuleOp> {
       *this, "gpu-binary-annotation",
       llvm::cl::desc("Annotation attribute string for GPU binary"),
       llvm::cl::init(getDefaultGpuBinaryAnnotation())};
+  Option<bool> dumpPtx{*this, "dump-ptx",
+                       ::llvm::cl::desc("Dump generated PTX"),
+                       llvm::cl::init(false)};
 };
 } // namespace gpu

@@ -137,7 +140,8 @@ void registerGpuSerializeToHsacoPass();
 std::unique_ptr<Pass> createGpuSerializeToCubinPass(StringRef triple,
                                                     StringRef chip,
                                                     StringRef features,
-                                                    int optLevel = 2);
+                                                    int optLevel = 2,
+                                                    bool dumpPtx = false);

 /// Create an instance of the GPU kernel function to HSAco binary serialization
 /// pass.
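
With the extended factory signature above, the pass can also be created programmatically. A minimal sketch; the surrounding pass-manager setup and the helper name buildCubinPipeline are illustrative, not part of this change:

  // Illustrative sketch: schedule cubin serialization on the nested GPU
  // modules, with PTX dumping enabled via the new parameter.
  #include "mlir/Dialect/GPU/Transforms/Passes.h"
  #include "mlir/Pass/PassManager.h"

  void buildCubinPipeline(mlir::PassManager &pm) {
    pm.addNestedPass<mlir::gpu::GPUModuleOp>(
        mlir::createGpuSerializeToCubinPass(
            /*triple=*/"nvptx64-nvidia-cuda", /*chip=*/"sm_90",
            /*features=*/"+ptx80", /*optLevel=*/2, /*dumpPtx=*/true));
  }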

mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp

Lines changed: 12 additions & 4 deletions
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//

 #include "mlir/Dialect/GPU/Transforms/Passes.h"
+#include "llvm/Support/Debug.h"

 #if MLIR_GPU_TO_CUBIN_PASS_ENABLE
 #include "mlir/Pass/Pass.h"

@@ -50,7 +51,7 @@ class SerializeToCubinPass

   SerializeToCubinPass(StringRef triple = "nvptx64-nvidia-cuda",
                        StringRef chip = "sm_35", StringRef features = "+ptx60",
-                       int optLevel = 2);
+                       int optLevel = 2, bool dumpPtx = false);

   StringRef getArgument() const override { return "gpu-to-cubin"; }
   StringRef getDescription() const override {

@@ -73,10 +74,12 @@ static void maybeSetOption(Pass::Option<std::string> &option, StringRef value) {
 }

 SerializeToCubinPass::SerializeToCubinPass(StringRef triple, StringRef chip,
-                                           StringRef features, int optLevel) {
+                                           StringRef features, int optLevel,
+                                           bool dumpPtx) {
   maybeSetOption(this->triple, triple);
   maybeSetOption(this->chip, chip);
   maybeSetOption(this->features, features);
+  this->dumpPtx = dumpPtx;
   if (this->optLevel.getNumOccurrences() == 0)
     this->optLevel.setValue(optLevel);
 }

@@ -112,6 +115,10 @@ SerializeToCubinPass::serializeISA(const std::string &isa) {
       &linkState));

   auto kernelName = getOperation().getName().str();
+  if (dumpPtx) {
+    llvm::dbgs() << " Kernel Name : [" << kernelName << "]\n";
+    llvm::dbgs() << isa << "\n";
+  }
   RETURN_ON_CUDA_ERROR(cuLinkAddData(
       linkState, CUjitInputType::CU_JIT_INPUT_PTX,
       const_cast<void *>(static_cast<const void *>(isa.c_str())), isa.length(),

@@ -151,9 +158,10 @@ void mlir::registerGpuSerializeToCubinPass() {
 std::unique_ptr<Pass> mlir::createGpuSerializeToCubinPass(StringRef triple,
                                                           StringRef arch,
                                                           StringRef features,
-                                                          int optLevel) {
+                                                          int optLevel,
+                                                          bool dumpPtx) {
   return std::make_unique<SerializeToCubinPass>(triple, arch, features,
-                                                optLevel);
+                                                optLevel, dumpPtx);
 }

 #else // MLIR_GPU_TO_CUBIN_PASS_ENABLE
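
With dump-ptx set, serializeISA prints the GPU module's name and the full PTX to llvm::dbgs() (stderr) just before JIT-linking. The output looks roughly like this, abridged and illustrative; the " Kernel Name : [...]" format comes from the code above, and the PTX lines match the test's CHECK lines:

   Kernel Name : [kernel_a]
  //
  // Generated by LLVM NVPTX Back-End
  //
  ...
  .visible .func kernel_a()
  {
          ret;
  }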
Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
+// RUN: mlir-opt %s \
+// RUN: | mlir-opt -gpu-kernel-outlining \
+// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{dump-ptx}))' \
+// RUN: 2>&1 | FileCheck %s
+
+// CHECK: Generated by LLVM NVPTX Back-End
+// CHECK: .visible .func kernel_a()
+// CHECK: ret;
+
+gpu.module @bar {
+  llvm.func @kernel_a()
+    attributes { gpu.kernel } {
+    llvm.return
+  }
+}
