Skip to content

Commit 816134b

Browse files
authored
[MLIR] Dump sass (#110227)
This PR dumps SASS by using nvdisasm
1 parent 097ada2 commit 816134b

File tree

2 files changed

+39
-0
lines changed

2 files changed

+39
-0
lines changed

mlir/lib/Target/LLVM/NVVM/Target.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "llvm/Support/Process.h"
3131
#include "llvm/Support/Program.h"
3232
#include "llvm/Support/TargetSelect.h"
33+
#include "llvm/Support/raw_ostream.h"
3334

3435
#include <cstdlib>
3536

@@ -401,6 +402,26 @@ NVPTXSerializer::compileToBinary(const std::string &ptxCode) {
401402
/*MemoryLimit=*/0,
402403
/*ErrMsg=*/&message))
403404
return emitLogError("`ptxas`");
405+
#define DEBUG_TYPE "dump-sass"
406+
LLVM_DEBUG({
407+
std::optional<std::string> nvdisasm = findTool("nvdisasm");
408+
SmallVector<StringRef> nvdisasmArgs(
409+
{StringRef("nvdisasm"), StringRef(cubinFile.first)});
410+
if (llvm::sys::ExecuteAndWait(nvdisasm.value(), nvdisasmArgs,
411+
/*Env=*/std::nullopt,
412+
/*Redirects=*/redirects,
413+
/*SecondsToWait=*/0,
414+
/*MemoryLimit=*/0,
415+
/*ErrMsg=*/&message))
416+
return emitLogError("`nvdisasm`");
417+
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer =
418+
llvm::MemoryBuffer::getFile(logFile->first);
419+
if (logBuffer && !(*logBuffer)->getBuffer().empty()) {
420+
llvm::dbgs() << "Output:\n" << (*logBuffer)->getBuffer() << "\n";
421+
llvm::dbgs().flush();
422+
}
423+
});
424+
#undef DEBUG_TYPE
404425

405426
// Invoke `fatbin`.
406427
message.clear();
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// RUN: mlir-opt %s \
2+
// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline -debug-only=dump-sass \
3+
// RUN: 2>&1 | FileCheck %s
4+
5+
// CHECK: MOV
6+
// CHECK: STG.E
7+
8+
// Test input for the SASS dump: @other_func launches a GPU kernel in which
// each thread stores %arg0 into %arg1 at its x thread index. The CHECK lines
// above expect `MOV` and `STG.E` to appear in the pipeline's debug output.
func.func @other_func(%arg0 : f32, %arg1 : memref<?xf32>) {
9+
%cst = arith.constant 1 : index
10+
%c0 = arith.constant 0 : index
11+
%cst2 = memref.dim %arg1, %c0 : memref<?xf32> // size of dimension 0 of %arg1; used as block_x below
12+
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst, %grid_z = %cst)
13+
threads(%tx, %ty, %tz) in (%block_x = %cst2, %block_y = %cst, %block_z = %cst) {
14+
memref.store %arg0, %arg1[%tx] : memref<?xf32> // one store per thread, indexed by %tx
15+
gpu.terminator
16+
}
17+
return
18+
}

0 commit comments

Comments
 (0)