Skip to content

Reland '[flang][cuda] Add cuf.register_kernel operation' #112389

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -288,4 +288,23 @@ def cuf_KernelOp : cuf_Op<"kernel", [AttrSizedOperandSegments,
let hasVerifier = 1;
}

// Operation `cuf.register_kernel`: registers a CUDA kernel identified by a
// symbol reference. The op has no traits, no operands and no results.
def cuf_RegisterKernelOp : cuf_Op<"register_kernel", []> {
let summary = "Register a CUDA kernel";

// `name` is a (possibly nested) symbol reference. The tests in this change
// spell it as `@module::@kernel`.
let arguments = (ins
SymbolRefAttr:$name
);

// Textual form: the symbol reference followed by the attribute dictionary,
// e.g. `cuf.register_kernel @cuda_device_mod::@_QPsub_device1`.
let assemblyFormat = [{
$name attr-dict
}];

// A hand-written `verify()` is declared for this op.
let hasVerifier = 1;

// Extra C++ accessors declared on the generated op class: one for the kernel
// name and one for the name of the module containing the kernel.
let extraClassDeclaration = [{
mlir::StringAttr getKernelName();
mlir::StringAttr getKernelModuleName();
}];
}

#endif // FORTRAN_DIALECT_CUF_CUF_OPS
1 change: 1 addition & 0 deletions flang/lib/Optimizer/Dialect/CUF/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ add_flang_library(CUFDialect
FIRDialect
FIRDialectSupport
MLIRIR
MLIRGPUDialect
MLIRTargetLLVMIRExport

LINK_COMPONENTS
Expand Down
37 changes: 37 additions & 0 deletions flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
#include "flang/Optimizer/Dialect/FIRAttr.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/BuiltinOps.h"
Expand Down Expand Up @@ -253,6 +254,42 @@ llvm::LogicalResult cuf::KernelOp::verify() {
return mlir::success();
}

//===----------------------------------------------------------------------===//
// RegisterKernelOp
//===----------------------------------------------------------------------===//

/// Return the root component of the `name` symbol reference, which this op
/// treats as the name of the module holding the kernel.
mlir::StringAttr cuf::RegisterKernelOp::getKernelModuleName() {
  mlir::SymbolRefAttr kernelRef = getName();
  return kernelRef.getRootReference();
}

/// Return the leaf component of the `name` symbol reference, which this op
/// treats as the name of the kernel itself.
mlir::StringAttr cuf::RegisterKernelOp::getKernelName() {
  mlir::SymbolRefAttr kernelRef = getName();
  return kernelRef.getLeafReference();
}

/// Verify that `name` designates an existing kernel.
///
/// The checks, in order:
///   1. `name` has the form `@module::@kernel` (exactly one nested reference).
///   2. The op is nested inside a `mlir::ModuleOp`.
///   3. The root reference resolves to a `gpu.module` in that module.
///   4. The leaf reference resolves to a `gpu.func` in that `gpu.module`.
///   5. The resolved `gpu.func` is a kernel.
///
/// Emits an op error and returns failure on the first check that fails.
mlir::LogicalResult cuf::RegisterKernelOp::verify() {
  // Require exactly one nested reference. Comparing the root and leaf names
  // instead would wrongly reject a kernel that shares its name with the
  // enclosing gpu.module (`@m::@m`) and would accept deeper references such as
  // `@a::@b::@c` while silently ignoring the middle components.
  if (getName().getNestedReferences().size() != 1)
    return emitOpError("expect a module and a kernel name");

  auto mod = getOperation()->getParentOfType<mlir::ModuleOp>();
  if (!mod)
    return emitOpError("expect to be in a module");

  // Resolve the gpu.module by the root component of the reference.
  mlir::SymbolTable symTab(mod);
  auto gpuMod = symTab.lookup<mlir::gpu::GPUModuleOp>(getKernelModuleName());
  if (!gpuMod)
    return emitOpError("gpu module not found");

  // Resolve the function inside the gpu.module by the leaf component.
  mlir::SymbolTable gpuSymTab(gpuMod);
  auto func = gpuSymTab.lookup<mlir::gpu::GPUFuncOp>(getKernelName());
  if (!func)
    return emitOpError("device function not found");

  if (!func.isKernel())
    return emitOpError("only kernel gpu.func can be registered");

  return mlir::success();
}

// Tablegen operators

#define GET_OP_CLASSES
Expand Down
20 changes: 20 additions & 0 deletions flang/test/Fir/CUDA/cuda-register-func.fir
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// RUN: fir-opt %s | FileCheck %s

// Positive case: @cuda_device_mod holds two gpu.func ops marked `kernel`, and
// each one is registered from @__cudaFortranConstructor using the
// `@module::@kernel` form. The ops are expected to round-trip through fir-opt.
module attributes {gpu.container_module} {
gpu.module @cuda_device_mod {
gpu.func @_QPsub_device1() kernel {
gpu.return
}
gpu.func @_QPsub_device2(%arg0: !fir.ref<f32>) kernel {
gpu.return
}
}
llvm.func internal @__cudaFortranConstructor() {
cuf.register_kernel @cuda_device_mod::@_QPsub_device1
cuf.register_kernel @cuda_device_mod::@_QPsub_device2
llvm.return
}
}

// CHECK: cuf.register_kernel @cuda_device_mod::@_QPsub_device1
// CHECK: cuf.register_kernel @cuda_device_mod::@_QPsub_device2
50 changes: 50 additions & 0 deletions flang/test/Fir/cuf-invalid.fir
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,53 @@ func.func @_QPsub1(%arg0: !fir.ref<!fir.array<?xf32>> {cuf.data_attr = #cuf.cuda
cuf.data_transfer %20#0 to %11#0, %19 : !fir.shape<1> {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>
return
}

// -----

// Negative case: @_QPsub_device1 exists in @cuda_device_mod but is not marked
// `kernel`, so registering it must be diagnosed.
module attributes {gpu.container_module} {
gpu.module @cuda_device_mod {
gpu.func @_QPsub_device1() {
gpu.return
}
}
llvm.func internal @__cudaFortranConstructor() {
// expected-error@+1{{'cuf.register_kernel' op only kernel gpu.func can be registered}}
cuf.register_kernel @cuda_device_mod::@_QPsub_device1
llvm.return
}
}

// -----

// Negative case: @cuda_device_mod exists but only defines @_QPsub_device1;
// the registered leaf name @_QPsub_device2 does not resolve.
module attributes {gpu.container_module} {
gpu.module @cuda_device_mod {
gpu.func @_QPsub_device1() {
gpu.return
}
}
llvm.func internal @__cudaFortranConstructor() {
// expected-error@+1{{'cuf.register_kernel' op device function not found}}
cuf.register_kernel @cuda_device_mod::@_QPsub_device2
llvm.return
}
}

// -----

// Negative case: the enclosing module contains no gpu.module named
// @cuda_device_mod, so the root of the reference does not resolve.
module attributes {gpu.container_module} {
llvm.func internal @__cudaFortranConstructor() {
// expected-error@+1{{'cuf.register_kernel' op gpu module not found}}
cuf.register_kernel @cuda_device_mod::@_QPsub_device1
llvm.return
}
}

// -----

// Negative case: the reference is a flat symbol with no module component, so
// it does not have the required `@module::@kernel` form.
module attributes {gpu.container_module} {
llvm.func internal @__cudaFortranConstructor() {
// expected-error@+1{{'cuf.register_kernel' op expect a module and a kernel name}}
cuf.register_kernel @_QPsub_device1
llvm.return
}
}
1 change: 1 addition & 0 deletions flang/tools/fir-opt/fir-opt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ int main(int argc, char **argv) {
#endif
DialectRegistry registry;
fir::support::registerDialects(registry);
registry.insert<mlir::gpu::GPUDialect>();
fir::support::addFIRExtensions(registry);
return failed(MlirOptMain(argc, argv, "FIR modular optimizer driver\n",
registry));
Expand Down
Loading