Skip to content

[flang][cuda] Update CompilerGeneratedNames pass to work on gpu module #120660

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 27 additions & 14 deletions flang/lib/Optimizer/CodeGen/CodeGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1247,10 +1247,10 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> {

/// Get the address of the type descriptor global variable that was created by
/// lowering for derived type \p recType.
mlir::Value getTypeDescriptor(mlir::ModuleOp mod,
mlir::ConversionPatternRewriter &rewriter,
mlir::Location loc,
fir::RecordType recType) const {
template <typename ModOpTy>
mlir::Value
getTypeDescriptor(ModOpTy mod, mlir::ConversionPatternRewriter &rewriter,
mlir::Location loc, fir::RecordType recType) const {
std::string name =
this->options.typeDescriptorsRenamedForAssembly
? fir::NameUniquer::getTypeDescriptorAssemblyName(recType.getName())
Expand All @@ -1275,7 +1275,8 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> {
return rewriter.create<mlir::LLVM::ZeroOp>(loc, llvmPtrTy);
}

mlir::Value populateDescriptor(mlir::Location loc, mlir::ModuleOp mod,
template <typename ModOpTy>
mlir::Value populateDescriptor(mlir::Location loc, ModOpTy mod,
fir::BaseBoxType boxTy, mlir::Type inputType,
mlir::ConversionPatternRewriter &rewriter,
unsigned rank, mlir::Value eleSize,
Expand Down Expand Up @@ -1414,10 +1415,16 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> {
extraField =
this->getExtraFromBox(loc, sourceBoxTyPair, sourceBox, rewriter);
}
auto mod = box->template getParentOfType<mlir::ModuleOp>();
mlir::Value descriptor =
populateDescriptor(loc, mod, boxTy, inputType, rewriter, rank, eleSize,
cfiTy, typeDesc, allocatorIdx, extraField);

mlir::Value descriptor;
if (auto gpuMod = box->template getParentOfType<mlir::gpu::GPUModuleOp>())
descriptor = populateDescriptor(loc, gpuMod, boxTy, inputType, rewriter,
rank, eleSize, cfiTy, typeDesc,
allocatorIdx, extraField);
else if (auto mod = box->template getParentOfType<mlir::ModuleOp>())
descriptor = populateDescriptor(loc, mod, boxTy, inputType, rewriter,
rank, eleSize, cfiTy, typeDesc,
allocatorIdx, extraField);

return {boxTy, descriptor, eleSize};
}
Expand Down Expand Up @@ -1460,11 +1467,17 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> {
mlir::Value extraField =
this->getExtraFromBox(loc, inputBoxTyPair, loweredBox, rewriter);

auto mod = box->template getParentOfType<mlir::ModuleOp>();
mlir::Value descriptor =
populateDescriptor(loc, mod, boxTy, box.getBox().getType(), rewriter,
rank, eleSize, cfiTy, typeDesc,
/*allocatorIdx=*/kDefaultAllocator, extraField);
mlir::Value descriptor;
if (auto gpuMod = box->template getParentOfType<mlir::gpu::GPUModuleOp>())
descriptor =
populateDescriptor(loc, gpuMod, boxTy, box.getBox().getType(),
rewriter, rank, eleSize, cfiTy, typeDesc,
/*allocatorIdx=*/kDefaultAllocator, extraField);
else if (auto mod = box->template getParentOfType<mlir::ModuleOp>())
descriptor =
populateDescriptor(loc, mod, boxTy, box.getBox().getType(), rewriter,
rank, eleSize, cfiTy, typeDesc,
/*allocatorIdx=*/kDefaultAllocator, extraField);

return {boxTy, descriptor, eleSize};
}
Expand Down
40 changes: 24 additions & 16 deletions flang/lib/Optimizer/Transforms/CompilerGeneratedNames.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "flang/Optimizer/Dialect/FIROpsSupport.h"
#include "flang/Optimizer/Support/InternalNames.h"
#include "flang/Optimizer/Transforms/Passes.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/SymbolTable.h"
#include "mlir/Pass/Pass.h"
Expand Down Expand Up @@ -42,24 +43,31 @@ void CompilerGeneratedNamesConversionPass::runOnOperation() {
auto *context = &getContext();

llvm::DenseMap<mlir::StringAttr, mlir::FlatSymbolRefAttr> remappings;
for (auto &funcOrGlobal : op->getRegion(0).front()) {
if (llvm::isa<mlir::func::FuncOp>(funcOrGlobal) ||
llvm::isa<fir::GlobalOp>(funcOrGlobal)) {
auto symName = funcOrGlobal.getAttrOfType<mlir::StringAttr>(
mlir::SymbolTable::getSymbolAttrName());
auto deconstructedName = fir::NameUniquer::deconstruct(symName);
if (deconstructedName.first != fir::NameUniquer::NameKind::NOT_UNIQUED &&
!fir::NameUniquer::isExternalFacingUniquedName(deconstructedName)) {
std::string newName =
fir::NameUniquer::replaceSpecialSymbols(symName.getValue().str());
if (newName != symName) {
auto newAttr = mlir::StringAttr::get(context, newName);
mlir::SymbolTable::setSymbolName(&funcOrGlobal, newAttr);
auto newSymRef = mlir::FlatSymbolRefAttr::get(newAttr);
remappings.try_emplace(symName, newSymRef);
}

// Renames a single symbol-defining operation if its name is a
// compiler-generated (internal) uniqued name, and records the rename in
// `remappings` (old symbol name -> reference to the new symbol name).
auto processOp = [&](mlir::Operation &op) {
// Read the operation's symbol name attribute.
auto symName = op.getAttrOfType<mlir::StringAttr>(
mlir::SymbolTable::getSymbolAttrName());
auto deconstructedName = fir::NameUniquer::deconstruct(symName);
// Only touch names that were uniqued and that are not external facing.
if (deconstructedName.first != fir::NameUniquer::NameKind::NOT_UNIQUED &&
!fir::NameUniquer::isExternalFacingUniquedName(deconstructedName)) {
std::string newName =
fir::NameUniquer::replaceSpecialSymbols(symName.getValue().str());
// Skip the rename when the replacement left the name unchanged.
if (newName != symName) {
auto newAttr = mlir::StringAttr::get(context, newName);
mlir::SymbolTable::setSymbolName(&op, newAttr);
auto newSymRef = mlir::FlatSymbolRefAttr::get(newAttr);
// try_emplace keeps the first mapping recorded for a given old name.
remappings.try_emplace(symName, newSymRef);
}
}
};
// Walk the operations in the first block of the enclosing operation's first
// region. NOTE: the loop variable `op` shadows the outer `op` used in the
// range expression; the range expression is evaluated against the outer one.
for (auto &op : op->getRegion(0).front()) {
// Functions and globals found directly in that block are processed as is.
if (llvm::isa<mlir::func::FuncOp>(op) || llvm::isa<fir::GlobalOp>(op))
processOp(op);
// For a gpu.module, descend one level and process the functions, globals
// and gpu functions found in the first block of its first region.
else if (auto gpuMod = mlir::dyn_cast<mlir::gpu::GPUModuleOp>(&op))
for (auto &op : gpuMod->getRegion(0).front())
if (llvm::isa<mlir::func::FuncOp>(op) || llvm::isa<fir::GlobalOp>(op) ||
llvm::isa<mlir::gpu::GPUFuncOp>(op))
processOp(op);
}

if (remappings.empty())
Expand Down
17 changes: 17 additions & 0 deletions flang/test/Fir/CUDA/cuda-compiler-generated-names.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// RUN: fir-opt --split-input-file --compiler-generated-names --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu type-descriptors-renamed-for-assembly=true" %s | FileCheck %s

// Runs the compiler-generated-names pass followed by FIR-to-LLVM conversion on
// a fir.global and a fir.embox user that both live inside a gpu.module nested
// in a gpu.container module.
module @mod1 attributes {gpu.container} {
gpu.module @gpu1 {
// Global whose name contains '.' characters; the expectations below look for
// it under the name _QMtest_dinitEXdtXtseq.
fir.global linkonce @_QMtest_dinitE.dt.tseq constant : i8

// Emboxes a reference to the derived type _QMtest_dinitTtseq.
// NOTE(review): presumably this embox is what produces the addressof of the
// renamed global expected below -- confirm against the embox lowering.
func.func @embox1(%arg0: !fir.ref<!fir.type<_QMtest_dinitTtseq{i:i32}>>) {
%0 = fir.embox %arg0() : (!fir.ref<!fir.type<_QMtest_dinitTtseq{i:i32}>>) -> !fir.box<!fir.type<_QMtest_dinitTtseq{i:i32}>>
return
}
}
}

// Expected output: inside gpu.module @gpu1 the global is emitted under the
// renamed symbol, and an llvm.mlir.addressof refers to that same symbol.
// CHECK-LABEL: gpu.module @gpu1
// CHECK: llvm.mlir.global linkonce constant @_QMtest_dinitEXdtXtseq
// CHECK: llvm.mlir.addressof @_QMtest_dinitEXdtXtseq : !llvm.ptr

Loading