-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[MLIR][LLVM] Fix memory explosion when converting global variable bodies in ModuleTranslation #82708
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[MLIR][LLVM] Fix memory explosion when converting global variable bodies in ModuleTranslation #82708
Changes from all commits
cc7b35a
7705e97
9dde902
e8f3d4e
c6d807a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -51,11 +51,15 @@ | |
#include "llvm/IR/MDBuilder.h" | ||
#include "llvm/IR/Module.h" | ||
#include "llvm/IR/Verifier.h" | ||
#include "llvm/Support/Debug.h" | ||
#include "llvm/Support/raw_ostream.h" | ||
#include "llvm/Transforms/Utils/BasicBlockUtils.h" | ||
#include "llvm/Transforms/Utils/Cloning.h" | ||
#include "llvm/Transforms/Utils/ModuleUtils.h" | ||
#include <optional> | ||
|
||
#define DEBUG_TYPE "llvm-dialect-to-llvm-ir" | ||
|
||
using namespace mlir; | ||
using namespace mlir::LLVM; | ||
using namespace mlir::LLVM::detail; | ||
|
@@ -1042,17 +1046,80 @@ LogicalResult ModuleTranslation::convertGlobals() { | |
for (auto op : getModuleBody(mlirModule).getOps<LLVM::GlobalOp>()) { | ||
if (Block *initializer = op.getInitializerBlock()) { | ||
llvm::IRBuilder<> builder(llvmModule->getContext()); | ||
|
||
int numConstantsHit = 0; | ||
int numConstantsErased = 0; | ||
DenseMap<llvm::ConstantAggregate *, int> constantAggregateUseMap; | ||
|
||
for (auto &op : initializer->without_terminator()) { | ||
if (failed(convertOperation(op, builder)) || | ||
!isa<llvm::Constant>(lookupValue(op.getResult(0)))) | ||
if (failed(convertOperation(op, builder))) | ||
return emitError(op.getLoc(), "fail to convert global initializer"); | ||
auto *cst = dyn_cast<llvm::Constant>(lookupValue(op.getResult(0))); | ||
if (!cst) | ||
return emitError(op.getLoc(), "unemittable constant value"); | ||
|
||
// When emitting an LLVM constant, a new constant is created and the old | ||
// constant may become dangling and take space. We should remove the | ||
// dangling constants to avoid memory explosion especially for constant | ||
// arrays whose number of elements is large. | ||
// Because multiple operations may refer to the same constant, we need | ||
// to count the number of uses of each constant array and remove it only | ||
// when the count becomes zero. | ||
if (auto *agg = dyn_cast<llvm::ConstantAggregate>(cst)) { | ||
numConstantsHit++; | ||
Value result = op.getResult(0); | ||
int numUsers = std::distance(result.use_begin(), result.use_end()); | ||
auto [iterator, inserted] = | ||
constantAggregateUseMap.try_emplace(agg, numUsers); | ||
if (!inserted) { | ||
// Key already exists, update the value | ||
iterator->second += numUsers; | ||
} | ||
} | ||
// Scan the operands of the operation to decrement the use count of | ||
// constants. Erase the constant if the use count becomes zero. | ||
for (Value v : op.getOperands()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Worth adding a comment for this loop I think There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated. |
||
auto cst = dyn_cast<llvm::ConstantAggregate>(lookupValue(v)); | ||
if (!cst) | ||
continue; | ||
auto iter = constantAggregateUseMap.find(cst); | ||
assert(iter != constantAggregateUseMap.end() && "constant not found"); | ||
iter->second--; | ||
if (iter->second == 0) { | ||
// NOTE: cannot call removeDeadConstantUsers() here because it | ||
// may remove the constant which has uses not be converted yet. | ||
if (cst->user_empty()) { | ||
cst->destroyConstant(); | ||
numConstantsErased++; | ||
} | ||
constantAggregateUseMap.erase(iter); | ||
} | ||
} | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I feel I'm missing something, why can't we have a
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually this is done also line 1104, but I don't get why we need the dance around There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
My understanding is that we're converting all the ops in intializer to llvm::Constant. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah I get it: and how critical is it to garbage collect during the interpretation of the initializer block instead of just at the end of the block? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The worst block is building a constant with type [75525 x [100 x f32]] for my test case. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also: isn't it something that MLIR can constant fold? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The constant will fold, but the input constant is still in memory. I tried to avoid creating these insertvalue which will be converted to temp constant. Here's what I got with experiment
It crashes because !llvm.array<75525 x i64> is not correct type. Any suggestion for how to express array of string for llvm dialect? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wouldn't it be just a concat of all the strings into a single dense attribute of the right type? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Removing at the end not work :( One block already used all of the memory. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Thanks for this suggestion. It saves a lot of time for create globalOp for each string! But to create the array of string, I still have to create a lot of insertvalue. Is it possible to express the following llvm ir with LLVM dialect?
Then there will be no insertvalue needed. |
||
|
||
ReturnOp ret = cast<ReturnOp>(initializer->getTerminator()); | ||
llvm::Constant *cst = | ||
cast<llvm::Constant>(lookupValue(ret.getOperand(0))); | ||
auto *global = cast<llvm::GlobalVariable>(lookupGlobal(op)); | ||
if (!shouldDropGlobalInitializer(global->getLinkage(), cst)) | ||
global->setInitializer(cst); | ||
|
||
// Try to remove the dangling constants again after all operations are | ||
// converted. | ||
for (auto it : constantAggregateUseMap) { | ||
auto cst = it.first; | ||
cst->removeDeadConstantUsers(); | ||
if (cst->user_empty()) { | ||
cst->destroyConstant(); | ||
numConstantsErased++; | ||
} | ||
} | ||
|
||
LLVM_DEBUG(llvm::dbgs() | ||
<< "Convert initializer for " << op.getName() << "\n"; | ||
llvm::dbgs() << numConstantsHit << " new constants hit\n"; | ||
llvm::dbgs() | ||
<< numConstantsErased << " dangling constants erased\n";); | ||
} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
// RUN: mlir-translate -mlir-to-llvmir %s -debug-only=llvm-dialect-to-llvm-ir 2>&1 | FileCheck %s | ||
|
||
// CHECK: Convert initializer for dup_const | ||
// CHECK: 6 new constants hit | ||
// CHECK: 3 dangling constants erased | ||
// CHECK: Convert initializer for unique_const | ||
// CHECK: 6 new constants hit | ||
// CHECK: 5 dangling constants erased | ||
|
||
|
||
// CHECK:@dup_const = global { [2 x double], [2 x double], [2 x double] } { [2 x double] [double 3.612250e-02, double 5.119230e-02], [2 x double] [double 3.612250e-02, double 5.119230e-02], [2 x double] [double 3.612250e-02, double 5.119230e-02] } | ||
|
||
llvm.mlir.global @dup_const() : !llvm.struct<(array<2 x f64>, array<2 x f64>, array<2 x f64>)> { | ||
%c0 = llvm.mlir.constant(3.612250e-02 : f64) : f64 | ||
%c1 = llvm.mlir.constant(5.119230e-02 : f64) : f64 | ||
|
||
%empty0 = llvm.mlir.undef : !llvm.array<2 x f64> | ||
%a00 = llvm.insertvalue %c0, %empty0[0] : !llvm.array<2 x f64> | ||
|
||
%empty1 = llvm.mlir.undef : !llvm.array<2 x f64> | ||
%a10 = llvm.insertvalue %c0, %empty1[0] : !llvm.array<2 x f64> | ||
|
||
%empty2 = llvm.mlir.undef : !llvm.array<2 x f64> | ||
%a20 = llvm.insertvalue %c0, %empty2[0] : !llvm.array<2 x f64> | ||
|
||
// NOTE: a00, a10, a20 are all same ConstantAggregate which not used at this point. | ||
// should not delete it before all of the uses of the ConstantAggregate finished. | ||
|
||
%a01 = llvm.insertvalue %c1, %a00[1] : !llvm.array<2 x f64> | ||
%a11 = llvm.insertvalue %c1, %a10[1] : !llvm.array<2 x f64> | ||
%a21 = llvm.insertvalue %c1, %a20[1] : !llvm.array<2 x f64> | ||
%empty_r = llvm.mlir.undef : !llvm.struct<(array<2 x f64>, array<2 x f64>, array<2 x f64>)> | ||
%r0 = llvm.insertvalue %a01, %empty_r[0] : !llvm.struct<(array<2 x f64>, array<2 x f64>, array<2 x f64>)> | ||
%r1 = llvm.insertvalue %a11, %r0[1] : !llvm.struct<(array<2 x f64>, array<2 x f64>, array<2 x f64>)> | ||
%r2 = llvm.insertvalue %a21, %r1[2] : !llvm.struct<(array<2 x f64>, array<2 x f64>, array<2 x f64>)> | ||
|
||
llvm.return %r2 : !llvm.struct<(array<2 x f64>, array<2 x f64>, array<2 x f64>)> | ||
} | ||
|
||
// CHECK:@unique_const = global { [2 x double], [2 x double], [2 x double] } { [2 x double] [double 3.612250e-02, double 5.119230e-02], [2 x double] [double 3.312250e-02, double 5.219230e-02], [2 x double] [double 3.412250e-02, double 5.419230e-02] } | ||
|
||
llvm.mlir.global @unique_const() : !llvm.struct<(array<2 x f64>, array<2 x f64>, array<2 x f64>)> { | ||
%c0 = llvm.mlir.constant(3.612250e-02 : f64) : f64 | ||
%c1 = llvm.mlir.constant(5.119230e-02 : f64) : f64 | ||
|
||
%c2 = llvm.mlir.constant(3.312250e-02 : f64) : f64 | ||
%c3 = llvm.mlir.constant(5.219230e-02 : f64) : f64 | ||
|
||
%c4 = llvm.mlir.constant(3.412250e-02 : f64) : f64 | ||
%c5 = llvm.mlir.constant(5.419230e-02 : f64) : f64 | ||
|
||
%2 = llvm.mlir.undef : !llvm.struct<(array<2 x f64>, array<2 x f64>, array<2 x f64>)> | ||
|
||
%3 = llvm.mlir.undef : !llvm.array<2 x f64> | ||
|
||
%4 = llvm.insertvalue %c0, %3[0] : !llvm.array<2 x f64> | ||
%5 = llvm.insertvalue %c1, %4[1] : !llvm.array<2 x f64> | ||
|
||
%6 = llvm.insertvalue %5, %2[0] : !llvm.struct<(array<2 x f64>, array<2 x f64>, array<2 x f64>)> | ||
|
||
%7 = llvm.insertvalue %c2, %3[0] : !llvm.array<2 x f64> | ||
%8 = llvm.insertvalue %c3, %7[1] : !llvm.array<2 x f64> | ||
|
||
%9 = llvm.insertvalue %8, %6[1] : !llvm.struct<(array<2 x f64>, array<2 x f64>, array<2 x f64>)> | ||
|
||
%10 = llvm.insertvalue %c4, %3[0] : !llvm.array<2 x f64> | ||
%11 = llvm.insertvalue %c5, %10[1] : !llvm.array<2 x f64> | ||
|
||
%12 = llvm.insertvalue %11, %9[2] : !llvm.struct<(array<2 x f64>, array<2 x f64>, array<2 x f64>)> | ||
|
||
llvm.return %12 : !llvm.struct<(array<2 x f64>, array<2 x f64>, array<2 x f64>)> | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is O(N) on a linked list, hopefully small though...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Most of the ops will be insertvalue which only used once for build constant.