
Commit c5da2dc

definelicht authored and gysit committed
[MLIR][LLVM] Support inlining of LLVM::AllocaOp.
If inlining into an LLVM function, move allocas of constant size from the callee's entry block to the new entry block, as this will fold into the prologue/epilogue code during code generation. We still allow inlining allocas even if we cannot do this post-processing (i.e., when we are not inlining into an `LLVM::FuncOp`), since this is an optimization (and thus not necessary for correctness).

Depends on D141682

Reviewed By: gysit

Differential Revision: https://reviews.llvm.org/D142436
1 parent 2e9bc1b commit c5da2dc
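
As a rough illustration of the behavior described above, consider a hypothetical callee whose entry block contains a constant-size alloca (the functions below are illustrative and not part of this commit; they follow the style of the test cases added further down):

llvm.func @callee() -> f32 {
  %size = llvm.mlir.constant(1 : i32) : i32
  %ptr = llvm.alloca %size x f32 : (i32) -> !llvm.ptr
  %val = llvm.load %ptr : !llvm.ptr -> f32
  llvm.return %val : f32
}

After inlining a call to @callee that sits in a non-entry block of an LLVM::FuncOp caller, the alloca is expected to end up at the top of the caller's entry block, fed by a newly created copy of its constant size, roughly:

llvm.func @caller(%cond : i1, %default : f32) -> f32 {
  // Hoisted by the inliner interface: rematerialized constant plus the alloca.
  %size = llvm.mlir.constant(1 : i32) : i32
  %ptr = llvm.alloca %size x f32 : (i32) -> !llvm.ptr
  llvm.cond_br %cond, ^bb1, ^bb2
^bb1:
  // The remainder of the inlined callee body stays in place.
  %val = llvm.load %ptr : !llvm.ptr -> f32
  llvm.return %val : f32
^bb2:
  llvm.return %default : f32
}

Allocas whose size is not a compile-time constant are still inlined, but are left in whichever block the inliner placed them in.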

File tree

2 files changed: +121 -11 lines changed

mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp

Lines changed: 39 additions & 1 deletion
@@ -2848,6 +2848,33 @@ struct LLVMOpAsmDialectInterface : public OpAsmDialectInterface {
 // DialectInlinerInterface
 //===----------------------------------------------------------------------===//
 
+/// Move all alloca operations with a constant size in the former entry block of
+/// the newly inlined callee into the entry block of the caller.
+static void moveConstantAllocasToEntryBlock(
+    iterator_range<Region::iterator> inlinedBlocks) {
+  Block *calleeEntryBlock = &(*inlinedBlocks.begin());
+  Block *callerEntryBlock = &(*calleeEntryBlock->getParent()->begin());
+  if (calleeEntryBlock == callerEntryBlock)
+    // Nothing to do.
+    return;
+  SmallVector<std::pair<LLVM::AllocaOp, IntegerAttr>> allocasToMove;
+  // Conservatively only move alloca operations that are part of the entry block
+  // and do not inspect nested regions, since they may execute conditionally or
+  // have other unknown semantics.
+  for (auto allocaOp : calleeEntryBlock->getOps<LLVM::AllocaOp>()) {
+    IntegerAttr arraySize;
+    if (matchPattern(allocaOp.getArraySize(), m_Constant(&arraySize)))
+      allocasToMove.emplace_back(allocaOp, arraySize);
+  }
+  OpBuilder builder(callerEntryBlock, callerEntryBlock->begin());
+  for (auto &[allocaOp, arraySize] : allocasToMove) {
+    auto newConstant = builder.create<LLVM::ConstantOp>(
+        allocaOp->getLoc(), allocaOp.getArraySize().getType(), arraySize);
+    allocaOp->moveAfter(newConstant);
+    allocaOp.getArraySizeMutable().assign(newConstant.getResult());
+  }
+}
+
 namespace {
 struct LLVMInlinerInterface : public DialectInlinerInterface {
   using DialectInlinerInterface::DialectInlinerInterface;

@@ -2885,7 +2912,7 @@ struct LLVMInlinerInterface : public DialectInlinerInterface {
             return false;
           return true;
         })
-        .Case<LLVM::CallOp>([](auto) { return true; })
+        .Case<LLVM::CallOp, LLVM::AllocaOp>([](auto) { return true; })
         .Default([](auto) { return false; });
   }
 
@@ -2918,6 +2945,17 @@ struct LLVMInlinerInterface : public DialectInlinerInterface {
     dst.replaceAllUsesWith(src);
   }
 
+  void processInlinedCallBlocks(
+      Operation *call,
+      iterator_range<Region::iterator> inlinedBlocks) const override {
+    // Alloca operations with a constant size that were in the entry block of
+    // the callee should be moved to the entry block of the caller, as this will
+    // fold into prologue/epilogue code during code generation.
+    // This is not implemented as a standalone pattern because we need to know
+    // which newly inlined block was previously the entry block of the callee.
+    moveConstantAllocasToEntryBlock(inlinedBlocks);
+  }
+
 private:
   /// Returns true if all attributes of `callOp` are handled during inlining.
   [[nodiscard]] static bool isLegalToInlineCallAttributes(LLVM::CallOp callOp) {

mlir/test/Dialect/LLVMIR/inlining.mlir

Lines changed: 82 additions & 10 deletions
@@ -20,18 +20,17 @@ func.func @test_inline(%ptr : !llvm.ptr) -> i32 {
 
 // -----
 
-func.func @inner_func_not_inlinable() -> !llvm.ptr<f64> {
-  %0 = llvm.mlir.constant(0 : i32) : i32
-  %1 = llvm.alloca %0 x f64 : (i32) -> !llvm.ptr<f64>
-  return %1 : !llvm.ptr<f64>
+func.func @inner_func_not_inlinable() -> i32 {
+  %0 = llvm.inline_asm has_side_effects "foo", "bar" : () -> i32
+  return %0 : i32
 }
 
-// CHECK-LABEL: func.func @test_not_inline() -> !llvm.ptr<f64> {
-// CHECK-NEXT: %[[RES:.*]] = call @inner_func_not_inlinable() : () -> !llvm.ptr<f64>
-// CHECK-NEXT: return %[[RES]] : !llvm.ptr<f64>
-func.func @test_not_inline() -> !llvm.ptr<f64> {
-  %0 = call @inner_func_not_inlinable() : () -> !llvm.ptr<f64>
-  return %0 : !llvm.ptr<f64>
+// CHECK-LABEL: func.func @test_not_inline() -> i32 {
+// CHECK-NEXT: %[[RES:.*]] = call @inner_func_not_inlinable() : () -> i32
+// CHECK-NEXT: return %[[RES]] : i32
+func.func @test_not_inline() -> i32 {
+  %0 = call @inner_func_not_inlinable() : () -> i32
+  return %0 : i32
 }
 
 // -----

@@ -203,3 +202,76 @@ llvm.func @caller() {
   llvm.call @callee() { branch_weights = dense<42> : vector<1xi32> } : () -> ()
   llvm.return
 }
+
+// -----
+
+llvm.func @static_alloca() -> f32 {
+  %0 = llvm.mlir.constant(4 : i32) : i32
+  %1 = llvm.alloca %0 x f32 : (i32) -> !llvm.ptr
+  %2 = llvm.load %1 : !llvm.ptr -> f32
+  llvm.return %2 : f32
+}
+
+llvm.func @dynamic_alloca(%size : i32) -> f32 {
+  %0 = llvm.add %size, %size : i32
+  %1 = llvm.alloca %0 x f32 : (i32) -> !llvm.ptr
+  %2 = llvm.load %1 : !llvm.ptr -> f32
+  llvm.return %2 : f32
+}
+
+// CHECK-LABEL: llvm.func @test_inline
+llvm.func @test_inline(%cond : i1, %size : i32) -> f32 {
+  // Check that the static alloca was moved to the entry block after inlining
+  // with its size defined by a constant.
+  // CHECK-NOT: ^{{.+}}:
+  // CHECK-NEXT: llvm.mlir.constant
+  // CHECK-NEXT: llvm.alloca
+  // CHECK: llvm.cond_br
+  llvm.cond_br %cond, ^bb1, ^bb2
+  // CHECK: ^{{.+}}:
+^bb1:
+  // CHECK-NOT: llvm.call @static_alloca
+  %0 = llvm.call @static_alloca() : () -> f32
+  // CHECK: llvm.br
+  llvm.br ^bb3(%0: f32)
+  // CHECK: ^{{.+}}:
+^bb2:
+  // Check that the dynamic alloca was inlined, but that it was not moved to the
+  // entry block.
+  // CHECK: llvm.add
+  // CHECK-NEXT: llvm.alloca
+  // CHECK-NOT: llvm.call @dynamic_alloca
+  %1 = llvm.call @dynamic_alloca(%size) : (i32) -> f32
+  // CHECK: llvm.br
+  llvm.br ^bb3(%1: f32)
+  // CHECK: ^{{.+}}:
+^bb3(%arg : f32):
+  llvm.return %arg : f32
+}
+
+// -----
+
+llvm.func @static_alloca_not_in_entry(%cond : i1) -> f32 {
+  llvm.cond_br %cond, ^bb1, ^bb2
+^bb1:
+  %0 = llvm.mlir.constant(4 : i32) : i32
+  %1 = llvm.alloca %0 x f32 : (i32) -> !llvm.ptr
+  llvm.br ^bb3(%1: !llvm.ptr)
+^bb2:
+  %2 = llvm.mlir.constant(8 : i32) : i32
+  %3 = llvm.alloca %2 x f32 : (i32) -> !llvm.ptr
+  llvm.br ^bb3(%3: !llvm.ptr)
+^bb3(%ptr : !llvm.ptr):
+  %4 = llvm.load %ptr : !llvm.ptr -> f32
+  llvm.return %4 : f32
+}
+
+// CHECK-LABEL: llvm.func @test_inline
+llvm.func @test_inline(%cond : i1) -> f32 {
+  // Make sure the alloca was not moved to the entry block.
+  // CHECK-NOT: llvm.alloca
+  // CHECK: llvm.cond_br
+  // CHECK: llvm.alloca
+  %0 = llvm.call @static_alloca_not_in_entry(%cond) : (i1) -> f32
+  llvm.return %0 : f32
+}
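
Note that the hunk above does not show the file's RUN line. Like the other tests in this directory, inlining.mlir is driven by lit and FileCheck through mlir-opt's inliner pass; the invocation is roughly the following (the exact flag spelling is an assumption based on common MLIR test conventions, not something visible in this diff):

// RUN: mlir-opt %s -inline -split-input-file | FileCheck %s

The // ----- separators split the file into independent inputs when -split-input-file is used, so each of the new test cases above is inlined and checked in isolation.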
