Skip to content

Commit 192c0e5

Browse files
committed
IROutliner: Fix assert with non-0 alloca addrspace
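The output arguments are passed as pointers to newly created allocas that the outlined function stores to, so the pointer argument's address space needs to match the alloca address space.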
1 parent 2fab927 commit 192c0e5

File tree

2 files changed

+51
-3
lines changed

2 files changed

+51
-3
lines changed

llvm/lib/Transforms/IPO/IROutliner.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1277,7 +1277,7 @@ static std::optional<unsigned> getGVNForPHINode(OutlinableRegion &Region,
12771277
/// \param [in,out] Region - The region of code to be analyzed.
12781278
/// \param [in] Outputs - The values found by the code extractor.
12791279
static void
1280-
findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region,
1280+
findExtractedOutputToOverallOutputMapping(Module &M, OutlinableRegion &Region,
12811281
SetVector<Value *> &Outputs) {
12821282
OutlinableGroup &Group = *Region.Parent;
12831283
IRSimilarityCandidate &C = *Region.Candidate;
@@ -1350,7 +1350,8 @@ findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region,
13501350
// the output, so we add a pointer type to the argument types of the overall
13511351
// function to handle this output and create a mapping to it.
13521352
if (!TypeFound) {
1353-
Group.ArgumentTypes.push_back(PointerType::getUnqual(Output->getType()));
1353+
Group.ArgumentTypes.push_back(Output->getType()->getPointerTo(
1354+
M.getDataLayout().getAllocaAddrSpace()));
13541355
// Mark the new pointer type as the last value in the aggregate argument
13551356
// list.
13561357
unsigned ArgTypeIdx = Group.ArgumentTypes.size() - 1;
@@ -1418,7 +1419,7 @@ void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region,
14181419

14191420
// Map the outputs found by the CodeExtractor to the arguments found for
14201421
// the overall function.
1421-
findExtractedOutputToOverallOutputMapping(Region, Outputs);
1422+
findExtractedOutputToOverallOutputMapping(M, Region, Outputs);
14221423
}
14231424

14241425
/// Replace the extracted function in the Region with a call to the overall
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
2+
; RUN: opt -S -passes=iroutliner -ir-outlining-no-cost < %s | FileCheck %s
3+
4+
; Check alloca with non-0 address spaces works correctly.
5+
6+
target datalayout = "A5"
7+
8+
define i32 @outlineable() {
9+
bb:
10+
%i = tail call i32 @func(i32 0, i32 1)
11+
%i1 = or i32 0, %i
12+
%i2 = tail call i32 @func(i32 %i1, i32 0)
13+
%i3 = or i32 %i1, %i2
14+
ret i32 0
15+
}
16+
17+
declare i32 @func(i32, i32)
18+
; CHECK-LABEL: define {{[^@]+}}@outlineable() {
19+
; CHECK-NEXT: bb:
20+
; CHECK-NEXT: [[I1_LOC:%.*]] = alloca i32, align 4, addrspace(5)
21+
; CHECK-NEXT: [[LT_CAST:%.*]] = addrspacecast ptr addrspace(5) [[I1_LOC]] to ptr
22+
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[LT_CAST]])
23+
; CHECK-NEXT: call void @outlined_ir_func_0(i32 0, i32 1, ptr addrspace(5) [[I1_LOC]], i32 0)
24+
; CHECK-NEXT: [[I1_RELOAD:%.*]] = load i32, ptr addrspace(5) [[I1_LOC]], align 4
25+
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr [[LT_CAST]])
26+
; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[I1_RELOAD]], i32 0, ptr addrspace(5) null, i32 -1)
27+
; CHECK-NEXT: ret i32 0
28+
;
29+
;
30+
; CHECK-LABEL: define {{[^@]+}}@outlined_ir_func_0
31+
; CHECK-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], ptr addrspace(5) [[TMP2:%.*]], i32 [[TMP3:%.*]]) #[[ATTR1:[0-9]+]] {
32+
; CHECK-NEXT: newFuncRoot:
33+
; CHECK-NEXT: br label [[BB_TO_OUTLINE:%.*]]
34+
; CHECK: bb_to_outline:
35+
; CHECK-NEXT: [[I:%.*]] = tail call i32 @func(i32 [[TMP0]], i32 [[TMP1]])
36+
; CHECK-NEXT: [[I1:%.*]] = or i32 [[TMP0]], [[I]]
37+
; CHECK-NEXT: br label [[BB_AFTER_OUTLINE_EXITSTUB:%.*]]
38+
; CHECK: bb_after_outline.exitStub:
39+
; CHECK-NEXT: switch i32 [[TMP3]], label [[FINAL_BLOCK_0:%.*]] [
40+
; CHECK-NEXT: i32 0, label [[OUTPUT_BLOCK_0_0:%.*]]
41+
; CHECK-NEXT: ]
42+
; CHECK: output_block_0_0:
43+
; CHECK-NEXT: store i32 [[I1]], ptr addrspace(5) [[TMP2]], align 4
44+
; CHECK-NEXT: br label [[FINAL_BLOCK_0]]
45+
; CHECK: final_block_0:
46+
; CHECK-NEXT: ret void
47+
;

0 commit comments

Comments
 (0)