Skip to content

[SROA] analyze gep(ptr,phi(const...)) #82425

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 57 additions & 2 deletions llvm/lib/Transforms/Scalar/SROA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4070,12 +4070,67 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
return true;
}

bool foldGEPPhiConst(GetElementPtrInst &GEPI) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we copy foldGEPSelect and handle both gep(phi(ptr), idx) and gep(ptr, phi(idx)) in one function? and also handle GEPs with more than one index

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we copy foldGEPSelect and handle both gep(phi(ptr), idx) and gep(ptr, phi(idx)) in one function?

They are deceptively similar on the surface, but they do literally everything differently. The first one operates on pointer, the other one on the indices. Combined function will literally have to provide different if/else for each of the steps, except for a just a few common lines . I've tried and the code is rather unreadable.

and also handle GEPs with more than one index

That is probably unnecessary, now that instcombine normalizes GEPs to untyped ones with a single byte offset.

Also, if we were to do try expanding multiple indices this way, it would result in a combinatorial explosion of the number of GEPs we may need to create.

If/when we run into this, and do discover that we want to handle multiple indices, we should be able to relax the algorithm a bit and iteratively expand one index at a time.

if (GEPI.getNumIndices() != 1)
return false;

PHINode *PHI = cast<PHINode>(GEPI.getOperand(1));
if (!all_of(PHI->incoming_values(), [&](auto &V) {
auto *Parent = PHI->getIncomingBlock(V);
return isa<ConstantInt>(V) && !succ_empty(Parent) &&
Parent->isLegalToHoistInto();
}))
return false;

LLVM_DEBUG(
dbgs() << " Rewriting gep(ptr, phi(const)) -> phi(gep(ptr, const)):"
<< "\n original: " << *PHI << "\n " << GEPI
<< "\n to: ");

SmallVector<Value *, 4> Index(GEPI.indices());
bool IsInBounds = GEPI.isInBounds();
IRB.SetInsertPoint(GEPI.getParent(), GEPI.getParent()->getFirstNonPHIIt());
PHINode *NewPN = IRB.CreatePHI(GEPI.getType(), PHI->getNumIncomingValues(),
PHI->getName() + ".sroa.phi");
for (unsigned I = 0, E = PHI->getNumIncomingValues(); I != E; ++I) {
BasicBlock *B = PHI->getIncomingBlock(I);
Value *NewVal = nullptr;
int Idx = NewPN->getBasicBlockIndex(B);
if (Idx >= 0) {
NewVal = NewPN->getIncomingValue(Idx);
} else {
Value *In = PHI->getIncomingValue(I);
BasicBlock *Parent = PHI->getIncomingBlock(I);
IRB.SetInsertPoint(Parent, Parent->getTerminator()->getIterator());
Type *Ty = GEPI.getSourceElementType();
NewVal = IRB.CreateGEP(Ty, GEPI.getPointerOperand(), In,
In->getName() + ".sroa.gep", IsInBounds);
}
NewPN->addIncoming(NewVal, B);
}

Visited.erase(&GEPI);
GEPI.replaceAllUsesWith(NewPN);
GEPI.eraseFromParent();
Visited.insert(NewPN);
enqueueUsers(*NewPN);

LLVM_DEBUG(for (Value *In
: NewPN->incoming_values()) dbgs()
<< "\n " << *In;
dbgs() << "\n " << *NewPN << '\n');

return true;
}
bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
if (foldGEPSelect(GEPI))
return true;

if (isa<PHINode>(GEPI.getPointerOperand()) &&
foldGEPPhi(GEPI))
if (isa<PHINode>(GEPI.getPointerOperand()) && foldGEPPhi(GEPI))
return true;

if (GEPI.hasIndices() && isa<PHINode>(GEPI.getOperand(1)) &&
foldGEPPhiConst(GEPI))
return true;

enqueueUsers(GEPI);
Expand Down
95 changes: 89 additions & 6 deletions llvm/test/Transforms/SROA/phi-gep.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
; RUN: opt -S -passes='sroa<modify-cfg>' < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG

%pair = type { i32, i32 }
%t1 = type { i64, i32, i32, i64, i32, i32 }

define i32 @test_sroa_phi_gep(i1 %cond) {
; CHECK-LABEL: @test_sroa_phi_gep(
Expand Down Expand Up @@ -245,15 +246,15 @@ define i32 @test_sroa_invoke_phi_gep(i1 %cond) personality ptr @__gxx_personalit
; CHECK-NEXT: br i1 [[COND:%.*]], label [[CALL:%.*]], label [[END:%.*]]
; CHECK: call:
; CHECK-NEXT: [[B:%.*]] = invoke ptr @foo()
; CHECK-NEXT: to label [[END]] unwind label [[INVOKE_CATCH:%.*]]
; CHECK-NEXT: to label [[END]] unwind label [[INVOKE_CATCH:%.*]]
; CHECK: end:
; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ [[B]], [[CALL]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[PAIR]], ptr [[PHI]], i32 0, i32 1
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
; CHECK-NEXT: ret i32 [[LOAD]]
; CHECK: invoke_catch:
; CHECK-NEXT: [[RES:%.*]] = landingpad { ptr, i32 }
; CHECK-NEXT: catch ptr null
; CHECK-NEXT: catch ptr null
; CHECK-NEXT: ret i32 0
;
entry:
Expand Down Expand Up @@ -468,10 +469,10 @@ define i32 @test_sroa_phi_gep_multiple_values_from_same_block(i32 %arg) {
; CHECK-LABEL: @test_sroa_phi_gep_multiple_values_from_same_block(
; CHECK-NEXT: bb.1:
; CHECK-NEXT: switch i32 [[ARG:%.*]], label [[BB_3:%.*]] [
; CHECK-NEXT: i32 1, label [[BB_2:%.*]]
; CHECK-NEXT: i32 2, label [[BB_2]]
; CHECK-NEXT: i32 3, label [[BB_4:%.*]]
; CHECK-NEXT: i32 4, label [[BB_4]]
; CHECK-NEXT: i32 1, label [[BB_2:%.*]]
; CHECK-NEXT: i32 2, label [[BB_2]]
; CHECK-NEXT: i32 3, label [[BB_4:%.*]]
; CHECK-NEXT: i32 4, label [[BB_4]]
; CHECK-NEXT: ]
; CHECK: bb.2:
; CHECK-NEXT: br label [[BB_4]]
Expand Down Expand Up @@ -504,6 +505,88 @@ bb.4: ; preds = %bb.1, %bb.1, %bb
ret i32 %load
}


define void @test_gep_phi_const(i32 %arg) {
; CHECK-LABEL: @test_gep_phi_const(
; CHECK-NEXT: bb:
; CHECK-NEXT: switch i32 [[ARG:%.*]], label [[BB9:%.*]] [
; CHECK-NEXT: i32 0, label [[BB11:%.*]]
; CHECK-NEXT: i32 1, label [[BB10:%.*]]
; CHECK-NEXT: ]
; CHECK: bb9:
; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ 8, [[BB10]] ], [ 16, [[BB:%.*]] ]
; CHECK-NEXT: [[PHI_SROA_PHI_SROA_SPECULATED:%.*]] = phi ptr [ undef, [[BB10]] ], [ undef, [[BB]] ]
; CHECK-NEXT: br label [[BB11]]
; CHECK: bb10:
; CHECK-NEXT: br label [[BB9]]
; CHECK: bb11:
; CHECK-NEXT: [[PHI12:%.*]] = phi ptr [ [[PHI_SROA_PHI_SROA_SPECULATED]], [[BB9]] ], [ undef, [[BB]] ]
; CHECK-NEXT: store i8 0, ptr [[PHI12]], align 1
; CHECK-NEXT: ret void
;
bb:
%alloca = alloca %t1, align 8
switch i32 %arg, label %bb9 [
i32 0, label %bb11
i32 1, label %bb10
]

bb9: ; preds = %bb10, %bb
%phi = phi i64 [ 8, %bb10 ], [ 16, %bb ]
%getelementptr = getelementptr i8, ptr %alloca, i64 %phi
%load = load ptr, ptr %getelementptr, align 8
br label %bb11

bb10: ; preds = %bb
br label %bb9

bb11: ; preds = %bb9, %bb
%phi12 = phi ptr [ %load, %bb9 ], [ undef, %bb ]
store i8 0, ptr %phi12, align 1
ret void
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please also add a negative test with non-constant incoming value?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would the test below do? It verifies that we only handle all-const phi indices. Or did you have something else in mind?

That got me thinking. Perhaps we should relax "phi(all-const)" into phi(any-const) as it will give SROA some more slices to analyze.


define void @test_gep_phi_nonconst(i64 %arg) {
; CHECK-LABEL: @test_gep_phi_nonconst(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [[T1:%.*]], align 8
; CHECK-NEXT: switch i64 [[ARG:%.*]], label [[BB9:%.*]] [
; CHECK-NEXT: i64 0, label [[BB11:%.*]]
; CHECK-NEXT: i64 1, label [[BB10:%.*]]
; CHECK-NEXT: ]
; CHECK: bb9:
; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[ARG]], [[BB10]] ], [ 16, [[BB:%.*]] ]
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i8, ptr [[ALLOCA]], i64 [[PHI]]
; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr [[GETELEMENTPTR]], align 8
; CHECK-NEXT: br label [[BB11]]
; CHECK: bb10:
; CHECK-NEXT: br label [[BB9]]
; CHECK: bb11:
; CHECK-NEXT: [[PHI12:%.*]] = phi ptr [ [[LOAD]], [[BB9]] ], [ undef, [[BB]] ]
; CHECK-NEXT: store i8 0, ptr [[PHI12]], align 1
; CHECK-NEXT: ret void
;
bb:
%alloca = alloca %t1, align 8
switch i64 %arg, label %bb9 [
i64 0, label %bb11
i64 1, label %bb10
]

bb9: ; preds = %bb10, %bb
%phi = phi i64 [ %arg , %bb10 ], [ 16, %bb ]
%getelementptr = getelementptr i8, ptr %alloca, i64 %phi
%load = load ptr, ptr %getelementptr, align 8
br label %bb11

bb10: ; preds = %bb
br label %bb9

bb11: ; preds = %bb9, %bb
%phi12 = phi ptr [ %load, %bb9 ], [ undef, %bb ]
store i8 0, ptr %phi12, align 1
ret void
}
declare ptr @foo()

declare i32 @__gxx_personality_v0(...)
Expand Down