Skip to content

[InstCombine] Optimistically allow multiple shufflevector uses in foldOpIntoPhi #114278

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits on Dec 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion llvm/lib/Transforms/InstCombine/InstCombineInternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -600,7 +600,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
/// Given a binary operator, cast instruction, or select which has a PHI node
/// as operand #0, see if we can fold the instruction into the PHI (which is
/// only possible if all operands to the PHI are constants).
///
/// Returns nullptr when no fold is performed.
///
/// By default (\p AllowMultipleUses == false) a PHI with more than one use is
/// only folded when every user of the PHI is identical to \p I. Passing true
/// skips that identical-users requirement, so the fold may be attempted even
/// though the PHI has other, different users.
/// NOTE(review): in that case the definition appears to give up as soon as an
/// incoming value fails to simplify -- confirm against the full function body.
Instruction *foldOpIntoPhi(Instruction &I, PHINode *PN);
Instruction *foldOpIntoPhi(Instruction &I, PHINode *PN,
bool AllowMultipleUses = false);

/// For a binary operator with 2 phi operands, try to hoist the binary
/// operation before the phi. This can result in fewer instructions in
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2978,7 +2978,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
}
}
if (auto *PN = dyn_cast<PHINode>(LHS)) {
if (Instruction *I = foldOpIntoPhi(SVI, PN))
if (Instruction *I = foldOpIntoPhi(SVI, PN, /*AllowMultipleUses=*/true))
return I;
}
}
Expand Down
34 changes: 23 additions & 11 deletions llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1763,22 +1763,26 @@ static Value *simplifyInstructionWithPHI(Instruction &I, PHINode *PN,
return nullptr;
}

Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN,
bool AllowMultipleUses) {
unsigned NumPHIValues = PN->getNumIncomingValues();
if (NumPHIValues == 0)
return nullptr;

// We normally only transform phis with a single use. However, if a PHI has
// multiple uses and they are all the same operation, we can fold *all* of the
// uses into the PHI.
if (!PN->hasOneUse()) {
bool OneUse = PN->hasOneUse();
bool IdenticalUsers = false;
if (!AllowMultipleUses && !OneUse) {
// Walk the use list for the instruction, comparing them to I.
for (User *U : PN->users()) {
Instruction *UI = cast<Instruction>(U);
if (UI != &I && !I.isIdenticalTo(UI))
return nullptr;
}
// Otherwise, we can replace *all* users with the new PHI we form.
IdenticalUsers = true;
}

// Check that all operands are phi-translatable.
Expand Down Expand Up @@ -1829,6 +1833,9 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
continue;
}

if (!OneUse && !IdenticalUsers)
return nullptr;

if (SeenNonSimplifiedInVal)
return nullptr; // More than one non-simplified value.
SeenNonSimplifiedInVal = true;
Expand Down Expand Up @@ -1890,17 +1897,22 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
for (unsigned i = 0; i != NumPHIValues; ++i)
NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));

for (User *U : make_early_inc_range(PN->users())) {
Instruction *User = cast<Instruction>(U);
if (User == &I)
continue;
replaceInstUsesWith(*User, NewPN);
eraseInstFromFunction(*User);
if (IdenticalUsers) {
for (User *U : make_early_inc_range(PN->users())) {
Instruction *User = cast<Instruction>(U);
if (User == &I)
continue;
replaceInstUsesWith(*User, NewPN);
eraseInstFromFunction(*User);
}
OneUse = true;
}

replaceAllDbgUsesWith(const_cast<PHINode &>(*PN),
const_cast<PHINode &>(*NewPN),
const_cast<PHINode &>(*PN), DT);
if (OneUse) {
replaceAllDbgUsesWith(const_cast<PHINode &>(*PN),
const_cast<PHINode &>(*NewPN),
const_cast<PHINode &>(*PN), DT);
}
return replaceInstUsesWith(I, NewPN);
}

Expand Down
115 changes: 115 additions & 0 deletions llvm/test/Transforms/InstCombine/vec_shuffle-phi-multiuse.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -S -passes=instcombine | FileCheck %s

; A phi that merges a constant vector with an interleaving shuffle has two
; *different* shufflevector users. Both shuffles are expected to be folded into
; the phi even though the phi has more than one use: on the 'then' edge they
; undo the interleave, on the 'entry' edge they fold to constants. The expected
; output keeps only the sub and a narrower <4 x i16> phi.
define <4 x i16> @f0(i1 %c, ptr %p0, ptr %p1) {
; CHECK-LABEL: define <4 x i16> @f0(
; CHECK-SAME: i1 [[C:%.*]], ptr [[P0:%.*]], ptr [[P1:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[MERGE:.*]]
; CHECK: [[THEN]]:
; CHECK-NEXT: [[LOAD0:%.*]] = load <4 x i16>, ptr [[P0]], align 16
; CHECK-NEXT: [[LOAD1:%.*]] = load <4 x i16>, ptr [[P1]], align 16
; CHECK-NEXT: [[TMP0:%.*]] = sub <4 x i16> [[LOAD0]], [[LOAD1]]
; CHECK-NEXT: br label %[[MERGE]]
; CHECK: [[MERGE]]:
; CHECK-NEXT: [[SUB:%.*]] = phi <4 x i16> [ <i16 -87, i16 327, i16 51, i16 755>, %[[ENTRY]] ], [ [[TMP0]], %[[THEN]] ]
; CHECK-NEXT: ret <4 x i16> [[SUB]]
;
entry:
br i1 %c, label %then, label %merge

then:
%load0 = load <4 x i16>, ptr %p0, align 16
%load1 = load <4 x i16>, ptr %p1, align 16
; Even result lanes take %load0 in order; odd result lanes take %load1 in
; reverse order (mask indices 7, 6, 5, 4).
%interleave = shufflevector <4 x i16> %load0, <4 x i16> %load1, <8 x i32> <i32 0, i32 7, i32 1, i32 6, i32 2, i32 5, i32 3, i32 4>
br label %merge

merge:
%phi = phi <8 x i16> [<i16 1, i16 22, i16 333, i16 4, i16 55, i16 6, i16 777, i16 88>, %entry], [%interleave, %then]
; Lanes 0,2,4,6: %load0 on the 'then' edge, <1, 333, 55, 777> on 'entry'.
%shuf0 = shufflevector <8 x i16> %phi, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; Lanes 7,5,3,1: %load1 on the 'then' edge, <88, 6, 4, 22> on 'entry'.
%shuf1 = shufflevector <8 x i16> %phi, <8 x i16> poison, <4 x i32> <i32 7, i32 5, i32 3, i32 1>
; On the 'entry' edge this is <1-88, 333-6, 55-4, 777-22> = <-87, 327, 51, 755>.
%sub = sub <4 x i16> %shuf0, %shuf1
ret <4 x i16> %sub
}

; Loop-carried case: the <8 x float> accumulator phi is de-interleaved by two
; different shuffles at the top of every iteration and re-interleaved at the
; bottom. The expected output replaces the wide phi with two <4 x float> phis
; fed directly by the two fadds, leaving a single interleaving shuffle in the
; exit block.
define void @deinterleave_interleave(ptr %p_begin, ptr %p_end, ptr %out) {
; CHECK-LABEL: define void @deinterleave_interleave(
; CHECK-SAME: ptr [[P_BEGIN:%.*]], ptr [[P_END:%.*]], ptr [[OUT:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[ACC:%.*]] = phi <4 x float> [ zeroinitializer, %[[ENTRY]] ], [ [[SUM_LOWS:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[ODDS:%.*]] = phi <4 x float> [ zeroinitializer, %[[ENTRY]] ], [ [[SUM_HIGHS:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[P:%.*]] = phi ptr [ [[P_BEGIN]], %[[ENTRY]] ], [ [[P_INC:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[VAL:%.*]] = load <4 x i8>, ptr [[P]], align 4
; CHECK-NEXT: [[HIGHS:%.*]] = ashr <4 x i8> [[VAL]], splat (i8 4)
; CHECK-NEXT: [[LOWS:%.*]] = and <4 x i8> [[VAL]], splat (i8 15)
; CHECK-NEXT: [[HIGHS_F:%.*]] = sitofp <4 x i8> [[HIGHS]] to <4 x float>
; CHECK-NEXT: [[LOWS_F:%.*]] = uitofp nneg <4 x i8> [[LOWS]] to <4 x float>
; CHECK-NEXT: [[SUM_LOWS]] = fadd <4 x float> [[ACC]], [[LOWS_F]]
; CHECK-NEXT: [[SUM_HIGHS]] = fadd <4 x float> [[ODDS]], [[HIGHS_F]]
; CHECK-NEXT: [[P_INC]] = getelementptr inbounds nuw i8, ptr [[P]], i64 4
; CHECK-NEXT: [[C:%.*]] = icmp eq ptr [[P_INC]], [[P_END]]
; CHECK-NEXT: br i1 [[C]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[INTERLEAVE:%.*]] = shufflevector <4 x float> [[SUM_LOWS]], <4 x float> [[SUM_HIGHS]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; CHECK-NEXT: store <8 x float> [[INTERLEAVE]], ptr [[OUT]], align 4
; CHECK-NEXT: ret void
;
entry:
br label %loop

loop:
; Wide accumulator: starts at zero, then carries the re-interleaved sums.
%acc = phi <8 x float> [ zeroinitializer, %entry ], [ %interleave, %loop ]
%p = phi ptr [%p_begin, %entry ], [%p_inc, %loop]

; Two distinct shufflevector users of the same phi (even lanes / odd lanes).
%evens = shufflevector <8 x float> %acc, <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%odds = shufflevector <8 x float> %acc, <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>

; Split each loaded byte into its high nibble (arithmetic shift) and low nibble.
%val = load <4 x i8>, ptr %p, align 4
%highs = ashr <4 x i8> %val, <i8 4, i8 4, i8 4, i8 4>
%lows = and <4 x i8> %val, <i8 15, i8 15, i8 15, i8 15>

%highs_f = sitofp <4 x i8> %highs to <4 x float>
%lows_f = sitofp <4 x i8> %lows to <4 x float>

%sum_lows = fadd <4 x float> %evens, %lows_f
%sum_highs = fadd <4 x float> %odds, %highs_f

; Re-interleave: even result lanes from %sum_lows, odd lanes from %sum_highs,
; i.e. the exact inverse of the %evens / %odds shuffles above.
%interleave = shufflevector <4 x float> %sum_lows, <4 x float> %sum_highs, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>

%p_inc = getelementptr inbounds i8, ptr %p, i32 4
%c = icmp eq ptr %p_inc, %p_end
br i1 %c, label %exit, label %loop

exit:
store <8 x float> %interleave, ptr %out, align 4
ret void
}

; The phi of two constant vectors has two users of different kinds: a
; shufflevector and an add. The expected output folds the shuffle, the add and
; the xor into a single phi of constants.
define <4 x i16> @f1(i1 %c, ptr %p) {
; CHECK-LABEL: define <4 x i16> @f1(
; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[MERGE:.*]]
; CHECK: [[THEN]]:
; CHECK-NEXT: store i32 42, ptr [[P]], align 4
; CHECK-NEXT: br label %[[MERGE]]
; CHECK: [[MERGE]]:
; CHECK-NEXT: [[XOR:%.*]] = phi <4 x i16> [ <i16 3, i16 346, i16 undef, i16 undef>, %[[ENTRY]] ], [ <i16 7, i16 74, i16 undef, i16 undef>, %[[THEN]] ]
; CHECK-NEXT: ret <4 x i16> [[XOR]]
;
entry:
br i1 %c, label %then, label %merge

then:
; NOTE(review): presumably this store only gives the 'then' block a side effect
; so the branch is kept -- confirm.
store i32 42, ptr %p, align 4
br label %merge

merge:
%phi = phi <4 x i16> [<i16 1, i16 22, i16 333, i16 4>, %entry], [<i16 555, i16 6, i16 77, i16 8>, %then]
; Mask indices 4 and 6 select from the poison second operand, which is why the
; last two lanes of the expected constants are undef.
%shuf0 = shufflevector <4 x i16> %phi, <4 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%add1 = add <4 x i16> %phi, <i16 1, i16 1, i16 1, i16 1>
; Defined lanes: 'entry' gives <1^2, 333^23> = <3, 346>; 'then' gives
; <555^556, 77^7> = <7, 74>.
%xor = xor <4 x i16> %shuf0, %add1
ret <4 x i16> %xor
}
Loading