Skip to content

Commit 7687548

Browse files
authored
[InstCombine] Optimistically allow multiple shufflevector uses in foldOpIntoPhi (#114278)
We would like to optimize situations of the form that happen after loop vectorization+SROA:
```
loop:
  %phi = phi zeroinitializer, %interleaved
  %deinterleave_a = shufflevector %phi, poison ; pick half of the lanes
  %deinterleave_b = shufflevector %phi, poison ; pick remaining lanes
  ...
  %a = ...
  %b = ...
  %interleaved = shufflevector %a, %b ; interleave lanes of a+b
```
where the interleave and de-interleave shuffle operations cancel each other out. This could be handled by `foldOpIntoPhi` but does not currently work because it does not proceed when there are multiple uses of the `Phi` operation. This extends `foldOpIntoPhi` to allow multiple `shufflevector` uses when they are shown to simplify for all `Phi` input values.
1 parent 38eaea7 commit 7687548

File tree

4 files changed

+141
-13
lines changed

4 files changed

+141
-13
lines changed

llvm/lib/Transforms/InstCombine/InstCombineInternal.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -600,7 +600,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
600600
/// Given a binary operator, cast instruction, or select which has a PHI node
601601
/// as operand #0, see if we can fold the instruction into the PHI (which is
602602
/// only possible if all operands to the PHI are constants).
603-
Instruction *foldOpIntoPhi(Instruction &I, PHINode *PN);
603+
Instruction *foldOpIntoPhi(Instruction &I, PHINode *PN,
604+
bool AllowMultipleUses = false);
604605

605606
/// For a binary operator with 2 phi operands, try to hoist the binary
606607
/// operation before the phi. This can result in fewer instructions in

llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2978,7 +2978,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
29782978
}
29792979
}
29802980
if (auto *PN = dyn_cast<PHINode>(LHS)) {
2981-
if (Instruction *I = foldOpIntoPhi(SVI, PN))
2981+
if (Instruction *I = foldOpIntoPhi(SVI, PN, /*AllowMultipleUses=*/true))
29822982
return I;
29832983
}
29842984
}

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1763,22 +1763,26 @@ static Value *simplifyInstructionWithPHI(Instruction &I, PHINode *PN,
17631763
return nullptr;
17641764
}
17651765

1766-
Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
1766+
Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN,
1767+
bool AllowMultipleUses) {
17671768
unsigned NumPHIValues = PN->getNumIncomingValues();
17681769
if (NumPHIValues == 0)
17691770
return nullptr;
17701771

17711772
// We normally only transform phis with a single use. However, if a PHI has
17721773
// multiple uses and they are all the same operation, we can fold *all* of the
17731774
// uses into the PHI.
1774-
if (!PN->hasOneUse()) {
1775+
bool OneUse = PN->hasOneUse();
1776+
bool IdenticalUsers = false;
1777+
if (!AllowMultipleUses && !OneUse) {
17751778
// Walk the use list for the instruction, comparing them to I.
17761779
for (User *U : PN->users()) {
17771780
Instruction *UI = cast<Instruction>(U);
17781781
if (UI != &I && !I.isIdenticalTo(UI))
17791782
return nullptr;
17801783
}
17811784
// Otherwise, we can replace *all* users with the new PHI we form.
1785+
IdenticalUsers = true;
17821786
}
17831787

17841788
// Check that all operands are phi-translatable.
@@ -1829,6 +1833,9 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
18291833
continue;
18301834
}
18311835

1836+
if (!OneUse && !IdenticalUsers)
1837+
return nullptr;
1838+
18321839
if (SeenNonSimplifiedInVal)
18331840
return nullptr; // More than one non-simplified value.
18341841
SeenNonSimplifiedInVal = true;
@@ -1890,17 +1897,22 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
18901897
for (unsigned i = 0; i != NumPHIValues; ++i)
18911898
NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));
18921899

1893-
for (User *U : make_early_inc_range(PN->users())) {
1894-
Instruction *User = cast<Instruction>(U);
1895-
if (User == &I)
1896-
continue;
1897-
replaceInstUsesWith(*User, NewPN);
1898-
eraseInstFromFunction(*User);
1900+
if (IdenticalUsers) {
1901+
for (User *U : make_early_inc_range(PN->users())) {
1902+
Instruction *User = cast<Instruction>(U);
1903+
if (User == &I)
1904+
continue;
1905+
replaceInstUsesWith(*User, NewPN);
1906+
eraseInstFromFunction(*User);
1907+
}
1908+
OneUse = true;
18991909
}
19001910

1901-
replaceAllDbgUsesWith(const_cast<PHINode &>(*PN),
1902-
const_cast<PHINode &>(*NewPN),
1903-
const_cast<PHINode &>(*PN), DT);
1911+
if (OneUse) {
1912+
replaceAllDbgUsesWith(const_cast<PHINode &>(*PN),
1913+
const_cast<PHINode &>(*NewPN),
1914+
const_cast<PHINode &>(*PN), DT);
1915+
}
19041916
return replaceInstUsesWith(I, NewPN);
19051917
}
19061918

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt < %s -S -passes=instcombine | FileCheck %s
3+
4+
define <4 x i16> @f0(i1 %c, ptr %p0, ptr %p1) {
5+
; CHECK-LABEL: define <4 x i16> @f0(
6+
; CHECK-SAME: i1 [[C:%.*]], ptr [[P0:%.*]], ptr [[P1:%.*]]) {
7+
; CHECK-NEXT: [[ENTRY:.*]]:
8+
; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[MERGE:.*]]
9+
; CHECK: [[THEN]]:
10+
; CHECK-NEXT: [[LOAD0:%.*]] = load <4 x i16>, ptr [[P0]], align 16
11+
; CHECK-NEXT: [[LOAD1:%.*]] = load <4 x i16>, ptr [[P1]], align 16
12+
; CHECK-NEXT: [[TMP0:%.*]] = sub <4 x i16> [[LOAD0]], [[LOAD1]]
13+
; CHECK-NEXT: br label %[[MERGE]]
14+
; CHECK: [[MERGE]]:
15+
; CHECK-NEXT: [[SUB:%.*]] = phi <4 x i16> [ <i16 -87, i16 327, i16 51, i16 755>, %[[ENTRY]] ], [ [[TMP0]], %[[THEN]] ]
16+
; CHECK-NEXT: ret <4 x i16> [[SUB]]
17+
;
18+
entry:
19+
br i1 %c, label %then, label %merge
20+
21+
then:
22+
%load0 = load <4 x i16>, ptr %p0, align 16
23+
%load1 = load <4 x i16>, ptr %p1, align 16
24+
%interleave = shufflevector <4 x i16> %load0, <4 x i16> %load1, <8 x i32> <i32 0, i32 7, i32 1, i32 6, i32 2, i32 5, i32 3, i32 4>
25+
br label %merge
26+
27+
merge:
28+
%phi = phi <8 x i16> [<i16 1, i16 22, i16 333, i16 4, i16 55, i16 6, i16 777, i16 88>, %entry], [%interleave, %then]
29+
%shuf0 = shufflevector <8 x i16> %phi, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
30+
%shuf1 = shufflevector <8 x i16> %phi, <8 x i16> poison, <4 x i32> <i32 7, i32 5, i32 3, i32 1>
31+
%sub = sub <4 x i16> %shuf0, %shuf1
32+
ret <4 x i16> %sub
33+
}
34+
35+
define void @deinterleave_interleave(ptr %p_begin, ptr %p_end, ptr %out) {
36+
; CHECK-LABEL: define void @deinterleave_interleave(
37+
; CHECK-SAME: ptr [[P_BEGIN:%.*]], ptr [[P_END:%.*]], ptr [[OUT:%.*]]) {
38+
; CHECK-NEXT: [[ENTRY:.*]]:
39+
; CHECK-NEXT: br label %[[LOOP:.*]]
40+
; CHECK: [[LOOP]]:
41+
; CHECK-NEXT: [[ACC:%.*]] = phi <4 x float> [ zeroinitializer, %[[ENTRY]] ], [ [[SUM_LOWS:%.*]], %[[LOOP]] ]
42+
; CHECK-NEXT: [[ODDS:%.*]] = phi <4 x float> [ zeroinitializer, %[[ENTRY]] ], [ [[SUM_HIGHS:%.*]], %[[LOOP]] ]
43+
; CHECK-NEXT: [[P:%.*]] = phi ptr [ [[P_BEGIN]], %[[ENTRY]] ], [ [[P_INC:%.*]], %[[LOOP]] ]
44+
; CHECK-NEXT: [[VAL:%.*]] = load <4 x i8>, ptr [[P]], align 4
45+
; CHECK-NEXT: [[HIGHS:%.*]] = ashr <4 x i8> [[VAL]], splat (i8 4)
46+
; CHECK-NEXT: [[LOWS:%.*]] = and <4 x i8> [[VAL]], splat (i8 15)
47+
; CHECK-NEXT: [[HIGHS_F:%.*]] = sitofp <4 x i8> [[HIGHS]] to <4 x float>
48+
; CHECK-NEXT: [[LOWS_F:%.*]] = uitofp nneg <4 x i8> [[LOWS]] to <4 x float>
49+
; CHECK-NEXT: [[SUM_LOWS]] = fadd <4 x float> [[ACC]], [[LOWS_F]]
50+
; CHECK-NEXT: [[SUM_HIGHS]] = fadd <4 x float> [[ODDS]], [[HIGHS_F]]
51+
; CHECK-NEXT: [[P_INC]] = getelementptr inbounds nuw i8, ptr [[P]], i64 4
52+
; CHECK-NEXT: [[C:%.*]] = icmp eq ptr [[P_INC]], [[P_END]]
53+
; CHECK-NEXT: br i1 [[C]], label %[[EXIT:.*]], label %[[LOOP]]
54+
; CHECK: [[EXIT]]:
55+
; CHECK-NEXT: [[INTERLEAVE:%.*]] = shufflevector <4 x float> [[SUM_LOWS]], <4 x float> [[SUM_HIGHS]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
56+
; CHECK-NEXT: store <8 x float> [[INTERLEAVE]], ptr [[OUT]], align 4
57+
; CHECK-NEXT: ret void
58+
;
59+
entry:
60+
br label %loop
61+
62+
loop:
63+
%acc = phi <8 x float> [ zeroinitializer, %entry ], [ %interleave, %loop ]
64+
%p = phi ptr [%p_begin, %entry ], [%p_inc, %loop]
65+
66+
%evens = shufflevector <8 x float> %acc, <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
67+
%odds = shufflevector <8 x float> %acc, <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
68+
69+
%val = load <4 x i8>, ptr %p, align 4
70+
%highs = ashr <4 x i8> %val, <i8 4, i8 4, i8 4, i8 4>
71+
%lows = and <4 x i8> %val, <i8 15, i8 15, i8 15, i8 15>
72+
73+
%highs_f = sitofp <4 x i8> %highs to <4 x float>
74+
%lows_f = sitofp <4 x i8> %lows to <4 x float>
75+
76+
%sum_lows = fadd <4 x float> %evens, %lows_f
77+
%sum_highs = fadd <4 x float> %odds, %highs_f
78+
79+
%interleave = shufflevector <4 x float> %sum_lows, <4 x float> %sum_highs, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
80+
81+
%p_inc = getelementptr inbounds i8, ptr %p, i32 4
82+
%c = icmp eq ptr %p_inc, %p_end
83+
br i1 %c, label %exit, label %loop
84+
85+
exit:
86+
store <8 x float> %interleave, ptr %out, align 4
87+
ret void
88+
}
89+
90+
define <4 x i16> @f1(i1 %c, ptr %p) {
91+
; CHECK-LABEL: define <4 x i16> @f1(
92+
; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]]) {
93+
; CHECK-NEXT: [[ENTRY:.*]]:
94+
; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[MERGE:.*]]
95+
; CHECK: [[THEN]]:
96+
; CHECK-NEXT: store i32 42, ptr [[P]], align 4
97+
; CHECK-NEXT: br label %[[MERGE]]
98+
; CHECK: [[MERGE]]:
99+
; CHECK-NEXT: [[XOR:%.*]] = phi <4 x i16> [ <i16 3, i16 346, i16 undef, i16 undef>, %[[ENTRY]] ], [ <i16 7, i16 74, i16 undef, i16 undef>, %[[THEN]] ]
100+
; CHECK-NEXT: ret <4 x i16> [[XOR]]
101+
;
102+
entry:
103+
br i1 %c, label %then, label %merge
104+
105+
then:
106+
store i32 42, ptr %p, align 4
107+
br label %merge
108+
109+
merge:
110+
%phi = phi <4 x i16> [<i16 1, i16 22, i16 333, i16 4>, %entry], [<i16 555, i16 6, i16 77, i16 8>, %then]
111+
%shuf0 = shufflevector <4 x i16> %phi, <4 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
112+
%add1 = add <4 x i16> %phi, <i16 1, i16 1, i16 1, i16 1>
113+
%xor = xor <4 x i16> %shuf0, %add1
114+
ret <4 x i16> %xor
115+
}

0 commit comments

Comments
 (0)