Skip to content

Commit 9dd83f5

Browse files
committed
[DAG] visitVECTOR_SHUFFLE - attempt to match commuted shuffles with MergeInnerShuffle.
Try to match "shuffle(C, shuffle(A, B, M0), M1) -> shuffle(A, B, M2)" etc. by using MergeInnerShuffle's commuted inner shuffle mode.
1 parent 00e6513 commit 9dd83f5

File tree

5 files changed

+299
-295
lines changed

5 files changed

+299
-295
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -21196,23 +21196,28 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
2119621196
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
2119721197
// Don't try to fold shuffles with illegal type.
2119821198
// Only fold if this shuffle is the only user of the other shuffle.
21199-
if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
21200-
N->isOnlyUserOf(N0.getNode())) {
21201-
// The incoming shuffle must be of the same type as the result of the
21202-
// current shuffle.
21203-
auto *OtherSV = cast<ShuffleVectorSDNode>(N0);
21204-
assert(OtherSV->getOperand(0).getValueType() == VT &&
21205-
"Shuffle types don't match");
21206-
21207-
SDValue SV0, SV1;
21208-
SmallVector<int, 4> Mask;
21209-
if (MergeInnerShuffle(false, SVN, OtherSV, N1, TLI, SV0, SV1, Mask)) {
21210-
// Check if all indices in Mask are Undef. In case, propagate Undef.
21211-
if (llvm::all_of(Mask, [](int M) { return M < 0; }))
21212-
return DAG.getUNDEF(VT);
21213-
21214-
return DAG.getVectorShuffle(VT, SDLoc(N), SV0 ? SV0 : DAG.getUNDEF(VT),
21215-
SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
21199+
// Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
21200+
for (int i = 0; i != 2; ++i) {
21201+
if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
21202+
N->isOnlyUserOf(N->getOperand(i).getNode())) {
21203+
// The incoming shuffle must be of the same type as the result of the
21204+
// current shuffle.
21205+
auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
21206+
assert(OtherSV->getOperand(0).getValueType() == VT &&
21207+
"Shuffle types don't match");
21208+
21209+
SDValue SV0, SV1;
21210+
SmallVector<int, 4> Mask;
21211+
if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
21212+
SV0, SV1, Mask)) {
21213+
// Check if all indices in Mask are Undef. If so, propagate Undef.
21214+
if (llvm::all_of(Mask, [](int M) { return M < 0; }))
21215+
return DAG.getUNDEF(VT);
21216+
21217+
return DAG.getVectorShuffle(VT, SDLoc(N),
21218+
SV0 ? SV0 : DAG.getUNDEF(VT),
21219+
SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
21220+
}
2121621221
}
2121721222
}
2121821223

llvm/test/CodeGen/Thumb2/mve-shuffle.ll

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -135,19 +135,19 @@ define arm_aapcs_vfpcc <4 x i32> @shuffle3step_i32(<16 x i32> %src) {
135135
; CHECK: @ %bb.0: @ %entry
136136
; CHECK-NEXT: .vsave {d8, d9}
137137
; CHECK-NEXT: vpush {d8, d9}
138-
; CHECK-NEXT: vmov.f32 s14, s8
139-
; CHECK-NEXT: vmov.f32 s15, s11
140-
; CHECK-NEXT: vmov.f32 s16, s1
141-
; CHECK-NEXT: vmov.f32 s12, s2
142-
; CHECK-NEXT: vmov.f32 s17, s4
143-
; CHECK-NEXT: vmov.f32 s1, s3
144-
; CHECK-NEXT: vmov.f32 s18, s7
145-
; CHECK-NEXT: vmov.f32 s2, s6
146-
; CHECK-NEXT: vmov.f32 s19, s10
147-
; CHECK-NEXT: vmov.f32 s3, s9
148-
; CHECK-NEXT: vmov.f32 s13, s5
149-
; CHECK-NEXT: vadd.i32 q0, q0, q4
150-
; CHECK-NEXT: vadd.i32 q0, q0, q3
138+
; CHECK-NEXT: vmov.f32 s12, s1
139+
; CHECK-NEXT: vmov.f32 s16, s0
140+
; CHECK-NEXT: vmov.f32 s13, s4
141+
; CHECK-NEXT: vmov.f32 s17, s3
142+
; CHECK-NEXT: vmov.f32 s14, s7
143+
; CHECK-NEXT: vmov.f32 s18, s6
144+
; CHECK-NEXT: vmov.f32 s4, s2
145+
; CHECK-NEXT: vmov.f32 s6, s8
146+
; CHECK-NEXT: vmov.f32 s15, s10
147+
; CHECK-NEXT: vmov.f32 s19, s9
148+
; CHECK-NEXT: vadd.i32 q3, q4, q3
149+
; CHECK-NEXT: vmov.f32 s7, s11
150+
; CHECK-NEXT: vadd.i32 q0, q3, q1
151151
; CHECK-NEXT: vpop {d8, d9}
152152
; CHECK-NEXT: bx lr
153153
entry:
@@ -1164,19 +1164,19 @@ define arm_aapcs_vfpcc <4 x float> @shuffle3step_f32(<16 x float> %src) {
11641164
; CHECKFP: @ %bb.0: @ %entry
11651165
; CHECKFP-NEXT: .vsave {d8, d9}
11661166
; CHECKFP-NEXT: vpush {d8, d9}
1167-
; CHECKFP-NEXT: vmov.f32 s14, s8
1168-
; CHECKFP-NEXT: vmov.f32 s15, s11
1169-
; CHECKFP-NEXT: vmov.f32 s16, s1
1170-
; CHECKFP-NEXT: vmov.f32 s12, s2
1171-
; CHECKFP-NEXT: vmov.f32 s17, s4
1172-
; CHECKFP-NEXT: vmov.f32 s1, s3
1173-
; CHECKFP-NEXT: vmov.f32 s18, s7
1174-
; CHECKFP-NEXT: vmov.f32 s2, s6
1175-
; CHECKFP-NEXT: vmov.f32 s19, s10
1176-
; CHECKFP-NEXT: vmov.f32 s3, s9
1177-
; CHECKFP-NEXT: vmov.f32 s13, s5
1178-
; CHECKFP-NEXT: vadd.f32 q0, q0, q4
1179-
; CHECKFP-NEXT: vadd.f32 q0, q0, q3
1167+
; CHECKFP-NEXT: vmov.f32 s12, s1
1168+
; CHECKFP-NEXT: vmov.f32 s16, s0
1169+
; CHECKFP-NEXT: vmov.f32 s13, s4
1170+
; CHECKFP-NEXT: vmov.f32 s17, s3
1171+
; CHECKFP-NEXT: vmov.f32 s14, s7
1172+
; CHECKFP-NEXT: vmov.f32 s18, s6
1173+
; CHECKFP-NEXT: vmov.f32 s4, s2
1174+
; CHECKFP-NEXT: vmov.f32 s6, s8
1175+
; CHECKFP-NEXT: vmov.f32 s15, s10
1176+
; CHECKFP-NEXT: vmov.f32 s19, s9
1177+
; CHECKFP-NEXT: vadd.f32 q3, q4, q3
1178+
; CHECKFP-NEXT: vmov.f32 s7, s11
1179+
; CHECKFP-NEXT: vadd.f32 q0, q3, q1
11801180
; CHECKFP-NEXT: vpop {d8, d9}
11811181
; CHECKFP-NEXT: bx lr
11821182
entry:

0 commit comments

Comments
 (0)