Skip to content

Commit caacf86

Browse files
authored
[DAG] Fold freeze(shuffle(x,y,m)) -> shuffle(freeze(x),freeze(y),m) (#90952)
If the shuffle mask contains no undef elements, then we can move the freeze through a shuffle node. This requires special-case handling to create the new ShuffleVectorSDNode (it is built with getVectorShuffle, reusing the original node's mask, rather than with the generic getNode call). Includes VECTOR_SHUFFLE support for isGuaranteedNotToBeUndefOrPoison / canCreateUndefOrPoison.
1 parent 1e3c630 commit caacf86

File tree

7 files changed

+478
-754
lines changed

7 files changed

+478
-754
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15462,9 +15462,11 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
1546215462
N0->getNumValues() != 1 || !N0->hasOneUse())
1546315463
return SDValue();
1546415464

15465-
bool AllowMultipleMaybePoisonOperands = N0.getOpcode() == ISD::BUILD_VECTOR ||
15466-
N0.getOpcode() == ISD::BUILD_PAIR ||
15467-
N0.getOpcode() == ISD::CONCAT_VECTORS;
15465+
bool AllowMultipleMaybePoisonOperands =
15466+
N0.getOpcode() == ISD::BUILD_VECTOR ||
15467+
N0.getOpcode() == ISD::BUILD_PAIR ||
15468+
N0.getOpcode() == ISD::VECTOR_SHUFFLE ||
15469+
N0.getOpcode() == ISD::CONCAT_VECTORS;
1546815470

1546915471
// Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
1547015472
// ones" or "constant" into something that depends on FrozenUndef. We can
@@ -15537,8 +15539,16 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
1553715539
if (Op.getOpcode() == ISD::UNDEF)
1553815540
Op = DAG.getFreeze(Op);
1553915541
}
15540-
// NOTE: this strips poison generating flags.
15541-
SDValue R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops);
15542+
15543+
SDValue R;
15544+
if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N0)) {
15545+
// Special case handling for ShuffleVectorSDNode nodes.
15546+
R = DAG.getVectorShuffle(N0.getValueType(), SDLoc(N0), Ops[0], Ops[1],
15547+
SVN->getMask());
15548+
} else {
15549+
// NOTE: this strips poison generating flags.
15550+
R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops);
15551+
}
1554215552
assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) &&
1554315553
"Can't create node that may be undef/poison!");
1554415554
return R;

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5088,6 +5088,24 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
50885088
}
50895089
return true;
50905090

5091+
case ISD::VECTOR_SHUFFLE: {
5092+
APInt DemandedLHS, DemandedRHS;
5093+
auto *SVN = cast<ShuffleVectorSDNode>(Op);
5094+
if (!getShuffleDemandedElts(DemandedElts.getBitWidth(), SVN->getMask(),
5095+
DemandedElts, DemandedLHS, DemandedRHS,
5096+
/*AllowUndefElts=*/false))
5097+
return false;
5098+
if (!DemandedLHS.isZero() &&
5099+
!isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), DemandedLHS,
5100+
PoisonOnly, Depth + 1))
5101+
return false;
5102+
if (!DemandedRHS.isZero() &&
5103+
!isGuaranteedNotToBeUndefOrPoison(Op.getOperand(1), DemandedRHS,
5104+
PoisonOnly, Depth + 1))
5105+
return false;
5106+
return true;
5107+
}
5108+
50915109
// TODO: Search for noundef attributes from library functions.
50925110

50935111
// TODO: Pointers dereferenced by ISD::LOAD/STORE ops are noundef.
@@ -5225,6 +5243,15 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
52255243
return KnownIdx.getMaxValue().uge(VecVT.getVectorMinNumElements());
52265244
}
52275245

5246+
case ISD::VECTOR_SHUFFLE: {
5247+
// Check for any demanded shuffle element that is undef.
5248+
auto *SVN = cast<ShuffleVectorSDNode>(Op);
5249+
for (auto [Idx, Elt] : enumerate(SVN->getMask()))
5250+
if (Elt < 0 && DemandedElts[Idx])
5251+
return true;
5252+
return false;
5253+
}
5254+
52285255
default:
52295256
// Allow the target to implement this method for its nodes.
52305257
if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN ||

llvm/test/CodeGen/SystemZ/pr60413.ll

Lines changed: 86 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -13,114 +13,110 @@ declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #0
1313
define dso_local void @m() local_unnamed_addr #1 {
1414
; CHECK-LABEL: m:
1515
; CHECK: # %bb.0: # %entry
16-
; CHECK-NEXT: stmg %r12, %r15, 96(%r15)
16+
; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
1717
; CHECK-NEXT: aghi %r15, -168
18-
; CHECK-NEXT: llhrl %r2, f+4
19-
; CHECK-NEXT: sll %r2, 8
20-
; CHECK-NEXT: larl %r1, f
21-
; CHECK-NEXT: ic %r2, 6(%r1)
22-
; CHECK-NEXT: larl %r1, e
23-
; CHECK-NEXT: lb %r0, 3(%r1)
24-
; CHECK-NEXT: clfi %r2, 128
18+
; CHECK-NEXT: lhrl %r1, f+4
19+
; CHECK-NEXT: sll %r1, 8
20+
; CHECK-NEXT: larl %r2, f
21+
; CHECK-NEXT: ic %r1, 6(%r2)
22+
; CHECK-NEXT: larl %r2, e
23+
; CHECK-NEXT: lb %r0, 3(%r2)
24+
; CHECK-NEXT: vlvgp %v0, %r0, %r1
25+
; CHECK-NEXT: vlvgp %v1, %r1, %r0
26+
; CHECK-NEXT: vlvgf %v1, %r1, 0
27+
; CHECK-NEXT: vlvgf %v1, %r1, 2
28+
; CHECK-NEXT: vlvgp %v2, %r1, %r1
29+
; CHECK-NEXT: # kill: def $r1l killed $r1l killed $r1d
30+
; CHECK-NEXT: nilh %r1, 255
31+
; CHECK-NEXT: chi %r1, 128
2532
; CHECK-NEXT: ipm %r1
2633
; CHECK-NEXT: risbg %r1, %r1, 63, 191, 36
27-
; CHECK-NEXT: vlvgp %v1, %r2, %r0
28-
; CHECK-NEXT: vlvgf %v1, %r2, 0
29-
; CHECK-NEXT: vlvgf %v1, %r2, 2
30-
; CHECK-NEXT: vlvgp %v0, %r0, %r2
31-
; CHECK-NEXT: vlvgp %v2, %r2, %r2
32-
; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
33-
; CHECK-NEXT: nilh %r2, 255
34-
; CHECK-NEXT: chi %r2, 128
35-
; CHECK-NEXT: ipm %r2
36-
; CHECK-NEXT: risbg %r2, %r2, 63, 191, 36
3734
; CHECK-NEXT: vlvgf %v0, %r0, 0
3835
; CHECK-NEXT: vlvgf %v0, %r0, 2
39-
; CHECK-NEXT: vrepf %v2, %v2, 1
4036
; CHECK-NEXT: vgbm %v3, 30583
4137
; CHECK-NEXT: vn %v0, %v0, %v3
4238
; CHECK-NEXT: vn %v1, %v1, %v3
39+
; CHECK-NEXT: vrepf %v2, %v2, 1
4340
; CHECK-NEXT: vn %v2, %v2, %v3
4441
; CHECK-NEXT: vrepif %v3, 127
4542
; CHECK-NEXT: vchlf %v1, %v1, %v3
46-
; CHECK-NEXT: vlgvf %r12, %v1, 0
43+
; CHECK-NEXT: vlgvf %r13, %v1, 0
4744
; CHECK-NEXT: vchlf %v2, %v2, %v3
48-
; CHECK-NEXT: vlgvf %r4, %v2, 1
49-
; CHECK-NEXT: nilf %r4, 1
50-
; CHECK-NEXT: vlgvf %r5, %v2, 0
51-
; CHECK-NEXT: risbg %r3, %r5, 48, 176, 15
52-
; CHECK-NEXT: rosbg %r3, %r4, 32, 49, 14
53-
; CHECK-NEXT: vlgvf %r14, %v2, 2
45+
; CHECK-NEXT: vlgvf %r3, %v2, 1
46+
; CHECK-NEXT: nilf %r3, 1
47+
; CHECK-NEXT: vlgvf %r4, %v2, 0
48+
; CHECK-NEXT: risbg %r2, %r4, 48, 176, 15
49+
; CHECK-NEXT: rosbg %r2, %r3, 32, 49, 14
50+
; CHECK-NEXT: vlgvf %r5, %v2, 2
51+
; CHECK-NEXT: nilf %r5, 1
52+
; CHECK-NEXT: rosbg %r2, %r5, 32, 50, 13
53+
; CHECK-NEXT: vlgvf %r14, %v2, 3
5454
; CHECK-NEXT: nilf %r14, 1
55-
; CHECK-NEXT: rosbg %r3, %r14, 32, 50, 13
56-
; CHECK-NEXT: vlgvf %r13, %v2, 3
57-
; CHECK-NEXT: nilf %r13, 1
58-
; CHECK-NEXT: rosbg %r3, %r13, 32, 51, 12
59-
; CHECK-NEXT: rosbg %r3, %r12, 52, 52, 11
60-
; CHECK-NEXT: vlgvf %r12, %v1, 1
61-
; CHECK-NEXT: rosbg %r3, %r12, 53, 53, 10
62-
; CHECK-NEXT: vlgvf %r12, %v1, 2
63-
; CHECK-NEXT: rosbg %r3, %r12, 54, 54, 9
64-
; CHECK-NEXT: vlgvf %r12, %v1, 3
65-
; CHECK-NEXT: rosbg %r3, %r12, 55, 55, 8
55+
; CHECK-NEXT: rosbg %r2, %r14, 32, 51, 12
56+
; CHECK-NEXT: rosbg %r2, %r13, 52, 52, 11
57+
; CHECK-NEXT: vlgvf %r13, %v1, 1
58+
; CHECK-NEXT: rosbg %r2, %r13, 53, 53, 10
59+
; CHECK-NEXT: vlgvf %r13, %v1, 2
60+
; CHECK-NEXT: rosbg %r2, %r13, 54, 54, 9
61+
; CHECK-NEXT: vlgvf %r13, %v1, 3
62+
; CHECK-NEXT: rosbg %r2, %r13, 55, 55, 8
6663
; CHECK-NEXT: vchlf %v0, %v0, %v3
67-
; CHECK-NEXT: vlgvf %r12, %v0, 0
68-
; CHECK-NEXT: rosbg %r3, %r12, 56, 56, 7
69-
; CHECK-NEXT: vlgvf %r12, %v0, 1
70-
; CHECK-NEXT: rosbg %r3, %r12, 57, 57, 6
71-
; CHECK-NEXT: vlgvf %r12, %v0, 2
72-
; CHECK-NEXT: rosbg %r3, %r12, 58, 58, 5
73-
; CHECK-NEXT: vlgvf %r12, %v0, 3
74-
; CHECK-NEXT: rosbg %r3, %r12, 59, 59, 4
75-
; CHECK-NEXT: nilf %r5, 1
76-
; CHECK-NEXT: rosbg %r3, %r5, 32, 60, 3
77-
; CHECK-NEXT: rosbg %r3, %r4, 32, 61, 2
78-
; CHECK-NEXT: rosbg %r3, %r14, 32, 62, 1
79-
; CHECK-NEXT: or %r3, %r13
80-
; CHECK-NEXT: vlgvb %r5, %v0, 1
81-
; CHECK-NEXT: vlgvb %r4, %v0, 0
82-
; CHECK-NEXT: risbg %r4, %r4, 48, 176, 15
83-
; CHECK-NEXT: rosbg %r4, %r5, 49, 49, 14
84-
; CHECK-NEXT: vlgvb %r5, %v0, 2
85-
; CHECK-NEXT: rosbg %r4, %r5, 50, 50, 13
86-
; CHECK-NEXT: vlgvb %r5, %v0, 3
87-
; CHECK-NEXT: rosbg %r4, %r5, 51, 51, 12
88-
; CHECK-NEXT: vlgvb %r5, %v0, 4
89-
; CHECK-NEXT: rosbg %r4, %r5, 52, 52, 11
90-
; CHECK-NEXT: vlgvb %r5, %v0, 5
91-
; CHECK-NEXT: rosbg %r4, %r5, 53, 53, 10
92-
; CHECK-NEXT: vlgvb %r5, %v0, 6
93-
; CHECK-NEXT: rosbg %r4, %r5, 54, 54, 9
94-
; CHECK-NEXT: vlgvb %r5, %v0, 7
95-
; CHECK-NEXT: rosbg %r4, %r5, 55, 55, 8
96-
; CHECK-NEXT: vlgvb %r5, %v0, 8
97-
; CHECK-NEXT: rosbg %r4, %r5, 56, 56, 7
98-
; CHECK-NEXT: vlgvb %r5, %v0, 9
99-
; CHECK-NEXT: rosbg %r4, %r5, 57, 57, 6
100-
; CHECK-NEXT: vlgvb %r5, %v0, 10
101-
; CHECK-NEXT: rosbg %r4, %r5, 58, 58, 5
102-
; CHECK-NEXT: vlgvb %r5, %v0, 11
103-
; CHECK-NEXT: rosbg %r4, %r5, 59, 59, 4
104-
; CHECK-NEXT: vlgvb %r5, %v0, 12
105-
; CHECK-NEXT: rosbg %r4, %r5, 60, 60, 3
106-
; CHECK-NEXT: vlgvb %r5, %v0, 13
107-
; CHECK-NEXT: rosbg %r4, %r5, 61, 61, 2
108-
; CHECK-NEXT: vlgvb %r5, %v0, 14
109-
; CHECK-NEXT: rosbg %r4, %r5, 62, 62, 1
110-
; CHECK-NEXT: vlgvb %r5, %v0, 15
111-
; CHECK-NEXT: rosbg %r4, %r5, 63, 63, 0
112-
; CHECK-NEXT: xilf %r4, 4294967295
113-
; CHECK-NEXT: or %r4, %r3
114-
; CHECK-NEXT: tmll %r4, 65535
115-
; CHECK-NEXT: ipm %r3
116-
; CHECK-NEXT: afi %r3, -268435456
117-
; CHECK-NEXT: srl %r3, 31
64+
; CHECK-NEXT: vlgvf %r13, %v0, 0
65+
; CHECK-NEXT: rosbg %r2, %r13, 56, 56, 7
66+
; CHECK-NEXT: vlgvf %r13, %v0, 1
67+
; CHECK-NEXT: rosbg %r2, %r13, 57, 57, 6
68+
; CHECK-NEXT: vlgvf %r13, %v0, 2
69+
; CHECK-NEXT: rosbg %r2, %r13, 58, 58, 5
70+
; CHECK-NEXT: vlgvf %r13, %v0, 3
71+
; CHECK-NEXT: rosbg %r2, %r13, 59, 59, 4
72+
; CHECK-NEXT: nilf %r4, 1
73+
; CHECK-NEXT: rosbg %r2, %r4, 32, 60, 3
74+
; CHECK-NEXT: rosbg %r2, %r3, 32, 61, 2
75+
; CHECK-NEXT: rosbg %r2, %r5, 32, 62, 1
76+
; CHECK-NEXT: or %r2, %r14
77+
; CHECK-NEXT: vlgvb %r4, %v0, 1
78+
; CHECK-NEXT: vlgvb %r3, %v0, 0
79+
; CHECK-NEXT: risbg %r3, %r3, 48, 176, 15
80+
; CHECK-NEXT: rosbg %r3, %r4, 49, 49, 14
81+
; CHECK-NEXT: vlgvb %r4, %v0, 2
82+
; CHECK-NEXT: rosbg %r3, %r4, 50, 50, 13
83+
; CHECK-NEXT: vlgvb %r4, %v0, 3
84+
; CHECK-NEXT: rosbg %r3, %r4, 51, 51, 12
85+
; CHECK-NEXT: vlgvb %r4, %v0, 4
86+
; CHECK-NEXT: rosbg %r3, %r4, 52, 52, 11
87+
; CHECK-NEXT: vlgvb %r4, %v0, 5
88+
; CHECK-NEXT: rosbg %r3, %r4, 53, 53, 10
89+
; CHECK-NEXT: vlgvb %r4, %v0, 6
90+
; CHECK-NEXT: rosbg %r3, %r4, 54, 54, 9
91+
; CHECK-NEXT: vlgvb %r4, %v0, 7
92+
; CHECK-NEXT: rosbg %r3, %r4, 55, 55, 8
93+
; CHECK-NEXT: vlgvb %r4, %v0, 8
94+
; CHECK-NEXT: rosbg %r3, %r4, 56, 56, 7
95+
; CHECK-NEXT: vlgvb %r4, %v0, 9
96+
; CHECK-NEXT: rosbg %r3, %r4, 57, 57, 6
97+
; CHECK-NEXT: vlgvb %r4, %v0, 10
98+
; CHECK-NEXT: rosbg %r3, %r4, 58, 58, 5
99+
; CHECK-NEXT: vlgvb %r4, %v0, 11
100+
; CHECK-NEXT: rosbg %r3, %r4, 59, 59, 4
101+
; CHECK-NEXT: vlgvb %r4, %v0, 12
102+
; CHECK-NEXT: rosbg %r3, %r4, 60, 60, 3
103+
; CHECK-NEXT: vlgvb %r4, %v0, 13
104+
; CHECK-NEXT: rosbg %r3, %r4, 61, 61, 2
105+
; CHECK-NEXT: vlgvb %r4, %v0, 14
106+
; CHECK-NEXT: rosbg %r3, %r4, 62, 62, 1
107+
; CHECK-NEXT: vlgvb %r4, %v0, 15
108+
; CHECK-NEXT: rosbg %r3, %r4, 63, 63, 0
109+
; CHECK-NEXT: xilf %r3, 4294967295
110+
; CHECK-NEXT: or %r3, %r2
111+
; CHECK-NEXT: tmll %r3, 65535
112+
; CHECK-NEXT: ipm %r2
113+
; CHECK-NEXT: afi %r2, -268435456
114+
; CHECK-NEXT: srl %r2, 31
118115
; CHECK-NEXT: nr %r2, %r1
119-
; CHECK-NEXT: nr %r2, %r3
120116
; CHECK-NEXT: nr %r2, %r0
121117
; CHECK-NEXT: larl %r1, g
122118
; CHECK-NEXT: stc %r2, 0(%r1)
123-
; CHECK-NEXT: lmg %r12, %r15, 264(%r15)
119+
; CHECK-NEXT: lmg %r13, %r15, 272(%r15)
124120
; CHECK-NEXT: br %r14
125121
entry:
126122
%n = alloca i32, align 4

llvm/test/CodeGen/X86/freeze-binary.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -546,8 +546,8 @@ define <8 x i16> @freeze_ashr_vec(<8 x i16> %a0) nounwind {
546546
define <4 x i32> @freeze_ashr_vec_outofrange(<4 x i32> %a0) nounwind {
547547
; X86-LABEL: freeze_ashr_vec_outofrange:
548548
; X86: # %bb.0:
549-
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
550549
; X86-NEXT: psrad $1, %xmm0
550+
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
551551
; X86-NEXT: psrad $2, %xmm0
552552
; X86-NEXT: retl
553553
;
@@ -660,8 +660,8 @@ define <8 x i16> @freeze_lshr_vec(<8 x i16> %a0) nounwind {
660660
define <4 x i32> @freeze_lshr_vec_outofrange(<4 x i32> %a0) nounwind {
661661
; X86-LABEL: freeze_lshr_vec_outofrange:
662662
; X86: # %bb.0:
663-
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
664663
; X86-NEXT: psrld $1, %xmm0
664+
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
665665
; X86-NEXT: psrld $2, %xmm0
666666
; X86-NEXT: retl
667667
;

0 commit comments

Comments
 (0)