Skip to content

Commit 7fce332

Browse files
committed
[SDAG] allow vector types for select->logic folds
This prepares codegen for a change that will remove the identical folds from IR because they are not poison-safe. See D93065 / D97360 for details. We already generically support scalar types, and there are various target-specific transforms that overlap the vector folds. For example, x86 recognizes the 'and' patterns, but not the 'or' patterns. We can end up with 1 extra instruction there, but I think that is still preferred over the blendv alternative that loads a constant vector. If this is not optimal, then it should be fixed with a later transform (this change is not expected to result in any regressions because InstCombine currently does the same thing). Removing custom code and supporting undefs in constant-pattern-matching can be follow-up changes. Differential Revision: https://reviews.llvm.org/D97730
1 parent 4096ae0 commit 7fce332

File tree

3 files changed

+20
-22
lines changed

3 files changed

+20
-22
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9302,31 +9302,32 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
93029302
}
93039303

93049304
static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
9305-
assert(N->getOpcode() == ISD::SELECT && "Expected a select");
9305+
assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) &&
9306+
"Expected a (v)select");
93069307
SDValue Cond = N->getOperand(0);
93079308
SDValue T = N->getOperand(1), F = N->getOperand(2);
93089309
EVT VT = N->getValueType(0);
9309-
if (VT != Cond.getValueType() || VT != MVT::i1)
9310+
if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
93109311
return SDValue();
93119312

93129313
// select Cond, Cond, F --> or Cond, F
93139314
// select Cond, 1, F --> or Cond, F
9314-
if (Cond == T || isOneConstant(T))
9315+
if (Cond == T || isOneOrOneSplat(T))
93159316
return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
93169317

93179318
// select Cond, T, Cond --> and Cond, T
93189319
// select Cond, T, 0 --> and Cond, T
9319-
if (Cond == F || isNullConstant(F))
9320+
if (Cond == F || isNullOrNullSplat(F))
93209321
return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
93219322

93229323
// select Cond, T, 1 --> or (not Cond), T
9323-
if (isOneConstant(F)) {
9324+
if (isOneOrOneSplat(F)) {
93249325
SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
93259326
return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
93269327
}
93279328

93289329
// select Cond, 0, F --> and (not Cond), F
9329-
if (isNullConstant(T)) {
9330+
if (isNullOrNullSplat(T)) {
93309331
SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
93319332
return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
93329333
}
@@ -9788,6 +9789,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
97889789
if (SDValue V = DAG.simplifySelect(N0, N1, N2))
97899790
return V;
97909791

9792+
if (SDValue V = foldBoolSelectToLogic(N, DAG))
9793+
return V;
9794+
97919795
// vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
97929796
if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
97939797
return DAG.getSelect(DL, VT, F, N2, N1);

llvm/test/CodeGen/AArch64/select-with-and-or.ll

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ define <4 x i1> @and_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w)
6666
; CHECK: // %bb.0:
6767
; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
6868
; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s
69-
; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
69+
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
7070
; CHECK-NEXT: xtn v0.4h, v0.4s
7171
; CHECK-NEXT: ret
7272
%a = icmp eq <4 x i32> %x, %y
@@ -80,10 +80,8 @@ define <4 x i1> @or_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w)
8080
; CHECK: // %bb.0:
8181
; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
8282
; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s
83+
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
8384
; CHECK-NEXT: xtn v0.4h, v0.4s
84-
; CHECK-NEXT: xtn v1.4h, v1.4s
85-
; CHECK-NEXT: movi v2.4h, #1
86-
; CHECK-NEXT: bsl v0.8b, v2.8b, v1.8b
8785
; CHECK-NEXT: ret
8886
%a = icmp eq <4 x i32> %x, %y
8987
%b = icmp sgt <4 x i32> %z, %w
@@ -96,9 +94,8 @@ define <4 x i1> @and_not_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32>
9694
; CHECK: // %bb.0:
9795
; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
9896
; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s
97+
; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b
9998
; CHECK-NEXT: xtn v0.4h, v0.4s
100-
; CHECK-NEXT: xtn v1.4h, v1.4s
101-
; CHECK-NEXT: bic v0.8b, v1.8b, v0.8b
10299
; CHECK-NEXT: ret
103100
%a = icmp eq <4 x i32> %x, %y
104101
%b = icmp sgt <4 x i32> %z, %w
@@ -111,12 +108,8 @@ define <4 x i1> @or_not_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32>
111108
; CHECK: // %bb.0:
112109
; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
113110
; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s
114-
; CHECK-NEXT: movi v2.4h, #1
115-
; CHECK-NEXT: xtn v3.4h, v0.4s
116-
; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
111+
; CHECK-NEXT: orn v0.16b, v1.16b, v0.16b
117112
; CHECK-NEXT: xtn v0.4h, v0.4s
118-
; CHECK-NEXT: bic v1.8b, v2.8b, v3.8b
119-
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
120113
; CHECK-NEXT: ret
121114
%a = icmp eq <4 x i32> %x, %y
122115
%b = icmp sgt <4 x i32> %z, %w

llvm/test/CodeGen/X86/select-with-and-or.ll

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ define <4 x i1> @or_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w)
233233
; CHECK: # %bb.0:
234234
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
235235
; CHECK-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm1
236-
; CHECK-NEXT: vblendvps %xmm0, {{.*}}(%rip), %xmm1, %xmm0
236+
; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0
237237
; CHECK-NEXT: retq
238238
%a = icmp eq <4 x i32> %x, %y
239239
%b = icmp sgt <4 x i32> %z, %w
@@ -244,9 +244,9 @@ define <4 x i1> @or_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w)
244244
define <4 x i1> @and_not_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) {
245245
; CHECK-LABEL: and_not_vec:
246246
; CHECK: # %bb.0:
247-
; CHECK-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
248247
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
249-
; CHECK-NEXT: vpandn %xmm2, %xmm0, %xmm0
248+
; CHECK-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm1
249+
; CHECK-NEXT: vpandn %xmm1, %xmm0, %xmm0
250250
; CHECK-NEXT: retq
251251
%a = icmp eq <4 x i32> %x, %y
252252
%b = icmp sgt <4 x i32> %z, %w
@@ -258,9 +258,10 @@ define <4 x i1> @or_not_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32>
258258
; CHECK-LABEL: or_not_vec:
259259
; CHECK: # %bb.0:
260260
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
261+
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
262+
; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
261263
; CHECK-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm1
262-
; CHECK-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1]
263-
; CHECK-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
264+
; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0
264265
; CHECK-NEXT: retq
265266
%a = icmp eq <4 x i32> %x, %y
266267
%b = icmp sgt <4 x i32> %z, %w

0 commit comments

Comments
 (0)