Skip to content

Commit 9d38a54

Browse files
committed
Fix 50142
Fix a missed vectorization opportunity introduced in 50142, where the best lowering we could achieve was zext (xor (any_true), -1). The pattern is now converted to all_true; see the test case simd-setcc-reductions.ll.
1 parent 0824664 commit 9d38a54

File tree

2 files changed

+42
-41
lines changed

2 files changed

+42
-41
lines changed

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3248,6 +3248,37 @@ static SDValue performSETCCCombine(SDNode *N,
32483248
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
32493249
SDLoc DL(N);
32503250
EVT VT = N->getValueType(0);
3251+
// Names: N = outer setcc, LHS = bitcast, LhsL = inner setcc, LhsLL = X, LhsLR = <vNiY 0>, InnerCond = inner eq, RHS = 0, Cond = outer eq
3252+
// setcc (iN (bitcast (setcc vNi1 (vNiY X), <vNiY 0>, eq))), 0, eq
3253+
// => all_true (vNiY X)
3254+
if (DCI.isBeforeLegalize() && VT.isScalarInteger() && (Cond == ISD::SETEQ) &&
3255+
(isNullConstant(RHS)) && LHS->getOpcode() == ISD::BITCAST) {
3256+
SDValue LhsL = LHS.getOperand(0);
3257+
EVT LhsLType = LhsL.getValueType();
3258+
if (LhsL.getOpcode() == ISD::SETCC) {
3259+
ISD::CondCode InnerCond =
3260+
cast<CondCodeSDNode>(LhsL->getOperand(2))->get();
3261+
if (InnerCond == ISD::SETEQ) {
3262+
SDValue LhsLL = LhsL.getOperand(0); // vNiY X
3263+
SDValue LhsLR = LhsL.getOperand(1); // <0>
3264+
unsigned NumElts = LhsLType.getVectorNumElements();
3265+
bool Vectorizable =
3266+
NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16;
3267+
EVT Width = MVT::getIntegerVT(128 / NumElts);
3268+
3269+
if (Vectorizable && LhsLR.getOpcode() == ISD::BUILD_VECTOR &&
3270+
LhsLType.isFixedLengthVector()) {
3271+
return DAG.getZExtOrTrunc(
3272+
DAG.getNode(
3273+
ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3274+
{DAG.getConstant(Intrinsic::wasm_alltrue, DL, MVT::i32),
3275+
DAG.getSExtOrTrunc(
3276+
LhsLL, DL, LhsLType.changeVectorElementType(Width))}),
3277+
DL, MVT::i1);
3278+
}
3279+
}
3280+
}
3281+
}
32513282

32523283
// setcc (iN (bitcast (vNi1 X))), 0, ne
32533284
// ==> any_true (vNi1 X)

llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll

Lines changed: 11 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,8 @@ define i32 @all_true_16_i8(<16 x i8> %v) {
77
; CHECK-LABEL: all_true_16_i8:
88
; CHECK: .functype all_true_16_i8 (v128) -> (i32)
99
; CHECK-NEXT: # %bb.0:
10-
; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
11-
; CHECK-NEXT: i8x16.eq $push1=, $0, $pop0
12-
; CHECK-NEXT: v128.any_true $push2=, $pop1
13-
; CHECK-NEXT: i32.const $push3=, -1
14-
; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3
15-
; CHECK-NEXT: i32.const $push5=, 1
16-
; CHECK-NEXT: i32.and $push6=, $pop4, $pop5
17-
; CHECK-NEXT: return $pop6
10+
; CHECK-NEXT: i8x16.all_true $push0=, $0
11+
; CHECK-NEXT: return $pop0
1812
%1 = icmp eq <16 x i8> %v, zeroinitializer
1913
%2 = bitcast <16 x i1> %1 to i16
2014
%3 = icmp eq i16 %2, 0
@@ -27,14 +21,8 @@ define i32 @all_true_4_i32(<4 x i32> %v) {
2721
; CHECK-LABEL: all_true_4_i32:
2822
; CHECK: .functype all_true_4_i32 (v128) -> (i32)
2923
; CHECK-NEXT: # %bb.0:
30-
; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0
31-
; CHECK-NEXT: i32x4.eq $push1=, $0, $pop0
32-
; CHECK-NEXT: v128.any_true $push2=, $pop1
33-
; CHECK-NEXT: i32.const $push3=, -1
34-
; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3
35-
; CHECK-NEXT: i32.const $push5=, 1
36-
; CHECK-NEXT: i32.and $push6=, $pop4, $pop5
37-
; CHECK-NEXT: return $pop6
24+
; CHECK-NEXT: i32x4.all_true $push0=, $0
25+
; CHECK-NEXT: return $pop0
3826
%1 = icmp eq <4 x i32> %v, zeroinitializer
3927
%2 = bitcast <4 x i1> %1 to i4
4028
%3 = icmp eq i4 %2, 0
@@ -47,14 +35,8 @@ define i32 @all_true_8_i16(<8 x i16> %v) {
4735
; CHECK-LABEL: all_true_8_i16:
4836
; CHECK: .functype all_true_8_i16 (v128) -> (i32)
4937
; CHECK-NEXT: # %bb.0:
50-
; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0
51-
; CHECK-NEXT: i16x8.eq $push1=, $0, $pop0
52-
; CHECK-NEXT: v128.any_true $push2=, $pop1
53-
; CHECK-NEXT: i32.const $push3=, -1
54-
; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3
55-
; CHECK-NEXT: i32.const $push5=, 1
56-
; CHECK-NEXT: i32.and $push6=, $pop4, $pop5
57-
; CHECK-NEXT: return $pop6
38+
; CHECK-NEXT: i16x8.all_true $push0=, $0
39+
; CHECK-NEXT: return $pop0
5840
%1 = icmp eq <8 x i16> %v, zeroinitializer
5941
%2 = bitcast <8 x i1> %1 to i8
6042
%3 = icmp eq i8 %2, 0
@@ -67,15 +49,9 @@ define i32 @all_true_4_i16(<4 x i16> %v) {
6749
; CHECK-LABEL: all_true_4_i16:
6850
; CHECK: .functype all_true_4_i16 (v128) -> (i32)
6951
; CHECK-NEXT: # %bb.0:
70-
; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0
71-
; CHECK-NEXT: i16x8.eq $push1=, $0, $pop0
72-
; CHECK-NEXT: i32x4.extend_low_i16x8_s $push2=, $pop1
73-
; CHECK-NEXT: v128.any_true $push3=, $pop2
74-
; CHECK-NEXT: i32.const $push4=, -1
75-
; CHECK-NEXT: i32.xor $push5=, $pop3, $pop4
76-
; CHECK-NEXT: i32.const $push6=, 1
77-
; CHECK-NEXT: i32.and $push7=, $pop5, $pop6
78-
; CHECK-NEXT: return $pop7
52+
; CHECK-NEXT: i32x4.extend_low_i16x8_s $push0=, $0
53+
; CHECK-NEXT: i32x4.all_true $push1=, $pop0
54+
; CHECK-NEXT: return $pop1
7955
%1 = icmp eq <4 x i16> %v, zeroinitializer
8056
%2 = bitcast <4 x i1> %1 to i4
8157
%3 = icmp eq i4 %2, 0
@@ -88,14 +64,8 @@ define i32 @all_true_2_i64(<2 x i64> %v) {
8864
; CHECK-LABEL: all_true_2_i64:
8965
; CHECK: .functype all_true_2_i64 (v128) -> (i32)
9066
; CHECK-NEXT: # %bb.0:
91-
; CHECK-NEXT: v128.const $push0=, 0, 0
92-
; CHECK-NEXT: i64x2.eq $push1=, $0, $pop0
93-
; CHECK-NEXT: v128.any_true $push2=, $pop1
94-
; CHECK-NEXT: i32.const $push3=, -1
95-
; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3
96-
; CHECK-NEXT: i32.const $push5=, 1
97-
; CHECK-NEXT: i32.and $push6=, $pop4, $pop5
98-
; CHECK-NEXT: return $pop6
67+
; CHECK-NEXT: i64x2.all_true $push0=, $0
68+
; CHECK-NEXT: return $pop0
9969
%1 = icmp eq <2 x i64> %v, zeroinitializer
10070
%2 = bitcast <2 x i1> %1 to i2
10171
%3 = icmp eq i2 %2, 0

0 commit comments

Comments
 (0)