Skip to content

Commit e9c9f8f

Browse files
authored
[WebAssembly] Fold any/alltrue (setcc x, 0, eq/ne) to [not] any/alltrue x (#144741)
Fixes #50142, a missed vectorization opportunity where the best result we could achieve was zext (xor (any_true), -1). Now, in the test case simd-setcc-reductions, it is converted to all_true. Also fixes #145177, which covers the following folds: all_true (setcc x, 0, eq) -> not any_true; any_true (setcc x, 0, ne) -> any_true; all_true (setcc x, 0, ne) -> all_true. --------- Co-authored-by: badumbatish <--show-origin>
1 parent 4a8c1f7 commit e9c9f8f

File tree

3 files changed

+195
-4
lines changed

3 files changed

+195
-4
lines changed

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3286,6 +3286,59 @@ static SDValue performBitcastCombine(SDNode *N,
32863286
return SDValue();
32873287
}
32883288

3289+
/// Fold a wasm any_true/all_true intrinsic whose vector operand is a
/// comparison against zero into the intrinsic applied to the compared value:
///
///   any_true (setcc <X>, 0, eq) => (not (all_true X))
///   all_true (setcc <X>, 0, eq) => (not (any_true X))
///   any_true (setcc <X>, 0, ne) => (any_true X)
///   all_true (setcc <X>, 0, ne) => (all_true X)
///
/// \param N   The node to combine; must be an ISD::INTRINSIC_WO_CHAIN node.
/// \param DAG The DAG used to build the replacement nodes.
/// \returns the replacement value, or an empty SDValue when no fold applies.
static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
  using namespace llvm::SDPatternMatch;

  // The caller dispatches on ISD::INTRINSIC_WO_CHAIN alone, so N may be any
  // chainless intrinsic. Reject everything except any_true/all_true before
  // touching operand 1: an intrinsic without arguments has no operand 1, and
  // an unrelated intrinsic may carry a scalar compare there.
  if (N->getNumOperands() < 2)
    return SDValue();
  const uint64_t IntrinsicID = N->getConstantOperandVal(0);
  if (IntrinsicID != Intrinsic::wasm_anytrue &&
      IntrinsicID != Intrinsic::wasm_alltrue)
    return SDValue();

  SDValue LHS;
  if (!sd_match(N->getOperand(1),
                m_c_SetCC(m_Value(LHS), m_Zero(), m_CondCode())))
    return SDValue();

  // Only fold when the compared value is a fixed-length vector whose element
  // width is at most 128 / element-count bits; wider inputs are left alone.
  EVT LT = LHS.getValueType();
  if (!LT.isFixedLengthVector() ||
      LT.getScalarSizeInBits() > 128 / LT.getVectorNumElements())
    return SDValue();

  // Try one (intrinsic, condition) pairing: when N is the InPre intrinsic
  // applied to (setcc X, 0, SetType), rebuild it as the InPost intrinsic
  // applied directly to X, negating the result for SETEQ.
  auto CombineSetCC = [&](Intrinsic::WASMIntrinsics InPre,
                          ISD::CondCode SetType,
                          Intrinsic::WASMIntrinsics InPost) {
    if (IntrinsicID != InPre)
      return SDValue();

    SDValue X;
    if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(X), m_Zero(),
                                              m_SpecificCondCode(SetType))))
      return SDValue();

    SDLoc DL(N);
    // The intrinsic yields an i32; narrow it to i1 so the optional NOT flips
    // exactly one bit, then widen/narrow to the type N originally produced.
    SDValue Ret = DAG.getZExtOrTrunc(
        DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
                    {DAG.getConstant(InPost, DL, MVT::i32), X}),
        DL, MVT::i1);
    if (SetType == ISD::SETEQ)
      Ret = DAG.getNOT(DL, Ret, MVT::i1);
    return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
  };

  if (SDValue AnyTrueEQ = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETEQ,
                                       Intrinsic::wasm_alltrue))
    return AnyTrueEQ;
  if (SDValue AllTrueEQ = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETEQ,
                                       Intrinsic::wasm_anytrue))
    return AllTrueEQ;
  if (SDValue AnyTrueNE = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETNE,
                                       Intrinsic::wasm_anytrue))
    return AnyTrueNE;
  if (SDValue AllTrueNE = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETNE,
                                       Intrinsic::wasm_alltrue))
    return AllTrueNE;

  return SDValue();
}
3341+
32893342
template <int MatchRHS, ISD::CondCode MatchCond, bool RequiresNegate,
32903343
Intrinsic::ID Intrin>
32913344
static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG) {
@@ -3474,8 +3527,11 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
34743527
return performVectorTruncZeroCombine(N, DCI);
34753528
case ISD::TRUNCATE:
34763529
return performTruncateCombine(N, DCI);
3477-
case ISD::INTRINSIC_WO_CHAIN:
3530+
case ISD::INTRINSIC_WO_CHAIN: {
3531+
if (auto AnyAllCombine = performAnyAllCombine(N, DCI.DAG))
3532+
return AnyAllCombine;
34783533
return performLowerPartialReduction(N, DCI.DAG);
3534+
}
34793535
case ISD::MUL:
34803536
return performMulCombine(N, DCI.DAG);
34813537
}
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s
3+
4+
target triple = "wasm64"
5+
6+
define i32 @all_true_16_i8(<16 x i8> %v) {
7+
; CHECK-LABEL: all_true_16_i8:
8+
; CHECK: .functype all_true_16_i8 (v128) -> (i32)
9+
; CHECK-NEXT: # %bb.0:
10+
; CHECK-NEXT: i8x16.all_true $push0=, $0
11+
; CHECK-NEXT: return $pop0
12+
%1 = icmp eq <16 x i8> %v, zeroinitializer
13+
%2 = bitcast <16 x i1> %1 to i16
14+
%3 = icmp eq i16 %2, 0
15+
%conv3 = zext i1 %3 to i32
16+
ret i32 %conv3
17+
}
18+
19+
20+
define i32 @all_true_4_i32(<4 x i32> %v) {
21+
; CHECK-LABEL: all_true_4_i32:
22+
; CHECK: .functype all_true_4_i32 (v128) -> (i32)
23+
; CHECK-NEXT: # %bb.0:
24+
; CHECK-NEXT: i32x4.all_true $push0=, $0
25+
; CHECK-NEXT: return $pop0
26+
%1 = icmp eq <4 x i32> %v, zeroinitializer
27+
%2 = bitcast <4 x i1> %1 to i4
28+
%3 = icmp eq i4 %2, 0
29+
%conv3 = zext i1 %3 to i32
30+
ret i32 %conv3
31+
}
32+
33+
34+
define i32 @all_true_8_i16(<8 x i16> %v) {
35+
; CHECK-LABEL: all_true_8_i16:
36+
; CHECK: .functype all_true_8_i16 (v128) -> (i32)
37+
; CHECK-NEXT: # %bb.0:
38+
; CHECK-NEXT: i16x8.all_true $push0=, $0
39+
; CHECK-NEXT: return $pop0
40+
%1 = icmp eq <8 x i16> %v, zeroinitializer
41+
%2 = bitcast <8 x i1> %1 to i8
42+
%3 = icmp eq i8 %2, 0
43+
%conv3 = zext i1 %3 to i32
44+
ret i32 %conv3
45+
}
46+
47+
48+
define i32 @all_true_2_i64(<2 x i64> %v) {
49+
; CHECK-LABEL: all_true_2_i64:
50+
; CHECK: .functype all_true_2_i64 (v128) -> (i32)
51+
; CHECK-NEXT: # %bb.0:
52+
; CHECK-NEXT: i64x2.all_true $push0=, $0
53+
; CHECK-NEXT: return $pop0
54+
%1 = icmp eq <2 x i64> %v, zeroinitializer
55+
%2 = bitcast <2 x i1> %1 to i2
56+
%3 = icmp eq i2 %2, 0
57+
%conv3 = zext i1 %3 to i32
58+
ret i32 %conv3
59+
}
60+
61+
62+
define i32 @all_true_4_i64(<4 x i64> %v) {
63+
; CHECK-LABEL: all_true_4_i64:
64+
; CHECK: .functype all_true_4_i64 (v128, v128) -> (i32)
65+
; CHECK-NEXT: # %bb.0:
66+
; CHECK-NEXT: v128.const $push9=, 0, 0
67+
; CHECK-NEXT: local.tee $push8=, $2=, $pop9
68+
; CHECK-NEXT: i64x2.eq $push1=, $0, $pop8
69+
; CHECK-NEXT: i64x2.eq $push0=, $1, $2
70+
; CHECK-NEXT: i8x16.shuffle $push2=, $pop1, $pop0, 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
71+
; CHECK-NEXT: v128.any_true $push3=, $pop2
72+
; CHECK-NEXT: i32.const $push4=, -1
73+
; CHECK-NEXT: i32.xor $push5=, $pop3, $pop4
74+
; CHECK-NEXT: i32.const $push6=, 1
75+
; CHECK-NEXT: i32.and $push7=, $pop5, $pop6
76+
; CHECK-NEXT: return $pop7
77+
%1 = icmp eq <4 x i64> %v, zeroinitializer
78+
%2 = bitcast <4 x i1> %1 to i4
79+
%3 = icmp eq i4 %2, 0
80+
%conv3 = zext i1 %3 to i32
81+
ret i32 %conv3
82+
}
83+
84+
85+
; setcc (iN (bitcast (set_cc (vNi1 X), 0, ne)), 0, ne
86+
; => any_true (set_cc (X), 0, ne)
87+
; => any_true (X)
88+
define i32 @any_true_1_4_i32(<4 x i32> %v) {
89+
; CHECK-LABEL: any_true_1_4_i32:
90+
; CHECK: .functype any_true_1_4_i32 (v128) -> (i32)
91+
; CHECK-NEXT: # %bb.0:
92+
; CHECK-NEXT: v128.any_true $push0=, $0
93+
; CHECK-NEXT: return $pop0
94+
%1 = icmp ne <4 x i32> %v, zeroinitializer
95+
%2 = bitcast <4 x i1> %1 to i4
96+
%3 = icmp ne i4 %2, 0
97+
%conv3 = zext i1 %3 to i32
98+
ret i32 %conv3
99+
}
100+
101+
; setcc (iN (bitcast (set_cc (vNi1 X), 0, eq)), -1, ne
102+
; => not all_true (set_cc (X), 0, eq)
103+
; => not not any_true (X)
104+
; => any_true (X)
105+
define i32 @any_true_2_4_i32(<4 x i32> %v) {
106+
; CHECK-LABEL: any_true_2_4_i32:
107+
; CHECK: .functype any_true_2_4_i32 (v128) -> (i32)
108+
; CHECK-NEXT: # %bb.0:
109+
; CHECK-NEXT: v128.any_true $push0=, $0
110+
; CHECK-NEXT: return $pop0
111+
%1 = icmp eq <4 x i32> %v, zeroinitializer
112+
%2 = bitcast <4 x i1> %1 to i4
113+
%3 = icmp ne i4 %2, -1
114+
%conv3 = zext i1 %3 to i32
115+
ret i32 %conv3
116+
}
117+
118+
119+
; setcc (iN (bitcast (set_cc (vNi1 X), 0, ne)), -1, eq
120+
; => all_true (set_cc (X), 0, ne)
121+
; => all_true (X)
122+
define i32 @all_true_2_4_i32(<4 x i32> %v) {
123+
; CHECK-LABEL: all_true_2_4_i32:
124+
; CHECK: .functype all_true_2_4_i32 (v128) -> (i32)
125+
; CHECK-NEXT: # %bb.0:
126+
; CHECK-NEXT: i32x4.all_true $push0=, $0
127+
; CHECK-NEXT: return $pop0
128+
%1 = icmp ne <4 x i32> %v, zeroinitializer
129+
%2 = bitcast <4 x i1> %1 to i4
130+
%3 = icmp eq i4 %2, -1
131+
%conv3 = zext i1 %3 to i32
132+
ret i32 %conv3
133+
}
134+
135+

llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1086,9 +1086,9 @@ define i1 @test_cmp_v16i8(<16 x i8> %x) {
10861086
; CHECK-LABEL: test_cmp_v16i8:
10871087
; CHECK: .functype test_cmp_v16i8 (v128) -> (i32)
10881088
; CHECK-NEXT: # %bb.0:
1089-
; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1090-
; CHECK-NEXT: i8x16.eq $push1=, $0, $pop0
1091-
; CHECK-NEXT: v128.any_true $push2=, $pop1
1089+
; CHECK-NEXT: i8x16.all_true $push0=, $0
1090+
; CHECK-NEXT: i32.const $push1=, 1
1091+
; CHECK-NEXT: i32.xor $push2=, $pop0, $pop1
10921092
; CHECK-NEXT: return $pop2
10931093
%zero = icmp eq <16 x i8> %x, zeroinitializer
10941094
%ret = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %zero)

0 commit comments

Comments
 (0)