Skip to content

Commit 0a086da

Browse files
committed
[WebAssembly] Add 3 more optimizations for any/all
all_true (setcc x, 0, eq) -> not any_true; any_true (setcc x, 0, ne) -> any_true; all_true (setcc x, 0, ne) -> all_true
1 parent 9a31f41 commit 0a086da

File tree

2 files changed

+49
-46
lines changed

2 files changed

+49
-46
lines changed

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp

Lines changed: 43 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -3240,40 +3240,53 @@ static SDValue performBitcastCombine(SDNode *N,
32403240
return SDValue();
32413241
}
32423242

3243-
static SDValue performAnyTrueCombine(SDNode *N, SelectionDAG &DAG) {
3244-
// any_true (setcc <X>, 0, eq)
3245-
// => not (all_true X)
3246-
3247-
SDLoc DL(N);
3243+
static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG) {
3244+
// any_true (setcc <X>, 0, eq) => (not (all_true X))
3245+
// all_true (setcc <X>, 0, eq) => (not (any_true X))
3246+
// any_true (setcc <X>, 0, ne) => (any_true X)
3247+
// all_true (setcc <X>, 0, ne) => (all_true X)
32483248
assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
3249-
if (N->getConstantOperandVal(0) != Intrinsic::wasm_anytrue)
3250-
return SDValue();
3249+
using namespace llvm::SDPatternMatch;
3250+
SDLoc DL(N);
3251+
static auto SimdCombiner =
3252+
[&](Intrinsic::WASMIntrinsics InPre, ISD::CondCode SetType,
3253+
Intrinsic::WASMIntrinsics InPost, bool ShouldInvert) -> SDValue {
3254+
if (N->getConstantOperandVal(0) != InPre)
3255+
return SDValue();
32513256

3252-
SDValue SetCC = N->getOperand(1);
3253-
if (SetCC.getOpcode() != ISD::SETCC)
3254-
return SDValue();
3257+
SDValue LHS;
3258+
if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(),
3259+
m_SpecificCondCode(SetType))))
3260+
return SDValue();
32553261

3256-
SDValue LHS = SetCC->getOperand(0);
3257-
SDValue RHS = SetCC->getOperand(1);
3258-
ISD::CondCode Cond = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
3259-
EVT LT = LHS.getValueType();
3260-
unsigned NumElts = LT.getVectorNumElements();
3261-
if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
3262-
return SDValue();
3262+
EVT LT = LHS.getValueType();
3263+
unsigned NumElts = LT.getVectorNumElements();
3264+
if (LT.getScalarSizeInBits() > 128 / NumElts)
3265+
return SDValue();
32633266

3264-
EVT Width = MVT::getIntegerVT(128 / NumElts);
3267+
SDValue Ret = DAG.getZExtOrTrunc(
3268+
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3269+
{DAG.getConstant(InPost, DL, MVT::i32), LHS}),
3270+
DL, MVT::i1);
3271+
if (ShouldInvert)
3272+
Ret = DAG.getNOT(DL, Ret, MVT::i1);
3273+
return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3274+
};
32653275

3266-
if (!isNullOrNullSplat(RHS) || Cond != ISD::SETEQ)
3267-
return SDValue();
3276+
if (SDValue AnyTrueEQ = SimdCombiner(Intrinsic::wasm_anytrue, ISD::SETEQ,
3277+
Intrinsic::wasm_alltrue, true))
3278+
return AnyTrueEQ;
3279+
if (SDValue AllTrueEQ = SimdCombiner(Intrinsic::wasm_alltrue, ISD::SETEQ,
3280+
Intrinsic::wasm_anytrue, true))
3281+
return AllTrueEQ;
3282+
if (SDValue AnyTrueNE = SimdCombiner(Intrinsic::wasm_anytrue, ISD::SETNE,
3283+
Intrinsic::wasm_anytrue, false))
3284+
return AnyTrueNE;
3285+
if (SDValue AllTrueNE = SimdCombiner(Intrinsic::wasm_alltrue, ISD::SETNE,
3286+
Intrinsic::wasm_alltrue, false))
3287+
return AllTrueNE;
32683288

3269-
SDValue Ret = DAG.getZExtOrTrunc(
3270-
DAG.getNode(
3271-
ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3272-
{DAG.getConstant(Intrinsic::wasm_alltrue, DL, MVT::i32),
3273-
DAG.getSExtOrTrunc(LHS, DL, LT.changeVectorElementType(Width))}),
3274-
DL, MVT::i1);
3275-
Ret = DAG.getNOT(DL, Ret, MVT::i1);
3276-
return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3289+
return SDValue();
32773290
}
32783291

32793292
template <int MatchRHS, ISD::CondCode MatchCond, bool RequiresNegate,
@@ -3465,8 +3478,8 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
34653478
case ISD::TRUNCATE:
34663479
return performTruncateCombine(N, DCI);
34673480
case ISD::INTRINSIC_WO_CHAIN: {
3468-
if (auto AnyTrueCombine = performAnyTrueCombine(N, DCI.DAG))
3469-
return AnyTrueCombine;
3481+
if (auto AnyAllCombine = performAnyAllCombine(N, DCI.DAG))
3482+
return AnyAllCombine;
34703483
return performLowerPartialReduction(N, DCI.DAG);
34713484
}
34723485
case ISD::MUL:

llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -89,10 +89,8 @@ define i32 @any_true_1_4_i32(<4 x i32> %v) {
8989
; CHECK-LABEL: any_true_1_4_i32:
9090
; CHECK: .functype any_true_1_4_i32 (v128) -> (i32)
9191
; CHECK-NEXT: # %bb.0:
92-
; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0
93-
; CHECK-NEXT: i32x4.ne $push1=, $0, $pop0
94-
; CHECK-NEXT: v128.any_true $push2=, $pop1
95-
; CHECK-NEXT: return $pop2
92+
; CHECK-NEXT: v128.any_true $push0=, $0
93+
; CHECK-NEXT: return $pop0
9694
%1 = icmp ne <4 x i32> %v, zeroinitializer
9795
%2 = bitcast <4 x i1> %1 to i4
9896
%3 = icmp ne i4 %2, 0
@@ -109,14 +107,8 @@ define i32 @any_true_2_4_i32(<4 x i32> %v) {
109107
; CHECK-LABEL: any_true_2_4_i32:
110108
; CHECK: .functype any_true_2_4_i32 (v128) -> (i32)
111109
; CHECK-NEXT: # %bb.0:
112-
; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0
113-
; CHECK-NEXT: i32x4.eq $push1=, $0, $pop0
114-
; CHECK-NEXT: i32x4.all_true $push2=, $pop1
115-
; CHECK-NEXT: i32.const $push3=, -1
116-
; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3
117-
; CHECK-NEXT: i32.const $push5=, 1
118-
; CHECK-NEXT: i32.and $push6=, $pop4, $pop5
119-
; CHECK-NEXT: return $pop6
110+
; CHECK-NEXT: v128.any_true $push0=, $0
111+
; CHECK-NEXT: return $pop0
120112
%1 = icmp eq <4 x i32> %v, zeroinitializer
121113
%2 = bitcast <4 x i1> %1 to i4
122114
%3 = icmp ne i4 %2, -1
@@ -132,10 +124,8 @@ define i32 @all_true_2_4_i32(<4 x i32> %v) {
132124
; CHECK-LABEL: all_true_2_4_i32:
133125
; CHECK: .functype all_true_2_4_i32 (v128) -> (i32)
134126
; CHECK-NEXT: # %bb.0:
135-
; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0
136-
; CHECK-NEXT: i32x4.ne $push1=, $0, $pop0
137-
; CHECK-NEXT: i32x4.all_true $push2=, $pop1
138-
; CHECK-NEXT: return $pop2
127+
; CHECK-NEXT: i32x4.all_true $push0=, $0
128+
; CHECK-NEXT: return $pop0
139129
%1 = icmp ne <4 x i32> %v, zeroinitializer
140130
%2 = bitcast <4 x i1> %1 to i4
141131
%3 = icmp eq i4 %2, -1

0 commit comments

Comments
 (0)