Skip to content

Commit 93c9305

Browse files
committed
Implement Correct Matching Logic
1 parent 298712b commit 93c9305

File tree

2 files changed

+23
-7
lines changed

2 files changed

+23
-7
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2841,16 +2841,17 @@ static SDValue combineFixedwidthToAVG(SDNode *N, SelectionDAG &DAG) {
28412841
ConstantSDNode *N1C = isConstOrConstSplat(Lshr.getOperand(1));
28422842
if (!N1C || N1C->getAPIntValue() != 1)
28432843
return SDValue();
2844-
EVT VT = And.getValueType();
2844+
EVT VT = And1.getValueType();
2845+
EVT NVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
28452846
if (VT.isVector())
2846-
return SDValue();
2847+
VT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
2848+
else
2849+
VT = NVT;
28472850
SDLoc DL(N);
28482851
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
28492852
if (!TLI.isOperationLegalOrCustom(ISD::AVGFLOORU, VT))
28502853
return SDValue();
2851-
return DAG.getNode(ISD::AVGFLOORU, DL, VT,
2852-
DAG.getExtOrTrunc(false, And1, DL, VT),
2853-
DAG.getExtOrTrunc(false, And2, DL, VT));
2854+
return DAG.getNode(ISD::AVGFLOORU, DL, VT, And1, And2);
28542855
}
28552856

28562857
SDValue DAGCombiner::visitADD(SDNode *N) {

llvm/test/CodeGen/AArch64/hadd-combine.ll

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -859,8 +859,23 @@ define <4 x i32> @urhadd_v4i32(<4 x i32> %x) {
859859
ret <4 x i32> %r
860860
}
861861

862-
define <4 x i32> @fixedwidth(<4 x i32> %a0, <4 x i32> %a1) {
863-
; CHECK-LABEL: fixedwidth:
862+
define i4 @uhadd_fixedwidth_i4(i4 %a0, i4 %a1) {
863+
; CHECK-LABEL: uhadd_fixedwidth_i4:
864+
; CHECK: // %bb.0:
865+
; CHECK-NEXT: eor w8, w0, w1
866+
; CHECK-NEXT: and w9, w0, w1
867+
; CHECK-NEXT: and w8, w8, #0xe
868+
; CHECK-NEXT: add w0, w9, w8, lsr #1
869+
; CHECK-NEXT: ret
870+
%and = and i4 %a0, %a1
871+
%xor = xor i4 %a0, %a1
872+
%srl = lshr i4 %xor, 1
873+
%res = add i4 %and, %srl
874+
ret i4 %res
875+
}
876+
877+
define <4 x i32> @uhadd_fixedwidth_v4i32(<4 x i32> %a0, <4 x i32> %a1) {
878+
; CHECK-LABEL: uhadd_fixedwidth_v4i32:
864879
; CHECK: // %bb.0:
865880
; CHECK-NEXT: and v2.16b, v0.16b, v1.16b
866881
; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b

0 commit comments

Comments
 (0)