Skip to content

Commit 7a06681

Browse files
Revert "[DAGCombiner] Add generic DAG combine for ISD::PARTIAL_REDUCE_MLA (llvm#127083)"
This reverts commit 2bef21f. Multiple buildbot failures have been reported: llvm#127083
1 parent 7c8b127 commit 7a06681

File tree

3 files changed

+251
-330
lines changed

3 files changed

+251
-330
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 0 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -545,7 +545,6 @@ namespace {
545545
SDValue visitMGATHER(SDNode *N);
546546
SDValue visitMSCATTER(SDNode *N);
547547
SDValue visitMHISTOGRAM(SDNode *N);
548-
SDValue visitPARTIAL_REDUCE_MLA(SDNode *N);
549548
SDValue visitVPGATHER(SDNode *N);
550549
SDValue visitVPSCATTER(SDNode *N);
551550
SDValue visitVP_STRIDED_LOAD(SDNode *N);
@@ -1974,9 +1973,6 @@ SDValue DAGCombiner::visit(SDNode *N) {
19741973
case ISD::MSCATTER: return visitMSCATTER(N);
19751974
case ISD::MSTORE: return visitMSTORE(N);
19761975
case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM: return visitMHISTOGRAM(N);
1977-
case ISD::PARTIAL_REDUCE_SMLA:
1978-
case ISD::PARTIAL_REDUCE_UMLA:
1979-
return visitPARTIAL_REDUCE_MLA(N);
19801976
case ISD::VECTOR_COMPRESS: return visitVECTOR_COMPRESS(N);
19811977
case ISD::LIFETIME_END: return visitLIFETIME_END(N);
19821978
case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
@@ -12496,58 +12492,6 @@ SDValue DAGCombiner::visitMHISTOGRAM(SDNode *N) {
1249612492
return SDValue();
1249712493
}
1249812494

12499-
// Folds PARTIAL_REDUCE_*MLA(Acc, MUL(ZEXT(LHSExtOp), ZEXT(RHSExtOp)),
12500-
// Splat(1)) into
12501-
// PARTIAL_REDUCE_UMLA(Acc, LHSExtOp, RHSExtOp), and
12502-
// folds PARTIAL_REDUCE_*MLA(Acc, MUL(SEXT(LHSExtOp), SEXT(RHSExtOp)),
12503-
// Splat(1)) into
12504-
// PARTIAL_REDUCE_SMLA(Acc, LHSExtOp, RHSExtOp).
// Returns an empty SDValue when the pattern does not match. Any extend opcode
// other than SIGN_EXTEND selects the unsigned (UMLA) form.
12505-
SDValue DAGCombiner::visitPARTIAL_REDUCE_MLA(SDNode *N) {
12506-
SDLoc DL(N);
12507-
12508-
SDValue Acc = N->getOperand(0);
12509-
SDValue Op1 = N->getOperand(1);
12510-
SDValue Op2 = N->getOperand(2);
12511-
12512-
APInt ConstantOne;
12513-
// Only handle a MUL in operand 1 combined with a constant splat of one in
// operand 2.
if (Op1->getOpcode() != ISD::MUL ||
12514-
!ISD::isConstantSplatVector(Op2.getNode(), ConstantOne) ||
12515-
!ConstantOne.isOne())
12516-
return SDValue();
12517-
12518-
SDValue LHS = Op1->getOperand(0);
12519-
SDValue RHS = Op1->getOperand(1);
12520-
unsigned LHSOpcode = LHS->getOpcode();
12521-
unsigned RHSOpcode = RHS->getOpcode();
12522-
// Both multiplicands must themselves be extend nodes.
if (!ISD::isExtOpcode(LHSOpcode) || !ISD::isExtOpcode(RHSOpcode))
12523-
return SDValue();
12524-
12525-
SDValue LHSExtOp = LHS->getOperand(0);
12526-
SDValue RHSExtOp = RHS->getOperand(0);
12527-
EVT LHSExtOpVT = LHSExtOp.getValueType();
12528-
// The two extends must use the same opcode and extend from the same type.
if (LHSExtOpVT != RHSExtOp.getValueType() || LHSOpcode != RHSOpcode)
12529-
return SDValue();
12530-
12531-
// FIXME: Add a check to only perform the DAG combine if there is lowering
12532-
// provided by the target.
12533-
12534-
bool ExtIsSigned = LHSOpcode == ISD::SIGN_EXTEND;
12535-
12536-
// For a 2-stage extend (presumably the case where the multiply's element type
12537-
// differs from the accumulator's) the signedness of the extends must match the
12538-
// node's, so that the result can be folded into a single signed or unsigned node.
12539-
bool NodeIsSigned = N->getOpcode() == ISD::PARTIAL_REDUCE_SMLA;
12540-
EVT AccElemVT = Acc.getValueType().getVectorElementType();
12541-
if (ExtIsSigned != NodeIsSigned &&
12542-
Op1.getValueType().getVectorElementType() != AccElemVT)
12543-
return SDValue();
12544-
12545-
// Build the replacement node directly on the un-extended multiplicands; its
// signedness follows the extends, not the original node.
unsigned NewOpcode =
12546-
ExtIsSigned ? ISD::PARTIAL_REDUCE_SMLA : ISD::PARTIAL_REDUCE_UMLA;
12547-
return DAG.getNode(NewOpcode, DL, N->getValueType(0), Acc, LHSExtOp,
12548-
RHSExtOp);
12549-
}
12550-
1255112495
SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
1255212496
auto *SLD = cast<VPStridedLoadSDNode>(N);
1255312497
EVT EltVT = SLD->getValueType(0).getVectorElementType();

llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll

Lines changed: 57 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,13 @@ define <4 x i32> @udot(<4 x i32> %acc, <16 x i8> %u, <16 x i8> %s) {
1212
;
1313
; CHECK-NODOT-LABEL: udot:
1414
; CHECK-NODOT: // %bb.0:
15-
; CHECK-NODOT-NEXT: ushll v3.8h, v1.8b, #0
16-
; CHECK-NODOT-NEXT: ushll v4.8h, v2.8b, #0
17-
; CHECK-NODOT-NEXT: ushll2 v1.8h, v1.16b, #0
18-
; CHECK-NODOT-NEXT: ushll2 v2.8h, v2.16b, #0
19-
; CHECK-NODOT-NEXT: umlal v0.4s, v4.4h, v3.4h
20-
; CHECK-NODOT-NEXT: umull v5.4s, v2.4h, v1.4h
21-
; CHECK-NODOT-NEXT: umlal2 v0.4s, v2.8h, v1.8h
22-
; CHECK-NODOT-NEXT: umlal2 v5.4s, v4.8h, v3.8h
23-
; CHECK-NODOT-NEXT: add v0.4s, v5.4s, v0.4s
15+
; CHECK-NODOT-NEXT: umull v3.8h, v2.8b, v1.8b
16+
; CHECK-NODOT-NEXT: umull2 v1.8h, v2.16b, v1.16b
17+
; CHECK-NODOT-NEXT: ushll v2.4s, v1.4h, #0
18+
; CHECK-NODOT-NEXT: uaddw v0.4s, v0.4s, v3.4h
19+
; CHECK-NODOT-NEXT: uaddw2 v2.4s, v2.4s, v3.8h
20+
; CHECK-NODOT-NEXT: uaddw2 v0.4s, v0.4s, v1.8h
21+
; CHECK-NODOT-NEXT: add v0.4s, v2.4s, v0.4s
2422
; CHECK-NODOT-NEXT: ret
2523
%u.wide = zext <16 x i8> %u to <16 x i32>
2624
%s.wide = zext <16 x i8> %s to <16 x i32>
@@ -97,19 +95,17 @@ define <2 x i32> @udot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) {
9795
;
9896
; CHECK-NODOT-LABEL: udot_narrow:
9997
; CHECK-NODOT: // %bb.0:
100-
; CHECK-NODOT-NEXT: ushll v1.8h, v1.8b, #0
101-
; CHECK-NODOT-NEXT: ushll v2.8h, v2.8b, #0
98+
; CHECK-NODOT-NEXT: umull v1.8h, v2.8b, v1.8b
10299
; CHECK-NODOT-NEXT: // kill: def $d0 killed $d0 def $q0
103-
; CHECK-NODOT-NEXT: umull v3.4s, v2.4h, v1.4h
104-
; CHECK-NODOT-NEXT: umull2 v4.4s, v2.8h, v1.8h
105-
; CHECK-NODOT-NEXT: ext v5.16b, v1.16b, v1.16b, #8
106-
; CHECK-NODOT-NEXT: ext v6.16b, v2.16b, v2.16b, #8
107-
; CHECK-NODOT-NEXT: umlal v0.4s, v2.4h, v1.4h
100+
; CHECK-NODOT-NEXT: ushll v2.4s, v1.4h, #0
101+
; CHECK-NODOT-NEXT: ushll2 v3.4s, v1.8h, #0
102+
; CHECK-NODOT-NEXT: ext v4.16b, v1.16b, v1.16b, #8
103+
; CHECK-NODOT-NEXT: uaddw v0.4s, v0.4s, v1.4h
108104
; CHECK-NODOT-NEXT: ext v3.16b, v3.16b, v3.16b, #8
109-
; CHECK-NODOT-NEXT: ext v1.16b, v4.16b, v4.16b, #8
110-
; CHECK-NODOT-NEXT: umlal v3.4s, v6.4h, v5.4h
111-
; CHECK-NODOT-NEXT: add v0.2s, v1.2s, v0.2s
105+
; CHECK-NODOT-NEXT: ext v2.16b, v2.16b, v2.16b, #8
112106
; CHECK-NODOT-NEXT: add v0.2s, v3.2s, v0.2s
107+
; CHECK-NODOT-NEXT: uaddw v1.4s, v2.4s, v4.4h
108+
; CHECK-NODOT-NEXT: add v0.2s, v1.2s, v0.2s
113109
; CHECK-NODOT-NEXT: ret
114110
%u.wide = zext <8 x i8> %u to <8 x i32>
115111
%s.wide = zext <8 x i8> %s to <8 x i32>
@@ -126,15 +122,13 @@ define <4 x i32> @sdot(<4 x i32> %acc, <16 x i8> %u, <16 x i8> %s) {
126122
;
127123
; CHECK-NODOT-LABEL: sdot:
128124
; CHECK-NODOT: // %bb.0:
129-
; CHECK-NODOT-NEXT: sshll v3.8h, v1.8b, #0
130-
; CHECK-NODOT-NEXT: sshll v4.8h, v2.8b, #0
131-
; CHECK-NODOT-NEXT: sshll2 v1.8h, v1.16b, #0
132-
; CHECK-NODOT-NEXT: sshll2 v2.8h, v2.16b, #0
133-
; CHECK-NODOT-NEXT: smlal v0.4s, v4.4h, v3.4h
134-
; CHECK-NODOT-NEXT: smull v5.4s, v2.4h, v1.4h
135-
; CHECK-NODOT-NEXT: smlal2 v0.4s, v2.8h, v1.8h
136-
; CHECK-NODOT-NEXT: smlal2 v5.4s, v4.8h, v3.8h
137-
; CHECK-NODOT-NEXT: add v0.4s, v5.4s, v0.4s
125+
; CHECK-NODOT-NEXT: smull v3.8h, v2.8b, v1.8b
126+
; CHECK-NODOT-NEXT: smull2 v1.8h, v2.16b, v1.16b
127+
; CHECK-NODOT-NEXT: sshll v2.4s, v1.4h, #0
128+
; CHECK-NODOT-NEXT: saddw v0.4s, v0.4s, v3.4h
129+
; CHECK-NODOT-NEXT: saddw2 v2.4s, v2.4s, v3.8h
130+
; CHECK-NODOT-NEXT: saddw2 v0.4s, v0.4s, v1.8h
131+
; CHECK-NODOT-NEXT: add v0.4s, v2.4s, v0.4s
138132
; CHECK-NODOT-NEXT: ret
139133
%u.wide = sext <16 x i8> %u to <16 x i32>
140134
%s.wide = sext <16 x i8> %s to <16 x i32>
@@ -151,19 +145,17 @@ define <2 x i32> @sdot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) {
151145
;
152146
; CHECK-NODOT-LABEL: sdot_narrow:
153147
; CHECK-NODOT: // %bb.0:
154-
; CHECK-NODOT-NEXT: sshll v1.8h, v1.8b, #0
155-
; CHECK-NODOT-NEXT: sshll v2.8h, v2.8b, #0
148+
; CHECK-NODOT-NEXT: smull v1.8h, v2.8b, v1.8b
156149
; CHECK-NODOT-NEXT: // kill: def $d0 killed $d0 def $q0
157-
; CHECK-NODOT-NEXT: smull v3.4s, v2.4h, v1.4h
158-
; CHECK-NODOT-NEXT: smull2 v4.4s, v2.8h, v1.8h
159-
; CHECK-NODOT-NEXT: ext v5.16b, v1.16b, v1.16b, #8
160-
; CHECK-NODOT-NEXT: ext v6.16b, v2.16b, v2.16b, #8
161-
; CHECK-NODOT-NEXT: smlal v0.4s, v2.4h, v1.4h
150+
; CHECK-NODOT-NEXT: sshll v2.4s, v1.4h, #0
151+
; CHECK-NODOT-NEXT: sshll2 v3.4s, v1.8h, #0
152+
; CHECK-NODOT-NEXT: ext v4.16b, v1.16b, v1.16b, #8
153+
; CHECK-NODOT-NEXT: saddw v0.4s, v0.4s, v1.4h
162154
; CHECK-NODOT-NEXT: ext v3.16b, v3.16b, v3.16b, #8
163-
; CHECK-NODOT-NEXT: ext v1.16b, v4.16b, v4.16b, #8
164-
; CHECK-NODOT-NEXT: smlal v3.4s, v6.4h, v5.4h
165-
; CHECK-NODOT-NEXT: add v0.2s, v1.2s, v0.2s
155+
; CHECK-NODOT-NEXT: ext v2.16b, v2.16b, v2.16b, #8
166156
; CHECK-NODOT-NEXT: add v0.2s, v3.2s, v0.2s
157+
; CHECK-NODOT-NEXT: saddw v1.4s, v2.4s, v4.4h
158+
; CHECK-NODOT-NEXT: add v0.2s, v1.2s, v0.2s
167159
; CHECK-NODOT-NEXT: ret
168160
%u.wide = sext <8 x i8> %u to <8 x i32>
169161
%s.wide = sext <8 x i8> %s to <8 x i32>
@@ -415,27 +407,19 @@ define <4 x i64> @udot_8to64(<4 x i64> %acc, <16 x i8> %a, <16 x i8> %b) {
415407
;
416408
; CHECK-NODOT-LABEL: udot_8to64:
417409
; CHECK-NODOT: // %bb.0: // %entry
418-
; CHECK-NODOT-NEXT: ushll v4.8h, v3.8b, #0
419-
; CHECK-NODOT-NEXT: ushll v5.8h, v2.8b, #0
420-
; CHECK-NODOT-NEXT: ushll2 v3.8h, v3.16b, #0
421-
; CHECK-NODOT-NEXT: ushll2 v2.8h, v2.16b, #0
422-
; CHECK-NODOT-NEXT: ushll v6.4s, v4.4h, #0
423-
; CHECK-NODOT-NEXT: ushll v7.4s, v5.4h, #0
410+
; CHECK-NODOT-NEXT: umull v4.8h, v2.8b, v3.8b
411+
; CHECK-NODOT-NEXT: umull2 v2.8h, v2.16b, v3.16b
412+
; CHECK-NODOT-NEXT: ushll v3.4s, v4.4h, #0
413+
; CHECK-NODOT-NEXT: ushll v5.4s, v2.4h, #0
424414
; CHECK-NODOT-NEXT: ushll2 v4.4s, v4.8h, #0
425-
; CHECK-NODOT-NEXT: ushll2 v5.4s, v5.8h, #0
426-
; CHECK-NODOT-NEXT: ushll2 v16.4s, v3.8h, #0
427-
; CHECK-NODOT-NEXT: ushll2 v17.4s, v2.8h, #0
428-
; CHECK-NODOT-NEXT: ushll v3.4s, v3.4h, #0
429-
; CHECK-NODOT-NEXT: ushll v2.4s, v2.4h, #0
430-
; CHECK-NODOT-NEXT: umlal2 v1.2d, v7.4s, v6.4s
431-
; CHECK-NODOT-NEXT: umlal v0.2d, v7.2s, v6.2s
432-
; CHECK-NODOT-NEXT: umull2 v18.2d, v5.4s, v4.4s
433-
; CHECK-NODOT-NEXT: umull v4.2d, v5.2s, v4.2s
434-
; CHECK-NODOT-NEXT: umlal2 v1.2d, v17.4s, v16.4s
435-
; CHECK-NODOT-NEXT: umlal v0.2d, v17.2s, v16.2s
436-
; CHECK-NODOT-NEXT: umlal2 v18.2d, v2.4s, v3.4s
437-
; CHECK-NODOT-NEXT: umlal v4.2d, v2.2s, v3.2s
438-
; CHECK-NODOT-NEXT: add v1.2d, v18.2d, v1.2d
415+
; CHECK-NODOT-NEXT: ushll2 v2.4s, v2.8h, #0
416+
; CHECK-NODOT-NEXT: uaddw2 v1.2d, v1.2d, v3.4s
417+
; CHECK-NODOT-NEXT: uaddw v0.2d, v0.2d, v3.2s
418+
; CHECK-NODOT-NEXT: uaddl2 v3.2d, v4.4s, v5.4s
419+
; CHECK-NODOT-NEXT: uaddl v4.2d, v4.2s, v5.2s
420+
; CHECK-NODOT-NEXT: uaddw2 v1.2d, v1.2d, v2.4s
421+
; CHECK-NODOT-NEXT: uaddw v0.2d, v0.2d, v2.2s
422+
; CHECK-NODOT-NEXT: add v1.2d, v3.2d, v1.2d
439423
; CHECK-NODOT-NEXT: add v0.2d, v4.2d, v0.2d
440424
; CHECK-NODOT-NEXT: ret
441425
entry:
@@ -458,27 +442,19 @@ define <4 x i64> @sdot_8to64(<4 x i64> %acc, <16 x i8> %a, <16 x i8> %b){
458442
;
459443
; CHECK-NODOT-LABEL: sdot_8to64:
460444
; CHECK-NODOT: // %bb.0: // %entry
461-
; CHECK-NODOT-NEXT: sshll v4.8h, v3.8b, #0
462-
; CHECK-NODOT-NEXT: sshll v5.8h, v2.8b, #0
463-
; CHECK-NODOT-NEXT: sshll2 v3.8h, v3.16b, #0
464-
; CHECK-NODOT-NEXT: sshll2 v2.8h, v2.16b, #0
465-
; CHECK-NODOT-NEXT: sshll v6.4s, v4.4h, #0
466-
; CHECK-NODOT-NEXT: sshll v7.4s, v5.4h, #0
445+
; CHECK-NODOT-NEXT: smull v4.8h, v2.8b, v3.8b
446+
; CHECK-NODOT-NEXT: smull2 v2.8h, v2.16b, v3.16b
447+
; CHECK-NODOT-NEXT: sshll v3.4s, v4.4h, #0
448+
; CHECK-NODOT-NEXT: sshll v5.4s, v2.4h, #0
467449
; CHECK-NODOT-NEXT: sshll2 v4.4s, v4.8h, #0
468-
; CHECK-NODOT-NEXT: sshll2 v5.4s, v5.8h, #0
469-
; CHECK-NODOT-NEXT: sshll2 v16.4s, v3.8h, #0
470-
; CHECK-NODOT-NEXT: sshll2 v17.4s, v2.8h, #0
471-
; CHECK-NODOT-NEXT: sshll v3.4s, v3.4h, #0
472-
; CHECK-NODOT-NEXT: sshll v2.4s, v2.4h, #0
473-
; CHECK-NODOT-NEXT: smlal2 v1.2d, v7.4s, v6.4s
474-
; CHECK-NODOT-NEXT: smlal v0.2d, v7.2s, v6.2s
475-
; CHECK-NODOT-NEXT: smull2 v18.2d, v5.4s, v4.4s
476-
; CHECK-NODOT-NEXT: smull v4.2d, v5.2s, v4.2s
477-
; CHECK-NODOT-NEXT: smlal2 v1.2d, v17.4s, v16.4s
478-
; CHECK-NODOT-NEXT: smlal v0.2d, v17.2s, v16.2s
479-
; CHECK-NODOT-NEXT: smlal2 v18.2d, v2.4s, v3.4s
480-
; CHECK-NODOT-NEXT: smlal v4.2d, v2.2s, v3.2s
481-
; CHECK-NODOT-NEXT: add v1.2d, v18.2d, v1.2d
450+
; CHECK-NODOT-NEXT: sshll2 v2.4s, v2.8h, #0
451+
; CHECK-NODOT-NEXT: saddw2 v1.2d, v1.2d, v3.4s
452+
; CHECK-NODOT-NEXT: saddw v0.2d, v0.2d, v3.2s
453+
; CHECK-NODOT-NEXT: saddl2 v3.2d, v4.4s, v5.4s
454+
; CHECK-NODOT-NEXT: saddl v4.2d, v4.2s, v5.2s
455+
; CHECK-NODOT-NEXT: saddw2 v1.2d, v1.2d, v2.4s
456+
; CHECK-NODOT-NEXT: saddw v0.2d, v0.2d, v2.2s
457+
; CHECK-NODOT-NEXT: add v1.2d, v3.2d, v1.2d
482458
; CHECK-NODOT-NEXT: add v0.2d, v4.2d, v0.2d
483459
; CHECK-NODOT-NEXT: ret
484460
entry:
@@ -795,10 +771,9 @@ define <4 x i64> @sdot_no_bin_op_8to64(<4 x i64> %acc, <16 x i8> %a){
795771
define <4 x i32> @not_udot(<4 x i32> %acc, <8 x i8> %u, <8 x i8> %s) #0{
796772
; CHECK-LABEL: not_udot:
797773
; CHECK: // %bb.0:
798-
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
799-
; CHECK-NEXT: ushll v2.8h, v2.8b, #0
800-
; CHECK-NEXT: umlal v0.4s, v2.4h, v1.4h
801-
; CHECK-NEXT: umlal2 v0.4s, v2.8h, v1.8h
774+
; CHECK-NEXT: umull v1.8h, v2.8b, v1.8b
775+
; CHECK-NEXT: uaddw v0.4s, v0.4s, v1.4h
776+
; CHECK-NEXT: uaddw2 v0.4s, v0.4s, v1.8h
802777
; CHECK-NEXT: ret
803778
%u.wide = zext <8 x i8> %u to <8 x i32>
804779
%s.wide = zext <8 x i8> %s to <8 x i32>

0 commit comments

Comments
 (0)