Skip to content

Commit 3d5223f

Browse files
Fros1er authored and lukel97 committed
Scalarize binary ops of splats without checking isTypeLegal
1 parent 441b672 commit 3d5223f

File tree

11 files changed

+221
-265
lines changed

11 files changed

+221
-265
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -26975,7 +26975,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
2697526975
/// If a vector binop is performed on splat values, it may be profitable to
2697626976
/// extract, scalarize, and insert/splat.
2697726977
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
26978-
const SDLoc &DL) {
26978+
const SDLoc &DL, bool LegalTypes) {
2697926979
SDValue N0 = N->getOperand(0);
2698026980
SDValue N1 = N->getOperand(1);
2698126981
unsigned Opcode = N->getOpcode();
@@ -26993,11 +26993,20 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
2699326993
// TODO: use DAG.isSplatValue instead?
2699426994
bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
2699526995
N1.getOpcode() == ISD::SPLAT_VECTOR;
26996+
26997+
// If binop is legal or custom on EltVT, scalarize should be profitable. The
26998+
// check is the same as isOperationLegalOrCustom without isTypeLegal. We
26999+
// can do this only before LegalTypes, because it may generate illegal `op
27000+
// EltVT` from legal `op VT (splat EltVT)`, where EltVT is not legal type but
27001+
// the result type of splat is legal.
27002+
auto EltAction = TLI.getOperationAction(Opcode, EltVT);
2699627003
if (!Src0 || !Src1 || Index0 != Index1 ||
2699727004
Src0.getValueType().getVectorElementType() != EltVT ||
2699827005
Src1.getValueType().getVectorElementType() != EltVT ||
2699927006
!(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
27000-
!TLI.isOperationLegalOrCustom(Opcode, EltVT))
27007+
(LegalTypes && !TLI.isOperationLegalOrCustom(Opcode, EltVT)) ||
27008+
!(EltAction == TargetLoweringBase::Legal ||
27009+
EltAction == TargetLoweringBase::Custom))
2700127010
return SDValue();
2700227011

2700327012
SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
@@ -27163,7 +27172,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
2716327172
}
2716427173
}
2716527174

27166-
if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
27175+
if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL, LegalTypes))
2716727176
return V;
2716827177

2716927178
return SDValue();

llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -16,30 +16,29 @@ define fastcc i8 @allocno_reload_assign() {
1616
; CHECK-NEXT: uzp1 p0.h, p0.h, p0.h
1717
; CHECK-NEXT: uzp1 p0.b, p0.b, p0.b
1818
; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
19-
; CHECK-NEXT: ptrue p0.b
2019
; CHECK-NEXT: fmov w8, s0
2120
; CHECK-NEXT: mov z0.b, #0 // =0x0
22-
; CHECK-NEXT: sbfx x8, x8, #0, #1
2321
; CHECK-NEXT: uunpklo z1.h, z0.b
2422
; CHECK-NEXT: uunpkhi z0.h, z0.b
25-
; CHECK-NEXT: whilelo p1.b, xzr, x8
26-
; CHECK-NEXT: not p0.b, p0/z, p1.b
23+
; CHECK-NEXT: mvn w8, w8
24+
; CHECK-NEXT: sbfx x8, x8, #0, #1
25+
; CHECK-NEXT: whilelo p0.b, xzr, x8
2726
; CHECK-NEXT: uunpklo z2.s, z1.h
2827
; CHECK-NEXT: uunpkhi z3.s, z1.h
2928
; CHECK-NEXT: uunpklo z5.s, z0.h
3029
; CHECK-NEXT: uunpkhi z7.s, z0.h
3130
; CHECK-NEXT: punpklo p1.h, p0.b
3231
; CHECK-NEXT: punpkhi p0.h, p0.b
3332
; CHECK-NEXT: punpklo p2.h, p1.b
33+
; CHECK-NEXT: punpkhi p3.h, p1.b
3434
; CHECK-NEXT: uunpklo z0.d, z2.s
3535
; CHECK-NEXT: uunpkhi z1.d, z2.s
36-
; CHECK-NEXT: punpkhi p3.h, p1.b
36+
; CHECK-NEXT: punpklo p5.h, p0.b
3737
; CHECK-NEXT: uunpklo z2.d, z3.s
3838
; CHECK-NEXT: uunpkhi z3.d, z3.s
39-
; CHECK-NEXT: punpklo p5.h, p0.b
39+
; CHECK-NEXT: punpkhi p7.h, p0.b
4040
; CHECK-NEXT: uunpklo z4.d, z5.s
4141
; CHECK-NEXT: uunpkhi z5.d, z5.s
42-
; CHECK-NEXT: punpkhi p7.h, p0.b
4342
; CHECK-NEXT: uunpklo z6.d, z7.s
4443
; CHECK-NEXT: uunpkhi z7.d, z7.s
4544
; CHECK-NEXT: punpklo p0.h, p2.b

0 commit comments

Comments
 (0)