Skip to content

Commit 234cb4c

Browse files
Fros1er and lukel97 authored
[SelectionDAG] Scalarize binary ops of splats before legal types (#100749)
Fixes #65072. This allows binary ops of splats to be scalarized if the operation isn't legal on the element type, but is legal on the type it will be legalized to. I assume that if an op is legal in both its scalar and vector forms, choosing the scalar version should always be better, no matter what the type is.

There are some cases that my approach can't scalarize, for example:

```llvm
; test/CodeGen/RISCV/rvv/select-int.ll
define <vscale x 4 x i64> @select_nxv4i64(i1 zeroext %c, <vscale x 4 x i64> %a, <vscale x 4 x i64> %b) {
  %v = select i1 %c, <vscale x 4 x i64> %a, <vscale x 4 x i64> %b
  ret <vscale x 4 x i64> %v
}
```

https://godbolt.org/z/xzqrKrxvK

`xor (splat i1, splat i1)` is generated from the select in a late step, after LegalizeType. I haven't figured out how to make `xor i1, i1` legal at that point.

---------

Co-authored-by: Luke Lau <[email protected]>
1 parent f3f465e commit 234cb4c

File tree

11 files changed

+251
-266
lines changed

11 files changed

+251
-266
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27107,7 +27107,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
2710727107
/// If a vector binop is performed on splat values, it may be profitable to
2710827108
/// extract, scalarize, and insert/splat.
2710927109
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
27110-
const SDLoc &DL) {
27110+
const SDLoc &DL, bool LegalTypes) {
2711127111
SDValue N0 = N->getOperand(0);
2711227112
SDValue N1 = N->getOperand(1);
2711327113
unsigned Opcode = N->getOpcode();
@@ -27129,7 +27129,12 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
2712927129
Src0.getValueType().getVectorElementType() != EltVT ||
2713027130
Src1.getValueType().getVectorElementType() != EltVT ||
2713127131
!(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
27132-
!TLI.isOperationLegalOrCustom(Opcode, EltVT))
27132+
// If before type legalization, allow scalar types that will eventually be
27133+
// made legal.
27134+
!TLI.isOperationLegalOrCustom(
27135+
Opcode, LegalTypes
27136+
? EltVT
27137+
: TLI.getTypeToTransformTo(*DAG.getContext(), EltVT)))
2713327138
return SDValue();
2713427139

2713527140
SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
@@ -27295,7 +27300,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
2729527300
}
2729627301
}
2729727302

27298-
if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
27303+
if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL, LegalTypes))
2729927304
return V;
2730027305

2730127306
return SDValue();

llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,30 +16,29 @@ define fastcc i8 @allocno_reload_assign() {
1616
; CHECK-NEXT: uzp1 p0.h, p0.h, p0.h
1717
; CHECK-NEXT: uzp1 p0.b, p0.b, p0.b
1818
; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
19-
; CHECK-NEXT: ptrue p0.b
2019
; CHECK-NEXT: fmov w8, s0
2120
; CHECK-NEXT: mov z0.b, #0 // =0x0
22-
; CHECK-NEXT: sbfx x8, x8, #0, #1
2321
; CHECK-NEXT: uunpklo z1.h, z0.b
2422
; CHECK-NEXT: uunpkhi z0.h, z0.b
25-
; CHECK-NEXT: whilelo p1.b, xzr, x8
26-
; CHECK-NEXT: not p0.b, p0/z, p1.b
23+
; CHECK-NEXT: mvn w8, w8
24+
; CHECK-NEXT: sbfx x8, x8, #0, #1
25+
; CHECK-NEXT: whilelo p0.b, xzr, x8
2726
; CHECK-NEXT: uunpklo z2.s, z1.h
2827
; CHECK-NEXT: uunpkhi z3.s, z1.h
2928
; CHECK-NEXT: uunpklo z5.s, z0.h
3029
; CHECK-NEXT: uunpkhi z7.s, z0.h
3130
; CHECK-NEXT: punpklo p1.h, p0.b
3231
; CHECK-NEXT: punpkhi p0.h, p0.b
3332
; CHECK-NEXT: punpklo p2.h, p1.b
33+
; CHECK-NEXT: punpkhi p3.h, p1.b
3434
; CHECK-NEXT: uunpklo z0.d, z2.s
3535
; CHECK-NEXT: uunpkhi z1.d, z2.s
36-
; CHECK-NEXT: punpkhi p3.h, p1.b
36+
; CHECK-NEXT: punpklo p5.h, p0.b
3737
; CHECK-NEXT: uunpklo z2.d, z3.s
3838
; CHECK-NEXT: uunpkhi z3.d, z3.s
39-
; CHECK-NEXT: punpklo p5.h, p0.b
39+
; CHECK-NEXT: punpkhi p7.h, p0.b
4040
; CHECK-NEXT: uunpklo z4.d, z5.s
4141
; CHECK-NEXT: uunpkhi z5.d, z5.s
42-
; CHECK-NEXT: punpkhi p7.h, p0.b
4342
; CHECK-NEXT: uunpklo z6.d, z7.s
4443
; CHECK-NEXT: uunpkhi z7.d, z7.s
4544
; CHECK-NEXT: punpklo p0.h, p2.b

0 commit comments

Comments
 (0)