Skip to content

Commit af6261b

Browse files
AZero13 and RKSimon authored
[DAG] visitINSERT_VECTOR_ELT - convert to or mask if all insertions are -1 (#138213)
We already did this for insertions of 0 using an AND mask; we can do the same for insertions of -1 using an OR mask. Co-authored-by: Simon Pilgrim <[email protected]>
1 parent c248903 commit af6261b

File tree

4 files changed

+75
-262
lines changed

4 files changed

+75
-262
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -23018,18 +23018,33 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
2301823018
return NewShuffle;
2301923019
}
2302023020

23021-
// If all insertions are zero value, try to convert to AND mask.
23022-
// TODO: Do this for -1 with OR mask?
23023-
if (!LegalOperations && llvm::isNullConstant(InVal) &&
23024-
all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
23025-
count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
23026-
SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
23027-
SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
23028-
SmallVector<SDValue, 8> Mask(NumElts);
23029-
for (unsigned I = 0; I != NumElts; ++I)
23030-
Mask[I] = Ops[I] ? Zero : AllOnes;
23031-
return DAG.getNode(ISD::AND, DL, VT, CurVec,
23032-
DAG.getBuildVector(VT, DL, Mask));
23021+
if (!LegalOperations) {
23022+
bool IsNull = llvm::isNullConstant(InVal);
23023+
// We can convert to AND/OR mask if all insertions are zero or -1
23024+
// respectively.
23025+
if ((IsNull || llvm::isAllOnesConstant(InVal)) &&
23026+
all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
23027+
count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
23028+
SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
23029+
SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
23030+
SmallVector<SDValue, 8> Mask(NumElts);
23031+
23032+
// Build the mask and return the corresponding DAG node.
23033+
auto BuildMaskAndNode = [&](SDValue TrueVal, SDValue FalseVal,
23034+
unsigned MaskOpcode) {
23035+
for (unsigned I = 0; I != NumElts; ++I)
23036+
Mask[I] = Ops[I] ? TrueVal : FalseVal;
23037+
return DAG.getNode(MaskOpcode, DL, VT, CurVec,
23038+
DAG.getBuildVector(VT, DL, Mask));
23039+
};
23040+
23041+
// If all insertions are zero, AND with a mask that is zero in the inserted lanes and all-ones elsewhere.
23042+
if (IsNull)
23043+
return BuildMaskAndNode(Zero, AllOnes, ISD::AND);
23044+
23045+
// If all insertions are -1, OR with a mask that is all-ones in the inserted lanes and zero elsewhere.
23046+
return BuildMaskAndNode(AllOnes, Zero, ISD::OR);
23047+
}
2303323048
}
2303423049

2303523050
// Failed to find a match in the chain - bail.

llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -101,19 +101,13 @@ define i8 @test_v3i8(<3 x i8> %a) nounwind {
101101
define i8 @test_v9i8(<9 x i8> %a) nounwind {
102102
; CHECK-LABEL: test_v9i8:
103103
; CHECK: // %bb.0:
104-
; CHECK-NEXT: mov v1.16b, v0.16b
105-
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
106-
; CHECK-NEXT: mov v1.b[9], w8
107-
; CHECK-NEXT: mov v1.b[10], w8
108-
; CHECK-NEXT: mov v1.b[11], w8
109-
; CHECK-NEXT: mov v1.b[12], w8
110-
; CHECK-NEXT: mov v1.b[13], w8
111-
; CHECK-NEXT: mov v1.b[14], w8
112-
; CHECK-NEXT: mov v1.b[15], w8
104+
; CHECK-NEXT: movi v1.2d, #0xffffff00ffffff00
105+
; CHECK-NEXT: fmov x8, d0
106+
; CHECK-NEXT: orr v1.16b, v0.16b, v1.16b
113107
; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8
114108
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
115-
; CHECK-NEXT: fmov x8, d0
116-
; CHECK-NEXT: and x8, x8, x8, lsr #32
109+
; CHECK-NEXT: fmov x9, d0
110+
; CHECK-NEXT: and x8, x9, x8, lsr #32
117111
; CHECK-NEXT: and x8, x8, x8, lsr #16
118112
; CHECK-NEXT: lsr x9, x8, #8
119113
; CHECK-NEXT: and w0, w8, w9

llvm/test/CodeGen/X86/avx-cvt-3.ll

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,17 +48,13 @@ define <8 x float> @sitofp_shuffle_zero_v8i32(<8 x i32> %a0) {
4848
define <8 x float> @sitofp_insert_allbits_v8i32(<8 x i32> %a0) {
4949
; X86-LABEL: sitofp_insert_allbits_v8i32:
5050
; X86: # %bb.0:
51-
; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1
52-
; X86-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
53-
; X86-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5],ymm0[6,7]
51+
; X86-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
5452
; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
5553
; X86-NEXT: retl
5654
;
5755
; X64-LABEL: sitofp_insert_allbits_v8i32:
5856
; X64: # %bb.0:
59-
; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
60-
; X64-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
61-
; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5],ymm0[6,7]
57+
; X64-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
6258
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
6359
; X64-NEXT: retq
6460
%1 = insertelement <8 x i32> %a0, i32 -1, i32 0

0 commit comments

Comments
 (0)