Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit bc177f5

Browse files
committed
[X86] In LowerHorizontalByteSum, emit vector_shuffle nodes instead of directly using X86ISD::UNPCKL/X86ISD::UNPCKH.
This gives shuffle lowering the freedom to use zero_extend_vector_inreg for the unpckl shuffle. Shuffle combining usually makes this swap later, but apparently not when AVX512 is enabled. While there, also use DAG.getConstant to create a 0 vector instead of using the helper that forces a specific BUILD_VECTOR. I don't think that helper is usually needed. We're basically free to create a constant build_vector anytime and it will be legalized on its own. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@346574 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent e0f010c commit bc177f5

File tree

3 files changed

+8
-8
lines changed

3 files changed

+8
-8
lines changed

lib/Target/X86/X86ISelLowering.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25038,7 +25038,7 @@ static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,
2503825038
// PSADBW instruction horizontally add all bytes and leave the result in i64
2503925039
// chunks, thus directly computes the pop count for v2i64 and v4i64.
2504025040
if (EltVT == MVT::i64) {
25041-
SDValue Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL);
25041+
SDValue Zeros = DAG.getConstant(0, DL, ByteVecVT);
2504225042
MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);
2504325043
V = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, V, Zeros);
2504425044
return DAG.getBitcast(VT, V);
@@ -25050,13 +25050,13 @@ static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,
2505025050
// this is that it lines up the results of two PSADBW instructions to be
2505125051
// two v2i64 vectors which concatenated are the 4 population counts. We can
2505225052
// then use PACKUSWB to shrink and concatenate them into a v4i32 again.
25053-
SDValue Zeros = getZeroVector(VT, Subtarget, DAG, DL);
25053+
SDValue Zeros = DAG.getConstant(0, DL, VT);
2505425054
SDValue V32 = DAG.getBitcast(VT, V);
25055-
SDValue Low = DAG.getNode(X86ISD::UNPCKL, DL, VT, V32, Zeros);
25056-
SDValue High = DAG.getNode(X86ISD::UNPCKH, DL, VT, V32, Zeros);
25055+
SDValue Low = getUnpackl(DAG, DL, VT, V32, Zeros);
25056+
SDValue High = getUnpackh(DAG, DL, VT, V32, Zeros);
2505725057

2505825058
// Do the horizontal sums into two v2i64s.
25059-
Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL);
25059+
Zeros = DAG.getConstant(0, DL, ByteVecVT);
2506025060
MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);
2506125061
Low = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT,
2506225062
DAG.getBitcast(ByteVecVT, Low), Zeros);

test/CodeGen/X86/vector-popcnt-128.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
308308
; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
309309
; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
310310
; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
311-
; BITALG-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
311+
; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
312312
; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
313313
; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
314314
; BITALG-NEXT: retq

test/CodeGen/X86/vector-tzcnt-128.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -633,7 +633,7 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
633633
; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
634634
; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
635635
; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
636-
; BITALG-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
636+
; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
637637
; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
638638
; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
639639
; BITALG-NEXT: retq
@@ -876,7 +876,7 @@ define <4 x i32> @testv4i32u(<4 x i32> %in) nounwind {
876876
; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
877877
; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
878878
; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
879-
; BITALG-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
879+
; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
880880
; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
881881
; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
882882
; BITALG-NEXT: retq

0 commit comments

Comments
 (0)