Skip to content

Commit ac534d2

Browse files
committed
[X86] combineArithReduction - use PACKUSWB directly for PSADBW(TRUNCATE(v8i16 X)) reduction patterns
Avoids a crash in the D152928 patch due to a reduction pattern appearing after legalization. We can probably extend this further to avoid truncating to sub-128-bit vXi8 (and then calling WidenToV16I8) entirely, but we can't currently hit other cases.
1 parent 6b25890 commit ac534d2

File tree

2 files changed

+35
-78
lines changed

2 files changed

+35
-78
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43935,10 +43935,15 @@ static SDValue combineArithReduction(SDNode *ExtElt, SelectionDAG &DAG,
4393543935
DAG.computeKnownBits(Rdx).getMaxValue().ule(255) &&
4393643936
(EltSizeInBits == 16 || Rdx.getOpcode() == ISD::ZERO_EXTEND ||
4393743937
Subtarget.hasAVX512())) {
43938-
EVT ByteVT = VecVT.changeVectorElementType(MVT::i8);
43939-
Rdx = DAG.getNode(ISD::TRUNCATE, DL, ByteVT, Rdx);
43940-
if (ByteVT.getSizeInBits() < 128)
43941-
Rdx = WidenToV16I8(Rdx, true);
43938+
if (Rdx.getValueType() == MVT::v8i16) {
43939+
Rdx = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, Rdx,
43940+
DAG.getUNDEF(MVT::v8i16));
43941+
} else {
43942+
EVT ByteVT = VecVT.changeVectorElementType(MVT::i8);
43943+
Rdx = DAG.getNode(ISD::TRUNCATE, DL, ByteVT, Rdx);
43944+
if (ByteVT.getSizeInBits() < 128)
43945+
Rdx = WidenToV16I8(Rdx, true);
43946+
}
4394243947

4394343948
// Build the PSADBW, split as 128/256/512 bits for SSE/AVX2/AVX512BW.
4394443949
auto PSADBWBuilder = [](SelectionDAG &DAG, const SDLoc &DL,

llvm/test/CodeGen/X86/vector-trunc.ll

Lines changed: 26 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1949,79 +1949,31 @@ define void @PR34773(ptr %a0, ptr %a1) {
19491949
}
19501950

19511951
define i16 @PR66194(i8 %q) {
1952-
; SSE2-LABEL: PR66194:
1953-
; SSE2: # %bb.0: # %entry
1954-
; SSE2-NEXT: xorl %eax, %eax
1955-
; SSE2-NEXT: xorl %ecx, %ecx
1956-
; SSE2-NEXT: testb %dil, %dil
1957-
; SSE2-NEXT: setne %al
1958-
; SSE2-NEXT: sete %cl
1959-
; SSE2-NEXT: movl %ecx, %edx
1960-
; SSE2-NEXT: shll $16, %edx
1961-
; SSE2-NEXT: orl %eax, %edx
1962-
; SSE2-NEXT: movd %edx, %xmm0
1963-
; SSE2-NEXT: pinsrw $2, %eax, %xmm0
1964-
; SSE2-NEXT: pinsrw $3, %eax, %xmm0
1965-
; SSE2-NEXT: pinsrw $4, %ecx, %xmm0
1966-
; SSE2-NEXT: pinsrw $5, %eax, %xmm0
1967-
; SSE2-NEXT: pinsrw $6, %eax, %xmm0
1968-
; SSE2-NEXT: pinsrw $7, %ecx, %xmm0
1969-
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
1970-
; SSE2-NEXT: psubw %xmm1, %xmm0
1971-
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1972-
; SSE2-NEXT: packuswb %xmm0, %xmm0
1973-
; SSE2-NEXT: pxor %xmm1, %xmm1
1974-
; SSE2-NEXT: psadbw %xmm0, %xmm1
1975-
; SSE2-NEXT: movd %xmm1, %eax
1976-
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
1977-
; SSE2-NEXT: retq
1978-
;
1979-
; SSSE3-LABEL: PR66194:
1980-
; SSSE3: # %bb.0: # %entry
1981-
; SSSE3-NEXT: xorl %eax, %eax
1982-
; SSSE3-NEXT: xorl %ecx, %ecx
1983-
; SSSE3-NEXT: testb %dil, %dil
1984-
; SSSE3-NEXT: setne %al
1985-
; SSSE3-NEXT: sete %cl
1986-
; SSSE3-NEXT: movl %ecx, %edx
1987-
; SSSE3-NEXT: shll $16, %edx
1988-
; SSSE3-NEXT: orl %eax, %edx
1989-
; SSSE3-NEXT: movd %edx, %xmm0
1990-
; SSSE3-NEXT: pinsrw $2, %eax, %xmm0
1991-
; SSSE3-NEXT: pinsrw $3, %eax, %xmm0
1992-
; SSSE3-NEXT: pinsrw $4, %ecx, %xmm0
1993-
; SSSE3-NEXT: pinsrw $5, %eax, %xmm0
1994-
; SSSE3-NEXT: pinsrw $6, %eax, %xmm0
1995-
; SSSE3-NEXT: pinsrw $7, %ecx, %xmm0
1996-
; SSSE3-NEXT: pcmpeqd %xmm1, %xmm1
1997-
; SSSE3-NEXT: psubw %xmm1, %xmm0
1998-
; SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
1999-
; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
2000-
; SSSE3-NEXT: shll $8, %eax
2001-
; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
2002-
; SSSE3-NEXT: orl %eax, %ecx
2003-
; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
2004-
; SSSE3-NEXT: shll $8, %eax
2005-
; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
2006-
; SSSE3-NEXT: orl %eax, %edx
2007-
; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
2008-
; SSSE3-NEXT: shll $16, %eax
2009-
; SSSE3-NEXT: orl %edx, %eax
2010-
; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
2011-
; SSSE3-NEXT: shll $24, %edx
2012-
; SSSE3-NEXT: orl %eax, %edx
2013-
; SSSE3-NEXT: movd %edx, %xmm0
2014-
; SSSE3-NEXT: pinsrw $2, %ecx, %xmm0
2015-
; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
2016-
; SSSE3-NEXT: shll $8, %eax
2017-
; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
2018-
; SSSE3-NEXT: orl %eax, %ecx
2019-
; SSSE3-NEXT: pinsrw $3, %ecx, %xmm0
2020-
; SSSE3-NEXT: pxor %xmm1, %xmm1
2021-
; SSSE3-NEXT: psadbw %xmm0, %xmm1
2022-
; SSSE3-NEXT: movd %xmm1, %eax
2023-
; SSSE3-NEXT: # kill: def $ax killed $ax killed $eax
2024-
; SSSE3-NEXT: retq
1952+
; SSE2-SSSE3-LABEL: PR66194:
1953+
; SSE2-SSSE3: # %bb.0: # %entry
1954+
; SSE2-SSSE3-NEXT: xorl %eax, %eax
1955+
; SSE2-SSSE3-NEXT: xorl %ecx, %ecx
1956+
; SSE2-SSSE3-NEXT: testb %dil, %dil
1957+
; SSE2-SSSE3-NEXT: setne %al
1958+
; SSE2-SSSE3-NEXT: sete %cl
1959+
; SSE2-SSSE3-NEXT: movl %ecx, %edx
1960+
; SSE2-SSSE3-NEXT: shll $16, %edx
1961+
; SSE2-SSSE3-NEXT: orl %eax, %edx
1962+
; SSE2-SSSE3-NEXT: movd %edx, %xmm0
1963+
; SSE2-SSSE3-NEXT: pinsrw $2, %eax, %xmm0
1964+
; SSE2-SSSE3-NEXT: pinsrw $3, %eax, %xmm0
1965+
; SSE2-SSSE3-NEXT: pinsrw $4, %ecx, %xmm0
1966+
; SSE2-SSSE3-NEXT: pinsrw $5, %eax, %xmm0
1967+
; SSE2-SSSE3-NEXT: pinsrw $6, %eax, %xmm0
1968+
; SSE2-SSSE3-NEXT: pinsrw $7, %ecx, %xmm0
1969+
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm1
1970+
; SSE2-SSSE3-NEXT: psubw %xmm1, %xmm0
1971+
; SSE2-SSSE3-NEXT: packuswb %xmm0, %xmm0
1972+
; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm1
1973+
; SSE2-SSSE3-NEXT: psadbw %xmm0, %xmm1
1974+
; SSE2-SSSE3-NEXT: movd %xmm1, %eax
1975+
; SSE2-SSSE3-NEXT: # kill: def $ax killed $ax killed $eax
1976+
; SSE2-SSSE3-NEXT: retq
20251977
;
20261978
; SSE41-LABEL: PR66194:
20271979
; SSE41: # %bb.0: # %entry
@@ -2040,7 +1992,7 @@ define i16 @PR66194(i8 %q) {
20401992
; SSE41-NEXT: pinsrb $14, %ecx, %xmm0
20411993
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
20421994
; SSE41-NEXT: psubw %xmm1, %xmm0
2043-
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1995+
; SSE41-NEXT: packuswb %xmm0, %xmm0
20441996
; SSE41-NEXT: pxor %xmm1, %xmm1
20451997
; SSE41-NEXT: psadbw %xmm0, %xmm1
20461998
; SSE41-NEXT: movd %xmm1, %eax

0 commit comments

Comments
 (0)