
Commit 8859a4f

[X86] LowerBUILD_VECTOR - don't use insert_element(constant, elt, idx) if we have a freeze(undef) element
Fixes #74736
1 parent 901c5be
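For context, the path this commit guards lowers a BUILD_VECTOR whose lanes are all constants except one into an all-constant vector plus a single INSERT_VECTOR_ELT. A rough sketch of that shape in SelectionDAG terms (illustrative C++ only; ConstantOps, VariableElt, and VarIdx are hypothetical names, not identifiers from the commit):

// Illustrative sketch: BUILD_VECTOR(c0, c1, x, c3), with a single
// variable lane x, becomes a constant-pool-foldable vector plus one insert.
SDValue ConstVec = DAG.getBuildVector(VT, DL, ConstantOps);
SDValue Res =
    DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, ConstVec, VariableElt,
                DAG.getVectorIdxConstant(VarIdx, DL));

A freeze(undef) lane is not a compile-time constant, and treating it as one broke this bookkeeping in the case reduced in #74736; the transform is now skipped whenever such a lane is present (the FrozenUndefMask.isZero() check in the diff below).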

2 files changed: +69 -0 lines


llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 1 addition & 0 deletions
@@ -8727,6 +8727,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   // constants. Insertion into a zero vector is handled as a special-case
   // somewhere below here.
   if (NumConstants == NumElems - 1 && NumNonZero != 1 &&
+      FrozenUndefMask.isZero() &&
       (isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT) ||
        isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))) {
     // Create an all-constant vector. The variable element in the old
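FrozenUndefMask here is an APInt with one bit per BUILD_VECTOR operand, set when that operand is a freeze of an undef value. A minimal sketch of how such a mask can be collected during the operand scan (the surrounding names match the function, but this loop is an assumed reconstruction, not the verbatim LLVM source):

// Assumed shape of the operand scan feeding the guard above:
// mark every lane whose element is freeze(undef).
APInt FrozenUndefMask = APInt::getZero(NumElems);
for (unsigned i = 0; i != NumElems; ++i) {
  SDValue Elt = Op.getOperand(i);
  if (Elt.getOpcode() == ISD::FREEZE && Elt.getOperand(0).isUndef())
    FrozenUndefMask.setBit(i);
}

With any bit set, the insert-into-constant-vector path now bails out before building the constant vector, leaving such BUILD_VECTORs to the more general lowerings later in the function.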

llvm/test/CodeGen/X86/pr74736.ll

Lines changed: 68 additions & 0 deletions
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=SSE
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX
+
+define void @main(<16 x i32> %0, i32 %1) {
+; SSE-LABEL: main:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movd %edi, %xmm4
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,0,0,0]
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],xmm4[1,0]
+; SSE-NEXT:    paddd %xmm0, %xmm0
+; SSE-NEXT:    paddd %xmm1, %xmm1
+; SSE-NEXT:    paddd %xmm3, %xmm3
+; SSE-NEXT:    paddd %xmm2, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,1,1,3]
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0]
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,0],xmm2[1,0]
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[1,3]
+; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[3,0],xmm3[1,0]
+; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[1,3]
+; SSE-NEXT:    xorps %xmm2, %xmm0
+; SSE-NEXT:    xorps %xmm4, %xmm1
+; SSE-NEXT:    xorps %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
+; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
+; SSE-NEXT:    pxor %xmm0, %xmm1
+; SSE-NEXT:    movd %xmm1, 0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: main:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT:    vpblendd {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3]
+; AVX-NEXT:    movl $1, %eax
+; AVX-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
+; AVX-NEXT:    vpinsrd $3, %edi, %xmm2, %xmm2
+; AVX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
+; AVX-NEXT:    vpaddd %ymm0, %ymm0, %ymm0
+; AVX-NEXT:    vpaddd %ymm1, %ymm1, %ymm1
+; AVX-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,1,1,3,3,5,5,7]
+; AVX-NEXT:    vpermd %ymm0, %ymm2, %ymm2
+; AVX-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; AVX-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,3,3,3,7,7,7,7]
+; AVX-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,1,1,3,4,5,5,7]
+; AVX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
+; AVX-NEXT:    vpxor %ymm0, %ymm2, %ymm0
+; AVX-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
+; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovd %xmm0, 0
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+entry:
+  %2 = insertelement <16 x i32> %0, i32 1, i64 1
+  %3 = insertelement <16 x i32> %2, i32 %1, i64 3
+  %4 = insertelement <16 x i32> %3, i32 0, i64 0
+  %5 = shl <16 x i32> %4, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %6 = shufflevector <16 x i32> %5, <16 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15>
+  %7 = tail call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> %6)
+  store i32 %7, ptr null, align 4
+  ret void
+}
+declare i32 @llvm.vector.reduce.xor.v16i32(<16 x i32>)
