Skip to content

Commit 3b7fe9f

Browse files
committed
Check hasOneUse on the source vector. This gives up some of the x86 improvements.
1 parent 1e4544e commit 3b7fe9f

File tree

4 files changed

+17
-11
lines changed

4 files changed

+17
-11
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23874,8 +23874,12 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
2387423874
// If we only found a single constant indexed extract_vector_elt feeding the
2387523875
// build_vector, do not produce a more complicated shuffle if the extract is
2387623876
// cheap.
23877+
23878+
// TODO: This should be more aggressive about skipping the shuffle formation
23879+
// (e.g., always do this for VecIn[1]->hasOneUse())
2387723880
if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, VT) &&
23878-
TLI.isExtractVecEltCheap(VT, OneConstExtractIndex))
23881+
(VecIn[1].hasOneUse() &&
23882+
TLI.isExtractVecEltCheap(VT, OneConstExtractIndex)))
2387923883
return SDValue();
2388023884

2388123885
unsigned MaxIndex = 0;

llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -452,11 +452,11 @@ define amdgpu_kernel void @byte8_inselt(ptr addrspace(1) %out, <8 x i8> %vec, i3
452452
; GCN-NEXT: s_and_b32 s6, s4, 0x1010101
453453
; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5]
454454
; GCN-NEXT: s_or_b64 s[2:3], s[6:7], s[2:3]
455-
; GCN-NEXT: v_mov_b32_e32 v0, s0
456-
; GCN-NEXT: v_mov_b32_e32 v2, s2
457-
; GCN-NEXT: v_mov_b32_e32 v1, s1
458-
; GCN-NEXT: v_mov_b32_e32 v3, s3
459-
; GCN-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
455+
; GCN-NEXT: v_mov_b32_e32 v3, s1
456+
; GCN-NEXT: v_mov_b32_e32 v0, s2
457+
; GCN-NEXT: v_mov_b32_e32 v1, s3
458+
; GCN-NEXT: v_mov_b32_e32 v2, s0
459+
; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
460460
; GCN-NEXT: s_endpgm
461461
entry:
462462
%v = insertelement <8 x i8> %vec, i8 1, i32 %sel

llvm/test/CodeGen/X86/avx512-build-vector.ll

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,11 @@ define <16 x i32> @test2(<16 x i32> %x) {
1414
define <16 x float> @test3(<4 x float> %a) {
1515
; CHECK-LABEL: test3:
1616
; CHECK: ## %bb.0:
17-
; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[8,9,10,11,0,1,2,3],zero,zero,zero,zero
18-
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
19-
; CHECK-NEXT: vinserti32x4 $1, %xmm0, %zmm1, %zmm0
17+
; CHECK-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
18+
; CHECK-NEXT: vpmovsxbd {{.*#+}} zmm2 = [0,1,2,3,4,18,16,7,8,9,10,11,12,13,14,15]
19+
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
20+
; CHECK-NEXT: vpermt2ps %zmm0, %zmm2, %zmm1
21+
; CHECK-NEXT: vmovaps %zmm1, %zmm0
2022
; CHECK-NEXT: retq
2123
%b = extractelement <4 x float> %a, i32 2
2224
%c = insertelement <16 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %b, i32 5

llvm/test/CodeGen/X86/sse-align-12.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ define <4 x float> @b(ptr %y, <4 x float> %z) nounwind {
4040
define <2 x double> @c(ptr %y) nounwind {
4141
; CHECK-LABEL: c:
4242
; CHECK: # %bb.0:
43-
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
44-
; CHECK-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
43+
; CHECK-NEXT: movups (%rdi), %xmm0
44+
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
4545
; CHECK-NEXT: retq
4646
%x = load <2 x double>, ptr %y, align 8
4747
%a = extractelement <2 x double> %x, i32 0

0 commit comments

Comments
 (0)