Skip to content

Commit 6596be8

Browse files
committed
Count the number of extract uses
1 parent bcd1177 commit 6596be8

14 files changed

+681
-699
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -23811,6 +23811,8 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
2381123811
// value.
2381223812
unsigned OneConstExtractIndex = ~0u;
2381323813

23814+
unsigned NumExtracts = 0;
23815+
2381423816
for (unsigned i = 0; i != NumElems; ++i) {
2381523817
SDValue Op = N->getOperand(i);
2381623818

@@ -23847,7 +23849,10 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
2384723849
ExtractedFromVec.getValueType().getVectorElementType())
2384823850
return SDValue();
2384923851

23850-
OneConstExtractIndex = ExtractIdx->getZExtValue();
23852+
if (OneConstExtractIndex == ~0u)
23853+
OneConstExtractIndex = ExtractIdx->getZExtValue();
23854+
23855+
++NumExtracts;
2385123856

2385223857
// Have we seen this input vector before?
2385323858
// The vectors are expected to be tiny (usually 1 or 2 elements), so using
@@ -23878,8 +23883,11 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
2387823883
// TODO: This should be more aggressive about skipping the shuffle formation
2387923884
// (e.g., always do this for VecIn[1]->hasOneUse())
2388023885
if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, VT) &&
23881-
(VecIn[1].hasOneUse() &&
23882-
TLI.isExtractVecEltCheap(VT, OneConstExtractIndex)))
23886+
TLI.isTypeLegal(VT.getVectorElementType()) &&
23887+
// VecIn[1].hasOneUse() &&
23888+
NumExtracts == 1
23889+
//&& TLI.isExtractVecEltCheap(VT, OneConstExtractIndex))
23890+
)
2388323891
return SDValue();
2388423892

2388523893
unsigned MaxIndex = 0;

llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -452,11 +452,11 @@ define amdgpu_kernel void @byte8_inselt(ptr addrspace(1) %out, <8 x i8> %vec, i3
452452
; GCN-NEXT: s_and_b32 s6, s4, 0x1010101
453453
; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5]
454454
; GCN-NEXT: s_or_b64 s[2:3], s[6:7], s[2:3]
455-
; GCN-NEXT: v_mov_b32_e32 v3, s1
456-
; GCN-NEXT: v_mov_b32_e32 v0, s2
457-
; GCN-NEXT: v_mov_b32_e32 v1, s3
458-
; GCN-NEXT: v_mov_b32_e32 v2, s0
459-
; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
455+
; GCN-NEXT: v_mov_b32_e32 v0, s0
456+
; GCN-NEXT: v_mov_b32_e32 v2, s2
457+
; GCN-NEXT: v_mov_b32_e32 v1, s1
458+
; GCN-NEXT: v_mov_b32_e32 v3, s3
459+
; GCN-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
460460
; GCN-NEXT: s_endpgm
461461
entry:
462462
%v = insertelement <8 x i8> %vec, i8 1, i32 %sel

0 commit comments

Comments
 (0)