Skip to content

Commit 38f52d3

Browse files
author
git apple-llvm automerger
committed
Merge commit '5db63d29fdc1' from llvm.org/main into next
2 parents 1ea8597 + 5db63d2 commit 38f52d3

File tree

3 files changed

+109
-49
lines changed

3 files changed

+109
-49
lines changed

llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -386,7 +386,6 @@ static Value *promoteAllocaUserToVector(
386386
};
387387

388388
Type *VecEltTy = VectorTy->getElementType();
389-
const unsigned NumVecElts = VectorTy->getNumElements();
390389

391390
switch (Inst->getOpcode()) {
392391
case Instruction::Load: {
@@ -419,11 +418,12 @@ static Value *promoteAllocaUserToVector(
419418
auto *SubVecTy = FixedVectorType::get(VecEltTy, NumLoadedElts);
420419
assert(DL.getTypeStoreSize(SubVecTy) == DL.getTypeStoreSize(AccessTy));
421420

422-
unsigned IndexVal = cast<ConstantInt>(Index)->getZExtValue();
423421
Value *SubVec = PoisonValue::get(SubVecTy);
424422
for (unsigned K = 0; K < NumLoadedElts; ++K) {
423+
Value *CurIdx =
424+
Builder.CreateAdd(Index, ConstantInt::get(Index->getType(), K));
425425
SubVec = Builder.CreateInsertElement(
426-
SubVec, Builder.CreateExtractElement(CurVal, IndexVal + K), K);
426+
SubVec, Builder.CreateExtractElement(CurVal, CurIdx), K);
427427
}
428428

429429
if (AccessTy->isPtrOrPtrVectorTy())
@@ -479,12 +479,12 @@ static Value *promoteAllocaUserToVector(
479479

480480
Val = Builder.CreateBitOrPointerCast(Val, SubVecTy);
481481

482-
unsigned IndexVal = cast<ConstantInt>(Index)->getZExtValue();
483482
Value *CurVec = GetOrLoadCurrentVectorValue();
484-
for (unsigned K = 0; K < NumWrittenElts && ((IndexVal + K) < NumVecElts);
485-
++K) {
483+
for (unsigned K = 0; K < NumWrittenElts; ++K) {
484+
Value *CurIdx =
485+
Builder.CreateAdd(Index, ConstantInt::get(Index->getType(), K));
486486
CurVec = Builder.CreateInsertElement(
487-
CurVec, Builder.CreateExtractElement(Val, K), IndexVal + K);
487+
CurVec, Builder.CreateExtractElement(Val, K), CurIdx);
488488
}
489489
return CurVec;
490490
}

llvm/test/CodeGen/AMDGPU/promote-alloca-loadstores.ll

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -43,9 +43,11 @@ define <4 x i64> @test_fullvec_out_of_bounds(<4 x i64> %arg) {
4343
; CHECK-SAME: (<4 x i64> [[ARG:%.*]]) {
4444
; CHECK-NEXT: entry:
4545
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i64> [[ARG]], i64 0
46-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> undef, i64 [[TMP0]], i64 3
47-
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> <i64 undef, i64 poison, i64 poison, i64 poison>, i64 [[TMP0]], i64 1
48-
; CHECK-NEXT: ret <4 x i64> [[TMP2]]
46+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> undef, i64 [[TMP0]], i32 3
47+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i64> [[ARG]], i64 1
48+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[ARG]], i64 2
49+
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[ARG]], i64 3
50+
; CHECK-NEXT: ret <4 x i64> poison
4951
;
5052
entry:
5153
%stack = alloca [4 x i64], align 4, addrspace(5)
@@ -159,9 +161,9 @@ define void @alloca_load_store_ptr_mixed_ptrvec(<2 x ptr addrspace(3)> %arg) {
159161
; CHECK-NEXT: entry:
160162
; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint <2 x ptr addrspace(3)> [[ARG]] to <2 x i32>
161163
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i64 0
162-
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> undef, i32 [[TMP1]], i64 0
164+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> undef, i32 [[TMP1]], i32 0
163165
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP0]], i64 1
164-
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP3]], i64 1
166+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP3]], i32 1
165167
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[TMP1]], i64 0
166168
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP3]], i64 1
167169
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr <2 x i32> [[TMP6]] to <2 x ptr addrspace(3)>

0 commit comments

Comments
 (0)