Skip to content

Commit 525d412

Browse files
committed
[AMDGPU] Fix typing error introduced in promote alloca change
Fix the type error, introduced in #127973, that occurs when a GEP uses an i64 offset.
1 parent 7decd04 commit 525d412

File tree

2 files changed

+46
-1
lines changed

2 files changed

+46
-1
lines changed

llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -463,7 +463,10 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
463463

464464
Value *Offset = VarOffset.first;
465465
if (!Quot.isOne()) {
466-
ConstantInt *ConstMul = ConstantInt::get(GEP->getContext(), Quot);
466+
auto *OffsetType = dyn_cast<IntegerType>(Offset->getType());
467+
if (!OffsetType)
468+
return nullptr;
469+
ConstantInt *ConstMul = ConstantInt::get(OffsetType, Quot.getZExtValue());
467470
Offset = Builder.CreateMul(Offset, ConstMul);
468471
if (Instruction *NewInst = dyn_cast<Instruction>(Offset))
469472
NewInsts.push_back(NewInst);

llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,48 @@ define amdgpu_kernel void @i64_2d_load_store_subvec_3(ptr %out) {
198198
ret void
199199
}
200200

201+
define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset(ptr %out) {
202+
; CHECK-LABEL: define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset(
203+
; CHECK-SAME: ptr [[OUT:%.*]]) {
204+
; CHECK-NEXT: [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
205+
; CHECK-NEXT: [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y()
206+
; CHECK-NEXT: [[C1:%.*]] = icmp uge i32 [[X]], 3
207+
; CHECK-NEXT: [[C2:%.*]] = icmp uge i32 [[Y]], 3
208+
; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2
209+
; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]]
210+
; CHECK-NEXT: [[SEL3:%.*]] = zext i32 [[SEL2]] to i64
211+
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[SEL3]], 3
212+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <6 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5>, i64 [[TMP1]]
213+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <3 x i64> poison, i64 [[TMP2]], i64 0
214+
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP1]], 1
215+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <6 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5>, i64 [[TMP4]]
216+
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <3 x i64> [[TMP3]], i64 [[TMP5]], i64 1
217+
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP1]], 2
218+
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <6 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5>, i64 [[TMP7]]
219+
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <3 x i64> [[TMP6]], i64 [[TMP8]], i64 2
220+
; CHECK-NEXT: [[ELEM:%.*]] = extractelement <3 x i64> [[TMP9]], i32 2
221+
; CHECK-NEXT: store i64 [[ELEM]], ptr [[OUT]], align 8
222+
; CHECK-NEXT: ret void
223+
;
224+
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
225+
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
226+
%c1 = icmp uge i32 %x, 3
227+
%c2 = icmp uge i32 %y, 3
228+
%sel1 = select i1 %c1, i32 1, i32 2
229+
%sel2 = select i1 %c2, i32 0, i32 %sel1
230+
%sel3 = zext i32 %sel2 to i64
231+
%alloca = alloca [2 x [3 x i64]], align 16, addrspace(5)
232+
%gep.00 = getelementptr inbounds [2 x [3 x i64]], ptr addrspace(5) %alloca, i32 0
233+
%gep.01 = getelementptr inbounds [2 x [3 x i64]], ptr addrspace(5) %alloca, i32 0, i32 1, i32 0
234+
store <3 x i64> <i64 0, i64 1, i64 2>, ptr addrspace(5) %gep.00
235+
store <3 x i64> <i64 3, i64 4, i64 5>, ptr addrspace(5) %gep.01
236+
%gep = getelementptr inbounds [2 x [3 x i64]], ptr addrspace(5) %alloca, i64 0, i64 %sel3
237+
%load = load <3 x i64>, ptr addrspace(5) %gep
238+
%elem = extractelement <3 x i64> %load, i32 2
239+
store i64 %elem, ptr %out
240+
ret void
241+
}
242+
201243
define amdgpu_kernel void @i64_2d_load_store_subvec_4(ptr %out) {
202244
; CHECK-LABEL: define amdgpu_kernel void @i64_2d_load_store_subvec_4(
203245
; CHECK-SAME: ptr [[OUT:%.*]]) {

0 commit comments

Comments
 (0)