-
Notifications
You must be signed in to change notification settings - Fork 14.3k
AMDGPU: Fix producing invalid IR on vector typed getelementptr #114113
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AMDGPU: Fix producing invalid IR on vector typed getelementptr #114113
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) ChangesThis did not consider the IR change to allow a scalar base with a vector In this situation we could handle the vector GEP, but that is a larger Full diff: https://github.com/llvm/llvm-project/pull/114113.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index f8744d6a483cff..7dd7388376f474 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -1159,7 +1159,6 @@ bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes(
if (LoadInst *LI = dyn_cast<LoadInst>(UseInst)) {
if (LI->isVolatile())
return false;
-
continue;
}
@@ -1170,12 +1169,19 @@ bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes(
// Reject if the stored value is not the pointer operand.
if (SI->getPointerOperand() != Val)
return false;
- } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UseInst)) {
+ continue;
+ }
+
+ if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UseInst)) {
if (RMW->isVolatile())
return false;
- } else if (AtomicCmpXchgInst *CAS = dyn_cast<AtomicCmpXchgInst>(UseInst)) {
+ continue;
+ }
+
+ if (AtomicCmpXchgInst *CAS = dyn_cast<AtomicCmpXchgInst>(UseInst)) {
if (CAS->isVolatile())
return false;
+ continue;
}
// Only promote a select if we know that the other select operand
@@ -1186,6 +1192,7 @@ bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes(
// May need to rewrite constant operands.
WorkList.push_back(ICmp);
+ continue;
}
// TODO: If we know the address is only observed through flat pointers, we
@@ -1198,8 +1205,9 @@ bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes(
if (isa<InsertValueInst>(User) || isa<InsertElementInst>(User))
return false;
+ // TODO: Handle vectors of pointers.
if (!User->getType()->isPointerTy())
- continue;
+ return false;
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UseInst)) {
// Be conservative if an address could be computed outside the bounds of
@@ -1504,6 +1512,8 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToLDS(AllocaInst &I,
PointerType *NewTy = PointerType::get(Context, AMDGPUAS::LOCAL_ADDRESS);
+ assert(isa<PointerType>(V->getType()));
+
// FIXME: It doesn't really make sense to try to do this for all
// instructions.
V->mutateType(NewTy);
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-invalid-vector-gep.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-invalid-vector-gep.ll
new file mode 100644
index 00000000000000..b0d578e421e280
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-invalid-vector-gep.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s
+
+; Check that invalid IR is not produced on a vector typed
+; getelementptr with a scalar alloca pointer base.
+
+define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset() {
+; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset() {
+; CHECK-NEXT: [[BB:.*:]]
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ALLOCA]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
+; CHECK-NEXT: [[EXTRACTELEMENT:%.*]] = extractelement <4 x ptr addrspace(5)> [[GETELEMENTPTR]], i64 0
+; CHECK-NEXT: store i32 0, ptr addrspace(5) [[EXTRACTELEMENT]], align 4
+; CHECK-NEXT: ret void
+;
+bb:
+ %alloca = alloca i32, align 4, addrspace(5)
+ %getelementptr = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
+ %extractelement = extractelement <4 x ptr addrspace(5)> %getelementptr, i64 0
+ store i32 0, ptr addrspace(5) %extractelement
+ ret void
+}
+
+define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_select(i1 %cond) {
+; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_select(
+; CHECK-SAME: i1 [[COND:%.*]]) {
+; CHECK-NEXT: [[BB:.*:]]
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT: [[GETELEMENTPTR0:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ALLOCA]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
+; CHECK-NEXT: [[GETELEMENTPTR1:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ALLOCA]], <4 x i64> <i64 3, i64 2, i64 1, i64 0>
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], <4 x ptr addrspace(5)> [[GETELEMENTPTR0]], <4 x ptr addrspace(5)> [[GETELEMENTPTR1]]
+; CHECK-NEXT: [[EXTRACTELEMENT:%.*]] = extractelement <4 x ptr addrspace(5)> [[SELECT]], i64 1
+; CHECK-NEXT: store i32 0, ptr addrspace(5) [[EXTRACTELEMENT]], align 4
+; CHECK-NEXT: ret void
+;
+bb:
+ %alloca = alloca i32, align 4, addrspace(5)
+ %getelementptr0 = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
+ %getelementptr1 = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> <i64 3, i64 2, i64 1, i64 0>
+ %select = select i1 %cond, <4 x ptr addrspace(5)> %getelementptr0, <4 x ptr addrspace(5)> %getelementptr1
+ %extractelement = extractelement <4 x ptr addrspace(5)> %select, i64 1
+ store i32 0, ptr addrspace(5) %extractelement
+ ret void
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Makes sense, thanks again.
This did not consider the IR change to allow a scalar base with a vector offset part. Reject any users that are not explicitly handled. In this situation we could handle the vector GEP, but that is a larger change. This just avoids the IR verifier error by rejecting it.
be705d0
to
38c949d
Compare
…114113) This did not consider the IR change to allow a scalar base with a vector offset part. Reject any users that are not explicitly handled. In this situation we could handle the vector GEP, but that is a larger change. This just avoids the IR verifier error by rejecting it.
This did not consider the IR change to allow a scalar base with a vector
offset part. Reject any users that are not explicitly handled.
In this situation we could handle the vector GEP, but that is a larger
change. This just avoids the IR verifier error by rejecting it.