Skip to content

[InstCombine] Support ptrtoint of gep folds for chain of geps #137323

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 38 additions & 23 deletions llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2067,6 +2067,42 @@ Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
return nullptr;
}

Value *InstCombinerImpl::foldPtrToIntOfGEP(Type *IntTy, Value *Ptr) {
// Look through chain of one-use GEPs.
Type *PtrTy = Ptr->getType();
SmallVector<GEPOperator *> GEPs;
while (true) {
auto *GEP = dyn_cast<GEPOperator>(Ptr);
if (!GEP || !GEP->hasOneUse())
break;
GEPs.push_back(GEP);
Ptr = GEP->getPointerOperand();
}

// Don't handle case where GEP converts from pointer to vector.
if (GEPs.empty() || PtrTy != Ptr->getType())
return nullptr;

// Check whether we know the integer value of the base pointer.
Value *Res;
Type *IdxTy = DL.getIndexType(PtrTy);
if (match(Ptr, m_OneUse(m_IntToPtr(m_Value(Res)))) &&
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing test for the multi-use case. Do we need a one-use constraint on inttoptr?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is pre-existing code tested here:

define i32 @ptr_add_in_int_extra_use1(i32 %x) {
; CHECK-LABEL: @ptr_add_in_int_extra_use1(
; CHECK-NEXT: [[PTR:%.*]] = inttoptr i32 [[X:%.*]] to ptr
; CHECK-NEXT: call void @use_ptr(ptr [[PTR]])
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i32 4096
; CHECK-NEXT: [[R:%.*]] = ptrtoint ptr [[P2]] to i32
; CHECK-NEXT: ret i32 [[R]]
;
%ptr = inttoptr i32 %x to ptr
call void @use_ptr(ptr %ptr)
%p2 = getelementptr inbounds i8, ptr %ptr, i32 4096
%r = ptrtoint ptr %p2 to i32
ret i32 %r
}

I agree that this one-use requirement is unnecessary.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like removing this one-use check has some negative interaction with the indexed compare fold.

Res->getType() == IntTy && IntTy == IdxTy) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Res->getType() == IntTy && IntTy == IdxTy) {
Res->getType() == IdxTy) {

If IntTy != IdxTy, we will zext or trunc the offset accumulation into IntTy.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is correct, but it's worth noting that we can only run into this if the inttoptr hasn't been canonicalized yet (otherwise it would be guaranteed to use IntTy), so it's not really possible to test this case.

// pass
} else if (isa<ConstantPointerNull>(Ptr)) {
Res = Constant::getNullValue(IdxTy);
} else {
return nullptr;
}

// Perform the entire operation on integers instead.
for (GEPOperator *GEP : reverse(GEPs)) {
Value *Offset = EmitGEPOffset(GEP);
Res = Builder.CreateAdd(Res, Offset, "", GEP->hasNoUnsignedWrap());
}
return Builder.CreateZExtOrTrunc(Res, IntTy);
}

Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
// If the destination integer type is not the intptr_t type for this target,
// do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
Expand All @@ -2093,29 +2129,8 @@ Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
Mask->getType() == Ty)
return BinaryOperator::CreateAnd(Builder.CreatePtrToInt(Ptr, Ty), Mask);

if (auto *GEP = dyn_cast<GEPOperator>(SrcOp)) {
// Fold ptrtoint(gep null, x) to multiply + constant if the GEP has one use.
// While this can increase the number of instructions it doesn't actually
// increase the overall complexity since the arithmetic is just part of
// the GEP otherwise.
if (GEP->hasOneUse() &&
isa<ConstantPointerNull>(GEP->getPointerOperand())) {
return replaceInstUsesWith(CI,
Builder.CreateIntCast(EmitGEPOffset(GEP), Ty,
/*isSigned=*/false));
}

// (ptrtoint (gep (inttoptr Base), ...)) -> Base + Offset
Value *Base;
if (GEP->hasOneUse() &&
match(GEP->getPointerOperand(), m_OneUse(m_IntToPtr(m_Value(Base)))) &&
Base->getType() == Ty) {
Value *Offset = EmitGEPOffset(GEP);
auto *NewOp = BinaryOperator::CreateAdd(Base, Offset);
NewOp->setHasNoUnsignedWrap(GEP->hasNoUnsignedWrap());
return NewOp;
}
}
if (Value *V = foldPtrToIntOfGEP(Ty, SrcOp))
return replaceInstUsesWith(CI, V);

Value *Vec, *Scalar, *Index;
if (match(SrcOp, m_OneUse(m_InsertElt(m_IntToPtr(m_Value(Vec)),
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/InstCombine/InstCombineInternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
/// folded operation.
void PHIArgMergedDebugLoc(Instruction *Inst, PHINode &PN);

Value *foldPtrToIntOfGEP(Type *IntTy, Value *Ptr);
Instruction *foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, CmpPredicate Cond,
Instruction &I);
Instruction *foldSelectICmp(CmpPredicate Pred, SelectInst *SI, Value *RHS,
Expand Down
102 changes: 101 additions & 1 deletion llvm/test/Transforms/InstCombine/cast_ptr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
; Tests to make sure elimination of casts is working correctly
; RUN: opt < %s -passes=instcombine -S | FileCheck %s

target datalayout = "p:32:32-p1:32:32-p2:16:16"
target datalayout = "p:32:32-p1:32:32-p2:16:16-p3:32:32:32:16"

@global = global i8 0

Expand Down Expand Up @@ -432,3 +432,103 @@ define i32 @ptr_add_in_int_extra_use2(i32 %x) {
%r = ptrtoint ptr %p2 to i32
ret i32 %r
}

define i32 @ptrtoint_of_inttoptr_multiple_gep(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @ptrtoint_of_inttoptr_multiple_gep(
; CHECK-NEXT: [[PTR2_IDX:%.*]] = shl nuw i32 [[Y:%.*]], 1
; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[X:%.*]], [[PTR2_IDX]]
; CHECK-NEXT: [[PTR3_IDX:%.*]] = shl i32 [[Z:%.*]], 2
; CHECK-NEXT: [[R:%.*]] = add i32 [[TMP1]], [[PTR3_IDX]]
; CHECK-NEXT: ret i32 [[R]]
;
%ptr = inttoptr i32 %x to ptr
%ptr2 = getelementptr nuw i16, ptr %ptr, i32 %y
%ptr3 = getelementptr i32, ptr %ptr2, i32 %z
%r = ptrtoint ptr %ptr3 to i32
ret i32 %r
}

define i32 @ptrtoint_of_inttoptr_multiple_gep_extra_use(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @ptrtoint_of_inttoptr_multiple_gep_extra_use(
; CHECK-NEXT: [[PTR:%.*]] = inttoptr i32 [[X:%.*]] to ptr
; CHECK-NEXT: [[PTR2:%.*]] = getelementptr i16, ptr [[PTR]], i32 [[Y:%.*]]
; CHECK-NEXT: call void @use_ptr(ptr [[PTR2]])
; CHECK-NEXT: [[PTR3:%.*]] = getelementptr i32, ptr [[PTR2]], i32 [[Z:%.*]]
; CHECK-NEXT: [[R:%.*]] = ptrtoint ptr [[PTR3]] to i32
; CHECK-NEXT: ret i32 [[R]]
;
%ptr = inttoptr i32 %x to ptr
%ptr2 = getelementptr i16, ptr %ptr, i32 %y
call void @use_ptr(ptr %ptr2)
%ptr3 = getelementptr i32, ptr %ptr2, i32 %z
%r = ptrtoint ptr %ptr3 to i32
ret i32 %r
}

define i32 @ptrtoint_of_inttoptr_index_type(i32 %x, i16 %y) {
; CHECK-LABEL: @ptrtoint_of_inttoptr_index_type(
; CHECK-NEXT: [[PTR:%.*]] = inttoptr i32 [[X:%.*]] to ptr addrspace(3)
; CHECK-NEXT: [[PTR2:%.*]] = getelementptr i16, ptr addrspace(3) [[PTR]], i16 [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = ptrtoint ptr addrspace(3) [[PTR2]] to i32
; CHECK-NEXT: ret i32 [[R]]
;
%ptr = inttoptr i32 %x to ptr addrspace(3)
%ptr2 = getelementptr i16, ptr addrspace(3) %ptr, i16 %y
%r = ptrtoint ptr addrspace(3) %ptr2 to i32
ret i32 %r
}

define i32 @ptrtoint_of_null_multiple_gep(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @ptrtoint_of_null_multiple_gep(
; CHECK-NEXT: [[PTR2_IDX:%.*]] = shl i32 [[X:%.*]], 1
; CHECK-NEXT: [[PTR3_IDX:%.*]] = shl nuw i32 [[Y:%.*]], 2
; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[PTR2_IDX]], [[PTR3_IDX]]
; CHECK-NEXT: [[PTR4_IDX:%.*]] = shl i32 [[Z:%.*]], 3
; CHECK-NEXT: [[R:%.*]] = add i32 [[TMP1]], [[PTR4_IDX]]
; CHECK-NEXT: ret i32 [[R]]
;
%ptr2 = getelementptr i16, ptr null, i32 %x
%ptr3 = getelementptr nuw i32, ptr %ptr2, i32 %y
%ptr4 = getelementptr i64, ptr %ptr3, i32 %z
%r = ptrtoint ptr %ptr4 to i32
ret i32 %r
}

define i32 @ptrtoint_of_null_multiple_gep_extra_use(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @ptrtoint_of_null_multiple_gep_extra_use(
; CHECK-NEXT: [[PTR2:%.*]] = getelementptr i16, ptr null, i32 [[X:%.*]]
; CHECK-NEXT: call void @use_ptr(ptr [[PTR2]])
; CHECK-NEXT: [[PTR3:%.*]] = getelementptr nuw i32, ptr [[PTR2]], i32 [[Y:%.*]]
; CHECK-NEXT: [[PTR4:%.*]] = getelementptr i64, ptr [[PTR3]], i32 [[Z:%.*]]
; CHECK-NEXT: [[R:%.*]] = ptrtoint ptr [[PTR4]] to i32
; CHECK-NEXT: ret i32 [[R]]
;
%ptr2 = getelementptr i16, ptr null, i32 %x
call void @use_ptr(ptr %ptr2)
%ptr3 = getelementptr nuw i32, ptr %ptr2, i32 %y
%ptr4 = getelementptr i64, ptr %ptr3, i32 %z
%r = ptrtoint ptr %ptr4 to i32
ret i32 %r
}

define i32 @ptrtoint_of_null_index_type(i16 %x) {
; CHECK-LABEL: @ptrtoint_of_null_index_type(
; CHECK-NEXT: [[PTR_IDX:%.*]] = shl i16 [[X:%.*]], 1
; CHECK-NEXT: [[R:%.*]] = zext i16 [[PTR_IDX]] to i32
; CHECK-NEXT: ret i32 [[R]]
;
%ptr = getelementptr i16, ptr addrspace(3) null, i16 %x
%r = ptrtoint ptr addrspace(3) %ptr to i32
ret i32 %r
}

define <2 x i32> @ptrtoint_of_null_splat(<2 x i16> %x) {
; CHECK-LABEL: @ptrtoint_of_null_splat(
; CHECK-NEXT: [[PTR:%.*]] = getelementptr i16, ptr addrspace(3) null, <2 x i16> [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = ptrtoint <2 x ptr addrspace(3)> [[PTR]] to <2 x i32>
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%ptr = getelementptr i16, ptr addrspace(3) null, <2 x i16> %x
%r = ptrtoint <2 x ptr addrspace(3)> %ptr to <2 x i32>
ret <2 x i32> %r
}
Loading