[IR] Fix GEP offset computations for vector GEPs #75448

Merged
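
In a nutshell: offset computations used to scale every sequential GEP index by the alloc size of the indexed type, but, per the new comment in GetElementPtrTypeIterator.h below, vectors are always bit-packed, whereas arrays pad elements to their ABI alignment. The PR adds gep_type_iterator::getSequentialElementStride(const DataLayout &) and converts offset computations across the tree to it. A minimal sketch of the two strides, assuming an LLVM development build; the layout string is chosen only to overalign half:

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Type.h"
    #include "llvm/Support/raw_ostream.h"

    int main() {
      llvm::LLVMContext Ctx;
      llvm::DataLayout DL("e-f16:32"); // half: store size 2, ABI align 4
      llvm::Type *Half = llvm::Type::getHalfTy(Ctx);
      // Stride of `getelementptr [8 x half], ptr %p, i64 0, i64 %i`:
      llvm::outs() << DL.getTypeAllocSize(Half).getFixedValue() << "\n"; // 4
      // Stride of `getelementptr <8 x half>, ptr %p, i64 0, i64 %i`:
      llvm::outs() << DL.getTypeStoreSize(Half).getFixedValue() << "\n"; // 2
    }

So under this layout a GEP into [8 x half] steps 4 bytes per index, while a GEP into <8 x half> steps 2.
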
4 changes: 2 additions & 2 deletions clang/lib/CodeGen/CGExprScalar.cpp
@@ -5292,8 +5292,8 @@ static GEPOffsetAndOverflow EmitGEPOffsetInBytes(Value *BasePtr, Value *GEPVal,
} else {
// Otherwise this is array-like indexing. The local offset is the index
// multiplied by the element size.
- auto *ElementSize = llvm::ConstantInt::get(
-     IntPtrTy, DL.getTypeAllocSize(GTI.getIndexedType()));
+ auto *ElementSize =
+     llvm::ConstantInt::get(IntPtrTy, GTI.getSequentialElementStride(DL));
auto *IndexS = Builder.CreateIntCast(Index, IntPtrTy, /*isSigned=*/true);
LocalOffset = eval(BO_Mul, ElementSize, IndexS);
}
2 changes: 1 addition & 1 deletion llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1041,7 +1041,7 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
if (TargetType->isScalableTy())
return TTI::TCC_Basic;
int64_t ElementSize =
-     DL.getTypeAllocSize(GTI.getIndexedType()).getFixedValue();
+     GTI.getSequentialElementStride(DL).getFixedValue();
if (ConstIdx) {
BaseOffset +=
ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
57 changes: 54 additions & 3 deletions llvm/include/llvm/IR/GetElementPtrTypeIterator.h
@@ -16,6 +16,7 @@

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/PointerUnion.h"
+ #include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/User.h"
@@ -30,7 +31,39 @@ template <typename ItTy = User::const_op_iterator>
class generic_gep_type_iterator {

ItTy OpIt;
- PointerUnion<StructType *, Type *> CurTy;
+ // We use two different mechanisms to store the type a GEP index applies to.
+ // In some cases, we need to know the outer aggregate type the index is
+ // applied within, e.g. a struct. In such cases, we store the aggregate type
+ // in the iterator, and derive the element type on the fly.
+ //
+ // However, this is not always possible, because for the outermost index there
+ // is no containing type. In such cases, or if the containing type is not
+ // relevant, e.g. for arrays, the element type is stored as Type* in CurTy.
+ //
+ // If CurTy contains a Type* value, this does not imply anything about the
+ // type itself, because it is the element type and not the outer type.
+ // In particular, Type* can be a struct type.
+ //
+ // Consider this example:
+ //
+ //    %my.struct = type { i32, [ 4 x float ] }
+ //    [...]
+ //    %gep = getelementptr %my.struct, ptr %ptr, i32 10, i32 1, i32 3
+ //
+ // Iterating over the indices of this GEP, CurTy will contain the following
+ // values:
+ //    * i32 10: The outer index always operates on the GEP value type.
+ //              CurTy contains a Type* pointing at `%my.struct`.
+ //    * i32 1:  This index is within a struct.
+ //              CurTy contains a StructType* pointing at `%my.struct`.
+ //    * i32 3:  This index is within an array. We reuse the "flat" indexing
+ //              for arrays, which is also used for the top-level GEP index.
+ //              CurTy contains a Type* pointing at `float`.
+ //
+ // Vectors are handled separately because the layout of vectors is different
+ // for overaligned elements: vectors are always bit-packed, whereas arrays
+ // respect the ABI alignment of their elements.
+ PointerUnion<StructType *, VectorType *, Type *> CurTy;

generic_gep_type_iterator() = default;

@@ -69,6 +102,8 @@ class generic_gep_type_iterator {
Type *getIndexedType() const {
if (auto *T = dyn_cast_if_present<Type *>(CurTy))
return T;
+ if (auto *VT = dyn_cast_if_present<VectorType *>(CurTy))
+   return VT->getElementType();
return cast<StructType *>(CurTy)->getTypeAtIndex(getOperand());
}

@@ -79,7 +114,7 @@
if (auto *ATy = dyn_cast<ArrayType>(Ty))
CurTy = ATy->getElementType();
else if (auto *VTy = dyn_cast<VectorType>(Ty))
-   CurTy = VTy->getElementType();
+   CurTy = VTy;
else
CurTy = dyn_cast<StructType>(Ty);
++OpIt;
@@ -108,7 +143,23 @@
// that.

bool isStruct() const { return isa<StructType *>(CurTy); }
- bool isSequential() const { return isa<Type *>(CurTy); }
+ bool isVector() const { return isa<VectorType *>(CurTy); }
+ bool isSequential() const { return !isStruct(); }
+
+ // For sequential GEP indices (all except those into structs), the index
+ // value can be translated into a byte offset by multiplying it with an
+ // element stride. This function returns that stride, which depends both on
+ // the element type and on the containing aggregate type, since vectors
+ // always tightly bit-pack their elements.
+ TypeSize getSequentialElementStride(const DataLayout &DL) const {
+   assert(isSequential());
+   Type *ElemTy = getIndexedType();
+   if (isVector()) {
+     assert(DL.typeSizeEqualsStoreSize(ElemTy) && "Not byte-addressable");
+     return DL.getTypeStoreSize(ElemTy);
+   }
+   return DL.getTypeAllocSize(ElemTy);
+ }

StructType *getStructType() const { return cast<StructType *>(CurTy); }

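The new isVector() / getSequentialElementStride() API lets offset walks handle every sequential aggregate uniformly. A minimal sketch of the call-site pattern the rest of this diff converges on, assuming an LLVM build with this patch applied; constantGEPOffsetInBytes is a hypothetical helper, not part of the PR:

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/GetElementPtrTypeIterator.h"
    #include "llvm/IR/Instructions.h"
    #include <cstdint>
    #include <optional>

    using namespace llvm;

    // Accumulate the byte offset of a GEP whose indices are all constant.
    static std::optional<int64_t>
    constantGEPOffsetInBytes(const GetElementPtrInst *GEP, const DataLayout &DL) {
      int64_t Offset = 0;
      for (gep_type_iterator GTI = gep_type_begin(GEP), E = gep_type_end(GEP);
           GTI != E; ++GTI) {
        auto *CI = dyn_cast<ConstantInt>(GTI.getOperand());
        if (!CI)
          return std::nullopt; // This sketch only handles constant indices.
        if (StructType *STy = GTI.getStructTypeOrNull()) {
          // A struct index selects a field; its offset comes from the layout.
          Offset += int64_t(
              DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue()));
          continue;
        }
        // Pointer, array, and vector indices all scale by a stride: the store
        // size for vector elements (bit-packed), the alloc size otherwise.
        TypeSize Stride = GTI.getSequentialElementStride(DL);
        if (Stride.isScalable())
          return std::nullopt; // No fixed byte value for scalable strides.
        Offset += int64_t(Stride.getFixedValue()) * CI->getSExtValue();
      }
      return Offset;
    }
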
4 changes: 2 additions & 2 deletions llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -639,7 +639,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
continue;

// Don't attempt to analyze GEPs if the scalable index is not zero.
- TypeSize AllocTypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
+ TypeSize AllocTypeSize = GTI.getSequentialElementStride(DL);
if (AllocTypeSize.isScalable()) {
Decomposed.Base = V;
return Decomposed;
@@ -650,7 +650,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
continue;
}

- TypeSize AllocTypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
+ TypeSize AllocTypeSize = GTI.getSequentialElementStride(DL);
if (AllocTypeSize.isScalable()) {
Decomposed.Base = V;
return Decomposed;
2 changes: 1 addition & 1 deletion llvm/lib/Analysis/InlineCost.cpp
@@ -1429,7 +1429,7 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
continue;
}

- APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
+ APInt TypeSize(IntPtrWidth, GTI.getSequentialElementStride(DL));
Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
}
return true;
2 changes: 1 addition & 1 deletion llvm/lib/Analysis/Local.cpp
@@ -64,7 +64,7 @@ Value *llvm::emitGEPOffset(IRBuilderBase *Builder, const DataLayout &DL,
// Convert to correct type.
if (Op->getType() != IntIdxTy)
Op = Builder->CreateIntCast(Op, IntIdxTy, true, Op->getName() + ".c");
- TypeSize TSize = DL.getTypeAllocSize(GTI.getIndexedType());
+ TypeSize TSize = GTI.getSequentialElementStride(DL);
if (TSize != TypeSize::getFixed(1)) {
Value *Scale = Builder->CreateTypeSize(IntIdxTy->getScalarType(), TSize);
if (IntIdxTy->isVectorTy())
5 changes: 4 additions & 1 deletion llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2703,7 +2703,10 @@ static unsigned getGEPInductionOperand(const GetElementPtrInst *Gep) {

// If it's a type with the same allocation size as the result of the GEP we
// can peel off the zero index.
- if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize)
+ TypeSize ElemSize = GEPTI.isStruct()
+                         ? DL.getTypeAllocSize(GEPTI.getIndexedType())
+                         : GEPTI.getSequentialElementStride(DL);
+ if (ElemSize != GEPAllocSize)
break;
--LastOperand;
}
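Note that this call site is the one place in the diff where the queried index may still be a struct index, and getSequentialElementStride() asserts isSequential(), so the struct case must keep using the alloc size of the indexed field type. The same guard as a standalone sketch (indexedSize is a hypothetical name, not part of the PR):

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/GetElementPtrTypeIterator.h"

    using namespace llvm;

    // Pick the per-index size: struct indices select a field (alloc size of
    // the field type); all other indices scale by the sequential stride.
    static TypeSize indexedSize(gep_type_iterator GTI, const DataLayout &DL) {
      return GTI.isStruct() ? DL.getTypeAllocSize(GTI.getIndexedType())
                            : GTI.getSequentialElementStride(DL);
    }
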
4 changes: 2 additions & 2 deletions llvm/lib/Analysis/ValueTracking.cpp
@@ -1230,7 +1230,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits();
KnownBits IndexBits(IndexBitWidth);
computeKnownBits(Index, IndexBits, Depth + 1, Q);
- TypeSize IndexTypeSize = Q.DL.getTypeAllocSize(IndexedTy);
+ TypeSize IndexTypeSize = GTI.getSequentialElementStride(Q.DL);
uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinValue();
KnownBits ScalingFactor(IndexBitWidth);
// Multiply by current sizeof type.
@@ -2158,7 +2158,7 @@ static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
}

// If we have a zero-sized type, the index doesn't matter. Keep looping.
- if (Q.DL.getTypeAllocSize(GTI.getIndexedType()).isZero())
+ if (GTI.getSequentialElementStride(Q.DL).isZero())
continue;

// Fast path the constant operand case both for efficiency and so we don't
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -4787,7 +4787,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
ConstantOffset += SL->getElementOffset(Idx);
} else {
- TypeSize TS = DL.getTypeAllocSize(GTI.getIndexedType());
+ TypeSize TS = GTI.getSequentialElementStride(DL);
if (TS.isNonZero()) {
// The optimisations below currently only work for fixed offsets.
if (TS.isScalable())
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1545,7 +1545,7 @@ bool IRTranslator::translateGetElementPtr(const User &U,
Offset += DL->getStructLayout(StTy)->getElementOffset(Field);
continue;
} else {
- uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
+ uint64_t ElementSize = GTI.getSequentialElementStride(*DL);

// If this is a scalar constant or a splat vector of constants,
// handle it quickly.
6 changes: 2 additions & 4 deletions llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -560,15 +560,13 @@ bool FastISel::selectGetElementPtr(const User *I) {
}
}
} else {
- Type *Ty = GTI.getIndexedType();
-
// If this is a constant subscript, handle it quickly.
if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
if (CI->isZero())
continue;
// N = N + Offset
uint64_t IdxN = CI->getValue().sextOrTrunc(64).getSExtValue();
- TotalOffs += DL.getTypeAllocSize(Ty) * IdxN;
+ TotalOffs += GTI.getSequentialElementStride(DL) * IdxN;
if (TotalOffs >= MaxOffs) {
N = fastEmit_ri_(VT, ISD::ADD, N, TotalOffs, VT);
if (!N) // Unhandled operand. Halt "fast" selection and bail.
@@ -585,7 +583,7 @@
}

// N = N + Idx * ElementSize;
- uint64_t ElementSize = DL.getTypeAllocSize(Ty);
+ uint64_t ElementSize = GTI.getSequentialElementStride(DL);
Register IdxN = getRegForGEPIndex(Idx);
if (!IdxN) // Unhandled operand. Halt "fast" selection and bail.
return false;
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4112,7 +4112,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
MVT IdxTy = MVT::getIntegerVT(IdxSize);
TypeSize ElementSize =
-     DAG.getDataLayout().getTypeAllocSize(GTI.getIndexedType());
+     GTI.getSequentialElementStride(DAG.getDataLayout());
// We intentionally mask away the high bits here; ElementSize may not
// fit in IdxTy.
APInt ElementMul(IdxSize, ElementSize.getKnownMinValue());
2 changes: 1 addition & 1 deletion llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -1074,7 +1074,7 @@ GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I,
assert(BitWidth == 64 && "Invalid index type for getelementptr");
Idx = (int64_t)IdxGV.IntVal.getZExtValue();
}
- Total += getDataLayout().getTypeAllocSize(I.getIndexedType()) * Idx;
+ Total += I.getSequentialElementStride(getDataLayout()) * Idx;
}
}

5 changes: 2 additions & 3 deletions llvm/lib/IR/DataLayout.cpp
@@ -936,9 +936,8 @@ int64_t DataLayout::getIndexedOffsetInType(Type *ElemTy,
// Add in the offset, as calculated by the structure layout info...
Result += Layout->getElementOffset(FieldNo);
} else {
- // Get the array index and the size of each array element.
- if (int64_t arrayIdx = cast<ConstantInt>(Idx)->getSExtValue())
-   Result += arrayIdx * getTypeAllocSize(GTI.getIndexedType());
+ if (int64_t ArrayIdx = cast<ConstantInt>(Idx)->getSExtValue())
+   Result += ArrayIdx * GTI.getSequentialElementStride(*this);
}
}

12 changes: 5 additions & 7 deletions llvm/lib/IR/Operator.cpp
@@ -87,7 +87,7 @@ Align GEPOperator::getMaxPreservedAlignment(const DataLayout &DL) const {
/// If the index isn't known, we take 1 because it is the index that will
/// give the worse alignment of the offset.
const uint64_t ElemCount = OpC ? OpC->getZExtValue() : 1;
- Offset = DL.getTypeAllocSize(GTI.getIndexedType()) * ElemCount;
+ Offset = GTI.getSequentialElementStride(DL) * ElemCount;
}
Result = Align(MinAlign(Offset, Result.value()));
}
@@ -157,7 +157,7 @@ bool GEPOperator::accumulateConstantOffset(
continue;
}
if (!AccumulateOffset(ConstOffset->getValue(),
-                       DL.getTypeAllocSize(GTI.getIndexedType())))
+                       GTI.getSequentialElementStride(DL)))
return false;
continue;
}
@@ -170,8 +170,7 @@
if (!ExternalAnalysis(*V, AnalysisIndex))
return false;
UsedExternalAnalysis = true;
- if (!AccumulateOffset(AnalysisIndex,
-                       DL.getTypeAllocSize(GTI.getIndexedType())))
+ if (!AccumulateOffset(AnalysisIndex, GTI.getSequentialElementStride(DL)))
return false;
}
return true;
@@ -218,14 +217,13 @@ bool GEPOperator::collectOffset(
continue;
}
CollectConstantOffset(ConstOffset->getValue(),
-                       DL.getTypeAllocSize(GTI.getIndexedType()));
+                       GTI.getSequentialElementStride(DL));
continue;
}

if (STy || ScalableType)
return false;
- APInt IndexedSize =
-     APInt(BitWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
+ APInt IndexedSize = APInt(BitWidth, GTI.getSequentialElementStride(DL));
// Insert an initial offset of 0 for V iff none exists already, then
// increment the offset by IndexedSize.
if (!IndexedSize.isZero()) {
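Because GEPOperator::accumulateConstantOffset() and collectOffset() feed most constant-offset folds, the user-visible effect is that vector GEPs now report bit-packed offsets. A hedged usage sketch; tryGetConstantOffset is a hypothetical wrapper, not part of the PR:

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Operator.h"
    #include <optional>

    using namespace llvm;

    // Returns the constant byte offset of a GEP, if it has one.
    static std::optional<APInt> tryGetConstantOffset(const GEPOperator *GEP,
                                                     const DataLayout &DL) {
      unsigned IdxWidth = DL.getIndexSizeInBits(GEP->getPointerAddressSpace());
      APInt Offset(IdxWidth, 0);
      if (!GEP->accumulateConstantOffset(DL, Offset))
        return std::nullopt;
      // With this patch, `getelementptr <8 x half>, ptr %p, i64 0, i64 3`
      // under an f16:32 layout yields 6 (3 * store size 2), where it used to
      // yield 12 (3 * alloc size 4).
      return Offset;
    }
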
2 changes: 1 addition & 1 deletion llvm/lib/IR/Value.cpp
@@ -1015,7 +1015,7 @@ getOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, const DataLayout &DL) {

// Otherwise, we have a sequential type like an array or fixed-length
// vector. Multiply the index by the ElementSize.
- TypeSize Size = DL.getTypeAllocSize(GTI.getIndexedType());
+ TypeSize Size = GTI.getSequentialElementStride(DL);
if (Size.isScalable())
return std::nullopt;
Offset += Size.getFixedValue() * OpC->getSExtValue();
10 changes: 4 additions & 6 deletions llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -645,7 +645,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
TmpOffset += SL->getElementOffset(Idx);
} else {
- uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+ uint64_t S = GTI.getSequentialElementStride(DL);
while (true) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.
@@ -4987,15 +4987,13 @@ bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
if (Field)
TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
} else {
- Type *Ty = GTI.getIndexedType();
-
// If this is a constant subscript, handle it quickly.
if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
if (CI->isZero())
continue;
// N = N + Offset
- TotalOffs +=
-     DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
+ TotalOffs += GTI.getSequentialElementStride(DL) *
+              cast<ConstantInt>(CI)->getSExtValue();
continue;
}
if (TotalOffs) {
@@ -5006,7 +5004,7 @@
}

// N = N + Idx * ElementSize;
- uint64_t ElementSize = DL.getTypeAllocSize(Ty);
+ uint64_t ElementSize = GTI.getSequentialElementStride(DL);
unsigned IdxN = getRegForGEPIndex(Idx);
if (!IdxN)
return false;
2 changes: 1 addition & 1 deletion llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -747,7 +747,7 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
TmpOffset += SL->getElementOffset(Idx);
} else {
- uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+ uint64_t S = GTI.getSequentialElementStride(DL);
while (true) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.
2 changes: 1 addition & 1 deletion llvm/lib/Target/Mips/MipsFastISel.cpp
@@ -492,7 +492,7 @@ bool MipsFastISel::computeAddress(const Value *Obj, Address &Addr) {
unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
TmpOffset += SL->getElementOffset(Idx);
} else {
- uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+ uint64_t S = GTI.getSequentialElementStride(DL);
while (true) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.
2 changes: 1 addition & 1 deletion llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -350,7 +350,7 @@ bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
TmpOffset += SL->getElementOffset(Idx);
} else {
- uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+ uint64_t S = GTI.getSequentialElementStride(DL);
for (;;) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.
2 changes: 1 addition & 1 deletion llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
@@ -362,7 +362,7 @@ RISCVGatherScatterLowering::determineBaseAndStride(Instruction *Ptr,

VecOperand = i;

- TypeSize TS = DL->getTypeAllocSize(GTI.getIndexedType());
+ TypeSize TS = GTI.getSequentialElementStride(*DL);
if (TS.isScalable())
return std::make_pair(nullptr, nullptr);

2 changes: 1 addition & 1 deletion llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -278,7 +278,7 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
TmpOffset += SL->getElementOffset(Idx);
} else {
- uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+ uint64_t S = GTI.getSequentialElementStride(DL);
for (;;) {
if (const auto *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.
2 changes: 1 addition & 1 deletion llvm/lib/Target/X86/X86FastISel.cpp
@@ -918,7 +918,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {

// An array/variable index is always of the form i*S where S is the
// constant scale size. See if we can push the scale into immediates.
- uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+ uint64_t S = GTI.getSequentialElementStride(DL);
for (;;) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.