Skip to content

[LowerMemIntrinsics][NFC] Use Align in TTI::getMemcpyLoopLoweringType #100984

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1587,7 +1587,7 @@ class TargetTransformInfo {
/// \returns The type to use in a loop expansion of a memcpy call.
Type *getMemcpyLoopLoweringType(
LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicElementSize = std::nullopt) const;

/// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
Expand All @@ -1599,7 +1599,7 @@ class TargetTransformInfo {
void getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign,
Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicCpySize = std::nullopt) const;

/// \returns True if the two functions have compatible attributes for inlining
Expand Down Expand Up @@ -2133,13 +2133,13 @@ class TargetTransformInfo::Concept {
Type *ExpectedType) = 0;
virtual Type *getMemcpyLoopLoweringType(
LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicElementSize) const = 0;

virtual void getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign,
Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicCpySize) const = 0;
virtual bool areInlineCompatible(const Function *Caller,
const Function *Callee) const = 0;
Expand Down Expand Up @@ -2838,7 +2838,7 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
}
Type *getMemcpyLoopLoweringType(
LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicElementSize) const override {
return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
DestAddrSpace, SrcAlign, DestAlign,
Expand All @@ -2847,7 +2847,7 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
void getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign,
Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicCpySize) const override {
Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
SrcAddrSpace, DestAddrSpace,
Expand Down
4 changes: 2 additions & 2 deletions llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -839,7 +839,7 @@ class TargetTransformInfoImplBase {
Type *
getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign,
Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicElementSize) const {
return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
: Type::getInt8Ty(Context);
Expand All @@ -848,7 +848,7 @@ class TargetTransformInfoImplBase {
void getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign,
Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicCpySize) const {
unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Analysis/TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1198,7 +1198,7 @@ Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(

Type *TargetTransformInfo::getMemcpyLoopLoweringType(
LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicElementSize) const {
return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
DestAddrSpace, SrcAlign, DestAlign,
Expand All @@ -1208,7 +1208,7 @@ Type *TargetTransformInfo::getMemcpyLoopLoweringType(
void TargetTransformInfo::getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign,
Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicCpySize) const {
TTIImpl->getMemcpyLoopResidualLoweringType(
OpsOut, Context, RemainingBytes, SrcAddrSpace, DestAddrSpace, SrcAlign,
Expand Down
12 changes: 6 additions & 6 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -418,19 +418,19 @@ int64_t GCNTTIImpl::getMaxMemIntrinsicInlineSizeThreshold() const {
// FIXME: This could use fine tuning and microbenchmarks.
Type *GCNTTIImpl::getMemcpyLoopLoweringType(
LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicElementSize) const {

if (AtomicElementSize)
return Type::getIntNTy(Context, *AtomicElementSize * 8);

unsigned MinAlign = std::min(SrcAlign, DestAlign);
Align MinAlign = std::min(SrcAlign, DestAlign);

// A (multi-)dword access at an address == 2 (mod 4) will be decomposed by the
// hardware into byte accesses. If you assume all alignments are equally
// probable, it's more efficient on average to use short accesses for this
// case.
if (MinAlign == 2)
if (MinAlign == Align(2))
return Type::getInt16Ty(Context);

// Not all subtargets have 128-bit DS instructions, and we currently don't
Expand All @@ -450,7 +450,7 @@ Type *GCNTTIImpl::getMemcpyLoopLoweringType(
void GCNTTIImpl::getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign,
Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicCpySize) const {
assert(RemainingBytes < 16);

Expand All @@ -459,9 +459,9 @@ void GCNTTIImpl::getMemcpyLoopResidualLoweringType(
OpsOut, Context, RemainingBytes, SrcAddrSpace, DestAddrSpace, SrcAlign,
DestAlign, AtomicCpySize);

unsigned MinAlign = std::min(SrcAlign, DestAlign);
Align MinAlign = std::min(SrcAlign, DestAlign);

if (MinAlign != 2) {
if (MinAlign != Align(2)) {
Type *I64Ty = Type::getInt64Ty(Context);
while (RemainingBytes >= 8) {
OpsOut.push_back(I64Ty);
Expand Down
11 changes: 6 additions & 5 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,15 +137,16 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
unsigned AddrSpace) const;

int64_t getMaxMemIntrinsicInlineSizeThreshold() const;
Type *getMemcpyLoopLoweringType(
LLVMContext & Context, Value * Length, unsigned SrcAddrSpace,
unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
std::optional<uint32_t> AtomicElementSize) const;
Type *
getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
unsigned SrcAddrSpace, unsigned DestAddrSpace,
Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicElementSize) const;

void getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign,
Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicCpySize) const;
unsigned getMaxInterleaveFactor(ElementCount VF);

Expand Down
26 changes: 12 additions & 14 deletions llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,7 @@ void llvm::createMemCpyLoopKnownSize(

Type *TypeOfCopyLen = CopyLen->getType();
Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
AtomicElementSize);
Ctx, CopyLen, SrcAS, DstAS, SrcAlign, DstAlign, AtomicElementSize);
assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
"Atomic memcpy lowering is not supported for vector operand type");

Expand Down Expand Up @@ -111,8 +110,8 @@ void llvm::createMemCpyLoopKnownSize(

SmallVector<Type *, 5> RemainingOps;
TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
SrcAS, DstAS, SrcAlign.value(),
DstAlign.value(), AtomicElementSize);
SrcAS, DstAS, SrcAlign, DstAlign,
AtomicElementSize);

for (auto *OpTy : RemainingOps) {
Align PartSrcAlign(commonAlignment(SrcAlign, BytesCopied));
Expand Down Expand Up @@ -197,8 +196,7 @@ void llvm::createMemCpyLoopUnknownSize(
unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
AtomicElementSize);
Ctx, CopyLen, SrcAS, DstAS, SrcAlign, DstAlign, AtomicElementSize);
assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
"Atomic memcpy lowering is not supported for vector operand type");
unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
Expand Down Expand Up @@ -411,8 +409,8 @@ static void createMemMoveLoopUnknownSize(Instruction *InsertBefore,
unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value());
Type *LoopOpType = TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAS, DstAS,
SrcAlign, DstAlign);
unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
Type *Int8Type = Type::getInt8Ty(Ctx);
bool LoopOpIsInt8 = LoopOpType == Int8Type;
Expand Down Expand Up @@ -668,8 +666,8 @@ static void createMemMoveLoopKnownSize(Instruction *InsertBefore,
unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value());
Type *LoopOpType = TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAS, DstAS,
SrcAlign, DstAlign);
unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);

// Calculate the loop trip count and remaining bytes to copy after the loop.
Expand Down Expand Up @@ -737,8 +735,8 @@ static void createMemMoveLoopKnownSize(Instruction *InsertBefore,
IRBuilder<> BwdResBuilder(CopyBackwardsBB->getFirstNonPHI());
SmallVector<Type *, 5> RemainingOps;
TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
SrcAS, DstAS, PartSrcAlign.value(),
PartDstAlign.value());
SrcAS, DstAS, PartSrcAlign,
PartDstAlign);
for (auto *OpTy : RemainingOps) {
// reverse the order of the emitted operations
BwdResBuilder.SetInsertPoint(CopyBackwardsBB->getFirstNonPHI());
Expand Down Expand Up @@ -818,8 +816,8 @@ static void createMemMoveLoopKnownSize(Instruction *InsertBefore,
IRBuilder<> FwdResBuilder(FwdResidualBB->getTerminator());
SmallVector<Type *, 5> RemainingOps;
TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
SrcAS, DstAS, PartSrcAlign.value(),
PartDstAlign.value());
SrcAS, DstAS, PartSrcAlign,
PartDstAlign);
for (auto *OpTy : RemainingOps)
GenerateResidualLdStPair(OpTy, FwdResBuilder, BytesCopied);
}
Expand Down
Loading