[LowerMemIntrinsics][NFC] Use Align in TTI::getMemcpyLoopLoweringType #100984
Merged: ritter-x2a merged 1 commit into llvm:main from ritter-x2a:align-for-loop-lowering-types on Jul 29, 2024
Conversation
...and also in TTI::getMemcpyLoopResidualLoweringType.
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-transforms

Author: Fabian Ritter (ritter-x2a)

Changes: ...and also in TTI::getMemcpyLoopResidualLoweringType.

Full diff: https://github.com/llvm/llvm-project/pull/100984.diff

6 Files Affected:
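As background for the diff below, here is a minimal standalone sketch of how llvm::Align replaces a raw unsigned alignment at an interface boundary. The helper pickLoadWidthBytes and its return values are hypothetical, not LLVM API, but the Align construction, the std::min call, and the Align(2) comparison mirror what the updated AMDGPU code in this patch relies on.

```cpp
// Hedged sketch, not taken from the PR: pickLoadWidthBytes is a made-up helper.
#include "llvm/Support/Alignment.h"
#include <algorithm>
#include <cstdio>

using llvm::Align;

// Pick an access width in bytes from the smaller of the two alignments,
// loosely in the spirit of the AMDGPU heuristic changed in this patch.
static unsigned pickLoadWidthBytes(Align SrcAlign, Align DestAlign) {
  // std::min works because Align provides the usual comparison operators.
  Align MinAlign = std::min(SrcAlign, DestAlign);
  // With raw `unsigned` parameters this check used to be `MinAlign == 2`;
  // with the Align type the constant is spelled Align(2).
  if (MinAlign == Align(2))
    return 2; // e.g. favor 16-bit accesses for 2-byte-aligned copies
  return 4;   // illustrative fallback, not the real target heuristic
}

int main() {
  std::printf("%u\n", pickLoadWidthBytes(Align(2), Align(8))); // prints 2
}
```

Passing Align instead of unsigned keeps the power-of-two invariant and the byte units explicit in the TTI signatures, which is the point of this otherwise NFC change.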
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 2411b2b31d293..38e8b9da21397 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1587,7 +1587,7 @@ class TargetTransformInfo {
/// \returns The type to use in a loop expansion of a memcpy call.
Type *getMemcpyLoopLoweringType(
LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
- unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
+ unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicElementSize = std::nullopt) const;
/// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
@@ -1599,7 +1599,7 @@ class TargetTransformInfo {
void getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
- unsigned SrcAlign, unsigned DestAlign,
+ Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicCpySize = std::nullopt) const;
/// \returns True if the two functions have compatible attributes for inlining
@@ -2133,13 +2133,13 @@ class TargetTransformInfo::Concept {
Type *ExpectedType) = 0;
virtual Type *getMemcpyLoopLoweringType(
LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
- unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
+ unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicElementSize) const = 0;
virtual void getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
- unsigned SrcAlign, unsigned DestAlign,
+ Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicCpySize) const = 0;
virtual bool areInlineCompatible(const Function *Caller,
const Function *Callee) const = 0;
@@ -2838,7 +2838,7 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
}
Type *getMemcpyLoopLoweringType(
LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
- unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
+ unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicElementSize) const override {
return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
DestAddrSpace, SrcAlign, DestAlign,
@@ -2847,7 +2847,7 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
void getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
- unsigned SrcAlign, unsigned DestAlign,
+ Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicCpySize) const override {
Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
SrcAddrSpace, DestAddrSpace,
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 00efa474a91b5..899c5041aba4d 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -839,7 +839,7 @@ class TargetTransformInfoImplBase {
Type *
getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
unsigned SrcAddrSpace, unsigned DestAddrSpace,
- unsigned SrcAlign, unsigned DestAlign,
+ Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicElementSize) const {
return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
: Type::getInt8Ty(Context);
@@ -848,7 +848,7 @@ class TargetTransformInfoImplBase {
void getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
- unsigned SrcAlign, unsigned DestAlign,
+ Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicCpySize) const {
unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 6a0fa98089ba5..dcde78925bfa9 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1198,7 +1198,7 @@ Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
Type *TargetTransformInfo::getMemcpyLoopLoweringType(
LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
- unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
+ unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicElementSize) const {
return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
DestAddrSpace, SrcAlign, DestAlign,
@@ -1208,7 +1208,7 @@ Type *TargetTransformInfo::getMemcpyLoopLoweringType(
void TargetTransformInfo::getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
- unsigned SrcAlign, unsigned DestAlign,
+ Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicCpySize) const {
TTIImpl->getMemcpyLoopResidualLoweringType(
OpsOut, Context, RemainingBytes, SrcAddrSpace, DestAddrSpace, SrcAlign,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 0b1ecc002ae25..2b18d501e60b5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -418,19 +418,19 @@ int64_t GCNTTIImpl::getMaxMemIntrinsicInlineSizeThreshold() const {
// FIXME: This could use fine tuning and microbenchmarks.
Type *GCNTTIImpl::getMemcpyLoopLoweringType(
LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
- unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
+ unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicElementSize) const {
if (AtomicElementSize)
return Type::getIntNTy(Context, *AtomicElementSize * 8);
- unsigned MinAlign = std::min(SrcAlign, DestAlign);
+ Align MinAlign = std::min(SrcAlign, DestAlign);
// A (multi-)dword access at an address == 2 (mod 4) will be decomposed by the
// hardware into byte accesses. If you assume all alignments are equally
// probable, it's more efficient on average to use short accesses for this
// case.
- if (MinAlign == 2)
+ if (MinAlign == Align(2))
return Type::getInt16Ty(Context);
// Not all subtargets have 128-bit DS instructions, and we currently don't
@@ -450,7 +450,7 @@ Type *GCNTTIImpl::getMemcpyLoopLoweringType(
void GCNTTIImpl::getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
- unsigned SrcAlign, unsigned DestAlign,
+ Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicCpySize) const {
assert(RemainingBytes < 16);
@@ -459,9 +459,9 @@ void GCNTTIImpl::getMemcpyLoopResidualLoweringType(
OpsOut, Context, RemainingBytes, SrcAddrSpace, DestAddrSpace, SrcAlign,
DestAlign, AtomicCpySize);
- unsigned MinAlign = std::min(SrcAlign, DestAlign);
+ Align MinAlign = std::min(SrcAlign, DestAlign);
- if (MinAlign != 2) {
+ if (MinAlign != Align(2)) {
Type *I64Ty = Type::getInt64Ty(Context);
while (RemainingBytes >= 8) {
OpsOut.push_back(I64Ty);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index b423df17302ca..01df2e6caaba1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -137,15 +137,16 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
unsigned AddrSpace) const;
int64_t getMaxMemIntrinsicInlineSizeThreshold() const;
- Type *getMemcpyLoopLoweringType(
- LLVMContext & Context, Value * Length, unsigned SrcAddrSpace,
- unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
- std::optional<uint32_t> AtomicElementSize) const;
+ Type *
+ getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
+ unsigned SrcAddrSpace, unsigned DestAddrSpace,
+ Align SrcAlign, Align DestAlign,
+ std::optional<uint32_t> AtomicElementSize) const;
void getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
- unsigned SrcAlign, unsigned DestAlign,
+ Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicCpySize) const;
unsigned getMaxInterleaveFactor(ElementCount VF);
diff --git a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index ee0d95bbce64f..f0c7e31b9c223 100644
--- a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -45,8 +45,7 @@ void llvm::createMemCpyLoopKnownSize(
Type *TypeOfCopyLen = CopyLen->getType();
Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
- Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
- AtomicElementSize);
+ Ctx, CopyLen, SrcAS, DstAS, SrcAlign, DstAlign, AtomicElementSize);
assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
"Atomic memcpy lowering is not supported for vector operand type");
@@ -111,8 +110,8 @@ void llvm::createMemCpyLoopKnownSize(
SmallVector<Type *, 5> RemainingOps;
TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
- SrcAS, DstAS, SrcAlign.value(),
- DstAlign.value(), AtomicElementSize);
+ SrcAS, DstAS, SrcAlign, DstAlign,
+ AtomicElementSize);
for (auto *OpTy : RemainingOps) {
Align PartSrcAlign(commonAlignment(SrcAlign, BytesCopied));
@@ -197,8 +196,7 @@ void llvm::createMemCpyLoopUnknownSize(
unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
- Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
- AtomicElementSize);
+ Ctx, CopyLen, SrcAS, DstAS, SrcAlign, DstAlign, AtomicElementSize);
assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
"Atomic memcpy lowering is not supported for vector operand type");
unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
@@ -411,8 +409,8 @@ static void createMemMoveLoopUnknownSize(Instruction *InsertBefore,
unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
- Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
- Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value());
+ Type *LoopOpType = TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAS, DstAS,
+ SrcAlign, DstAlign);
unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
Type *Int8Type = Type::getInt8Ty(Ctx);
bool LoopOpIsInt8 = LoopOpType == Int8Type;
@@ -668,8 +666,8 @@ static void createMemMoveLoopKnownSize(Instruction *InsertBefore,
unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
- Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
- Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value());
+ Type *LoopOpType = TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAS, DstAS,
+ SrcAlign, DstAlign);
unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
// Calculate the loop trip count and remaining bytes to copy after the loop.
@@ -737,8 +735,8 @@ static void createMemMoveLoopKnownSize(Instruction *InsertBefore,
IRBuilder<> BwdResBuilder(CopyBackwardsBB->getFirstNonPHI());
SmallVector<Type *, 5> RemainingOps;
TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
- SrcAS, DstAS, PartSrcAlign.value(),
- PartDstAlign.value());
+ SrcAS, DstAS, PartSrcAlign,
+ PartDstAlign);
for (auto *OpTy : RemainingOps) {
// reverse the order of the emitted operations
BwdResBuilder.SetInsertPoint(CopyBackwardsBB->getFirstNonPHI());
@@ -818,8 +816,8 @@ static void createMemMoveLoopKnownSize(Instruction *InsertBefore,
IRBuilder<> FwdResBuilder(FwdResidualBB->getTerminator());
SmallVector<Type *, 5> RemainingOps;
TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
- SrcAS, DstAS, PartSrcAlign.value(),
- PartDstAlign.value());
+ SrcAS, DstAS, PartSrcAlign,
+ PartDstAlign);
for (auto *OpTy : RemainingOps)
GenerateResidualLdStPair(OpTy, FwdResBuilder, BytesCopied);
}
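For reference after the diff: a self-contained sketch of the default lowering-type choice that this NFC change preserves, mirroring the TargetTransformInfoImplBase hunk above. The free function and the main() driver are assumptions added to make the example runnable; they are not part of the patch or of LLVM's API.

```cpp
// Illustrative sketch only; the body mirrors TargetTransformInfoImplBase's
// default shown in the diff above, wrapped in a standalone function.
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
#include <optional>

using namespace llvm;

static Type *defaultMemcpyLoopLoweringType(
    LLVMContext &Context, Align /*SrcAlign*/, Align /*DestAlign*/,
    std::optional<uint32_t> AtomicElementSize = std::nullopt) {
  // The base implementation ignores the (now Align-typed) alignments: plain
  // copies are lowered byte by byte, and atomic element-wise copies use an
  // integer type of the element size.
  return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
                           : Type::getInt8Ty(Context);
}

int main() {
  LLVMContext Ctx;
  defaultMemcpyLoopLoweringType(Ctx, Align(4), Align(16))->print(outs());    // i8
  outs() << "\n";
  defaultMemcpyLoopLoweringType(Ctx, Align(4), Align(16), 4)->print(outs()); // i32
  outs() << "\n";
}
```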
arsenm approved these changes on Jul 29, 2024.
searlmc1 pushed a commit to ROCm/llvm-project that referenced this pull request on Dec 10, 2024:
…llvm#100984) ...and also in TTI::getMemcpyLoopResidualLoweringType. (cherry picked from commit 9e462b7) Change-Id: Iee645ead746236ec28d31742f3eb4933af0c7747
Labels: backend:AMDGPU, llvm:analysis (includes value tracking, cost tables and constant folding), llvm:transforms