-
Notifications
You must be signed in to change notification settings - Fork 14.3k
Revert "Revert "[llvm][ARM]Add widen global arrays pass"" #113288
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This reverts commit 370fd74.
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-llvm-analysis Author: Nashe Mncube (nasherm). Changes: Reverts llvm/llvm-project#112701. Also adds a config file for the tests so that they pass on the buildbots. Patch is 34.02 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/113288.diff. 18 files affected:
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 0459941fe05cdc..0dc513d8e65b76 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1819,6 +1819,10 @@ class TargetTransformInfo {
/// \return The maximum number of function arguments the target supports.
unsigned getMaxNumArgs() const;
+ /// \return The number of padding bytes to add to a global array of the
+ /// given \p Size (in bytes) and \p ArrayType. Default is no padding (0).
+ unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const;
+
/// @}
private:
@@ -2225,6 +2229,8 @@ class TargetTransformInfo::Concept {
getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
virtual bool hasArmWideBranch(bool Thumb) const = 0;
virtual unsigned getMaxNumArgs() const = 0;
+ virtual unsigned getNumBytesToPadGlobalArray(unsigned Size,
+ Type *ArrayType) const = 0;
};
template <typename T>
@@ -3026,6 +3032,11 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
unsigned getMaxNumArgs() const override {
return Impl.getMaxNumArgs();
}
+
+ unsigned getNumBytesToPadGlobalArray(unsigned Size,
+ Type *ArrayType) const override {
+ return Impl.getNumBytesToPadGlobalArray(Size, ArrayType); // forward to the wrapped Impl object
+ }
};
template <typename T>
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index dbdfb4d8cdfa32..0b7792f89a05c4 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1006,6 +1006,10 @@ class TargetTransformInfoImplBase {
unsigned getMaxNumArgs() const { return UINT_MAX; }
+ unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const {
+ return 0; // base implementation: never request padding
+ }
+
protected:
// Obtain the minimum required size to hold the value (without the sign)
// In case of a vector it returns the min required size for one element.
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index a47462b61e03b2..60704733637614 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1383,6 +1383,12 @@ bool TargetTransformInfo::isVectorShiftByScalarCheap(Type *Ty) const {
return TTIImpl->isVectorShiftByScalarCheap(Ty);
}
+unsigned
+TargetTransformInfo::getNumBytesToPadGlobalArray(unsigned Size,
+ Type *ArrayType) const {
+ return TTIImpl->getNumBytesToPadGlobalArray(Size, ArrayType); // delegate to the TTIImpl object
+}
+
TargetTransformInfo::Concept::~Concept() = default;
TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 835ae98efb852d..9f6e5e5ab1421c 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -56,6 +56,10 @@ static cl::opt<bool>
AllowWLSLoops("allow-arm-wlsloops", cl::Hidden, cl::init(true),
cl::desc("Enable the generation of WLS loops"));
+static cl::opt<bool> UseWidenGlobalArrays(
+ "widen-global-strings", cl::Hidden, cl::init(true), // hidden option, enabled by default
+ cl::desc("Enable the widening of global strings to alignment boundaries"));
+
extern cl::opt<TailPredication::Mode> EnableTailPredication;
extern cl::opt<bool> EnableMaskedGatherScatters;
@@ -2805,3 +2809,32 @@ bool ARMTTIImpl::isProfitableToSinkOperands(Instruction *I,
}
return true;
}
+
+/// Compute how many padding bytes should be added to a global array of
+/// \p Size bytes so that its size becomes a multiple of 4.
+///
+/// \param Size      Current size of the array, in bytes.
+/// \param ArrayType Type of the array; only arrays of integers are padded.
+/// \return The number of bytes to pad (1-3), or 0 when padding is disabled
+/// through -widen-global-strings, \p ArrayType is not an integer array,
+/// \p Size is already a multiple of 4, or the padded size would exceed
+/// getMaxMemIntrinsicInlineSizeThreshold().
+unsigned ARMTTIImpl::getNumBytesToPadGlobalArray(unsigned Size,
+                                                 Type *ArrayType) const {
+  if (!UseWidenGlobalArrays) {
+    LLVM_DEBUG(dbgs() << "Padding global arrays disabled\n");
+    // The result is a byte count, not a flag: report "no padding" as 0.
+    return 0;
+  }
+
+  // Don't modify non-integer array types.
+  if (!ArrayType || !ArrayType->isArrayTy() ||
+      !ArrayType->getArrayElementType()->isIntegerTy())
+    return 0;
+
+  // We pad to 4 byte boundaries.
+  const unsigned Remainder = Size % 4;
+  if (Remainder == 0)
+    return 0;
+
+  const unsigned NumBytesToPad = 4 - Remainder;
+
+  // Max number of bytes that memcpy allows for lowering to load/stores before
+  // it uses library function (__aeabi_memcpy).
+  const unsigned MaxMemIntrinsicSize = getMaxMemIntrinsicInlineSizeThreshold();
+
+  // Equivalent to "Size + NumBytesToPad > MaxMemIntrinsicSize", written so
+  // that the addition cannot wrap around for very large sizes.
+  if (Size > MaxMemIntrinsicSize ||
+      NumBytesToPad > MaxMemIntrinsicSize - Size)
+    return 0;
+
+  return NumBytesToPad;
+}
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index b0a75134ee02b7..3a4f940088b2e3 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -337,6 +337,9 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
bool isProfitableToSinkOperands(Instruction *I,
SmallVectorImpl<Use *> &Ops) const;
+
+ unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const;
+
/// @}
};
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index aae4926e027ff4..4647c65a5c850f 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -92,6 +92,8 @@ STATISTIC(NumInternalFunc, "Number of internal functions");
STATISTIC(NumColdCC, "Number of functions marked coldcc");
STATISTIC(NumIFuncsResolved, "Number of statically resolved IFuncs");
STATISTIC(NumIFuncsDeleted, "Number of IFuncs removed");
+STATISTIC(NumGlobalArraysPadded,
+ "Number of global arrays padded to alignment boundary");
static cl::opt<bool>
EnableColdCCStressTest("enable-coldcc-stress-test",
@@ -2029,6 +2031,165 @@ OptimizeFunctions(Module &M,
return Changed;
}
+/// Return true when \p CI is non-null and its called function is the
+/// llvm.memcpy intrinsic.
+static bool callInstIsMemcpy(CallInst *CI) {
+  if (!CI)
+    return false;
+
+  Function *Callee = CI->getCalledFunction();
+  return Callee && Callee->isIntrinsic() &&
+         Callee->getIntrinsicID() == Intrinsic::memcpy;
+}
+
+/// Check whether the destination of the memcpy call \p CI may be widened.
+///
+/// That requires argument 3 (the volatile flag) to be a constant other than
+/// one, and argument 0 (the destination) to be a static alloca whose
+/// allocated type is an array.
+static bool destArrayCanBeWidened(CallInst *CI) {
+  auto *Dest = dyn_cast<AllocaInst>(CI->getArgOperand(0));
+  if (!Dest)
+    return false;
+
+  auto *VolatileFlag = dyn_cast<ConstantInt>(CI->getArgOperand(3));
+  if (!VolatileFlag || VolatileFlag->isOne())
+    return false;
+
+  return Dest->isStaticAlloca() && Dest->getAllocatedType()->isArrayTy();
+}
+
+/// Build a padded copy of the constant data array global \p OldVar.
+///
+/// The raw initializer bytes are followed by \p NumBytesToPad zero bytes, and
+/// the first NumBytesToCopy + NumBytesToPad of them become the initializer of
+/// a new constant global created in the module of \p F. The new global copies
+/// the attributes of \p OldVar and takes over its name.
+///
+/// \return The new global, or nullptr if \p OldVar has no initializer or its
+/// initializer is not a ConstantDataArray.
+static GlobalVariable *widenGlobalVariable(GlobalVariable *OldVar, Function *F,
+                                           unsigned NumBytesToPad,
+                                           unsigned NumBytesToCopy) {
+  if (!OldVar->hasInitializer())
+    return nullptr;
+
+  auto *OldInit = dyn_cast<ConstantDataArray>(OldVar->getInitializer());
+  if (!OldInit)
+    return nullptr;
+
+  // Update to be word aligned (memcpy(...,X,...)): take the raw bytes of the
+  // old initializer and extend them with null bytes.
+  StringRef RawBytes = OldInit->getRawDataValues();
+  std::vector<uint8_t> PaddedBytes(RawBytes.begin(), RawBytes.end());
+  PaddedBytes.insert(PaddedBytes.end(), NumBytesToPad, uint8_t{0});
+
+  // View exactly the copied bytes plus the padding.
+  const unsigned PaddedSize = NumBytesToCopy + NumBytesToPad;
+  auto PaddedRef = ArrayRef(PaddedBytes.data(), PaddedSize);
+
+  // Create new padded version of global variable.
+  Constant *PaddedInit = ConstantDataArray::get(F->getContext(), PaddedRef);
+  auto *NewGV = new GlobalVariable(*F->getParent(), PaddedInit->getType(),
+                                   /*isConstant=*/true, OldVar->getLinkage(),
+                                   PaddedInit, PaddedInit->getName());
+
+  // Copy any other attributes from the original global variable,
+  // e.g. unnamed_addr, and let the replacement take over its name.
+  NewGV->copyAttributesFrom(OldVar);
+  NewGV->takeName(OldVar);
+  return NewGV;
+}
+
+/// Replace the alloca that is the destination of the memcpy \p CI with a
+/// wider array alloca.
+///
+/// The new array keeps the element type, name and alignment of the old one
+/// and has enough elements (sized by \p SourceDataArray's element byte size)
+/// to cover NumBytesToCopy + NumBytesToPad bytes. Does nothing when the
+/// destination operand is not an alloca.
+static void widenDestArray(CallInst *CI, const unsigned NumBytesToPad,
+                           const unsigned NumBytesToCopy,
+                           ConstantDataArray *SourceDataArray) {
+  auto *OldAlloca = dyn_cast<AllocaInst>(CI->getArgOperand(0));
+  if (!OldAlloca)
+    return;
+
+  // Round the padded byte count up to a whole number of elements.
+  const unsigned ElementBytes = SourceDataArray->getElementByteSize();
+  const unsigned PaddedBytes = NumBytesToCopy + NumBytesToPad;
+  const unsigned PaddedElements = divideCeil(PaddedBytes, ElementBytes);
+
+  // Update destination array to be word aligned (memcpy(X,...,...)).
+  IRBuilder<> Builder(OldAlloca);
+  Type *ElementTy = OldAlloca->getAllocatedType()->getArrayElementType();
+  AllocaInst *WideAlloca =
+      Builder.CreateAlloca(ArrayType::get(ElementTy, PaddedElements));
+  WideAlloca->takeName(OldAlloca);
+  WideAlloca->setAlignment(OldAlloca->getAlign());
+  OldAlloca->replaceAllUsesWith(WideAlloca);
+  OldAlloca->eraseFromParent();
+}
+
+/// Widen \p SourceVar by \p NumBytesToPad bytes and update its memcpy users.
+///
+/// For every user that is a memcpy intrinsic call with a widenable destination
+/// and \p SourceVar as its source operand, the destination alloca is widened
+/// and the length operand is set to NumBytesToCopy + NumBytesToPad. All uses
+/// of \p SourceVar are then redirected to the padded global.
+///
+/// \return false if no padded global could be created, true otherwise.
+static bool tryWidenGlobalArrayAndDests(Function *F, GlobalVariable *SourceVar,
+                                        const unsigned NumBytesToPad,
+                                        const unsigned NumBytesToCopy,
+                                        ConstantInt *BytesToCopyOp,
+                                        ConstantDataArray *SourceDataArray) {
+  GlobalVariable *PaddedGV =
+      widenGlobalVariable(SourceVar, F, NumBytesToPad, NumBytesToCopy);
+  if (!PaddedGV)
+    return false;
+
+  const unsigned PaddedSize = NumBytesToCopy + NumBytesToPad;
+
+  // Update arguments of remaining uses that are memcpys.
+  for (auto *U : SourceVar->users()) {
+    auto *Call = dyn_cast<CallInst>(U);
+    const bool IsWidenableCopyFromSource =
+        callInstIsMemcpy(Call) && destArrayCanBeWidened(Call) &&
+        Call->getArgOperand(1) == SourceVar;
+    if (!IsWidenableCopyFromSource)
+      continue;
+
+    widenDestArray(Call, NumBytesToPad, NumBytesToCopy, SourceDataArray);
+
+    // NOTE(review): the length is overwritten without looking at this call's
+    // own original length - confirm every such user copies NumBytesToCopy.
+    Call->setArgOperand(
+        2, ConstantInt::get(BytesToCopyOp->getType(), PaddedSize));
+  }
+  SourceVar->replaceAllUsesWith(PaddedGV);
+
+  ++NumGlobalArraysPadded;
+  return true;
+}
+
+/// Try to pad the constant global array \p GV and the stack arrays it is
+/// copied into by memcpy.
+///
+/// \p GV must be a constant with an initializer, local linkage and
+/// unnamed_addr, and its initializer must be a ConstantDataArray. The first
+/// memcpy user is chosen whose destination passes destArrayCanBeWidened(),
+/// whose length is a constant, and for which the number of copied elements,
+/// the destination array size and the source array size are all equal. If
+/// the target then requests padding through getNumBytesToPadGlobalArray(),
+/// the global and its destinations are widened.
+///
+/// \param GV     Global variable to inspect.
+/// \param GetTTI Callback returning the TargetTransformInfo for a function.
+/// \return true if the global was widened, false otherwise.
+static bool tryWidenGlobalArraysUsedByMemcpy(
+    GlobalVariable *GV,
+    function_ref<TargetTransformInfo &(Function &)> GetTTI) {
+
+  if (!GV->hasInitializer() || !GV->isConstant() || !GV->hasLocalLinkage() ||
+      !GV->hasGlobalUnnamedAddr())
+    return false;
+
+  // The initializer does not depend on the user, so inspect it only once.
+  auto *SourceDataArray = dyn_cast<ConstantDataArray>(GV->getInitializer());
+  if (!SourceDataArray)
+    return false;
+
+  const uint64_t SZSize = SourceDataArray->getType()->getNumElements();
+  const unsigned ElementByteWidth = SourceDataArray->getElementByteSize();
+
+  for (auto *U : GV->users()) {
+    auto *CI = dyn_cast<CallInst>(U);
+    if (!callInstIsMemcpy(CI) || !destArrayCanBeWidened(CI))
+      continue;
+
+    // Use the function that contains the memcpy: the called function is only
+    // the intrinsic declaration, so target information queried for it would
+    // not reflect the code that actually performs the copy.
+    Function *F = CI->getFunction();
+
+    auto *BytesToCopyOp = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+    if (!BytesToCopyOp)
+      continue;
+
+    unsigned NumBytesToCopy = BytesToCopyOp->getZExtValue();
+
+    // destArrayCanBeWidened() already established that the destination is an
+    // alloca of array type, so an asserting cast states the invariant.
+    auto *Alloca = cast<AllocaInst>(CI->getArgOperand(0));
+    uint64_t DZSize = Alloca->getAllocatedType()->getArrayNumElements();
+    // Calculate the number of elements to copy while avoiding floored
+    // division of integers returning wrong values i.e. copying one byte
+    // from an array of i16 would yield 0 elements to copy as opposed to 1.
+    unsigned NumElementsToCopy = divideCeil(NumBytesToCopy, ElementByteWidth);
+
+    // For safety purposes lets add a constraint and only pad when
+    // NumElementsToCopy == destination array size ==
+    // source which is a constant
+    if (NumElementsToCopy != DZSize || DZSize != SZSize)
+      continue;
+
+    unsigned NumBytesToPad = GetTTI(*F).getNumBytesToPadGlobalArray(
+        NumBytesToCopy, SourceDataArray->getType());
+    if (NumBytesToPad) {
+      return tryWidenGlobalArrayAndDests(F, GV, NumBytesToPad, NumBytesToCopy,
+                                         BytesToCopyOp, SourceDataArray);
+    }
+  }
+  return false;
+}
+
static bool
OptimizeGlobalVars(Module &M,
function_ref<TargetTransformInfo &(Function &)> GetTTI,
@@ -2058,6 +2219,10 @@ OptimizeGlobalVars(Module &M,
continue;
}
+ // For global variable arrays called in a memcpy
+ // we try to pad to nearest valid alignment boundary
+ Changed |= tryWidenGlobalArraysUsedByMemcpy(&GV, GetTTI);
+
Changed |= processGlobal(GV, GetTTI, GetTLI, LookupDomTree);
}
return Changed;
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-dest-non-array.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-dest-non-array.ll
new file mode 100644
index 00000000000000..ab04e0a5bc697e
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-dest-non-array.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
+
+@.i8 = private unnamed_addr constant [3 x i8] [i8 1, i8 2, i8 3] , align 1
+
+define void @memcpy_struct() {
+; CHECK-LABEL: define void @memcpy_struct() local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[SOMETHING:%.*]] = alloca { i8, i8, i8 }, align 1
+; CHECK-NEXT: [[CALL1:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]])
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false)
+; CHECK-NEXT: ret void
+;
+entry:
+ %something = alloca {i8, i8, i8}, align 1
+ %call1 = call i32 @bar(ptr nonnull %something)
+ call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false)
+ ret void
+}
+
+
+@.i8_multi = private unnamed_addr constant [2 x [3 x i8]] [[3 x i8] [i8 1, i8 2, i8 3], [3 x i8] [i8 4, i8 5, i8 6]] , align 1
+
+define void @memcpy_array_multidimensional() {
+; CHECK-LABEL: define void @memcpy_array_multidimensional() local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [2 x [3 x i8]], align 1
+; CHECK-NEXT: [[CALL1:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]])
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @.i8_multi, i32 3, i1 false)
+; CHECK-NEXT: ret void
+;
+entry:
+ %something = alloca [2 x [3 x i8]], align 1
+ %call1 = call i32 @bar(ptr nonnull %something)
+ call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something, ptr noundef nonnull align 1 dereferenceable(3) @.i8_multi, i32 3, i1 false)
+ ret void
+}
+
+declare i32 @bar(...)
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-global-dest.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-global-dest.ll
new file mode 100644
index 00000000000000..f435ffdeed2c8e
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-global-dest.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
+
+; CHECK: [3 x i8]
+@other = private unnamed_addr global [3 x i8] [i8 1, i8 2, i8 3] , align 1
+; CHECK: [4 x i8]
+@.i8 = private unnamed_addr constant [3 x i8] [i8 1, i8 2, i8 3] , align 1
+
+define void @memcpy_multiple() {
+; CHECK-LABEL: define void @memcpy_multiple() local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [4 x i8], align 1
+; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]])
+; CHECK-NEXT: [[CALL3:%.*]] = call i32 @bar(ptr nonnull @other)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) @other, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false)
+; CHECK-NEXT: ret void
+;
+entry:
+ %something = alloca [3 x i8], align 1
+ %call1 = call i32 @bar(ptr nonnull %something)
+ %call2 = call i32 @bar(ptr nonnull @other)
+ call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) @other, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false)
+ ret void
+}
+
+declare i32 @bar(...)
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll
new file mode 100644
index 00000000000000..c7ca7271fd3d27
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
+
+@.i16 = private unnamed_addr constant [5 x i16] [i16 1, i16 2, i16 3, i16 4, i16 5] , align 1
+
+define void @memcpy_i16_array() {
+; CHECK-LABEL: define void @memcpy_i16_array() local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[SOMETHING1:%.*]] = alloca [6 x i16], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) [[SOMETHING1]], ptr noundef nonnull align 1 dereferenceable(10) @.i16, i32 12, i1 false)
+; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING1]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %something = alloca [5 x i16], align 1
+ call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) %something, ptr noundef nonnull align 1 dereferenceable(10) @.i16, i32 10, i1 false)
+ %call2 = call i32 @bar(ptr nonnull %something)
+ ret void
+}
+
+
+declare i32 @bar(...)
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-const-global.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-const-global.ll
new file mode 100644
index 00000000000000..3d9c42fe1f3dd8
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-const-global.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
+
+@.str = unnamed_addr global [3 x i8] c"12\00", align 1
+
+define void @foo() {
+; CHECK-LABEL: define void @foo() local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [3 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @.str, i32 3, i1 false)
+; CHECK-NEXT: [[CALL1:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %something = alloca [3 x i8], align 1
+ call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something, ptr noundef nonnull align 1 dereferenceable(3) @.str, i32 3, i1 false)
+ %call1 = call i32 @bar(ptr nonnull %something)
+ ret void
+}
+
+declare i32 @bar(...)
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll
new file mode 100644
index 00000000000000..e37925a78d2c3a
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
+
+@.i8 = private unnamed_addr constant [3 x i8] [i8 1, i8 2, i8 3] , align 1
+
+define void @memcpy_multiple() {
+; CHECK-LABEL: define void @memcpy_multiple() local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[SOMETHING2:%.*]] = alloca [4 x i8], align 1
+; CHECK-NEXT: [[SOMETHING1:%.*]] = alloca [4 x i8], align 1
+; CHECK-NEXT: [[SOMETHING3:%.*]] = alloca [4 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING2]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING1]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferencea...
[truncated]
|
@llvm/pr-subscribers-backend-arm Author: Nashe Mncube (nasherm). Changes: Reverts llvm/llvm-project#112701. Also adds a config file for the tests so that they pass on the buildbots. Patch is 34.02 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/113288.diff. 18 files affected:
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 0459941fe05cdc..0dc513d8e65b76 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1819,6 +1819,10 @@ class TargetTransformInfo {
/// \return The maximum number of function arguments the target supports.
unsigned getMaxNumArgs() const;
+ /// \return The number of padding bytes to add to a global array of the
+ /// given \p Size (in bytes) and \p ArrayType. Default is no padding (0).
+ unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const;
+
/// @}
private:
@@ -2225,6 +2229,8 @@ class TargetTransformInfo::Concept {
getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
virtual bool hasArmWideBranch(bool Thumb) const = 0;
virtual unsigned getMaxNumArgs() const = 0;
+ virtual unsigned getNumBytesToPadGlobalArray(unsigned Size,
+ Type *ArrayType) const = 0;
};
template <typename T>
@@ -3026,6 +3032,11 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
unsigned getMaxNumArgs() const override {
return Impl.getMaxNumArgs();
}
+
+ unsigned getNumBytesToPadGlobalArray(unsigned Size,
+ Type *ArrayType) const override {
+ return Impl.getNumBytesToPadGlobalArray(Size, ArrayType); // forward to the wrapped Impl object
+ }
};
template <typename T>
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index dbdfb4d8cdfa32..0b7792f89a05c4 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1006,6 +1006,10 @@ class TargetTransformInfoImplBase {
unsigned getMaxNumArgs() const { return UINT_MAX; }
+ unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const {
+ return 0; // base implementation: never request padding
+ }
+
protected:
// Obtain the minimum required size to hold the value (without the sign)
// In case of a vector it returns the min required size for one element.
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index a47462b61e03b2..60704733637614 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1383,6 +1383,12 @@ bool TargetTransformInfo::isVectorShiftByScalarCheap(Type *Ty) const {
return TTIImpl->isVectorShiftByScalarCheap(Ty);
}
+unsigned
+TargetTransformInfo::getNumBytesToPadGlobalArray(unsigned Size,
+ Type *ArrayType) const {
+ return TTIImpl->getNumBytesToPadGlobalArray(Size, ArrayType); // delegate to the TTIImpl object
+}
+
TargetTransformInfo::Concept::~Concept() = default;
TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 835ae98efb852d..9f6e5e5ab1421c 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -56,6 +56,10 @@ static cl::opt<bool>
AllowWLSLoops("allow-arm-wlsloops", cl::Hidden, cl::init(true),
cl::desc("Enable the generation of WLS loops"));
+static cl::opt<bool> UseWidenGlobalArrays(
+ "widen-global-strings", cl::Hidden, cl::init(true), // hidden option, enabled by default
+ cl::desc("Enable the widening of global strings to alignment boundaries"));
+
extern cl::opt<TailPredication::Mode> EnableTailPredication;
extern cl::opt<bool> EnableMaskedGatherScatters;
@@ -2805,3 +2809,32 @@ bool ARMTTIImpl::isProfitableToSinkOperands(Instruction *I,
}
return true;
}
+
+/// Compute how many padding bytes should be added to a global array of
+/// \p Size bytes so that its size becomes a multiple of 4.
+///
+/// \param Size      Current size of the array, in bytes.
+/// \param ArrayType Type of the array; only arrays of integers are padded.
+/// \return The number of bytes to pad (1-3), or 0 when padding is disabled
+/// through -widen-global-strings, \p ArrayType is not an integer array,
+/// \p Size is already a multiple of 4, or the padded size would exceed
+/// getMaxMemIntrinsicInlineSizeThreshold().
+unsigned ARMTTIImpl::getNumBytesToPadGlobalArray(unsigned Size,
+                                                 Type *ArrayType) const {
+  if (!UseWidenGlobalArrays) {
+    LLVM_DEBUG(dbgs() << "Padding global arrays disabled\n");
+    // The result is a byte count, not a flag: report "no padding" as 0.
+    return 0;
+  }
+
+  // Don't modify non-integer array types.
+  if (!ArrayType || !ArrayType->isArrayTy() ||
+      !ArrayType->getArrayElementType()->isIntegerTy())
+    return 0;
+
+  // We pad to 4 byte boundaries.
+  const unsigned Remainder = Size % 4;
+  if (Remainder == 0)
+    return 0;
+
+  const unsigned NumBytesToPad = 4 - Remainder;
+
+  // Max number of bytes that memcpy allows for lowering to load/stores before
+  // it uses library function (__aeabi_memcpy).
+  const unsigned MaxMemIntrinsicSize = getMaxMemIntrinsicInlineSizeThreshold();
+
+  // Equivalent to "Size + NumBytesToPad > MaxMemIntrinsicSize", written so
+  // that the addition cannot wrap around for very large sizes.
+  if (Size > MaxMemIntrinsicSize ||
+      NumBytesToPad > MaxMemIntrinsicSize - Size)
+    return 0;
+
+  return NumBytesToPad;
+}
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index b0a75134ee02b7..3a4f940088b2e3 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -337,6 +337,9 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
bool isProfitableToSinkOperands(Instruction *I,
SmallVectorImpl<Use *> &Ops) const;
+
+ unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const;
+
/// @}
};
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index aae4926e027ff4..4647c65a5c850f 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -92,6 +92,8 @@ STATISTIC(NumInternalFunc, "Number of internal functions");
STATISTIC(NumColdCC, "Number of functions marked coldcc");
STATISTIC(NumIFuncsResolved, "Number of statically resolved IFuncs");
STATISTIC(NumIFuncsDeleted, "Number of IFuncs removed");
+STATISTIC(NumGlobalArraysPadded,
+ "Number of global arrays padded to alignment boundary");
static cl::opt<bool>
EnableColdCCStressTest("enable-coldcc-stress-test",
@@ -2029,6 +2031,165 @@ OptimizeFunctions(Module &M,
return Changed;
}
+/// Return true when \p CI is non-null and its called function is the
+/// llvm.memcpy intrinsic.
+static bool callInstIsMemcpy(CallInst *CI) {
+  if (!CI)
+    return false;
+
+  Function *Callee = CI->getCalledFunction();
+  return Callee && Callee->isIntrinsic() &&
+         Callee->getIntrinsicID() == Intrinsic::memcpy;
+}
+
+/// Check whether the destination of the memcpy call \p CI may be widened.
+///
+/// That requires argument 3 (the volatile flag) to be a constant other than
+/// one, and argument 0 (the destination) to be a static alloca whose
+/// allocated type is an array.
+static bool destArrayCanBeWidened(CallInst *CI) {
+  auto *Dest = dyn_cast<AllocaInst>(CI->getArgOperand(0));
+  if (!Dest)
+    return false;
+
+  auto *VolatileFlag = dyn_cast<ConstantInt>(CI->getArgOperand(3));
+  if (!VolatileFlag || VolatileFlag->isOne())
+    return false;
+
+  return Dest->isStaticAlloca() && Dest->getAllocatedType()->isArrayTy();
+}
+
+/// Build a padded copy of the constant data array global \p OldVar.
+///
+/// The raw initializer bytes are followed by \p NumBytesToPad zero bytes, and
+/// the first NumBytesToCopy + NumBytesToPad of them become the initializer of
+/// a new constant global created in the module of \p F. The new global copies
+/// the attributes of \p OldVar and takes over its name.
+///
+/// \return The new global, or nullptr if \p OldVar has no initializer or its
+/// initializer is not a ConstantDataArray.
+static GlobalVariable *widenGlobalVariable(GlobalVariable *OldVar, Function *F,
+                                           unsigned NumBytesToPad,
+                                           unsigned NumBytesToCopy) {
+  if (!OldVar->hasInitializer())
+    return nullptr;
+
+  auto *OldInit = dyn_cast<ConstantDataArray>(OldVar->getInitializer());
+  if (!OldInit)
+    return nullptr;
+
+  // Update to be word aligned (memcpy(...,X,...)): take the raw bytes of the
+  // old initializer and extend them with null bytes.
+  StringRef RawBytes = OldInit->getRawDataValues();
+  std::vector<uint8_t> PaddedBytes(RawBytes.begin(), RawBytes.end());
+  PaddedBytes.insert(PaddedBytes.end(), NumBytesToPad, uint8_t{0});
+
+  // View exactly the copied bytes plus the padding.
+  const unsigned PaddedSize = NumBytesToCopy + NumBytesToPad;
+  auto PaddedRef = ArrayRef(PaddedBytes.data(), PaddedSize);
+
+  // Create new padded version of global variable.
+  Constant *PaddedInit = ConstantDataArray::get(F->getContext(), PaddedRef);
+  auto *NewGV = new GlobalVariable(*F->getParent(), PaddedInit->getType(),
+                                   /*isConstant=*/true, OldVar->getLinkage(),
+                                   PaddedInit, PaddedInit->getName());
+
+  // Copy any other attributes from the original global variable,
+  // e.g. unnamed_addr, and let the replacement take over its name.
+  NewGV->copyAttributesFrom(OldVar);
+  NewGV->takeName(OldVar);
+  return NewGV;
+}
+
+/// Replace the alloca that is the destination of the memcpy \p CI with a
+/// wider array alloca.
+///
+/// The new array keeps the element type, name and alignment of the old one
+/// and has enough elements (sized by \p SourceDataArray's element byte size)
+/// to cover NumBytesToCopy + NumBytesToPad bytes. Does nothing when the
+/// destination operand is not an alloca.
+static void widenDestArray(CallInst *CI, const unsigned NumBytesToPad,
+                           const unsigned NumBytesToCopy,
+                           ConstantDataArray *SourceDataArray) {
+  auto *OldAlloca = dyn_cast<AllocaInst>(CI->getArgOperand(0));
+  if (!OldAlloca)
+    return;
+
+  // Round the padded byte count up to a whole number of elements.
+  const unsigned ElementBytes = SourceDataArray->getElementByteSize();
+  const unsigned PaddedBytes = NumBytesToCopy + NumBytesToPad;
+  const unsigned PaddedElements = divideCeil(PaddedBytes, ElementBytes);
+
+  // Update destination array to be word aligned (memcpy(X,...,...)).
+  IRBuilder<> Builder(OldAlloca);
+  Type *ElementTy = OldAlloca->getAllocatedType()->getArrayElementType();
+  AllocaInst *WideAlloca =
+      Builder.CreateAlloca(ArrayType::get(ElementTy, PaddedElements));
+  WideAlloca->takeName(OldAlloca);
+  WideAlloca->setAlignment(OldAlloca->getAlign());
+  OldAlloca->replaceAllUsesWith(WideAlloca);
+  OldAlloca->eraseFromParent();
+}
+
+/// Widen \p SourceVar by \p NumBytesToPad bytes and update its memcpy users.
+///
+/// For every user that is a memcpy intrinsic call with a widenable destination
+/// and \p SourceVar as its source operand, the destination alloca is widened
+/// and the length operand is set to NumBytesToCopy + NumBytesToPad. All uses
+/// of \p SourceVar are then redirected to the padded global.
+///
+/// \return false if no padded global could be created, true otherwise.
+static bool tryWidenGlobalArrayAndDests(Function *F, GlobalVariable *SourceVar,
+                                        const unsigned NumBytesToPad,
+                                        const unsigned NumBytesToCopy,
+                                        ConstantInt *BytesToCopyOp,
+                                        ConstantDataArray *SourceDataArray) {
+  GlobalVariable *PaddedGV =
+      widenGlobalVariable(SourceVar, F, NumBytesToPad, NumBytesToCopy);
+  if (!PaddedGV)
+    return false;
+
+  const unsigned PaddedSize = NumBytesToCopy + NumBytesToPad;
+
+  // Update arguments of remaining uses that are memcpys.
+  for (auto *U : SourceVar->users()) {
+    auto *Call = dyn_cast<CallInst>(U);
+    const bool IsWidenableCopyFromSource =
+        callInstIsMemcpy(Call) && destArrayCanBeWidened(Call) &&
+        Call->getArgOperand(1) == SourceVar;
+    if (!IsWidenableCopyFromSource)
+      continue;
+
+    widenDestArray(Call, NumBytesToPad, NumBytesToCopy, SourceDataArray);
+
+    // NOTE(review): the length is overwritten without looking at this call's
+    // own original length - confirm every such user copies NumBytesToCopy.
+    Call->setArgOperand(
+        2, ConstantInt::get(BytesToCopyOp->getType(), PaddedSize));
+  }
+  SourceVar->replaceAllUsesWith(PaddedGV);
+
+  ++NumGlobalArraysPadded;
+  return true;
+}
+
+/// Try to pad the constant global array \p GV and the stack arrays it is
+/// copied into by memcpy.
+///
+/// \p GV must be a constant with an initializer, local linkage and
+/// unnamed_addr, and its initializer must be a ConstantDataArray. The first
+/// memcpy user is chosen whose destination passes destArrayCanBeWidened(),
+/// whose length is a constant, and for which the number of copied elements,
+/// the destination array size and the source array size are all equal. If
+/// the target then requests padding through getNumBytesToPadGlobalArray(),
+/// the global and its destinations are widened.
+///
+/// \param GV     Global variable to inspect.
+/// \param GetTTI Callback returning the TargetTransformInfo for a function.
+/// \return true if the global was widened, false otherwise.
+static bool tryWidenGlobalArraysUsedByMemcpy(
+    GlobalVariable *GV,
+    function_ref<TargetTransformInfo &(Function &)> GetTTI) {
+
+  if (!GV->hasInitializer() || !GV->isConstant() || !GV->hasLocalLinkage() ||
+      !GV->hasGlobalUnnamedAddr())
+    return false;
+
+  // The initializer does not depend on the user, so inspect it only once.
+  auto *SourceDataArray = dyn_cast<ConstantDataArray>(GV->getInitializer());
+  if (!SourceDataArray)
+    return false;
+
+  const uint64_t SZSize = SourceDataArray->getType()->getNumElements();
+  const unsigned ElementByteWidth = SourceDataArray->getElementByteSize();
+
+  for (auto *U : GV->users()) {
+    auto *CI = dyn_cast<CallInst>(U);
+    if (!callInstIsMemcpy(CI) || !destArrayCanBeWidened(CI))
+      continue;
+
+    // Use the function that contains the memcpy: the called function is only
+    // the intrinsic declaration, so target information queried for it would
+    // not reflect the code that actually performs the copy.
+    Function *F = CI->getFunction();
+
+    auto *BytesToCopyOp = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+    if (!BytesToCopyOp)
+      continue;
+
+    unsigned NumBytesToCopy = BytesToCopyOp->getZExtValue();
+
+    // destArrayCanBeWidened() already established that the destination is an
+    // alloca of array type, so an asserting cast states the invariant.
+    auto *Alloca = cast<AllocaInst>(CI->getArgOperand(0));
+    uint64_t DZSize = Alloca->getAllocatedType()->getArrayNumElements();
+    // Calculate the number of elements to copy while avoiding floored
+    // division of integers returning wrong values i.e. copying one byte
+    // from an array of i16 would yield 0 elements to copy as opposed to 1.
+    unsigned NumElementsToCopy = divideCeil(NumBytesToCopy, ElementByteWidth);
+
+    // For safety purposes lets add a constraint and only pad when
+    // NumElementsToCopy == destination array size ==
+    // source which is a constant
+    if (NumElementsToCopy != DZSize || DZSize != SZSize)
+      continue;
+
+    unsigned NumBytesToPad = GetTTI(*F).getNumBytesToPadGlobalArray(
+        NumBytesToCopy, SourceDataArray->getType());
+    if (NumBytesToPad) {
+      return tryWidenGlobalArrayAndDests(F, GV, NumBytesToPad, NumBytesToCopy,
+                                         BytesToCopyOp, SourceDataArray);
+    }
+  }
+  return false;
+}
+
static bool
OptimizeGlobalVars(Module &M,
function_ref<TargetTransformInfo &(Function &)> GetTTI,
@@ -2058,6 +2219,10 @@ OptimizeGlobalVars(Module &M,
continue;
}
+ // For global variable arrays called in a memcpy
+ // we try to pad to nearest valid alignment boundary
+ Changed |= tryWidenGlobalArraysUsedByMemcpy(&GV, GetTTI);
+
Changed |= processGlobal(GV, GetTTI, GetTLI, LookupDomTree);
}
return Changed;
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-dest-non-array.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-dest-non-array.ll
new file mode 100644
index 00000000000000..ab04e0a5bc697e
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-dest-non-array.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
+
+@.i8 = private unnamed_addr constant [3 x i8] [i8 1, i8 2, i8 3] , align 1
+
+define void @memcpy_struct() {
+; CHECK-LABEL: define void @memcpy_struct() local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[SOMETHING:%.*]] = alloca { i8, i8, i8 }, align 1
+; CHECK-NEXT: [[CALL1:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]])
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false)
+; CHECK-NEXT: ret void
+;
+entry:
+ %something = alloca {i8, i8, i8}, align 1
+ %call1 = call i32 @bar(ptr nonnull %something)
+ call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false)
+ ret void
+}
+
+
+@.i8_multi = private unnamed_addr constant [2 x [3 x i8]] [[3 x i8] [i8 1, i8 2, i8 3], [3 x i8] [i8 4, i8 5, i8 6]] , align 1
+
+define void @memcpy_array_multidimensional() {
+; CHECK-LABEL: define void @memcpy_array_multidimensional() local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [2 x [3 x i8]], align 1
+; CHECK-NEXT: [[CALL1:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]])
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @.i8_multi, i32 3, i1 false)
+; CHECK-NEXT: ret void
+;
+entry:
+ %something = alloca [2 x [3 x i8]], align 1
+ %call1 = call i32 @bar(ptr nonnull %something)
+ call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something, ptr noundef nonnull align 1 dereferenceable(3) @.i8_multi, i32 3, i1 false)
+ ret void
+}
+
+declare i32 @bar(...)
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-global-dest.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-global-dest.ll
new file mode 100644
index 00000000000000..f435ffdeed2c8e
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-global-dest.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
+
+; CHECK: [3 x i8]
+@other = private unnamed_addr global [3 x i8] [i8 1, i8 2, i8 3] , align 1
+; CHECK: [4 x i8]
+@.i8 = private unnamed_addr constant [3 x i8] [i8 1, i8 2, i8 3] , align 1
+
+define void @memcpy_multiple() {
+; CHECK-LABEL: define void @memcpy_multiple() local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [4 x i8], align 1
+; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]])
+; CHECK-NEXT: [[CALL3:%.*]] = call i32 @bar(ptr nonnull @other)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) @other, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false)
+; CHECK-NEXT: ret void
+;
+entry:
+ %something = alloca [3 x i8], align 1
+ %call1 = call i32 @bar(ptr nonnull %something)
+ %call2 = call i32 @bar(ptr nonnull @other)
+ call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) @other, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false)
+ ret void
+}
+
+declare i32 @bar(...)
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll
new file mode 100644
index 00000000000000..c7ca7271fd3d27
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
+
+@.i16 = private unnamed_addr constant [5 x i16] [i16 1, i16 2, i16 3, i16 4, i16 5] , align 1
+
+define void @memcpy_i16_array() {
+; CHECK-LABEL: define void @memcpy_i16_array() local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[SOMETHING1:%.*]] = alloca [6 x i16], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) [[SOMETHING1]], ptr noundef nonnull align 1 dereferenceable(10) @.i16, i32 12, i1 false)
+; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING1]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %something = alloca [5 x i16], align 1
+ call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) %something, ptr noundef nonnull align 1 dereferenceable(10) @.i16, i32 10, i1 false)
+ %call2 = call i32 @bar(ptr nonnull %something)
+ ret void
+}
+
+
+declare i32 @bar(...)
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-const-global.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-const-global.ll
new file mode 100644
index 00000000000000..3d9c42fe1f3dd8
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-const-global.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
+
+@.str = unnamed_addr global [3 x i8] c"12\00", align 1
+
+define void @foo() {
+; CHECK-LABEL: define void @foo() local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [3 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @.str, i32 3, i1 false)
+; CHECK-NEXT: [[CALL1:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %something = alloca [3 x i8], align 1
+ call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something, ptr noundef nonnull align 1 dereferenceable(3) @.str, i32 3, i1 false)
+ %call1 = call i32 @bar(ptr nonnull %something)
+ ret void
+}
+
+declare i32 @bar(...)
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll
new file mode 100644
index 00000000000000..e37925a78d2c3a
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
+
+@.i8 = private unnamed_addr constant [3 x i8] [i8 1, i8 2, i8 3] , align 1
+
+define void @memcpy_multiple() {
+; CHECK-LABEL: define void @memcpy_multiple() local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[SOMETHING2:%.*]] = alloca [4 x i8], align 1
+; CHECK-NEXT: [[SOMETHING1:%.*]] = alloca [4 x i8], align 1
+; CHECK-NEXT: [[SOMETHING3:%.*]] = alloca [4 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING2]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING1]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferencea...
[truncated]
|
Reverts #112701
Also adds a config file for the tests so that they pass on the buildbots.