Skip to content

Commit c17ff0d

Browse files
committed
Correcting and refactoring elimination
1 parent 41aec6f commit c17ff0d

File tree

6 files changed

+78
-57
lines changed

6 files changed

+78
-57
lines changed

llvm/lib/Transforms/IPO/GlobalOpt.cpp

Lines changed: 67 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -2031,56 +2031,56 @@ OptimizeFunctions(Module &M,
20312031
return Changed;
20322032
}
20332033

2034-
static bool tryWidenDestArray(Function *F, CallInst *CI,
2035-
GlobalVariable *SourceVar, unsigned NumBytesToPad,
2036-
unsigned NumBytesToCopy,
2037-
ConstantDataArray *SourceDataArray) {
2034+
static bool callInstIsMemcpy(CallInst *CI) {
2035+
if (!CI)
2036+
return false;
2037+
2038+
Function *F = CI->getCalledFunction();
2039+
if (!F || !F->isIntrinsic() || F->getIntrinsicID() != Intrinsic::memcpy)
2040+
return false;
2041+
2042+
return true;
2043+
}
2044+
2045+
static bool destArrayCanBeWidened(CallInst *CI) {
20382046
auto *Alloca = dyn_cast<AllocaInst>(CI->getArgOperand(0));
20392047
auto *IsVolatile = dyn_cast<ConstantInt>(CI->getArgOperand(3));
20402048

20412049
if (!Alloca || !IsVolatile || IsVolatile->isOne())
20422050
return false;
20432051

2044-
if (!SourceVar->hasInitializer() || !SourceVar->isConstant() ||
2045-
!SourceVar->hasLocalLinkage() || !SourceVar->hasGlobalUnnamedAddr())
2046-
return false;
2047-
20482052
if (!Alloca->isStaticAlloca())
20492053
return false;
20502054

2051-
uint64_t DZSize = Alloca->getAllocatedType()->getArrayNumElements();
2052-
uint64_t SZSize = SourceDataArray->getType()->getNumElements();
2053-
unsigned ElementByteWidth = SourceDataArray->getElementByteSize();
2054-
// Calculate the number of elements to copy while avoiding floored
2055-
// division of integers returning wrong values i.e. copying one byte
2056-
// from an array of i16 would yield 0 elements to copy as opposed to 1.
2057-
unsigned NumElementsToCopy = divideCeil(NumBytesToCopy, ElementByteWidth);
2058-
2059-
// For safety purposes let's add a constraint and only pad when
2060-
// NumElementsToCopy == destination array size ==
2061-
// source string which is a constant
2062-
if (NumElementsToCopy != DZSize || DZSize != SZSize)
2063-
return false;
2055+
return true;
2056+
}
20642057

2058+
static void widenDestArray(CallInst *CI, const unsigned NumBytesToPad,
2059+
const unsigned NumBytesToCopy,
2060+
ConstantDataArray *SourceDataArray) {
2061+
unsigned ElementByteWidth = SourceDataArray->getElementByteSize();
20652062
unsigned int TotalBytes = NumBytesToCopy + NumBytesToPad;
2066-
NumElementsToCopy = divideCeil(TotalBytes, ElementByteWidth);
2067-
2063+
unsigned NumElementsToCopy = divideCeil(TotalBytes, ElementByteWidth);
20682064
// Update destination array to be word aligned (memcpy(X,...,...))
2065+
auto *Alloca = dyn_cast<AllocaInst>(CI->getArgOperand(0));
20692066
IRBuilder<> BuildAlloca(Alloca);
20702067
AllocaInst *NewAlloca = BuildAlloca.CreateAlloca(ArrayType::get(
20712068
Alloca->getAllocatedType()->getArrayElementType(), NumElementsToCopy));
20722069
NewAlloca->takeName(Alloca);
20732070
NewAlloca->setAlignment(Alloca->getAlign());
20742071
Alloca->replaceAllUsesWith(NewAlloca);
20752072
Alloca->eraseFromParent();
2076-
return true;
20772073
}
20782074

2079-
static bool widenGlobalArray(Function *F, CallInst *CI,
2080-
GlobalVariable *SourceVar, unsigned NumBytesToPad,
2081-
unsigned NumBytesToCopy,
2082-
ConstantInt *BytesToCopyOp,
2083-
ConstantDataArray *SourceDataArray) {
2075+
static bool tryWidenGlobalArrayAndDests(Function *F, GlobalVariable *SourceVar,
2076+
const unsigned NumBytesToPad,
2077+
const unsigned NumBytesToCopy,
2078+
ConstantInt *BytesToCopyOp,
2079+
ConstantDataArray *SourceDataArray) {
2080+
if (!SourceVar->hasInitializer() || !SourceVar->isConstant() ||
2081+
!SourceVar->hasLocalLinkage() || !SourceVar->hasGlobalUnnamedAddr())
2082+
return false;
2083+
20842084
// Update source to be word aligned (memcpy(...,X,...))
20852085
// create replacement with padded null bytes.
20862086
StringRef Data = SourceDataArray->getRawDataValues();
@@ -2100,48 +2100,69 @@ static bool widenGlobalArray(Function *F, CallInst *CI,
21002100
NewGV->copyAttributesFrom(SourceVar);
21012101
NewGV->takeName(SourceVar);
21022102

2103-
CI->setArgOperand(2, ConstantInt::get(BytesToCopyOp->getType(),
2104-
NumBytesToCopy + NumBytesToPad));
2103+
// Update arguments of remaining uses that
2104+
// are memcpys.
2105+
for (auto *User : SourceVar->users()) {
2106+
auto *CI = dyn_cast<CallInst>(User);
2107+
if (!callInstIsMemcpy(CI))
2108+
continue;
2109+
2110+
widenDestArray(CI, NumBytesToPad, NumBytesToCopy, SourceDataArray);
2111+
2112+
CI->setArgOperand(2, ConstantInt::get(BytesToCopyOp->getType(),
2113+
NumBytesToCopy + NumBytesToPad));
2114+
}
2115+
SourceVar->replaceAllUsesWith(NewGV);
2116+
21052117
NumGlobalArraysPadded++;
21062118
return true;
21072119
}
21082120

21092121
static bool tryWidenGlobalArraysUsedByMemcpy(
21102122
GlobalVariable *GV,
21112123
function_ref<TargetTransformInfo &(Function &)> GetTTI) {
2124+
2125+
if (!GV->hasInitializer())
2126+
return false;
2127+
21122128
for (auto *User : GV->users()) {
21132129
CallInst *CI = dyn_cast<CallInst>(User);
2114-
if (!CI)
2130+
if (!callInstIsMemcpy(CI) || !destArrayCanBeWidened(CI))
21152131
continue;
21162132

21172133
Function *F = CI->getCalledFunction();
2118-
if (!F || !F->isIntrinsic() || F->getIntrinsicID() != Intrinsic::memcpy)
2119-
continue;
21202134

2121-
TargetTransformInfo &TTI = GetTTI(*F);
21222135
auto *BytesToCopyOp = dyn_cast<ConstantInt>(CI->getArgOperand(2));
21232136
if (!BytesToCopyOp)
21242137
continue;
21252138

2126-
if (!GV->hasInitializer())
2127-
continue;
2128-
21292139
ConstantDataArray *SourceDataArray =
21302140
dyn_cast<ConstantDataArray>(GV->getInitializer());
21312141
if (!SourceDataArray)
21322142
continue;
21332143

21342144
unsigned NumBytesToCopy = BytesToCopyOp->getZExtValue();
2135-
unsigned NumBytesToPad = TTI.getNumBytesToPadGlobalArray(
2136-
NumBytesToCopy, SourceDataArray->getType());
21372145

2146+
auto *Alloca = dyn_cast<AllocaInst>(CI->getArgOperand(0));
2147+
uint64_t DZSize = Alloca->getAllocatedType()->getArrayNumElements();
2148+
uint64_t SZSize = SourceDataArray->getType()->getNumElements();
2149+
unsigned ElementByteWidth = SourceDataArray->getElementByteSize();
2150+
// Calculate the number of elements to copy while avoiding floored
2151+
// division of integers returning wrong values i.e. copying one byte
2152+
// from an array of i16 would yield 0 elements to copy as opposed to 1.
2153+
unsigned NumElementsToCopy = divideCeil(NumBytesToCopy, ElementByteWidth);
2154+
2155+
// For safety purposes let's add a constraint and only pad when
2156+
// NumElementsToCopy == destination array size ==
2157+
// source which is a constant
2158+
if (NumElementsToCopy != DZSize || DZSize != SZSize)
2159+
continue;
2160+
2161+
unsigned NumBytesToPad = GetTTI(*F).getNumBytesToPadGlobalArray(
2162+
NumBytesToCopy, SourceDataArray->getType());
21382163
if (NumBytesToPad) {
2139-
bool DestWidened = tryWidenDestArray(F, CI, GV, NumBytesToPad,
2140-
NumBytesToCopy, SourceDataArray);
2141-
if (DestWidened) {
2142-
return widenGlobalArray(F, CI, GV, NumBytesToPad, NumBytesToCopy,
2143-
BytesToCopyOp, SourceDataArray);
2144-
}
2164+
return tryWidenGlobalArrayAndDests(F, GV, NumBytesToPad, NumBytesToCopy,
2165+
BytesToCopyOp, SourceDataArray);
21452166
}
21462167
}
21472168
return false;

llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ define void @memcpy_i16_array() {
77
; CHECK-LABEL: define void @memcpy_i16_array() local_unnamed_addr {
88
; CHECK-NEXT: [[ENTRY:.*:]]
99
; CHECK-NEXT: [[SOMETHING1:%.*]] = alloca [6 x i16], align 1
10-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) [[SOMETHING1]], ptr noundef nonnull align 1 dereferenceable(10) @[[GLOB0:[0-9]+]], i32 12, i1 false)
10+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) [[SOMETHING1]], ptr noundef nonnull align 1 dereferenceable(10) @.i16, i32 12, i1 false)
1111
; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING1]])
1212
; CHECK-NEXT: ret void
1313
;

llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,15 @@
66
define void @memcpy_multiple() {
77
; CHECK-LABEL: define void @memcpy_multiple() local_unnamed_addr {
88
; CHECK-NEXT: [[ENTRY:.*:]]
9-
; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [4 x i8], align 1
10-
; CHECK-NEXT: [[SOMETHING1:%.*]] = alloca [4 x i8], align 1
119
; CHECK-NEXT: [[SOMETHING2:%.*]] = alloca [4 x i8], align 1
12-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @[[GLOB0:[0-9]+]], i32 4, i1 false)
13-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING1]], ptr noundef nonnull align 1 dereferenceable(3) @[[GLOB0]], i32 4, i1 false)
14-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING2]], ptr noundef nonnull align 1 dereferenceable(3) @[[GLOB0]], i32 4, i1 false)
15-
; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]])
10+
; CHECK-NEXT: [[SOMETHING1:%.*]] = alloca [4 x i8], align 1
11+
; CHECK-NEXT: [[SOMETHING3:%.*]] = alloca [4 x i8], align 1
12+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING2]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false)
13+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING1]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false)
14+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING3]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false)
15+
; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING2]])
1616
; CHECK-NEXT: [[CALL3:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING1]])
17-
; CHECK-NEXT: [[CALL4:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING2]])
17+
; CHECK-NEXT: [[CALL4:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING3]])
1818
; CHECK-NEXT: ret void
1919
;
2020
entry:

llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-1.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ define void @foo() {
77
; CHECK-LABEL: define void @foo() local_unnamed_addr {
88
; CHECK-NEXT: [[ENTRY:.*:]]
99
; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [12 x i8], align 1
10-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(10) @[[GLOB0:[0-9]+]], i32 12, i1 false)
10+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(10) @.str, i32 12, i1 false)
1111
; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]])
1212
; CHECK-NEXT: ret void
1313
;

llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-2.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ define void @foo() {
77
; CHECK-LABEL: define void @foo() local_unnamed_addr {
88
; CHECK-NEXT: [[ENTRY:.*:]]
99
; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [64 x i8], align 1
10-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(62) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(62) @[[GLOB0:[0-9]+]], i32 64, i1 false)
10+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(62) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(62) @.str, i32 64, i1 false)
1111
; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]])
1212
; CHECK-NEXT: ret void
1313
;

llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-ptrtoint.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ define i32 @f() {
1111
; CHECK-NEXT: [[POS:%.*]] = alloca i32, align 4
1212
; CHECK-NEXT: [[TOKEN:%.*]] = alloca ptr, align 4
1313
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 45, ptr [[STRING1]])
14-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[STRING1]], ptr align 1 @[[GLOB0:[0-9]+]], i32 48, i1 false)
14+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[STRING1]], ptr align 1 @f.string1, i32 48, i1 false)
1515
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[POS]])
1616
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[TOKEN]])
1717
; CHECK-NEXT: [[CALL:%.*]] = call ptr @strchr(ptr [[STRING1]], i32 101)

0 commit comments

Comments
 (0)