Skip to content

Commit 41aec6f

Browse files
committed
Review comments: eliminating generation of multiple globals
1 parent 21ca2ba commit 41aec6f

10 files changed

+65
-61
lines changed

llvm/lib/Transforms/IPO/GlobalOpt.cpp

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2031,11 +2031,10 @@ OptimizeFunctions(Module &M,
20312031
return Changed;
20322032
}
20332033

2034-
static bool tryWidenGlobalArray(CallInst *CI, GlobalVariable *SourceVar,
2035-
unsigned NumBytesToPad, unsigned NumBytesToCopy,
2036-
ConstantInt *BytesToCopyOp,
2037-
ConstantDataArray *SourceDataArray) {
2038-
auto *F = CI->getCalledFunction();
2034+
static bool tryWidenDestArray(Function *F, CallInst *CI,
2035+
GlobalVariable *SourceVar, unsigned NumBytesToPad,
2036+
unsigned NumBytesToCopy,
2037+
ConstantDataArray *SourceDataArray) {
20392038
auto *Alloca = dyn_cast<AllocaInst>(CI->getArgOperand(0));
20402039
auto *IsVolatile = dyn_cast<ConstantInt>(CI->getArgOperand(3));
20412040

@@ -2055,8 +2054,7 @@ static bool tryWidenGlobalArray(CallInst *CI, GlobalVariable *SourceVar,
20552054
// Calculate the number of elements to copy while avoiding floored
20562055
// division of integers returning wrong values i.e. copying one byte
20572056
// from an array of i16 would yield 0 elements to copy as opposed to 1.
2058-
unsigned NumElementsToCopy =
2059-
(NumBytesToCopy + ElementByteWidth - 1) / ElementByteWidth;
2057+
unsigned NumElementsToCopy = divideCeil(NumBytesToCopy, ElementByteWidth);
20602058

20612059
// For safety purposes let's add a constraint and only pad when
20622060
// NumElementsToCopy == destination array size ==
@@ -2065,7 +2063,7 @@ static bool tryWidenGlobalArray(CallInst *CI, GlobalVariable *SourceVar,
20652063
return false;
20662064

20672065
unsigned int TotalBytes = NumBytesToCopy + NumBytesToPad;
2068-
NumElementsToCopy = (TotalBytes + ElementByteWidth - 1) / ElementByteWidth;
2066+
NumElementsToCopy = divideCeil(TotalBytes, ElementByteWidth);
20692067

20702068
// Update destination array to be word aligned (memcpy(X,...,...))
20712069
IRBuilder<> BuildAlloca(Alloca);
@@ -2075,14 +2073,21 @@ static bool tryWidenGlobalArray(CallInst *CI, GlobalVariable *SourceVar,
20752073
NewAlloca->setAlignment(Alloca->getAlign());
20762074
Alloca->replaceAllUsesWith(NewAlloca);
20772075
Alloca->eraseFromParent();
2076+
return true;
2077+
}
20782078

2079+
static bool widenGlobalArray(Function *F, CallInst *CI,
2080+
GlobalVariable *SourceVar, unsigned NumBytesToPad,
2081+
unsigned NumBytesToCopy,
2082+
ConstantInt *BytesToCopyOp,
2083+
ConstantDataArray *SourceDataArray) {
20792084
// Update source to be word aligned (memcpy(...,X,...))
20802085
// create replacement with padded null bytes.
20812086
StringRef Data = SourceDataArray->getRawDataValues();
20822087
std::vector<uint8_t> StrData(Data.begin(), Data.end());
20832088
for (unsigned int p = 0; p < NumBytesToPad; p++)
20842089
StrData.push_back('\0');
2085-
auto Arr = ArrayRef(StrData.data(), TotalBytes);
2090+
auto Arr = ArrayRef(StrData.data(), NumBytesToCopy + NumBytesToPad);
20862091

20872092
// Create new padded version of global variable.
20882093
Constant *SourceReplace = ConstantDataArray::get(F->getContext(), Arr);
@@ -2095,11 +2100,8 @@ static bool tryWidenGlobalArray(CallInst *CI, GlobalVariable *SourceVar,
20952100
NewGV->copyAttributesFrom(SourceVar);
20962101
NewGV->takeName(SourceVar);
20972102

2098-
// Replace intrinsic source.
2099-
CI->setArgOperand(1, NewGV);
2100-
2101-
// Update number of bytes to copy (memcpy(...,...,X))
2102-
CI->setArgOperand(2, ConstantInt::get(BytesToCopyOp->getType(), TotalBytes));
2103+
CI->setArgOperand(2, ConstantInt::get(BytesToCopyOp->getType(),
2104+
NumBytesToCopy + NumBytesToPad));
21032105
NumGlobalArraysPadded++;
21042106
return true;
21052107
}
@@ -2133,9 +2135,14 @@ static bool tryWidenGlobalArraysUsedByMemcpy(
21332135
unsigned NumBytesToPad = TTI.getNumBytesToPadGlobalArray(
21342136
NumBytesToCopy, SourceDataArray->getType());
21352137

2136-
if (NumBytesToPad)
2137-
return tryWidenGlobalArray(CI, GV, NumBytesToPad, NumBytesToCopy,
2138-
BytesToCopyOp, SourceDataArray);
2138+
if (NumBytesToPad) {
2139+
bool DestWidened = tryWidenDestArray(F, CI, GV, NumBytesToPad,
2140+
NumBytesToCopy, SourceDataArray);
2141+
if (DestWidened) {
2142+
return widenGlobalArray(F, CI, GV, NumBytesToPad, NumBytesToCopy,
2143+
BytesToCopyOp, SourceDataArray);
2144+
}
2145+
}
21392146
}
21402147
return false;
21412148
}
Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2-
; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s
2+
; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
33

44
@.i16 = private unnamed_addr constant [5 x i16] [i16 1, i16 2, i16 3, i16 4, i16 5] , align 1
55

6-
define hidden void @memcpy_i16_array() local_unnamed_addr {
7-
; CHECK-LABEL: define hidden void @memcpy_i16_array() local_unnamed_addr {
6+
define void @memcpy_i16_array() {
7+
; CHECK-LABEL: define void @memcpy_i16_array() local_unnamed_addr {
88
; CHECK-NEXT: [[ENTRY:.*:]]
99
; CHECK-NEXT: [[SOMETHING1:%.*]] = alloca [6 x i16], align 1
10-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) [[SOMETHING1]], ptr noundef nonnull align 1 dereferenceable(10) @.i16, i32 12, i1 false)
10+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) [[SOMETHING1]], ptr noundef nonnull align 1 dereferenceable(10) @[[GLOB0:[0-9]+]], i32 12, i1 false)
1111
; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING1]])
1212
; CHECK-NEXT: ret void
1313
;
@@ -19,4 +19,4 @@ entry:
1919
}
2020

2121

22-
declare i32 @bar(...) local_unnamed_addr
22+
declare i32 @bar(...)

llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2-
; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s
2+
; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
33

44
@.i8 = private unnamed_addr constant [3 x i8] [i8 1, i8 2, i8 3] , align 1
55

6-
define hidden void @memcpy_multiple() local_unnamed_addr {
7-
; CHECK-LABEL: define hidden void @memcpy_multiple() local_unnamed_addr {
6+
define void @memcpy_multiple() {
7+
; CHECK-LABEL: define void @memcpy_multiple() local_unnamed_addr {
88
; CHECK-NEXT: [[ENTRY:.*:]]
99
; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [4 x i8], align 1
1010
; CHECK-NEXT: [[SOMETHING1:%.*]] = alloca [4 x i8], align 1
1111
; CHECK-NEXT: [[SOMETHING2:%.*]] = alloca [4 x i8], align 1
12-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @[[GLOB1:[0-9]+]], i32 4, i1 false)
13-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING1]], ptr noundef nonnull align 1 dereferenceable(3) @[[GLOB0:[0-9]+]], i32 4, i1 false)
14-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING2]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false)
12+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @[[GLOB0:[0-9]+]], i32 4, i1 false)
13+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING1]], ptr noundef nonnull align 1 dereferenceable(3) @[[GLOB0]], i32 4, i1 false)
14+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING2]], ptr noundef nonnull align 1 dereferenceable(3) @[[GLOB0]], i32 4, i1 false)
1515
; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]])
1616
; CHECK-NEXT: [[CALL3:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING1]])
1717
; CHECK-NEXT: [[CALL4:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING2]])
@@ -30,4 +30,4 @@ entry:
3030
ret void
3131
}
3232

33-
declare i32 @bar(...) local_unnamed_addr
33+
declare i32 @bar(...)
Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2-
; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s
2+
; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
33

4-
; CHECK: [12 x i8]
54
@.str = private unnamed_addr constant [10 x i8] c"123456789\00", align 1
65

7-
define hidden void @foo() local_unnamed_addr {
8-
; CHECK-LABEL: define hidden void @foo() local_unnamed_addr {
6+
define void @foo() {
7+
; CHECK-LABEL: define void @foo() local_unnamed_addr {
98
; CHECK-NEXT: [[ENTRY:.*:]]
109
; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [12 x i8], align 1
11-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(10) @.str, i32 12, i1 false)
10+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(10) @[[GLOB0:[0-9]+]], i32 12, i1 false)
1211
; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]])
1312
; CHECK-NEXT: ret void
1413
;
@@ -19,4 +18,4 @@ entry:
1918
ret void
2019
}
2120

22-
declare i32 @bar(...) local_unnamed_addr
21+
declare i32 @bar(...)
Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2-
; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s
2+
; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
33

4-
; CHECK: [64 x i8]
54
@.str = private unnamed_addr constant [62 x i8] c"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\00", align 1
65

7-
define hidden void @foo() local_unnamed_addr {
8-
; CHECK-LABEL: define hidden void @foo() local_unnamed_addr {
6+
define void @foo() {
7+
; CHECK-LABEL: define void @foo() local_unnamed_addr {
98
; CHECK-NEXT: [[ENTRY:.*:]]
109
; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [64 x i8], align 1
11-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(62) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(62) @.str, i32 64, i1 false)
10+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(62) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(62) @[[GLOB0:[0-9]+]], i32 64, i1 false)
1211
; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]])
1312
; CHECK-NEXT: ret void
1413
;
@@ -19,4 +18,4 @@ entry:
1918
ret void
2019
}
2120

22-
declare i32 @bar(...) local_unnamed_addr
21+
declare i32 @bar(...)

llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-lengths-dont-match.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2-
; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s
2+
; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
33
; CHECK: [17 x i8]
44
@.str = private unnamed_addr constant [17 x i8] c"aaaaaaaaaaaaaaaa\00", align 1
55

66
; Function Attrs: nounwind
7-
define hidden void @foo() local_unnamed_addr #0 {
8-
; CHECK-LABEL: define hidden void @foo() local_unnamed_addr {
7+
define void @foo() {
8+
; CHECK-LABEL: define void @foo() local_unnamed_addr {
99
; CHECK-NEXT: [[ENTRY:.*:]]
1010
; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [20 x i8], align 1
1111
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 20, ptr nonnull [[SOMETHING]])
@@ -23,4 +23,4 @@ entry:
2323
ret void
2424
}
2525

26-
declare i32 @bar(...) local_unnamed_addr #2
26+
declare i32 @bar(...) #2

llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-more-than-64-bytes.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2-
; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s
2+
; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
33

44
; CHECK: [65 x i8]
55
; CHECK-NOT: [68 x i8]
66
@.str = private unnamed_addr constant [65 x i8] c"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzz\00", align 1
77

88
; Function Attrs: nounwind
9-
define hidden void @foo() local_unnamed_addr #0 {
10-
; CHECK-LABEL: define hidden void @foo() local_unnamed_addr {
9+
define void @foo() {
10+
; CHECK-LABEL: define void @foo() local_unnamed_addr {
1111
; CHECK-NEXT: [[ENTRY:.*:]]
1212
; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [65 x i8], align 1
1313
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 65, ptr nonnull [[SOMETHING]])
@@ -25,4 +25,4 @@ entry:
2525
ret void
2626
}
2727

28-
declare i32 @bar(...) local_unnamed_addr #2
28+
declare i32 @bar(...) #2

llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-ptrtoint.ll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,17 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2-
; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s
2+
; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
33

4-
; CHECK: [48 x i8]
54
@f.string1 = private unnamed_addr constant [45 x i8] c"The quick brown dog jumps over the lazy fox.\00", align 1
65

76
; Function Attrs: nounwind
8-
define hidden i32 @f() {
9-
; CHECK-LABEL: define hidden i32 @f() local_unnamed_addr {
7+
define i32 @f() {
8+
; CHECK-LABEL: define i32 @f() local_unnamed_addr {
109
; CHECK-NEXT: [[ENTRY:.*:]]
1110
; CHECK-NEXT: [[STRING1:%.*]] = alloca [48 x i8], align 1
1211
; CHECK-NEXT: [[POS:%.*]] = alloca i32, align 4
1312
; CHECK-NEXT: [[TOKEN:%.*]] = alloca ptr, align 4
1413
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 45, ptr [[STRING1]])
15-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[STRING1]], ptr align 1 @f.string1, i32 48, i1 false)
14+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[STRING1]], ptr align 1 @[[GLOB0:[0-9]+]], i32 48, i1 false)
1615
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[POS]])
1716
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[TOKEN]])
1817
; CHECK-NEXT: [[CALL:%.*]] = call ptr @strchr(ptr [[STRING1]], i32 101)

llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-struct-test.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2-
; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s
2+
; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
33
%struct.P = type { i32, [13 x i8] }
44

55
; CHECK-NOT: [16 x i8]
66
@.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00", align 1
77

88
; Function Attrs: nounwind
9-
define hidden i32 @main() local_unnamed_addr #0 {
10-
; CHECK-LABEL: define hidden i32 @main() local_unnamed_addr {
9+
define i32 @main() {
10+
; CHECK-LABEL: define i32 @main() local_unnamed_addr {
1111
; CHECK-NEXT: [[ENTRY:.*:]]
1212
; CHECK-NEXT: [[P:%.*]] = alloca [[STRUCT_P:%.*]], align 4
1313
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 20, ptr nonnull [[P]])

llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-volatile.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2-
; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s
2+
; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
33

44
; CHECK-NOT: [64 x i8]
55
@.str = private unnamed_addr constant [62 x i8] c"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\00", align 1
66

77
; Function Attrs: nounwind
8-
define hidden void @foo() local_unnamed_addr #0 {
9-
; CHECK-LABEL: define hidden void @foo() local_unnamed_addr {
8+
define void @foo() {
9+
; CHECK-LABEL: define void @foo() local_unnamed_addr {
1010
; CHECK-NEXT: [[ENTRY:.*:]]
1111
; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [62 x i8], align 1
1212
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [62 x i8], ptr [[SOMETHING]], i32 0, i32 0
@@ -26,4 +26,4 @@ entry:
2626
ret void
2727
}
2828

29-
declare i32 @bar(...) local_unnamed_addr #2
29+
declare i32 @bar(...) #2

0 commit comments

Comments
 (0)