@@ -2031,56 +2031,56 @@ OptimizeFunctions(Module &M,
2031
2031
return Changed;
2032
2032
}
2033
2033
2034
- static bool tryWidenDestArray (Function *F, CallInst *CI,
2035
- GlobalVariable *SourceVar, unsigned NumBytesToPad,
2036
- unsigned NumBytesToCopy,
2037
- ConstantDataArray *SourceDataArray) {
2034
+ static bool callInstIsMemcpy (CallInst *CI) {
2035
+ if (!CI)
2036
+ return false ;
2037
+
2038
+ Function *F = CI->getCalledFunction ();
2039
+ if (!F || !F->isIntrinsic () || F->getIntrinsicID () != Intrinsic::memcpy)
2040
+ return false ;
2041
+
2042
+ return true ;
2043
+ }
2044
+
2045
+ static bool destArrayCanBeWidened (CallInst *CI) {
2038
2046
auto *Alloca = dyn_cast<AllocaInst>(CI->getArgOperand (0 ));
2039
2047
auto *IsVolatile = dyn_cast<ConstantInt>(CI->getArgOperand (3 ));
2040
2048
2041
2049
if (!Alloca || !IsVolatile || IsVolatile->isOne ())
2042
2050
return false ;
2043
2051
2044
- if (!SourceVar->hasInitializer () || !SourceVar->isConstant () ||
2045
- !SourceVar->hasLocalLinkage () || !SourceVar->hasGlobalUnnamedAddr ())
2046
- return false ;
2047
-
2048
2052
if (!Alloca->isStaticAlloca ())
2049
2053
return false ;
2050
2054
2051
- uint64_t DZSize = Alloca->getAllocatedType ()->getArrayNumElements ();
2052
- uint64_t SZSize = SourceDataArray->getType ()->getNumElements ();
2053
- unsigned ElementByteWidth = SourceDataArray->getElementByteSize ();
2054
- // Calculate the number of elements to copy while avoiding floored
2055
- // division of integers returning wrong values i.e. copying one byte
2056
- // from an array of i16 would yield 0 elements to copy as supposed to 1.
2057
- unsigned NumElementsToCopy = divideCeil (NumBytesToCopy, ElementByteWidth);
2058
-
2059
- // For safety purposes lets add a constraint and only pad when
2060
- // NumElementsToCopy == destination array size ==
2061
- // source string which is a constant
2062
- if (NumElementsToCopy != DZSize || DZSize != SZSize)
2063
- return false ;
2055
+ return true ;
2056
+ }
2064
2057
2058
+ static void widenDestArray (CallInst *CI, const unsigned NumBytesToPad,
2059
+ const unsigned NumBytesToCopy,
2060
+ ConstantDataArray *SourceDataArray) {
2061
+ unsigned ElementByteWidth = SourceDataArray->getElementByteSize ();
2065
2062
unsigned int TotalBytes = NumBytesToCopy + NumBytesToPad;
2066
- NumElementsToCopy = divideCeil (TotalBytes, ElementByteWidth);
2067
-
2063
+ unsigned NumElementsToCopy = divideCeil (TotalBytes, ElementByteWidth);
2068
2064
// Update destination array to be word aligned (memcpy(X,...,...))
2065
+ auto *Alloca = dyn_cast<AllocaInst>(CI->getArgOperand (0 ));
2069
2066
IRBuilder<> BuildAlloca (Alloca);
2070
2067
AllocaInst *NewAlloca = BuildAlloca.CreateAlloca (ArrayType::get (
2071
2068
Alloca->getAllocatedType ()->getArrayElementType (), NumElementsToCopy));
2072
2069
NewAlloca->takeName (Alloca);
2073
2070
NewAlloca->setAlignment (Alloca->getAlign ());
2074
2071
Alloca->replaceAllUsesWith (NewAlloca);
2075
2072
Alloca->eraseFromParent ();
2076
- return true ;
2077
2073
}
2078
2074
2079
- static bool widenGlobalArray (Function *F, CallInst *CI,
2080
- GlobalVariable *SourceVar, unsigned NumBytesToPad,
2081
- unsigned NumBytesToCopy,
2082
- ConstantInt *BytesToCopyOp,
2083
- ConstantDataArray *SourceDataArray) {
2075
+ static bool tryWidenGlobalArrayAndDests (Function *F, GlobalVariable *SourceVar,
2076
+ const unsigned NumBytesToPad,
2077
+ const unsigned NumBytesToCopy,
2078
+ ConstantInt *BytesToCopyOp,
2079
+ ConstantDataArray *SourceDataArray) {
2080
+ if (!SourceVar->hasInitializer () || !SourceVar->isConstant () ||
2081
+ !SourceVar->hasLocalLinkage () || !SourceVar->hasGlobalUnnamedAddr ())
2082
+ return false ;
2083
+
2084
2084
// Update source to be word aligned (memcpy(...,X,...))
2085
2085
// create replacement with padded null bytes.
2086
2086
StringRef Data = SourceDataArray->getRawDataValues ();
@@ -2100,48 +2100,69 @@ static bool widenGlobalArray(Function *F, CallInst *CI,
2100
2100
NewGV->copyAttributesFrom (SourceVar);
2101
2101
NewGV->takeName (SourceVar);
2102
2102
2103
- CI->setArgOperand (2 , ConstantInt::get (BytesToCopyOp->getType (),
2104
- NumBytesToCopy + NumBytesToPad));
2103
+ // Update arguments of remaining uses that
2104
+ // are memcpys.
2105
+ for (auto *User : SourceVar->users ()) {
2106
+ auto *CI = dyn_cast<CallInst>(User);
2107
+ if (!callInstIsMemcpy (CI))
2108
+ continue ;
2109
+
2110
+ widenDestArray (CI, NumBytesToPad, NumBytesToCopy, SourceDataArray);
2111
+
2112
+ CI->setArgOperand (2 , ConstantInt::get (BytesToCopyOp->getType (),
2113
+ NumBytesToCopy + NumBytesToPad));
2114
+ }
2115
+ SourceVar->replaceAllUsesWith (NewGV);
2116
+
2105
2117
NumGlobalArraysPadded++;
2106
2118
return true ;
2107
2119
}
2108
2120
2109
2121
static bool tryWidenGlobalArraysUsedByMemcpy (
2110
2122
GlobalVariable *GV,
2111
2123
function_ref<TargetTransformInfo &(Function &)> GetTTI) {
2124
+
2125
+ if (!GV->hasInitializer ())
2126
+ return false ;
2127
+
2112
2128
for (auto *User : GV->users ()) {
2113
2129
CallInst *CI = dyn_cast<CallInst>(User);
2114
- if (!CI )
2130
+ if (!callInstIsMemcpy (CI) || ! destArrayCanBeWidened (CI) )
2115
2131
continue ;
2116
2132
2117
2133
Function *F = CI->getCalledFunction ();
2118
- if (!F || !F->isIntrinsic () || F->getIntrinsicID () != Intrinsic::memcpy)
2119
- continue ;
2120
2134
2121
- TargetTransformInfo &TTI = GetTTI (*F);
2122
2135
auto *BytesToCopyOp = dyn_cast<ConstantInt>(CI->getArgOperand (2 ));
2123
2136
if (!BytesToCopyOp)
2124
2137
continue ;
2125
2138
2126
- if (!GV->hasInitializer ())
2127
- continue ;
2128
-
2129
2139
ConstantDataArray *SourceDataArray =
2130
2140
dyn_cast<ConstantDataArray>(GV->getInitializer ());
2131
2141
if (!SourceDataArray)
2132
2142
continue ;
2133
2143
2134
2144
unsigned NumBytesToCopy = BytesToCopyOp->getZExtValue ();
2135
- unsigned NumBytesToPad = TTI.getNumBytesToPadGlobalArray (
2136
- NumBytesToCopy, SourceDataArray->getType ());
2137
2145
2146
+ auto *Alloca = dyn_cast<AllocaInst>(CI->getArgOperand (0 ));
2147
+ uint64_t DZSize = Alloca->getAllocatedType ()->getArrayNumElements ();
2148
+ uint64_t SZSize = SourceDataArray->getType ()->getNumElements ();
2149
+ unsigned ElementByteWidth = SourceDataArray->getElementByteSize ();
2150
+ // Calculate the number of elements to copy while avoiding floored
2151
+ // division of integers returning wrong values i.e. copying one byte
2152
+ // from an array of i16 would yield 0 elements to copy as opposed to 1.
2153
+ unsigned NumElementsToCopy = divideCeil (NumBytesToCopy, ElementByteWidth);
2154
+
2155
+ // For safety purposes lets add a constraint and only pad when
2156
+ // NumElementsToCopy == destination array size ==
2157
+ // source which is a constant
2158
+ if (NumElementsToCopy != DZSize || DZSize != SZSize)
2159
+ continue ;
2160
+
2161
+ unsigned NumBytesToPad = GetTTI (*F).getNumBytesToPadGlobalArray (
2162
+ NumBytesToCopy, SourceDataArray->getType ());
2138
2163
if (NumBytesToPad) {
2139
- bool DestWidened = tryWidenDestArray (F, CI, GV, NumBytesToPad,
2140
- NumBytesToCopy, SourceDataArray);
2141
- if (DestWidened) {
2142
- return widenGlobalArray (F, CI, GV, NumBytesToPad, NumBytesToCopy,
2143
- BytesToCopyOp, SourceDataArray);
2144
- }
2164
+ return tryWidenGlobalArrayAndDests (F, GV, NumBytesToPad, NumBytesToCopy,
2165
+ BytesToCopyOp, SourceDataArray);
2145
2166
}
2146
2167
}
2147
2168
return false ;
0 commit comments