|
13 | 13 | #include "llvm/IR/IntrinsicInst.h"
|
14 | 14 | #include "llvm/IR/MDBuilder.h"
|
15 | 15 | #include "llvm/Support/Debug.h"
|
| 16 | +#include "llvm/Support/MathExtras.h" |
16 | 17 | #include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
17 | 18 | #include <optional>
|
18 | 19 |
|
@@ -155,6 +156,26 @@ void llvm::createMemCpyLoopKnownSize(
|
155 | 156 | "Bytes copied should match size in the call!");
|
156 | 157 | }
|
157 | 158 |
|
| 159 | +// \returns \p Len udiv \p OpSize, checking for optimization opportunities. |
| 160 | +static Value *getRuntimeLoopCount(const DataLayout &DL, IRBuilderBase &B, |
| 161 | + Value *Len, Value *OpSize, |
| 162 | + unsigned OpSizeVal) { |
| 163 | + // For powers of 2, we can lshr by log2 instead of using udiv. |
| 164 | + if (isPowerOf2_32(OpSizeVal)) |
| 165 | + return B.CreateLShr(Len, Log2_32(OpSizeVal)); |
| 166 | + return B.CreateUDiv(Len, OpSize); |
| 167 | +} |
| 168 | + |
| 169 | +// \returns \p Len urem \p OpSize, checking for optimization opportunities. |
| 170 | +static Value *getRuntimeLoopRemainder(const DataLayout &DL, IRBuilderBase &B, |
| 171 | + Value *Len, Value *OpSize, |
| 172 | + unsigned OpSizeVal) { |
| 173 | + // For powers of 2, we can and by (OpSizeVal - 1) instead of using urem. |
| 174 | + if (isPowerOf2_32(OpSizeVal)) |
| 175 | + return B.CreateAnd(Len, OpSizeVal - 1); |
| 176 | + return B.CreateURem(Len, OpSize); |
| 177 | +} |
| 178 | + |
158 | 179 | void llvm::createMemCpyLoopUnknownSize(
|
159 | 180 | Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
|
160 | 181 | Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile,
|
@@ -194,9 +215,11 @@ void llvm::createMemCpyLoopUnknownSize(
|
194 | 215 | Type *Int8Type = Type::getInt8Ty(Ctx);
|
195 | 216 | bool LoopOpIsInt8 = LoopOpType == Int8Type;
|
196 | 217 | ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
|
197 |
| - Value *RuntimeLoopCount = LoopOpIsInt8 ? |
198 |
| - CopyLen : |
199 |
| - PLBuilder.CreateUDiv(CopyLen, CILoopOpSize); |
| 218 | + Value *RuntimeLoopCount = LoopOpIsInt8 |
| 219 | + ? CopyLen |
| 220 | + : getRuntimeLoopCount(DL, PLBuilder, CopyLen, |
| 221 | + CILoopOpSize, LoopOpSize); |
| 222 | + |
200 | 223 | BasicBlock *LoopBB =
|
201 | 224 | BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB);
|
202 | 225 | IRBuilder<> LoopBuilder(LoopBB);
|
@@ -239,8 +262,8 @@ void llvm::createMemCpyLoopUnknownSize(
|
239 | 262 | assert((ResLoopOpSize == AtomicElementSize ? *AtomicElementSize : 1) &&
|
240 | 263 | "Store size is expected to match type size");
|
241 | 264 |
|
242 |
| - // Add in the |
243 |
| - Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize); |
| 265 | + Value *RuntimeResidual = getRuntimeLoopRemainder(DL, PLBuilder, CopyLen, |
| 266 | + CILoopOpSize, LoopOpSize); |
244 | 267 | Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);
|
245 | 268 |
|
246 | 269 | // Loop body for the residual copy.
|
|
0 commit comments