Skip to content

Commit 1e36d92

Browse files
authored
[LowerMemIntrinsics] Avoid udiv/urem when type size is a power of 2 (#81238)
See #64620 - does not fix the issue but improves the generated code a bit.
1 parent 92d7992 commit 1e36d92

File tree

3 files changed

+137
-69
lines changed

3 files changed

+137
-69
lines changed

llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "llvm/IR/IntrinsicInst.h"
1414
#include "llvm/IR/MDBuilder.h"
1515
#include "llvm/Support/Debug.h"
16+
#include "llvm/Support/MathExtras.h"
1617
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
1718
#include <optional>
1819

@@ -155,6 +156,26 @@ void llvm::createMemCpyLoopKnownSize(
155156
"Bytes copied should match size in the call!");
156157
}
157158

159+
// \returns \p Len udiv \p OpSize, checking for optimization opportunities.
160+
static Value *getRuntimeLoopCount(const DataLayout &DL, IRBuilderBase &B,
161+
Value *Len, Value *OpSize,
162+
unsigned OpSizeVal) {
163+
// For powers of 2, we can lshr by log2 instead of using udiv.
164+
if (isPowerOf2_32(OpSizeVal))
165+
return B.CreateLShr(Len, Log2_32(OpSizeVal));
166+
return B.CreateUDiv(Len, OpSize);
167+
}
168+
169+
// \returns \p Len urem \p OpSize, checking for optimization opportunities.
170+
static Value *getRuntimeLoopRemainder(const DataLayout &DL, IRBuilderBase &B,
171+
Value *Len, Value *OpSize,
172+
unsigned OpSizeVal) {
173+
// For powers of 2, we can and by (OpSizeVal - 1) instead of using urem.
174+
if (isPowerOf2_32(OpSizeVal))
175+
return B.CreateAnd(Len, OpSizeVal - 1);
176+
return B.CreateURem(Len, OpSize);
177+
}
178+
158179
void llvm::createMemCpyLoopUnknownSize(
159180
Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
160181
Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile,
@@ -194,9 +215,11 @@ void llvm::createMemCpyLoopUnknownSize(
194215
Type *Int8Type = Type::getInt8Ty(Ctx);
195216
bool LoopOpIsInt8 = LoopOpType == Int8Type;
196217
ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
197-
Value *RuntimeLoopCount = LoopOpIsInt8 ?
198-
CopyLen :
199-
PLBuilder.CreateUDiv(CopyLen, CILoopOpSize);
218+
Value *RuntimeLoopCount = LoopOpIsInt8
219+
? CopyLen
220+
: getRuntimeLoopCount(DL, PLBuilder, CopyLen,
221+
CILoopOpSize, LoopOpSize);
222+
200223
BasicBlock *LoopBB =
201224
BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB);
202225
IRBuilder<> LoopBuilder(LoopBB);
@@ -239,8 +262,8 @@ void llvm::createMemCpyLoopUnknownSize(
239262
assert((ResLoopOpSize == AtomicElementSize ? *AtomicElementSize : 1) &&
240263
"Store size is expected to match type size");
241264

242-
// Add in the
243-
Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
265+
Value *RuntimeResidual = getRuntimeLoopRemainder(DL, PLBuilder, CopyLen,
266+
CILoopOpSize, LoopOpSize);
244267
Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);
245268

246269
// Loop body for the residual copy.

0 commit comments

Comments
 (0)