Skip to content

Commit 627f1ef

Browse files
committed
[IR] Initial introduction of memset_pattern
1 parent b7a457e commit 627f1ef

File tree

15 files changed

+1075
-2
lines changed

15 files changed

+1075
-2
lines changed

llvm/docs/LangRef.rst

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15230,6 +15230,62 @@ The behavior of '``llvm.memset.inline.*``' is equivalent to the behavior of
1523015230
'``llvm.memset.*``', but the generated code is guaranteed not to call any
1523115231
external functions.
1523215232

15233+
.. _int_memset_pattern:
15234+
15235+
'``llvm.memset_pattern``' Intrinsic
15236+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
15237+
15238+
Syntax:
15239+
"""""""
15240+
15241+
This is an overloaded intrinsic. You can use ``llvm.memset_pattern`` on
15242+
any integer bit width and for different address spaces. Not all targets
15243+
support all bit widths however.
15244+
15245+
::
15246+
15247+
declare void @llvm.memset_pattern.p0.i128.i64(ptr <dest>, i128 <val>,
15248+
i64 <len>, i1 <isvolatile>)
15249+
15250+
Overview:
15251+
"""""""""
15252+
15253+
The '``llvm.memset_pattern.*``' intrinsics fill a block of memory with
15254+
a particular value. This may be expanded to an inline loop, a sequence of
15255+
stores, or a libcall depending on what is available for the target and the
15256+
expected performance and code size impact.
15257+
15258+
Arguments:
15259+
""""""""""
15260+
15261+
The first argument is a pointer to the destination to fill, the second
15262+
is the value with which to fill it, the third argument is an integer
15263+
argument specifying the number of bytes to fill, and the fourth is a boolean
15264+
indicating a volatile access.
15265+
15266+
The :ref:`align <attr_align>` parameter attribute can be provided
15267+
for the first argument.
15268+
15269+
If the ``isvolatile`` parameter is ``true``, the
15270+
``llvm.memset_pattern`` call is a :ref:`volatile operation <volatile>`. The
15271+
detailed access behavior is not very cleanly specified and it is unwise to
15272+
depend on it.
15273+
15274+
Semantics:
15275+
""""""""""
15276+
15277+
The '``llvm.memset_pattern.*``' intrinsics fill "len" bytes of memory
15278+
starting at the destination location. If the argument is known to be aligned
15279+
to some boundary, this can be specified as an attribute on the argument.
15280+
15281+
If ``<len>`` is not an integer multiple of the pattern width in bytes, then any
15282+
remainder bytes will be copied from the leading bytes of the in-memory representation of ``<val>``.
15283+
If ``<len>`` is 0, it is a no-op modulo the behavior of attributes attached to
15284+
the arguments.
15285+
If ``<len>`` is not a well-defined value, the behavior is undefined.
15286+
If ``<len>`` is not zero, ``<dest>`` should be well-defined, otherwise the
15287+
behavior is undefined.
15288+
1523315289
.. _int_sqrt:
1523415290

1523515291
'``llvm.sqrt.*``' Intrinsic

llvm/include/llvm/IR/InstVisitor.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ class InstVisitor {
208208
RetTy visitDbgInfoIntrinsic(DbgInfoIntrinsic &I){ DELEGATE(IntrinsicInst); }
209209
RetTy visitMemSetInst(MemSetInst &I) { DELEGATE(MemIntrinsic); }
210210
RetTy visitMemSetInlineInst(MemSetInlineInst &I){ DELEGATE(MemSetInst); }
211+
// Visitor hook for llvm.memset_pattern calls. Delegates with MemSetInst as the
// target class (DELEGATE is defined outside this excerpt; presumably it
// forwards to visitMemSetInst unless a subclass overrides this method).
RetTy visitMemSetPatternInst(MemSetPatternInst &I) { DELEGATE(MemSetInst); }
211212
RetTy visitMemCpyInst(MemCpyInst &I) { DELEGATE(MemTransferInst); }
212213
RetTy visitMemCpyInlineInst(MemCpyInlineInst &I){ DELEGATE(MemCpyInst); }
213214
RetTy visitMemMoveInst(MemMoveInst &I) { DELEGATE(MemTransferInst); }
@@ -295,6 +296,8 @@ class InstVisitor {
295296
case Intrinsic::memset: DELEGATE(MemSetInst);
296297
case Intrinsic::memset_inline:
297298
DELEGATE(MemSetInlineInst);
299+
case Intrinsic::memset_pattern:
300+
DELEGATE(MemSetPatternInst);
298301
case Intrinsic::vastart: DELEGATE(VAStartInst);
299302
case Intrinsic::vaend: DELEGATE(VAEndInst);
300303
case Intrinsic::vacopy: DELEGATE(VACopyInst);

llvm/include/llvm/IR/IntrinsicInst.h

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1208,6 +1208,7 @@ class MemIntrinsic : public MemIntrinsicBase<MemIntrinsic> {
12081208
case Intrinsic::memmove:
12091209
case Intrinsic::memset:
12101210
case Intrinsic::memset_inline:
1211+
case Intrinsic::memset_pattern:
12111212
case Intrinsic::memcpy_inline:
12121213
return true;
12131214
default:
@@ -1219,14 +1220,16 @@ class MemIntrinsic : public MemIntrinsicBase<MemIntrinsic> {
12191220
}
12201221
};
12211222

1222-
/// This class wraps the llvm.memset and llvm.memset.inline intrinsics.
1223+
/// This class wraps the llvm.memset, llvm.memset.inline, and
1224+
/// llvm.memset_pattern intrinsics.
12231225
class MemSetInst : public MemSetBase<MemIntrinsic> {
12241226
public:
12251227
// Methods for support type inquiry through isa, cast, and dyn_cast:
12261228
static bool classof(const IntrinsicInst *I) {
12271229
switch (I->getIntrinsicID()) {
12281230
case Intrinsic::memset:
12291231
case Intrinsic::memset_inline:
1232+
case Intrinsic::memset_pattern:
12301233
return true;
12311234
default:
12321235
return false;
@@ -1249,6 +1252,21 @@ class MemSetInlineInst : public MemSetInst {
12491252
}
12501253
};
12511254

1255+
/// This class wraps the llvm.memset.pattern intrinsic.
1256+
class MemSetPatternInst : public MemSetInst {
1257+
public:
1258+
ConstantInt *getLength() const {
1259+
return cast<ConstantInt>(MemSetInst::getLength());
1260+
}
1261+
// Methods for support type inquiry through isa, cast, and dyn_cast:
1262+
static bool classof(const IntrinsicInst *I) {
1263+
return I->getIntrinsicID() == Intrinsic::memset_pattern;
1264+
}
1265+
static bool classof(const Value *V) {
1266+
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
1267+
}
1268+
};
1269+
12521270
/// This class wraps the llvm.memcpy/memmove intrinsics.
12531271
class MemTransferInst : public MemTransferBase<MemIntrinsic> {
12541272
public:
@@ -1328,6 +1346,7 @@ class AnyMemIntrinsic : public MemIntrinsicBase<AnyMemIntrinsic> {
13281346
case Intrinsic::memmove:
13291347
case Intrinsic::memset:
13301348
case Intrinsic::memset_inline:
1349+
case Intrinsic::memset_pattern:
13311350
case Intrinsic::memcpy_element_unordered_atomic:
13321351
case Intrinsic::memmove_element_unordered_atomic:
13331352
case Intrinsic::memset_element_unordered_atomic:
@@ -1350,6 +1369,7 @@ class AnyMemSetInst : public MemSetBase<AnyMemIntrinsic> {
13501369
switch (I->getIntrinsicID()) {
13511370
case Intrinsic::memset:
13521371
case Intrinsic::memset_inline:
1372+
case Intrinsic::memset_pattern:
13531373
case Intrinsic::memset_element_unordered_atomic:
13541374
return true;
13551375
default:

llvm/include/llvm/IR/Intrinsics.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1003,6 +1003,14 @@ def int_memset_inline
10031003
NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>,
10041004
ImmArg<ArgIndex<3>>]>;
10051005

1006+
// Memset variant that fills memory with a repeated integer pattern of any
// width, rather than with a single byte value.
1007+
def int_memset_pattern
1008+
: Intrinsic<[],
1009+
[llvm_anyptr_ty, llvm_anyint_ty, llvm_anyint_ty, llvm_i1_ty],
1010+
[IntrWriteMem, IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback,
1011+
NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>,
1012+
ImmArg<ArgIndex<3>>], "llvm.memset_pattern">;
1013+
10061014
// FIXME: Add version of these floating point intrinsics which allow non-default
10071015
// rounding modes and FP exception handling.
10081016

llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,13 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
276276
Memset->eraseFromParent();
277277
break;
278278
}
279+
case Intrinsic::memset_pattern: {
280+
auto *Memset = cast<MemSetPatternInst>(Inst);
281+
expandMemSetAsLoop(Memset);
282+
Changed = true;
283+
Memset->eraseFromParent();
284+
break;
285+
}
279286
default:
280287
llvm_unreachable("unhandled intrinsic");
281288
}
@@ -294,6 +301,7 @@ bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {
294301
case Intrinsic::memmove:
295302
case Intrinsic::memset:
296303
case Intrinsic::memset_inline:
304+
case Intrinsic::memset_pattern:
297305
Changed |= expandMemIntrinsicUses(F);
298306
break;
299307
case Intrinsic::load_relative:

llvm/lib/IR/Verifier.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5435,7 +5435,8 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
54355435
case Intrinsic::memcpy_inline:
54365436
case Intrinsic::memmove:
54375437
case Intrinsic::memset:
5438-
case Intrinsic::memset_inline: {
5438+
case Intrinsic::memset_inline:
5439+
case Intrinsic::memset_pattern: {
54395440
break;
54405441
}
54415442
case Intrinsic::memcpy_element_unordered_atomic:

llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,109 @@ static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
456456
ElseTerm->eraseFromParent();
457457
}
458458

459+
/// Expand a pattern memset into explicit IR emitted in front of
/// \p InsertBefore.
///
/// The block containing \p InsertBefore is split at that instruction and
/// three new blocks are placed ahead of the split-off tail:
///   - "storeloop":     stores the whole of \p SetValue once per iteration and
///                      advances the destination by the pattern size in bytes.
///   - "remcheck":      jumps straight to the tail when no remainder bytes are
///                      left to write.
///   - "remainderloop": writes the remaining bytes one at a time, taking
///                      successive low-order bytes of \p SetValue.
/// The iteration count and the remainder come from this file's
/// getRuntimeLoopCount / getRuntimeLoopRemainder helpers.
///
/// \param InsertBefore Instruction the expansion is emitted before. It is not
///                     erased here.
/// \param DstAddr      Pointer to the first byte to fill.
/// \param CopyLen      Integer-typed number of bytes to fill.
/// \param SetValue     Integer pattern; its width must be a power of two and
///                     at least 8 bits.
/// \param DstAlign     Known alignment of \p DstAddr.
/// \param IsVolatile   Whether the emitted stores are volatile.
///
/// Reports a fatal error for big-endian targets and for unsupported pattern
/// widths.
static void createMemSetPatternLoop(Instruction *InsertBefore, Value *DstAddr,
                                    Value *CopyLen, Value *SetValue,
                                    Align DstAlign, bool IsVolatile) {
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getDataLayout();

  // The remainder loop peels bytes off the low end of the pattern, which only
  // matches memory order on little-endian targets.
  if (DL.isBigEndian())
    report_fatal_error("memset_pattern.inline expansion not currently "
                       "implemented for big-endian targets",
                       false);

  // The pattern must cover a whole, power-of-two number of bytes. Sub-byte
  // widths (i1, i2, i4) are powers of two as well, but would yield a pattern
  // size of zero bytes and a division by zero in the loop-count computation.
  Type *TypeOfSetValue = SetValue->getType();
  const unsigned PatternBits = TypeOfSetValue->getScalarSizeInBits();
  if (!isPowerOf2_32(PatternBits))
    report_fatal_error("Pattern width for memset_pattern must be a power of 2",
                       false);
  if (PatternBits < 8)
    report_fatal_error("Pattern width for memset_pattern must be at least 8 "
                       "bits",
                       false);
  const unsigned PatternSize = PatternBits / 8;

  // Only the first store is guaranteed to be DstAlign-aligned; each later
  // store sits a multiple of PatternSize bytes further on.
  const Align PatternStoreAlign = commonAlignment(DstAlign, PatternSize);

  Type *TypeOfCopyLen = CopyLen->getType();

  BasicBlock *NewBB = OrigBB->splitBasicBlock(InsertBefore, "split");
  BasicBlock *LoopBB =
      BasicBlock::Create(F->getContext(), "storeloop", F, NewBB);
  BasicBlock *RemCheckBB =
      BasicBlock::Create(F->getContext(), "remcheck", F, NewBB);
  BasicBlock *RemainderLoopBB =
      BasicBlock::Create(F->getContext(), "remainderloop", F, NewBB);
  IRBuilder<> Builder(OrigBB->getTerminator());

  // The length operand is always an integer; cast<> asserts that instead of
  // silently handing a null type to ConstantInt::get.
  ConstantInt *CILoopOpSize =
      ConstantInt::get(cast<IntegerType>(TypeOfCopyLen), PatternSize);
  Value *RuntimeLoopCount =
      getRuntimeLoopCount(DL, Builder, CopyLen, CILoopOpSize, PatternSize);
  Value *RuntimeRemainder =
      getRuntimeLoopRemainder(DL, Builder, CopyLen, CILoopOpSize, PatternSize);

  // Skip the full-pattern loop entirely when fewer than PatternSize bytes are
  // requested, then drop the unconditional branch splitBasicBlock left behind.
  Builder.CreateCondBr(Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0),
                                            RuntimeLoopCount),
                       RemCheckBB, LoopBB);
  OrigBB->getTerminator()->eraseFromParent();

  IRBuilder<> LoopBuilder(LoopBB);
  PHINode *CurrentDst = LoopBuilder.CreatePHI(DstAddr->getType(), 0);
  CurrentDst->addIncoming(DstAddr, OrigBB);
  PHINode *LoopCount = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
  LoopCount->addIncoming(RuntimeLoopCount, OrigBB);

  // Create the store instruction for the pattern.
  LoopBuilder.CreateAlignedStore(SetValue, CurrentDst, PatternStoreAlign,
                                 IsVolatile);

  // Advance by exactly PatternSize bytes. An i8 element type makes the GEP
  // index a byte offset.
  Value *NextDst = LoopBuilder.CreateInBoundsGEP(
      LoopBuilder.getInt8Ty(), CurrentDst,
      ConstantInt::get(TypeOfCopyLen, PatternSize));
  CurrentDst->addIncoming(NextDst, LoopBB);

  Value *NewLoopCount =
      LoopBuilder.CreateSub(LoopCount, ConstantInt::get(TypeOfCopyLen, 1));
  LoopCount->addIncoming(NewLoopCount, LoopBB);

  LoopBuilder.CreateCondBr(
      LoopBuilder.CreateICmpNE(NewLoopCount,
                               ConstantInt::get(TypeOfCopyLen, 0)),
      LoopBB, RemCheckBB);

  IRBuilder<> RemCheckBuilder(RemCheckBB, RemCheckBB->begin());
  // The remainder starts at DstAddr if the main loop was skipped, otherwise
  // just past the last full pattern store.
  PHINode *RemainderDstPHI = RemCheckBuilder.CreatePHI(NextDst->getType(), 0);
  RemainderDstPHI->addIncoming(DstAddr, OrigBB);
  RemainderDstPHI->addIncoming(NextDst, LoopBB);
  // Branch to the end if there are no remainder bytes.
  RemCheckBuilder.CreateCondBr(
      RemCheckBuilder.CreateICmpEQ(RuntimeRemainder,
                                   ConstantInt::get(TypeOfCopyLen, 0)),
      NewBB, RemainderLoopBB);

  // Remainder loop: store the low byte of the pattern, then shift the pattern
  // right by one byte for the next iteration.
  IRBuilder<> RemainderLoopBuilder(RemainderLoopBB);
  PHINode *ByteIndex = RemainderLoopBuilder.CreatePHI(TypeOfCopyLen, 0);
  ByteIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), RemCheckBB);
  PHINode *ShiftedValue = RemainderLoopBuilder.CreatePHI(TypeOfSetValue, 0);
  ShiftedValue->addIncoming(SetValue, RemCheckBB);

  Value *ByteToStore = RemainderLoopBuilder.CreateTrunc(
      ShiftedValue, RemainderLoopBuilder.getInt8Ty());

  RemainderLoopBuilder.CreateStore(
      ByteToStore,
      RemainderLoopBuilder.CreateInBoundsGEP(RemainderLoopBuilder.getInt8Ty(),
                                             RemainderDstPHI, ByteIndex),
      IsVolatile);

  Value *NewByteIndex = RemainderLoopBuilder.CreateAdd(
      ByteIndex, ConstantInt::get(TypeOfCopyLen, 1));
  ByteIndex->addIncoming(NewByteIndex, RemainderLoopBB);
  Value *NewShiftedValue = RemainderLoopBuilder.CreateLShr(
      ShiftedValue, ConstantInt::get(TypeOfSetValue, 8));
  ShiftedValue->addIncoming(NewShiftedValue, RemainderLoopBB);

  RemainderLoopBuilder.CreateCondBr(
      RemainderLoopBuilder.CreateICmpULT(NewByteIndex, RuntimeRemainder),
      RemainderLoopBB, NewBB);
}
561+
459562
static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
460563
Value *CopyLen, Value *SetValue, Align DstAlign,
461564
bool IsVolatile) {
@@ -591,6 +694,16 @@ bool llvm::expandMemMoveAsLoop(MemMoveInst *Memmove,
591694
}
592695

593696
void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
697+
if (isa<MemSetPatternInst>(Memset)) {
698+
return createMemSetPatternLoop(
699+
/* InsertBefore */ Memset,
700+
/* DstAddr */ Memset->getRawDest(),
701+
/* CopyLen */ Memset->getLength(),
702+
/* SetValue */ Memset->getValue(),
703+
/* Alignment */ Memset->getDestAlign().valueOrOne(),
704+
Memset->isVolatile());
705+
}
706+
594707
createMemSetLoop(/* InsertBefore */ Memset,
595708
/* DstAddr */ Memset->getRawDest(),
596709
/* CopyLen */ Memset->getLength(),

0 commit comments

Comments
 (0)