Skip to content

Commit 6560a5c

Browse files
committed
[IR] Initial introduction of memset_pattern
1 parent 3b73cb3 commit 6560a5c

File tree

12 files changed

+981
-2
lines changed

12 files changed

+981
-2
lines changed

llvm/docs/LangRef.rst

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15231,6 +15231,62 @@ The behavior of '``llvm.memset.inline.*``' is equivalent to the behavior of
1523115231
'``llvm.memset.*``', but the generated code is guaranteed not to call any
1523215232
external functions.
1523315233

15234+
.. _int_memset_pattern:
15235+
15236+
'``llvm.memset_pattern``' Intrinsic
15237+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
15238+
15239+
Syntax:
15240+
"""""""
15241+
15242+
This is an overloaded intrinsic. You can use ``llvm.memset_pattern`` on
15243+
any integer bit width and for different address spaces. Not all targets
15244+
support all bit widths, however.
15245+
15246+
::
15247+
15248+
declare void @llvm.memset_pattern.p0.i64.i128(ptr <dest>, i128 <val>,
15249+
i64 <len>, i1 <isvolatile>)
15250+
15251+
Overview:
15252+
"""""""""
15253+
15254+
The '``llvm.memset_pattern.*``' intrinsics fill a block of memory with
15255+
a particular value. This may be expanded to an inline loop, a sequence of
15256+
stores, or a libcall depending on what is available for the target and the
15257+
expected performance and code size impact.
15258+
15259+
Arguments:
15260+
""""""""""
15261+
15262+
The first argument is a pointer to the destination to fill, the second
15263+
is the value with which to fill it, the third argument is an integer
15264+
argument specifying the number of bytes to fill, and the fourth is a boolean
15265+
indicating a volatile access.
15266+
15267+
The :ref:`align <attr_align>` parameter attribute can be provided
15268+
for the first argument.
15269+
15270+
If the ``isvolatile`` parameter is ``true``, the
15271+
``llvm.memset_pattern`` call is a :ref:`volatile operation <volatile>`. The
15272+
detailed access behavior is not very cleanly specified and it is unwise to
15273+
depend on it.
15274+
15275+
Semantics:
15276+
""""""""""
15277+
15278+
The '``llvm.memset_pattern.*``' intrinsics fill ``<len>`` bytes of memory
15279+
starting at the destination location. If the destination is known to be aligned
15280+
to some boundary, this can be specified as an attribute on the argument.
15281+
15282+
If ``<len>`` is not an integer multiple of the pattern width in bytes, then any
15283+
remainder bytes will be copied from ``<val>``.
15284+
If ``<len>`` is 0, it is a no-op modulo the behavior of attributes attached to
15285+
the arguments.
15286+
If ``<len>`` is not a well-defined value, the behavior is undefined.
15287+
If ``<len>`` is not zero, ``<dest>`` should be well-defined, otherwise the
15288+
behavior is undefined.
15289+
1523415290
.. _int_sqrt:
1523515291

1523615292
'``llvm.sqrt.*``' Intrinsic

llvm/include/llvm/IR/InstVisitor.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ class InstVisitor {
208208
RetTy visitDbgInfoIntrinsic(DbgInfoIntrinsic &I){ DELEGATE(IntrinsicInst); }
209209
RetTy visitMemSetInst(MemSetInst &I) { DELEGATE(MemIntrinsic); }
210210
RetTy visitMemSetInlineInst(MemSetInlineInst &I){ DELEGATE(MemSetInst); }
211+
RetTy visitMemSetPatternInst(MemSetPatternInst &I) { DELEGATE(MemSetInst); }
211212
RetTy visitMemCpyInst(MemCpyInst &I) { DELEGATE(MemTransferInst); }
212213
RetTy visitMemCpyInlineInst(MemCpyInlineInst &I){ DELEGATE(MemCpyInst); }
213214
RetTy visitMemMoveInst(MemMoveInst &I) { DELEGATE(MemTransferInst); }
@@ -295,6 +296,8 @@ class InstVisitor {
295296
case Intrinsic::memset: DELEGATE(MemSetInst);
296297
case Intrinsic::memset_inline:
297298
DELEGATE(MemSetInlineInst);
299+
case Intrinsic::memset_pattern:
300+
DELEGATE(MemSetPatternInst);
298301
case Intrinsic::vastart: DELEGATE(VAStartInst);
299302
case Intrinsic::vaend: DELEGATE(VAEndInst);
300303
case Intrinsic::vacopy: DELEGATE(VACopyInst);

llvm/include/llvm/IR/IntrinsicInst.h

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1171,6 +1171,7 @@ class MemIntrinsic : public MemIntrinsicBase<MemIntrinsic> {
11711171
case Intrinsic::memmove:
11721172
case Intrinsic::memset:
11731173
case Intrinsic::memset_inline:
1174+
case Intrinsic::memset_pattern:
11741175
case Intrinsic::memcpy_inline:
11751176
return true;
11761177
default:
@@ -1182,14 +1183,16 @@ class MemIntrinsic : public MemIntrinsicBase<MemIntrinsic> {
11821183
}
11831184
};
11841185

1185-
/// This class wraps the llvm.memset and llvm.memset.inline intrinsics.
1186+
/// This class wraps the llvm.memset, llvm.memset.inline, and
1187+
/// llvm.memset_pattern intrinsics.
11861188
class MemSetInst : public MemSetBase<MemIntrinsic> {
11871189
public:
11881190
// Methods for support type inquiry through isa, cast, and dyn_cast:
11891191
static bool classof(const IntrinsicInst *I) {
11901192
switch (I->getIntrinsicID()) {
11911193
case Intrinsic::memset:
11921194
case Intrinsic::memset_inline:
1195+
case Intrinsic::memset_pattern:
11931196
return true;
11941197
default:
11951198
return false;
@@ -1215,6 +1218,21 @@ class MemSetInlineInst : public MemSetInst {
12151218
}
12161219
};
12171220

1221+
/// This class wraps the llvm.memset.pattern intrinsic.
1222+
class MemSetPatternInst : public MemSetInst {
1223+
public:
1224+
ConstantInt *getLength() const {
1225+
return cast<ConstantInt>(MemSetInst::getLength());
1226+
}
1227+
// Methods for support type inquiry through isa, cast, and dyn_cast:
1228+
static bool classof(const IntrinsicInst *I) {
1229+
return I->getIntrinsicID() == Intrinsic::memset_pattern;
1230+
}
1231+
static bool classof(const Value *V) {
1232+
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
1233+
}
1234+
};
1235+
12181236
/// This class wraps the llvm.memcpy/memmove intrinsics.
12191237
class MemTransferInst : public MemTransferBase<MemIntrinsic> {
12201238
public:
@@ -1294,6 +1312,7 @@ class AnyMemIntrinsic : public MemIntrinsicBase<AnyMemIntrinsic> {
12941312
case Intrinsic::memmove:
12951313
case Intrinsic::memset:
12961314
case Intrinsic::memset_inline:
1315+
case Intrinsic::memset_pattern:
12971316
case Intrinsic::memcpy_element_unordered_atomic:
12981317
case Intrinsic::memmove_element_unordered_atomic:
12991318
case Intrinsic::memset_element_unordered_atomic:
@@ -1316,6 +1335,7 @@ class AnyMemSetInst : public MemSetBase<AnyMemIntrinsic> {
13161335
switch (I->getIntrinsicID()) {
13171336
case Intrinsic::memset:
13181337
case Intrinsic::memset_inline:
1338+
case Intrinsic::memset_pattern:
13191339
case Intrinsic::memset_element_unordered_atomic:
13201340
return true;
13211341
default:

llvm/include/llvm/IR/Intrinsics.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1003,6 +1003,14 @@ def int_memset_inline
10031003
NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>,
10041004
ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
10051005

1006+
// llvm.memset_pattern: memset variant that fills the destination with a given
// integer pattern rather than a single byte value.
def int_memset_pattern
    : Intrinsic<[],
                [llvm_anyptr_ty,  // dest: pointer to the memory to fill
                 llvm_anyint_ty,  // val: the pattern to write
                 llvm_anyint_ty,  // len: number of bytes to fill
                 llvm_i1_ty],     // isvolatile
                [IntrWriteMem, IntrArgMemOnly, IntrWillReturn, IntrNoFree,
                 IntrNoCallback,
                 NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>,
                 ImmArg<ArgIndex<3>>],
                "llvm.memset_pattern">;
1013+
10061014
// FIXME: Add version of these floating point intrinsics which allow non-default
10071015
// rounding modes and FP exception handling.
10081016

llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,13 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
263263

264264
break;
265265
}
266+
case Intrinsic::memset_pattern: {
267+
auto *Memset = cast<MemSetPatternInst>(Inst);
268+
expandMemSetAsLoop(Memset);
269+
Changed = true;
270+
Memset->eraseFromParent();
271+
break;
272+
}
266273
default:
267274
llvm_unreachable("unhandled intrinsic");
268275
}
@@ -280,6 +287,7 @@ bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {
280287
case Intrinsic::memcpy:
281288
case Intrinsic::memmove:
282289
case Intrinsic::memset:
290+
case Intrinsic::memset_pattern:
283291
Changed |= expandMemIntrinsicUses(F);
284292
break;
285293
case Intrinsic::load_relative:

llvm/lib/IR/Verifier.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5446,7 +5446,8 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
54465446
case Intrinsic::memcpy_inline:
54475447
case Intrinsic::memmove:
54485448
case Intrinsic::memset:
5449-
case Intrinsic::memset_inline: {
5449+
case Intrinsic::memset_inline:
5450+
case Intrinsic::memset_pattern: {
54505451
break;
54515452
}
54525453
case Intrinsic::memcpy_element_unordered_atomic:

llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,106 @@ static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
455455
ElseTerm->eraseFromParent();
456456
}
457457

458+
/// Expand a pattern memset into explicit IR placed before \p InsertBefore.
///
/// The block containing \p InsertBefore is split and the following control
/// flow is created between the two halves:
///
///   OrigBB          -> storeloop (at least one whole pattern fits)
///                   -> remcheck  (otherwise)
///   storeloop       -> storeloop | remcheck
///   remcheck        -> remainderloop (CopyLen % PatternSize != 0)
///                   -> split         (otherwise)
///   remainderloop   -> remainderloop | split
///
/// \param InsertBefore instruction in front of which the expansion is emitted.
/// \param DstAddr      start of the memory to fill.
/// \param CopyLen      number of bytes to fill; must have integer type.
/// \param SetValue     pattern to store; only 128-bit values are supported and
///                     anything else is a fatal error.
/// \param DstAlign     known alignment of \p DstAddr.
/// \param IsVolatile   whether the emitted stores are volatile.
static void createMemSetPatternLoop(Instruction *InsertBefore, Value *DstAddr,
                                    Value *CopyLen, Value *SetValue,
                                    Align DstAlign, bool IsVolatile) {

  // To start with, let's assume SetValue is an i128 and bail out if it's not.
  if (SetValue->getType()->getScalarSizeInBits() != 128) {
    report_fatal_error("Only 128-bit variant supported for now");
  }
  unsigned PatternSize = SetValue->getType()->getScalarSizeInBits() / 8;

  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getDataLayout();

  BasicBlock *NewBB = OrigBB->splitBasicBlock(InsertBefore, "split");
  BasicBlock *LoopBB =
      BasicBlock::Create(F->getContext(), "storeloop", F, NewBB);
  BasicBlock *RemCheckBB =
      BasicBlock::Create(F->getContext(), "remcheck", F, NewBB);
  BasicBlock *RemainderLoopBB =
      BasicBlock::Create(F->getContext(), "remainderloop", F, NewBB);
  IRBuilder<> Builder(OrigBB->getTerminator());

  // The length must be an integer; cast<> asserts that instead of silently
  // handing a null type to ConstantInt::get.
  ConstantInt *CILoopOpSize =
      ConstantInt::get(cast<IntegerType>(TypeOfCopyLen), PatternSize);
  Value *RuntimeLoopCount =
      getRuntimeLoopCount(DL, Builder, CopyLen, CILoopOpSize, PatternSize);
  Value *RuntimeRemainder =
      getRuntimeLoopRemainder(DL, Builder, CopyLen, CILoopOpSize, PatternSize);

  // Skip the whole-pattern loop entirely when not even one pattern fits.
  Builder.CreateCondBr(Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0),
                                            RuntimeLoopCount),
                       RemCheckBB, LoopBB);
  OrigBB->getTerminator()->eraseFromParent();

  IRBuilder<> LoopBuilder(LoopBB);
  PHINode *CurrentDst = LoopBuilder.CreatePHI(DstAddr->getType(), 0);
  CurrentDst->addIncoming(DstAddr, OrigBB);
  PHINode *LoopCount = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
  LoopCount->addIncoming(RuntimeLoopCount, OrigBB);

  // Create the store instruction for the pattern. Only the first store is
  // known to sit at DstAlign; every later one is a multiple of PatternSize
  // bytes further on, so use the alignment that holds for all iterations.
  const Align PatternAlign = commonAlignment(DstAlign, PatternSize);
  LoopBuilder.CreateAlignedStore(SetValue, CurrentDst, PatternAlign,
                                 IsVolatile);

  // Advance the destination by exactly one pattern. The step is expressed as a
  // byte offset so that the stride is PatternSize bytes, not PatternSize
  // pattern-sized elements.
  Value *NextDst = LoopBuilder.CreateInBoundsGEP(
      LoopBuilder.getInt8Ty(), CurrentDst,
      ConstantInt::get(TypeOfCopyLen, PatternSize));
  CurrentDst->addIncoming(NextDst, LoopBB);

  Value *NewLoopCount =
      LoopBuilder.CreateSub(LoopCount, ConstantInt::get(TypeOfCopyLen, 1));
  LoopCount->addIncoming(NewLoopCount, LoopBB);

  LoopBuilder.CreateCondBr(
      LoopBuilder.CreateICmpNE(NewLoopCount,
                               ConstantInt::get(TypeOfCopyLen, 0)),
      LoopBB, RemCheckBB);

  IRBuilder<> RemCheckBuilder(RemCheckBB, RemCheckBB->begin());
  // The remainder starts either at the original destination (loop skipped) or
  // right after the last whole pattern written by the loop.
  PHINode *RemainderDstPHI = RemCheckBuilder.CreatePHI(NextDst->getType(), 0);
  RemainderDstPHI->addIncoming(DstAddr, OrigBB);
  RemainderDstPHI->addIncoming(NextDst, LoopBB);
  // Branch to the end if there are no remainder bytes.
  RemCheckBuilder.CreateCondBr(
      RemCheckBuilder.CreateICmpEQ(RuntimeRemainder,
                                   ConstantInt::get(TypeOfCopyLen, 0)),
      NewBB, RemainderLoopBB);

  // Remainder loop: emit the trailing bytes one at a time.
  // NOTE(review): bytes are taken from the low-order end of the pattern first.
  // A full-width store on a big-endian target writes the high-order byte
  // first, so there the tail may not continue the in-memory pattern - confirm
  // the intended semantics before relying on this for big-endian targets.
  IRBuilder<> RemainderLoopBuilder(RemainderLoopBB);
  PHINode *ByteIndex = RemainderLoopBuilder.CreatePHI(TypeOfCopyLen, 0);
  ByteIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), RemCheckBB);
  Type *TypeOfSetValue = SetValue->getType();
  PHINode *ShiftedValue = RemainderLoopBuilder.CreatePHI(TypeOfSetValue, 0);
  ShiftedValue->addIncoming(SetValue, RemCheckBB);

  // Get the byte to store
  Value *ByteToStore = RemainderLoopBuilder.CreateTrunc(
      ShiftedValue, RemainderLoopBuilder.getInt8Ty());

  // Store the byte
  RemainderLoopBuilder.CreateStore(
      ByteToStore,
      RemainderLoopBuilder.CreateInBoundsGEP(RemainderLoopBuilder.getInt8Ty(),
                                             RemainderDstPHI, ByteIndex),
      IsVolatile);

  Value *NewByteIndex = RemainderLoopBuilder.CreateAdd(
      ByteIndex, ConstantInt::get(TypeOfCopyLen, 1));
  ByteIndex->addIncoming(NewByteIndex, RemainderLoopBB);
  // Shift the next byte of the pattern down into the low-order position.
  Value *NewShiftedValue = RemainderLoopBuilder.CreateLShr(
      ShiftedValue, ConstantInt::get(TypeOfSetValue, 8));
  ShiftedValue->addIncoming(NewShiftedValue, RemainderLoopBB);

  RemainderLoopBuilder.CreateCondBr(
      RemainderLoopBuilder.CreateICmpULT(NewByteIndex, RuntimeRemainder),
      RemainderLoopBB, NewBB);
}
557+
458558
static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
459559
Value *CopyLen, Value *SetValue, Align DstAlign,
460560
bool IsVolatile) {
@@ -590,6 +690,16 @@ bool llvm::expandMemMoveAsLoop(MemMoveInst *Memmove,
590690
}
591691

592692
void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
693+
if (isa<MemSetPatternInst>(Memset)) {
694+
return createMemSetPatternLoop(
695+
/* InsertBefore */ Memset,
696+
/* DstAddr */ Memset->getRawDest(),
697+
/* CopyLen */ Memset->getLength(),
698+
/* SetValue */ Memset->getValue(),
699+
/* Alignment */ Memset->getDestAlign().valueOrOne(),
700+
Memset->isVolatile());
701+
}
702+
593703
createMemSetLoop(/* InsertBefore */ Memset,
594704
/* DstAddr */ Memset->getRawDest(),
595705
/* CopyLen */ Memset->getLength(),

0 commit comments

Comments
 (0)