Skip to content

Commit 442c72f

Browse files
committed
[PreISelIntrinsicLowering] Produce a memset_pattern16 libcall for llvm.experimental.memset.pattern when available
This is to enable a transition of LoopIdiomRecognize to selecting the llvm.experimental.memset.pattern intrinsic as requested in llvm#118632 (as opposed to supporting selection of the libcall or the intrinsic). As such, although it _is_ a FIXME to add costing considerations on whether to lower to the libcall (when available) or expand directly, lacking such logic is helpful at this stage in order to minimise any potential code gen changes in this transition.
1 parent f8d2704 commit 442c72f

File tree

2 files changed

+158
-1
lines changed

2 files changed

+158
-1
lines changed

llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp

Lines changed: 89 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "llvm/Support/Casting.h"
3333
#include "llvm/Target/TargetMachine.h"
3434
#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
35+
#include "llvm/Transforms/Utils/BuildLibCalls.h"
3536
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
3637

3738
using namespace llvm;
@@ -232,6 +233,59 @@ static bool canEmitLibcall(const TargetMachine *TM, Function *F,
232233
return TLI->getLibcallName(LC) != nullptr;
233234
}
234235

236+
// Return a value appropriate for use with the memset_pattern16 libcall, if
237+
// possible and if we know how. (Adapted from equivalent helper in
238+
// LoopIdiomRecognize).
239+
static Constant *getMemSetPattern16Value(MemSetPatternInst *Inst,
240+
const TargetLibraryInfo &TLI) {
241+
// FIXME: This could check for UndefValue because it can be merged into any
242+
// other valid pattern.
243+
244+
// Don't emit libcalls if a non-default address space is being used.
245+
if (Inst->getRawDest()->getType()->getPointerAddressSpace() != 0)
246+
return nullptr;
247+
248+
Value *V = Inst->getValue();
249+
const DataLayout &DL = Inst->getDataLayout();
250+
Module *M = Inst->getModule();
251+
252+
if (!isLibFuncEmittable(M, &TLI, LibFunc_memset_pattern16))
253+
return nullptr;
254+
255+
// If the value isn't a constant, we can't promote it to being in a constant
256+
// array. We could theoretically do a store to an alloca or something, but
257+
// that doesn't seem worthwhile.
258+
Constant *C = dyn_cast<Constant>(V);
259+
if (!C || isa<ConstantExpr>(C))
260+
return nullptr;
261+
262+
// Only handle simple values that are a power of two bytes in size.
263+
uint64_t Size = DL.getTypeSizeInBits(V->getType());
264+
if (Size == 0 || (Size & 7) || (Size & (Size - 1)))
265+
return nullptr;
266+
267+
// Don't care enough about darwin/ppc to implement this.
268+
if (DL.isBigEndian())
269+
return nullptr;
270+
271+
// Convert to size in bytes.
272+
Size /= 8;
273+
274+
// TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
275+
// if the top and bottom are the same (e.g. for vectors and large integers).
276+
if (Size > 16)
277+
return nullptr;
278+
279+
// If the constant is exactly 16 bytes, just use it.
280+
if (Size == 16)
281+
return C;
282+
283+
// Otherwise, we'll use an array of the constants.
284+
unsigned ArraySize = 16 / Size;
285+
ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
286+
return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
287+
}
288+
235289
// TODO: Handle atomic memcpy and memcpy.inline
236290
// TODO: Pass ScalarEvolution
237291
bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
@@ -322,7 +376,41 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
322376
}
323377
case Intrinsic::experimental_memset_pattern: {
324378
auto *Memset = cast<MemSetPatternInst>(Inst);
325-
expandMemSetPatternAsLoop(Memset);
379+
const TargetLibraryInfo &TLI = LookupTLI(*Memset->getFunction());
380+
if (Constant *PatternValue = getMemSetPattern16Value(Memset, TLI)) {
381+
// FIXME: There is currently no profitability calculation for emitting
382+
// the libcall vs expanding the memset.pattern directly.
383+
IRBuilder<> Builder(Inst);
384+
Module *M = Memset->getModule();
385+
const DataLayout &DL = Memset->getDataLayout();
386+
387+
StringRef FuncName = "memset_pattern16";
388+
FunctionCallee MSP = getOrInsertLibFunc(
389+
M, TLI, LibFunc_memset_pattern16, Builder.getVoidTy(),
390+
Memset->getRawDest()->getType(), Builder.getPtrTy(),
391+
Memset->getLength()->getType());
392+
inferNonMandatoryLibFuncAttrs(M, FuncName, TLI);
393+
394+
// Otherwise we should form a memset_pattern16. PatternValue is known
395+
// to be a 16-byte constant. Put the value into a mergeable
396+
// global.
397+
GlobalVariable *GV = new GlobalVariable(
398+
*M, PatternValue->getType(), true, GlobalValue::PrivateLinkage,
399+
PatternValue, ".memset_pattern");
400+
GV->setUnnamedAddr(
401+
GlobalValue::UnnamedAddr::Global); // Ok to merge these.
402+
GV->setAlignment(Align(16));
403+
Value *PatternPtr = GV;
404+
Value *NumBytes = Builder.CreateMul(
405+
Builder.getInt64(
406+
DL.getTypeSizeInBits(Memset->getValue()->getType()) / 8),
407+
Memset->getLength());
408+
CallInst *MemsetPattern16Call = Builder.CreateCall(
409+
MSP, {Memset->getRawDest(), PatternPtr, NumBytes});
410+
MemsetPattern16Call->setAAMetadata(Memset->getAAMetadata());
411+
} else {
412+
expandMemSetPatternAsLoop(Memset);
413+
}
326414
Changed = true;
327415
Memset->eraseFromParent();
328416
break;
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -mtriple=x86_64-apple-darwin10.0.0 -passes=pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s
3+
4+
; The pattern is the function argument %value rather than a constant, so the
; expected output is the store loop and not a call to memset_pattern16.
define void @memset_pattern_i128_1_dynvalue(ptr %a, i128 %value) nounwind {
5+
; CHECK-LABEL: define void @memset_pattern_i128_1_dynvalue(
6+
; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: br i1 false, label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
8+
; CHECK: [[LOADSTORELOOP]]:
9+
; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], %[[LOADSTORELOOP]] ]
10+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i128, ptr [[A]], i64 [[TMP1]]
11+
; CHECK-NEXT: store i128 [[VALUE]], ptr [[TMP2]], align 1
12+
; CHECK-NEXT: [[TMP3]] = add i64 [[TMP1]], 1
13+
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 1
14+
; CHECK-NEXT: br i1 [[TMP4]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
15+
; CHECK: [[SPLIT]]:
16+
; CHECK-NEXT: ret void
17+
;
18+
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 %value, i64 1, i1 0)
19+
ret void
20+
}
21+
22+
; A constant i128 pattern written once to a default-address-space pointer: the
; expected output is a single memset_pattern16 call with a byte length of 16.
define void @memset_pattern_i128_1(ptr %a, i128 %value) nounwind {
23+
; CHECK-LABEL: define void @memset_pattern_i128_1(
24+
; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0]] {
25+
; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern, i64 16)
26+
; CHECK-NEXT: ret void
27+
;
28+
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 1, i1 0)
29+
ret void
30+
}
31+
32+
; The destination pointer is in addrspace(1), so the expected output is the
; store loop even though the pattern is a constant.
define void @memset_pattern_i128_1_nz_as(ptr addrspace(1) %a, i128 %value) nounwind {
33+
; CHECK-LABEL: define void @memset_pattern_i128_1_nz_as(
34+
; CHECK-SAME: ptr addrspace(1) [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0]] {
35+
; CHECK-NEXT: br i1 false, label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
36+
; CHECK: [[LOADSTORELOOP]]:
37+
; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], %[[LOADSTORELOOP]] ]
38+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i128, ptr addrspace(1) [[A]], i64 [[TMP1]]
39+
; CHECK-NEXT: store i128 -113427455635030943652277463699152839203, ptr addrspace(1) [[TMP2]], align 1
40+
; CHECK-NEXT: [[TMP3]] = add i64 [[TMP1]], 1
41+
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 1
42+
; CHECK-NEXT: br i1 [[TMP4]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
43+
; CHECK: [[SPLIT]]:
44+
; CHECK-NEXT: ret void
45+
;
46+
tail call void @llvm.experimental.memset.pattern(ptr addrspace(1) %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 1, i1 0)
47+
ret void
48+
}
49+
50+
; A constant i128 pattern with a constant count of 16: the expected output is
; a memset_pattern16 call whose byte length is 256.
define void @memset_pattern_i128_16(ptr %a) nounwind {
51+
; CHECK-LABEL: define void @memset_pattern_i128_16(
52+
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
53+
; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.1, i64 256)
54+
; CHECK-NEXT: ret void
55+
;
56+
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 16, i1 0)
57+
ret void
58+
}
59+
60+
; A constant i128 pattern with a run-time count %x: the expected output
; multiplies %x by 16 and passes the product as the memset_pattern16 length.
define void @memset_pattern_i128_x(ptr %a, i64 %x) nounwind {
61+
; CHECK-LABEL: define void @memset_pattern_i128_x(
62+
; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
63+
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 16, [[X]]
64+
; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.2, i64 [[TMP1]])
65+
; CHECK-NEXT: ret void
66+
;
67+
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 %x, i1 0)
68+
ret void
69+
}

0 commit comments

Comments
 (0)