Skip to content

Commit 522fd53

Browse files
authored
[Intrinsics][PreISelIntrinsicLowering] llvm.memcpy.inline length no longer needs to be constant (#98281)
Following on from the discussion in https://discourse.llvm.org/t/rfc-introducing-an-llvm-memset-pattern-inline-intrinsic/79496 and the equivalent change for llvm.memset.inline (#95397), this removes the requirement that the length of llvm.memcpy.inline is constant. PreISelIntrinsicLowering will expand llvm.memcpy.inline with non-constant lengths, while the codegen path for constant lengths is left unaltered.
1 parent 00f83a8 commit 522fd53

File tree

7 files changed

+69
-32
lines changed

7 files changed

+69
-32
lines changed

llvm/docs/LangRef.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15026,7 +15026,7 @@ Arguments:
1502615026
""""""""""
1502715027

1502815028
The first argument is a pointer to the destination, the second is a
15029-
pointer to the source. The third argument is a constant integer argument
15029+
pointer to the source. The third argument is an integer argument
1503015030
specifying the number of bytes to copy, and the fourth is a
1503115031
boolean indicating a volatile access.
1503215032

llvm/include/llvm/IR/IntrinsicInst.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1296,9 +1296,6 @@ class MemMoveInst : public MemTransferInst {
12961296
/// This class wraps the llvm.memcpy.inline intrinsic.
12971297
class MemCpyInlineInst : public MemCpyInst {
12981298
public:
1299-
ConstantInt *getLength() const {
1300-
return cast<ConstantInt>(MemCpyInst::getLength());
1301-
}
13021299
// Methods for support type inquiry through isa, cast, and dyn_cast:
13031300
static bool classof(const IntrinsicInst *I) {
13041301
return I->getIntrinsicID() == Intrinsic::memcpy_inline;

llvm/include/llvm/IR/Intrinsics.td

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -966,15 +966,14 @@ def int_memcpy : Intrinsic<[],
966966
// Memcpy semantic that is guaranteed to be inlined.
967967
// In particular this means that the generated code is not allowed to call any
968968
// external function.
969-
// The third argument (specifying the size) must be a constant.
970969
def int_memcpy_inline
971970
: Intrinsic<[],
972971
[llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty],
973972
[IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback,
974973
NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,
975974
NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
976975
WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>,
977-
ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
976+
ImmArg<ArgIndex<3>>]>;
978977

979978
def int_memmove : Intrinsic<[],
980979
[llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,

llvm/lib/Analysis/Lint.cpp

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,8 @@ void Lint::visitCallBase(CallBase &I) {
290290

291291
// TODO: Check more intrinsics
292292

293-
case Intrinsic::memcpy: {
293+
case Intrinsic::memcpy:
294+
case Intrinsic::memcpy_inline: {
294295
MemCpyInst *MCI = cast<MemCpyInst>(&I);
295296
visitMemoryReference(I, MemoryLocation::getForDest(MCI),
296297
MCI->getDestAlign(), nullptr, MemRef::Write);
@@ -311,23 +312,6 @@ void Lint::visitCallBase(CallBase &I) {
311312
"Undefined behavior: memcpy source and destination overlap", &I);
312313
break;
313314
}
314-
case Intrinsic::memcpy_inline: {
315-
MemCpyInlineInst *MCII = cast<MemCpyInlineInst>(&I);
316-
const uint64_t Size = MCII->getLength()->getValue().getLimitedValue();
317-
visitMemoryReference(I, MemoryLocation::getForDest(MCII),
318-
MCII->getDestAlign(), nullptr, MemRef::Write);
319-
visitMemoryReference(I, MemoryLocation::getForSource(MCII),
320-
MCII->getSourceAlign(), nullptr, MemRef::Read);
321-
322-
// Check that the memcpy arguments don't overlap. The AliasAnalysis API
323-
// isn't expressive enough for what we really want to do. Known partial
324-
// overlap is not distinguished from the case where nothing is known.
325-
const LocationSize LS = LocationSize::precise(Size);
326-
Check(AA->alias(MCII->getSource(), LS, MCII->getDest(), LS) !=
327-
AliasResult::MustAlias,
328-
"Undefined behavior: memcpy source and destination overlap", &I);
329-
break;
330-
}
331315
case Intrinsic::memmove: {
332316
MemMoveInst *MMI = cast<MemMoveInst>(&I);
333317
visitMemoryReference(I, MemoryLocation::getForDest(MMI),

llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,21 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
230230

231231
break;
232232
}
233+
case Intrinsic::memcpy_inline: {
234+
// Only expand llvm.memcpy.inline with non-constant length in this
235+
// codepath, leaving the current SelectionDAG expansion for constant
236+
// length memcpy intrinsics undisturbed.
237+
auto *Memcpy = cast<MemCpyInlineInst>(Inst);
238+
if (isa<ConstantInt>(Memcpy->getLength()))
239+
break;
240+
241+
Function *ParentFunc = Memcpy->getFunction();
242+
const TargetTransformInfo &TTI = LookupTTI(*ParentFunc);
243+
expandMemCpyAsLoop(Memcpy, TTI);
244+
Changed = true;
245+
Memcpy->eraseFromParent();
246+
break;
247+
}
233248
case Intrinsic::memmove: {
234249
auto *Memmove = cast<MemMoveInst>(Inst);
235250
Function *ParentFunc = Memmove->getFunction();
@@ -291,6 +306,7 @@ bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {
291306
default:
292307
break;
293308
case Intrinsic::memcpy:
309+
case Intrinsic::memcpy_inline:
294310
case Intrinsic::memmove:
295311
case Intrinsic::memset:
296312
case Intrinsic::memset_inline:
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -mtriple=x86_64-pc-linux-gnu -passes=pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s
3+
4+
; Constant length memcpy.inline should be left unmodified.
5+
define void @memcpy_32(ptr %dst, ptr %src) nounwind {
6+
; CHECK-LABEL: define void @memcpy_32(
7+
; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]]) #[[ATTR0:[0-9]+]] {
8+
; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 32, i1 false)
9+
; CHECK-NEXT: tail call void @llvm.memcpy.inline.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 32, i1 true)
10+
; CHECK-NEXT: ret void
11+
;
12+
call void @llvm.memcpy.inline.p0.p0.i64(ptr %dst, ptr %src, i64 32, i1 0)
13+
tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dst, ptr %src, i64 32, i1 1)
14+
ret void
15+
}
16+
17+
define void @memcpy_x(ptr %dst, ptr %src, i64 %x) nounwind {
18+
; CHECK-LABEL: define void @memcpy_x(
19+
; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
20+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[X]], 0
21+
; CHECK-NEXT: br i1 [[TMP1]], label %[[LOOP_MEMCPY_EXPANSION:.*]], label %[[POST_LOOP_MEMCPY_EXPANSION:.*]]
22+
; CHECK: [[LOOP_MEMCPY_EXPANSION]]:
23+
; CHECK-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP5:%.*]], %[[LOOP_MEMCPY_EXPANSION]] ]
24+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[LOOP_INDEX]]
25+
; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1
26+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[LOOP_INDEX]]
27+
; CHECK-NEXT: store i8 [[TMP3]], ptr [[TMP4]], align 1
28+
; CHECK-NEXT: [[TMP5]] = add i64 [[LOOP_INDEX]], 1
29+
; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP5]], [[X]]
30+
; CHECK-NEXT: br i1 [[TMP6]], label %[[LOOP_MEMCPY_EXPANSION]], label %[[POST_LOOP_MEMCPY_EXPANSION]]
31+
; CHECK: [[POST_LOOP_MEMCPY_EXPANSION]]:
32+
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[X]], 0
33+
; CHECK-NEXT: br i1 [[TMP7]], label %[[LOOP_MEMCPY_EXPANSION2:.*]], label %[[POST_LOOP_MEMCPY_EXPANSION1:.*]]
34+
; CHECK: [[LOOP_MEMCPY_EXPANSION2]]:
35+
; CHECK-NEXT: [[LOOP_INDEX3:%.*]] = phi i64 [ 0, %[[POST_LOOP_MEMCPY_EXPANSION]] ], [ [[TMP11:%.*]], %[[LOOP_MEMCPY_EXPANSION2]] ]
36+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[LOOP_INDEX3]]
37+
; CHECK-NEXT: [[TMP9:%.*]] = load volatile i8, ptr [[TMP8]], align 1
38+
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[LOOP_INDEX3]]
39+
; CHECK-NEXT: store volatile i8 [[TMP9]], ptr [[TMP10]], align 1
40+
; CHECK-NEXT: [[TMP11]] = add i64 [[LOOP_INDEX3]], 1
41+
; CHECK-NEXT: [[TMP12:%.*]] = icmp ult i64 [[TMP11]], [[X]]
42+
; CHECK-NEXT: br i1 [[TMP12]], label %[[LOOP_MEMCPY_EXPANSION2]], label %[[POST_LOOP_MEMCPY_EXPANSION1]]
43+
; CHECK: [[POST_LOOP_MEMCPY_EXPANSION1]]:
44+
; CHECK-NEXT: ret void
45+
;
46+
call void @llvm.memcpy.inline.p0.p0.i64(ptr %dst, ptr %src, i64 %x, i1 0)
47+
tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dst, ptr %src, i64 %x, i1 1)
48+
ret void
49+
}

llvm/test/Verifier/intrinsic-immarg.ll

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,6 @@ define void @memcpy_inline_is_volatile(ptr %dest, ptr %src, i1 %is.volatile) {
3636
ret void
3737
}
3838

39-
define void @memcpy_inline_variable_size(ptr %dest, ptr %src, i32 %size) {
40-
; CHECK: immarg operand has non-immediate parameter
41-
; CHECK-NEXT: i32 %size
42-
; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i32(ptr %dest, ptr %src, i32 %size, i1 true)
43-
call void @llvm.memcpy.inline.p0.p0.i32(ptr %dest, ptr %src, i32 %size, i1 true)
44-
ret void
45-
}
46-
4739
declare void @llvm.memmove.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1)
4840
define void @memmove(ptr %dest, ptr %src, i1 %is.volatile) {
4941
; CHECK: immarg operand has non-immediate parameter

0 commit comments

Comments
 (0)