Skip to content

Commit ff5095d

Browse files
authored
[TTI] Provide a cost for memset_pattern which matches the libcall (#139978)
The motivation is that differences in unrolling were noticed when trying to switch from the libcall to the intrinsic. There are likely also differences, not yet noticed, in other cost-based decisions, such as inlining and possibly vectorization. Neither cost is a good, well-considered cost, but for the moment let's make them equal to simplify migration. We can come back and refine this once the intrinsic is being exercised by default.
1 parent 61a98eb commit ff5095d

File tree

2 files changed

+45
-0
lines changed

2 files changed

+45
-0
lines changed

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2408,6 +2408,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
24082408
CmpInst::ICMP_ULT, CostKind);
24092409
return Cost;
24102410
}
2411+
case Intrinsic::experimental_memset_pattern:
2412+
// This cost is set to match the cost of the memset_pattern16 libcall.
2413+
// It should likely be re-evaluated after migration to this intrinsic
2414+
// is complete.
2415+
return TTI::TCC_Basic * 4;
24112416
case Intrinsic::abs:
24122417
ISD = ISD::ABS;
24132418
break;
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt < %s -mtriple=x86_64-apple-darwin10.0.0 -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s
3+
4+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
5+
6+
target triple = "x86_64-apple-darwin10.0.0"
7+
8+
@.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
9+
10+
define void @via_libcall(ptr %p) nounwind ssp {
11+
; CHECK-LABEL: 'via_libcall'
12+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @memset_pattern4(ptr %p, ptr @.memset_pattern, i64 200)
13+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @memset_pattern8(ptr %p, ptr @.memset_pattern, i64 200)
14+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @memset_pattern16(ptr %p, ptr @.memset_pattern, i64 200)
15+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
16+
;
17+
call void @memset_pattern4(ptr %p, ptr @.memset_pattern, i64 200)
18+
call void @memset_pattern8(ptr %p, ptr @.memset_pattern, i64 200)
19+
call void @memset_pattern16(ptr %p, ptr @.memset_pattern, i64 200)
20+
ret void
21+
}
22+
23+
declare void @memset_pattern4(ptr, ptr, i64)
24+
declare void @memset_pattern8(ptr, ptr, i64)
25+
declare void @memset_pattern16(ptr, ptr, i64)
26+
27+
define void @via_intrinsic(ptr %p) {
28+
; CHECK-LABEL: 'via_intrinsic'
29+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.experimental.memset.pattern.p0.i16.i64(ptr align 4 %p, i16 2, i64 100, i1 false)
30+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr align 4 %p, i32 2, i64 50, i1 false)
31+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.experimental.memset.pattern.p0.i64.i64(ptr align 4 %p, i64 2, i64 25, i1 false)
32+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.experimental.memset.pattern.p0.i128.i64(ptr align 4 %p, i128 2, i64 12, i1 false)
33+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
34+
;
35+
call void @llvm.experimental.memset.pattern(ptr align 4 %p, i16 2, i64 100, i1 false)
36+
call void @llvm.experimental.memset.pattern(ptr align 4 %p, i32 2, i64 50, i1 false)
37+
call void @llvm.experimental.memset.pattern(ptr align 4 %p, i64 2, i64 25, i1 false)
38+
call void @llvm.experimental.memset.pattern(ptr align 4 %p, i128 2, i64 12, i1 false)
39+
ret void
40+
}

0 commit comments

Comments
 (0)