Skip to content

Commit 93c8235

Browse files
committed
[AArch64TTI] Compute imm materialization cost for AArch64 intrinsics
Currently, getIntImmCost returns TCC_Free for almost all intrinsics. For most AArch64-specific intrinsics, however, it looks like integer constants cannot be folded into the selected instruction (at least for the ones I checked). Unless we know that integer operands can be folded into the intrinsic, we handle more cases correctly by returning the cost to materialize the immediate than by returning TCC_Free. Reviewers: SjoerdMeijer, dmgreen, t.p.northover, ributzka Reviewed By: SjoerdMeijer Differential Revision: https://reviews.llvm.org/D70669
1 parent 17e537b commit 93c8235

File tree

2 files changed

+95
-0
lines changed

2 files changed

+95
-0
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,12 @@ int AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
156156
if (BitSize == 0)
157157
return TTI::TCC_Free;
158158

159+
// Most (all?) AArch64 intrinsics do not support folding immediates into the
160+
// selected instruction, so we compute the materialization cost for the
161+
// immediate directly.
162+
if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
163+
return AArch64TTIImpl::getIntImmCost(Imm, Ty);
164+
159165
switch (IID) {
160166
default:
161167
return TTI::TCC_Free;
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -mtriple=arm64-darwin-unknown -S -consthoist < %s | FileCheck %s
3+
4+
; Make sure we hoist constants out of intrinsics.
5+
6+
define void @test_stxr(i64* %ptr) {
7+
; CHECK-LABEL: @test_stxr(
8+
; CHECK-NEXT: entry:
9+
; CHECK-NEXT: [[CONST:%.*]] = bitcast i64 -9223372036317904832 to i64
10+
; CHECK-NEXT: [[PTR_0:%.*]] = getelementptr i64, i64* [[PTR:%.*]], i64 0
11+
; CHECK-NEXT: [[CONST_MAT:%.*]] = add i64 [[CONST]], -64
12+
; CHECK-NEXT: [[BAR_0:%.*]] = call i32 @llvm.aarch64.stxr.p0i64(i64 [[CONST_MAT]], i64* [[PTR_0]])
13+
; CHECK-NEXT: [[PTR_1:%.*]] = getelementptr i64, i64* [[PTR]], i64 1
14+
; CHECK-NEXT: [[BAR_1:%.*]] = call i32 @llvm.aarch64.stxr.p0i64(i64 [[CONST]], i64* [[PTR_1]])
15+
; CHECK-NEXT: [[PTR_2:%.*]] = getelementptr i64, i64* [[PTR]], i64 2
16+
; CHECK-NEXT: [[CONST_MAT1:%.*]] = add i64 [[CONST]], 64
17+
; CHECK-NEXT: [[BAR_2:%.*]] = call i32 @llvm.aarch64.stxr.p0i64(i64 [[CONST_MAT1]], i64* [[PTR_2]])
18+
; CHECK-NEXT: [[PTR_3:%.*]] = getelementptr i64, i64* [[PTR]], i64 3
19+
; CHECK-NEXT: [[CONST_MAT2:%.*]] = add i64 [[CONST]], 128
20+
; CHECK-NEXT: [[BAR_3:%.*]] = call i32 @llvm.aarch64.stxr.p0i64(i64 [[CONST_MAT2]], i64* [[PTR_3]])
21+
; CHECK-NEXT: ret void
22+
;
23+
entry:
24+
%ptr.0 = getelementptr i64, i64* %ptr, i64 0
25+
%bar.0 = call i32 @llvm.aarch64.stxr.p0i64(i64 -9223372036317904896, i64* %ptr.0)
26+
%ptr.1 = getelementptr i64, i64* %ptr, i64 1
27+
%bar.1 = call i32 @llvm.aarch64.stxr.p0i64(i64 -9223372036317904832, i64* %ptr.1)
28+
%ptr.2 = getelementptr i64, i64* %ptr, i64 2
29+
%bar.2 = call i32 @llvm.aarch64.stxr.p0i64(i64 -9223372036317904768, i64* %ptr.2)
30+
%ptr.3 = getelementptr i64, i64* %ptr, i64 3
31+
%bar.3 = call i32 @llvm.aarch64.stxr.p0i64(i64 -9223372036317904704, i64* %ptr.3)
32+
ret void
33+
}
34+
35+
declare i32 @llvm.aarch64.stxr.p0i64(i64 , i64*)
36+
37+
define i64 @test_udiv(i64 %x) {
38+
; CHECK-LABEL: @test_udiv(
39+
; CHECK-NEXT: entry:
40+
; CHECK-NEXT: [[CONST:%.*]] = bitcast i64 -9223372036317904832 to i64
41+
; CHECK-NEXT: [[CONST_MAT:%.*]] = add i64 [[CONST]], -64
42+
; CHECK-NEXT: [[BAR_0:%.*]] = call i64 @llvm.aarch64.udiv.i64(i64 [[CONST_MAT]], i64 [[X:%.*]])
43+
; CHECK-NEXT: [[BAR_1:%.*]] = call i64 @llvm.aarch64.udiv.i64(i64 [[CONST]], i64 [[X]])
44+
; CHECK-NEXT: [[CONST_MAT1:%.*]] = add i64 [[CONST]], 64
45+
; CHECK-NEXT: [[BAR_2:%.*]] = call i64 @llvm.aarch64.udiv.i64(i64 [[CONST_MAT1]], i64 [[X]])
46+
; CHECK-NEXT: [[CONST_MAT2:%.*]] = add i64 [[CONST]], 128
47+
; CHECK-NEXT: [[BAR_3:%.*]] = call i64 @llvm.aarch64.udiv.i64(i64 [[CONST_MAT2]], i64 [[X]])
48+
; CHECK-NEXT: [[RES_1:%.*]] = add i64 [[BAR_0]], [[BAR_1]]
49+
; CHECK-NEXT: [[RES_2:%.*]] = add i64 [[RES_1]], [[BAR_2]]
50+
; CHECK-NEXT: [[RES_3:%.*]] = add i64 [[RES_2]], [[BAR_3]]
51+
; CHECK-NEXT: ret i64 [[RES_3]]
52+
;
53+
entry:
54+
%bar.0 = call i64 @llvm.aarch64.udiv.i64.i64(i64 -9223372036317904896, i64 %x)
55+
%bar.1 = call i64 @llvm.aarch64.udiv.i64.i64(i64 -9223372036317904832, i64 %x)
56+
%bar.2 = call i64 @llvm.aarch64.udiv.i64.i64(i64 -9223372036317904768, i64 %x)
57+
%bar.3 = call i64 @llvm.aarch64.udiv.i64.i64(i64 -9223372036317904704, i64 %x)
58+
%res.1 = add i64 %bar.0, %bar.1
59+
%res.2 = add i64 %res.1, %bar.2
60+
%res.3 = add i64 %res.2, %bar.3
61+
ret i64 %res.3
62+
}
63+
64+
declare i64 @llvm.aarch64.udiv.i64.i64(i64, i64)
65+
66+
define void @test_free_intrinsics(i64 %x, i8* %ptr) {
67+
; CHECK-LABEL: @test_free_intrinsics(
68+
; CHECK-NEXT: entry:
69+
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 100000000032, i8* [[PTR:%.*]])
70+
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 100000000064, i8* [[PTR]])
71+
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 100000000128, i8* [[PTR]])
72+
; CHECK-NEXT: [[I:%.*]] = call {}* @llvm.invariant.start.p0i8(i64 100000000256, i8* [[PTR]])
73+
; CHECK-NEXT: call void @llvm.invariant.end.p0i8({}* [[I]], i64 100000000256, i8* [[PTR]])
74+
; CHECK-NEXT: ret void
75+
;
76+
entry:
77+
call void @llvm.lifetime.start.p0i8(i64 100000000032, i8* %ptr)
78+
call void @llvm.lifetime.start.p0i8(i64 100000000064, i8* %ptr)
79+
call void @llvm.lifetime.end.p0i8(i64 100000000128, i8* %ptr)
80+
%i = call {}* @llvm.invariant.start.p0i8(i64 100000000256, i8* %ptr)
81+
call void @llvm.invariant.end.p0i8({}* %i, i64 100000000256, i8* %ptr)
82+
ret void
83+
}
84+
85+
declare void @llvm.lifetime.start.p0i8(i64, i8*)
86+
declare void @llvm.lifetime.end.p0i8(i64, i8*)
87+
88+
declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture)
89+
declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture)

0 commit comments

Comments
 (0)