Skip to content

Commit d526aab

Browse files
author
git apple-llvm automerger
committed
Merge commit '14f1200a228e' from apple/main into swift/next
2 parents 9e366d0 + 14f1200 commit d526aab

File tree

3 files changed

+55
-0
lines changed

3 files changed

+55
-0
lines changed

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1409,6 +1409,21 @@ unsigned ARMTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
14091409
return ScalarCost;
14101410
}
14111411

1412+
int ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
1413+
TTI::TargetCostKind CostKind) {
1414+
// Return 0 for get_active_lane_mask when the subtarget has MVE integer ops,
1415+
// and defer to the base implementation otherwise. Treating these intrinsics
1416+
// as always free is a somewhat optimistic assumption: in reality one may be
1417+
// folded for free into a tail-predicated loop, expanded into a VCPT, or
1418+
// expanded into a lot of add/icmp code. Detecting which case applies involves
1419+
// looking at a lot of other code, so for now we assume the vectorizer inserted
1420+
// the intrinsic and knew what it was doing. This may need improving in future.
1421+
if (ST->hasMVEIntegerOps() && ICA.getID() == Intrinsic::get_active_lane_mask)
1422+
return 0;
1423+
1424+
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
1425+
}
1426+
14121427
bool ARMTTIImpl::isLoweredToCall(const Function *F) {
14131428
if (!F->isIntrinsic())
14141429
BaseT::isLoweredToCall(F);

llvm/lib/Target/ARM/ARMTargetTransformInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,9 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
247247
Align Alignment, TTI::TargetCostKind CostKind,
248248
const Instruction *I = nullptr);
249249

250+
int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
251+
TTI::TargetCostKind CostKind);
252+
250253
bool maybeLoweredToCall(Instruction &I);
251254
bool isLoweredToCall(const Function *F);
252255
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2+
; RUN: opt < %s -S -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -cost-model -analyze | FileCheck %s
3+
4+
; Note that instructions like this (not in a loop that could be tail
5+
; predicated) should not really be free. We currently assume that all active
6+
; lane masks are free.
7+
8+
define void @v4i32(i32 %index, i32 %TC) {
9+
; CHECK-LABEL: 'v4i32'
10+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %TC)
11+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
12+
;
13+
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %TC)
14+
ret void
15+
}
16+
17+
define void @v8i16(i32 %index, i32 %TC) {
18+
; CHECK-LABEL: 'v8i16'
19+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %TC)
20+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
21+
;
22+
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %TC)
23+
ret void
24+
}
25+
26+
define void @v16i8(i32 %index, i32 %TC) {
27+
; CHECK-LABEL: 'v16i8'
28+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %TC)
29+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
30+
;
31+
%active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %TC)
32+
ret void
33+
}
34+
35+
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
36+
declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
37+
declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32)

0 commit comments

Comments
 (0)