Skip to content

Commit 6a38c9a

Browse files
committed
Responding to review comments
Change-Id: Ie8c40972ae07e568d767ace37b9dda0f77272569
1 parent a602844 commit 6a38c9a

File tree

2 files changed

+78
-21
lines changed

2 files changed

+78
-21
lines changed

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

Lines changed: 31 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1460,15 +1460,16 @@ InstructionCost ARMTTIImpl::getArithmeticInstrCost(
14601460

14611461
// When targets have both DSP and MVE we find that the
14621462
// compiler will attempt to vectorize as well as using
1463-
// scalar SMLAL operations. This is in cases where we have
1463+
// scalar (S/U)MLAL operations. This is in cases where we have
14641464
// the pattern ext(mul(ext(i16), ext(i16))) we find
1465-
// that generated codegen performs better when only using SMLAL scalar
1466-
// ops instead of trying to mix vector ops with SMLAL ops. We therefore
1465+
// that generated codegen performs better when only using (S/U)MLAL scalar
1466+
// ops instead of trying to mix vector ops with (S/U)MLAL ops. We therefore
14671467
// check if a mul instruction is used in a (S/U)MLAL pattern.
1468-
auto MulInSMLALPattern = [&](const Instruction *I, unsigned Opcode,
1469-
Type *Ty) -> bool {
1470-
if (!ST->hasDSP() || !ST->hasMVEIntegerOps())
1468+
auto MulInDSPMLALPattern = [&](const Instruction *I, unsigned Opcode,
1469+
Type *Ty) -> bool {
1470+
if (!ST->hasDSP())
14711471
return false;
1472+
14721473
if (!I)
14731474
return false;
14741475

@@ -1478,30 +1479,43 @@ InstructionCost ARMTTIImpl::getArithmeticInstrCost(
14781479
if (Ty->isVectorTy())
14791480
return false;
14801481

1481-
auto IsSExtInst = [](const Value *V) -> bool {
1482-
return (dyn_cast<SExtInst>(V)) ? true : false;
1482+
auto IsSExtInst = [](const Value *V) -> bool { return isa<SExtInst>(V); };
1483+
auto IsZExtInst = [](const Value *V) -> bool { return isa<ZExtInst>(V); };
1484+
auto IsExtInst = [&, IsSExtInst, IsZExtInst](const Value *V) -> bool {
1485+
return IsSExtInst(V) || IsZExtInst(V);
1486+
};
1487+
auto IsExtensionFromHalf = [&, IsSExtInst,
1488+
IsZExtInst](const Value *V) -> bool {
1489+
if (IsSExtInst(V))
1490+
return dyn_cast<SExtInst>(V)->getOperand(0)->getType()->isIntegerTy(16);
1491+
if (IsZExtInst(V))
1492+
return dyn_cast<ZExtInst>(V)->getOperand(0)->getType()->isIntegerTy(16);
1493+
return false;
14831494
};
14841495

1485-
// We check the arguments of the function to see if they're extends
1496+
// We check the arguments of the instruction to see if they're extends
14861497
auto *BinOp = dyn_cast<BinaryOperator>(I);
14871498
if (!BinOp)
14881499
return false;
1489-
auto *Op0 = BinOp->getOperand(0);
1490-
auto *Op1 = BinOp->getOperand(1);
1491-
if (Op0 && Op1 && IsSExtInst(Op0) && IsSExtInst(Op1)) {
1492-
// In this case we're interested in an ext of an i16
1493-
if (!Op0->getType()->isIntegerTy(32) || !Op1->getType()->isIntegerTy(32))
1500+
Value *Op0 = BinOp->getOperand(0);
1501+
Value *Op1 = BinOp->getOperand(1);
1502+
if (IsExtInst(Op0) && IsExtInst(Op1)) {
1503+
// We're interested in an ext of an i16
1504+
if (!I->getType()->isIntegerTy(32) || !IsExtensionFromHalf(Op0) ||
1505+
!IsExtensionFromHalf(Op1))
14941506
return false;
14951507
// We need to check if this result will be further extended to i64
1508+
// and that all these uses are extends (SExt or ZExt)
14961509
for (auto *U : I->users())
1497-
if (IsSExtInst(dyn_cast<Value>(U)))
1498-
return true;
1510+
if (!IsExtInst(dyn_cast<Value>(U)))
1511+
return false;
1512+
return true;
14991513
}
15001514

15011515
return false;
15021516
};
15031517

1504-
if (MulInSMLALPattern(CxtI, Opcode, Ty))
1518+
if (MulInDSPMLALPattern(CxtI, Opcode, Ty))
15051519
return 0;
15061520

15071521
// Default to cheap (throughput/size of 1 instruction) but adjust throughput

llvm/test/Analysis/CostModel/ARM/muls-in-smlal-patterns.ll

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,20 @@
1-
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple thumbv8.1-m.main -mattr=+mve,+dsp < %s | FileCheck %s
1+
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple thumbv8.1-m.main -mattr=+dsp < %s | FileCheck %s
3+
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple thumbv8.1-m.main < %s | FileCheck %s --check-prefix=CHECK-NO-DSP
24
define i64 @test(i16 %a, i16 %b) {
35
; CHECK-LABEL: 'test'
4-
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs
6+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32
7+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bs = sext i16 %b to i32
8+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs
9+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
10+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ms
11+
;
12+
; CHECK-NO-DSP-LABEL: 'test'
13+
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32
14+
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bs = sext i16 %b to i32
15+
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs
16+
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
17+
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ms
518
;
619
%as = sext i16 %a to i32
720
%bs = sext i16 %b to i32
@@ -12,7 +25,20 @@ define i64 @test(i16 %a, i16 %b) {
1225

1326
define i64 @withadd(i16 %a, i16 %b, i64 %c) {
1427
; CHECK-LABEL: 'withadd'
15-
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs
28+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32
29+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bs = sext i16 %b to i32
30+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs
31+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
32+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms
33+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
34+
;
35+
; CHECK-NO-DSP-LABEL: 'withadd'
36+
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32
37+
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bs = sext i16 %b to i32
38+
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs
39+
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
40+
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms
41+
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
1642
;
1743
%as = sext i16 %a to i32
1844
%bs = sext i16 %b to i32
@@ -24,7 +50,24 @@ define i64 @withadd(i16 %a, i16 %b, i64 %c) {
2450

2551
define i64 @withloads(ptr %pa, ptr %pb, i64 %c) {
2652
; CHECK-LABEL: 'withloads'
27-
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs
53+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, ptr %pa, align 2
54+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b = load i16, ptr %pb, align 2
55+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %as = sext i16 %a to i32
56+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bs = sext i16 %b to i32
57+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs
58+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
59+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms
60+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
61+
;
62+
; CHECK-NO-DSP-LABEL: 'withloads'
63+
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, ptr %pa, align 2
64+
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b = load i16, ptr %pb, align 2
65+
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %as = sext i16 %a to i32
66+
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bs = sext i16 %b to i32
67+
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs
68+
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64
69+
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms
70+
; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
2871
;
2972
%a = load i16, ptr %pa
3073
%b = load i16, ptr %pb

0 commit comments

Comments
 (0)