Skip to content

Commit 2cbc47b

Browse files
committed
[AArch64][SVE] Removed unnecessary optimisations for _u type intrinsics
This patch removes the optimisations for _u intrinsics with no active lanes, as this case should never occur.
1 parent fa66a70 commit 2cbc47b

File tree

2 files changed

+276
-209
lines changed

2 files changed

+276
-209
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 15 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -1406,25 +1406,17 @@ static std::optional<Instruction *> instCombineSVEAllActive(IntrinsicInst &II,
14061406
return &II;
14071407
}
14081408

1409-
// Optimize operations that take an all false predicate or send them for
1410-
// canonicalization.
1409+
// Simplify operations where predicate has all inactive lanes or try to replace
1410+
// with _u form when all lanes are active
14111411
static std::optional<Instruction *>
14121412
instCombineSVEAllOrNoActive(InstCombiner &IC, IntrinsicInst &II,
14131413
Intrinsic::ID IID) {
14141414
if (match(II.getOperand(0), m_ZeroInt())) {
1415-
if (II.getIntrinsicID() != IID) {
1416-
// llvm_ir, pred(0), op1, op2 - Spec says to return op1 when all lanes are
1417-
// inactive for sv[func]_m or sv[func]_z
1418-
return IC.replaceInstUsesWith(II, II.getOperand(1));
1419-
} else {
1420-
// llvm_ir_u, pred(0), op1, op2 - Spec says to return undef when all lanes
1421-
// are inactive for sv[func]_x
1422-
return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
1423-
}
1415+
// llvm_ir, pred(0), op1, op2 - Spec says to return op1 when all lanes are
1416+
// inactive for sv[func]_m or sv[func]_z
1417+
return IC.replaceInstUsesWith(II, II.getOperand(1));
14241418
}
1425-
if (II.getIntrinsicID() != IID)
1426-
return instCombineSVEAllActive(II, IID);
1427-
return std::nullopt;
1419+
return instCombineSVEAllActive(II, IID);
14281420
}
14291421

14301422
static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC,
@@ -1443,18 +1435,6 @@ static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC,
14431435
return std::nullopt;
14441436
}
14451437

1446-
static std::optional<Instruction *>
1447-
instCombineSVEVectorAddU(InstCombiner &IC, IntrinsicInst &II) {
1448-
if (auto II_U =
1449-
instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_add_u))
1450-
return II_U;
1451-
else {
1452-
return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
1453-
Intrinsic::aarch64_sve_mla_u>(
1454-
IC, II, true);
1455-
}
1456-
}
1457-
14581438
static std::optional<Instruction *>
14591439
instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II) {
14601440
if (auto II_U =
@@ -1480,9 +1460,6 @@ instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II) {
14801460

14811461
static std::optional<Instruction *>
14821462
instCombineSVEVectorFAddU(InstCombiner &IC, IntrinsicInst &II) {
1483-
if (auto II_U =
1484-
instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fadd_u))
1485-
return II_U;
14861463
if (auto FMLA =
14871464
instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
14881465
Intrinsic::aarch64_sve_fmla>(IC, II,
@@ -1526,9 +1503,6 @@ instCombineSVEVectorFSub(InstCombiner &IC, IntrinsicInst &II) {
15261503

15271504
static std::optional<Instruction *>
15281505
instCombineSVEVectorFSubU(InstCombiner &IC, IntrinsicInst &II) {
1529-
if (auto II_U =
1530-
instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fsub_u))
1531-
return II_U;
15321506
if (auto FMLS =
15331507
instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
15341508
Intrinsic::aarch64_sve_fmls>(IC, II,
@@ -1559,27 +1533,16 @@ static std::optional<Instruction *> instCombineSVEVectorSub(InstCombiner &IC,
15591533
return std::nullopt;
15601534
}
15611535

1562-
static std::optional<Instruction *>
1563-
instCombineSVEVectorSubU(InstCombiner &IC, IntrinsicInst &II) {
1564-
if (auto II_U =
1565-
instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sub_u))
1566-
return II_U;
1567-
else {
1568-
return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
1569-
Intrinsic::aarch64_sve_mls_u>(
1570-
IC, II, true);
1571-
}
1572-
}
1573-
15741536
static std::optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
15751537
IntrinsicInst &II,
15761538
Intrinsic::ID IID) {
15771539
auto *OpPredicate = II.getOperand(0);
15781540
auto *OpMultiplicand = II.getOperand(1);
15791541
auto *OpMultiplier = II.getOperand(2);
15801542

1581-
if (auto II_U = instCombineSVEAllOrNoActive(IC, II, IID))
1582-
return II_U;
1543+
if (II.getIntrinsicID() != IID)
1544+
if (auto II_U = instCombineSVEAllOrNoActive(IC, II, IID))
1545+
return II_U;
15831546

15841547
// Return true if a given instruction is a unit splat value, false otherwise.
15851548
auto IsUnitSplat = [](auto *I) {
@@ -1944,44 +1907,33 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
19441907
case Intrinsic::aarch64_sve_ptest_last:
19451908
return instCombineSVEPTest(IC, II);
19461909
case Intrinsic::aarch64_sve_fabd:
1947-
case Intrinsic::aarch64_sve_fabd_u:
19481910
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fabd_u);
19491911
case Intrinsic::aarch64_sve_fadd:
19501912
return instCombineSVEVectorFAdd(IC, II);
19511913
case Intrinsic::aarch64_sve_fadd_u:
19521914
return instCombineSVEVectorFAddU(IC, II);
19531915
case Intrinsic::aarch64_sve_fdiv:
1954-
case Intrinsic::aarch64_sve_fdiv_u:
19551916
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fdiv_u);
19561917
case Intrinsic::aarch64_sve_fmax:
1957-
case Intrinsic::aarch64_sve_fmax_u:
19581918
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmax_u);
19591919
case Intrinsic::aarch64_sve_fmaxnm:
1960-
case Intrinsic::aarch64_sve_fmaxnm_u:
19611920
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmaxnm_u);
19621921
case Intrinsic::aarch64_sve_fmin:
1963-
case Intrinsic::aarch64_sve_fmin_u:
19641922
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmin_u);
19651923
case Intrinsic::aarch64_sve_fminnm:
1966-
case Intrinsic::aarch64_sve_fminnm_u:
19671924
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fminnm_u);
19681925
case Intrinsic::aarch64_sve_fmla:
1969-
case Intrinsic::aarch64_sve_fmla_u:
19701926
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmla_u);
19711927
case Intrinsic::aarch64_sve_fmls:
1972-
case Intrinsic::aarch64_sve_fmls_u:
19731928
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmls_u);
19741929
case Intrinsic::aarch64_sve_fmul:
19751930
case Intrinsic::aarch64_sve_fmul_u:
19761931
return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_fmul_u);
19771932
case Intrinsic::aarch64_sve_fmulx:
1978-
case Intrinsic::aarch64_sve_fmulx_u:
19791933
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmulx_u);
19801934
case Intrinsic::aarch64_sve_fnmla:
1981-
case Intrinsic::aarch64_sve_fnmla_u:
19821935
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmla_u);
19831936
case Intrinsic::aarch64_sve_fnmls:
1984-
case Intrinsic::aarch64_sve_fnmls_u:
19851937
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmls_u);
19861938
case Intrinsic::aarch64_sve_fsub:
19871939
return instCombineSVEVectorFSub(IC, II);
@@ -1990,70 +1942,55 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
19901942
case Intrinsic::aarch64_sve_add:
19911943
return instCombineSVEVectorAdd(IC, II);
19921944
case Intrinsic::aarch64_sve_add_u:
1993-
return instCombineSVEVectorAddU(IC, II);
1945+
return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
1946+
Intrinsic::aarch64_sve_mla_u>(
1947+
IC, II, true);
19941948
case Intrinsic::aarch64_sve_mla:
1995-
case Intrinsic::aarch64_sve_mla_u:
19961949
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mla_u);
19971950
case Intrinsic::aarch64_sve_mls:
1998-
case Intrinsic::aarch64_sve_mls_u:
19991951
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mls_u);
20001952
case Intrinsic::aarch64_sve_mul:
20011953
case Intrinsic::aarch64_sve_mul_u:
20021954
return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_mul_u);
20031955
case Intrinsic::aarch64_sve_sabd:
2004-
case Intrinsic::aarch64_sve_sabd_u:
20051956
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sabd_u);
20061957
case Intrinsic::aarch64_sve_smax:
2007-
case Intrinsic::aarch64_sve_smax_u:
20081958
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smax_u);
20091959
case Intrinsic::aarch64_sve_smin:
2010-
case Intrinsic::aarch64_sve_smin_u:
20111960
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smin_u);
20121961
case Intrinsic::aarch64_sve_smulh:
2013-
case Intrinsic::aarch64_sve_smulh_u:
20141962
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smulh_u);
20151963
case Intrinsic::aarch64_sve_sub:
20161964
return instCombineSVEVectorSub(IC, II);
20171965
case Intrinsic::aarch64_sve_sub_u:
2018-
return instCombineSVEVectorSubU(IC, II);
1966+
return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
1967+
Intrinsic::aarch64_sve_mls_u>(
1968+
IC, II, true);
20191969
case Intrinsic::aarch64_sve_uabd:
2020-
case Intrinsic::aarch64_sve_uabd_u:
20211970
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_uabd_u);
20221971
case Intrinsic::aarch64_sve_umax:
2023-
case Intrinsic::aarch64_sve_umax_u:
20241972
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umax_u);
20251973
case Intrinsic::aarch64_sve_umin:
2026-
case Intrinsic::aarch64_sve_umin_u:
20271974
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umin_u);
20281975
case Intrinsic::aarch64_sve_umulh:
2029-
case Intrinsic::aarch64_sve_umulh_u:
20301976
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umulh_u);
20311977
case Intrinsic::aarch64_sve_asr:
2032-
case Intrinsic::aarch64_sve_asr_u:
20331978
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_asr_u);
20341979
case Intrinsic::aarch64_sve_lsl:
2035-
case Intrinsic::aarch64_sve_lsl_u:
20361980
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_lsl_u);
20371981
case Intrinsic::aarch64_sve_lsr:
2038-
case Intrinsic::aarch64_sve_lsr_u:
20391982
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_lsr_u);
20401983
case Intrinsic::aarch64_sve_and:
2041-
case Intrinsic::aarch64_sve_and_u:
20421984
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_and_u);
20431985
case Intrinsic::aarch64_sve_bic:
2044-
case Intrinsic::aarch64_sve_bic_u:
20451986
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_bic_u);
20461987
case Intrinsic::aarch64_sve_eor:
2047-
case Intrinsic::aarch64_sve_eor_u:
20481988
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_eor_u);
20491989
case Intrinsic::aarch64_sve_orr:
2050-
case Intrinsic::aarch64_sve_orr_u:
20511990
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_orr_u);
20521991
case Intrinsic::aarch64_sve_sqsub:
2053-
case Intrinsic::aarch64_sve_sqsub_u:
20541992
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sqsub_u);
20551993
case Intrinsic::aarch64_sve_uqsub:
2056-
case Intrinsic::aarch64_sve_uqsub_u:
20571994
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_uqsub_u);
20581995
case Intrinsic::aarch64_sve_tbl:
20591996
return instCombineSVETBL(IC, II);

0 commit comments

Comments
 (0)