Skip to content

Commit adfd131

Browse files
authored
[AArch64][SVE] Add optimisation for SVE intrinsics with no active lanes (#73964)
This patch introduces optimisations for SVE intrinsic function calls whose governing predicate is all-false (no active lanes).
1 parent 29f98d6 commit adfd131

File tree

2 files changed

+1383
-39
lines changed

2 files changed

+1383
-39
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 59 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -1406,9 +1406,23 @@ static std::optional<Instruction *> instCombineSVEAllActive(IntrinsicInst &II,
14061406
return &II;
14071407
}
14081408

1409+
// Simplify operations where predicate has all inactive lanes or try to replace
1410+
// with _u form when all lanes are active
1411+
static std::optional<Instruction *>
1412+
instCombineSVEAllOrNoActive(InstCombiner &IC, IntrinsicInst &II,
1413+
Intrinsic::ID IID) {
1414+
if (match(II.getOperand(0), m_ZeroInt())) {
1415+
// llvm_ir, pred(0), op1, op2 - Spec says to return op1 when all lanes are
1416+
// inactive for sv[func]_m
1417+
return IC.replaceInstUsesWith(II, II.getOperand(1));
1418+
}
1419+
return instCombineSVEAllActive(II, IID);
1420+
}
1421+
14091422
static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC,
14101423
IntrinsicInst &II) {
1411-
if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_add_u))
1424+
if (auto II_U =
1425+
instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_add_u))
14121426
return II_U;
14131427
if (auto MLA = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
14141428
Intrinsic::aarch64_sve_mla>(
@@ -1423,7 +1437,8 @@ static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC,
14231437

14241438
static std::optional<Instruction *>
14251439
instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II) {
1426-
if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fadd_u))
1440+
if (auto II_U =
1441+
instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fadd_u))
14271442
return II_U;
14281443
if (auto FMLA =
14291444
instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
@@ -1465,7 +1480,8 @@ instCombineSVEVectorFAddU(InstCombiner &IC, IntrinsicInst &II) {
14651480

14661481
static std::optional<Instruction *>
14671482
instCombineSVEVectorFSub(InstCombiner &IC, IntrinsicInst &II) {
1468-
if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fsub_u))
1483+
if (auto II_U =
1484+
instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fsub_u))
14691485
return II_U;
14701486
if (auto FMLS =
14711487
instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
@@ -1507,7 +1523,8 @@ instCombineSVEVectorFSubU(InstCombiner &IC, IntrinsicInst &II) {
15071523

15081524
static std::optional<Instruction *> instCombineSVEVectorSub(InstCombiner &IC,
15091525
IntrinsicInst &II) {
1510-
if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sub_u))
1526+
if (auto II_U =
1527+
instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sub_u))
15111528
return II_U;
15121529
if (auto MLS = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
15131530
Intrinsic::aarch64_sve_mls>(
@@ -1523,11 +1540,6 @@ static std::optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
15231540
auto *OpMultiplicand = II.getOperand(1);
15241541
auto *OpMultiplier = II.getOperand(2);
15251542

1526-
// Canonicalise a non _u intrinsic only.
1527-
if (II.getIntrinsicID() != IID)
1528-
if (auto II_U = instCombineSVEAllActive(II, IID))
1529-
return II_U;
1530-
15311543
// Return true if a given instruction is a unit splat value, false otherwise.
15321544
auto IsUnitSplat = [](auto *I) {
15331545
auto *SplatValue = getSplatValue(I);
@@ -1891,34 +1903,38 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
18911903
case Intrinsic::aarch64_sve_ptest_last:
18921904
return instCombineSVEPTest(IC, II);
18931905
case Intrinsic::aarch64_sve_fabd:
1894-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fabd_u);
1906+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fabd_u);
18951907
case Intrinsic::aarch64_sve_fadd:
18961908
return instCombineSVEVectorFAdd(IC, II);
18971909
case Intrinsic::aarch64_sve_fadd_u:
18981910
return instCombineSVEVectorFAddU(IC, II);
18991911
case Intrinsic::aarch64_sve_fdiv:
1900-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fdiv_u);
1912+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fdiv_u);
19011913
case Intrinsic::aarch64_sve_fmax:
1902-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmax_u);
1914+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmax_u);
19031915
case Intrinsic::aarch64_sve_fmaxnm:
1904-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmaxnm_u);
1916+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmaxnm_u);
19051917
case Intrinsic::aarch64_sve_fmin:
1906-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmin_u);
1918+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmin_u);
19071919
case Intrinsic::aarch64_sve_fminnm:
1908-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fminnm_u);
1920+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fminnm_u);
19091921
case Intrinsic::aarch64_sve_fmla:
1910-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmla_u);
1922+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmla_u);
19111923
case Intrinsic::aarch64_sve_fmls:
1912-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmls_u);
1924+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmls_u);
19131925
case Intrinsic::aarch64_sve_fmul:
1926+
if (auto II_U =
1927+
instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmul_u))
1928+
return II_U;
1929+
return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_fmul_u);
19141930
case Intrinsic::aarch64_sve_fmul_u:
19151931
return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_fmul_u);
19161932
case Intrinsic::aarch64_sve_fmulx:
1917-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmulx_u);
1933+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmulx_u);
19181934
case Intrinsic::aarch64_sve_fnmla:
1919-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmla_u);
1935+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmla_u);
19201936
case Intrinsic::aarch64_sve_fnmls:
1921-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmls_u);
1937+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmls_u);
19221938
case Intrinsic::aarch64_sve_fsub:
19231939
return instCombineSVEVectorFSub(IC, II);
19241940
case Intrinsic::aarch64_sve_fsub_u:
@@ -1930,52 +1946,56 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
19301946
Intrinsic::aarch64_sve_mla_u>(
19311947
IC, II, true);
19321948
case Intrinsic::aarch64_sve_mla:
1933-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mla_u);
1949+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mla_u);
19341950
case Intrinsic::aarch64_sve_mls:
1935-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mls_u);
1951+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mls_u);
19361952
case Intrinsic::aarch64_sve_mul:
1953+
if (auto II_U =
1954+
instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mul_u))
1955+
return II_U;
1956+
return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_mul_u);
19371957
case Intrinsic::aarch64_sve_mul_u:
19381958
return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_mul_u);
19391959
case Intrinsic::aarch64_sve_sabd:
1940-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sabd_u);
1960+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sabd_u);
19411961
case Intrinsic::aarch64_sve_smax:
1942-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smax_u);
1962+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smax_u);
19431963
case Intrinsic::aarch64_sve_smin:
1944-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smin_u);
1964+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smin_u);
19451965
case Intrinsic::aarch64_sve_smulh:
1946-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smulh_u);
1966+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smulh_u);
19471967
case Intrinsic::aarch64_sve_sub:
19481968
return instCombineSVEVectorSub(IC, II);
19491969
case Intrinsic::aarch64_sve_sub_u:
19501970
return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
19511971
Intrinsic::aarch64_sve_mls_u>(
19521972
IC, II, true);
19531973
case Intrinsic::aarch64_sve_uabd:
1954-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_uabd_u);
1974+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_uabd_u);
19551975
case Intrinsic::aarch64_sve_umax:
1956-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umax_u);
1976+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umax_u);
19571977
case Intrinsic::aarch64_sve_umin:
1958-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umin_u);
1978+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umin_u);
19591979
case Intrinsic::aarch64_sve_umulh:
1960-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umulh_u);
1980+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umulh_u);
19611981
case Intrinsic::aarch64_sve_asr:
1962-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_asr_u);
1982+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_asr_u);
19631983
case Intrinsic::aarch64_sve_lsl:
1964-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_lsl_u);
1984+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_lsl_u);
19651985
case Intrinsic::aarch64_sve_lsr:
1966-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_lsr_u);
1986+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_lsr_u);
19671987
case Intrinsic::aarch64_sve_and:
1968-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_and_u);
1988+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_and_u);
19691989
case Intrinsic::aarch64_sve_bic:
1970-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_bic_u);
1990+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_bic_u);
19711991
case Intrinsic::aarch64_sve_eor:
1972-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_eor_u);
1992+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_eor_u);
19731993
case Intrinsic::aarch64_sve_orr:
1974-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_orr_u);
1994+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_orr_u);
19751995
case Intrinsic::aarch64_sve_sqsub:
1976-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sqsub_u);
1996+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sqsub_u);
19771997
case Intrinsic::aarch64_sve_uqsub:
1978-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_uqsub_u);
1998+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_uqsub_u);
19791999
case Intrinsic::aarch64_sve_tbl:
19802000
return instCombineSVETBL(IC, II);
19812001
case Intrinsic::aarch64_sve_uunpkhi:

0 commit comments

Comments
 (0)