@@ -1406,9 +1406,23 @@ static std::optional<Instruction *> instCombineSVEAllActive(IntrinsicInst &II,
1406
1406
return &II;
1407
1407
}
1408
1408
1409
+ // Simplify a predicated SVE operation: if the predicate (operand 0) has all lanes inactive, fold the call to operand 1;
1410
+ // otherwise defer to instCombineSVEAllActive, which tries to replace the call with its _u form (IID) when all lanes are active.
1411
+ static std::optional<Instruction *>
1412
+ instCombineSVEAllOrNoActive (InstCombiner &IC, IntrinsicInst &II,
1413
+ Intrinsic::ID IID) {
1414
+ if (match (II.getOperand (0 ), m_ZeroInt ())) {
1415
+ // Operand layout is (pred, op1, op2). The spec says sv[func]_m returns op1
1416
+ // when all lanes are inactive, so every use of II is replaced with op1.
1417
+ return IC.replaceInstUsesWith (II, II.getOperand (1 ));
1418
+ }
1419
+ return instCombineSVEAllActive (II, IID);
1420
+ }
1421
+
1409
1422
static std::optional<Instruction *> instCombineSVEVectorAdd (InstCombiner &IC,
1410
1423
IntrinsicInst &II) {
1411
- if (auto II_U = instCombineSVEAllActive (II, Intrinsic::aarch64_sve_add_u))
1424
+ if (auto II_U =
1425
+ instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_add_u))
1412
1426
return II_U;
1413
1427
if (auto MLA = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
1414
1428
Intrinsic::aarch64_sve_mla>(
@@ -1423,7 +1437,8 @@ static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC,
1423
1437
1424
1438
static std::optional<Instruction *>
1425
1439
instCombineSVEVectorFAdd (InstCombiner &IC, IntrinsicInst &II) {
1426
- if (auto II_U = instCombineSVEAllActive (II, Intrinsic::aarch64_sve_fadd_u))
1440
+ if (auto II_U =
1441
+ instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_fadd_u))
1427
1442
return II_U;
1428
1443
if (auto FMLA =
1429
1444
instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
@@ -1465,7 +1480,8 @@ instCombineSVEVectorFAddU(InstCombiner &IC, IntrinsicInst &II) {
1465
1480
1466
1481
static std::optional<Instruction *>
1467
1482
instCombineSVEVectorFSub (InstCombiner &IC, IntrinsicInst &II) {
1468
- if (auto II_U = instCombineSVEAllActive (II, Intrinsic::aarch64_sve_fsub_u))
1483
+ if (auto II_U =
1484
+ instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_fsub_u))
1469
1485
return II_U;
1470
1486
if (auto FMLS =
1471
1487
instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
@@ -1507,7 +1523,8 @@ instCombineSVEVectorFSubU(InstCombiner &IC, IntrinsicInst &II) {
1507
1523
1508
1524
static std::optional<Instruction *> instCombineSVEVectorSub (InstCombiner &IC,
1509
1525
IntrinsicInst &II) {
1510
- if (auto II_U = instCombineSVEAllActive (II, Intrinsic::aarch64_sve_sub_u))
1526
+ if (auto II_U =
1527
+ instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_sub_u))
1511
1528
return II_U;
1512
1529
if (auto MLS = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
1513
1530
Intrinsic::aarch64_sve_mls>(
@@ -1523,11 +1540,6 @@ static std::optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
1523
1540
auto *OpMultiplicand = II.getOperand (1 );
1524
1541
auto *OpMultiplier = II.getOperand (2 );
1525
1542
1526
- // Canonicalise a non _u intrinsic only.
1527
- if (II.getIntrinsicID () != IID)
1528
- if (auto II_U = instCombineSVEAllActive (II, IID))
1529
- return II_U;
1530
-
1531
1543
// Return true if a given instruction is a unit splat value, false otherwise.
1532
1544
auto IsUnitSplat = [](auto *I) {
1533
1545
auto *SplatValue = getSplatValue (I);
@@ -1891,34 +1903,38 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
1891
1903
case Intrinsic::aarch64_sve_ptest_last:
1892
1904
return instCombineSVEPTest (IC, II);
1893
1905
case Intrinsic::aarch64_sve_fabd:
1894
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_fabd_u);
1906
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_fabd_u);
1895
1907
case Intrinsic::aarch64_sve_fadd:
1896
1908
return instCombineSVEVectorFAdd (IC, II);
1897
1909
case Intrinsic::aarch64_sve_fadd_u:
1898
1910
return instCombineSVEVectorFAddU (IC, II);
1899
1911
case Intrinsic::aarch64_sve_fdiv:
1900
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_fdiv_u);
1912
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_fdiv_u);
1901
1913
case Intrinsic::aarch64_sve_fmax:
1902
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_fmax_u);
1914
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_fmax_u);
1903
1915
case Intrinsic::aarch64_sve_fmaxnm:
1904
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_fmaxnm_u);
1916
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_fmaxnm_u);
1905
1917
case Intrinsic::aarch64_sve_fmin:
1906
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_fmin_u);
1918
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_fmin_u);
1907
1919
case Intrinsic::aarch64_sve_fminnm:
1908
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_fminnm_u);
1920
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_fminnm_u);
1909
1921
case Intrinsic::aarch64_sve_fmla:
1910
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_fmla_u);
1922
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_fmla_u);
1911
1923
case Intrinsic::aarch64_sve_fmls:
1912
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_fmls_u);
1924
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_fmls_u);
1913
1925
case Intrinsic::aarch64_sve_fmul:
1926
+ if (auto II_U =
1927
+ instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_fmul_u))
1928
+ return II_U;
1929
+ return instCombineSVEVectorMul (IC, II, Intrinsic::aarch64_sve_fmul_u);
1914
1930
case Intrinsic::aarch64_sve_fmul_u:
1915
1931
return instCombineSVEVectorMul (IC, II, Intrinsic::aarch64_sve_fmul_u);
1916
1932
case Intrinsic::aarch64_sve_fmulx:
1917
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_fmulx_u);
1933
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_fmulx_u);
1918
1934
case Intrinsic::aarch64_sve_fnmla:
1919
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_fnmla_u);
1935
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_fnmla_u);
1920
1936
case Intrinsic::aarch64_sve_fnmls:
1921
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_fnmls_u);
1937
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_fnmls_u);
1922
1938
case Intrinsic::aarch64_sve_fsub:
1923
1939
return instCombineSVEVectorFSub (IC, II);
1924
1940
case Intrinsic::aarch64_sve_fsub_u:
@@ -1930,52 +1946,56 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
1930
1946
Intrinsic::aarch64_sve_mla_u>(
1931
1947
IC, II, true );
1932
1948
case Intrinsic::aarch64_sve_mla:
1933
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_mla_u);
1949
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_mla_u);
1934
1950
case Intrinsic::aarch64_sve_mls:
1935
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_mls_u);
1951
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_mls_u);
1936
1952
case Intrinsic::aarch64_sve_mul:
1953
+ if (auto II_U =
1954
+ instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_mul_u))
1955
+ return II_U;
1956
+ return instCombineSVEVectorMul (IC, II, Intrinsic::aarch64_sve_mul_u);
1937
1957
case Intrinsic::aarch64_sve_mul_u:
1938
1958
return instCombineSVEVectorMul (IC, II, Intrinsic::aarch64_sve_mul_u);
1939
1959
case Intrinsic::aarch64_sve_sabd:
1940
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_sabd_u);
1960
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_sabd_u);
1941
1961
case Intrinsic::aarch64_sve_smax:
1942
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_smax_u);
1962
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_smax_u);
1943
1963
case Intrinsic::aarch64_sve_smin:
1944
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_smin_u);
1964
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_smin_u);
1945
1965
case Intrinsic::aarch64_sve_smulh:
1946
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_smulh_u);
1966
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_smulh_u);
1947
1967
case Intrinsic::aarch64_sve_sub:
1948
1968
return instCombineSVEVectorSub (IC, II);
1949
1969
case Intrinsic::aarch64_sve_sub_u:
1950
1970
return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
1951
1971
Intrinsic::aarch64_sve_mls_u>(
1952
1972
IC, II, true );
1953
1973
case Intrinsic::aarch64_sve_uabd:
1954
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_uabd_u);
1974
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_uabd_u);
1955
1975
case Intrinsic::aarch64_sve_umax:
1956
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_umax_u);
1976
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_umax_u);
1957
1977
case Intrinsic::aarch64_sve_umin:
1958
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_umin_u);
1978
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_umin_u);
1959
1979
case Intrinsic::aarch64_sve_umulh:
1960
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_umulh_u);
1980
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_umulh_u);
1961
1981
case Intrinsic::aarch64_sve_asr:
1962
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_asr_u);
1982
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_asr_u);
1963
1983
case Intrinsic::aarch64_sve_lsl:
1964
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_lsl_u);
1984
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_lsl_u);
1965
1985
case Intrinsic::aarch64_sve_lsr:
1966
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_lsr_u);
1986
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_lsr_u);
1967
1987
case Intrinsic::aarch64_sve_and:
1968
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_and_u);
1988
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_and_u);
1969
1989
case Intrinsic::aarch64_sve_bic:
1970
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_bic_u);
1990
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_bic_u);
1971
1991
case Intrinsic::aarch64_sve_eor:
1972
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_eor_u);
1992
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_eor_u);
1973
1993
case Intrinsic::aarch64_sve_orr:
1974
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_orr_u);
1994
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_orr_u);
1975
1995
case Intrinsic::aarch64_sve_sqsub:
1976
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_sqsub_u);
1996
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_sqsub_u);
1977
1997
case Intrinsic::aarch64_sve_uqsub:
1978
- return instCombineSVEAllActive ( II, Intrinsic::aarch64_sve_uqsub_u);
1998
+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_uqsub_u);
1979
1999
case Intrinsic::aarch64_sve_tbl:
1980
2000
return instCombineSVETBL (IC, II);
1981
2001
case Intrinsic::aarch64_sve_uunpkhi:
0 commit comments