@@ -1497,6 +1497,65 @@ static bool isAllActivePredicate(Value *Pred) {
   return (C && C->isAllOnesValue());
 }
 
+// Simplify `V` by only considering the operations that affect active lanes.
+// This function should only return existing Values or newly created Constants.
+static Value *stripInactiveLanes(Value *V, const Value *Pg) {
+  auto *Dup = dyn_cast<IntrinsicInst>(V);
+  if (Dup && Dup->getIntrinsicID() == Intrinsic::aarch64_sve_dup &&
+      Dup->getOperand(1) == Pg && isa<Constant>(Dup->getOperand(2)))
+    return ConstantVector::getSplat(
+        cast<VectorType>(V->getType())->getElementCount(),
+        cast<Constant>(Dup->getOperand(2)));
+
+  return V;
+}
+
+static std::optional<Instruction *>
+simplifySVEIntrinsicBinOp(InstCombiner &IC, IntrinsicInst &II,
+                          const SVEIntrinsicInfo &IInfo) {
+  const unsigned Opc = IInfo.getMatchingIROpode();
+  assert(Instruction::isBinaryOp(Opc) && "Expected a binary operation!");
+
+  Value *Pg = II.getOperand(0);
+  Value *Op1 = II.getOperand(1);
+  Value *Op2 = II.getOperand(2);
+  const DataLayout &DL = II.getDataLayout();
+
+  // Canonicalise constants to the RHS.
+  if (Instruction::isCommutative(Opc) && IInfo.inactiveLanesAreNotDefined() &&
+      isa<Constant>(Op1) && !isa<Constant>(Op2)) {
+    IC.replaceOperand(II, 1, Op2);
+    IC.replaceOperand(II, 2, Op1);
+    return &II;
+  }
+
+  // Only active lanes matter when simplifying the operation.
+  Op1 = stripInactiveLanes(Op1, Pg);
+  Op2 = stripInactiveLanes(Op2, Pg);
+
+  Value *SimpleII;
+  if (auto FII = dyn_cast<FPMathOperator>(&II))
+    SimpleII = simplifyBinOp(Opc, Op1, Op2, FII->getFastMathFlags(), DL);
+  else
+    SimpleII = simplifyBinOp(Opc, Op1, Op2, DL);
+
+  if (!SimpleII)
+    return std::nullopt;
+
+  if (IInfo.inactiveLanesAreNotDefined())
+    return IC.replaceInstUsesWith(II, SimpleII);
+
+  Value *Inactive = II.getOperand(IInfo.getOperandIdxInactiveLanesTakenFrom());
+
+  // The intrinsic does nothing (e.g. sve.mul(pg, A, 1.0)).
+  if (SimpleII == Inactive)
+    return IC.replaceInstUsesWith(II, SimpleII);
+
+  // Inactive lanes must be preserved.
+  SimpleII = IC.Builder.CreateSelect(Pg, SimpleII, Inactive);
+  return IC.replaceInstUsesWith(II, SimpleII);
+}
+
 // Use SVE intrinsic info to eliminate redundant operands and/or canonicalise
 // to operations with less strict inactive lane requirements.
 static std::optional<Instruction *>
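For intuition about the tail of `simplifySVEIntrinsicBinOp` above, here is a minimal scalar sketch (plain C++, not LLVM code; `mergingMul` is a hypothetical stand-in for a merging SVE multiply) of why the fold can drop the intrinsic outright when the simplified value already matches the inactive-lane operand, and why `CreateSelect` is needed otherwise.

```cpp
#include <array>
#include <cstdio>

// Scalar model of a merging SVE multiply: lanes where Pg is true compute
// Op1[i] * Op2[i]; lanes where Pg is false keep Op1[i] (the inactive-lane
// operand). The CreateSelect in the patch rebuilds exactly this behaviour
// around the value returned by simplifyBinOp.
template <size_t N>
std::array<float, N> mergingMul(const std::array<bool, N> &Pg,
                                const std::array<float, N> &Op1,
                                const std::array<float, N> &Op2) {
  std::array<float, N> R = Op1;
  for (size_t I = 0; I < N; ++I)
    if (Pg[I])
      R[I] = Op1[I] * Op2[I];
  return R;
}

int main() {
  std::array<bool, 4> Pg{true, false, true, false};
  std::array<float, 4> A{1.0f, 2.0f, 3.0f, 4.0f};
  std::array<float, 4> One{1.0f, 1.0f, 1.0f, 1.0f};

  // Multiplying by a splat of 1.0 leaves every lane equal to A's lane, active
  // or not, so sve.mul(pg, A, splat(1.0)) can be replaced by A with no select:
  // the simplified value (A) is also the inactive-lane operand.
  std::array<float, 4> R = mergingMul(Pg, A, One);
  for (float V : R)
    std::printf("%g ", V); // prints: 1 2 3 4
  std::printf("\n");
}
```

For a fold where the simplified value differs from the inactive-lane operand (say, an integer multiply by a splat of 0 under a non-all-active predicate), the scalar model makes clear that only the active lanes may take the new value, which is what `select(Pg, SimpleII, Inactive)` preserves.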
@@ -1537,6 +1596,11 @@ simplifySVEIntrinsic(InstCombiner &IC, IntrinsicInst &II,
     }
   }
 
+  // Operation specific simplifications.
+  if (IInfo.hasMatchingIROpode() &&
+      Instruction::isBinaryOp(IInfo.getMatchingIROpode()))
+    return simplifySVEIntrinsicBinOp(IC, II, IInfo);
+
   return std::nullopt;
 }
 
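The soundness of `stripInactiveLanes` (treating a predicated `sve.dup` as a full splat) follows from the consumer being governed by the same predicate. A small self-contained sketch, again in plain C++ with hypothetical helpers rather than LLVM code:

```cpp
#include <array>
#include <cassert>

constexpr size_t N = 4;
using Mask = std::array<bool, N>;
using Vec = std::array<int, N>;

// Model of sve.dup(Passthru, Pg, C): C in active lanes, Passthru elsewhere.
static Vec predicatedDup(const Vec &Passthru, const Mask &Pg, int C) {
  Vec R = Passthru;
  for (size_t I = 0; I < N; ++I)
    if (Pg[I])
      R[I] = C;
  return R;
}

// Model of a merging multiply governed by Pg: inactive lanes keep Op1.
static Vec mergingMul(const Mask &Pg, const Vec &Op1, const Vec &Op2) {
  Vec R = Op1;
  for (size_t I = 0; I < N; ++I)
    if (Pg[I])
      R[I] = Op1[I] * Op2[I];
  return R;
}

int main() {
  Mask Pg{true, false, true, false};
  Vec A{1, 2, 3, 4}, Passthru{7, 7, 7, 7}, Splat{5, 5, 5, 5};

  // The consumer never reads the lanes where the dup and the splat differ,
  // so substituting splat(5) for dup(Passthru, Pg, 5) cannot change its
  // result. This is the substitution stripInactiveLanes performs before
  // handing the operands to simplifyBinOp.
  assert(mergingMul(Pg, A, predicatedDup(Passthru, Pg, 5)) ==
         mergingMul(Pg, A, Splat));
  return 0;
}
```

A likely reason the helper only returns existing Values or newly created Constants is that nothing then needs erasing when `simplifyBinOp` fails to fold anything.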
@@ -2220,68 +2284,6 @@ static std::optional<Instruction *> instCombineSVEVectorSub(InstCombiner &IC,
   return std::nullopt;
 }
 
-// Simplify `V` by only considering the operations that affect active lanes.
-// This function should only return existing Values or newly created Constants.
-static Value *stripInactiveLanes(Value *V, const Value *Pg) {
-  auto *Dup = dyn_cast<IntrinsicInst>(V);
-  if (Dup && Dup->getIntrinsicID() == Intrinsic::aarch64_sve_dup &&
-      Dup->getOperand(1) == Pg && isa<Constant>(Dup->getOperand(2)))
-    return ConstantVector::getSplat(
-        cast<VectorType>(V->getType())->getElementCount(),
-        cast<Constant>(Dup->getOperand(2)));
-
-  return V;
-}
-
-static std::optional<Instruction *>
-instCombineSVEVectorMul(InstCombiner &IC, IntrinsicInst &II,
-                        const SVEIntrinsicInfo &IInfo) {
-  const unsigned Opc = IInfo.getMatchingIROpode();
-  if (!Instruction::isBinaryOp(Opc))
-    return std::nullopt;
-
-  Value *Pg = II.getOperand(0);
-  Value *Op1 = II.getOperand(1);
-  Value *Op2 = II.getOperand(2);
-  const DataLayout &DL = II.getDataLayout();
-
-  // Canonicalise constants to the RHS.
-  if (Instruction::isCommutative(Opc) && IInfo.inactiveLanesAreNotDefined() &&
-      isa<Constant>(Op1) && !isa<Constant>(Op2)) {
-    IC.replaceOperand(II, 1, Op2);
-    IC.replaceOperand(II, 2, Op1);
-    return &II;
-  }
-
-  // Only active lanes matter when simplifying the operation.
-  Op1 = stripInactiveLanes(Op1, Pg);
-  Op2 = stripInactiveLanes(Op2, Pg);
-
-  Value *SimpleII;
-  if (auto FII = dyn_cast<FPMathOperator>(&II))
-    SimpleII = simplifyBinOp(Opc, Op1, Op2, FII->getFastMathFlags(), DL);
-  else
-    SimpleII = simplifyBinOp(Opc, Op1, Op2, DL);
-
-  if (SimpleII) {
-    if (IInfo.inactiveLanesAreNotDefined())
-      return IC.replaceInstUsesWith(II, SimpleII);
-
-    Value *Inactive =
-        II.getOperand(IInfo.getOperandIdxInactiveLanesTakenFrom());
-
-    // The intrinsic does nothing (e.g. sve.mul(pg, A, 1.0)).
-    if (SimpleII == Inactive)
-      return IC.replaceInstUsesWith(II, SimpleII);
-
-    // Inactive lanes must be preserved.
-    SimpleII = IC.Builder.CreateSelect(Pg, SimpleII, Inactive);
-    return IC.replaceInstUsesWith(II, SimpleII);
-  }
-
-  return instCombineSVEVectorBinOp(IC, II);
-}
-
 static std::optional<Instruction *> instCombineSVEUnpack(InstCombiner &IC,
                                                          IntrinsicInst &II) {
   Value *UnpackArg = II.getArgOperand(0);
@@ -2689,10 +2691,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
     return instCombineSVEVectorFAdd(IC, II);
   case Intrinsic::aarch64_sve_fadd_u:
     return instCombineSVEVectorFAddU(IC, II);
-  case Intrinsic::aarch64_sve_fmul:
-    return instCombineSVEVectorMul(IC, II, IInfo);
   case Intrinsic::aarch64_sve_fmul_u:
-    return instCombineSVEVectorMul(IC, II, IInfo);
+    return instCombineSVEVectorBinOp(IC, II);
   case Intrinsic::aarch64_sve_fsub:
     return instCombineSVEVectorFSub(IC, II);
   case Intrinsic::aarch64_sve_fsub_u:
@@ -2703,10 +2703,6 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
     return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
                                              Intrinsic::aarch64_sve_mla_u>(
         IC, II, true);
-  case Intrinsic::aarch64_sve_mul:
-    return instCombineSVEVectorMul(IC, II, IInfo);
-  case Intrinsic::aarch64_sve_mul_u:
-    return instCombineSVEVectorMul(IC, II, IInfo);
   case Intrinsic::aarch64_sve_sub:
     return instCombineSVEVectorSub(IC, II);
   case Intrinsic::aarch64_sve_sub_u: