Skip to content

Commit 1997073

Browse files
[LLVM][InstCombine][SVE] Refactor sve.mul/fmul combines. (#134116)
After #126928 it's now possible to rewrite the existing combines, which mostly only handle cases where an operand is an identity value, to use existing simplify code to unlock general constant folding.
1 parent 83fbe67 commit 1997073

File tree

6 files changed

+250
-65
lines changed

6 files changed

+250
-65
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 82 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1111,6 +1111,19 @@ struct SVEIntrinsicInfo {
11111111
return *this;
11121112
}
11131113

1114+
bool hasMatchingIROpode() const { return IROpcode != 0; }
1115+
1116+
unsigned getMatchingIROpode() const {
1117+
assert(hasMatchingIROpode() && "Propery not set!");
1118+
return IROpcode;
1119+
}
1120+
1121+
SVEIntrinsicInfo &setMatchingIROpcode(unsigned Opcode) {
1122+
assert(!hasMatchingIROpode() && "Cannot set property twice!");
1123+
IROpcode = Opcode;
1124+
return *this;
1125+
}
1126+
11141127
//
11151128
// Properties relating to the result of inactive lanes.
11161129
//
@@ -1186,6 +1199,7 @@ struct SVEIntrinsicInfo {
11861199
unsigned GoverningPredicateIdx = std::numeric_limits<unsigned>::max();
11871200

11881201
Intrinsic::ID UndefIntrinsic = Intrinsic::not_intrinsic;
1202+
unsigned IROpcode = 0;
11891203

11901204
enum PredicationStyle {
11911205
Uninitialized,
@@ -1269,7 +1283,8 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
12691283
case Intrinsic::aarch64_sve_fmls:
12701284
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmls_u);
12711285
case Intrinsic::aarch64_sve_fmul:
1272-
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmul_u);
1286+
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmul_u)
1287+
.setMatchingIROpcode(Instruction::FMul);
12731288
case Intrinsic::aarch64_sve_fmulx:
12741289
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmulx_u);
12751290
case Intrinsic::aarch64_sve_fnmla:
@@ -1285,7 +1300,8 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
12851300
case Intrinsic::aarch64_sve_mls:
12861301
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_mls_u);
12871302
case Intrinsic::aarch64_sve_mul:
1288-
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_mul_u);
1303+
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_mul_u)
1304+
.setMatchingIROpcode(Instruction::Mul);
12891305
case Intrinsic::aarch64_sve_sabd:
12901306
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_sabd_u);
12911307
case Intrinsic::aarch64_sve_smax:
@@ -1323,6 +1339,13 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
13231339
case Intrinsic::aarch64_sve_uqsub:
13241340
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_uqsub_u);
13251341

1342+
case Intrinsic::aarch64_sve_fmul_u:
1343+
return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode(
1344+
Instruction::FMul);
1345+
case Intrinsic::aarch64_sve_mul_u:
1346+
return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode(
1347+
Instruction::Mul);
1348+
13261349
case Intrinsic::aarch64_sve_addqv:
13271350
case Intrinsic::aarch64_sve_and_z:
13281351
case Intrinsic::aarch64_sve_bic_z:
@@ -2205,45 +2228,63 @@ static std::optional<Instruction *> instCombineSVEVectorSub(InstCombiner &IC,
22052228
return std::nullopt;
22062229
}
22072230

2208-
static std::optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
2209-
IntrinsicInst &II) {
2210-
auto *OpPredicate = II.getOperand(0);
2211-
auto *OpMultiplicand = II.getOperand(1);
2212-
auto *OpMultiplier = II.getOperand(2);
2231+
// Simplify `V` by only considering the operations that affect active lanes.
2232+
// This function should only return existing Values or newly created Constants.
2233+
static Value *stripInactiveLanes(Value *V, const Value *Pg) {
2234+
auto *Dup = dyn_cast<IntrinsicInst>(V);
2235+
if (Dup && Dup->getIntrinsicID() == Intrinsic::aarch64_sve_dup &&
2236+
Dup->getOperand(1) == Pg && isa<Constant>(Dup->getOperand(2)))
2237+
return ConstantVector::getSplat(
2238+
cast<VectorType>(V->getType())->getElementCount(),
2239+
cast<Constant>(Dup->getOperand(2)));
2240+
2241+
return V;
2242+
}
22132243

2214-
// Return true if a given instruction is a unit splat value, false otherwise.
2215-
auto IsUnitSplat = [](auto *I) {
2216-
auto *SplatValue = getSplatValue(I);
2217-
if (!SplatValue)
2218-
return false;
2219-
return match(SplatValue, m_FPOne()) || match(SplatValue, m_One());
2220-
};
2244+
static std::optional<Instruction *>
2245+
instCombineSVEVectorMul(InstCombiner &IC, IntrinsicInst &II,
2246+
const SVEIntrinsicInfo &IInfo) {
2247+
const unsigned Opc = IInfo.getMatchingIROpode();
2248+
if (!Instruction::isBinaryOp(Opc))
2249+
return std::nullopt;
22212250

2222-
// Return true if a given instruction is an aarch64_sve_dup intrinsic call
2223-
// with a unit splat value, false otherwise.
2224-
auto IsUnitDup = [](auto *I) {
2225-
auto *IntrI = dyn_cast<IntrinsicInst>(I);
2226-
if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_dup)
2227-
return false;
2251+
Value *Pg = II.getOperand(0);
2252+
Value *Op1 = II.getOperand(1);
2253+
Value *Op2 = II.getOperand(2);
2254+
const DataLayout &DL = II.getDataLayout();
22282255

2229-
auto *SplatValue = IntrI->getOperand(2);
2230-
return match(SplatValue, m_FPOne()) || match(SplatValue, m_One());
2231-
};
2256+
// Canonicalise constants to the RHS.
2257+
if (Instruction::isCommutative(Opc) && IInfo.inactiveLanesAreNotDefined() &&
2258+
isa<Constant>(Op1) && !isa<Constant>(Op2)) {
2259+
IC.replaceOperand(II, 1, Op2);
2260+
IC.replaceOperand(II, 2, Op1);
2261+
return &II;
2262+
}
22322263

2233-
if (IsUnitSplat(OpMultiplier)) {
2234-
// [f]mul pg %n, (dupx 1) => %n
2235-
OpMultiplicand->takeName(&II);
2236-
return IC.replaceInstUsesWith(II, OpMultiplicand);
2237-
} else if (IsUnitDup(OpMultiplier)) {
2238-
// [f]mul pg %n, (dup pg 1) => %n
2239-
auto *DupInst = cast<IntrinsicInst>(OpMultiplier);
2240-
auto *DupPg = DupInst->getOperand(1);
2241-
// TODO: this is naive. The optimization is still valid if DupPg
2242-
// 'encompasses' OpPredicate, not only if they're the same predicate.
2243-
if (OpPredicate == DupPg) {
2244-
OpMultiplicand->takeName(&II);
2245-
return IC.replaceInstUsesWith(II, OpMultiplicand);
2246-
}
2264+
// Only active lanes matter when simplifying the operation.
2265+
Op1 = stripInactiveLanes(Op1, Pg);
2266+
Op2 = stripInactiveLanes(Op2, Pg);
2267+
2268+
Value *SimpleII;
2269+
if (auto FII = dyn_cast<FPMathOperator>(&II))
2270+
SimpleII = simplifyBinOp(Opc, Op1, Op2, FII->getFastMathFlags(), DL);
2271+
else
2272+
SimpleII = simplifyBinOp(Opc, Op1, Op2, DL);
2273+
2274+
if (SimpleII) {
2275+
if (IInfo.inactiveLanesAreNotDefined())
2276+
return IC.replaceInstUsesWith(II, SimpleII);
2277+
2278+
Value *Inactive =
2279+
II.getOperand(IInfo.getOperandIdxInactiveLanesTakenFrom());
2280+
2281+
// The intrinsic does nothing (e.g. sve.mul(pg, A, 1.0)).
2282+
if (SimpleII == Inactive)
2283+
return IC.replaceInstUsesWith(II, SimpleII);
2284+
2285+
// Inactive lanes must be preserved.
2286+
SimpleII = IC.Builder.CreateSelect(Pg, SimpleII, Inactive);
2287+
return IC.replaceInstUsesWith(II, SimpleII);
22472288
}
22482289

22492290
return instCombineSVEVectorBinOp(IC, II);
@@ -2650,9 +2691,9 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
26502691
case Intrinsic::aarch64_sve_fadd_u:
26512692
return instCombineSVEVectorFAddU(IC, II);
26522693
case Intrinsic::aarch64_sve_fmul:
2653-
return instCombineSVEVectorMul(IC, II);
2694+
return instCombineSVEVectorMul(IC, II, IInfo);
26542695
case Intrinsic::aarch64_sve_fmul_u:
2655-
return instCombineSVEVectorMul(IC, II);
2696+
return instCombineSVEVectorMul(IC, II, IInfo);
26562697
case Intrinsic::aarch64_sve_fsub:
26572698
return instCombineSVEVectorFSub(IC, II);
26582699
case Intrinsic::aarch64_sve_fsub_u:
@@ -2664,9 +2705,9 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
26642705
Intrinsic::aarch64_sve_mla_u>(
26652706
IC, II, true);
26662707
case Intrinsic::aarch64_sve_mul:
2667-
return instCombineSVEVectorMul(IC, II);
2708+
return instCombineSVEVectorMul(IC, II, IInfo);
26682709
case Intrinsic::aarch64_sve_mul_u:
2669-
return instCombineSVEVectorMul(IC, II);
2710+
return instCombineSVEVectorMul(IC, II, IInfo);
26702711
case Intrinsic::aarch64_sve_sub:
26712712
return instCombineSVEVectorSub(IC, II);
26722713
case Intrinsic::aarch64_sve_sub_u:

llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ target triple = "aarch64-unknown-linux-gnu"
66
; Idempotent fmuls -- should compile to just a ret.
77
define <vscale x 8 x half> @idempotent_fmul_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 {
88
; CHECK-LABEL: define <vscale x 8 x half> @idempotent_fmul_f16(
9-
; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x half> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
10-
; CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
9+
; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]]) #[[ATTR0:[0-9]+]] {
10+
; CHECK-NEXT: ret <vscale x 8 x half> [[A]]
1111
;
1212
%1 = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 1.0)
1313
%2 = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %1)
@@ -16,8 +16,8 @@ define <vscale x 8 x half> @idempotent_fmul_f16(<vscale x 8 x i1> %pg, <vscale x
1616

1717
define <vscale x 4 x float> @idempotent_fmul_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #0 {
1818
; CHECK-LABEL: define <vscale x 4 x float> @idempotent_fmul_f32(
19-
; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x float> [[TMP0:%.*]]) #[[ATTR0]] {
20-
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
19+
; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]]) #[[ATTR0]] {
20+
; CHECK-NEXT: ret <vscale x 4 x float> [[A]]
2121
;
2222
%1 = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 1.0)
2323
%2 = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %1)
@@ -26,8 +26,8 @@ define <vscale x 4 x float> @idempotent_fmul_f32(<vscale x 4 x i1> %pg, <vscale
2626

2727
define <vscale x 2 x double> @idempotent_fmul_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 {
2828
; CHECK-LABEL: define <vscale x 2 x double> @idempotent_fmul_f64(
29-
; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x double> [[TMP0:%.*]]) #[[ATTR0]] {
30-
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
29+
; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]]) #[[ATTR0]] {
30+
; CHECK-NEXT: ret <vscale x 2 x double> [[A]]
3131
;
3232
%1 = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 1.0)
3333
%2 = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %1)
@@ -37,7 +37,7 @@ define <vscale x 2 x double> @idempotent_fmul_f64(<vscale x 2 x i1> %pg, <vscale
3737
define <vscale x 2 x double> @idempotent_fmul_different_argument_order(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 {
3838
; CHECK-LABEL: define <vscale x 2 x double> @idempotent_fmul_different_argument_order(
3939
; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]]) #[[ATTR0]] {
40-
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> [[PG]], <vscale x 2 x double> splat (double 1.000000e+00), <vscale x 2 x double> [[A]])
40+
; CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[A]], <vscale x 2 x double> splat (double 1.000000e+00)
4141
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
4242
;
4343
%1 = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 1.0)
@@ -48,8 +48,8 @@ define <vscale x 2 x double> @idempotent_fmul_different_argument_order(<vscale x
4848

4949
define <vscale x 8 x half> @idempotent_fmul_with_predicated_dup(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 {
5050
; CHECK-LABEL: define <vscale x 8 x half> @idempotent_fmul_with_predicated_dup(
51-
; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x half> [[TMP0:%.*]]) #[[ATTR0]] {
52-
; CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
51+
; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]]) #[[ATTR0]] {
52+
; CHECK-NEXT: ret <vscale x 8 x half> [[A]]
5353
;
5454
%1 = call <vscale x 8 x half> @llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> %pg, half 1.0)
5555
%2 = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %1)

llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul_u-idempotency.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,8 @@ define <vscale x 2 x double> @idempotent_fmul_u_f64(<vscale x 2 x i1> %pg, <vsca
3636

3737
define <vscale x 2 x double> @idempotent_fmul_u_different_argument_order(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 {
3838
; CHECK-LABEL: define <vscale x 2 x double> @idempotent_fmul_u_different_argument_order(
39-
; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]]) #[[ATTR0]] {
40-
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> [[PG]], <vscale x 2 x double> splat (double 1.000000e+00), <vscale x 2 x double> [[A]])
41-
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
39+
; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x double> [[TMP0:%.*]]) #[[ATTR0]] {
40+
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
4241
;
4342
%1 = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 1.0)
4443
; Different argument order to the above tests.

llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul-idempotency.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ target triple = "aarch64-unknown-linux-gnu"
66
; Idempotent muls -- should compile to just a ret.
77
define <vscale x 8 x i16> @idempotent_mul_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) #0 {
88
; CHECK-LABEL: define <vscale x 8 x i16> @idempotent_mul_i16(
9-
; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
10-
; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
9+
; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]]) #[[ATTR0:[0-9]+]] {
10+
; CHECK-NEXT: ret <vscale x 8 x i16> [[A]]
1111
;
1212
%1 = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 1)
1313
%2 = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %1)
@@ -16,8 +16,8 @@ define <vscale x 8 x i16> @idempotent_mul_i16(<vscale x 8 x i1> %pg, <vscale x 8
1616

1717
define <vscale x 4 x i32> @idempotent_mul_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) #0 {
1818
; CHECK-LABEL: define <vscale x 4 x i32> @idempotent_mul_i32(
19-
; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[TMP0:%.*]]) #[[ATTR0]] {
20-
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
19+
; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]]) #[[ATTR0]] {
20+
; CHECK-NEXT: ret <vscale x 4 x i32> [[A]]
2121
;
2222
%1 = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
2323
%2 = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %1)
@@ -26,8 +26,8 @@ define <vscale x 4 x i32> @idempotent_mul_i32(<vscale x 4 x i1> %pg, <vscale x 4
2626

2727
define <vscale x 2 x i64> @idempotent_mul_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) #0 {
2828
; CHECK-LABEL: define <vscale x 2 x i64> @idempotent_mul_i64(
29-
; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x i64> [[TMP0:%.*]]) #[[ATTR0]] {
30-
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
29+
; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x i64> [[A:%.*]]) #[[ATTR0]] {
30+
; CHECK-NEXT: ret <vscale x 2 x i64> [[A]]
3131
;
3232
%1 = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 1)
3333
%2 = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %1)
@@ -37,7 +37,7 @@ define <vscale x 2 x i64> @idempotent_mul_i64(<vscale x 2 x i1> %pg, <vscale x 2
3737
define <vscale x 2 x i64> @idempotent_mul_different_argument_order(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) #0 {
3838
; CHECK-LABEL: define <vscale x 2 x i64> @idempotent_mul_different_argument_order(
3939
; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x i64> [[A:%.*]]) #[[ATTR0]] {
40-
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 1), <vscale x 2 x i64> [[A]])
40+
; CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> splat (i64 1)
4141
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
4242
;
4343
%1 = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 1)
@@ -48,8 +48,8 @@ define <vscale x 2 x i64> @idempotent_mul_different_argument_order(<vscale x 2 x
4848

4949
define <vscale x 8 x i16> @idempotent_mul_with_predicated_dup(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) #0 {
5050
; CHECK-LABEL: define <vscale x 8 x i16> @idempotent_mul_with_predicated_dup(
51-
; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[TMP0:%.*]]) #[[ATTR0]] {
52-
; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
51+
; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]]) #[[ATTR0]] {
52+
; CHECK-NEXT: ret <vscale x 8 x i16> [[A]]
5353
;
5454
%1 = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, i16 1)
5555
%2 = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %1)

llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul_u-idempotency.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,8 @@ define <vscale x 2 x i64> @idempotent_mul_u_i64(<vscale x 2 x i1> %pg, <vscale x
3636

3737
define <vscale x 2 x i64> @idempotent_mul_u_different_argument_order(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) #0 {
3838
; CHECK-LABEL: define <vscale x 2 x i64> @idempotent_mul_u_different_argument_order(
39-
; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x i64> [[A:%.*]]) #[[ATTR0]] {
40-
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 1), <vscale x 2 x i64> [[A]])
41-
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
39+
; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x i64> [[TMP0:%.*]]) #[[ATTR0]] {
40+
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
4241
;
4342
%1 = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 1)
4443
; Different argument order to the above tests.

0 commit comments

Comments
 (0)