Skip to content

Commit 113806d

Browse files
authored
[AArch64] optimise SVE cvt intrinsics with no active lanes (#104809)
This patch extends #73964 and optimises SVE cvt intrinsics away when predicate is zero.
1 parent 0f206b1 commit 113806d

File tree

3 files changed

+806
-1
lines changed

3 files changed

+806
-1
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1073,6 +1073,33 @@ static bool isAllActivePredicate(Value *Pred) {
10731073
m_ConstantInt<AArch64SVEPredPattern::all>()));
10741074
}
10751075

1076+
// Simplify unary operation where predicate has all inactive lanes by replacing
1077+
// instruction with its operand
1078+
static std::optional<Instruction *>
1079+
instCombineSVENoActiveReplace(InstCombiner &IC, IntrinsicInst &II,
1080+
bool hasInactiveVector) {
1081+
int PredOperand = hasInactiveVector ? 1 : 0;
1082+
int ReplaceOperand = hasInactiveVector ? 0 : 1;
1083+
if (match(II.getOperand(PredOperand), m_ZeroInt())) {
1084+
IC.replaceInstUsesWith(II, II.getOperand(ReplaceOperand));
1085+
return IC.eraseInstFromFunction(II);
1086+
}
1087+
return std::nullopt;
1088+
}
1089+
1090+
// Simplify unary operation where predicate has all inactive lanes or
1091+
// replace unused first operand with undef when all lanes are active
1092+
static std::optional<Instruction *>
1093+
instCombineSVEAllOrNoActiveUnary(InstCombiner &IC, IntrinsicInst &II) {
1094+
if (isAllActivePredicate(II.getOperand(1)) &&
1095+
!isa<llvm::UndefValue>(II.getOperand(0)) &&
1096+
!isa<llvm::PoisonValue>(II.getOperand(0))) {
1097+
Value *Undef = llvm::UndefValue::get(II.getType());
1098+
return IC.replaceOperand(II, 0, Undef);
1099+
}
1100+
return instCombineSVENoActiveReplace(IC, II, true);
1101+
}
1102+
10761103
// Erase unary operation where predicate has all inactive lanes
10771104
static std::optional<Instruction *>
10781105
instCombineSVENoActiveUnaryErase(InstCombiner &IC, IntrinsicInst &II,
@@ -2109,7 +2136,41 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
21092136
switch (IID) {
21102137
default:
21112138
break;
2112-
2139+
case Intrinsic::aarch64_sve_fcvt_bf16f32:
2140+
case Intrinsic::aarch64_sve_fcvt_f16f32:
2141+
case Intrinsic::aarch64_sve_fcvt_f16f64:
2142+
case Intrinsic::aarch64_sve_fcvt_f32f16:
2143+
case Intrinsic::aarch64_sve_fcvt_f32f64:
2144+
case Intrinsic::aarch64_sve_fcvt_f64f16:
2145+
case Intrinsic::aarch64_sve_fcvt_f64f32:
2146+
case Intrinsic::aarch64_sve_fcvtlt_f32f16:
2147+
case Intrinsic::aarch64_sve_fcvtlt_f64f32:
2148+
case Intrinsic::aarch64_sve_fcvtnt_bf16f32:
2149+
case Intrinsic::aarch64_sve_fcvtnt_f16f32:
2150+
case Intrinsic::aarch64_sve_fcvtnt_f32f64:
2151+
case Intrinsic::aarch64_sve_fcvtx_f32f64:
2152+
case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
2153+
case Intrinsic::aarch64_sve_fcvtzs:
2154+
case Intrinsic::aarch64_sve_fcvtzs_i32f16:
2155+
case Intrinsic::aarch64_sve_fcvtzs_i32f64:
2156+
case Intrinsic::aarch64_sve_fcvtzs_i64f16:
2157+
case Intrinsic::aarch64_sve_fcvtzs_i64f32:
2158+
case Intrinsic::aarch64_sve_fcvtzu:
2159+
case Intrinsic::aarch64_sve_fcvtzu_i32f16:
2160+
case Intrinsic::aarch64_sve_fcvtzu_i32f64:
2161+
case Intrinsic::aarch64_sve_fcvtzu_i64f16:
2162+
case Intrinsic::aarch64_sve_fcvtzu_i64f32:
2163+
case Intrinsic::aarch64_sve_scvtf:
2164+
case Intrinsic::aarch64_sve_scvtf_f16i32:
2165+
case Intrinsic::aarch64_sve_scvtf_f16i64:
2166+
case Intrinsic::aarch64_sve_scvtf_f32i64:
2167+
case Intrinsic::aarch64_sve_scvtf_f64i32:
2168+
case Intrinsic::aarch64_sve_ucvtf:
2169+
case Intrinsic::aarch64_sve_ucvtf_f16i32:
2170+
case Intrinsic::aarch64_sve_ucvtf_f16i64:
2171+
case Intrinsic::aarch64_sve_ucvtf_f32i64:
2172+
case Intrinsic::aarch64_sve_ucvtf_f64i32:
2173+
return instCombineSVEAllOrNoActiveUnary(IC, II);
21132174
case Intrinsic::aarch64_sve_st1_scatter:
21142175
case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
21152176
case Intrinsic::aarch64_sve_st1_scatter_sxtw:

0 commit comments

Comments
 (0)