Skip to content

Commit 5f3c0b2

Browse files
authored
[AArch64][NEON] Extend faminmax patterns with fminnm/fmaxnm (#104766)
Patterns were previously added to allow the following reductions - fminimum(abs(a), abs(b)) -> famin(a, b) - fmaximum(abs(a), abs(b)) -> famax(a, b) - #103027 It was suggested by @davemgreen that the following reductions are also possible - fminnum[nnan](abs(a), abs(b)) -> famin(a, b) - fmaxnum[nnan](abs(a), abs(b)) -> famax(a, b) ('nnan' documentation: https://llvm.org/docs/LangRef.html#fast-math-flags) The 'no NaNs' flag allows optimisations to assume that neither argument is a NaN, and so the differing NaN propagation semantics of llvm.maxnum/llvm.minnum and FAMAX/FAMIN can be ignored in this reduction. (llvm.maxnum/llvm.minnum: https://llvm.org/docs/LangRef.html#llvm-minnum-intrinsic) - Changes to LLVM - lib/Target/AArch64/AArch64InstrInfo.td - add 'fminnum_nnan' and 'fmaxnum_nnan'; patfrags on fminnum/fmaxnum that are predicated on the intrinsic call having the 'nnan' flag. - add AArch64famin and AArch64famax patfrags, containing the new and existing reductions. - test/CodeGen/AArch64/aarch64-neon-faminmax.ll - add positive and negative tests for the new reduction, based on the presence of 'nnan' in the IR intrinsic call.
1 parent 34e15ad commit 5f3c0b2

File tree

2 files changed

+420
-12
lines changed

2 files changed

+420
-12
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -973,6 +973,17 @@ def smullwithsignbits : PatFrag<(ops node:$l, node:$r), (mul node:$l, node:$r),
973973
CurDAG->ComputeNumSignBits(N->getOperand(1)) > 32;
974974
}]>;
975975

976+
// Match "nnan" flagged calls to fminnum and fmaxnum. These are then semantically equivalent
977+
// to fmaximum/fminimum.
978+
def fmaxnum_nnan : PatFrag<(ops node:$Rn, node:$Rm),
979+
(fmaxnum node:$Rn, node:$Rm), [{
980+
return N->getFlags().hasNoNaNs();
981+
}]>;
982+
def fminnum_nnan : PatFrag<(ops node:$Rn, node:$Rm),
983+
(fminnum node:$Rn, node:$Rm), [{
984+
return N->getFlags().hasNoNaNs();
985+
}]>;
986+
976987
//===----------------------------------------------------------------------===//
977988

978989
//===----------------------------------------------------------------------===//
@@ -10158,19 +10169,21 @@ let Uses = [FPMR, FPCR], Predicates = [HasFP8] in {
1015810169
defm FSCALE : SIMDThreeSameVectorFP<0b1, 0b1, 0b111, "fscale", null_frag>;
1015910170
} // End let Predicates = [HasFP8]
1016010171

10172+
// fminimum(abs(a), abs(b)) -> famin(a, b)
10173+
// fminnum[nnan](abs(a), abs(b)) -> famin(a, b)
10174+
def AArch64famin : PatFrags<(ops node:$Rn, node:$Rm),
10175+
[(fminimum (fabs node:$Rn), (fabs node:$Rm)),
10176+
(fminnum_nnan (fabs node:$Rn), (fabs node:$Rm))]>;
10177+
10178+
// fmaximum(abs(a), abs(b)) -> famax(a, b)
10179+
// fmaxnum[nnan](abs(a), abs(b)) -> famax(a, b)
10180+
def AArch64famax : PatFrags<(ops node:$Rn, node:$Rm),
10181+
[(fmaximum (fabs node:$Rn), (fabs node:$Rm)),
10182+
(fmaxnum_nnan (fabs node:$Rn), (fabs node:$Rm))]>;
10183+
1016110184
let Predicates = [HasNEON, HasFAMINMAX] in {
10162-
defm FAMAX : SIMDThreeSameVectorFP<0b0, 0b1, 0b011, "famax", null_frag>;
10163-
defm FAMIN : SIMDThreeSameVectorFP<0b1, 0b1, 0b011, "famin", null_frag>;
10164-
10165-
foreach Ty = [v4f16, v8f16, v2f32, v4f32, v2f64] in {
10166-
// Replace min(abs(a), abs(b)) with famin(a, b)
10167-
def : Pat<(Ty (fminimum (fabs Ty:$Rn), (fabs Ty:$Rm))),
10168-
(!cast<Instruction>("FAMIN"#Ty) Ty:$Rn, Ty:$Rm)>;
10169-
10170-
// Replace max(abs(a), abs(b)) with famax(a, b)
10171-
def : Pat<(Ty (fmaximum (fabs Ty:$Rn), (fabs Ty:$Rm))),
10172-
(!cast<Instruction>("FAMAX"#Ty) Ty:$Rn, Ty:$Rm)>;
10173-
}
10185+
defm FAMAX : SIMDThreeSameVectorFP<0b0, 0b1, 0b011, "famax", AArch64famax>;
10186+
defm FAMIN : SIMDThreeSameVectorFP<0b1, 0b1, 0b011, "famin", AArch64famin>;
1017410187
} // End let Predicates = [HasNEON, HasFAMINMAX]
1017510188

1017610189
let Uses = [FPMR, FPCR], Predicates = [HasFP8FMA] in {

0 commit comments

Comments
 (0)