[DAGCombiner] avoid narrowing fake fneg vector op

rotateright · rotateright · commit b3d0c798367d · 2020-02-26T11:25:56.000-05:00
This may inhibit vector narrowing in general, but there's
already an inconsistency in the way that we deal with this
pattern as shown by the test diff.

We may want to add a dedicated function for narrowing fneg.
It's often folded into some other op, so moving it away from
other math ops may cause regressions that we would not see
for normal binops.

See D73978 for more details.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18556,6 +18556,15 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
   if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
     return SDValue();
 
+  // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
+  // reduced to the unary fneg when it is visited, and we probably want to deal
+  // with fneg in a target-specific way.
+  if (BOpcode == ISD::FSUB) {
+    auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
+    if (C && C->getValueAPF().isNegZero())
+      return SDValue();
+  }
+
   // The binop must be a vector type, so we can extract some fraction of it.
   EVT WideBVT = BinOp.getValueType();
   if (!WideBVT.isVector())
diff --git a/llvm/test/CodeGen/AArch64/arm64-fp.ll b/llvm/test/CodeGen/AArch64/arm64-fp.ll
@@ -62,9 +62,8 @@ define <2 x float> @fake_fneg_splat_extract(<4 x float> %rhs) {
 define <2 x float> @fake_fneg_splat_extract_undef(<4 x float> %rhs) {
 ; CHECK-LABEL: fake_fneg_splat_extract_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    fneg v0.2s, v0.2s
-; CHECK-NEXT:    dup v0.2s, v0.s[1]
+; CHECK-NEXT:    fneg v0.4s, v0.4s
+; CHECK-NEXT:    dup v0.2s, v0.s[3]
 ; CHECK-NEXT:    ret
   %rhs_neg = fsub <4 x float> <float undef, float -0.0, float -0.0, float -0.0>, %rhs
   %splat = shufflevector <4 x float> %rhs_neg, <4 x float> undef, <2 x i32> <i32 3, i32 3>