[GlobalISel] Add identity fold for fadd -0.0 (llvm#73296)

davemgreen · web-flow · commit 9cee94b81b14 · 2023-11-25T08:35:26.000Z
-0.0 acts as the identity element for fadd. This doesn't try to add 0.0
too, which would require nsz fast math flags.
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -473,6 +473,13 @@ def right_identity_zero: GICombineRule<
   (apply (GIReplaceReg $dst, $lhs))
 >;
 
+def right_identity_neg_zero_fp: GICombineRule<
+  (defs root:$dst),
+  (match (G_FADD $dst, $x, $y):$root,
+    [{ return Helper.matchConstantFPOp(${y}, -0.0); }]),
+  (apply (GIReplaceReg $dst, $x))
+>;
+
 // Fold x op 1 -> x
 def right_identity_one_int: GICombineRule<
   (defs root:$dst),
@@ -1250,7 +1257,8 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
                                         add_sub_reg, buildvector_identity_fold,
                                         trunc_buildvector_fold,
                                         trunc_lshr_buildvector_fold,
-                                        bitcast_bitcast_fold, fptrunc_fpext_fold]>;
+                                        bitcast_bitcast_fold, fptrunc_fpext_fold,
+                                        right_identity_neg_zero_fp]>;
 
 def const_combines : GICombineGroup<[constant_fold_fp_ops, const_ptradd_to_i2p,
                                      overlapping_and, mulo_by_2, mulo_by_0,
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-add.mir
@@ -127,3 +127,83 @@ body:             |
     %3:_(<4 x s16>) = G_ADD %1, %2
     $x0 = COPY %3
 ...
+---
+name:            fadd_by_zero
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0
+    ; CHECK-LABEL: name: fadd_by_zero
+    ; CHECK: liveins: $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
+    ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[C]]
+    ; CHECK-NEXT: $d0 = COPY [[FADD]](s64)
+    %0:_(s64) = COPY $d0
+    %1:_(s64) = G_FCONSTANT double 0.000000e+00
+    %2:_(s64) = G_FADD %0, %1(s64)
+    $d0 = COPY %2(s64)
+...
+---
+name:            fadd_vector_by_zero
+alignment:       4
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $q0
+    ; CHECK-LABEL: name: fadd_vector_by_zero
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[COPY]], [[BUILD_VECTOR]]
+    ; CHECK-NEXT: $q0 = COPY [[FADD]](<4 x s32>)
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(s32) = G_FCONSTANT float 0.0
+    %2:_(<4 x s32>) = G_BUILD_VECTOR %1(s32), %1(s32), %1(s32), %1(s32)
+    %3:_(<4 x s32>) = G_FADD %0, %2(<4 x s32>)
+    $q0 = COPY %3(<4 x s32>)
+...
+
+---
+name:            fadd_by_neg_zero
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0
+    ; CHECK-LABEL: name: fadd_by_neg_zero
+    ; CHECK: liveins: $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+    ; CHECK-NEXT: $d0 = COPY [[COPY]](s64)
+    %0:_(s64) = COPY $d0
+    %1:_(s64) = G_FCONSTANT double -0.000000e+00
+    %2:_(s64) = G_FADD %0, %1(s64)
+    $d0 = COPY %2(s64)
+...
+---
+name:            fadd_vector_by_neg_zero
+alignment:       4
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $q0
+    ; CHECK-LABEL: name: fadd_vector_by_neg_zero
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: $q0 = COPY [[COPY]](<4 x s32>)
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(s32) = G_FCONSTANT float -0.0
+    %2:_(<4 x s32>) = G_BUILD_VECTOR %1(s32), %1(s32), %1(s32), %1(s32)
+    %3:_(<4 x s32>) = G_FADD %0, %2(<4 x s32>)
+    $q0 = COPY %3(<4 x s32>)
+...