Skip to content

Commit 9cee94b

Browse files
authored
[GlobalISel] Add identity fold for fadd -0.0 (llvm#73296)
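-0.0 acts as the identity element for fadd, so x + (-0.0) can be folded to x. This change does not also fold x + 0.0, because that fold is only valid with the nsz fast-math flag (for x = -0.0, x + 0.0 yields +0.0 rather than x).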
1 parent c8f6bb4 commit 9cee94b

File tree

2 files changed

+89
-1
lines changed

2 files changed

+89
-1
lines changed

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,13 @@ def right_identity_zero: GICombineRule<
473473
(apply (GIReplaceReg $dst, $lhs))
474474
>;
475475

476+
// Fold x fadd -0.0 -> x
// Only a -0.0 right-hand operand is matched. +0.0 is intentionally not
// handled here: folding x + 0.0 would need the nsz fast-math flag.
def right_identity_neg_zero_fp: GICombineRule<
477+
(defs root:$dst),
478+
(match (G_FADD $dst, $x, $y):$root,
479+
[{ return Helper.matchConstantFPOp(${y}, -0.0); }]),
480+
(apply (GIReplaceReg $dst, $x))
481+
>;
482+
476483
// Fold x op 1 -> x
477484
def right_identity_one_int: GICombineRule<
478485
(defs root:$dst),
@@ -1250,7 +1257,8 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
12501257
add_sub_reg, buildvector_identity_fold,
12511258
trunc_buildvector_fold,
12521259
trunc_lshr_buildvector_fold,
1253-
bitcast_bitcast_fold, fptrunc_fpext_fold]>;
1260+
bitcast_bitcast_fold, fptrunc_fpext_fold,
1261+
right_identity_neg_zero_fp]>;
12541262

12551263
def const_combines : GICombineGroup<[constant_fold_fp_ops, const_ptradd_to_i2p,
12561264
overlapping_and, mulo_by_2, mulo_by_0,

llvm/test/CodeGen/AArch64/GlobalISel/combine-add.mir

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,3 +127,83 @@ body: |
127127
%3:_(<4 x s16>) = G_ADD %1, %2
128128
$x0 = COPY %3
129129
...
130+
# Negative test: scalar G_FADD with a +0.0 constant operand. The CHECK lines
# expect the G_FADD to remain, i.e. the -0.0 identity fold must not fire.
---
131+
name: fadd_by_zero
132+
tracksRegLiveness: true
133+
body: |
134+
bb.0:
135+
liveins: $d0
136+
; CHECK-LABEL: name: fadd_by_zero
137+
; CHECK: liveins: $d0
138+
; CHECK-NEXT: {{ $}}
139+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
140+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
141+
; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[C]]
142+
; CHECK-NEXT: $d0 = COPY [[FADD]](s64)
143+
%0:_(s64) = COPY $d0
144+
%1:_(s64) = G_FCONSTANT double 0.000000e+00
145+
%2:_(s64) = G_FADD %0, %1(s64)
146+
$d0 = COPY %2(s64)
147+
...
148+
# Negative test: <4 x s32> G_FADD whose right operand is a G_BUILD_VECTOR of
# four +0.0 constants. The CHECK lines expect the G_FADD to remain unfolded.
---
149+
name: fadd_vector_by_zero
150+
alignment: 4
151+
tracksRegLiveness: true
152+
frameInfo:
153+
maxAlignment: 1
154+
machineFunctionInfo: {}
155+
body: |
156+
bb.0:
157+
liveins: $q0
158+
; CHECK-LABEL: name: fadd_vector_by_zero
159+
; CHECK: liveins: $q0
160+
; CHECK-NEXT: {{ $}}
161+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
162+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
163+
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
164+
; CHECK-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[COPY]], [[BUILD_VECTOR]]
165+
; CHECK-NEXT: $q0 = COPY [[FADD]](<4 x s32>)
166+
%0:_(<4 x s32>) = COPY $q0
167+
%1:_(s32) = G_FCONSTANT float 0.0
168+
%2:_(<4 x s32>) = G_BUILD_VECTOR %1(s32), %1(s32), %1(s32), %1(s32)
169+
%3:_(<4 x s32>) = G_FADD %0, %2(<4 x s32>)
170+
$q0 = COPY %3(<4 x s32>)
171+
...
172+
173+
# Positive test: scalar G_FADD with a -0.0 constant operand. The CHECK lines
# expect the G_FADD and the constant to disappear, leaving a plain copy of $d0.
---
174+
name: fadd_by_neg_zero
175+
tracksRegLiveness: true
176+
body: |
177+
bb.0:
178+
liveins: $d0
179+
; CHECK-LABEL: name: fadd_by_neg_zero
180+
; CHECK: liveins: $d0
181+
; CHECK-NEXT: {{ $}}
182+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
183+
; CHECK-NEXT: $d0 = COPY [[COPY]](s64)
184+
%0:_(s64) = COPY $d0
185+
%1:_(s64) = G_FCONSTANT double -0.000000e+00
186+
%2:_(s64) = G_FADD %0, %1(s64)
187+
$d0 = COPY %2(s64)
188+
...
189+
# Positive test: <4 x s32> G_FADD whose right operand is a G_BUILD_VECTOR of
# four -0.0 constants. The CHECK lines expect the G_FADD to be folded away,
# leaving a plain copy of $q0.
---
190+
name: fadd_vector_by_neg_zero
191+
alignment: 4
192+
tracksRegLiveness: true
193+
frameInfo:
194+
maxAlignment: 1
195+
machineFunctionInfo: {}
196+
body: |
197+
bb.0:
198+
liveins: $q0
199+
; CHECK-LABEL: name: fadd_vector_by_neg_zero
200+
; CHECK: liveins: $q0
201+
; CHECK-NEXT: {{ $}}
202+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
203+
; CHECK-NEXT: $q0 = COPY [[COPY]](<4 x s32>)
204+
%0:_(<4 x s32>) = COPY $q0
205+
%1:_(s32) = G_FCONSTANT float -0.0
206+
%2:_(<4 x s32>) = G_BUILD_VECTOR %1(s32), %1(s32), %1(s32), %1(s32)
207+
%3:_(<4 x s32>) = G_FADD %0, %2(<4 x s32>)
208+
$q0 = COPY %3(<4 x s32>)
209+
...

0 commit comments

Comments
 (0)