Skip to content

Commit 857b8a7

Browse files
[LoopVectorize] Change the identity element for FAdd
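Changes getRecurrenceIdentity to return a neutral value of -0.0 for FAdd, except when the nsz (no-signed-zeros) fast-math flag is set, in which case 0.0 is still returned (see the FIXME in IVDescriptors.cpp).

Reviewed By: dmgreen, spatel

Differential Revision: https://reviews.llvm.org/D98963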
1 parent a6b06b7 commit 857b8a7

File tree

4 files changed

+26
-17
lines changed

4 files changed

+26
-17
lines changed

llvm/include/llvm/Analysis/IVDescriptors.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,8 @@ class RecurrenceDescriptor {
136136
static InstDesc isConditionalRdxPattern(RecurKind Kind, Instruction *I);
137137

138138
/// Returns identity corresponding to the RecurrenceKind.
139-
static Constant *getRecurrenceIdentity(RecurKind K, Type *Tp);
139+
static Constant *getRecurrenceIdentity(RecurKind K, Type *Tp,
140+
FastMathFlags FMF);
140141

141142
/// Returns the opcode corresponding to the RecurrenceKind.
142143
static unsigned getOpcode(RecurKind Kind);

llvm/lib/Analysis/IVDescriptors.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -761,7 +761,8 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence(
761761

762762
/// This function returns the identity element (or neutral element) for
763763
/// the operation K.
764-
Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp) {
764+
Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,
765+
FastMathFlags FMF) {
765766
switch (K) {
766767
case RecurKind::Xor:
767768
case RecurKind::Add:
@@ -779,7 +780,14 @@ Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp) {
779780
return ConstantFP::get(Tp, 1.0L);
780781
case RecurKind::FAdd:
781782
// Adding zero to a number does not change it.
782-
return ConstantFP::get(Tp, 0.0L);
783+
// FIXME: Ideally we should not need to check FMF for FAdd and should always
784+
// use -0.0. However, this will currently result in mixed vectors of 0.0/-0.0.
785+
// Instead, we should ensure that 1) the FMF from FAdd are propagated to the PHI
786+
// nodes where possible, and 2) PHIs with the nsz flag + -0.0 use 0.0. This would
787+
// mean we can then remove the check for noSignedZeros() below (see D98963).
788+
if (FMF.noSignedZeros())
789+
return ConstantFP::get(Tp, 0.0L);
790+
return ConstantFP::get(Tp, -0.0L);
783791
case RecurKind::UMin:
784792
return ConstantInt::get(Tp, -1);
785793
case RecurKind::UMax:

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4697,7 +4697,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
46974697
}
46984698
} else {
46994699
Constant *IdenC = RecurrenceDescriptor::getRecurrenceIdentity(
4700-
RK, VecTy->getScalarType());
4700+
RK, VecTy->getScalarType(), RdxDesc->getFastMathFlags());
47014701
Iden = IdenC;
47024702

47034703
if (!ScalarPHI) {
@@ -9207,7 +9207,7 @@ void VPReductionRecipe::execute(VPTransformState &State) {
92079207
Value *NewCond = State.get(Cond, Part);
92089208
VectorType *VecTy = cast<VectorType>(NewVecOp->getType());
92099209
Constant *Iden = RecurrenceDescriptor::getRecurrenceIdentity(
9210-
Kind, VecTy->getElementType());
9210+
Kind, VecTy->getElementType(), RdxDesc->getFastMathFlags());
92119211
Constant *IdenVec =
92129212
ConstantVector::getSplat(VecTy->getElementCount(), Iden);
92139213
Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, IdenVec);

llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -129,8 +129,8 @@ define float @reduction_sum_float_only_reassoc(i32 %n, float* %array) {
129129
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
130130
; CHECK: vector.body:
131131
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
132-
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
133-
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
132+
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
133+
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
134134
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
135135
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 4
136136
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, float* [[ARRAY:%.*]], i32 [[TMP0]]
@@ -153,7 +153,7 @@ define float @reduction_sum_float_only_reassoc(i32 %n, float* %array) {
153153
; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
154154
; CHECK: scalar.ph:
155155
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ]
156-
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[LOOP_PREHEADER]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
156+
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ -0.000000e+00, [[LOOP_PREHEADER]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
157157
; CHECK-NEXT: br label [[LOOP:%.*]]
158158
; CHECK: loop:
159159
; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
@@ -168,7 +168,7 @@ define float @reduction_sum_float_only_reassoc(i32 %n, float* %array) {
168168
; CHECK-NEXT: [[SUM_INC_LCSSA:%.*]] = phi float [ [[SUM_INC]], [[LOOP]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
169169
; CHECK-NEXT: br label [[LOOP_EXIT]]
170170
; CHECK: loop.exit:
171-
; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[SUM_INC_LCSSA]], [[LOOP_EXIT_LOOPEXIT]] ]
171+
; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi float [ -0.000000e+00, [[ENTRY:%.*]] ], [ [[SUM_INC_LCSSA]], [[LOOP_EXIT_LOOPEXIT]] ]
172172
; CHECK-NEXT: ret float [[SUM_LCSSA]]
173173
;
174174
entry:
@@ -177,7 +177,7 @@ entry:
177177

178178
loop:
179179
%idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop ]
180-
%sum = phi float [ 0.000000e+00, %entry ], [ %sum.inc, %loop ]
180+
%sum = phi float [ -0.000000e+00, %entry ], [ %sum.inc, %loop ]
181181
%address = getelementptr float, float* %array, i32 %idx
182182
%value = load float, float* %address
183183
%sum.inc = fadd reassoc float %sum, %value
@@ -186,7 +186,7 @@ loop:
186186
br i1 %be.cond, label %loop, label %loop.exit
187187

188188
loop.exit:
189-
%sum.lcssa = phi float [ %sum.inc, %loop ], [ 0.000000e+00, %entry ]
189+
%sum.lcssa = phi float [ %sum.inc, %loop ], [ -0.000000e+00, %entry ]
190190
ret float %sum.lcssa
191191
}
192192

@@ -201,8 +201,8 @@ define float @reduction_sum_float_only_reassoc_and_contract(i32 %n, float* %arra
201201
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
202202
; CHECK: vector.body:
203203
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
204-
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
205-
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
204+
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
205+
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
206206
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
207207
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 4
208208
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, float* [[ARRAY:%.*]], i32 [[TMP0]]
@@ -225,7 +225,7 @@ define float @reduction_sum_float_only_reassoc_and_contract(i32 %n, float* %arra
225225
; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
226226
; CHECK: scalar.ph:
227227
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ]
228-
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[LOOP_PREHEADER]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
228+
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ -0.000000e+00, [[LOOP_PREHEADER]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
229229
; CHECK-NEXT: br label [[LOOP:%.*]]
230230
; CHECK: loop:
231231
; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
@@ -240,7 +240,7 @@ define float @reduction_sum_float_only_reassoc_and_contract(i32 %n, float* %arra
240240
; CHECK-NEXT: [[SUM_INC_LCSSA:%.*]] = phi float [ [[SUM_INC]], [[LOOP]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
241241
; CHECK-NEXT: br label [[LOOP_EXIT]]
242242
; CHECK: loop.exit:
243-
; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[SUM_INC_LCSSA]], [[LOOP_EXIT_LOOPEXIT]] ]
243+
; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi float [ -0.000000e+00, [[ENTRY:%.*]] ], [ [[SUM_INC_LCSSA]], [[LOOP_EXIT_LOOPEXIT]] ]
244244
; CHECK-NEXT: ret float [[SUM_LCSSA]]
245245
;
246246
entry:
@@ -249,7 +249,7 @@ entry:
249249

250250
loop:
251251
%idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop ]
252-
%sum = phi float [ 0.000000e+00, %entry ], [ %sum.inc, %loop ]
252+
%sum = phi float [ -0.000000e+00, %entry ], [ %sum.inc, %loop ]
253253
%address = getelementptr float, float* %array, i32 %idx
254254
%value = load float, float* %address
255255
%sum.inc = fadd reassoc contract float %sum, %value
@@ -258,7 +258,7 @@ loop:
258258
br i1 %be.cond, label %loop, label %loop.exit
259259

260260
loop.exit:
261-
%sum.lcssa = phi float [ %sum.inc, %loop ], [ 0.000000e+00, %entry ]
261+
%sum.lcssa = phi float [ %sum.inc, %loop ], [ -0.000000e+00, %entry ]
262262
ret float %sum.lcssa
263263
}
264264

0 commit comments

Comments
 (0)