Skip to content

Commit 1fbb6b4

Browse files
authored
[LV] Prefer FLT_MIN/MAX for fmin/fmax reductions with ninf (#107141)
Analogous to 2c7786e, cleanup a case where the vectorizer is emitting a non-canonical identity value given the available flags. We use largest/smallest value during ISEL, and VP expansion, but not during vectorization. Since the fmin/fmax/fminimum/fmaximum intrinsics don't require a start value, this difference is only visible when masking of inactive lanes is required. Primary motivation of this change is simply to remove a difference between version of code which reason about the identity value of a reduction so I can kill all but one off. In review, it was pointed out that this is actually a functional fix as well. The old code used inf on a noinf reduction instruction - whose result is poison! That wasn't the intent of the code.
1 parent eec1fac commit 1fbb6b4

File tree

4 files changed

+13
-12
lines changed

4 files changed

+13
-12
lines changed

llvm/lib/Analysis/IVDescriptors.cpp

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1058,17 +1058,18 @@ Value *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,
10581058
return ConstantInt::get(Tp,
10591059
APInt::getSignedMinValue(Tp->getIntegerBitWidth()));
10601060
case RecurKind::FMin:
1061-
assert((FMF.noNaNs() && FMF.noSignedZeros()) &&
1062-
"nnan, nsz is expected to be set for FP min reduction.");
1063-
return ConstantFP::getInfinity(Tp, false /*Negative*/);
10641061
case RecurKind::FMax:
10651062
assert((FMF.noNaNs() && FMF.noSignedZeros()) &&
1066-
"nnan, nsz is expected to be set for FP max reduction.");
1067-
return ConstantFP::getInfinity(Tp, true /*Negative*/);
1063+
"nnan, nsz is expected to be set for FP min/max reduction.");
1064+
[[fallthrough]];
10681065
case RecurKind::FMinimum:
1069-
return ConstantFP::getInfinity(Tp, false /*Negative*/);
1070-
case RecurKind::FMaximum:
1071-
return ConstantFP::getInfinity(Tp, true /*Negative*/);
1066+
case RecurKind::FMaximum: {
1067+
bool Negative = K == RecurKind::FMax || K == RecurKind::FMaximum;
1068+
const fltSemantics &Semantics = Tp->getFltSemantics();
1069+
return !FMF.noInfs()
1070+
? ConstantFP::getInfinity(Tp, Negative)
1071+
: ConstantFP::get(Tp, APFloat::getLargest(Semantics, Negative));
1072+
}
10721073
case RecurKind::IAnyOf:
10731074
case RecurKind::FAnyOf:
10741075
llvm_unreachable("No meaningful identity for recurrence kind");

llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ define float @cond_cmp_sel(ptr noalias %a, ptr noalias %cond, i64 %N) {
115115
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[A:%.*]], i64 [[TMP6]]
116116
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[TMP10]], i32 0
117117
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP11]], i32 4, <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> poison)
118-
; CHECK-NEXT: [[TMP12:%.*]] = select fast <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 0x7FF0000000000000, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
118+
; CHECK-NEXT: [[TMP12:%.*]] = select fast <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 0x47EFFFFFE0000000, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
119119
; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> [[TMP12]])
120120
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt float [[TMP13]], [[VEC_PHI]]
121121
; CHECK-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP13]], float [[VEC_PHI]]

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1270,7 +1270,7 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 {
12701270
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP11]]
12711271
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0
12721272
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]])
1273-
; IF-EVL-NEXT: [[TMP14:%.*]] = call fast float @llvm.vp.reduce.fmin.nxv4f32(float 0x7FF0000000000000, <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]])
1273+
; IF-EVL-NEXT: [[TMP14:%.*]] = call fast float @llvm.vp.reduce.fmin.nxv4f32(float 0x47EFFFFFE0000000, <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]])
12741274
; IF-EVL-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt float [[TMP14]], [[VEC_PHI]]
12751275
; IF-EVL-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP14]], float [[VEC_PHI]]
12761276
; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
@@ -1392,7 +1392,7 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 {
13921392
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP11]]
13931393
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0
13941394
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]])
1395-
; IF-EVL-NEXT: [[TMP14:%.*]] = call fast float @llvm.vp.reduce.fmax.nxv4f32(float 0xFFF0000000000000, <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]])
1395+
; IF-EVL-NEXT: [[TMP14:%.*]] = call fast float @llvm.vp.reduce.fmax.nxv4f32(float 0xC7EFFFFFE0000000, <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]])
13961396
; IF-EVL-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt float [[TMP14]], [[VEC_PHI]]
13971397
; IF-EVL-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP14]], float [[VEC_PHI]]
13981398
; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64

llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ define float @cond_cmp_sel(ptr noalias %a, ptr noalias %cond, i64 %N) {
169169
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
170170
; CHECK: pred.load.continue6:
171171
; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x float> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], [[PRED_LOAD_IF5]] ]
172-
; CHECK-NEXT: [[TMP25:%.*]] = select fast <4 x i1> [[TMP1]], <4 x float> [[TMP24]], <4 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000>
172+
; CHECK-NEXT: [[TMP25:%.*]] = select fast <4 x i1> [[TMP1]], <4 x float> [[TMP24]], <4 x float> <float 0x47EFFFFFE0000000, float 0x47EFFFFFE0000000, float 0x47EFFFFFE0000000, float 0x47EFFFFFE0000000>
173173
; CHECK-NEXT: [[TMP26:%.*]] = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP25]])
174174
; CHECK-NEXT: [[RDX_MINMAX_SELECT]] = call fast float @llvm.minnum.f32(float [[TMP26]], float [[VEC_PHI]])
175175
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4

0 commit comments

Comments
 (0)