Skip to content

[ValueTracking] Implement computeKnownFPClass for llvm.vector.reduce.{fmin,fmax,fmaximum,fminimum} #88408

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions llvm/lib/Analysis/ValueTracking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5008,6 +5008,19 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,

break;
}
case Intrinsic::vector_reduce_fmax:
case Intrinsic::vector_reduce_fmin:
case Intrinsic::vector_reduce_fmaximum:
case Intrinsic::vector_reduce_fminimum: {
// A min/max reduction chooses its result from among the elements of the
// vector operand, so we can infer any class information that is common to
// all elements. The known classes of the vector operand are therefore used
// as the starting point for the scalar result.
Known = computeKnownFPClass(II->getArgOperand(0), II->getFastMathFlags(),
InterestedClasses, Depth + 1, Q);
// Can only propagate sign if output is never NaN. When a NaN has not been
// ruled out, discard whatever sign-bit knowledge was computed for the
// operand.
if (!Known.isKnownNeverNaN())
Known.SignBit.reset();
break;
}
case Intrinsic::trunc:
case Intrinsic::floor:
case Intrinsic::ceil:
Expand Down
119 changes: 119 additions & 0 deletions llvm/test/Transforms/InstCombine/known-fpclass-reduce-signbit.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -S -passes=instcombine | FileCheck %s

define i1 @vector_reduce_maximum_signbit(<4 x double> nofpclass(nan nzero) %x) {
; CHECK-LABEL: define i1 @vector_reduce_maximum_signbit
; CHECK-SAME: (<4 x double> nofpclass(nan nzero) [[X:%.*]]) {
; CHECK-NEXT: ret i1 true
;
%x.abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x)
%op = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> %x.abs)
%cmp = fcmp oge double %op, 0.0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use bitcast + icmp instead.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is the canonical form of the signbit idiom: https://godbolt.org/z/zMnPPq6o3. I prefer to use the canonical form, although in cases with nnan/nsz both forms are fine.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is more instructions, so it probably shouldn't be the canonical form

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Although I suppose it works even in the nan case

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bitcast + icmp doesn't work yet; it needs the KnownFPClass -> KnownBits patch first. My preference would be to just get this in, but I can wait if there are strong feelings.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the purposes of this patch, I think it should just stay the fcmp. It's fewer operations and has the same effect for the test

ret i1 %cmp
}

; Negative test: %x only excludes nzero, so a NaN element is still possible and
; the expected output keeps the fabs, the fmaximum reduction and the compare.
define i1 @vector_reduce_maximum_signbit_fail_maybe_nan(<4 x double> nofpclass(nzero) %x) {
; CHECK-LABEL: define i1 @vector_reduce_maximum_signbit_fail_maybe_nan
; CHECK-SAME: (<4 x double> nofpclass(nzero) [[X:%.*]]) {
; CHECK-NEXT: [[X_ABS:%.*]] = call <4 x double> @llvm.fabs.v4f64(<4 x double> [[X]])
; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> [[X_ABS]])
; CHECK-NEXT: [[CMP:%.*]] = fcmp oge double [[OP]], 0.000000e+00
; CHECK-NEXT: ret i1 [[CMP]]
;
%x.abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x)
%op = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> %x.abs)
%cmp = fcmp oge double %op, 0.0
ret i1 %cmp
}


; %x excludes NaN and is passed through fabs before the fminimum reduction; the
; expected output folds the `fcmp oge ..., 0.0` on the reduced value to true.
define i1 @vector_reduce_minimum_signbit(<4 x double> nofpclass(nan nzero) %x) {
; CHECK-LABEL: define i1 @vector_reduce_minimum_signbit
; CHECK-SAME: (<4 x double> nofpclass(nan nzero) [[X:%.*]]) {
; CHECK-NEXT: ret i1 true
;
%x.abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x)
%op = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> %x.abs)
%cmp = fcmp oge double %op, 0.0
ret i1 %cmp
}

; Negative test: %x only excludes nzero, so a NaN element is still possible and
; the expected output keeps the fabs, the fminimum reduction and the compare.
define i1 @vector_reduce_minimum_signbit_fail_maybe_nan(<4 x double> nofpclass(nzero) %x) {
; CHECK-LABEL: define i1 @vector_reduce_minimum_signbit_fail_maybe_nan
; CHECK-SAME: (<4 x double> nofpclass(nzero) [[X:%.*]]) {
; CHECK-NEXT: [[X_ABS:%.*]] = call <4 x double> @llvm.fabs.v4f64(<4 x double> [[X]])
; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> [[X_ABS]])
; CHECK-NEXT: [[CMP:%.*]] = fcmp oge double [[OP]], 0.000000e+00
; CHECK-NEXT: ret i1 [[CMP]]
;
%x.abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x)
%op = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> %x.abs)
%cmp = fcmp oge double %op, 0.0
ret i1 %cmp
}

; %x excludes NaN and is passed through fabs before the fmax reduction; the
; expected output folds the `fcmp oge ..., 0.0` on the reduced value to true.
define i1 @vector_reduce_max_signbit(<4 x double> nofpclass(nan nzero) %x) {
; CHECK-LABEL: define i1 @vector_reduce_max_signbit
; CHECK-SAME: (<4 x double> nofpclass(nan nzero) [[X:%.*]]) {
; CHECK-NEXT: ret i1 true
;
%x.abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x)
%op = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x.abs)
%cmp = fcmp oge double %op, 0.0
ret i1 %cmp
}

; Negative test: %x only excludes nzero, so a NaN element is still possible and
; the expected output keeps the fabs, the fmax reduction and the compare.
define i1 @vector_reduce_max_signbit_fail_maybe_nan(<4 x double> nofpclass(nzero) %x) {
; CHECK-LABEL: define i1 @vector_reduce_max_signbit_fail_maybe_nan
; CHECK-SAME: (<4 x double> nofpclass(nzero) [[X:%.*]]) {
; CHECK-NEXT: [[X_ABS:%.*]] = call <4 x double> @llvm.fabs.v4f64(<4 x double> [[X]])
; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> [[X_ABS]])
; CHECK-NEXT: [[CMP:%.*]] = fcmp oge double [[OP]], 0.000000e+00
; CHECK-NEXT: ret i1 [[CMP]]
;
%x.abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x)
%op = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x.abs)
%cmp = fcmp oge double %op, 0.0
ret i1 %cmp
}


; %x excludes NaN and is passed through fabs before the fmin reduction; the
; expected output folds the `fcmp oge ..., 0.0` on the reduced value to true.
define i1 @vector_reduce_min_signbit(<4 x double> nofpclass(nan nzero) %x) {
; CHECK-LABEL: define i1 @vector_reduce_min_signbit
; CHECK-SAME: (<4 x double> nofpclass(nan nzero) [[X:%.*]]) {
; CHECK-NEXT: ret i1 true
;
%x.abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x)
%op = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x.abs)
%cmp = fcmp oge double %op, 0.0
ret i1 %cmp
}

; Negative test: %x only excludes nzero, so a NaN element is still possible and
; the expected output keeps the fabs, the fmin reduction and the compare.
define i1 @vector_reduce_min_signbit_fail_maybe_nan(<4 x double> nofpclass(nzero) %x) {
; CHECK-LABEL: define i1 @vector_reduce_min_signbit_fail_maybe_nan
; CHECK-SAME: (<4 x double> nofpclass(nzero) [[X:%.*]]) {
; CHECK-NEXT: [[X_ABS:%.*]] = call <4 x double> @llvm.fabs.v4f64(<4 x double> [[X]])
; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> [[X_ABS]])
; CHECK-NEXT: [[CMP:%.*]] = fcmp oge double [[OP]], 0.000000e+00
; CHECK-NEXT: ret i1 [[CMP]]
;
%x.abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x)
%op = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x.abs)
%cmp = fcmp oge double %op, 0.0
ret i1 %cmp
}



; %x only excludes nzero, but the fmin reduction call itself carries the nnan
; fast-math flag; the expected output still folds the compare to true.
define i1 @vector_reduce_min_signbit_nnan_from_fmf(<4 x double> nofpclass(nzero) %x) {
; CHECK-LABEL: define i1 @vector_reduce_min_signbit_nnan_from_fmf
; CHECK-SAME: (<4 x double> nofpclass(nzero) [[X:%.*]]) {
; CHECK-NEXT: ret i1 true
;
%x.abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x)
%op = call nnan double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x.abs)
%cmp = fcmp oge double %op, 0.0
ret i1 %cmp
}


100 changes: 100 additions & 0 deletions llvm/test/Transforms/InstSimplify/known-never-infinity.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1109,6 +1109,106 @@ define float @fcmp_ult_neginf_implies_class_assert(float %arg) {
ret float %mul_by_zero
}

; The reduced vector comes from an fadd carrying the ninf flag; the expected
; output folds `fcmp une` against 0x7ff0000000000000 (+infinity) to true.
define i1 @isKnownNeverInfinity_vector_reduce_maximum(<4 x double> %x) {
; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_maximum
; CHECK-SAME: (<4 x double> [[X:%.*]]) {
; CHECK-NEXT: ret i1 true
;
%ninf.x = fadd ninf <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0>
%op = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> %ninf.x)
%cmp = fcmp une double %op, 0x7ff0000000000000
ret i1 %cmp
}

; Negative test: the fadd has no ninf flag (despite the %ninf.x name), so the
; expected output keeps the fadd, the fmaximum reduction and the compare.
define i1 @isKnownNeverInfinity_vector_reduce_maximum_fail(<4 x double> %x) {
; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_maximum_fail
; CHECK-SAME: (<4 x double> [[X:%.*]]) {
; CHECK-NEXT: [[NINF_X:%.*]] = fadd <4 x double> [[X]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> [[NINF_X]])
; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[OP]], 0x7FF0000000000000
; CHECK-NEXT: ret i1 [[CMP]]
;
%ninf.x = fadd <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0>
%op = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> %ninf.x)
%cmp = fcmp une double %op, 0x7ff0000000000000
ret i1 %cmp
}

; The reduced vector comes from an fadd carrying the ninf flag; the expected
; output folds `fcmp une` against 0x7ff0000000000000 (+infinity) to true.
define i1 @isKnownNeverInfinity_vector_reduce_minimum(<4 x double> %x) {
; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_minimum
; CHECK-SAME: (<4 x double> [[X:%.*]]) {
; CHECK-NEXT: ret i1 true
;
%ninf.x = fadd ninf <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0>
%op = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> %ninf.x)
%cmp = fcmp une double %op, 0x7ff0000000000000
ret i1 %cmp
}

; Negative test: the fadd has no ninf flag (despite the %ninf.x name), so the
; expected output keeps the fadd, the fminimum reduction and the compare.
define i1 @isKnownNeverInfinity_vector_reduce_minimum_fail(<4 x double> %x) {
; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_minimum_fail
; CHECK-SAME: (<4 x double> [[X:%.*]]) {
; CHECK-NEXT: [[NINF_X:%.*]] = fadd <4 x double> [[X]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> [[NINF_X]])
; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[OP]], 0x7FF0000000000000
; CHECK-NEXT: ret i1 [[CMP]]
;
%ninf.x = fadd <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0>
%op = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> %ninf.x)
%cmp = fcmp une double %op, 0x7ff0000000000000
ret i1 %cmp
}

; The reduced vector comes from an fadd carrying the ninf flag; the expected
; output folds `fcmp une` against 0x7ff0000000000000 (+infinity) to true.
define i1 @isKnownNeverInfinity_vector_reduce_fmax(<4 x double> %x) {
; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_fmax
; CHECK-SAME: (<4 x double> [[X:%.*]]) {
; CHECK-NEXT: ret i1 true
;
%ninf.x = fadd ninf <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0>
%op = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %ninf.x)
%cmp = fcmp une double %op, 0x7ff0000000000000
ret i1 %cmp
}

; Negative test: the fadd has no ninf flag (despite the %ninf.x name), so the
; expected output keeps the fadd, the fmax reduction and the compare.
define i1 @isKnownNeverInfinity_vector_reduce_fmax_fail(<4 x double> %x) {
; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_fmax_fail
; CHECK-SAME: (<4 x double> [[X:%.*]]) {
; CHECK-NEXT: [[NINF_X:%.*]] = fadd <4 x double> [[X]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> [[NINF_X]])
; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[OP]], 0x7FF0000000000000
; CHECK-NEXT: ret i1 [[CMP]]
;
%ninf.x = fadd <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0>
%op = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %ninf.x)
%cmp = fcmp une double %op, 0x7ff0000000000000
ret i1 %cmp
}

; The reduced vector comes from an fadd carrying the ninf flag; the expected
; output folds `fcmp une` against 0x7ff0000000000000 (+infinity) to true.
define i1 @isKnownNeverInfinity_vector_reduce_fmin(<4 x double> %x) {
; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_fmin
; CHECK-SAME: (<4 x double> [[X:%.*]]) {
; CHECK-NEXT: ret i1 true
;
%ninf.x = fadd ninf <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0>
%op = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %ninf.x)
%cmp = fcmp une double %op, 0x7ff0000000000000
ret i1 %cmp
}

; Negative test: the fadd has no ninf flag (despite the %ninf.x name), so the
; expected output keeps the fadd, the fmin reduction and the compare.
define i1 @isKnownNeverInfinity_vector_reduce_fmin_fail(<4 x double> %x) {
; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_fmin_fail
; CHECK-SAME: (<4 x double> [[X:%.*]]) {
; CHECK-NEXT: [[NINF_X:%.*]] = fadd <4 x double> [[X]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> [[NINF_X]])
; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[OP]], 0x7FF0000000000000
; CHECK-NEXT: ret i1 [[CMP]]
;
%ninf.x = fadd <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0>
%op = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %ninf.x)
%cmp = fcmp une double %op, 0x7ff0000000000000
ret i1 %cmp
}

declare double @llvm.arithmetic.fence.f64(double)
declare double @llvm.canonicalize.f64(double)
declare double @llvm.ceil.f64(double)
Expand Down