Skip to content

Commit 86a480e

Browse files
committed
[AMDGPU] Add simplification/combines for llvm.amdgcn.fmul.legacy
Differential Revision: https://reviews.llvm.org/D88955
1 parent 2415636 commit 86a480e

File tree

3 files changed

+91
-0
lines changed

3 files changed

+91
-0
lines changed

llvm/include/llvm/IR/PatternMatch.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -619,6 +619,18 @@ inline cstfp_pred_ty<is_finite> m_Finite() {
619619
}
620620
inline apf_pred_ty<is_finite> m_Finite(const APFloat *&V) { return V; }
621621

622+
// Predicate type used as the template argument of cstfp_pred_ty / apf_pred_ty
// by the m_FiniteNonZero matchers: it accepts an APFloat only when
// APFloat::isFiniteNonZero() reports true for it.
struct is_finitenonzero {
623+
// Returns the result of C.isFiniteNonZero() unchanged.
bool isValue(const APFloat &C) { return C.isFiniteNonZero(); }
624+
};
625+
/// Match a finite non-zero FP constant.
626+
/// For vectors, this includes constants with undefined elements.
/// This overload takes no arguments and returns a default-constructed
/// cstfp_pred_ty<is_finitenonzero> matcher.
627+
inline cstfp_pred_ty<is_finitenonzero> m_FiniteNonZero() {
628+
return cstfp_pred_ty<is_finitenonzero>();
629+
}
630+
/// Overload of m_FiniteNonZero taking a `const APFloat *&`. The reference is
/// converted into the returned apf_pred_ty<is_finitenonzero> matcher.
/// NOTE(review): presumably V is set to the matched constant when the match
/// succeeds -- confirm against the definition of apf_pred_ty.
inline apf_pred_ty<is_finitenonzero> m_FiniteNonZero(const APFloat *&V) {
631+
return V;
632+
}
633+
622634
struct is_any_zero_fp {
623635
bool isValue(const APFloat &C) { return C.isZero(); }
624636
};

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -823,6 +823,39 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
823823

824824
break;
825825
}
826+
case Intrinsic::amdgcn_fmul_legacy: {
827+
Value *Op0 = II.getArgOperand(0);
828+
Value *Op1 = II.getArgOperand(1);
829+
830+
// The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
831+
// infinity, gives +0.0.
832+
// TODO: Move to InstSimplify?
833+
if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
834+
match(Op1, PatternMatch::m_AnyZeroFP()))
835+
return IC.replaceInstUsesWith(II, ConstantFP::getNullValue(II.getType()));
836+
837+
// If we can prove we don't have one of the special cases then we can use a
838+
// normal fmul instruction instead.
839+
auto *TLI = &IC.getTargetLibraryInfo();
840+
bool CanSimplifyToMul = false;
841+
// TODO: Create and use isKnownFiniteNonZero instead of just matching
842+
// constants here.
843+
if (match(Op0, PatternMatch::m_FiniteNonZero()) ||
844+
match(Op1, PatternMatch::m_FiniteNonZero())) {
845+
// One operand is not zero or infinity or NaN.
846+
CanSimplifyToMul = true;
847+
} else if (isKnownNeverInfinity(Op0, TLI) && isKnownNeverNaN(Op0, TLI) &&
848+
isKnownNeverInfinity(Op1, TLI) && isKnownNeverNaN(Op1, TLI)) {
849+
// Neither operand is infinity or NaN.
850+
CanSimplifyToMul = true;
851+
}
852+
if (CanSimplifyToMul) {
853+
auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II);
854+
FMul->takeName(&II);
855+
return IC.replaceInstUsesWith(II, FMul);
856+
}
857+
break;
858+
}
826859
default: {
827860
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
828861
AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -instcombine -S | FileCheck %s
3+
4+
; A constant +0.0 operand makes the legacy multiply simplify to +0.0.
5+
define float @test_zero(float %x) {
6+
; CHECK-LABEL: @test_zero(
7+
; CHECK-NEXT: ret float 0.000000e+00
8+
;
9+
%call = call float @llvm.amdgcn.fmul.legacy(float %x, float 0.0)
10+
ret float %call
11+
}
12+
13+
; A constant -0.0 operand also simplifies to +0.0 (not -0.0).
14+
define float @test_negzero(float %y) {
15+
; CHECK-LABEL: @test_negzero(
16+
; CHECK-NEXT: ret float 0.000000e+00
17+
;
18+
%call = call float @llvm.amdgcn.fmul.legacy(float -0.0, float %y)
19+
ret float %call
20+
}
21+
22+
; Combine to fmul because the constant is finite and non-zero.
23+
define float @test_const(float %x) {
24+
; CHECK-LABEL: @test_const(
25+
; CHECK-NEXT: [[CALL:%.*]] = fmul float [[X:%.*]], 9.950000e+01
26+
; CHECK-NEXT: ret float [[CALL]]
27+
;
28+
%call = call float @llvm.amdgcn.fmul.legacy(float %x, float 99.5)
29+
ret float %call
30+
}
31+
32+
; Combine to fmul because neither argument can be infinity or NaN.
33+
define float @test_finite(i32 %x, i32 %y) {
34+
; CHECK-LABEL: @test_finite(
35+
; CHECK-NEXT: [[XF:%.*]] = sitofp i32 [[X:%.*]] to float
36+
; CHECK-NEXT: [[YF:%.*]] = sitofp i32 [[Y:%.*]] to float
37+
; CHECK-NEXT: [[CALL:%.*]] = fmul float [[XF]], [[YF]]
38+
; CHECK-NEXT: ret float [[CALL]]
39+
;
40+
%xf = sitofp i32 %x to float
41+
%yf = sitofp i32 %y to float
42+
%call = call float @llvm.amdgcn.fmul.legacy(float %xf, float %yf)
43+
ret float %call
44+
}
45+
46+
declare float @llvm.amdgcn.fmul.legacy(float, float)

0 commit comments

Comments
 (0)