Skip to content

Commit a629d9e

Browse files
[NVPTX] Constant-folding for f2i, d2ui, f2ll etc. (llvm#118965)
Add constant-folding support for the NVVM intrinsics for converting float/double to signed/unsigned int32/int64 types, including all rounding-modes and ftz modifiers.
1 parent c274837 commit a629d9e

File tree

7 files changed

+2575
-41
lines changed

7 files changed

+2575
-41
lines changed

llvm/include/llvm/IR/NVVMIntrinsicFlags.h

Lines changed: 0 additions & 39 deletions
This file was deleted.
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
//===--- NVVMIntrinsicUtils.h -----------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
/// \file
10+
/// This file contains the definitions of the enumerations and flags
11+
/// associated with NVVM Intrinsics, along with some helper functions.
12+
//
13+
//===----------------------------------------------------------------------===//
14+
15+
#ifndef LLVM_IR_NVVMINTRINSICUTILS_H
16+
#define LLVM_IR_NVVMINTRINSICUTILS_H
17+
18+
#include <stdint.h>
19+
20+
#include "llvm/ADT/APFloat.h"
21+
#include "llvm/IR/Intrinsics.h"
22+
#include "llvm/IR/IntrinsicsNVPTX.h"
23+
24+
namespace llvm {
25+
namespace nvvm {
26+
27+
/// Reduction operations that can be requested on a TMA copy from shared to
/// global memory, i.e. on the "cp.reduce.async.bulk.tensor.*" family of PTX
/// instructions.
///
/// The underlying type is uint8_t and every enumerator carries an explicit
/// value.
/// NOTE(review): presumably these raw values are consumed elsewhere (e.g. as
/// intrinsic flag operands) - confirm before renumbering.
enum class TMAReductionOp : uint8_t {
  ADD = 0,
  MIN = 1,
  MAX = 2,
  INC = 3,
  DEC = 4,
  AND = 5,
  OR = 6,
  XOR = 7,
};
40+
41+
/// Returns true if \p IntrinsicID is one of the "_ftz" variants of the NVVM
/// float to i32 / i64 conversion intrinsics (f2i, f2ui, f2ll, f2ull), and
/// false for every other intrinsic ID.
inline bool IntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
  switch (IntrinsicID) {
  // *_rm_ftz
  case Intrinsic::nvvm_f2i_rm_ftz:
  case Intrinsic::nvvm_f2ui_rm_ftz:
  case Intrinsic::nvvm_f2ll_rm_ftz:
  case Intrinsic::nvvm_f2ull_rm_ftz:
  // *_rn_ftz
  case Intrinsic::nvvm_f2i_rn_ftz:
  case Intrinsic::nvvm_f2ui_rn_ftz:
  case Intrinsic::nvvm_f2ll_rn_ftz:
  case Intrinsic::nvvm_f2ull_rn_ftz:
  // *_rp_ftz
  case Intrinsic::nvvm_f2i_rp_ftz:
  case Intrinsic::nvvm_f2ui_rp_ftz:
  case Intrinsic::nvvm_f2ll_rp_ftz:
  case Intrinsic::nvvm_f2ull_rp_ftz:
  // *_rz_ftz
  case Intrinsic::nvvm_f2i_rz_ftz:
  case Intrinsic::nvvm_f2ui_rz_ftz:
  case Intrinsic::nvvm_f2ll_rz_ftz:
  case Intrinsic::nvvm_f2ull_rz_ftz:
    return true;
  default:
    // Anything that is not an "_ftz" conversion listed above.
    return false;
  }
}
67+
68+
/// Returns true if \p IntrinsicID is one of the NVVM conversion intrinsics
/// whose name denotes a signed destination (f2i, d2i, f2ll, d2ll - in any
/// rounding mode, with or without "_ftz"). Every other intrinsic ID,
/// including the unsigned f2ui / d2ui / f2ull / d2ull conversions, yields
/// false.
inline bool IntrinsicConvertsToSignedInteger(Intrinsic::ID IntrinsicID) {
  switch (IntrinsicID) {
  // rm
  case Intrinsic::nvvm_f2i_rm:
  case Intrinsic::nvvm_f2i_rm_ftz:
  case Intrinsic::nvvm_d2i_rm:
  case Intrinsic::nvvm_f2ll_rm:
  case Intrinsic::nvvm_f2ll_rm_ftz:
  case Intrinsic::nvvm_d2ll_rm:
  // rn
  case Intrinsic::nvvm_f2i_rn:
  case Intrinsic::nvvm_f2i_rn_ftz:
  case Intrinsic::nvvm_d2i_rn:
  case Intrinsic::nvvm_f2ll_rn:
  case Intrinsic::nvvm_f2ll_rn_ftz:
  case Intrinsic::nvvm_d2ll_rn:
  // rp
  case Intrinsic::nvvm_f2i_rp:
  case Intrinsic::nvvm_f2i_rp_ftz:
  case Intrinsic::nvvm_d2i_rp:
  case Intrinsic::nvvm_f2ll_rp:
  case Intrinsic::nvvm_f2ll_rp_ftz:
  case Intrinsic::nvvm_d2ll_rp:
  // rz
  case Intrinsic::nvvm_f2i_rz:
  case Intrinsic::nvvm_f2i_rz_ftz:
  case Intrinsic::nvvm_d2i_rz:
  case Intrinsic::nvvm_f2ll_rz:
  case Intrinsic::nvvm_f2ll_rz_ftz:
  case Intrinsic::nvvm_d2ll_rz:
    return true;
  default:
    // Not one of the signed conversions listed above.
    return false;
  }
}
102+
103+
/// Maps an NVVM float/double to integer conversion intrinsic to the APFloat
/// rounding mode encoded in its name:
///   *_rm -> APFloat::rmTowardNegative
///   *_rn -> APFloat::rmNearestTiesToEven
///   *_rp -> APFloat::rmTowardPositive
///   *_rz -> APFloat::rmTowardZero
/// The "_ftz" variants map to the same mode as their non-ftz counterparts.
///
/// \p IntrinsicID must be one of the f2i / f2ui / f2ll / f2ull / d2i / d2ui /
/// d2ll / d2ull intrinsics listed below; any other ID reaches
/// llvm_unreachable.
inline APFloat::roundingMode
IntrinsicGetRoundingMode(Intrinsic::ID IntrinsicID) {
  switch (IntrinsicID) {
  // Suffix "rm".
  case Intrinsic::nvvm_f2i_rm:
  case Intrinsic::nvvm_f2i_rm_ftz:
  case Intrinsic::nvvm_f2ui_rm:
  case Intrinsic::nvvm_f2ui_rm_ftz:
  case Intrinsic::nvvm_f2ll_rm:
  case Intrinsic::nvvm_f2ll_rm_ftz:
  case Intrinsic::nvvm_f2ull_rm:
  case Intrinsic::nvvm_f2ull_rm_ftz:
  case Intrinsic::nvvm_d2i_rm:
  case Intrinsic::nvvm_d2ui_rm:
  case Intrinsic::nvvm_d2ll_rm:
  case Intrinsic::nvvm_d2ull_rm:
    return APFloat::rmTowardNegative;

  // Suffix "rn".
  case Intrinsic::nvvm_f2i_rn:
  case Intrinsic::nvvm_f2i_rn_ftz:
  case Intrinsic::nvvm_f2ui_rn:
  case Intrinsic::nvvm_f2ui_rn_ftz:
  case Intrinsic::nvvm_f2ll_rn:
  case Intrinsic::nvvm_f2ll_rn_ftz:
  case Intrinsic::nvvm_f2ull_rn:
  case Intrinsic::nvvm_f2ull_rn_ftz:
  case Intrinsic::nvvm_d2i_rn:
  case Intrinsic::nvvm_d2ui_rn:
  case Intrinsic::nvvm_d2ll_rn:
  case Intrinsic::nvvm_d2ull_rn:
    return APFloat::rmNearestTiesToEven;

  // Suffix "rp".
  case Intrinsic::nvvm_f2i_rp:
  case Intrinsic::nvvm_f2i_rp_ftz:
  case Intrinsic::nvvm_f2ui_rp:
  case Intrinsic::nvvm_f2ui_rp_ftz:
  case Intrinsic::nvvm_f2ll_rp:
  case Intrinsic::nvvm_f2ll_rp_ftz:
  case Intrinsic::nvvm_f2ull_rp:
  case Intrinsic::nvvm_f2ull_rp_ftz:
  case Intrinsic::nvvm_d2i_rp:
  case Intrinsic::nvvm_d2ui_rp:
  case Intrinsic::nvvm_d2ll_rp:
  case Intrinsic::nvvm_d2ull_rp:
    return APFloat::rmTowardPositive;

  // Suffix "rz".
  case Intrinsic::nvvm_f2i_rz:
  case Intrinsic::nvvm_f2i_rz_ftz:
  case Intrinsic::nvvm_f2ui_rz:
  case Intrinsic::nvvm_f2ui_rz_ftz:
  case Intrinsic::nvvm_f2ll_rz:
  case Intrinsic::nvvm_f2ll_rz_ftz:
  case Intrinsic::nvvm_f2ull_rz:
  case Intrinsic::nvvm_f2ull_rz_ftz:
  case Intrinsic::nvvm_d2i_rz:
  case Intrinsic::nvvm_d2ui_rz:
  case Intrinsic::nvvm_d2ll_rz:
  case Intrinsic::nvvm_d2ull_rz:
    return APFloat::rmTowardZero;

  default:
    break;
  }

  // Only reached for an ID that is not one of the conversions handled above.
  llvm_unreachable("Invalid f2i/d2i rounding mode intrinsic");
  return APFloat::roundingMode::Invalid;
}
173+
174+
} // namespace nvvm
175+
} // namespace llvm
176+
#endif // LLVM_IR_NVVMINTRINSICUTILS_H

llvm/lib/Analysis/ConstantFolding.cpp

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,10 @@
4545
#include "llvm/IR/IntrinsicsAArch64.h"
4646
#include "llvm/IR/IntrinsicsAMDGPU.h"
4747
#include "llvm/IR/IntrinsicsARM.h"
48+
#include "llvm/IR/IntrinsicsNVPTX.h"
4849
#include "llvm/IR/IntrinsicsWebAssembly.h"
4950
#include "llvm/IR/IntrinsicsX86.h"
51+
#include "llvm/IR/NVVMIntrinsicUtils.h"
5052
#include "llvm/IR/Operator.h"
5153
#include "llvm/IR/Type.h"
5254
#include "llvm/IR/Value.h"
@@ -1687,6 +1689,58 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
16871689
case Intrinsic::x86_avx512_cvttsd2usi64:
16881690
return !Call->isStrictFP();
16891691

1692+
// NVVM float/double to int32/uint32 conversion intrinsics
1693+
case Intrinsic::nvvm_f2i_rm:
1694+
case Intrinsic::nvvm_f2i_rn:
1695+
case Intrinsic::nvvm_f2i_rp:
1696+
case Intrinsic::nvvm_f2i_rz:
1697+
case Intrinsic::nvvm_f2i_rm_ftz:
1698+
case Intrinsic::nvvm_f2i_rn_ftz:
1699+
case Intrinsic::nvvm_f2i_rp_ftz:
1700+
case Intrinsic::nvvm_f2i_rz_ftz:
1701+
case Intrinsic::nvvm_f2ui_rm:
1702+
case Intrinsic::nvvm_f2ui_rn:
1703+
case Intrinsic::nvvm_f2ui_rp:
1704+
case Intrinsic::nvvm_f2ui_rz:
1705+
case Intrinsic::nvvm_f2ui_rm_ftz:
1706+
case Intrinsic::nvvm_f2ui_rn_ftz:
1707+
case Intrinsic::nvvm_f2ui_rp_ftz:
1708+
case Intrinsic::nvvm_f2ui_rz_ftz:
1709+
case Intrinsic::nvvm_d2i_rm:
1710+
case Intrinsic::nvvm_d2i_rn:
1711+
case Intrinsic::nvvm_d2i_rp:
1712+
case Intrinsic::nvvm_d2i_rz:
1713+
case Intrinsic::nvvm_d2ui_rm:
1714+
case Intrinsic::nvvm_d2ui_rn:
1715+
case Intrinsic::nvvm_d2ui_rp:
1716+
case Intrinsic::nvvm_d2ui_rz:
1717+
1718+
// NVVM float/double to int64/uint64 conversion intrinsics
1719+
case Intrinsic::nvvm_f2ll_rm:
1720+
case Intrinsic::nvvm_f2ll_rn:
1721+
case Intrinsic::nvvm_f2ll_rp:
1722+
case Intrinsic::nvvm_f2ll_rz:
1723+
case Intrinsic::nvvm_f2ll_rm_ftz:
1724+
case Intrinsic::nvvm_f2ll_rn_ftz:
1725+
case Intrinsic::nvvm_f2ll_rp_ftz:
1726+
case Intrinsic::nvvm_f2ll_rz_ftz:
1727+
case Intrinsic::nvvm_f2ull_rm:
1728+
case Intrinsic::nvvm_f2ull_rn:
1729+
case Intrinsic::nvvm_f2ull_rp:
1730+
case Intrinsic::nvvm_f2ull_rz:
1731+
case Intrinsic::nvvm_f2ull_rm_ftz:
1732+
case Intrinsic::nvvm_f2ull_rn_ftz:
1733+
case Intrinsic::nvvm_f2ull_rp_ftz:
1734+
case Intrinsic::nvvm_f2ull_rz_ftz:
1735+
case Intrinsic::nvvm_d2ll_rm:
1736+
case Intrinsic::nvvm_d2ll_rn:
1737+
case Intrinsic::nvvm_d2ll_rp:
1738+
case Intrinsic::nvvm_d2ll_rz:
1739+
case Intrinsic::nvvm_d2ull_rm:
1740+
case Intrinsic::nvvm_d2ull_rn:
1741+
case Intrinsic::nvvm_d2ull_rp:
1742+
case Intrinsic::nvvm_d2ull_rz:
1743+
16901744
// Sign operations are actually bitwise operations, they do not raise
16911745
// exceptions even for SNANs.
16921746
case Intrinsic::fabs:
@@ -1849,6 +1903,12 @@ inline bool llvm_fenv_testexcept() {
18491903
return false;
18501904
}
18511905

1906+
/// Sign-preserving flush-to-zero: if \p V is denormal, the result is a zero
/// with the same semantics and the same sign as \p V; any other value is
/// returned unchanged (as a copy).
static const APFloat FTZPreserveSign(const APFloat &V) {
  return V.isDenormal() ? APFloat::getZero(V.getSemantics(), V.isNegative())
                        : V;
}
1911+
18521912
Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V,
18531913
Type *Ty) {
18541914
llvm_fenv_clearexcept();
@@ -2309,6 +2369,85 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
23092369
return ConstantFP::get(Ty->getContext(), U);
23102370
}
23112371

2372+
// NVVM float/double to signed/unsigned int32/int64 conversions:
2373+
switch (IntrinsicID) {
2374+
// f2i
2375+
case Intrinsic::nvvm_f2i_rm:
2376+
case Intrinsic::nvvm_f2i_rn:
2377+
case Intrinsic::nvvm_f2i_rp:
2378+
case Intrinsic::nvvm_f2i_rz:
2379+
case Intrinsic::nvvm_f2i_rm_ftz:
2380+
case Intrinsic::nvvm_f2i_rn_ftz:
2381+
case Intrinsic::nvvm_f2i_rp_ftz:
2382+
case Intrinsic::nvvm_f2i_rz_ftz:
2383+
// f2ui
2384+
case Intrinsic::nvvm_f2ui_rm:
2385+
case Intrinsic::nvvm_f2ui_rn:
2386+
case Intrinsic::nvvm_f2ui_rp:
2387+
case Intrinsic::nvvm_f2ui_rz:
2388+
case Intrinsic::nvvm_f2ui_rm_ftz:
2389+
case Intrinsic::nvvm_f2ui_rn_ftz:
2390+
case Intrinsic::nvvm_f2ui_rp_ftz:
2391+
case Intrinsic::nvvm_f2ui_rz_ftz:
2392+
// d2i
2393+
case Intrinsic::nvvm_d2i_rm:
2394+
case Intrinsic::nvvm_d2i_rn:
2395+
case Intrinsic::nvvm_d2i_rp:
2396+
case Intrinsic::nvvm_d2i_rz:
2397+
// d2ui
2398+
case Intrinsic::nvvm_d2ui_rm:
2399+
case Intrinsic::nvvm_d2ui_rn:
2400+
case Intrinsic::nvvm_d2ui_rp:
2401+
case Intrinsic::nvvm_d2ui_rz:
2402+
// f2ll
2403+
case Intrinsic::nvvm_f2ll_rm:
2404+
case Intrinsic::nvvm_f2ll_rn:
2405+
case Intrinsic::nvvm_f2ll_rp:
2406+
case Intrinsic::nvvm_f2ll_rz:
2407+
case Intrinsic::nvvm_f2ll_rm_ftz:
2408+
case Intrinsic::nvvm_f2ll_rn_ftz:
2409+
case Intrinsic::nvvm_f2ll_rp_ftz:
2410+
case Intrinsic::nvvm_f2ll_rz_ftz:
2411+
// f2ull
2412+
case Intrinsic::nvvm_f2ull_rm:
2413+
case Intrinsic::nvvm_f2ull_rn:
2414+
case Intrinsic::nvvm_f2ull_rp:
2415+
case Intrinsic::nvvm_f2ull_rz:
2416+
case Intrinsic::nvvm_f2ull_rm_ftz:
2417+
case Intrinsic::nvvm_f2ull_rn_ftz:
2418+
case Intrinsic::nvvm_f2ull_rp_ftz:
2419+
case Intrinsic::nvvm_f2ull_rz_ftz:
2420+
// d2ll
2421+
case Intrinsic::nvvm_d2ll_rm:
2422+
case Intrinsic::nvvm_d2ll_rn:
2423+
case Intrinsic::nvvm_d2ll_rp:
2424+
case Intrinsic::nvvm_d2ll_rz:
2425+
// d2ull
2426+
case Intrinsic::nvvm_d2ull_rm:
2427+
case Intrinsic::nvvm_d2ull_rn:
2428+
case Intrinsic::nvvm_d2ull_rp:
2429+
case Intrinsic::nvvm_d2ull_rz: {
2430+
// In float-to-integer conversion, NaN inputs are converted to 0.
2431+
if (U.isNaN())
2432+
return ConstantInt::get(Ty, 0);
2433+
2434+
APFloat::roundingMode RMode = nvvm::IntrinsicGetRoundingMode(IntrinsicID);
2435+
bool IsFTZ = nvvm::IntrinsicShouldFTZ(IntrinsicID);
2436+
bool IsSigned = nvvm::IntrinsicConvertsToSignedInteger(IntrinsicID);
2437+
2438+
APSInt ResInt(Ty->getIntegerBitWidth(), !IsSigned);
2439+
auto FloatToRound = IsFTZ ? FTZPreserveSign(U) : U;
2440+
2441+
bool IsExact = false;
2442+
APFloat::opStatus Status =
2443+
FloatToRound.convertToInteger(ResInt, RMode, &IsExact);
2444+
2445+
if (Status != APFloat::opInvalidOp)
2446+
return ConstantInt::get(Ty, ResInt);
2447+
return nullptr;
2448+
}
2449+
}
2450+
23122451
/// We only fold functions with finite arguments. Folding NaN and inf is
23132452
/// likely to be aborted with an exception anyway, and some host libms
23142453
/// have known errors raising exceptions.

llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
#include "NVPTX.h"
1515
#include "NVPTXUtilities.h"
1616
#include "llvm/ADT/StringRef.h"
17-
#include "llvm/IR/NVVMIntrinsicFlags.h"
17+
#include "llvm/IR/NVVMIntrinsicUtils.h"
1818
#include "llvm/MC/MCExpr.h"
1919
#include "llvm/MC/MCInst.h"
2020
#include "llvm/MC/MCInstrInfo.h"

0 commit comments

Comments
 (0)