Skip to content

Commit c91d6ff

Browse files
committed
[NVPTX] deprecate nvvm.rotate.* intrinsics, clean up funnel-shift handling
1 parent 660cc98 commit c91d6ff

File tree

6 files changed

+374
-438
lines changed

6 files changed

+374
-438
lines changed

llvm/include/llvm/IR/IntrinsicsNVVM.td

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4489,22 +4489,6 @@ def int_nvvm_sust_p_3d_v4i32_trap
44894489
"llvm.nvvm.sust.p.3d.v4i32.trap">,
44904490
ClangBuiltin<"__nvvm_sust_p_3d_v4i32_trap">;
44914491

4492-
4493-
def int_nvvm_rotate_b32
4494-
: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
4495-
[IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.b32">,
4496-
ClangBuiltin<"__nvvm_rotate_b32">;
4497-
4498-
def int_nvvm_rotate_b64
4499-
: DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty],
4500-
[IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.b64">,
4501-
ClangBuiltin<"__nvvm_rotate_b64">;
4502-
4503-
def int_nvvm_rotate_right_b64
4504-
: DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty],
4505-
[IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.right.b64">,
4506-
ClangBuiltin<"__nvvm_rotate_right_b64">;
4507-
45084492
def int_nvvm_swap_lo_hi_b64
45094493
: DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty],
45104494
[IntrNoMem, IntrSpeculatable], "llvm.nvvm.swap.lo.hi.b64">,

llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 102 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1268,6 +1268,9 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
12681268
else if (Name.consume_front("atomic.load.add."))
12691269
// nvvm.atomic.load.add.{f32.p,f64.p}
12701270
Expand = Name.starts_with("f32.p") || Name.starts_with("f64.p");
1271+
else if (Name.consume_front("rotate."))
1272+
// nvvm.rotate.{b32,b64,right.b64}
1273+
Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
12711274
else
12721275
Expand = false;
12731276

@@ -2254,6 +2257,104 @@ void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
22542257
}
22552258
}
22562259

2260+
static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
2261+
Function *F, IRBuilder<> &Builder) {
2262+
Value *Rep = nullptr;
2263+
2264+
if (Name == "abs.i" || Name == "abs.ll") {
2265+
Value *Arg = CI->getArgOperand(0);
2266+
Value *Neg = Builder.CreateNeg(Arg, "neg");
2267+
Value *Cmp = Builder.CreateICmpSGE(
2268+
Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2269+
Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
2270+
} else if (Name.starts_with("atomic.load.add.f32.p") ||
2271+
Name.starts_with("atomic.load.add.f64.p")) {
2272+
Value *Ptr = CI->getArgOperand(0);
2273+
Value *Val = CI->getArgOperand(1);
2274+
Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
2275+
AtomicOrdering::SequentiallyConsistent);
2276+
} else if (Name.consume_front("max.") &&
2277+
(Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2278+
Name == "ui" || Name == "ull")) {
2279+
Value *Arg0 = CI->getArgOperand(0);
2280+
Value *Arg1 = CI->getArgOperand(1);
2281+
Value *Cmp = Name.starts_with("u")
2282+
? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
2283+
: Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
2284+
Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
2285+
} else if (Name.consume_front("min.") &&
2286+
(Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2287+
Name == "ui" || Name == "ull")) {
2288+
Value *Arg0 = CI->getArgOperand(0);
2289+
Value *Arg1 = CI->getArgOperand(1);
2290+
Value *Cmp = Name.starts_with("u")
2291+
? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
2292+
: Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
2293+
Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
2294+
} else if (Name == "clz.ll") {
2295+
// llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2296+
Value *Arg = CI->getArgOperand(0);
2297+
Value *Ctlz = Builder.CreateCall(
2298+
Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
2299+
{Arg->getType()}),
2300+
{Arg, Builder.getFalse()}, "ctlz");
2301+
Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2302+
} else if (Name == "popc.ll") {
2303+
// llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2304+
// i64.
2305+
Value *Arg = CI->getArgOperand(0);
2306+
Value *Popc = Builder.CreateCall(
2307+
Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
2308+
{Arg->getType()}),
2309+
Arg, "ctpop");
2310+
Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2311+
} else if (Name == "h2f") {
2312+
Rep = Builder.CreateCall(
2313+
Intrinsic::getDeclaration(F->getParent(), Intrinsic::convert_from_fp16,
2314+
{Builder.getFloatTy()}),
2315+
CI->getArgOperand(0), "h2f");
2316+
} else if (Name == "rotate.b32") {
2317+
Value *Arg = CI->getOperand(0);
2318+
Value *ShiftAmt = CI->getOperand(1);
2319+
Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2320+
{Arg, Arg, ShiftAmt});
2321+
} else if (Name == "rotate.b64") {
2322+
Type *Int64Ty = Builder.getInt64Ty();
2323+
Value *Arg = CI->getOperand(0);
2324+
Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2325+
Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2326+
{Arg, Arg, ZExtShiftAmt});
2327+
} else if (Name == "rotate.right.b64") {
2328+
Type *Int64Ty = Builder.getInt64Ty();
2329+
Value *Arg = CI->getOperand(0);
2330+
Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2331+
Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2332+
{Arg, Arg, ZExtShiftAmt});
2333+
} else {
2334+
Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
2335+
if (IID != Intrinsic::not_intrinsic &&
2336+
!F->getReturnType()->getScalarType()->isBFloatTy()) {
2337+
rename(F);
2338+
Function *NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
2339+
SmallVector<Value *, 2> Args;
2340+
for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2341+
Value *Arg = CI->getArgOperand(I);
2342+
Type *OldType = Arg->getType();
2343+
Type *NewType = NewFn->getArg(I)->getType();
2344+
Args.push_back(
2345+
(OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2346+
? Builder.CreateBitCast(Arg, NewType)
2347+
: Arg);
2348+
}
2349+
Rep = Builder.CreateCall(NewFn, Args);
2350+
if (F->getReturnType()->isIntegerTy())
2351+
Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2352+
}
2353+
}
2354+
2355+
return Rep;
2356+
}
2357+
22572358
static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
22582359
IRBuilder<> &Builder) {
22592360
LLVMContext &C = F->getContext();
@@ -4204,81 +4305,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
42044305

42054306
if (!IsX86 && Name == "stackprotectorcheck") {
42064307
Rep = nullptr;
4207-
} else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
4208-
Value *Arg = CI->getArgOperand(0);
4209-
Value *Neg = Builder.CreateNeg(Arg, "neg");
4210-
Value *Cmp = Builder.CreateICmpSGE(
4211-
Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
4212-
Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
4213-
} else if (IsNVVM && (Name.starts_with("atomic.load.add.f32.p") ||
4214-
Name.starts_with("atomic.load.add.f64.p"))) {
4215-
Value *Ptr = CI->getArgOperand(0);
4216-
Value *Val = CI->getArgOperand(1);
4217-
Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
4218-
AtomicOrdering::SequentiallyConsistent);
4219-
} else if (IsNVVM && Name.consume_front("max.") &&
4220-
(Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
4221-
Name == "ui" || Name == "ull")) {
4222-
Value *Arg0 = CI->getArgOperand(0);
4223-
Value *Arg1 = CI->getArgOperand(1);
4224-
Value *Cmp = Name.starts_with("u")
4225-
? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
4226-
: Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
4227-
Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
4228-
} else if (IsNVVM && Name.consume_front("min.") &&
4229-
(Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
4230-
Name == "ui" || Name == "ull")) {
4231-
Value *Arg0 = CI->getArgOperand(0);
4232-
Value *Arg1 = CI->getArgOperand(1);
4233-
Value *Cmp = Name.starts_with("u")
4234-
? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
4235-
: Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
4236-
Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
4237-
} else if (IsNVVM && Name == "clz.ll") {
4238-
// llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
4239-
Value *Arg = CI->getArgOperand(0);
4240-
Value *Ctlz = Builder.CreateCall(
4241-
Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
4242-
{Arg->getType()}),
4243-
{Arg, Builder.getFalse()}, "ctlz");
4244-
Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
4245-
} else if (IsNVVM && Name == "popc.ll") {
4246-
// llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
4247-
// i64.
4248-
Value *Arg = CI->getArgOperand(0);
4249-
Value *Popc = Builder.CreateCall(
4250-
Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
4251-
{Arg->getType()}),
4252-
Arg, "ctpop");
4253-
Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
42544308
} else if (IsNVVM) {
4255-
if (Name == "h2f") {
4256-
Rep =
4257-
Builder.CreateCall(Intrinsic::getDeclaration(
4258-
F->getParent(), Intrinsic::convert_from_fp16,
4259-
{Builder.getFloatTy()}),
4260-
CI->getArgOperand(0), "h2f");
4261-
} else {
4262-
Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
4263-
if (IID != Intrinsic::not_intrinsic &&
4264-
!F->getReturnType()->getScalarType()->isBFloatTy()) {
4265-
rename(F);
4266-
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
4267-
SmallVector<Value *, 2> Args;
4268-
for (size_t I = 0; I < NewFn->arg_size(); ++I) {
4269-
Value *Arg = CI->getArgOperand(I);
4270-
Type *OldType = Arg->getType();
4271-
Type *NewType = NewFn->getArg(I)->getType();
4272-
Args.push_back((OldType->isIntegerTy() &&
4273-
NewType->getScalarType()->isBFloatTy())
4274-
? Builder.CreateBitCast(Arg, NewType)
4275-
: Arg);
4276-
}
4277-
Rep = Builder.CreateCall(NewFn, Args);
4278-
if (F->getReturnType()->isIntegerTy())
4279-
Rep = Builder.CreateBitCast(Rep, F->getReturnType());
4280-
}
4281-
}
4309+
Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
42824310
} else if (IsX86) {
42834311
Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
42844312
} else if (IsARM) {

0 commit comments

Comments
 (0)