@@ -1268,6 +1268,9 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
1268
1268
else if (Name.consume_front (" atomic.load.add." ))
1269
1269
// nvvm.atomic.load.add.{f32.p,f64.p}
1270
1270
Expand = Name.starts_with (" f32.p" ) || Name.starts_with (" f64.p" );
1271
+ else if (Name.consume_front (" rotate." ))
1272
+ // nvvm.rotate.{b32,b64,right.b64}
1273
+ Expand = Name == " b32" || Name == " b64" || Name == " right.b64" ;
1271
1274
else
1272
1275
Expand = false ;
1273
1276
@@ -2254,6 +2257,104 @@ void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2254
2257
}
2255
2258
}
2256
2259
2260
+ static Value *upgradeNVVMIntrinsicCall (StringRef Name, CallBase *CI,
2261
+ Function *F, IRBuilder<> &Builder) {
2262
+ Value *Rep = nullptr ;
2263
+
2264
+ if (Name == " abs.i" || Name == " abs.ll" ) {
2265
+ Value *Arg = CI->getArgOperand (0 );
2266
+ Value *Neg = Builder.CreateNeg (Arg, " neg" );
2267
+ Value *Cmp = Builder.CreateICmpSGE (
2268
+ Arg, llvm::Constant::getNullValue (Arg->getType ()), " abs.cond" );
2269
+ Rep = Builder.CreateSelect (Cmp, Arg, Neg, " abs" );
2270
+ } else if (Name.starts_with (" atomic.load.add.f32.p" ) ||
2271
+ Name.starts_with (" atomic.load.add.f64.p" )) {
2272
+ Value *Ptr = CI->getArgOperand (0 );
2273
+ Value *Val = CI->getArgOperand (1 );
2274
+ Rep = Builder.CreateAtomicRMW (AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign (),
2275
+ AtomicOrdering::SequentiallyConsistent);
2276
+ } else if (Name.consume_front (" max." ) &&
2277
+ (Name == " s" || Name == " i" || Name == " ll" || Name == " us" ||
2278
+ Name == " ui" || Name == " ull" )) {
2279
+ Value *Arg0 = CI->getArgOperand (0 );
2280
+ Value *Arg1 = CI->getArgOperand (1 );
2281
+ Value *Cmp = Name.starts_with (" u" )
2282
+ ? Builder.CreateICmpUGE (Arg0, Arg1, " max.cond" )
2283
+ : Builder.CreateICmpSGE (Arg0, Arg1, " max.cond" );
2284
+ Rep = Builder.CreateSelect (Cmp, Arg0, Arg1, " max" );
2285
+ } else if (Name.consume_front (" min." ) &&
2286
+ (Name == " s" || Name == " i" || Name == " ll" || Name == " us" ||
2287
+ Name == " ui" || Name == " ull" )) {
2288
+ Value *Arg0 = CI->getArgOperand (0 );
2289
+ Value *Arg1 = CI->getArgOperand (1 );
2290
+ Value *Cmp = Name.starts_with (" u" )
2291
+ ? Builder.CreateICmpULE (Arg0, Arg1, " min.cond" )
2292
+ : Builder.CreateICmpSLE (Arg0, Arg1, " min.cond" );
2293
+ Rep = Builder.CreateSelect (Cmp, Arg0, Arg1, " min" );
2294
+ } else if (Name == " clz.ll" ) {
2295
+ // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2296
+ Value *Arg = CI->getArgOperand (0 );
2297
+ Value *Ctlz = Builder.CreateCall (
2298
+ Intrinsic::getDeclaration (F->getParent (), Intrinsic::ctlz,
2299
+ {Arg->getType ()}),
2300
+ {Arg, Builder.getFalse ()}, " ctlz" );
2301
+ Rep = Builder.CreateTrunc (Ctlz, Builder.getInt32Ty (), " ctlz.trunc" );
2302
+ } else if (Name == " popc.ll" ) {
2303
+ // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2304
+ // i64.
2305
+ Value *Arg = CI->getArgOperand (0 );
2306
+ Value *Popc = Builder.CreateCall (
2307
+ Intrinsic::getDeclaration (F->getParent (), Intrinsic::ctpop,
2308
+ {Arg->getType ()}),
2309
+ Arg, " ctpop" );
2310
+ Rep = Builder.CreateTrunc (Popc, Builder.getInt32Ty (), " ctpop.trunc" );
2311
+ } else if (Name == " h2f" ) {
2312
+ Rep = Builder.CreateCall (
2313
+ Intrinsic::getDeclaration (F->getParent (), Intrinsic::convert_from_fp16,
2314
+ {Builder.getFloatTy ()}),
2315
+ CI->getArgOperand (0 ), " h2f" );
2316
+ } else if (Name == " rotate.b32" ) {
2317
+ Value *Arg = CI->getOperand (0 );
2318
+ Value *ShiftAmt = CI->getOperand (1 );
2319
+ Rep = Builder.CreateIntrinsic (Builder.getInt32Ty (), Intrinsic::fshl,
2320
+ {Arg, Arg, ShiftAmt});
2321
+ } else if (Name == " rotate.b64" ) {
2322
+ Type *Int64Ty = Builder.getInt64Ty ();
2323
+ Value *Arg = CI->getOperand (0 );
2324
+ Value *ZExtShiftAmt = Builder.CreateZExt (CI->getOperand (1 ), Int64Ty);
2325
+ Rep = Builder.CreateIntrinsic (Int64Ty, Intrinsic::fshl,
2326
+ {Arg, Arg, ZExtShiftAmt});
2327
+ } else if (Name == " rotate.right.b64" ) {
2328
+ Type *Int64Ty = Builder.getInt64Ty ();
2329
+ Value *Arg = CI->getOperand (0 );
2330
+ Value *ZExtShiftAmt = Builder.CreateZExt (CI->getOperand (1 ), Int64Ty);
2331
+ Rep = Builder.CreateIntrinsic (Int64Ty, Intrinsic::fshr,
2332
+ {Arg, Arg, ZExtShiftAmt});
2333
+ } else {
2334
+ Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic (Name);
2335
+ if (IID != Intrinsic::not_intrinsic &&
2336
+ !F->getReturnType ()->getScalarType ()->isBFloatTy ()) {
2337
+ rename (F);
2338
+ Function *NewFn = Intrinsic::getDeclaration (F->getParent (), IID);
2339
+ SmallVector<Value *, 2 > Args;
2340
+ for (size_t I = 0 ; I < NewFn->arg_size (); ++I) {
2341
+ Value *Arg = CI->getArgOperand (I);
2342
+ Type *OldType = Arg->getType ();
2343
+ Type *NewType = NewFn->getArg (I)->getType ();
2344
+ Args.push_back (
2345
+ (OldType->isIntegerTy () && NewType->getScalarType ()->isBFloatTy ())
2346
+ ? Builder.CreateBitCast (Arg, NewType)
2347
+ : Arg);
2348
+ }
2349
+ Rep = Builder.CreateCall (NewFn, Args);
2350
+ if (F->getReturnType ()->isIntegerTy ())
2351
+ Rep = Builder.CreateBitCast (Rep, F->getReturnType ());
2352
+ }
2353
+ }
2354
+
2355
+ return Rep;
2356
+ }
2357
+
2257
2358
static Value *upgradeX86IntrinsicCall (StringRef Name, CallBase *CI, Function *F,
2258
2359
IRBuilder<> &Builder) {
2259
2360
LLVMContext &C = F->getContext ();
@@ -4204,81 +4305,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
4204
4305
4205
4306
if (!IsX86 && Name == " stackprotectorcheck" ) {
4206
4307
Rep = nullptr ;
4207
- } else if (IsNVVM && (Name == " abs.i" || Name == " abs.ll" )) {
4208
- Value *Arg = CI->getArgOperand (0 );
4209
- Value *Neg = Builder.CreateNeg (Arg, " neg" );
4210
- Value *Cmp = Builder.CreateICmpSGE (
4211
- Arg, llvm::Constant::getNullValue (Arg->getType ()), " abs.cond" );
4212
- Rep = Builder.CreateSelect (Cmp, Arg, Neg, " abs" );
4213
- } else if (IsNVVM && (Name.starts_with (" atomic.load.add.f32.p" ) ||
4214
- Name.starts_with (" atomic.load.add.f64.p" ))) {
4215
- Value *Ptr = CI->getArgOperand (0 );
4216
- Value *Val = CI->getArgOperand (1 );
4217
- Rep = Builder.CreateAtomicRMW (AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign (),
4218
- AtomicOrdering::SequentiallyConsistent);
4219
- } else if (IsNVVM && Name.consume_front (" max." ) &&
4220
- (Name == " s" || Name == " i" || Name == " ll" || Name == " us" ||
4221
- Name == " ui" || Name == " ull" )) {
4222
- Value *Arg0 = CI->getArgOperand (0 );
4223
- Value *Arg1 = CI->getArgOperand (1 );
4224
- Value *Cmp = Name.starts_with (" u" )
4225
- ? Builder.CreateICmpUGE (Arg0, Arg1, " max.cond" )
4226
- : Builder.CreateICmpSGE (Arg0, Arg1, " max.cond" );
4227
- Rep = Builder.CreateSelect (Cmp, Arg0, Arg1, " max" );
4228
- } else if (IsNVVM && Name.consume_front (" min." ) &&
4229
- (Name == " s" || Name == " i" || Name == " ll" || Name == " us" ||
4230
- Name == " ui" || Name == " ull" )) {
4231
- Value *Arg0 = CI->getArgOperand (0 );
4232
- Value *Arg1 = CI->getArgOperand (1 );
4233
- Value *Cmp = Name.starts_with (" u" )
4234
- ? Builder.CreateICmpULE (Arg0, Arg1, " min.cond" )
4235
- : Builder.CreateICmpSLE (Arg0, Arg1, " min.cond" );
4236
- Rep = Builder.CreateSelect (Cmp, Arg0, Arg1, " min" );
4237
- } else if (IsNVVM && Name == " clz.ll" ) {
4238
- // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
4239
- Value *Arg = CI->getArgOperand (0 );
4240
- Value *Ctlz = Builder.CreateCall (
4241
- Intrinsic::getDeclaration (F->getParent (), Intrinsic::ctlz,
4242
- {Arg->getType ()}),
4243
- {Arg, Builder.getFalse ()}, " ctlz" );
4244
- Rep = Builder.CreateTrunc (Ctlz, Builder.getInt32Ty (), " ctlz.trunc" );
4245
- } else if (IsNVVM && Name == " popc.ll" ) {
4246
- // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
4247
- // i64.
4248
- Value *Arg = CI->getArgOperand (0 );
4249
- Value *Popc = Builder.CreateCall (
4250
- Intrinsic::getDeclaration (F->getParent (), Intrinsic::ctpop,
4251
- {Arg->getType ()}),
4252
- Arg, " ctpop" );
4253
- Rep = Builder.CreateTrunc (Popc, Builder.getInt32Ty (), " ctpop.trunc" );
4254
4308
} else if (IsNVVM) {
4255
- if (Name == " h2f" ) {
4256
- Rep =
4257
- Builder.CreateCall (Intrinsic::getDeclaration (
4258
- F->getParent (), Intrinsic::convert_from_fp16,
4259
- {Builder.getFloatTy ()}),
4260
- CI->getArgOperand (0 ), " h2f" );
4261
- } else {
4262
- Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic (Name);
4263
- if (IID != Intrinsic::not_intrinsic &&
4264
- !F->getReturnType ()->getScalarType ()->isBFloatTy ()) {
4265
- rename (F);
4266
- NewFn = Intrinsic::getDeclaration (F->getParent (), IID);
4267
- SmallVector<Value *, 2 > Args;
4268
- for (size_t I = 0 ; I < NewFn->arg_size (); ++I) {
4269
- Value *Arg = CI->getArgOperand (I);
4270
- Type *OldType = Arg->getType ();
4271
- Type *NewType = NewFn->getArg (I)->getType ();
4272
- Args.push_back ((OldType->isIntegerTy () &&
4273
- NewType->getScalarType ()->isBFloatTy ())
4274
- ? Builder.CreateBitCast (Arg, NewType)
4275
- : Arg);
4276
- }
4277
- Rep = Builder.CreateCall (NewFn, Args);
4278
- if (F->getReturnType ()->isIntegerTy ())
4279
- Rep = Builder.CreateBitCast (Rep, F->getReturnType ());
4280
- }
4281
- }
4309
+ Rep = upgradeNVVMIntrinsicCall (Name, CI, F, Builder);
4282
4310
} else if (IsX86) {
4283
4311
Rep = upgradeX86IntrinsicCall (Name, CI, F, Builder);
4284
4312
} else if (IsARM) {
0 commit comments