@@ -322,6 +322,104 @@ void SPIRVRegularizeLLVMBase::expandSYCLTypeUsing(Module *M) {
322
322
expandVIDWithSYCLTypeByValComp (F);
323
323
}
324
324
325
+ // In this function, we handle two conversion operations
326
+ // 1. fptoui.sat.iX.fY (X is not 8,16,32,64; Y is 32 or 64)
327
+ // 2. fptosi.sat.iX.fY (X is not 8,16,32,64; Y is 32 or 64)
328
+ // Such non-standard integer types cannot be handled in SPIR-V. Hence, they
329
+ // will be promoted to
330
+ // 1. fptoui.sat.i64.fY (Y is 32 or 64)
331
+ // 2. fptosi.sat.i64.fY (Y is 32 or 64)
332
+ // However, LLVM documentation requires the following rules to be obeyed.
333
+ // Rule 1: If the argument is any NaN, zero is returned.
334
+ // Rule 2: If the argument is smaller than the smallest representable
335
+ // (un)signed integer of the result type, the smallest representable
336
+ // (un)signed integer is returned.
337
+ // Rule 3: If the argument is larger than the largest representable (un)signed
338
+ // integer of the result type, the largest representable (un)signed integer is
339
+ // returned.
340
+ // Rule 4: Otherwise, the result of rounding the argument towards zero is
341
+ // returned.
342
+ // Rules 1 & 4 are preserved when promoting iX to i64. For preserving Rule 2
343
+ // and Rule 3, we saturate the result of the promoted instruction based on
344
+ // original integer type (iX)
345
+ // Example:
346
+ // Input:
347
+ // %0 = call i2 @llvm.fptosi.sat.i2.f32(float %input)
348
+ // %1 = sext i32 %0
349
+ // Output:
350
+ // %0 = call i32 @_Z17convert_long_satf(float %input)
351
+ // %1 = icmp sge i32 %0, 1 <Largest 2-bit signed integer>
352
+ // %2 = icmp sle i32 %0, -2 <Smallest 2-bit signed integer>
353
+ // %3 = select i1 %1, i32 1, i32 %0
354
+ // %4 = select i1 %2, i32 -2, i32 %3
355
+ // Replace uses of %1 in Input with %4 in Output
356
+ void SPIRVRegularizeLLVMBase::cleanupConversionToNonStdIntegers (Module *M) {
357
+ for (auto FI = M->begin (), FE = M->end (); FI != FE;) {
358
+ Function *F = &(*FI++);
359
+ std::vector<Instruction *> ToErase;
360
+ auto IID = F->getIntrinsicID ();
361
+ if (IID != Intrinsic::fptosi_sat && IID != Intrinsic::fptoui_sat)
362
+ continue ;
363
+ for (auto *I : F->users ()) {
364
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
365
+ // TODO: Vector type not supported yet.
366
+ if (isa<VectorType>(II->getType ()))
367
+ continue ;
368
+ auto IID = II->getIntrinsicID ();
369
+ auto IntBitWidth = II->getType ()->getScalarSizeInBits ();
370
+ if (IntBitWidth == 8 || IntBitWidth == 16 || IntBitWidth == 32 ||
371
+ IntBitWidth == 64 )
372
+ continue ;
373
+ if (IID == Intrinsic::fptosi_sat) {
374
+ // Identify sext (user of II). Make sure that's the only use of II.
375
+ auto *User = II->getUniqueUndroppableUser ();
376
+ if (!User || !isa<SExtInst>(User))
377
+ continue ;
378
+ auto *SExtI = dyn_cast<SExtInst>(User);
379
+ auto *NewIType = SExtI->getType ();
380
+ IRBuilder<> IRB (II);
381
+ auto *NewII = IRB.CreateIntrinsic (
382
+ IID, {NewIType, II->getOperand (0 )->getType ()}, II->getOperand (0 ));
383
+ Constant *MaxVal = ConstantInt::get (
384
+ NewIType, APInt::getSignedMaxValue (IntBitWidth).getSExtValue ());
385
+ Constant *MinVal = ConstantInt::get (
386
+ NewIType, APInt::getSignedMinValue (IntBitWidth).getSExtValue ());
387
+ auto *GTMax = IRB.CreateICmp (CmpInst::ICMP_SGE, NewII, MaxVal);
388
+ auto *LTMin = IRB.CreateICmp (CmpInst::ICMP_SLE, NewII, MinVal);
389
+ auto *SatMax = IRB.CreateSelect (GTMax, MaxVal, NewII);
390
+ auto *SatMin = IRB.CreateSelect (LTMin, MinVal, SatMax);
391
+ SExtI->replaceAllUsesWith (SatMin);
392
+ ToErase.push_back (SExtI);
393
+ ToErase.push_back (II);
394
+ }
395
+ if (IID == Intrinsic::fptoui_sat) {
396
+ // Identify zext (user of II). Make sure that's the only use of II.
397
+ auto *User = II->getUniqueUndroppableUser ();
398
+ if (!User || !isa<ZExtInst>(User))
399
+ continue ;
400
+ auto *ZExtI = dyn_cast<ZExtInst>(User);
401
+ auto *NewIType = ZExtI->getType ();
402
+ IRBuilder<> IRB (II);
403
+ auto *NewII = IRB.CreateIntrinsic (
404
+ IID, {NewIType, II->getOperand (0 )->getType ()}, II->getOperand (0 ));
405
+ Constant *MaxVal = ConstantInt::get (
406
+ NewIType, APInt::getMaxValue (IntBitWidth).getZExtValue ());
407
+ auto *GTMax = IRB.CreateICmp (CmpInst::ICMP_UGE, NewII, MaxVal);
408
+ auto *SatMax = IRB.CreateSelect (GTMax, MaxVal, NewII);
409
+ ZExtI->replaceAllUsesWith (SatMax);
410
+ ToErase.push_back (ZExtI);
411
+ ToErase.push_back (II);
412
+ }
413
+ }
414
+ }
415
+ for (Instruction *V : ToErase) {
416
+ assert (V->user_empty ());
417
+ V->dropAllReferences ();
418
+ V->eraseFromParent ();
419
+ }
420
+ }
421
+ }
422
+
325
423
bool SPIRVRegularizeLLVMBase::runRegularizeLLVM (Module &Module) {
326
424
M = &Module;
327
425
Ctx = &M->getContext ();
@@ -404,6 +502,7 @@ bool SPIRVRegularizeLLVMBase::regularize() {
404
502
eraseUselessFunctions (M);
405
503
addKernelEntryPoint (M);
406
504
expandSYCLTypeUsing (M);
505
+ cleanupConversionToNonStdIntegers (M);
407
506
408
507
for (auto I = M->begin (), E = M->end (); I != E;) {
409
508
Function *F = &(*I++);
0 commit comments