@@ -138,7 +138,6 @@ using namespace genx;
138
138
static cl::opt<bool >
139
139
EnableGenXByteWidening (" enable-genx-byte-widening" , cl::init(true ),
140
140
cl::Hidden, cl::desc(" Enable GenX byte widening." ));
141
-
142
141
namespace {
143
142
144
143
// GenXLowering : legalize execution widths and GRF crossing
@@ -2391,64 +2390,35 @@ bool GenXLowering::lowerFCmpInst(FCmpInst *Inst) {
2391
2390
2392
2391
// Lower 64-bit integer mul instructions that GenX cannot deal with.
2393
2392
bool GenXLowering::lowerMul64 (Instruction *Inst) {
2393
+
2394
+ LoHiSplitter SplitBuilder (*Inst);
2395
+ if (!SplitBuilder.IsI64Operation ())
2396
+ return false ;
2397
+
2394
2398
IRBuilder<> Builder (Inst);
2395
2399
Builder.SetCurrentDebugLocation (Inst->getDebugLoc ());
2396
- auto Src0 = Inst->getOperand (0 );
2397
- auto Src1 = Inst->getOperand (1 );
2398
- auto ETy = Src0->getType ();
2399
- auto Len = 1 ;
2400
- if (ETy->isVectorTy ()) {
2401
- Len = ETy->getVectorNumElements ();
2402
- ETy = ETy->getVectorElementType ();
2403
- }
2404
- if (!ETy->isIntegerTy () || ETy->getPrimitiveSizeInBits () != 64 )
2405
- return false ;
2406
- auto VTy = VectorType::get (ETy->getInt32Ty (Inst->getContext ()), Len * 2 );
2407
- // create src0 bitcast, then the low and high part
2408
- auto Src0V = Builder.CreateBitCast (Src0, VTy);
2409
- Region R (Inst);
2410
- R.Offset = 0 ;
2411
- R.Width = Len;
2412
- R.NumElements = Len;
2413
- R.Stride = 2 ;
2414
- R.VStride = 0 ;
2415
- auto Src0L = R.createRdRegion (Src0V, " " , Inst, Inst->getDebugLoc ());
2416
- R.Offset = 4 ;
2417
- auto Src0H = R.createRdRegion (Src0V, " " , Inst, Inst->getDebugLoc ());
2418
- // create src1 bitcast, then the low and high part
2419
- auto Src1V = Builder.CreateBitCast (Src1, VTy);
2420
- R.Offset = 0 ;
2421
- auto Src1L = R.createRdRegion (Src1V, " " , Inst, Inst->getDebugLoc ());
2422
- R.Offset = 4 ;
2423
- auto Src1H = R.createRdRegion (Src1V, " " , Inst, Inst->getDebugLoc ());
2400
+
2401
+ auto Src0 = SplitBuilder.splitOperand (0 );
2402
+ auto Src1 = SplitBuilder.splitOperand (1 );
2403
+
2424
2404
// create muls and adds
2425
- auto ResL = Builder.CreateMul (Src0L, Src1L );
2405
+ auto * ResL = Builder.CreateMul (Src0. Lo , Src1. Lo );
2426
2406
// create the umulh intrinsic to get the carry part
2427
- Type *tys[2 ];
2428
- SmallVector<llvm::Value *, 2 > args;
2429
- // build type-list
2430
- tys[0 ] = ResL->getType ();
2431
- tys[1 ] = Src0L->getType ();
2407
+ Type *tys[2 ] = {ResL->getType (), Src0.Lo ->getType ()};
2432
2408
// build argument list
2433
- args.push_back (Src0L);
2434
- args.push_back (Src1L);
2435
- auto M = Inst->getParent ()->getParent ()->getParent ();
2409
+ SmallVector<llvm::Value *, 2 > args{Src0.Lo , Src1.Lo };
2410
+ auto *M = Inst->getModule ();
2436
2411
Function *IntrinFunc =
2437
2412
GenXIntrinsic::getGenXDeclaration (M, GenXIntrinsic::genx_umulh, tys);
2438
- Instruction *Cari = CallInst::Create (IntrinFunc, args, " " , Inst);
2439
- Cari->setDebugLoc (Inst->getDebugLoc ());
2440
- auto Temp0 = Builder.CreateMul (Src0L, Src1H);
2441
- auto Temp1 = Builder.CreateAdd (Cari, Temp0);
2442
- auto Temp2 = Builder.CreateMul (Src0H, Src1L);
2443
- auto ResH = Builder.CreateAdd (Temp2, Temp1);
2444
- // create the write-regions
2445
- auto UndefV = UndefValue::get (VTy);
2446
- R.Offset = 0 ;
2447
- auto WrL = R.createWrRegion (UndefV, ResL, " WrLow" , Inst, Inst->getDebugLoc ());
2448
- R.Offset = 4 ;
2449
- auto WrH = R.createWrRegion (WrL, ResH, " WrHigh" , Inst, Inst->getDebugLoc ());
2413
+
2414
+ auto *Cari = Builder.CreateCall (IntrinFunc, args, " .cari" );
2415
+ auto *Temp0 = Builder.CreateMul (Src0.Lo , Src1.Hi );
2416
+ auto *Temp1 = Builder.CreateAdd (Cari, Temp0);
2417
+ auto *Temp2 = Builder.CreateMul (Src0.Hi , Src1.Lo );
2418
+ auto *ResH = Builder.CreateAdd (Temp2, Temp1);
2419
+
2450
2420
// create the bitcast to the destination-type
2451
- auto Replace = Builder. CreateBitCast (WrH, Inst-> getType () , " mul64" );
2421
+ auto * Replace = SplitBuilder. combineSplit (*ResL, *ResH , " mul64" );
2452
2422
Inst->replaceAllUsesWith (Replace);
2453
2423
ToErase.push_back (Inst);
2454
2424
return true ;
0 commit comments