@@ -1490,7 +1490,7 @@ void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
  Observer.changedInstr(*BrCond);
}

-
+
bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
  MachineIRBuilder HelperBuilder(MI);
  GISelObserverWrapper DummyObserver;
@@ -5286,6 +5286,156 @@ MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) {
  return MIB.buildMul(Ty, Res, Factor);
}

+ bool CombinerHelper::matchDivByPow2(MachineInstr &MI, bool IsSigned) {
+   assert((MI.getOpcode() == TargetOpcode::G_SDIV ||
+           MI.getOpcode() == TargetOpcode::G_UDIV) &&
+          "Expected SDIV or UDIV");
+   auto &Div = cast<GenericMachineInstr>(MI);
+   Register RHS = Div.getReg(2);
+   auto MatchPow2 = [&](const Constant *C) {
+     if (auto *CI = dyn_cast<ConstantInt>(C)) {
+       if (CI->getValue().isPowerOf2())
+         return true;
+       if (IsSigned && CI->getValue().isNegatedPowerOf2())
+         return true;
+     }
+     return false;
+   };
+   return matchUnaryPredicate(MRI, RHS, MatchPow2, /*AllowUndefs=*/false);
+ }
+
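A quick way to see what the predicate above accepts: a divisor qualifies if it is +2^k, or additionally -2^k when the division is signed. The following standalone sketch mirrors that logic in plain C++ (illustrative names only, no LLVM APIs; APInt semantics are only approximated for 64-bit values):

#include <cassert>
#include <cstdint>

// Plain-C++ analogue of the MatchPow2 predicate above (illustrative only).
static bool isPow2(uint64_t V) { return V != 0 && (V & (V - 1)) == 0; }

static bool divisorQualifies(int64_t C, bool IsSigned) {
  uint64_t U = static_cast<uint64_t>(C);
  if (isPow2(U))
    return true;
  // Two's-complement negation in unsigned arithmetic avoids signed overflow.
  return IsSigned && isPow2(~U + 1);
}

int main() {
  assert(divisorQualifies(8, /*IsSigned=*/false));   // udiv by 8
  assert(!divisorQualifies(-8, /*IsSigned=*/false)); // bit pattern is not 2^k
  assert(divisorQualifies(-8, /*IsSigned=*/true));   // sdiv by -8
  assert(!divisorQualifies(6, /*IsSigned=*/true));   // not a power of two
  return 0;
}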
+ void CombinerHelper::applySDivByPow2(MachineInstr &MI) {
+   assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
+   auto &SDiv = cast<GenericMachineInstr>(MI);
+   Register Dst = SDiv.getReg(0);
+   Register LHS = SDiv.getReg(1);
+   Register RHS = SDiv.getReg(2);
+   LLT Ty = MRI.getType(Dst);
+   LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+
+   Builder.setInstrAndDebugLoc(MI);
+
+   // Effectively we want to lower G_SDIV %lhs, %rhs, where %rhs is a power of 2,
+   // to the following version:
+   //
+   // %bits = G_CONSTANT $bitwidth
+   // %c1 = G_CTTZ %rhs
+   // %c1 = G_ZEXT %c1
+   // %inexact = G_SUB %bits, %c1
+   // %tmp = G_CONSTANT ($bitwidth - 1)
+   // %sign = G_ASHR %lhs, %tmp
+   // %srl = G_LSHR %sign, %inexact
+   // %add = G_ADD %lhs, %srl
+   // %sra = G_ASHR %add, %c1
+   // %one = G_CONSTANT $1
+   // %allones = G_CONSTANT $111..1
+   // %isone = G_ICMP EQ %rhs, %one
+   // %isallones = G_ICMP EQ %rhs, %allones
+   // %isoneorallones = G_OR %isone, %isallones
+   // %sra = G_SELECT %isoneorallones, %lhs, %sra
+   // %zero = G_CONSTANT $0
+   // %sub = G_SUB %zero, %sra
+   // %isneg = G_ICMP SLT %rhs, %zero
+   // %res = G_SELECT %isneg, %sub, %sra
+   //
+   // When %rhs is a constant integer or a splat vector, we can check its value
+   // at compile time, so the first two G_ICMP checks, as well as the
+   // corresponding non-taken branches, can be eliminated. This generates
+   // compact code even without any constant folding afterwards. When %rhs is
+   // not a splat vector, we have to emit those checks as instructions.
+
+   unsigned Bitwidth = Ty.getScalarSizeInBits();
+   auto Zero = Builder.buildConstant(Ty, 0);
+
+   if (auto RHSC = getConstantOrConstantSplatVector(RHS)) {
+     // Special case: (sdiv X, 1) -> X
+     if (RHSC->isOne()) {
+       replaceSingleDefInstWithReg(MI, LHS);
+       return;
+     }
+     // Special case: (sdiv X, -1) -> 0 - X
+     if (RHSC->isAllOnes()) {
+       auto Sub = Builder.buildSub(Ty, Zero, LHS);
+       replaceSingleDefInstWithReg(MI, Sub->getOperand(0).getReg());
+       return;
+     }
+
+     unsigned TrailingZeros = RHSC->countTrailingZeros();
+     auto C1 = Builder.buildConstant(ShiftAmtTy, TrailingZeros);
+     auto Inexact = Builder.buildConstant(ShiftAmtTy, Bitwidth - TrailingZeros);
+     auto Sign = Builder.buildAShr(
+         Ty, LHS, Builder.buildConstant(ShiftAmtTy, Bitwidth - 1));
+     // Add (LHS < 0) ? abs2 - 1 : 0;
+     auto Srl = Builder.buildLShr(Ty, Sign, Inexact);
+     auto Add = Builder.buildAdd(Ty, LHS, Srl);
+     auto Sra = Builder.buildAShr(Ty, Add, C1);
+
+     // If dividing by a positive value, we're done. Otherwise, the result must
+     // be negated.
+     auto Res = RHSC->isNegative() ? Builder.buildSub(Ty, Zero, Sra) : Sra;
+     replaceSingleDefInstWithReg(MI, Res->getOperand(0).getReg());
+     return;
+   }
+
+   // RHS is not a splat vector. Build the above version with instructions.
+   auto Bits = Builder.buildConstant(ShiftAmtTy, Bitwidth);
+   auto C1 = Builder.buildCTTZ(Ty, RHS);
+   C1 = Builder.buildZExtOrTrunc(ShiftAmtTy, C1);
+   auto Inexact = Builder.buildSub(ShiftAmtTy, Bits, C1);
+   auto Sign = Builder.buildAShr(
+       Ty, LHS, Builder.buildConstant(ShiftAmtTy, Bitwidth - 1));
+
+   // Add (LHS < 0) ? abs2 - 1 : 0;
+   auto Srl = Builder.buildLShr(Ty, Sign, Inexact);
+   auto Add = Builder.buildAdd(Ty, LHS, Srl);
+   auto Sra = Builder.buildAShr(Ty, Add, C1);
+
+   LLT CCVT = LLT::vector(Ty.getElementCount(), 1);
+
+   auto One = Builder.buildConstant(Ty, 1);
+   auto AllOnes =
+       Builder.buildConstant(Ty, APInt::getAllOnes(Ty.getScalarSizeInBits()));
+   auto IsOne = Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, One);
+   auto IsAllOnes =
+       Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, AllOnes);
+   auto IsOneOrAllOnes = Builder.buildOr(CCVT, IsOne, IsAllOnes);
+   Sra = Builder.buildSelect(Ty, IsOneOrAllOnes, LHS, Sra);
+
+   // If dividing by a positive value, we're done. Otherwise, the result must
+   // be negated.
+   auto Sub = Builder.buildSub(Ty, Zero, Sra);
+   auto IsNeg = Builder.buildICmp(CmpInst::Predicate::ICMP_SLT, CCVT, RHS, Zero);
+   auto Res = Builder.buildSelect(Ty, IsNeg, Sub, Sra);
+   replaceSingleDefInstWithReg(MI, Res->getOperand(0).getReg());
+ }
+
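To make the shift sequence above concrete, here is a standalone C++ sketch of the same arithmetic for a 32-bit scalar (illustrative only, not LLVM code; the names are made up, and it assumes arithmetic right shift for negative signed values, which C++20 guarantees):

#include <cassert>
#include <cstdint>

// Round-towards-zero signed division by 2^TrailingZeros, mirroring the
// bias-and-shift sequence built above.
int32_t sdivByPow2(int32_t LHS, unsigned TrailingZeros, bool NegativeDivisor) {
  const unsigned Bitwidth = 32;
  assert(TrailingZeros > 0 && TrailingZeros < Bitwidth);
  // %sign = G_ASHR %lhs, (bitwidth - 1): 0 if LHS >= 0, all ones if LHS < 0.
  int32_t Sign = LHS >> (Bitwidth - 1);
  // %srl = G_LSHR %sign, %inexact: contributes abs(divisor) - 1 only when LHS
  // is negative, which turns the truncating shift below into
  // round-towards-zero division.
  uint32_t Bias = static_cast<uint32_t>(Sign) >> (Bitwidth - TrailingZeros);
  int32_t Add = LHS + static_cast<int32_t>(Bias);
  int32_t Sra = Add >> TrailingZeros;
  // A negative divisor (-2^k) additionally negates the result.
  return NegativeDivisor ? -Sra : Sra;
}

int main() {
  assert(sdivByPow2(-7, 2, false) == -1); // -7 / 4
  assert(sdivByPow2(7, 2, false) == 1);   //  7 / 4
  assert(sdivByPow2(-8, 2, false) == -2); // -8 / 4
  assert(sdivByPow2(7, 2, true) == -1);   //  7 / -4
  return 0;
}

The bias term is what separates round-towards-zero sdiv from the flooring behaviour of a bare arithmetic shift; without it, -7 / 4 would come out as -2 instead of -1.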
+ void CombinerHelper::applyUDivByPow2(MachineInstr &MI) {
+   assert(MI.getOpcode() == TargetOpcode::G_UDIV && "Expected UDIV");
+   auto &UDiv = cast<GenericMachineInstr>(MI);
+   Register Dst = UDiv.getReg(0);
+   Register LHS = UDiv.getReg(1);
+   Register RHS = UDiv.getReg(2);
+   LLT Ty = MRI.getType(Dst);
+   LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+
+   Builder.setInstrAndDebugLoc(MI);
+
+   auto RHSC = getIConstantVRegValWithLookThrough(RHS, MRI);
+   assert(RHSC.has_value() && "RHS must be a constant");
+   auto RHSCV = RHSC->Value;
+
+   // Special case: (udiv X, 1) -> X
+   if (RHSCV.isOne()) {
+     replaceSingleDefInstWithReg(MI, LHS);
+     return;
+   }
+
+   unsigned TrailingZeros = RHSCV.countTrailingZeros();
+   auto C1 = Builder.buildConstant(ShiftAmtTy, TrailingZeros);
+   auto Res = Builder.buildLShr(Ty, LHS, C1);
+   replaceSingleDefInstWithReg(MI, Res->getOperand(0).getReg());
+ }
+
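The unsigned case rests on the identity x udiv 2^k == x lshr k, which is all the lowering above emits once the matcher has guaranteed a constant power-of-two divisor. A minimal standalone check (plain C++, illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  // Unsigned division by a power of two equals a logical right shift by the
  // divisor's trailing-zero count.
  const uint32_t Values[] = {0u, 1u, 7u, 1234567u, 0xFFFFFFFFu};
  const unsigned Shifts[] = {1u, 3u, 31u};
  for (uint32_t X : Values)
    for (unsigned TrailingZeros : Shifts)
      assert(X / (1u << TrailingZeros) == (X >> TrailingZeros));
  return 0;
}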
bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UMULH);
  Register RHS = MI.getOperand(2).getReg();