@@ -3210,6 +3210,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
  case G_SDIVREM:
  case G_UDIVREM:
    return lowerDIVREM(MI);
+  case G_FSHL:
+  case G_FSHR:
+    return lowerFunnelShift(MI);
  }
}
@@ -5207,6 +5210,132 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) {
  }
}
+// Check that (every element of) Reg is undef or not an exact multiple of BW.
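+// For example, with BW == 8, constant amounts 1..7 (and undef elements)
+// satisfy the predicate, while 0, 8 and 16 do not.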
+static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
+                                        Register Reg, unsigned BW) {
+  return matchUnaryPredicate(
+      MRI, Reg,
+      [=](const Constant *C) {
+        // Null constant here means an undef.
+        const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
+        return !CI || CI->getValue().urem(BW) != 0;
+      },
+      /*AllowUndefs*/ true);
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
+  Register Dst = MI.getOperand(0).getReg();
+  Register X = MI.getOperand(1).getReg();
+  Register Y = MI.getOperand(2).getReg();
+  Register Z = MI.getOperand(3).getReg();
+  LLT Ty = MRI.getType(Dst);
+  LLT ShTy = MRI.getType(Z);
+
+  unsigned BW = Ty.getScalarSizeInBits();
+  const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
+  unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
+
+  if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
+    // fshl X, Y, Z -> fshr X, Y, -Z
+    // fshr X, Y, Z -> fshl X, Y, -Z
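+    // Funnel-shift amounts are interpreted modulo BW, so e.g. for BW == 8,
+    // fshl X, Y, 3 == fshr X, Y, 5 because -3 == 5 (mod 8).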
+    auto Zero = MIRBuilder.buildConstant(ShTy, 0);
+    Z = MIRBuilder.buildSub(ShTy, Zero, Z).getReg(0);
+  } else {
+    // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
+    // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
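+    // ~Z % BW == BW - 1 - (Z % BW), so the extra one-bit pre-shift restores
+    // the full amount without ever requiring a shift by exactly BW, which a
+    // single funnel shift cannot express when Z % BW == 0.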
+    auto One = MIRBuilder.buildConstant(ShTy, 1);
+    if (IsFSHL) {
+      Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
+      X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
+    } else {
+      X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
+      Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
+    }
+
+    Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
+  }
+
+  MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
+  Register Dst = MI.getOperand(0).getReg();
+  Register X = MI.getOperand(1).getReg();
+  Register Y = MI.getOperand(2).getReg();
+  Register Z = MI.getOperand(3).getReg();
+  LLT Ty = MRI.getType(Dst);
+  LLT ShTy = MRI.getType(Z);
+
+  const unsigned BW = Ty.getScalarSizeInBits();
+  const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
+
+  Register ShX, ShY;
+  Register ShAmt, InvShAmt;
+
+  // FIXME: Emit optimized urem by constant instead of letting it expand later.
+  if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
+    // fshl: X << C | Y >> (BW - C)
+    // fshr: X << (BW - C) | Y >> C
+    // where C = Z % BW is not zero
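+    // e.g. for BW == 8, Z == 3: fshl -> (X << 3) | (Y >> 5).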
+    auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
+    ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
+    InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
+    ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
+    ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
+  } else {
+    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
+    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
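+    // Splitting the second shift in two keeps every individual shift amount
+    // in [0, BW - 1] even when Z % BW == 0, where a single shift would have
+    // to shift by BW.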
+    auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
+    if (isPowerOf2_32(BW)) {
+      // Z % BW -> Z & (BW - 1)
+      ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
+      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
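+      // (~Z == -Z - 1, and masking with BW - 1 reduces modulo BW.)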
+      auto NotZ = MIRBuilder.buildNot(ShTy, Z);
+      InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
+    } else {
+      auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
+      ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
+      InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
+    }
+
+    auto One = MIRBuilder.buildConstant(ShTy, 1);
+    if (IsFSHL) {
+      ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
+      auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
+      ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
+    } else {
+      auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
+      ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
+      ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
+    }
+  }
+
+  MIRBuilder.buildOr(Dst, ShX, ShY);
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
+  // These operations approximately do the following (while avoiding undefined
+  // shifts by BW):
+  // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+  // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
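+  // i.e. G_FSHL selects the top BW bits and G_FSHR the bottom BW bits of the
+  // 2*BW-bit concatenation X:Y rotated left/right by Z % BW.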
+  Register Dst = MI.getOperand(0).getReg();
+  LLT Ty = MRI.getType(Dst);
+  LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
+
+  bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
+  unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
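+  // Lowering via the inverse opcode only pays off if the target can handle
+  // that opcode; if it would be lowered as well, expand to shifts directly.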
+  if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
+    return lowerFunnelShiftAsShifts(MI);
+  return lowerFunnelShiftWithInverse(MI);
+}
+
// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
// representation.
LegalizerHelper::LegalizeResult