@@ -1473,6 +1473,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1473
1473
case PPCISD::STFIWX: return "PPCISD::STFIWX";
1474
1474
case PPCISD::VPERM: return "PPCISD::VPERM";
1475
1475
case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1476
+ case PPCISD::XXSPLTI_SP_TO_DP:
1477
+ return "PPCISD::XXSPLTI_SP_TO_DP";
1476
1478
case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1477
1479
case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1478
1480
case PPCISD::VECSHL: return "PPCISD::VECSHL";
@@ -8966,19 +8968,21 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
8966
8968
// Vector related lowering.
8967
8969
//
8968
8970
8969
- /// BuildSplatI - Build a canonical splati of Val with an element size of
8970
- /// SplatSize. Cast the result to VT.
8971
- static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
8972
- SelectionDAG &DAG, const SDLoc &dl) {
8971
+ /// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
8972
+ /// element size of SplatSize. Cast the result to VT.
8973
+ static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
8974
+ SelectionDAG &DAG, const SDLoc &dl) {
8973
8975
static const MVT VTys[] = { // canonical VT to use for each size.
8974
8976
MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
8975
8977
};
8976
8978
8977
8979
EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
8978
8980
8979
- // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
8980
- if (Val == -1)
8981
+ // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
8982
+ if (Val == ((1LU << (SplatSize * 8)) - 1)) {
8981
8983
SplatSize = 1;
8984
+ Val = 0xFF;
8985
+ }
8982
8986
8983
8987
EVT CanonicalVT = VTys[SplatSize-1];
8984
8988
@@ -9113,6 +9117,34 @@ static const SDValue *getNormalLoadInput(const SDValue &Op) {
9113
9117
return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9114
9118
}
9115
9119
9120
+ // Convert the argument APFloat to a single precision APFloat if there is no
9121
+ // loss in information during the conversion to single precision APFloat and the
9122
+ // resulting number is not a denormal number. Return true if successful.
9123
+ bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9124
+ APFloat APFloatToConvert = ArgAPFloat;
9125
+ bool LosesInfo = true;
9126
+ APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9127
+ &LosesInfo);
9128
+ bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9129
+ if (Success)
9130
+ ArgAPFloat = APFloatToConvert;
9131
+ return Success;
9132
+ }
9133
+
9134
+ // Bitcast the argument APInt to a double and convert it to a single precision
9135
+ // APFloat, bitcast the APFloat to an APInt and assign it to the original
9136
+ // argument if there is no loss in information during the conversion from
9137
+ // double to single precision APFloat and the resulting number is not a denormal
9138
+ // number. Return true if successful.
9139
+ bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9140
+ double DpValue = ArgAPInt.bitsToDouble();
9141
+ APFloat APFloatDp(DpValue);
9142
+ bool Success = convertToNonDenormSingle(APFloatDp);
9143
+ if (Success)
9144
+ ArgAPInt = APFloatDp.bitcastToAPInt();
9145
+ return Success;
9146
+ }
9147
+
9116
9148
// If this is a case we can't handle, return null and let the default
9117
9149
// expansion code take care of it. If we CAN select this case, and if it
9118
9150
// selects to a single instruction, return Op. Otherwise, if we can codegen
@@ -9232,9 +9264,23 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9232
9264
APInt APSplatBits, APSplatUndef;
9233
9265
unsigned SplatBitSize;
9234
9266
bool HasAnyUndefs;
9235
- if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9236
- HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
9237
- SplatBitSize > 32) {
9267
+ bool BVNIsConstantSplat =
9268
+ BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9269
+ HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9270
+
9271
+ // If it is a splat of a double, check if we can shrink it to a 32 bit
9272
+ // non-denormal float which when converted back to double gives us the same
9273
+ // double. This is to exploit the XXSPLTIDP instruction.
9274
+ if (BVNIsConstantSplat && Subtarget.hasPrefixInstrs() &&
9275
+ (SplatBitSize == 64) && (Op->getValueType(0) == MVT::v2f64) &&
9276
+ convertToNonDenormSingle(APSplatBits)) {
9277
+ SDValue SplatNode = DAG.getNode(
9278
+ PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9279
+ DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9280
+ return DAG.getBitcast(Op.getValueType(), SplatNode);
9281
+ }
9282
+
9283
+ if (!BVNIsConstantSplat || SplatBitSize > 32) {
9238
9284
9239
9285
const SDValue *InputLoad = getNormalLoadInput(Op.getOperand(0));
9240
9286
// Handle load-and-splat patterns as we have instructions that will do this
@@ -9273,8 +9319,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9273
9319
return SDValue();
9274
9320
}
9275
9321
9276
- unsigned SplatBits = APSplatBits.getZExtValue();
9277
- unsigned SplatUndef = APSplatUndef.getZExtValue();
9322
+ uint64_t SplatBits = APSplatBits.getZExtValue();
9323
+ uint64_t SplatUndef = APSplatUndef.getZExtValue();
9278
9324
unsigned SplatSize = SplatBitSize / 8;
9279
9325
9280
9326
// First, handle single instruction cases.
@@ -9289,17 +9335,30 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9289
9335
return Op;
9290
9336
}
9291
9337
9292
- // We have XXSPLTIB for constant splats one byte wide
9293
- // FIXME: SplatBits is an unsigned int being cast to an int while passing it
9294
- // as an argument to BuildSplatiI. Given SplatSize == 1 it is okay here.
9338
+ // We have XXSPLTIW for constant splats four bytes wide.
9339
+ // Given vector length is a multiple of 4, 2-byte splats can be replaced
9340
+ // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9341
+ // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9342
+ // turned into a 4-byte splat of 0xABABABAB.
9343
+ if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
9344
+ return getCanonicalConstSplat((SplatBits |= SplatBits << 16), SplatSize * 2,
9345
+ Op.getValueType(), DAG, dl);
9346
+
9347
+ if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
9348
+ return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9349
+ dl);
9350
+
9351
+ // We have XXSPLTIB for constant splats one byte wide.
9295
9352
if (Subtarget.hasP9Vector() && SplatSize == 1)
9296
- return BuildSplatI(SplatBits, SplatSize, Op.getValueType(), DAG, dl);
9353
+ return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9354
+ dl);
9297
9355
9298
9356
// If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9299
9357
int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
9300
9358
(32-SplatBitSize));
9301
9359
if (SextVal >= -16 && SextVal <= 15)
9302
- return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
9360
+ return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9361
+ dl);
9303
9362
9304
9363
// Two instruction sequences.
9305
9364
@@ -9330,7 +9389,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9330
9389
// for fneg/fabs.
9331
9390
if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9332
9391
// Make -1 and vspltisw -1:
9333
- SDValue OnesV = BuildSplatI (-1, 4, MVT::v4i32, DAG, dl);
9392
+ SDValue OnesV = getCanonicalConstSplat (-1, 4, MVT::v4i32, DAG, dl);
9334
9393
9335
9394
// Make the VSLW intrinsic, computing 0x8000_0000.
9336
9395
SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
@@ -9358,7 +9417,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9358
9417
9359
9418
// vsplti + shl self.
9360
9419
if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9361
- SDValue Res = BuildSplatI (i, SplatSize, MVT::Other, DAG, dl);
9420
+ SDValue Res = getCanonicalConstSplat (i, SplatSize, MVT::Other, DAG, dl);
9362
9421
static const unsigned IIDs[] = { // Intrinsic to use for each size.
9363
9422
Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9364
9423
Intrinsic::ppc_altivec_vslw
@@ -9369,7 +9428,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9369
9428
9370
9429
// vsplti + srl self.
9371
9430
if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9372
- SDValue Res = BuildSplatI (i, SplatSize, MVT::Other, DAG, dl);
9431
+ SDValue Res = getCanonicalConstSplat (i, SplatSize, MVT::Other, DAG, dl);
9373
9432
static const unsigned IIDs[] = { // Intrinsic to use for each size.
9374
9433
Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9375
9434
Intrinsic::ppc_altivec_vsrw
@@ -9380,7 +9439,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9380
9439
9381
9440
// vsplti + sra self.
9382
9441
if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9383
- SDValue Res = BuildSplatI (i, SplatSize, MVT::Other, DAG, dl);
9442
+ SDValue Res = getCanonicalConstSplat (i, SplatSize, MVT::Other, DAG, dl);
9384
9443
static const unsigned IIDs[] = { // Intrinsic to use for each size.
9385
9444
Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
9386
9445
Intrinsic::ppc_altivec_vsraw
@@ -9392,7 +9451,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9392
9451
// vsplti + rol self.
9393
9452
if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9394
9453
((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9395
- SDValue Res = BuildSplatI (i, SplatSize, MVT::Other, DAG, dl);
9454
+ SDValue Res = getCanonicalConstSplat (i, SplatSize, MVT::Other, DAG, dl);
9396
9455
static const unsigned IIDs[] = { // Intrinsic to use for each size.
9397
9456
Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9398
9457
Intrinsic::ppc_altivec_vrlw
@@ -9403,19 +9462,19 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9403
9462
9404
9463
// t = vsplti c, result = vsldoi t, t, 1
9405
9464
if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9406
- SDValue T = BuildSplatI (i, SplatSize, MVT::v16i8, DAG, dl);
9465
+ SDValue T = getCanonicalConstSplat (i, SplatSize, MVT::v16i8, DAG, dl);
9407
9466
unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9408
9467
return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9409
9468
}
9410
9469
// t = vsplti c, result = vsldoi t, t, 2
9411
9470
if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9412
- SDValue T = BuildSplatI (i, SplatSize, MVT::v16i8, DAG, dl);
9471
+ SDValue T = getCanonicalConstSplat (i, SplatSize, MVT::v16i8, DAG, dl);
9413
9472
unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9414
9473
return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9415
9474
}
9416
9475
// t = vsplti c, result = vsldoi t, t, 3
9417
9476
if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9418
- SDValue T = BuildSplatI (i, SplatSize, MVT::v16i8, DAG, dl);
9477
+ SDValue T = getCanonicalConstSplat (i, SplatSize, MVT::v16i8, DAG, dl);
9419
9478
unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9420
9479
return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9421
9480
}
@@ -10817,9 +10876,9 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
10817
10876
if (Op.getValueType() == MVT::v4i32) {
10818
10877
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10819
10878
10820
- SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl);
10821
- SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);// +16 as shift amt.
10822
-
10879
+ SDValue Zero = getCanonicalConstSplat( 0, 1, MVT::v4i32, DAG, dl);
10880
+ // +16 as shift amt.
10881
+ SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
10823
10882
SDValue RHSSwap = // = vrlw RHS, 16
10824
10883
BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
10825
10884
@@ -16239,6 +16298,13 @@ bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
16239
16298
return false;
16240
16299
case MVT::f32:
16241
16300
case MVT::f64:
16301
+ if (Subtarget.hasPrefixInstrs()) {
16302
+ // With prefixed instructions, we can materialize anything that can be
16303
+ // represented with a 32-bit immediate, not just positive zero.
16304
+ APFloat APFloatOfImm = Imm;
16305
+ return convertToNonDenormSingle(APFloatOfImm);
16306
+ }
16307
+ LLVM_FALLTHROUGH;
16242
16308
case MVT::ppcf128:
16243
16309
return Imm.isPosZero();
16244
16310
}
0 commit comments