@@ -870,7 +870,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
870
870
setTargetDAGCombine(ISD::SIGN_EXTEND);
871
871
setTargetDAGCombine(ISD::VECTOR_SPLICE);
872
872
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
873
- setTargetDAGCombine(ISD::TRUNCATE);
874
873
setTargetDAGCombine(ISD::CONCAT_VECTORS);
875
874
setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
876
875
setTargetDAGCombine(ISD::STORE);
@@ -1047,6 +1046,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
1047
1046
1048
1047
for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
1049
1048
MVT::v4i32}) {
1049
+ setOperationAction(ISD::AVGFLOORS, VT, Legal);
1050
+ setOperationAction(ISD::AVGFLOORU, VT, Legal);
1051
+ setOperationAction(ISD::AVGCEILS, VT, Legal);
1052
+ setOperationAction(ISD::AVGCEILU, VT, Legal);
1050
1053
setOperationAction(ISD::ABDS, VT, Legal);
1051
1054
setOperationAction(ISD::ABDU, VT, Legal);
1052
1055
}
@@ -2096,10 +2099,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
2096
2099
MAKE_CASE(AArch64ISD::FCMLTz)
2097
2100
MAKE_CASE(AArch64ISD::SADDV)
2098
2101
MAKE_CASE(AArch64ISD::UADDV)
2099
- MAKE_CASE(AArch64ISD::SRHADD)
2100
- MAKE_CASE(AArch64ISD::URHADD)
2101
- MAKE_CASE(AArch64ISD::SHADD)
2102
- MAKE_CASE(AArch64ISD::UHADD)
2103
2102
MAKE_CASE(AArch64ISD::SDOT)
2104
2103
MAKE_CASE(AArch64ISD::UDOT)
2105
2104
MAKE_CASE(AArch64ISD::SMINV)
@@ -4371,9 +4370,9 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
4371
4370
IntNo == Intrinsic::aarch64_neon_shadd);
4372
4371
bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
4373
4372
IntNo == Intrinsic::aarch64_neon_urhadd);
4374
- unsigned Opcode =
4375
- IsSignedAdd ? (IsRoundingAdd ? AArch64ISD::SRHADD : AArch64ISD::SHADD )
4376
- : (IsRoundingAdd ? AArch64ISD::URHADD : AArch64ISD::UHADD );
4373
+ unsigned Opcode = IsSignedAdd
4374
+ ? (IsRoundingAdd ? ISD::AVGCEILS : ISD::AVGFLOORS )
4375
+ : (IsRoundingAdd ? ISD::AVGCEILU : ISD::AVGFLOORU );
4377
4376
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
4378
4377
Op.getOperand(2));
4379
4378
}
@@ -14243,89 +14242,6 @@ static SDValue performANDCombine(SDNode *N,
14243
14242
return SDValue();
14244
14243
}
14245
14244
14246
- // Attempt to form urhadd(OpA, OpB) from
14247
- // truncate(vlshr(sub(zext(OpB), xor(zext(OpA), Ones(ElemSizeInBits))), 1))
14248
- // or uhadd(OpA, OpB) from truncate(vlshr(add(zext(OpA), zext(OpB)), 1)).
14249
- // The original form of the first expression is
14250
- // truncate(srl(add(zext(OpB), add(zext(OpA), 1)), 1)) and the
14251
- // (OpA + OpB + 1) subexpression will have been changed to (OpB - (~OpA)).
14252
- // Before this function is called the srl will have been lowered to
14253
- // AArch64ISD::VLSHR.
14254
- // This pass can also recognize signed variants of the patterns that use sign
14255
- // extension instead of zero extension and form a srhadd(OpA, OpB) or a
14256
- // shadd(OpA, OpB) from them.
14257
- static SDValue
14258
- performVectorTruncateCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
14259
- SelectionDAG &DAG) {
14260
- EVT VT = N->getValueType(0);
14261
-
14262
- // Since we are looking for a right shift by a constant value of 1 and we are
14263
- // operating on types at least 16 bits in length (sign/zero extended OpA and
14264
- // OpB, which are at least 8 bits), it follows that the truncate will always
14265
- // discard the shifted-in bit and therefore the right shift will be logical
14266
- // regardless of the signedness of OpA and OpB.
14267
- SDValue Shift = N->getOperand(0);
14268
- if (Shift.getOpcode() != AArch64ISD::VLSHR)
14269
- return SDValue();
14270
-
14271
- // Is the right shift using an immediate value of 1?
14272
- uint64_t ShiftAmount = Shift.getConstantOperandVal(1);
14273
- if (ShiftAmount != 1)
14274
- return SDValue();
14275
-
14276
- SDValue ExtendOpA, ExtendOpB;
14277
- SDValue ShiftOp0 = Shift.getOperand(0);
14278
- unsigned ShiftOp0Opc = ShiftOp0.getOpcode();
14279
- if (ShiftOp0Opc == ISD::SUB) {
14280
-
14281
- SDValue Xor = ShiftOp0.getOperand(1);
14282
- if (Xor.getOpcode() != ISD::XOR)
14283
- return SDValue();
14284
-
14285
- // Is the XOR using a constant amount of all ones in the right hand side?
14286
- uint64_t C;
14287
- if (!isAllConstantBuildVector(Xor.getOperand(1), C))
14288
- return SDValue();
14289
-
14290
- unsigned ElemSizeInBits = VT.getScalarSizeInBits();
14291
- APInt CAsAPInt(ElemSizeInBits, C);
14292
- if (CAsAPInt != APInt::getAllOnes(ElemSizeInBits))
14293
- return SDValue();
14294
-
14295
- ExtendOpA = Xor.getOperand(0);
14296
- ExtendOpB = ShiftOp0.getOperand(0);
14297
- } else if (ShiftOp0Opc == ISD::ADD) {
14298
- ExtendOpA = ShiftOp0.getOperand(0);
14299
- ExtendOpB = ShiftOp0.getOperand(1);
14300
- } else
14301
- return SDValue();
14302
-
14303
- unsigned ExtendOpAOpc = ExtendOpA.getOpcode();
14304
- unsigned ExtendOpBOpc = ExtendOpB.getOpcode();
14305
- if (!(ExtendOpAOpc == ExtendOpBOpc &&
14306
- (ExtendOpAOpc == ISD::ZERO_EXTEND || ExtendOpAOpc == ISD::SIGN_EXTEND)))
14307
- return SDValue();
14308
-
14309
- // Is the result of the right shift being truncated to the same value type as
14310
- // the original operands, OpA and OpB?
14311
- SDValue OpA = ExtendOpA.getOperand(0);
14312
- SDValue OpB = ExtendOpB.getOperand(0);
14313
- EVT OpAVT = OpA.getValueType();
14314
- assert(ExtendOpA.getValueType() == ExtendOpB.getValueType());
14315
- if (!(VT == OpAVT && OpAVT == OpB.getValueType()))
14316
- return SDValue();
14317
-
14318
- SDLoc DL(N);
14319
- bool IsSignExtend = ExtendOpAOpc == ISD::SIGN_EXTEND;
14320
- bool IsRHADD = ShiftOp0Opc == ISD::SUB;
14321
- unsigned HADDOpc = IsSignExtend
14322
- ? (IsRHADD ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
14323
- : (IsRHADD ? AArch64ISD::URHADD : AArch64ISD::UHADD);
14324
- SDValue ResultHADD = DAG.getNode(HADDOpc, DL, VT, OpA, OpB);
14325
-
14326
- return ResultHADD;
14327
- }
14328
-
14329
14245
static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16) {
14330
14246
switch (Opcode) {
14331
14247
case ISD::FADD:
@@ -14428,20 +14344,20 @@ static SDValue performConcatVectorsCombine(SDNode *N,
14428
14344
if (DCI.isBeforeLegalizeOps())
14429
14345
return SDValue();
14430
14346
14431
- // Optimise concat_vectors of two [us]rhadds or [us]hadds that use extracted
14432
- // subvectors from the same original vectors. Combine these into a single
14433
- // [us]rhadd or [us]hadd that operates on the two original vectors. Example:
14434
- // (v16i8 (concat_vectors (v8i8 (urhadd (extract_subvector (v16i8 OpA, <0>),
14435
- // extract_subvector (v16i8 OpB,
14436
- // <0>)) ),
14437
- // (v8i8 (urhadd ( extract_subvector (v16i8 OpA , <8> ),
14438
- // extract_subvector (v16i8 OpB ,
14439
- // <8>)))))
14347
+ // Optimise concat_vectors of two [us]avgceils or [us]avgfloors that use
14348
+ // extracted subvectors from the same original vectors. Combine these into a
14349
+ // single avg that operates on the two original vectors.
14350
+ // avgceil is the target independent name for rhadd, avgfloor is a hadd.
14351
+ // Example:
14352
+ // (concat_vectors (v8i8 (avgceils (extract_subvector (v16i8 OpA, <0>),
14353
+ // extract_subvector (v16i8 OpB , <0>)) ),
14354
+ // (v8i8 (avgceils ( extract_subvector (v16i8 OpA, <8>) ,
14355
+ // extract_subvector (v16i8 OpB, <8>)))))
14440
14356
// ->
14441
- // (v16i8(urhadd (v16i8 OpA, v16i8 OpB)))
14357
+ // (v16i8(avgceils (v16i8 OpA, v16i8 OpB)))
14442
14358
if (N->getNumOperands() == 2 && N0Opc == N1Opc &&
14443
- (N0Opc == AArch64ISD::URHADD || N0Opc == AArch64ISD::SRHADD ||
14444
- N0Opc == AArch64ISD::UHADD || N0Opc == AArch64ISD::SHADD )) {
14359
+ (N0Opc == ISD::AVGCEILU || N0Opc == ISD::AVGCEILS ||
14360
+ N0Opc == ISD::AVGFLOORU || N0Opc == ISD::AVGFLOORS )) {
14445
14361
SDValue N00 = N0->getOperand(0);
14446
14362
SDValue N01 = N0->getOperand(1);
14447
14363
SDValue N10 = N1->getOperand(0);
@@ -18022,8 +17938,6 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
18022
17938
return performExtendCombine(N, DCI, DAG);
18023
17939
case ISD::SIGN_EXTEND_INREG:
18024
17940
return performSignExtendInRegCombine(N, DCI, DAG);
18025
- case ISD::TRUNCATE:
18026
- return performVectorTruncateCombine(N, DCI, DAG);
18027
17941
case ISD::CONCAT_VECTORS:
18028
17942
return performConcatVectorsCombine(N, DCI, DAG);
18029
17943
case ISD::INSERT_SUBVECTOR:
0 commit comments