@@ -1013,6 +1013,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
1013
1013
setTargetDAGCombine(ISD::VECREDUCE_OR);
1014
1014
setTargetDAGCombine(ISD::VECREDUCE_XOR);
1015
1015
1016
+ setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
1017
+
1016
1018
// In case of strict alignment, avoid an excessive number of byte wide stores.
1017
1019
MaxStoresPerMemsetOptSize = 8;
1018
1020
MaxStoresPerMemset =
@@ -23121,6 +23123,55 @@ static SDValue performMULLCombine(SDNode *N,
23121
23123
return SDValue();
23122
23124
}
23123
23125
23126
+ static SDValue
23127
+ performScalarToVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
23128
+ SelectionDAG &DAG) {
23129
+ // Let's do below transform.
23130
+ //
23131
+ // t34: v4i32 = AArch64ISD::UADDLV t2
23132
+ // t35: i32 = extract_vector_elt t34, Constant:i64<0>
23133
+ // t7: i64 = zero_extend t35
23134
+ // t20: v1i64 = scalar_to_vector t7
23135
+ // ==>
23136
+ // t34: v4i32 = AArch64ISD::UADDLV t2
23137
+ // t39: v2i32 = extract_subvector t34, Constant:i64<0>
23138
+ // t40: v1i64 = AArch64ISD::NVCAST t39
23139
+ if (DCI.isBeforeLegalizeOps())
23140
+ return SDValue();
23141
+
23142
+ EVT VT = N->getValueType(0);
23143
+ if (VT != MVT::v1i64)
23144
+ return SDValue();
23145
+
23146
+ SDValue ZEXT = N->getOperand(0);
23147
+ if (ZEXT.getOpcode() != ISD::ZERO_EXTEND || ZEXT.getValueType() != MVT::i64)
23148
+ return SDValue();
23149
+
23150
+ SDValue EXTRACT_VEC_ELT = ZEXT.getOperand(0);
23151
+ if (EXTRACT_VEC_ELT.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
23152
+ EXTRACT_VEC_ELT.getValueType() != MVT::i32)
23153
+ return SDValue();
23154
+
23155
+ if (!isNullConstant(EXTRACT_VEC_ELT.getOperand(1)))
23156
+ return SDValue();
23157
+
23158
+ SDValue UADDLV = EXTRACT_VEC_ELT.getOperand(0);
23159
+ if (UADDLV.getOpcode() != AArch64ISD::UADDLV ||
23160
+ UADDLV.getValueType() != MVT::v4i32 ||
23161
+ UADDLV.getOperand(0).getValueType() != MVT::v8i8)
23162
+ return SDValue();
23163
+
23164
+ // Let's generate new sequence with AArch64ISD::NVCAST.
23165
+ SDLoc DL(N);
23166
+ SDValue EXTRACT_SUBVEC =
23167
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, UADDLV,
23168
+ DAG.getConstant(0, DL, MVT::i64));
23169
+ SDValue NVCAST =
23170
+ DAG.getNode(AArch64ISD::NVCAST, DL, MVT::v1i64, EXTRACT_SUBVEC);
23171
+
23172
+ return NVCAST;
23173
+ }
23174
+
23124
23175
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
23125
23176
DAGCombinerInfo &DCI) const {
23126
23177
SelectionDAG &DAG = DCI.DAG;
@@ -23436,6 +23487,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
23436
23487
return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
23437
23488
case ISD::CTLZ:
23438
23489
return performCTLZCombine(N, DAG, Subtarget);
23490
+ case ISD::SCALAR_TO_VECTOR:
23491
+ return performScalarToVectorCombine(N, DCI, DAG);
23439
23492
}
23440
23493
return SDValue();
23441
23494
}
0 commit comments