@@ -7040,6 +7040,31 @@ static SDValue combineToConsecutiveLoads(EVT VT, SDValue Op, const SDLoc &DL,
                                   IsAfterLegalize);
 }
 
+static Constant *getConstantVector(MVT VT, ArrayRef<APInt> Bits,
+                                   const APInt &Undefs, LLVMContext &C) {
+  unsigned ScalarSize = VT.getScalarSizeInBits();
+  Type *Ty = EVT(VT.getScalarType()).getTypeForEVT(C);
+
+  auto getConstantScalar = [&](const APInt &Val) -> Constant * {
+    if (VT.isFloatingPoint()) {
+      if (ScalarSize == 16)
+        return ConstantFP::get(C, APFloat(APFloat::IEEEhalf(), Val));
+      if (ScalarSize == 32)
+        return ConstantFP::get(C, APFloat(APFloat::IEEEsingle(), Val));
+      assert(ScalarSize == 64 && "Unsupported floating point scalar size");
+      return ConstantFP::get(C, APFloat(APFloat::IEEEdouble(), Val));
+    }
+    return Constant::getIntegerValue(Ty, Val);
+  };
+
+  SmallVector<Constant *, 32> ConstantVec;
+  for (unsigned I = 0, E = Bits.size(); I != E; ++I)
+    ConstantVec.push_back(Undefs[I] ? UndefValue::get(Ty)
+                                    : getConstantScalar(Bits[I]));
+
+  return ConstantVector::get(ArrayRef<Constant *>(ConstantVec));
+}
+
 static Constant *getConstantVector(MVT VT, const APInt &SplatValue,
                                    unsigned SplatBitSize, LLVMContext &C) {
   unsigned ScalarSize = VT.getScalarSizeInBits();
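For reference, a minimal sketch of what the new getConstantVector overload produces, as it might be exercised from inside X86ISelLowering.cpp; the raw IEEE-754 bit patterns and the 4-lane undef mask below are made-up illustrative values, not taken from the patch:

    // Build the IR constant <4 x float> <1.0, undef, 2.0, undef>.
    LLVMContext Ctx;
    SmallVector<APInt, 4> Bits;
    Bits.push_back(APInt(32, 0x3F800000)); // 1.0f as raw IEEE-754 bits
    Bits.push_back(APInt(32, 0));          // payload ignored: lane 1 is undef
    Bits.push_back(APInt(32, 0x40000000)); // 2.0f as raw IEEE-754 bits
    Bits.push_back(APInt(32, 0));          // payload ignored: lane 3 is undef
    APInt Undefs(4, 0b1010);               // one bit per lane; lanes 1 and 3 undef
    Constant *CV = getConstantVector(MVT::v4f32, Bits, Undefs, Ctx);

Each lane whose Undefs bit is set becomes UndefValue; every other lane is rebuilt from its raw bits using the APFloat semantics that match the scalar width, or getIntegerValue for integer vectors.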
@@ -54978,6 +55003,32 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
     }
   }
 
+  // Attempt to fold target constant loads.
+  if (all_of(Ops, [](SDValue Op) { return getTargetConstantFromNode(Op); })) {
+    SmallVector<APInt> EltBits;
+    APInt UndefElts = APInt::getZero(VT.getVectorNumElements());
+    for (unsigned I = 0; I != NumOps; ++I) {
+      APInt OpUndefElts;
+      SmallVector<APInt> OpEltBits;
+      if (!getTargetConstantBitsFromNode(Ops[I], EltSizeInBits, OpUndefElts,
+                                         OpEltBits, true, false))
+        break;
+      EltBits.append(OpEltBits);
+      UndefElts.insertBits(OpUndefElts, I * OpUndefElts.getBitWidth());
+    }
+    if (EltBits.size() == VT.getVectorNumElements()) {
+      Constant *C = getConstantVector(VT, EltBits, UndefElts, Ctx);
+      MVT PVT = TLI.getPointerTy(DAG.getDataLayout());
+      SDValue CV = DAG.getConstantPool(C, PVT);
+      MachineFunction &MF = DAG.getMachineFunction();
+      MachinePointerInfo MPI = MachinePointerInfo::getConstantPool(MF);
+      SDValue Ld = DAG.getLoad(VT, DL, DAG.getEntryNode(), CV, MPI);
+      SDValue Sub = extractSubVector(Ld, 0, DAG, DL, Op0.getValueSizeInBits());
+      DAG.ReplaceAllUsesOfValueWith(Op0, Sub);
+      return Ld;
+    }
+  }
+
   // If this simple subvector or scalar/subvector broadcast_load is inserted
   // into both halves, use a larger broadcast_load. Update other uses to use
   // an extracted subvector.
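This hoists the constant-load fold earlier in combineConcatVectorOps and, rather than rebuilding the constants as a DAG vector via getConstVector (the block removed below), materializes a single constant-pool load of the full concatenated vector; other users of Op0 are then redirected to an extract of the wide load's low subvector, so the narrow load can drop away without duplicating constant-pool data. A small self-contained sketch of how insertBits packs the per-operand undef masks into the combined mask (the two 4-lane masks are assumed example values):

    // Two 4-element operands fold into one 8-element undef mask.
    APInt UndefElts = APInt::getZero(8);
    APInt Op0Undefs(4, 0b0010);             // operand 0: lane 1 is undef
    APInt Op1Undefs(4, 0b1000);             // operand 1: lane 3 is undef
    UndefElts.insertBits(Op0Undefs, 0 * 4); // occupies combined bits [3:0]
    UndefElts.insertBits(Op1Undefs, 1 * 4); // occupies combined bits [7:4]
    assert(UndefElts == APInt(8, 0b10000010) && "combined lanes 1 and 7 undef");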
@@ -55000,23 +55051,6 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
     }
   }
 
-  // Attempt to fold target constant loads.
-  if (all_of(Ops, [](SDValue Op) { return getTargetConstantFromNode(Op); })) {
-    SmallVector<APInt> EltBits;
-    APInt UndefElts = APInt::getZero(VT.getVectorNumElements());
-    for (unsigned I = 0; I != NumOps; ++I) {
-      APInt OpUndefElts;
-      SmallVector<APInt> OpEltBits;
-      if (!getTargetConstantBitsFromNode(Ops[I], EltSizeInBits, OpUndefElts,
-                                         OpEltBits, true, false))
-        break;
-      EltBits.append(OpEltBits);
-      UndefElts.insertBits(OpUndefElts, I * OpUndefElts.getBitWidth());
-    }
-    if (EltBits.size() == VT.getVectorNumElements())
-      return getConstVector(EltBits, UndefElts, VT, DAG, DL);
-  }
-
   // If we're splatting a 128-bit subvector to 512-bits, use SHUF128 directly.
   if (IsSplat && NumOps == 4 && VT.is512BitVector() &&
       Subtarget.useAVX512Regs()) {