@@ -7135,6 +7135,7 @@ static bool isFoldableUseOfShuffle(SDNode *N) {
 /// The VBROADCAST node is returned when a pattern is found,
 /// or SDValue() otherwise.
 static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
+                                           const SDLoc &dl,
                                            const X86Subtarget &Subtarget,
                                            SelectionDAG &DAG) {
   // VBROADCAST requires AVX.
@@ -7145,8 +7146,6 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
 
   MVT VT = BVOp->getSimpleValueType(0);
   unsigned NumElts = VT.getVectorNumElements();
-  SDLoc dl(BVOp);
-
   assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
          "Unsupported vector type for broadcast.");
 
@@ -7492,14 +7491,13 @@ static SDValue LowerBUILD_VECTORvXbf16(SDValue Op, SelectionDAG &DAG,
 }
 
 // Lower BUILD_VECTOR operation for v8i1 and v16i1 types.
-static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG,
+static SDValue LowerBUILD_VECTORvXi1(SDValue Op, const SDLoc &dl,
+                                     SelectionDAG &DAG,
                                      const X86Subtarget &Subtarget) {
 
   MVT VT = Op.getSimpleValueType();
   assert((VT.getVectorElementType() == MVT::i1) &&
          "Unexpected type in LowerBUILD_VECTORvXi1!");
-
-  SDLoc dl(Op);
   if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
       ISD::isBuildVectorAllOnes(Op.getNode()))
     return Op;
@@ -7618,7 +7616,7 @@ LLVM_ATTRIBUTE_UNUSED static bool isHorizOp(unsigned Opcode) {
 /// See the corrected implementation in isHopBuildVector(). Can we reduce this
 /// code because it is only used for partial h-op matching now?
 static bool isHorizontalBinOpPart(const BuildVectorSDNode *N, unsigned Opcode,
-                                  SelectionDAG &DAG,
+                                  const SDLoc &DL, SelectionDAG &DAG,
                                   unsigned BaseIdx, unsigned LastIdx,
                                   SDValue &V0, SDValue &V1) {
   EVT VT = N->getValueType(0);
@@ -7928,6 +7926,7 @@ static bool isFMAddSubOrFMSubAdd(const X86Subtarget &Subtarget,
 /// 'fsubadd' operation accordingly to X86ISD::ADDSUB or X86ISD::FMADDSUB or
 /// X86ISD::FMSUBADD node.
 static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
+                                       const SDLoc &DL,
                                        const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
   SDValue Opnd0, Opnd1;
@@ -7938,7 +7937,6 @@ static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
     return SDValue();
 
   MVT VT = BV->getSimpleValueType(0);
-  SDLoc DL(BV);
 
   // Try to generate X86ISD::FMADDSUB node here.
   SDValue Opnd2;
@@ -8057,22 +8055,22 @@ static bool isHopBuildVector(const BuildVectorSDNode *BV, SelectionDAG &DAG,
 }
 
 static SDValue getHopForBuildVector(const BuildVectorSDNode *BV,
-                                    SelectionDAG &DAG, unsigned HOpcode,
-                                    SDValue V0, SDValue V1) {
+                                    const SDLoc &DL, SelectionDAG &DAG,
+                                    unsigned HOpcode, SDValue V0, SDValue V1) {
   // If either input vector is not the same size as the build vector,
   // extract/insert the low bits to the correct size.
   // This is free (examples: zmm --> xmm, xmm --> ymm).
   MVT VT = BV->getSimpleValueType(0);
   unsigned Width = VT.getSizeInBits();
   if (V0.getValueSizeInBits() > Width)
-    V0 = extractSubVector(V0, 0, DAG, SDLoc(BV), Width);
+    V0 = extractSubVector(V0, 0, DAG, DL, Width);
   else if (V0.getValueSizeInBits() < Width)
-    V0 = insertSubVector(DAG.getUNDEF(VT), V0, 0, DAG, SDLoc(BV), Width);
+    V0 = insertSubVector(DAG.getUNDEF(VT), V0, 0, DAG, DL, Width);
 
   if (V1.getValueSizeInBits() > Width)
-    V1 = extractSubVector(V1, 0, DAG, SDLoc(BV), Width);
+    V1 = extractSubVector(V1, 0, DAG, DL, Width);
   else if (V1.getValueSizeInBits() < Width)
-    V1 = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, SDLoc(BV), Width);
+    V1 = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, DL, Width);
 
   unsigned NumElts = VT.getVectorNumElements();
   APInt DemandedElts = APInt::getAllOnes(NumElts);
@@ -8084,17 +8082,17 @@ static SDValue getHopForBuildVector(const BuildVectorSDNode *BV,
   unsigned HalfNumElts = NumElts / 2;
   if (VT.is256BitVector() && DemandedElts.lshr(HalfNumElts) == 0) {
     MVT HalfVT = VT.getHalfNumVectorElementsVT();
-    V0 = extractSubVector(V0, 0, DAG, SDLoc(BV), 128);
-    V1 = extractSubVector(V1, 0, DAG, SDLoc(BV), 128);
-    SDValue Half = DAG.getNode(HOpcode, SDLoc(BV), HalfVT, V0, V1);
-    return insertSubVector(DAG.getUNDEF(VT), Half, 0, DAG, SDLoc(BV), 256);
+    V0 = extractSubVector(V0, 0, DAG, DL, 128);
+    V1 = extractSubVector(V1, 0, DAG, DL, 128);
+    SDValue Half = DAG.getNode(HOpcode, DL, HalfVT, V0, V1);
+    return insertSubVector(DAG.getUNDEF(VT), Half, 0, DAG, DL, 256);
   }
 
-  return DAG.getNode(HOpcode, SDLoc(BV), VT, V0, V1);
+  return DAG.getNode(HOpcode, DL, VT, V0, V1);
 }
 
 /// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
-static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
+static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV, const SDLoc &DL,
                                    const X86Subtarget &Subtarget,
                                    SelectionDAG &DAG) {
   // We need at least 2 non-undef elements to make this worthwhile by default.
@@ -8114,7 +8112,7 @@ static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
     unsigned HOpcode;
     SDValue V0, V1;
     if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
-      return getHopForBuildVector(BV, DAG, HOpcode, V0, V1);
+      return getHopForBuildVector(BV, DL, DAG, HOpcode, V0, V1);
   }
 
   // Try harder to match 256-bit ops by using extract/concat.
@@ -8134,22 +8132,21 @@ static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
     if (BV->getOperand(i)->isUndef())
       NumUndefsHI++;
 
-  SDLoc DL(BV);
   SDValue InVec0, InVec1;
   if (VT == MVT::v8i32 || VT == MVT::v16i16) {
     SDValue InVec2, InVec3;
     unsigned X86Opcode;
     bool CanFold = true;
 
-    if (isHorizontalBinOpPart(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) &&
-        isHorizontalBinOpPart(BV, ISD::ADD, DAG, Half, NumElts, InVec2,
+    if (isHorizontalBinOpPart(BV, ISD::ADD, DL, DAG, 0, Half, InVec0, InVec1) &&
+        isHorizontalBinOpPart(BV, ISD::ADD, DL, DAG, Half, NumElts, InVec2,
                               InVec3) &&
         ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
         ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
       X86Opcode = X86ISD::HADD;
-    else if (isHorizontalBinOpPart(BV, ISD::SUB, DAG, 0, Half, InVec0,
+    else if (isHorizontalBinOpPart(BV, ISD::SUB, DL, DAG, 0, Half, InVec0,
                                    InVec1) &&
-             isHorizontalBinOpPart(BV, ISD::SUB, DAG, Half, NumElts, InVec2,
+             isHorizontalBinOpPart(BV, ISD::SUB, DL, DAG, Half, NumElts, InVec2,
                                    InVec3) &&
              ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
              ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
@@ -8179,15 +8176,16 @@ static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
   if (VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
       VT == MVT::v16i16) {
     unsigned X86Opcode;
-    if (isHorizontalBinOpPart(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
+    if (isHorizontalBinOpPart(BV, ISD::ADD, DL, DAG, 0, NumElts, InVec0,
+                              InVec1))
       X86Opcode = X86ISD::HADD;
-    else if (isHorizontalBinOpPart(BV, ISD::SUB, DAG, 0, NumElts, InVec0,
+    else if (isHorizontalBinOpPart(BV, ISD::SUB, DL, DAG, 0, NumElts, InVec0,
                                    InVec1))
       X86Opcode = X86ISD::HSUB;
-    else if (isHorizontalBinOpPart(BV, ISD::FADD, DAG, 0, NumElts, InVec0,
+    else if (isHorizontalBinOpPart(BV, ISD::FADD, DL, DAG, 0, NumElts, InVec0,
                                    InVec1))
       X86Opcode = X86ISD::FHADD;
-    else if (isHorizontalBinOpPart(BV, ISD::FSUB, DAG, 0, NumElts, InVec0,
+    else if (isHorizontalBinOpPart(BV, ISD::FSUB, DL, DAG, 0, NumElts, InVec0,
                                    InVec1))
       X86Opcode = X86ISD::FHSUB;
     else
@@ -8218,10 +8216,9 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
 /// NOTE: Its not in our interest to start make a general purpose vectorizer
 /// from this, but enough scalar bit operations are created from the later
 /// legalization + scalarization stages to need basic support.
-static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op,
+static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op, const SDLoc &DL,
                                        const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
-  SDLoc DL(Op);
   MVT VT = Op->getSimpleValueType(0);
   unsigned NumElems = VT.getVectorNumElements();
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -8296,9 +8293,9 @@ static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op,
 /// Create a vector constant without a load. SSE/AVX provide the bare minimum
 /// functionality to do this, so it's all zeros, all ones, or some derivation
 /// that is cheap to calculate.
-static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG,
+static SDValue materializeVectorConstant(SDValue Op, const SDLoc &DL,
+                                         SelectionDAG &DAG,
                                          const X86Subtarget &Subtarget) {
-  SDLoc DL(Op);
   MVT VT = Op.getSimpleValueType();
 
   // Vectors containing all zeros can be matched by pxor and xorps.
@@ -8322,7 +8319,7 @@ static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG,
 /// from a vector of source values and a vector of extraction indices.
 /// The vectors might be manipulated to match the type of the permute op.
 static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
-                                     SDLoc &DL, SelectionDAG &DAG,
+                                     const SDLoc &DL, SelectionDAG &DAG,
                                      const X86Subtarget &Subtarget) {
   MVT ShuffleVT = VT;
   EVT IndicesVT = EVT(VT).changeVectorElementTypeToInteger();
@@ -8590,7 +8587,8 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
 // TODO: Utilize pshufb and zero mask blending to support more efficient
 // construction of vectors with constant-0 elements.
 static SDValue
-LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG,
+LowerBUILD_VECTORAsVariablePermute(SDValue V, const SDLoc &DL,
+                                   SelectionDAG &DAG,
                                    const X86Subtarget &Subtarget) {
   SDValue SrcVec, IndicesVec;
   // Check for a match of the permute source vector and permute index elements.
@@ -8629,7 +8627,6 @@ LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG,
     return SDValue();
   }
 
-  SDLoc DL(V);
   MVT VT = V.getSimpleValueType();
   return createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget);
 }
@@ -8645,14 +8642,14 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
 
   // Generate vectors for predicate vectors.
   if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512())
-    return LowerBUILD_VECTORvXi1(Op, DAG, Subtarget);
+    return LowerBUILD_VECTORvXi1(Op, dl, DAG, Subtarget);
 
   if (VT.getVectorElementType() == MVT::bf16 &&
       (Subtarget.hasAVXNECONVERT() || Subtarget.hasBF16()))
     return LowerBUILD_VECTORvXbf16(Op, DAG, Subtarget);
 
-  if (SDValue VectorConstant = materializeVectorConstant(Op, DAG, Subtarget))
-    return VectorConstant;
+  if (SDValue VectorCst = materializeVectorConstant(Op, dl, DAG, Subtarget))
+    return VectorCst;
 
   unsigned EVTBits = EltVT.getSizeInBits();
   APInt UndefMask = APInt::getZero(NumElems);
@@ -8747,13 +8744,13 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
     }
   }
 
-  if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, Subtarget, DAG))
+  if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, dl, Subtarget, DAG))
     return AddSub;
-  if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG))
+  if (SDValue HorizontalOp = LowerToHorizontalOp(BV, dl, Subtarget, DAG))
     return HorizontalOp;
-  if (SDValue Broadcast = lowerBuildVectorAsBroadcast(BV, Subtarget, DAG))
+  if (SDValue Broadcast = lowerBuildVectorAsBroadcast(BV, dl, Subtarget, DAG))
     return Broadcast;
-  if (SDValue BitOp = lowerBuildVectorToBitOp(BV, Subtarget, DAG))
+  if (SDValue BitOp = lowerBuildVectorToBitOp(BV, dl, Subtarget, DAG))
     return BitOp;
 
   unsigned NumZero = ZeroMask.popcount();
@@ -8901,8 +8898,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   if (IsAllConstants)
     return SDValue();
 
-  if (SDValue V = LowerBUILD_VECTORAsVariablePermute(Op, DAG, Subtarget))
-    return V;
+  if (SDValue V = LowerBUILD_VECTORAsVariablePermute(Op, dl, DAG, Subtarget))
+    return V;
 
   // See if we can use a vector load to get all of the elements.
   {
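
Note: the change repeated throughout this diff is mechanical. Each BUILD_VECTOR
lowering helper previously reconstructed its own debug location (SDLoc dl(BVOp),
SDLoc DL(BV), and so on), and now takes a const SDLoc & threaded down from
LowerBUILD_VECTOR, which constructs the location once. A minimal before/after
sketch of that pattern, assuming LLVM's SelectionDAG API; lowerExampleHelperOld
and lowerExampleHelperNew are hypothetical names, not functions from this commit:

// Sketch of the SDLoc-threading pattern applied by this commit
// (assumes LLVM's SelectionDAG headers; helper names are hypothetical).
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Before: every helper rebuilt the location from the node it was handed.
static SDValue lowerExampleHelperOld(SDValue Op, SelectionDAG &DAG) {
  SDLoc dl(Op); // reconstructed in each helper
  return DAG.getNode(ISD::ADD, dl, Op.getValueType(), Op, Op);
}

// After: the caller constructs the SDLoc once and passes it by const
// reference, so every helper in the chain tags new nodes with one location.
static SDValue lowerExampleHelperNew(SDValue Op, const SDLoc &dl,
                                     SelectionDAG &DAG) {
  return DAG.getNode(ISD::ADD, dl, Op.getValueType(), Op, Op);
}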