-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU] Legalize 64bit elements for BUILD_VECTOR on gfx942 #145052
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5206,6 +5206,14 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, | |
case ISD::BITCAST: { | ||
EVT DestVT = N->getValueType(0); | ||
|
||
// Avoid undoing build_vector with 64b elements if subtarget supports 64b | ||
// movs (i.e., avoid inf loop through combines). | ||
if (Subtarget->isGCN()) { | ||
const GCNSubtarget &ST = DAG.getSubtarget<GCNSubtarget>(); | ||
if (ST.hasMovB64()) | ||
break; | ||
} | ||
Comment on lines
+5209
to
+5215
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Move this into SITargetLowering::PerformDAGCombine and avoid calling the base class implementation there instead |
||
|
||
// Push casts through vector builds. This helps avoid emitting a large | ||
// number of copies when materializing floating point vector constants. | ||
// | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -357,9 +357,12 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, | |||||
// Most operations are naturally 32-bit vector operations. We only support | ||||||
// load and store of i64 vectors, so promote v2i64 vector operations to v4i32. | ||||||
for (MVT Vec64 : {MVT::v2i64, MVT::v2f64}) { | ||||||
setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); | ||||||
AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v4i32); | ||||||
|
||||||
if (STI.hasMovB64()) | ||||||
setOperationAction(ISD::BUILD_VECTOR, Vec64, Legal); | ||||||
else { | ||||||
setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); | ||||||
AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v4i32); | ||||||
} | ||||||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote); | ||||||
AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v4i32); | ||||||
|
||||||
|
@@ -371,9 +374,12 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, | |||||
} | ||||||
|
||||||
for (MVT Vec64 : {MVT::v3i64, MVT::v3f64}) { | ||||||
setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); | ||||||
AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v6i32); | ||||||
|
||||||
if (STI.hasMovB64()) | ||||||
setOperationAction(ISD::BUILD_VECTOR, Vec64, Legal); | ||||||
else { | ||||||
setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); | ||||||
AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v6i32); | ||||||
} | ||||||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote); | ||||||
AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v6i32); | ||||||
|
||||||
|
@@ -385,9 +391,12 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, | |||||
} | ||||||
|
||||||
for (MVT Vec64 : {MVT::v4i64, MVT::v4f64}) { | ||||||
setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); | ||||||
AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v8i32); | ||||||
|
||||||
if (STI.hasMovB64()) | ||||||
setOperationAction(ISD::BUILD_VECTOR, Vec64, Legal); | ||||||
else { | ||||||
setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); | ||||||
AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v8i32); | ||||||
} | ||||||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote); | ||||||
AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v8i32); | ||||||
|
||||||
|
@@ -399,9 +408,12 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, | |||||
} | ||||||
|
||||||
for (MVT Vec64 : {MVT::v8i64, MVT::v8f64}) { | ||||||
setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); | ||||||
AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v16i32); | ||||||
|
||||||
if (STI.hasMovB64()) | ||||||
setOperationAction(ISD::BUILD_VECTOR, Vec64, Legal); | ||||||
else { | ||||||
setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); | ||||||
AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v16i32); | ||||||
} | ||||||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote); | ||||||
AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v16i32); | ||||||
|
||||||
|
@@ -413,9 +425,12 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, | |||||
} | ||||||
|
||||||
for (MVT Vec64 : {MVT::v16i64, MVT::v16f64}) { | ||||||
setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); | ||||||
AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v32i32); | ||||||
|
||||||
if (STI.hasMovB64()) | ||||||
setOperationAction(ISD::BUILD_VECTOR, Vec64, Legal); | ||||||
Comment on lines
+428
to
+429
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This might be the default already? |
||||||
else { | ||||||
setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); | ||||||
AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v32i32); | ||||||
} | ||||||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote); | ||||||
AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v32i32); | ||||||
|
||||||
|
@@ -945,6 +960,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, | |||||
} | ||||||
|
||||||
setTargetDAGCombine({ISD::ADD, | ||||||
ISD::BUILD_VECTOR, | ||||||
ISD::UADDO_CARRY, | ||||||
ISD::SUB, | ||||||
ISD::USUBO_CARRY, | ||||||
|
@@ -15486,6 +15502,72 @@ SDValue SITargetLowering::performClampCombine(SDNode *N, | |||||
return SDValue(CSrc, 0); | ||||||
} | ||||||
|
||||||
SDValue | ||||||
SITargetLowering::performBuildVectorCombine(SDNode *N, | ||||||
DAGCombinerInfo &DCI) const { | ||||||
const GCNSubtarget *ST = getSubtarget(); | ||||||
if (DCI.Level < AfterLegalizeDAG || !ST->hasMovB64()) | ||||||
return SDValue(); | ||||||
Comment on lines
+15509
to
+15510
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is still worthwhile without v_mov_b64 if we are going to use s_mov_b64 for the final use |
||||||
|
||||||
SelectionDAG &DAG = DCI.DAG; | ||||||
SDLoc SL(N); | ||||||
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N); | ||||||
|
||||||
EVT VT = N->getValueType(0); | ||||||
EVT EltVT = VT.getVectorElementType(); | ||||||
unsigned SizeBits = VT.getSizeInBits(); | ||||||
unsigned EltSize = EltVT.getSizeInBits(); | ||||||
|
||||||
// Skip if: | ||||||
// - Value type isn't a multiple of 64 bits (e.g., v3i32), or | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
Still can handle the v3 case |
||||||
// - BuildVector instruction has non-constants, or | ||||||
// - Element type has already been combined into i64 elements | ||||||
if ((SizeBits % 64) != 0 || !BVN->isConstant() || EltVT == MVT::i64) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also f64? |
||||||
return SDValue(); | ||||||
Comment on lines
+15525
to
+15526
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Probably should drop the isConstant check; you're necessarily performing the same check below anyway |
||||||
|
||||||
// Construct the 64b values. | ||||||
SmallVector<uint64_t, 8> ImmVals; | ||||||
uint64_t ImmVal = 0; | ||||||
uint64_t ImmSize = 0; | ||||||
for (SDValue Opand : N->ops()) { | ||||||
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Opand); | ||||||
if (!C) | ||||||
return SDValue(); | ||||||
|
||||||
ImmVal |= C->getZExtValue() << ImmSize; | ||||||
ImmSize += EltSize; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't understand what ImmSize is for. All the sizes are exactly computable from the type and number of operands, so you shouldn't need to sum anything? |
||||||
if (ImmSize > 64) | ||||||
return SDValue(); | ||||||
Comment on lines
+15539
to
+15540
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I guess this only handles v2i32 and maybe v4i16? Arbitrary width should work |
||||||
if (ImmSize == 64) { | ||||||
if (!isUInt<32>(ImmVal)) | ||||||
return SDValue(); | ||||||
ImmVals.push_back(ImmVal); | ||||||
ImmVal = 0; | ||||||
ImmSize = 0; | ||||||
} | ||||||
} | ||||||
|
||||||
// Avoid emitting build_vector with 1 element and directly emit value. | ||||||
if (ImmVals.size() == 1) { | ||||||
SDValue Val = DAG.getConstant(ImmVals[0], SL, MVT::i64); | ||||||
return DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Val); | ||||||
} | ||||||
|
||||||
// Construct and return build_vector with 64b elements. | ||||||
if (!ImmVals.empty()) { | ||||||
SmallVector<SDValue, 8> VectorConsts; | ||||||
for (uint64_t I : ImmVals) | ||||||
VectorConsts.push_back(DAG.getConstant(I, SL, MVT::i64)); | ||||||
unsigned NewNumElts = SizeBits / 64; | ||||||
LLVMContext &Ctx = *DAG.getContext(); | ||||||
EVT NewVT = EVT::getVectorVT(Ctx, MVT::i64, NewNumElts); | ||||||
SDValue BV = DAG.getBuildVector( | ||||||
NewVT, SL, ArrayRef(VectorConsts.begin(), VectorConsts.end())); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
return DAG.getBitcast(VT, BV); | ||||||
} | ||||||
return SDValue(); | ||||||
} | ||||||
|
||||||
SDValue SITargetLowering::PerformDAGCombine(SDNode *N, | ||||||
DAGCombinerInfo &DCI) const { | ||||||
switch (N->getOpcode()) { | ||||||
|
@@ -15573,6 +15655,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, | |||||
return performFCanonicalizeCombine(N, DCI); | ||||||
case AMDGPUISD::RCP: | ||||||
return performRcpCombine(N, DCI); | ||||||
case ISD::BUILD_VECTOR: | ||||||
return performBuildVectorCombine(N, DCI); | ||||||
case ISD::FLDEXP: | ||||||
case AMDGPUISD::FRACT: | ||||||
case AMDGPUISD::RSQ: | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.