-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AArch64] Allow lowering of more types to GET_ACTIVE_LANE_MASK #140062
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
711b568
c670009
c4a7e8a
03f752f
eecc4a6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1401,6 +1401,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { | |
case ISD::PARTIAL_REDUCE_SMLA: | ||
SplitVecRes_PARTIAL_REDUCE_MLA(N, Lo, Hi); | ||
break; | ||
case ISD::GET_ACTIVE_LANE_MASK: | ||
SplitVecRes_GET_ACTIVE_LANE_MASK(N, Lo, Hi); | ||
break; | ||
} | ||
|
||
// If Lo/Hi is null, the sub-method took care of registering results etc. | ||
|
@@ -3248,6 +3251,22 @@ void DAGTypeLegalizer::SplitVecRes_PARTIAL_REDUCE_MLA(SDNode *N, SDValue &Lo, | |
Hi = DAG.getNode(Opcode, DL, ResultVT, AccHi, Input1Hi, Input2Hi); | ||
} | ||
|
||
/// Split the result of a GET_ACTIVE_LANE_MASK node into a low and a high half.
///
/// The low half is the same operation applied to the node's two original
/// operands, producing the low split type. The high half reuses the second
/// operand unchanged, while its first operand is the original first operand
/// advanced by the element count of the low half type.
///
/// \param N   The GET_ACTIVE_LANE_MASK node whose result is being split.
/// \param Lo  Receives the node producing the low half of the result.
/// \param Hi  Receives the node producing the high half of the result.
void DAGTypeLegalizer::SplitVecRes_GET_ACTIVE_LANE_MASK(SDNode *N, SDValue &Lo,
                                                        SDValue &Hi) {
  SDLoc DL(N);
  SDValue StartVal = N->getOperand(0);
  SDValue LimitVal = N->getOperand(1);
  EVT StartVT = StartVal.getValueType();

  // Ask the DAG how the result type divides into its two halves.
  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));

  // Low half: identical operands, narrower result type.
  Lo = DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, LoVT, StartVal, LimitVal);

  // High half: advance the first operand past the lanes covered by Lo. The
  // advance uses ISD::UADDSAT rather than a plain add.
  // NOTE(review): presumably this keeps the advanced start from wrapping
  // around -- confirm against the node's documented overflow requirement.
  SDValue LoEltCount =
      DAG.getElementCount(DL, StartVT, LoVT.getVectorElementCount());
  SDValue HiStart =
      DAG.getNode(ISD::UADDSAT, DL, StartVT, StartVal, LoEltCount);
  Hi = DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, HiVT, HiStart, LimitVal);
}
|
||
void DAGTypeLegalizer::SplitVecRes_VECTOR_DEINTERLEAVE(SDNode *N) { | ||
unsigned Factor = N->getNumOperands(); | ||
|
||
|
@@ -4648,6 +4667,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { | |
case ISD::VECTOR_REVERSE: | ||
Res = WidenVecRes_VECTOR_REVERSE(N); | ||
break; | ||
case ISD::GET_ACTIVE_LANE_MASK: | ||
Res = WidenVecRes_GET_ACTIVE_LANE_MASK(N); | ||
break; | ||
|
||
case ISD::ADD: case ISD::VP_ADD: | ||
case ISD::AND: case ISD::VP_AND: | ||
|
@@ -6595,6 +6617,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_REVERSE(SDNode *N) { | |
Mask); | ||
} | ||
|
||
// Widen the result of a GET_ACTIVE_LANE_MASK node.
//
// Looks up the type that the node's result type is transformed to (via
// TLI.getTypeToTransformTo) and returns a new GET_ACTIVE_LANE_MASK node of
// that type, built with the original node's operands unchanged.
//
// N is the GET_ACTIVE_LANE_MASK node whose result is being widened; the
// return value is the replacement node.
SDValue DAGTypeLegalizer::WidenVecRes_GET_ACTIVE_LANE_MASK(SDNode *N) { | ||
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); | ||
return DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, SDLoc(N), NVT, N->ops()); | ||
// NOTE(review): the next line is review-thread text captured with the page,
// not part of the function body.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you know which tests trigger the widening code here - I assume it's I assume it's the former, in which case this code looks right! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is the
|
||
} | ||
|
||
SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { | ||
assert(N->getValueType(0).isVector() && | ||
N->getOperand(0).getValueType().isVector() && | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
According to the documentation for the node, "the internal addition of Base + index cannot overflow".
I assume this is a statement that we have to generate appropriate code to ensure the addition does not overflow because the operation requires it, as opposed to it being a guarantee that it cannot overflow. If it's the former then UADDSAT makes sense, but if it's the latter then presumably we don't need the UADDSAT?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is correct, and why the UADDSAT is needed here.