Skip to content

Commit e9d4e34

Browse files
committed
[AArch64][SVE] Add legalization support for i32/i64 vector srem/urem
Implement them on top of sdiv/udiv, the same way the scalar legalizer already expands integer srem/urem (X % Y -> X - (X / Y) * Y); the shared logic moves into a new TargetLowering::expandREM helper. Potential future work: implementing i8/i16 srem/urem, optimizations for constant divisors, optimizing the mul+sub to mls. Differential Revision: https://reviews.llvm.org/D81511
1 parent 90ad786 commit e9d4e34

File tree

8 files changed

+115
-19
lines changed

8 files changed

+115
-19
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4421,6 +4421,10 @@ class TargetLowering : public TargetLoweringBase {
44214421
/// only the first Count elements of the vector are used.
44224422
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const;
44234423

4424+
/// Expand an SREM or UREM using SDIVREM/UDIVREM or, failing that, SDIV/UDIV,
/// whichever is legal or custom for the node's value type.
4425+
/// Returns true if the expansion was successful.
/// \param Node the SREM or UREM node to expand.
/// \param Result set to the expanded remainder value on success; left
/// unmodified when this returns false.
/// \param DAG the SelectionDAG in which the replacement nodes are created.
4426+
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const;
4427+
44244428
//===--------------------------------------------------------------------===//
44254429
// Instruction Emitting Hooks
44264430
//

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3343,26 +3343,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
33433343
break;
33443344
}
33453345
case ISD::UREM:
3346-
case ISD::SREM: {
3347-
EVT VT = Node->getValueType(0);
3348-
bool isSigned = Node->getOpcode() == ISD::SREM;
3349-
unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
3350-
unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3351-
Tmp2 = Node->getOperand(0);
3352-
Tmp3 = Node->getOperand(1);
3353-
if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
3354-
SDVTList VTs = DAG.getVTList(VT, VT);
3355-
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
3356-
Results.push_back(Tmp1);
3357-
} else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
3358-
// X % Y -> X-X/Y*Y
3359-
Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3);
3360-
Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
3361-
Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
3346+
case ISD::SREM:
3347+
if (TLI.expandREM(Node, Tmp1, DAG))
33623348
Results.push_back(Tmp1);
3363-
}
33643349
break;
3365-
}
33663350
case ISD::UDIV:
33673351
case ISD::SDIV: {
33683352
bool isSigned = Node->getOpcode() == ISD::SDIV;

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@ class VectorLegalizer {
145145
void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results);
146146
SDValue ExpandStrictFPOp(SDNode *Node);
147147
void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
148+
void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results);
148149

149150
void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
150151

@@ -867,6 +868,10 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
867868
case ISD::VECREDUCE_FMIN:
868869
Results.push_back(TLI.expandVecReduce(Node, DAG));
869870
return;
871+
case ISD::SREM:
872+
case ISD::UREM:
873+
ExpandREM(Node, Results);
874+
return;
870875
}
871876

872877
Results.push_back(DAG.UnrollVectorOp(Node));
@@ -1353,6 +1358,17 @@ void VectorLegalizer::ExpandStrictFPOp(SDNode *Node,
13531358
UnrollStrictFPOp(Node, Results);
13541359
}
13551360

1361+
/// Expand an SREM or UREM node and append the replacement value to Results.
/// The expansion is first delegated to TLI.expandREM; if that reports
/// failure, the node is handed to DAG.UnrollVectorOp instead. Exactly one
/// value is pushed onto Results in either case.
void VectorLegalizer::ExpandREM(SDNode *Node,
1362+
SmallVectorImpl<SDValue> &Results) {
1363+
assert((Node->getOpcode() == ISD::SREM || Node->getOpcode() == ISD::UREM) &&
1364+
"Expected REM node");
1365+
1366+
SDValue Result;
1367+
// expandREM only fills in Result when it returns true, so the fallback
// below is what initializes Result on the failure path.
if (!TLI.expandREM(Node, Result, DAG))
1368+
Result = DAG.UnrollVectorOp(Node);
1369+
Results.push_back(Result);
1370+
}
1371+
13561372
void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
13571373
SmallVectorImpl<SDValue> &Results) {
13581374
EVT VT = Node->getValueType(0);

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7823,3 +7823,26 @@ SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
78237823
Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
78247824
return Res;
78257825
}
7826+
7827+
/// Expand the SREM/UREM node \p Node into other nodes and store the
/// replacement in \p Result.
///
/// Two strategies are tried in order, each gated on the replacement opcode
/// being legal or custom for the node's value type:
///   1. a single [SU]DIVREM node, taking its result number 1;
///   2. X - (X / Y) * Y built from [SU]DIV, MUL and SUB.
///
/// Returns true if one of the strategies applied. Returns false, leaving
/// \p Result untouched, if neither opcode is legal or custom.
bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
7828+
SelectionDAG &DAG) const {
7829+
EVT VT = Node->getValueType(0);
7830+
SDLoc dl(Node);
7831+
// Any opcode other than SREM selects the unsigned variants below.
bool isSigned = Node->getOpcode() == ISD::SREM;
7832+
unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
7833+
unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
7834+
SDValue Dividend = Node->getOperand(0);
7835+
SDValue Divisor = Node->getOperand(1);
7836+
// Prefer the combined divide/remainder node when the target supports it.
if (isOperationLegalOrCustom(DivRemOpc, VT)) {
7837+
// The DIVREM node produces two values of type VT; result number 1 is the
// one used as the remainder here.
SDVTList VTs = DAG.getVTList(VT, VT);
7838+
Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
7839+
return true;
7840+
} else if (isOperationLegalOrCustom(DivOpc, VT)) {
7841+
// X % Y -> X - (X / Y) * Y
7842+
SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
7843+
SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
7844+
Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
7845+
return true;
7846+
}
7847+
// Neither DIVREM nor DIV is available for VT; the caller must pick another
// strategy.
return false;
7848+
}

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
199199
setOperationAction(ISD::UADDSAT, VT, Legal);
200200
setOperationAction(ISD::SSUBSAT, VT, Legal);
201201
setOperationAction(ISD::USUBSAT, VT, Legal);
202+
setOperationAction(ISD::UREM, VT, Expand);
203+
setOperationAction(ISD::SREM, VT, Expand);
204+
setOperationAction(ISD::SDIVREM, VT, Expand);
205+
setOperationAction(ISD::UDIVREM, VT, Expand);
202206
}
203207

204208
for (auto VT :

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -443,7 +443,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
443443
setOperationAction(ISD::UREM, VT, Expand);
444444
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
445445
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
446-
setOperationAction(ISD::SDIVREM, VT, Custom);
446+
setOperationAction(ISD::SDIVREM, VT, Expand);
447447
setOperationAction(ISD::UDIVREM, VT, Expand);
448448
setOperationAction(ISD::SELECT, VT, Expand);
449449
setOperationAction(ISD::VSELECT, VT, Expand);

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,8 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
210210
setOperationAction(ISD::SREM, VT, Expand);
211211
setOperationAction(ISD::UREM, VT, Expand);
212212
setOperationAction(ISD::FREM, VT, Expand);
213+
setOperationAction(ISD::SDIVREM, VT, Expand);
214+
setOperationAction(ISD::UDIVREM, VT, Expand);
213215

214216
if (!VT.isFloatingPoint() &&
215217
VT != MVT::v2i64 && VT != MVT::v1i64)
@@ -284,6 +286,8 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
284286
setOperationAction(ISD::SDIV, VT, Expand);
285287
setOperationAction(ISD::UREM, VT, Expand);
286288
setOperationAction(ISD::SREM, VT, Expand);
289+
setOperationAction(ISD::UDIVREM, VT, Expand);
290+
setOperationAction(ISD::SDIVREM, VT, Expand);
287291
setOperationAction(ISD::CTPOP, VT, Expand);
288292

289293
// Vector reductions

llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,36 @@ define <vscale x 4 x i64> @sdiv_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i
5959
ret <vscale x 4 x i64> %div
6060
}
6161

62+
;
63+
; SREM
64+
;
65+
66+
; Signed remainder on <vscale x 4 x i32>: the CHECK lines expect the
; sdiv + mul + sub sequence (X - (X / Y) * Y) under an all-true .s predicate.
define <vscale x 4 x i32> @srem_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
67+
; CHECK-LABEL: srem_i32:
68+
; CHECK: // %bb.0:
69+
; CHECK-NEXT: ptrue p0.s
70+
; CHECK-NEXT: mov z2.d, z0.d
71+
; CHECK-NEXT: sdiv z2.s, p0/m, z2.s, z1.s
72+
; CHECK-NEXT: mul z2.s, p0/m, z2.s, z1.s
73+
; CHECK-NEXT: sub z0.s, z0.s, z2.s
74+
; CHECK-NEXT: ret
75+
; Despite its name, %div holds the srem result (the remainder).
%div = srem <vscale x 4 x i32> %a, %b
76+
ret <vscale x 4 x i32> %div
77+
}
78+
79+
; Signed remainder on <vscale x 2 x i64>: the CHECK lines expect the
; sdiv + mul + sub sequence (X - (X / Y) * Y) under an all-true .d predicate.
define <vscale x 2 x i64> @srem_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
80+
; CHECK-LABEL: srem_i64:
81+
; CHECK: // %bb.0:
82+
; CHECK-NEXT: ptrue p0.d
83+
; CHECK-NEXT: mov z2.d, z0.d
84+
; CHECK-NEXT: sdiv z2.d, p0/m, z2.d, z1.d
85+
; CHECK-NEXT: mul z2.d, p0/m, z2.d, z1.d
86+
; CHECK-NEXT: sub z0.d, z0.d, z2.d
87+
; CHECK-NEXT: ret
88+
; Despite its name, %div holds the srem result (the remainder).
%div = srem <vscale x 2 x i64> %a, %b
89+
ret <vscale x 2 x i64> %div
90+
}
91+
6292
;
6393
; UDIV
6494
;
@@ -117,6 +147,37 @@ define <vscale x 4 x i64> @udiv_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i
117147
ret <vscale x 4 x i64> %div
118148
}
119149

150+
151+
;
152+
; UREM
153+
;
154+
155+
; Unsigned remainder on <vscale x 4 x i32>: the CHECK lines expect the
; udiv + mul + sub sequence (X - (X / Y) * Y) under an all-true .s predicate.
define <vscale x 4 x i32> @urem_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
156+
; CHECK-LABEL: urem_i32:
157+
; CHECK: // %bb.0:
158+
; CHECK-NEXT: ptrue p0.s
159+
; CHECK-NEXT: mov z2.d, z0.d
160+
; CHECK-NEXT: udiv z2.s, p0/m, z2.s, z1.s
161+
; CHECK-NEXT: mul z2.s, p0/m, z2.s, z1.s
162+
; CHECK-NEXT: sub z0.s, z0.s, z2.s
163+
; CHECK-NEXT: ret
164+
; Despite its name, %div holds the urem result (the remainder).
%div = urem <vscale x 4 x i32> %a, %b
165+
ret <vscale x 4 x i32> %div
166+
}
167+
168+
; Unsigned remainder on <vscale x 2 x i64>: the CHECK lines expect the
; udiv + mul + sub sequence (X - (X / Y) * Y) under an all-true .d predicate.
define <vscale x 2 x i64> @urem_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
169+
; CHECK-LABEL: urem_i64:
170+
; CHECK: // %bb.0:
171+
; CHECK-NEXT: ptrue p0.d
172+
; CHECK-NEXT: mov z2.d, z0.d
173+
; CHECK-NEXT: udiv z2.d, p0/m, z2.d, z1.d
174+
; CHECK-NEXT: mul z2.d, p0/m, z2.d, z1.d
175+
; CHECK-NEXT: sub z0.d, z0.d, z2.d
176+
; CHECK-NEXT: ret
177+
; Despite its name, %div holds the urem result (the remainder).
%div = urem <vscale x 2 x i64> %a, %b
178+
ret <vscale x 2 x i64> %div
179+
}
180+
120181
;
121182
; SMIN
122183
;

0 commit comments

Comments
 (0)