Skip to content

Commit a983ef2

Browse files
committed
[DAGCombiner][AArch64][VE] Teach BuildUDIV/SDIV to use 2x mul when mulh/mul_lohi are not available.
Correct the legality of i32 mul_lohi on AArch64. Previously, AArch64 incorrectly reported i32 mul_lohi as Legal, which allowed BuildUDIV/SDIV to use it. A later DAGCombiner run would replace the mul_lohi nodes with MULHS/MULHU because only the high half was used. That conversion does not check the legality of MULHS/MULHU, under the assumption that LegalizeDAG can turn them back into MUL_LOHI later. After the nodes were converted to MULHS/MULHU, DAGCombine ran and saw that these operations aren't supported but an i64 MUL is, so they got converted to an i64 MUL plus a shift. Without that last conversion, LegalizeDAG would convert them back to MUL_LOHI and isel would fail to find a pattern. This patch teaches BuildUDIV/SDIV to create the wide mul and shift directly, so that we can report the correct operation legality on AArch64. It also enables div-by-constant folding for more cases on VE. I don't know whether VE wants this div-by-constant optimization or not. If they don't want it, they can use the isIntDivCheap hook to disable it. Reviewed By: RKSimon. Differential Revision: https://reviews.llvm.org/D150333
1 parent 2da2995 commit a983ef2

File tree

5 files changed

+70
-19
lines changed

5 files changed

+70
-19
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5990,6 +5990,19 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
59905990
DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
59915991
return SDValue(LoHi.getNode(), 1);
59925992
}
5993+
// If MUL is legal on the type twice as wide, widen and use a mul plus a shift.
5994+
if (!VT.isVector()) {
5995+
unsigned Size = VT.getSizeInBits();
5996+
EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
5997+
if (isOperationLegal(ISD::MUL, WideVT)) {
5998+
X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
5999+
Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
6000+
Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6001+
Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6002+
DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6003+
return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6004+
}
6005+
}
59936006
return SDValue();
59946007
};
59956008

@@ -6163,6 +6176,19 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
61636176
DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
61646177
return SDValue(LoHi.getNode(), 1);
61656178
}
6179+
// If MUL is legal on the type twice as wide, widen and use a mul plus a shift.
6180+
if (!VT.isVector()) {
6181+
unsigned Size = VT.getSizeInBits();
6182+
EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6183+
if (isOperationLegal(ISD::MUL, WideVT)) {
6184+
X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
6185+
Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
6186+
Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6187+
Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6188+
DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6189+
return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6190+
}
6191+
}
61666192
return SDValue(); // No mulhu or equivalent
61676193
};
61686194

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
580580
setOperationAction(ISD::MULHS, MVT::i32, Expand);
581581

582582
// AArch64 doesn't have {U|S}MUL_LOHI.
583+
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
584+
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
583585
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
584586
setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
585587

llvm/test/CodeGen/VE/Scalar/div.ll

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,11 @@ define i64 @divi64ri(i64 %a, i64 %b) {
149149
define signext i32 @divi32ri(i32 signext %a, i32 signext %b) {
150150
; CHECK-LABEL: divi32ri:
151151
; CHECK: # %bb.0:
152-
; CHECK-NEXT: divs.w.sx %s0, %s0, (62)0
152+
; CHECK-NEXT: lea %s1, 1431655766
153+
; CHECK-NEXT: muls.l %s0, %s0, %s1
154+
; CHECK-NEXT: srl %s1, %s0, 63
155+
; CHECK-NEXT: srl %s0, %s0, 32
156+
; CHECK-NEXT: adds.w.sx %s0, %s0, %s1
153157
; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
154158
; CHECK-NEXT: b.l.t (, %s10)
155159
%r = sdiv i32 %a, 3
@@ -185,8 +189,10 @@ define i64 @divu64ri(i64 %a, i64 %b) {
185189
define zeroext i32 @divu32ri(i32 zeroext %a, i32 zeroext %b) {
186190
; CHECK-LABEL: divu32ri:
187191
; CHECK: # %bb.0:
188-
; CHECK-NEXT: divu.w %s0, %s0, (62)0
189-
; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1
192+
; CHECK-NEXT: lea %s1, -1431655765
193+
; CHECK-NEXT: and %s1, %s1, (32)0
194+
; CHECK-NEXT: muls.l %s0, %s0, %s1
195+
; CHECK-NEXT: srl %s0, %s0, 33
190196
; CHECK-NEXT: b.l.t (, %s10)
191197
%r = udiv i32 %a, 3
192198
ret i32 %r

llvm/test/CodeGen/VE/Scalar/rem.ll

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,11 @@ define i64 @remi64ri(i64 %a) {
165165
define signext i32 @remi32ri(i32 signext %a) {
166166
; CHECK-LABEL: remi32ri:
167167
; CHECK: # %bb.0:
168-
; CHECK-NEXT: divs.w.sx %s1, %s0, (62)0
168+
; CHECK-NEXT: lea %s1, 1431655766
169+
; CHECK-NEXT: muls.l %s1, %s0, %s1
170+
; CHECK-NEXT: srl %s2, %s1, 63
171+
; CHECK-NEXT: srl %s1, %s1, 32
172+
; CHECK-NEXT: adds.w.sx %s1, %s1, %s2
169173
; CHECK-NEXT: muls.w.sx %s1, 3, %s1
170174
; CHECK-NEXT: subs.w.sx %s0, %s0, %s1
171175
; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
@@ -205,7 +209,10 @@ define i64 @remu64ri(i64 %a) {
205209
define zeroext i32 @remu32ri(i32 zeroext %a) {
206210
; CHECK-LABEL: remu32ri:
207211
; CHECK: # %bb.0:
208-
; CHECK-NEXT: divu.w %s1, %s0, (62)0
212+
; CHECK-NEXT: lea %s1, -1431655765
213+
; CHECK-NEXT: and %s1, %s1, (32)0
214+
; CHECK-NEXT: muls.l %s1, %s0, %s1
215+
; CHECK-NEXT: srl %s1, %s1, 33
209216
; CHECK-NEXT: muls.w.sx %s1, 3, %s1
210217
; CHECK-NEXT: subs.w.sx %s0, %s0, %s1
211218
; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1

llvm/test/CodeGen/VE/Vector/vec_divrem.ll

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,18 @@ define <4 x i8> @udiv_by_minus_one(<4 x i8> %x) {
88
; CHECK-LABEL: udiv_by_minus_one:
99
; CHECK: # %bb.0:
1010
; CHECK-NEXT: and %s0, %s0, (56)0
11+
; CHECK-NEXT: lea %s4, 16843010
12+
; CHECK-NEXT: muls.l %s0, %s0, %s4
13+
; CHECK-NEXT: srl %s0, %s0, 32
1114
; CHECK-NEXT: and %s1, %s1, (56)0
15+
; CHECK-NEXT: muls.l %s1, %s1, %s4
16+
; CHECK-NEXT: srl %s1, %s1, 32
1217
; CHECK-NEXT: and %s2, %s2, (56)0
18+
; CHECK-NEXT: muls.l %s2, %s2, %s4
19+
; CHECK-NEXT: srl %s2, %s2, 32
1320
; CHECK-NEXT: and %s3, %s3, (56)0
14-
; CHECK-NEXT: divu.w %s3, %s3, (56)0
15-
; CHECK-NEXT: divu.w %s2, %s2, (56)0
16-
; CHECK-NEXT: divu.w %s1, %s1, (56)0
17-
; CHECK-NEXT: divu.w %s0, %s0, (56)0
21+
; CHECK-NEXT: muls.l %s3, %s3, %s4
22+
; CHECK-NEXT: srl %s3, %s3, 32
1823
; CHECK-NEXT: b.l.t (, %s10)
1924
%r = udiv <4 x i8> %x, <i8 255, i8 255, i8 255, i8 255>
2025
ret <4 x i8> %r
@@ -27,16 +32,21 @@ define <4 x i8> @urem_by_minus_one(<4 x i8> %x) {
2732
; CHECK-NEXT: and %s1, %s1, (56)0
2833
; CHECK-NEXT: and %s2, %s2, (56)0
2934
; CHECK-NEXT: and %s3, %s3, (56)0
30-
; CHECK-NEXT: divu.w %s4, %s3, (56)0
31-
; CHECK-NEXT: muls.w.sx %s4, %s4, (56)0
32-
; CHECK-NEXT: subs.w.sx %s3, %s3, %s4
33-
; CHECK-NEXT: divu.w %s4, %s2, (56)0
34-
; CHECK-NEXT: muls.w.sx %s4, %s4, (56)0
35-
; CHECK-NEXT: subs.w.sx %s2, %s2, %s4
36-
; CHECK-NEXT: divu.w %s4, %s1, (56)0
37-
; CHECK-NEXT: muls.w.sx %s4, %s4, (56)0
38-
; CHECK-NEXT: subs.w.sx %s1, %s1, %s4
39-
; CHECK-NEXT: divu.w %s4, %s0, (56)0
35+
; CHECK-NEXT: lea %s4, 16843010
36+
; CHECK-NEXT: muls.l %s5, %s3, %s4
37+
; CHECK-NEXT: srl %s5, %s5, 32
38+
; CHECK-NEXT: muls.w.sx %s5, %s5, (56)0
39+
; CHECK-NEXT: subs.w.sx %s3, %s3, %s5
40+
; CHECK-NEXT: muls.l %s5, %s2, %s4
41+
; CHECK-NEXT: srl %s5, %s5, 32
42+
; CHECK-NEXT: muls.w.sx %s5, %s5, (56)0
43+
; CHECK-NEXT: subs.w.sx %s2, %s2, %s5
44+
; CHECK-NEXT: muls.l %s5, %s1, %s4
45+
; CHECK-NEXT: srl %s5, %s5, 32
46+
; CHECK-NEXT: muls.w.sx %s5, %s5, (56)0
47+
; CHECK-NEXT: subs.w.sx %s1, %s1, %s5
48+
; CHECK-NEXT: muls.l %s4, %s0, %s4
49+
; CHECK-NEXT: srl %s4, %s4, 32
4050
; CHECK-NEXT: muls.w.sx %s4, %s4, (56)0
4151
; CHECK-NEXT: subs.w.sx %s0, %s0, %s4
4252
; CHECK-NEXT: b.l.t (, %s10)

0 commit comments

Comments
 (0)