Skip to content

Commit db25f51

Browse files
committed
Revert "[DAGCombiner] Fold (mul (sra X, BW-1), Y) -> (neg (and (sra X, BW-1), Y))"
This reverts commit e8b3ffa. The AMDGPU/mad_64_32.ll test seems to fail on some of the build bots but passes locally, and the cause of the discrepancy is not yet understood.
1 parent ef72ff7 commit db25f51

File tree

13 files changed

+1735
-1925
lines changed

13 files changed

+1735
-1925
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -3957,30 +3957,6 @@ SDValue DAGCombiner::visitMULFIX(SDNode *N) {
39573957
return SDValue();
39583958
}
39593959

3960-
// Fold (mul (sra X, BW-1), Y) -> (neg (and (sra X, BW-1), Y))
3961-
static SDValue foldSraMulToAndNeg(SDNode *N, SDValue N0, SDValue N1,
3962-
SelectionDAG &DAG) {
3963-
if (N0.getOpcode() != ISD::SRA)
3964-
return SDValue();
3965-
3966-
EVT VT = N->getValueType(0);
3967-
3968-
// TODO: Use computeNumSignBits() == BitWidth?
3969-
unsigned BitWidth = VT.getScalarSizeInBits();
3970-
ConstantSDNode *ShiftAmt = isConstOrConstSplat(N0.getOperand(1));
3971-
if (!ShiftAmt || ShiftAmt->getAPIntValue() != (BitWidth - 1))
3972-
return SDValue();
3973-
3974-
// If optimizing for minsize, we don't want to increase the number of
3975-
// instructions.
3976-
if (DAG.getMachineFunction().getFunction().hasMinSize())
3977-
return SDValue();
3978-
3979-
SDLoc dl(N);
3980-
SDValue And = DAG.getNode(ISD::AND, dl, VT, N0, N1);
3981-
return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), And);
3982-
}
3983-
39843960
SDValue DAGCombiner::visitMUL(SDNode *N) {
39853961
SDValue N0 = N->getOperand(0);
39863962
SDValue N1 = N->getOperand(1);
@@ -4191,11 +4167,6 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
41914167
}
41924168
}
41934169

4194-
if (SDValue V = foldSraMulToAndNeg(N, N0, N1, DAG))
4195-
return V;
4196-
if (SDValue V = foldSraMulToAndNeg(N, N1, N0, DAG))
4197-
return V;
4198-
41994170
// reassociate mul
42004171
if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
42014172
return RMUL;

llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -39,24 +39,21 @@ define i128 @__muloti4(i128 %0, i128 %1, i32* nocapture nonnull writeonly align
3939
; AARCH: // %bb.0: // %Entry
4040
; AARCH-NEXT: asr x9, x1, #63
4141
; AARCH-NEXT: asr x10, x3, #63
42-
; AARCH-NEXT: and x11, x9, x2
43-
; AARCH-NEXT: and x14, x10, x1
44-
; AARCH-NEXT: umulh x12, x2, x9
45-
; AARCH-NEXT: and x9, x9, x3
46-
; AARCH-NEXT: umulh x13, x10, x0
47-
; AARCH-NEXT: and x10, x10, x0
48-
; AARCH-NEXT: sub x12, x12, x11
49-
; AARCH-NEXT: neg x11, x11
50-
; AARCH-NEXT: sub x13, x13, x14
51-
; AARCH-NEXT: sub x9, x12, x9
52-
; AARCH-NEXT: sub x12, x13, x10
53-
; AARCH-NEXT: neg x10, x10
5442
; AARCH-NEXT: umulh x14, x0, x2
43+
; AARCH-NEXT: mov x8, x1
44+
; AARCH-NEXT: mul x11, x2, x9
45+
; AARCH-NEXT: str wzr, [x4]
46+
; AARCH-NEXT: umulh x12, x10, x0
47+
; AARCH-NEXT: umulh x13, x2, x9
48+
; AARCH-NEXT: madd x12, x10, x1, x12
49+
; AARCH-NEXT: add x13, x13, x11
50+
; AARCH-NEXT: mul x10, x10, x0
51+
; AARCH-NEXT: madd x9, x3, x9, x13
52+
; AARCH-NEXT: add x12, x12, x10
5553
; AARCH-NEXT: adds x10, x10, x11
5654
; AARCH-NEXT: mul x11, x1, x2
5755
; AARCH-NEXT: adc x9, x12, x9
5856
; AARCH-NEXT: umulh x13, x1, x2
59-
; AARCH-NEXT: mov x8, x1
6057
; AARCH-NEXT: mul x12, x0, x3
6158
; AARCH-NEXT: adds x11, x11, x14
6259
; AARCH-NEXT: umulh x14, x0, x3
@@ -76,7 +73,6 @@ define i128 @__muloti4(i128 %0, i128 %1, i32* nocapture nonnull writeonly align
7673
; AARCH-NEXT: eor x9, x9, x11
7774
; AARCH-NEXT: eor x10, x10, x11
7875
; AARCH-NEXT: orr x9, x10, x9
79-
; AARCH-NEXT: str wzr, [x4]
8076
; AARCH-NEXT: cmp x9, #0
8177
; AARCH-NEXT: cset w9, ne
8278
; AARCH-NEXT: tbz x8, #63, .LBB1_2

llvm/test/CodeGen/AMDGPU/mad_64_32.ll

Lines changed: 75 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -159,93 +159,85 @@ define i128 @mad_i64_i32_sextops_i32_i128(i32 %arg0, i32 %arg1, i128 %arg2) #0 {
159159
; CI: ; %bb.0:
160160
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
161161
; CI-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v0, v1, 0
162-
; CI-NEXT: v_ashrrev_i32_e32 v11, 31, v0
162+
; CI-NEXT: v_ashrrev_i32_e32 v13, 31, v0
163163
; CI-NEXT: v_mov_b32_e32 v8, 0
164-
; CI-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v11, v1, v[7:8]
165-
; CI-NEXT: v_ashrrev_i32_e32 v12, 31, v1
166-
; CI-NEXT: v_and_b32_e32 v14, v11, v1
167-
; CI-NEXT: v_mov_b32_e32 v1, v10
164+
; CI-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v1, v[7:8]
165+
; CI-NEXT: v_ashrrev_i32_e32 v14, 31, v1
166+
; CI-NEXT: v_mad_i64_i32 v[11:12], s[4:5], v1, v13, 0
167+
; CI-NEXT: v_mov_b32_e32 v7, v10
168168
; CI-NEXT: v_mov_b32_e32 v10, v8
169-
; CI-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v0, v12, v[9:10]
170-
; CI-NEXT: v_and_b32_e32 v13, v11, v12
171-
; CI-NEXT: v_sub_i32_e32 v9, vcc, 0, v14
172-
; CI-NEXT: v_subb_u32_e32 v10, vcc, 0, v13, vcc
173-
; CI-NEXT: v_mad_i64_i32 v[9:10], s[4:5], v12, v0, v[9:10]
174-
; CI-NEXT: v_mov_b32_e32 v0, v8
175-
; CI-NEXT: v_add_i32_e32 v0, vcc, v1, v0
176-
; CI-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, vcc
177-
; CI-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v11, v12, v[0:1]
178-
; CI-NEXT: v_add_i32_e32 v8, vcc, v0, v9
179-
; CI-NEXT: v_addc_u32_e32 v9, vcc, v1, v10, vcc
180-
; CI-NEXT: v_mov_b32_e32 v1, v7
169+
; CI-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v0, v14, v[9:10]
170+
; CI-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v14, v0, v[11:12]
171+
; CI-NEXT: v_add_i32_e32 v9, vcc, v7, v9
172+
; CI-NEXT: v_addc_u32_e64 v10, s[4:5], 0, 0, vcc
173+
; CI-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v14, v[9:10]
174+
; CI-NEXT: v_add_i32_e32 v7, vcc, v9, v0
175+
; CI-NEXT: v_addc_u32_e32 v9, vcc, v10, v1, vcc
176+
; CI-NEXT: v_mov_b32_e32 v1, v8
181177
; CI-NEXT: v_add_i32_e32 v0, vcc, v6, v2
182178
; CI-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
183-
; CI-NEXT: v_addc_u32_e32 v2, vcc, v8, v4, vcc
179+
; CI-NEXT: v_addc_u32_e32 v2, vcc, v7, v4, vcc
184180
; CI-NEXT: v_addc_u32_e32 v3, vcc, v9, v5, vcc
185181
; CI-NEXT: s_setpc_b64 s[30:31]
186182
;
187183
; SI-LABEL: mad_i64_i32_sextops_i32_i128:
188184
; SI: ; %bb.0:
189185
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
190186
; SI-NEXT: v_ashrrev_i32_e32 v6, 31, v0
187+
; SI-NEXT: v_mul_lo_u32 v11, v6, v1
188+
; SI-NEXT: v_mul_hi_u32 v12, v0, v1
191189
; SI-NEXT: v_ashrrev_i32_e32 v7, 31, v1
192-
; SI-NEXT: v_and_b32_e32 v9, v6, v1
193-
; SI-NEXT: v_and_b32_e32 v10, v7, v0
194-
; SI-NEXT: v_mul_lo_u32 v13, v6, v1
195-
; SI-NEXT: v_mul_hi_u32 v14, v0, v1
196-
; SI-NEXT: v_and_b32_e32 v8, v6, v7
197-
; SI-NEXT: v_add_i32_e32 v9, vcc, v10, v9
198-
; SI-NEXT: v_mul_hi_u32 v10, v6, v7
199-
; SI-NEXT: v_mul_i32_i24_e32 v11, v6, v7
200-
; SI-NEXT: v_mul_hi_u32 v6, v6, v1
201-
; SI-NEXT: v_mul_hi_u32 v12, v0, v7
202-
; SI-NEXT: v_mul_lo_u32 v7, v0, v7
203-
; SI-NEXT: v_addc_u32_e32 v8, vcc, v8, v8, vcc
204-
; SI-NEXT: v_add_i32_e32 v13, vcc, v13, v14
205-
; SI-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc
206-
; SI-NEXT: v_add_i32_e32 v7, vcc, v7, v13
207-
; SI-NEXT: v_addc_u32_e32 v12, vcc, 0, v12, vcc
208-
; SI-NEXT: v_add_i32_e32 v6, vcc, v6, v12
209-
; SI-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, vcc
210-
; SI-NEXT: v_add_i32_e32 v6, vcc, v11, v6
190+
; SI-NEXT: v_mul_hi_u32 v14, v6, v1
191+
; SI-NEXT: v_mul_lo_u32 v13, v0, v7
192+
; SI-NEXT: v_mul_hi_u32 v10, v0, v7
193+
; SI-NEXT: v_add_i32_e32 v12, vcc, v11, v12
194+
; SI-NEXT: v_addc_u32_e32 v14, vcc, 0, v14, vcc
195+
; SI-NEXT: v_mul_hi_u32 v8, v6, v7
196+
; SI-NEXT: v_add_i32_e32 v12, vcc, v13, v12
197+
; SI-NEXT: v_addc_u32_e32 v10, vcc, 0, v10, vcc
198+
; SI-NEXT: v_mul_i32_i24_e32 v9, v6, v7
199+
; SI-NEXT: v_add_i32_e32 v10, vcc, v14, v10
200+
; SI-NEXT: v_mul_hi_i32 v6, v1, v6
201+
; SI-NEXT: v_mul_hi_i32 v7, v7, v0
202+
; SI-NEXT: v_addc_u32_e64 v14, s[4:5], 0, 0, vcc
203+
; SI-NEXT: v_add_i32_e32 v9, vcc, v9, v10
204+
; SI-NEXT: v_addc_u32_e32 v8, vcc, v8, v14, vcc
205+
; SI-NEXT: v_add_i32_e32 v10, vcc, v13, v11
211206
; SI-NEXT: v_mul_lo_u32 v0, v0, v1
212-
; SI-NEXT: v_addc_u32_e32 v10, vcc, v10, v12, vcc
213-
; SI-NEXT: v_sub_i32_e32 v6, vcc, v6, v9
214-
; SI-NEXT: v_subb_u32_e32 v8, vcc, v10, v8, vcc
207+
; SI-NEXT: v_addc_u32_e32 v6, vcc, v7, v6, vcc
208+
; SI-NEXT: v_add_i32_e32 v7, vcc, v9, v10
209+
; SI-NEXT: v_addc_u32_e32 v6, vcc, v8, v6, vcc
215210
; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v2
216-
; SI-NEXT: v_addc_u32_e32 v1, vcc, v7, v3, vcc
217-
; SI-NEXT: v_addc_u32_e32 v2, vcc, v6, v4, vcc
218-
; SI-NEXT: v_addc_u32_e32 v3, vcc, v8, v5, vcc
211+
; SI-NEXT: v_addc_u32_e32 v1, vcc, v12, v3, vcc
212+
; SI-NEXT: v_addc_u32_e32 v2, vcc, v7, v4, vcc
213+
; SI-NEXT: v_addc_u32_e32 v3, vcc, v6, v5, vcc
219214
; SI-NEXT: s_setpc_b64 s[30:31]
220215
;
221216
; GFX9-LABEL: mad_i64_i32_sextops_i32_i128:
222217
; GFX9: ; %bb.0:
223218
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
224-
; GFX9-NEXT: v_ashrrev_i32_e32 v14, 31, v0
225-
; GFX9-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v0, v1, 0
226-
; GFX9-NEXT: v_ashrrev_i32_e32 v15, 31, v1
227-
; GFX9-NEXT: v_and_b32_e32 v6, v14, v1
228-
; GFX9-NEXT: v_mov_b32_e32 v11, 0
229-
; GFX9-NEXT: v_mov_b32_e32 v10, v9
230-
; GFX9-NEXT: v_and_b32_e32 v7, v14, v15
231-
; GFX9-NEXT: v_sub_co_u32_e32 v6, vcc, 0, v6
232-
; GFX9-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v14, v1, v[10:11]
233-
; GFX9-NEXT: v_subb_co_u32_e32 v7, vcc, 0, v7, vcc
234-
; GFX9-NEXT: v_mov_b32_e32 v10, v13
235-
; GFX9-NEXT: v_mov_b32_e32 v13, v11
236-
; GFX9-NEXT: v_mad_i64_i32 v[6:7], s[4:5], v15, v0, v[6:7]
237-
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v15, v[12:13]
238-
; GFX9-NEXT: v_mov_b32_e32 v12, v1
239-
; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, v10, v12
240-
; GFX9-NEXT: v_addc_co_u32_e64 v11, s[4:5], 0, 0, vcc
241-
; GFX9-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v15, v[10:11]
242-
; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v10, v6
243-
; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, v11, v7, vcc
244-
; GFX9-NEXT: v_mov_b32_e32 v1, v0
245-
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v8, v2
219+
; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v0, v1, 0
220+
; GFX9-NEXT: v_ashrrev_i32_e32 v13, 31, v0
221+
; GFX9-NEXT: v_mov_b32_e32 v9, 0
222+
; GFX9-NEXT: v_mov_b32_e32 v8, v7
223+
; GFX9-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v13, v1, v[8:9]
224+
; GFX9-NEXT: v_ashrrev_i32_e32 v14, 31, v1
225+
; GFX9-NEXT: v_mov_b32_e32 v8, v11
226+
; GFX9-NEXT: v_mov_b32_e32 v11, v9
227+
; GFX9-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v0, v14, v[10:11]
228+
; GFX9-NEXT: v_mov_b32_e32 v12, v11
229+
; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v8, v12
230+
; GFX9-NEXT: v_addc_co_u32_e64 v9, s[4:5], 0, 0, vcc
231+
; GFX9-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v13, v14, v[8:9]
232+
; GFX9-NEXT: v_mad_i64_i32 v[12:13], s[4:5], v1, v13, 0
233+
; GFX9-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v14, v0, v[12:13]
234+
; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v8, v0
235+
; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v9, v1, vcc
236+
; GFX9-NEXT: v_mov_b32_e32 v1, v10
237+
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v6, v2
246238
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
247-
; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v6, v4, vcc
248-
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v7, v5, vcc
239+
; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v7, v4, vcc
240+
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v8, v5, vcc
249241
; GFX9-NEXT: s_setpc_b64 s[30:31]
250242
;
251243
; GFX11-LABEL: mad_i64_i32_sextops_i32_i128:
@@ -254,30 +246,27 @@ define i128 @mad_i64_i32_sextops_i32_i128(i32 %arg0, i32 %arg1, i128 %arg2) #0 {
254246
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
255247
; GFX11-NEXT: v_mad_u64_u32 v[6:7], null, v0, v1, 0
256248
; GFX11-NEXT: v_mov_b32_e32 v8, 0
257-
; GFX11-NEXT: v_ashrrev_i32_e32 v16, 31, v0
258-
; GFX11-NEXT: v_ashrrev_i32_e32 v17, 31, v1
249+
; GFX11-NEXT: v_ashrrev_i32_e32 v14, 31, v0
250+
; GFX11-NEXT: v_ashrrev_i32_e32 v15, 31, v1
259251
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
260-
; GFX11-NEXT: v_mad_u64_u32 v[9:10], null, v16, v1, v[7:8]
252+
; GFX11-NEXT: v_mad_u64_u32 v[9:10], null, v14, v1, v[7:8]
261253
; GFX11-NEXT: v_dual_mov_b32 v7, v10 :: v_dual_mov_b32 v10, v8
262-
; GFX11-NEXT: v_and_b32_e32 v8, v16, v1
263-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
264-
; GFX11-NEXT: v_mad_u64_u32 v[11:12], null, v0, v17, v[9:10]
265-
; GFX11-NEXT: v_and_b32_e32 v9, v16, v17
266-
; GFX11-NEXT: v_sub_co_u32 v8, vcc_lo, 0, v8
267-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
268-
; GFX11-NEXT: v_sub_co_ci_u32_e32 v9, vcc_lo, 0, v9, vcc_lo
269-
; GFX11-NEXT: v_mov_b32_e32 v1, v12
254+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
255+
; GFX11-NEXT: v_mad_u64_u32 v[11:12], null, v0, v15, v[9:10]
256+
; GFX11-NEXT: v_mad_i64_i32 v[9:10], null, v1, v14, 0
257+
; GFX11-NEXT: v_mov_b32_e32 v8, v12
270258
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
271-
; GFX11-NEXT: v_mad_i64_i32 v[14:15], null, v17, v0, v[8:9]
272-
; GFX11-NEXT: v_add_co_u32 v12, s0, v7, v1
273-
; GFX11-NEXT: v_mov_b32_e32 v7, v11
274-
; GFX11-NEXT: v_add_co_ci_u32_e64 v13, null, 0, 0, s0
259+
; GFX11-NEXT: v_mad_i64_i32 v[12:13], null, v15, v0, v[9:10]
260+
; GFX11-NEXT: v_add_co_u32 v7, s0, v7, v8
275261
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
276-
; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v16, v17, v[12:13]
277-
; GFX11-NEXT: v_add_co_u32 v8, vcc_lo, v0, v14
278-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
279-
; GFX11-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v1, v15, vcc_lo
262+
; GFX11-NEXT: v_add_co_ci_u32_e64 v8, null, 0, 0, s0
263+
; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v14, v15, v[7:8]
264+
; GFX11-NEXT: v_mov_b32_e32 v7, v11
265+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
266+
; GFX11-NEXT: v_add_co_u32 v8, vcc_lo, v0, v12
267+
; GFX11-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v1, v13, vcc_lo
280268
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v6, v2
269+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
281270
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v7, v3, vcc_lo
282271
; GFX11-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v8, v4, vcc_lo
283272
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)

llvm/test/CodeGen/PowerPC/pr45448.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,7 @@ define hidden void @julia_tryparse_internal_45896() #0 {
2525
; CHECK-NEXT: rldic r5, r5, 4, 32
2626
; CHECK-NEXT: crnot 4*cr5+lt, eq
2727
; CHECK-NEXT: mulhdu r3, r3, r5
28-
; CHECK-NEXT: and r6, r4, r5
29-
; CHECK-NEXT: sub r6, r3, r6
28+
; CHECK-NEXT: maddld r6, r4, r5, r3
3029
; CHECK-NEXT: cmpld cr1, r6, r3
3130
; CHECK-NEXT: mulhdu. r3, r4, r5
3231
; CHECK-NEXT: bc 4, 4*cr5+lt, .LBB0_10

llvm/test/CodeGen/RISCV/mul.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1480,18 +1480,18 @@ define i64 @mulhsu_i64(i64 %a, i64 %b) nounwind {
14801480
; RV32IM-NEXT: add a5, a6, a2
14811481
; RV32IM-NEXT: mul a7, a1, a3
14821482
; RV32IM-NEXT: add t0, a7, a5
1483-
; RV32IM-NEXT: and t1, a4, a0
1484-
; RV32IM-NEXT: sub a2, t0, t1
1483+
; RV32IM-NEXT: mul t1, a4, a0
1484+
; RV32IM-NEXT: add a2, t0, t1
14851485
; RV32IM-NEXT: sltu t2, a2, t0
14861486
; RV32IM-NEXT: sltu a7, t0, a7
14871487
; RV32IM-NEXT: sltu a5, a5, a6
14881488
; RV32IM-NEXT: mulhu a3, a1, a3
14891489
; RV32IM-NEXT: add a3, a3, a5
14901490
; RV32IM-NEXT: add a3, a3, a7
1491-
; RV32IM-NEXT: and a1, a4, a1
1491+
; RV32IM-NEXT: mul a1, a4, a1
14921492
; RV32IM-NEXT: mulhu a0, a4, a0
1493-
; RV32IM-NEXT: sub a0, a0, a1
1494-
; RV32IM-NEXT: sub a0, a0, t1
1493+
; RV32IM-NEXT: add a0, a0, a1
1494+
; RV32IM-NEXT: add a0, a0, t1
14951495
; RV32IM-NEXT: add a0, a3, a0
14961496
; RV32IM-NEXT: add a1, a0, t2
14971497
; RV32IM-NEXT: mv a0, a2

0 commit comments

Comments
 (0)