Skip to content

Commit e8b3ffa

Browse files
committed
[DAGCombiner] Fold (mul (sra X, BW-1), Y) -> (neg (and (sra X, BW-1), Y))
(sra X, BW-1) is either 0 or -1, so the multiply is a conditional negate of Y. This pattern shows up when type-legalizing wide multiplies involving a sign-extended value. Fixes PR57549. Reviewed By: RKSimon. Differential Revision: https://reviews.llvm.org/D133399
1 parent c9447c6 commit e8b3ffa

File tree

13 files changed

+1924
-1734
lines changed

13 files changed

+1924
-1734
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3957,6 +3957,30 @@ SDValue DAGCombiner::visitMULFIX(SDNode *N) {
39573957
return SDValue();
39583958
}
39593959

3960+
// Fold (mul (sra X, BW-1), Y) -> (neg (and (sra X, BW-1), Y))
//
// (sra X, BW-1) is either 0 or -1 (all ones), so multiplying Y by it yields
// either 0 or -Y. Masking Y with the shift result and then negating produces
// the same value without a multiply.
//
// \p N supplies the value type and the SDLoc used for the new nodes. \p N0 is
// the operand checked for the SRA pattern and \p N1 is the other operand (Y).
// Only \p N0 is inspected, which is why visitMUL calls this once per operand
// order.
//
// Returns an empty SDValue when the pattern does not match or when the
// function is being optimized for minsize; otherwise returns the
// (sub 0, (and N0, N1)) node.
3961+
static SDValue foldSraMulToAndNeg(SDNode *N, SDValue N0, SDValue N1,
3962+
SelectionDAG &DAG) {
3963+
// Only an arithmetic shift right in N0 can start the pattern.
if (N0.getOpcode() != ISD::SRA)
3964+
return SDValue();
3965+
3966+
EVT VT = N->getValueType(0);
3967+
3968+
// TODO: Use computeNumSignBits() == BitWidth?
3969+
unsigned BitWidth = VT.getScalarSizeInBits();
3970+
// The shift amount must be a constant (or constant splat) equal to BW-1, so
// that the shifted value is a pure sign mask: 0 or -1.
ConstantSDNode *ShiftAmt = isConstOrConstSplat(N0.getOperand(1));
3971+
if (!ShiftAmt || ShiftAmt->getAPIntValue() != (BitWidth - 1))
3972+
return SDValue();
3973+
3974+
// If optimizing for minsize, we don't want to increase the number of
3975+
// instructions.
3976+
if (DAG.getMachineFunction().getFunction().hasMinSize())
3977+
return SDValue();
3978+
3979+
// Build 0 - (N0 & N1); the negate is expressed as a subtraction from zero.
SDLoc dl(N);
3980+
SDValue And = DAG.getNode(ISD::AND, dl, VT, N0, N1);
3981+
return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), And);
3982+
}
3983+
39603984
SDValue DAGCombiner::visitMUL(SDNode *N) {
39613985
SDValue N0 = N->getOperand(0);
39623986
SDValue N1 = N->getOperand(1);
@@ -4167,6 +4191,11 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
41674191
}
41684192
}
41694193

4194+
if (SDValue V = foldSraMulToAndNeg(N, N0, N1, DAG))
4195+
return V;
4196+
if (SDValue V = foldSraMulToAndNeg(N, N1, N0, DAG))
4197+
return V;
4198+
41704199
// reassociate mul
41714200
if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
41724201
return RMUL;

llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,21 +39,24 @@ define i128 @__muloti4(i128 %0, i128 %1, i32* nocapture nonnull writeonly align
3939
; AARCH: // %bb.0: // %Entry
4040
; AARCH-NEXT: asr x9, x1, #63
4141
; AARCH-NEXT: asr x10, x3, #63
42+
; AARCH-NEXT: and x11, x9, x2
43+
; AARCH-NEXT: and x14, x10, x1
44+
; AARCH-NEXT: umulh x12, x2, x9
45+
; AARCH-NEXT: and x9, x9, x3
46+
; AARCH-NEXT: umulh x13, x10, x0
47+
; AARCH-NEXT: and x10, x10, x0
48+
; AARCH-NEXT: sub x12, x12, x11
49+
; AARCH-NEXT: neg x11, x11
50+
; AARCH-NEXT: sub x13, x13, x14
51+
; AARCH-NEXT: sub x9, x12, x9
52+
; AARCH-NEXT: sub x12, x13, x10
53+
; AARCH-NEXT: neg x10, x10
4254
; AARCH-NEXT: umulh x14, x0, x2
43-
; AARCH-NEXT: mov x8, x1
44-
; AARCH-NEXT: mul x11, x2, x9
45-
; AARCH-NEXT: str wzr, [x4]
46-
; AARCH-NEXT: umulh x12, x10, x0
47-
; AARCH-NEXT: umulh x13, x2, x9
48-
; AARCH-NEXT: madd x12, x10, x1, x12
49-
; AARCH-NEXT: add x13, x13, x11
50-
; AARCH-NEXT: mul x10, x10, x0
51-
; AARCH-NEXT: madd x9, x3, x9, x13
52-
; AARCH-NEXT: add x12, x12, x10
5355
; AARCH-NEXT: adds x10, x10, x11
5456
; AARCH-NEXT: mul x11, x1, x2
5557
; AARCH-NEXT: adc x9, x12, x9
5658
; AARCH-NEXT: umulh x13, x1, x2
59+
; AARCH-NEXT: mov x8, x1
5760
; AARCH-NEXT: mul x12, x0, x3
5861
; AARCH-NEXT: adds x11, x11, x14
5962
; AARCH-NEXT: umulh x14, x0, x3
@@ -73,6 +76,7 @@ define i128 @__muloti4(i128 %0, i128 %1, i32* nocapture nonnull writeonly align
7376
; AARCH-NEXT: eor x9, x9, x11
7477
; AARCH-NEXT: eor x10, x10, x11
7578
; AARCH-NEXT: orr x9, x10, x9
79+
; AARCH-NEXT: str wzr, [x4]
7680
; AARCH-NEXT: cmp x9, #0
7781
; AARCH-NEXT: cset w9, ne
7882
; AARCH-NEXT: tbz x8, #63, .LBB1_2

llvm/test/CodeGen/AMDGPU/mad_64_32.ll

Lines changed: 86 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -159,85 +159,93 @@ define i128 @mad_i64_i32_sextops_i32_i128(i32 %arg0, i32 %arg1, i128 %arg2) #0 {
159159
; CI: ; %bb.0:
160160
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
161161
; CI-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v0, v1, 0
162-
; CI-NEXT: v_ashrrev_i32_e32 v13, 31, v0
162+
; CI-NEXT: v_ashrrev_i32_e32 v11, 31, v0
163163
; CI-NEXT: v_mov_b32_e32 v8, 0
164-
; CI-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v1, v[7:8]
165-
; CI-NEXT: v_ashrrev_i32_e32 v14, 31, v1
166-
; CI-NEXT: v_mad_i64_i32 v[11:12], s[4:5], v1, v13, 0
167-
; CI-NEXT: v_mov_b32_e32 v7, v10
164+
; CI-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v11, v1, v[7:8]
165+
; CI-NEXT: v_ashrrev_i32_e32 v12, 31, v1
166+
; CI-NEXT: v_and_b32_e32 v14, v11, v1
167+
; CI-NEXT: v_mov_b32_e32 v1, v10
168168
; CI-NEXT: v_mov_b32_e32 v10, v8
169-
; CI-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v0, v14, v[9:10]
170-
; CI-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v14, v0, v[11:12]
171-
; CI-NEXT: v_add_i32_e32 v9, vcc, v7, v9
172-
; CI-NEXT: v_addc_u32_e64 v10, s[4:5], 0, 0, vcc
173-
; CI-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v14, v[9:10]
174-
; CI-NEXT: v_add_i32_e32 v7, vcc, v9, v0
175-
; CI-NEXT: v_addc_u32_e32 v9, vcc, v10, v1, vcc
176-
; CI-NEXT: v_mov_b32_e32 v1, v8
169+
; CI-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v0, v12, v[9:10]
170+
; CI-NEXT: v_and_b32_e32 v13, v11, v12
171+
; CI-NEXT: v_sub_i32_e32 v9, vcc, 0, v14
172+
; CI-NEXT: v_subb_u32_e32 v10, vcc, 0, v13, vcc
173+
; CI-NEXT: v_mad_i64_i32 v[9:10], s[4:5], v12, v0, v[9:10]
174+
; CI-NEXT: v_mov_b32_e32 v0, v8
175+
; CI-NEXT: v_add_i32_e32 v0, vcc, v1, v0
176+
; CI-NEXT: v_addc_u32_e64 v1, s[4:5], 0, 0, vcc
177+
; CI-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v11, v12, v[0:1]
178+
; CI-NEXT: v_add_i32_e32 v8, vcc, v0, v9
179+
; CI-NEXT: v_addc_u32_e32 v9, vcc, v1, v10, vcc
180+
; CI-NEXT: v_mov_b32_e32 v1, v7
177181
; CI-NEXT: v_add_i32_e32 v0, vcc, v6, v2
178182
; CI-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
179-
; CI-NEXT: v_addc_u32_e32 v2, vcc, v7, v4, vcc
183+
; CI-NEXT: v_addc_u32_e32 v2, vcc, v8, v4, vcc
180184
; CI-NEXT: v_addc_u32_e32 v3, vcc, v9, v5, vcc
181185
; CI-NEXT: s_setpc_b64 s[30:31]
182186
;
183187
; SI-LABEL: mad_i64_i32_sextops_i32_i128:
184188
; SI: ; %bb.0:
185189
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
186190
; SI-NEXT: v_ashrrev_i32_e32 v6, 31, v0
187-
; SI-NEXT: v_mul_lo_u32 v11, v6, v1
188-
; SI-NEXT: v_mul_hi_u32 v12, v0, v1
189191
; SI-NEXT: v_ashrrev_i32_e32 v7, 31, v1
190-
; SI-NEXT: v_mul_hi_u32 v14, v6, v1
191-
; SI-NEXT: v_mul_lo_u32 v13, v0, v7
192-
; SI-NEXT: v_mul_hi_u32 v10, v0, v7
193-
; SI-NEXT: v_add_i32_e32 v12, vcc, v11, v12
194-
; SI-NEXT: v_addc_u32_e32 v14, vcc, 0, v14, vcc
195-
; SI-NEXT: v_mul_hi_u32 v8, v6, v7
196-
; SI-NEXT: v_add_i32_e32 v12, vcc, v13, v12
197-
; SI-NEXT: v_addc_u32_e32 v10, vcc, 0, v10, vcc
198-
; SI-NEXT: v_mul_i32_i24_e32 v9, v6, v7
199-
; SI-NEXT: v_add_i32_e32 v10, vcc, v14, v10
200-
; SI-NEXT: v_mul_hi_i32 v6, v1, v6
201-
; SI-NEXT: v_mul_hi_i32 v7, v7, v0
202-
; SI-NEXT: v_addc_u32_e64 v14, s[4:5], 0, 0, vcc
203-
; SI-NEXT: v_add_i32_e32 v9, vcc, v9, v10
204-
; SI-NEXT: v_addc_u32_e32 v8, vcc, v8, v14, vcc
205-
; SI-NEXT: v_add_i32_e32 v10, vcc, v13, v11
192+
; SI-NEXT: v_and_b32_e32 v9, v6, v1
193+
; SI-NEXT: v_and_b32_e32 v10, v7, v0
194+
; SI-NEXT: v_mul_lo_u32 v13, v6, v1
195+
; SI-NEXT: v_mul_hi_u32 v14, v0, v1
196+
; SI-NEXT: v_and_b32_e32 v8, v6, v7
197+
; SI-NEXT: v_add_i32_e32 v9, vcc, v10, v9
198+
; SI-NEXT: v_mul_hi_u32 v10, v6, v7
199+
; SI-NEXT: v_mul_i32_i24_e32 v11, v6, v7
200+
; SI-NEXT: v_mul_hi_u32 v6, v6, v1
201+
; SI-NEXT: v_mul_hi_u32 v12, v0, v7
202+
; SI-NEXT: v_mul_lo_u32 v7, v0, v7
203+
; SI-NEXT: v_addc_u32_e32 v8, vcc, v8, v8, vcc
204+
; SI-NEXT: v_add_i32_e32 v13, vcc, v13, v14
205+
; SI-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc
206+
; SI-NEXT: v_add_i32_e32 v7, vcc, v7, v13
207+
; SI-NEXT: v_addc_u32_e32 v12, vcc, 0, v12, vcc
208+
; SI-NEXT: v_add_i32_e32 v6, vcc, v6, v12
209+
; SI-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, vcc
210+
; SI-NEXT: v_add_i32_e32 v6, vcc, v11, v6
206211
; SI-NEXT: v_mul_lo_u32 v0, v0, v1
207-
; SI-NEXT: v_addc_u32_e32 v6, vcc, v7, v6, vcc
208-
; SI-NEXT: v_add_i32_e32 v7, vcc, v9, v10
209-
; SI-NEXT: v_addc_u32_e32 v6, vcc, v8, v6, vcc
212+
; SI-NEXT: v_addc_u32_e32 v10, vcc, v10, v12, vcc
213+
; SI-NEXT: v_sub_i32_e32 v6, vcc, v6, v9
214+
; SI-NEXT: v_subb_u32_e32 v8, vcc, v10, v8, vcc
210215
; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v2
211-
; SI-NEXT: v_addc_u32_e32 v1, vcc, v12, v3, vcc
212-
; SI-NEXT: v_addc_u32_e32 v2, vcc, v7, v4, vcc
213-
; SI-NEXT: v_addc_u32_e32 v3, vcc, v6, v5, vcc
216+
; SI-NEXT: v_addc_u32_e32 v1, vcc, v7, v3, vcc
217+
; SI-NEXT: v_addc_u32_e32 v2, vcc, v6, v4, vcc
218+
; SI-NEXT: v_addc_u32_e32 v3, vcc, v8, v5, vcc
214219
; SI-NEXT: s_setpc_b64 s[30:31]
215220
;
216221
; GFX9-LABEL: mad_i64_i32_sextops_i32_i128:
217222
; GFX9: ; %bb.0:
218223
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
219-
; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v0, v1, 0
220-
; GFX9-NEXT: v_ashrrev_i32_e32 v13, 31, v0
221-
; GFX9-NEXT: v_mov_b32_e32 v9, 0
222-
; GFX9-NEXT: v_mov_b32_e32 v8, v7
223-
; GFX9-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v13, v1, v[8:9]
224-
; GFX9-NEXT: v_ashrrev_i32_e32 v14, 31, v1
225-
; GFX9-NEXT: v_mov_b32_e32 v8, v11
226-
; GFX9-NEXT: v_mov_b32_e32 v11, v9
227-
; GFX9-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v0, v14, v[10:11]
228-
; GFX9-NEXT: v_mov_b32_e32 v12, v11
229-
; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v8, v12
230-
; GFX9-NEXT: v_addc_co_u32_e64 v9, s[4:5], 0, 0, vcc
231-
; GFX9-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v13, v14, v[8:9]
232-
; GFX9-NEXT: v_mad_i64_i32 v[12:13], s[4:5], v1, v13, 0
233-
; GFX9-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v14, v0, v[12:13]
234-
; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v8, v0
235-
; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v9, v1, vcc
236-
; GFX9-NEXT: v_mov_b32_e32 v1, v10
237-
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v6, v2
224+
; GFX9-NEXT: v_ashrrev_i32_e32 v14, 31, v0
225+
; GFX9-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v0, v1, 0
226+
; GFX9-NEXT: v_ashrrev_i32_e32 v15, 31, v1
227+
; GFX9-NEXT: v_and_b32_e32 v6, v14, v1
228+
; GFX9-NEXT: v_mov_b32_e32 v11, 0
229+
; GFX9-NEXT: v_mov_b32_e32 v10, v9
230+
; GFX9-NEXT: v_and_b32_e32 v7, v14, v15
231+
; GFX9-NEXT: v_sub_co_u32_e32 v6, vcc, 0, v6
232+
; GFX9-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v14, v1, v[10:11]
233+
; GFX9-NEXT: v_subb_co_u32_e32 v7, vcc, 0, v7, vcc
234+
; GFX9-NEXT: v_mov_b32_e32 v10, v13
235+
; GFX9-NEXT: v_mov_b32_e32 v13, v11
236+
; GFX9-NEXT: v_mad_i64_i32 v[6:7], s[4:5], v15, v0, v[6:7]
237+
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v15, v[12:13]
238+
; GFX9-NEXT: v_mov_b32_e32 v12, v1
239+
; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, v10, v12
240+
; GFX9-NEXT: v_addc_co_u32_e64 v11, s[4:5], 0, 0, vcc
241+
; GFX9-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v15, v[10:11]
242+
; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v10, v6
243+
; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, v11, v7, vcc
244+
; GFX9-NEXT: v_mov_b32_e32 v1, v0
245+
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v8, v2
238246
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
239-
; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v7, v4, vcc
240-
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v8, v5, vcc
247+
; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v6, v4, vcc
248+
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v7, v5, vcc
241249
; GFX9-NEXT: s_setpc_b64 s[30:31]
242250
;
243251
; GFX11-LABEL: mad_i64_i32_sextops_i32_i128:
@@ -246,27 +254,30 @@ define i128 @mad_i64_i32_sextops_i32_i128(i32 %arg0, i32 %arg1, i128 %arg2) #0 {
246254
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
247255
; GFX11-NEXT: v_mad_u64_u32 v[6:7], null, v0, v1, 0
248256
; GFX11-NEXT: v_mov_b32_e32 v8, 0
249-
; GFX11-NEXT: v_ashrrev_i32_e32 v14, 31, v0
250-
; GFX11-NEXT: v_ashrrev_i32_e32 v15, 31, v1
257+
; GFX11-NEXT: v_ashrrev_i32_e32 v16, 31, v0
258+
; GFX11-NEXT: v_ashrrev_i32_e32 v17, 31, v1
251259
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
252-
; GFX11-NEXT: v_mad_u64_u32 v[9:10], null, v14, v1, v[7:8]
260+
; GFX11-NEXT: v_mad_u64_u32 v[9:10], null, v16, v1, v[7:8]
253261
; GFX11-NEXT: v_dual_mov_b32 v7, v10 :: v_dual_mov_b32 v10, v8
254-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
255-
; GFX11-NEXT: v_mad_u64_u32 v[11:12], null, v0, v15, v[9:10]
256-
; GFX11-NEXT: v_mad_i64_i32 v[9:10], null, v1, v14, 0
257-
; GFX11-NEXT: v_mov_b32_e32 v8, v12
262+
; GFX11-NEXT: v_and_b32_e32 v8, v16, v1
263+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
264+
; GFX11-NEXT: v_mad_u64_u32 v[11:12], null, v0, v17, v[9:10]
265+
; GFX11-NEXT: v_and_b32_e32 v9, v16, v17
266+
; GFX11-NEXT: v_sub_co_u32 v8, vcc_lo, 0, v8
267+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
268+
; GFX11-NEXT: v_sub_co_ci_u32_e32 v9, vcc_lo, 0, v9, vcc_lo
269+
; GFX11-NEXT: v_mov_b32_e32 v1, v12
258270
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
259-
; GFX11-NEXT: v_mad_i64_i32 v[12:13], null, v15, v0, v[9:10]
260-
; GFX11-NEXT: v_add_co_u32 v7, s0, v7, v8
261-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
262-
; GFX11-NEXT: v_add_co_ci_u32_e64 v8, null, 0, 0, s0
263-
; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v14, v15, v[7:8]
271+
; GFX11-NEXT: v_mad_i64_i32 v[14:15], null, v17, v0, v[8:9]
272+
; GFX11-NEXT: v_add_co_u32 v12, s0, v7, v1
264273
; GFX11-NEXT: v_mov_b32_e32 v7, v11
265-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
266-
; GFX11-NEXT: v_add_co_u32 v8, vcc_lo, v0, v12
267-
; GFX11-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v1, v13, vcc_lo
274+
; GFX11-NEXT: v_add_co_ci_u32_e64 v13, null, 0, 0, s0
275+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
276+
; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v16, v17, v[12:13]
277+
; GFX11-NEXT: v_add_co_u32 v8, vcc_lo, v0, v14
278+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
279+
; GFX11-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v1, v15, vcc_lo
268280
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v6, v2
269-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
270281
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v7, v3, vcc_lo
271282
; GFX11-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v8, v4, vcc_lo
272283
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)

llvm/test/CodeGen/PowerPC/pr45448.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ define hidden void @julia_tryparse_internal_45896() #0 {
2525
; CHECK-NEXT: rldic r5, r5, 4, 32
2626
; CHECK-NEXT: crnot 4*cr5+lt, eq
2727
; CHECK-NEXT: mulhdu r3, r3, r5
28-
; CHECK-NEXT: maddld r6, r4, r5, r3
28+
; CHECK-NEXT: and r6, r4, r5
29+
; CHECK-NEXT: sub r6, r3, r6
2930
; CHECK-NEXT: cmpld cr1, r6, r3
3031
; CHECK-NEXT: mulhdu. r3, r4, r5
3132
; CHECK-NEXT: bc 4, 4*cr5+lt, .LBB0_10

llvm/test/CodeGen/RISCV/mul.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1480,18 +1480,18 @@ define i64 @mulhsu_i64(i64 %a, i64 %b) nounwind {
14801480
; RV32IM-NEXT: add a5, a6, a2
14811481
; RV32IM-NEXT: mul a7, a1, a3
14821482
; RV32IM-NEXT: add t0, a7, a5
1483-
; RV32IM-NEXT: mul t1, a4, a0
1484-
; RV32IM-NEXT: add a2, t0, t1
1483+
; RV32IM-NEXT: and t1, a4, a0
1484+
; RV32IM-NEXT: sub a2, t0, t1
14851485
; RV32IM-NEXT: sltu t2, a2, t0
14861486
; RV32IM-NEXT: sltu a7, t0, a7
14871487
; RV32IM-NEXT: sltu a5, a5, a6
14881488
; RV32IM-NEXT: mulhu a3, a1, a3
14891489
; RV32IM-NEXT: add a3, a3, a5
14901490
; RV32IM-NEXT: add a3, a3, a7
1491-
; RV32IM-NEXT: mul a1, a4, a1
1491+
; RV32IM-NEXT: and a1, a4, a1
14921492
; RV32IM-NEXT: mulhu a0, a4, a0
1493-
; RV32IM-NEXT: add a0, a0, a1
1494-
; RV32IM-NEXT: add a0, a0, t1
1493+
; RV32IM-NEXT: sub a0, a0, a1
1494+
; RV32IM-NEXT: sub a0, a0, t1
14951495
; RV32IM-NEXT: add a0, a3, a0
14961496
; RV32IM-NEXT: add a1, a0, t2
14971497
; RV32IM-NEXT: mv a0, a2

0 commit comments

Comments
 (0)