Skip to content

Commit a39eadc

Browse files
jacquesguanbenshi001
authored and committed
[DAGCombiner] Teach combineShiftToMULH to handle constants and constant splat vectors.
Fold (srl (mul (zext i32:$a to i64), i64:c), 32) -> (mulhu $a, (trunc c to i32)), if c can be truncated to i32 without loss. Reviewed By: frasercrmck, craig.topper, RKSimon Differential Revision: https://reviews.llvm.org/D108129
1 parent 4ec1b8e commit a39eadc

File tree

3 files changed

+164
-207
lines changed

3 files changed

+164
-207
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8552,25 +8552,42 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
85528552
// Both operands must be equivalent extend nodes.
85538553
SDValue LeftOp = ShiftOperand.getOperand(0);
85548554
SDValue RightOp = ShiftOperand.getOperand(1);
8555+
85558556
bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
85568557
bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
85578558

8558-
if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode())
8559+
if (!IsSignExt && !IsZeroExt)
85598560
return SDValue();
85608561

8562+
EVT NarrowVT = LeftOp.getOperand(0).getValueType();
8563+
unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
8564+
8565+
SDValue MulhRightOp;
8566+
if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
8567+
unsigned ActiveBits = IsSignExt
8568+
? Constant->getAPIntValue().getMinSignedBits()
8569+
: Constant->getAPIntValue().getActiveBits();
8570+
if (ActiveBits > NarrowVTSize)
8571+
return SDValue();
8572+
MulhRightOp = DAG.getConstant(
8573+
Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
8574+
NarrowVT);
8575+
} else {
8576+
if (LeftOp.getOpcode() != RightOp.getOpcode())
8577+
return SDValue();
8578+
// Check that the two extend nodes are the same type.
8579+
if (NarrowVT != RightOp.getOperand(0).getValueType())
8580+
return SDValue();
8581+
MulhRightOp = RightOp.getOperand(0);
8582+
}
8583+
85618584
EVT WideVT = LeftOp.getValueType();
85628585
// Proceed with the transformation if the wide types match.
85638586
assert((WideVT == RightOp.getValueType()) &&
85648587
"Cannot have a multiply node with two different operand types.");
85658588

8566-
EVT NarrowVT = LeftOp.getOperand(0).getValueType();
8567-
// Check that the two extend nodes are the same type.
8568-
if (NarrowVT != RightOp.getOperand(0).getValueType())
8569-
return SDValue();
8570-
85718589
// Proceed with the transformation if the wide type is twice as large
85728590
// as the narrow type.
8573-
unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
85748591
if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
85758592
return SDValue();
85768593

@@ -8589,8 +8606,8 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
85898606
if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
85908607
return SDValue();
85918608

8592-
SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),
8593-
RightOp.getOperand(0));
8609+
SDValue Result =
8610+
DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
85948611
return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT)
85958612
: DAG.getZExtOrTrunc(Result, DL, WideVT));
85968613
}

llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll

Lines changed: 74 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
3-
; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
2+
; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
3+
; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
44

55
; Test that the prepareSREMEqFold optimization doesn't crash on scalable
66
; vector types.
@@ -60,17 +60,21 @@ define <vscale x 1 x i32> @vmulh_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %x) {
6060
}
6161

6262
define <vscale x 1 x i32> @vmulh_vi_nxv1i32_0(<vscale x 1 x i32> %va) {
63-
; CHECK-LABEL: vmulh_vi_nxv1i32_0:
64-
; CHECK: # %bb.0:
65-
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu
66-
; CHECK-NEXT: vsext.vf2 v9, v8
67-
; CHECK-NEXT: addi a0, zero, -7
68-
; CHECK-NEXT: vmul.vx v8, v9, a0
69-
; CHECK-NEXT: addi a0, zero, 32
70-
; CHECK-NEXT: vsrl.vx v8, v8, a0
71-
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
72-
; CHECK-NEXT: vnsrl.wi v8, v8, 0
73-
; CHECK-NEXT: ret
63+
; RV32-LABEL: vmulh_vi_nxv1i32_0:
64+
; RV32: # %bb.0:
65+
; RV32-NEXT: addi a0, zero, -7
66+
; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, mu
67+
; RV32-NEXT: vmulh.vx v8, v8, a0
68+
; RV32-NEXT: ret
69+
;
70+
; RV64-LABEL: vmulh_vi_nxv1i32_0:
71+
; RV64: # %bb.0:
72+
; RV64-NEXT: addi a0, zero, 1
73+
; RV64-NEXT: slli a0, a0, 32
74+
; RV64-NEXT: addi a0, a0, -7
75+
; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, mu
76+
; RV64-NEXT: vmulh.vx v8, v8, a0
77+
; RV64-NEXT: ret
7478
%head1 = insertelement <vscale x 1 x i32> undef, i32 -7, i32 0
7579
%splat1 = shufflevector <vscale x 1 x i32> %head1, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
7680
%vb = sext <vscale x 1 x i32> %splat1 to <vscale x 1 x i64>
@@ -86,13 +90,9 @@ define <vscale x 1 x i32> @vmulh_vi_nxv1i32_0(<vscale x 1 x i32> %va) {
8690
define <vscale x 1 x i32> @vmulh_vi_nxv1i32_1(<vscale x 1 x i32> %va) {
8791
; CHECK-LABEL: vmulh_vi_nxv1i32_1:
8892
; CHECK: # %bb.0:
89-
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu
90-
; CHECK-NEXT: vsext.vf2 v9, v8
91-
; CHECK-NEXT: vsll.vi v8, v9, 4
92-
; CHECK-NEXT: addi a0, zero, 32
93-
; CHECK-NEXT: vsrl.vx v8, v8, a0
94-
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
95-
; CHECK-NEXT: vnsrl.wi v8, v8, 0
93+
; CHECK-NEXT: addi a0, zero, 16
94+
; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, mu
95+
; CHECK-NEXT: vmulh.vx v8, v8, a0
9696
; CHECK-NEXT: ret
9797
%head1 = insertelement <vscale x 1 x i32> undef, i32 16, i32 0
9898
%splat1 = shufflevector <vscale x 1 x i32> %head1, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
@@ -141,17 +141,21 @@ define <vscale x 2 x i32> @vmulh_vx_nxv2i32(<vscale x 2 x i32> %va, i32 %x) {
141141
}
142142

143143
define <vscale x 2 x i32> @vmulh_vi_nxv2i32_0(<vscale x 2 x i32> %va) {
144-
; CHECK-LABEL: vmulh_vi_nxv2i32_0:
145-
; CHECK: # %bb.0:
146-
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu
147-
; CHECK-NEXT: vsext.vf2 v10, v8
148-
; CHECK-NEXT: addi a0, zero, -7
149-
; CHECK-NEXT: vmul.vx v8, v10, a0
150-
; CHECK-NEXT: addi a0, zero, 32
151-
; CHECK-NEXT: vsrl.vx v10, v8, a0
152-
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
153-
; CHECK-NEXT: vnsrl.wi v8, v10, 0
154-
; CHECK-NEXT: ret
144+
; RV32-LABEL: vmulh_vi_nxv2i32_0:
145+
; RV32: # %bb.0:
146+
; RV32-NEXT: addi a0, zero, -7
147+
; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, mu
148+
; RV32-NEXT: vmulh.vx v8, v8, a0
149+
; RV32-NEXT: ret
150+
;
151+
; RV64-LABEL: vmulh_vi_nxv2i32_0:
152+
; RV64: # %bb.0:
153+
; RV64-NEXT: addi a0, zero, 1
154+
; RV64-NEXT: slli a0, a0, 32
155+
; RV64-NEXT: addi a0, a0, -7
156+
; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, mu
157+
; RV64-NEXT: vmulh.vx v8, v8, a0
158+
; RV64-NEXT: ret
155159
%head1 = insertelement <vscale x 2 x i32> undef, i32 -7, i32 0
156160
%splat1 = shufflevector <vscale x 2 x i32> %head1, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
157161
%vb = sext <vscale x 2 x i32> %splat1 to <vscale x 2 x i64>
@@ -167,13 +171,9 @@ define <vscale x 2 x i32> @vmulh_vi_nxv2i32_0(<vscale x 2 x i32> %va) {
167171
define <vscale x 2 x i32> @vmulh_vi_nxv2i32_1(<vscale x 2 x i32> %va) {
168172
; CHECK-LABEL: vmulh_vi_nxv2i32_1:
169173
; CHECK: # %bb.0:
170-
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu
171-
; CHECK-NEXT: vsext.vf2 v10, v8
172-
; CHECK-NEXT: vsll.vi v8, v10, 4
173-
; CHECK-NEXT: addi a0, zero, 32
174-
; CHECK-NEXT: vsrl.vx v10, v8, a0
175-
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
176-
; CHECK-NEXT: vnsrl.wi v8, v10, 0
174+
; CHECK-NEXT: addi a0, zero, 16
175+
; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu
176+
; CHECK-NEXT: vmulh.vx v8, v8, a0
177177
; CHECK-NEXT: ret
178178
%head1 = insertelement <vscale x 2 x i32> undef, i32 16, i32 0
179179
%splat1 = shufflevector <vscale x 2 x i32> %head1, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
@@ -222,17 +222,21 @@ define <vscale x 4 x i32> @vmulh_vx_nxv4i32(<vscale x 4 x i32> %va, i32 %x) {
222222
}
223223

224224
define <vscale x 4 x i32> @vmulh_vi_nxv4i32_0(<vscale x 4 x i32> %va) {
225-
; CHECK-LABEL: vmulh_vi_nxv4i32_0:
226-
; CHECK: # %bb.0:
227-
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu
228-
; CHECK-NEXT: vsext.vf2 v12, v8
229-
; CHECK-NEXT: addi a0, zero, -7
230-
; CHECK-NEXT: vmul.vx v8, v12, a0
231-
; CHECK-NEXT: addi a0, zero, 32
232-
; CHECK-NEXT: vsrl.vx v12, v8, a0
233-
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
234-
; CHECK-NEXT: vnsrl.wi v8, v12, 0
235-
; CHECK-NEXT: ret
225+
; RV32-LABEL: vmulh_vi_nxv4i32_0:
226+
; RV32: # %bb.0:
227+
; RV32-NEXT: addi a0, zero, -7
228+
; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, mu
229+
; RV32-NEXT: vmulh.vx v8, v8, a0
230+
; RV32-NEXT: ret
231+
;
232+
; RV64-LABEL: vmulh_vi_nxv4i32_0:
233+
; RV64: # %bb.0:
234+
; RV64-NEXT: addi a0, zero, 1
235+
; RV64-NEXT: slli a0, a0, 32
236+
; RV64-NEXT: addi a0, a0, -7
237+
; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, mu
238+
; RV64-NEXT: vmulh.vx v8, v8, a0
239+
; RV64-NEXT: ret
236240
%head1 = insertelement <vscale x 4 x i32> undef, i32 -7, i32 0
237241
%splat1 = shufflevector <vscale x 4 x i32> %head1, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
238242
%vb = sext <vscale x 4 x i32> %splat1 to <vscale x 4 x i64>
@@ -248,13 +252,9 @@ define <vscale x 4 x i32> @vmulh_vi_nxv4i32_0(<vscale x 4 x i32> %va) {
248252
define <vscale x 4 x i32> @vmulh_vi_nxv4i32_1(<vscale x 4 x i32> %va) {
249253
; CHECK-LABEL: vmulh_vi_nxv4i32_1:
250254
; CHECK: # %bb.0:
251-
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu
252-
; CHECK-NEXT: vsext.vf2 v12, v8
253-
; CHECK-NEXT: vsll.vi v8, v12, 4
254-
; CHECK-NEXT: addi a0, zero, 32
255-
; CHECK-NEXT: vsrl.vx v12, v8, a0
256-
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
257-
; CHECK-NEXT: vnsrl.wi v8, v12, 0
255+
; CHECK-NEXT: addi a0, zero, 16
256+
; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu
257+
; CHECK-NEXT: vmulh.vx v8, v8, a0
258258
; CHECK-NEXT: ret
259259
%head1 = insertelement <vscale x 4 x i32> undef, i32 16, i32 0
260260
%splat1 = shufflevector <vscale x 4 x i32> %head1, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
@@ -303,17 +303,21 @@ define <vscale x 8 x i32> @vmulh_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %x) {
303303
}
304304

305305
define <vscale x 8 x i32> @vmulh_vi_nxv8i32_0(<vscale x 8 x i32> %va) {
306-
; CHECK-LABEL: vmulh_vi_nxv8i32_0:
307-
; CHECK: # %bb.0:
308-
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu
309-
; CHECK-NEXT: vsext.vf2 v16, v8
310-
; CHECK-NEXT: addi a0, zero, -7
311-
; CHECK-NEXT: vmul.vx v8, v16, a0
312-
; CHECK-NEXT: addi a0, zero, 32
313-
; CHECK-NEXT: vsrl.vx v16, v8, a0
314-
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
315-
; CHECK-NEXT: vnsrl.wi v8, v16, 0
316-
; CHECK-NEXT: ret
306+
; RV32-LABEL: vmulh_vi_nxv8i32_0:
307+
; RV32: # %bb.0:
308+
; RV32-NEXT: addi a0, zero, -7
309+
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
310+
; RV32-NEXT: vmulh.vx v8, v8, a0
311+
; RV32-NEXT: ret
312+
;
313+
; RV64-LABEL: vmulh_vi_nxv8i32_0:
314+
; RV64: # %bb.0:
315+
; RV64-NEXT: addi a0, zero, 1
316+
; RV64-NEXT: slli a0, a0, 32
317+
; RV64-NEXT: addi a0, a0, -7
318+
; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, mu
319+
; RV64-NEXT: vmulh.vx v8, v8, a0
320+
; RV64-NEXT: ret
317321
%head1 = insertelement <vscale x 8 x i32> undef, i32 -7, i32 0
318322
%splat1 = shufflevector <vscale x 8 x i32> %head1, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
319323
%vb = sext <vscale x 8 x i32> %splat1 to <vscale x 8 x i64>
@@ -329,13 +333,9 @@ define <vscale x 8 x i32> @vmulh_vi_nxv8i32_0(<vscale x 8 x i32> %va) {
329333
define <vscale x 8 x i32> @vmulh_vi_nxv8i32_1(<vscale x 8 x i32> %va) {
330334
; CHECK-LABEL: vmulh_vi_nxv8i32_1:
331335
; CHECK: # %bb.0:
332-
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu
333-
; CHECK-NEXT: vsext.vf2 v16, v8
334-
; CHECK-NEXT: vsll.vi v8, v16, 4
335-
; CHECK-NEXT: addi a0, zero, 32
336-
; CHECK-NEXT: vsrl.vx v16, v8, a0
337-
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
338-
; CHECK-NEXT: vnsrl.wi v8, v16, 0
336+
; CHECK-NEXT: addi a0, zero, 16
337+
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu
338+
; CHECK-NEXT: vmulh.vx v8, v8, a0
339339
; CHECK-NEXT: ret
340340
%head1 = insertelement <vscale x 8 x i32> undef, i32 16, i32 0
341341
%splat1 = shufflevector <vscale x 8 x i32> %head1, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer

0 commit comments

Comments
 (0)