
Commit aff6ffc

[RISCV] Handle .vx pseudos in hasAllNBitUsers (#67419)
Vector pseudos with scalar operands only use the lower SEW bits (or fewer, in the case of shifts and clips). This patch accounts for this in hasAllNBitUsers for SDNodes in RISCVISelDAGToDAG. We also need to handle it in RISCVOptWInstrs, otherwise we would introduce slliw instructions that are less compressible than their original slli counterparts.
1 parent 72b8d25 commit aff6ffc

13 files changed: +907 -796 lines changed
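
As a rough illustration of the rule that the new RISCVII::vectorInstUsesNBitsOfScalarOp helper encodes, the standalone sketch below groups the handled .vx opcodes into three categories. The enum, function name, and values here are invented for the example and are not part of the patch; the real helper switches over concrete RISCV::* opcodes instead.

// Minimal, self-contained sketch of the bit-width rule (not LLVM code).
#include <cstdio>

enum class ScalarUse {
  ShiftAmount,          // vsll.vx, vsrl.vx, vsra.vx, vssrl.vx, vssra.vx
  NarrowingShiftAmount, // vnsrl.wx, vnsra.wx, vnclipu.wx, vnclip.wx
  FullElement           // vadd.vx, vand.vx, vmul.vx, vmv.s.x, ...
};

// Returns true if an instruction in the given category reads at most Bits
// bits of its scalar operand when the element width is 2^Log2SEW bits.
static bool usesAtMostNBitsOfScalar(ScalarUse Use, unsigned Bits,
                                    unsigned Log2SEW) {
  switch (Use) {
  case ScalarUse::ShiftAmount:
    return Log2SEW <= Bits;         // only log2(SEW) shift-amount bits are read
  case ScalarUse::NarrowingShiftAmount:
    return (Log2SEW + 1) <= Bits;   // only log2(2*SEW) bits are read
  case ScalarUse::FullElement:
    return (1u << Log2SEW) <= Bits; // only SEW bits are read
  }
  return false;
}

int main() {
  // For SEW=32 (Log2SEW=5), a vadd.vx scalar operand only needs its low 32
  // bits, so a 64-bit producer of that scalar need not be extended for the
  // vector instruction's sake.
  std::printf("%d\n", usesAtMostNBitsOfScalar(ScalarUse::FullElement, 32, 5));
  // A vsll.vx shift amount with SEW=32 only needs its low 5 bits.
  std::printf("%d\n", usesAtMostNBitsOfScalar(ScalarUse::ShiftAmount, 5, 5));
  return 0;
}

The same observation is why RISCVOptWInstrs needs to know about these users: when the scalar feeding such a pseudo comes from an slli, rewriting it into slliw gains nothing (the vector instruction ignores the upper bits anyway) and loses the chance to compress it, since slli has a compressed encoding (c.slli) while slliw does not.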

llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp

Lines changed: 112 additions & 0 deletions
@@ -130,6 +130,118 @@ parseFeatureBits(bool IsRV64, const FeatureBitset &FeatureBits) {
 
 } // namespace RISCVFeatures
 
+bool RISCVII::vectorInstUsesNBitsOfScalarOp(uint16_t Opcode, unsigned Bits,
+                                            unsigned Log2SEW) {
+  // TODO: Handle Zvbb instructions
+  switch (Opcode) {
+  default:
+    return false;
+
+  // 11.6. Vector Single-Width Shift Instructions
+  case RISCV::VSLL_VX:
+  case RISCV::VSRL_VX:
+  case RISCV::VSRA_VX:
+  // 12.4. Vector Single-Width Scaling Shift Instructions
+  case RISCV::VSSRL_VX:
+  case RISCV::VSSRA_VX:
+    // Only the low lg2(SEW) bits of the shift-amount value are used.
+    return Log2SEW <= Bits;
+
+  // 11.7 Vector Narrowing Integer Right Shift Instructions
+  case RISCV::VNSRL_WX:
+  case RISCV::VNSRA_WX:
+  // 12.5. Vector Narrowing Fixed-Point Clip Instructions
+  case RISCV::VNCLIPU_WX:
+  case RISCV::VNCLIP_WX:
+    // Only the low lg2(2*SEW) bits of the shift-amount value are used.
+    return (Log2SEW + 1) <= Bits;
+
+  // 11.1. Vector Single-Width Integer Add and Subtract
+  case RISCV::VADD_VX:
+  case RISCV::VSUB_VX:
+  case RISCV::VRSUB_VX:
+  // 11.2. Vector Widening Integer Add/Subtract
+  case RISCV::VWADDU_VX:
+  case RISCV::VWSUBU_VX:
+  case RISCV::VWADD_VX:
+  case RISCV::VWSUB_VX:
+  case RISCV::VWADDU_WX:
+  case RISCV::VWSUBU_WX:
+  case RISCV::VWADD_WX:
+  case RISCV::VWSUB_WX:
+  // 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
+  case RISCV::VADC_VXM:
+  case RISCV::VADC_VIM:
+  case RISCV::VMADC_VXM:
+  case RISCV::VMADC_VIM:
+  case RISCV::VMADC_VX:
+  case RISCV::VSBC_VXM:
+  case RISCV::VMSBC_VXM:
+  case RISCV::VMSBC_VX:
+  // 11.5 Vector Bitwise Logical Instructions
+  case RISCV::VAND_VX:
+  case RISCV::VOR_VX:
+  case RISCV::VXOR_VX:
+  // 11.8. Vector Integer Compare Instructions
+  case RISCV::VMSEQ_VX:
+  case RISCV::VMSNE_VX:
+  case RISCV::VMSLTU_VX:
+  case RISCV::VMSLT_VX:
+  case RISCV::VMSLEU_VX:
+  case RISCV::VMSLE_VX:
+  case RISCV::VMSGTU_VX:
+  case RISCV::VMSGT_VX:
+  // 11.9. Vector Integer Min/Max Instructions
+  case RISCV::VMINU_VX:
+  case RISCV::VMIN_VX:
+  case RISCV::VMAXU_VX:
+  case RISCV::VMAX_VX:
+  // 11.10. Vector Single-Width Integer Multiply Instructions
+  case RISCV::VMUL_VX:
+  case RISCV::VMULH_VX:
+  case RISCV::VMULHU_VX:
+  case RISCV::VMULHSU_VX:
+  // 11.11. Vector Integer Divide Instructions
+  case RISCV::VDIVU_VX:
+  case RISCV::VDIV_VX:
+  case RISCV::VREMU_VX:
+  case RISCV::VREM_VX:
+  // 11.12. Vector Widening Integer Multiply Instructions
+  case RISCV::VWMUL_VX:
+  case RISCV::VWMULU_VX:
+  case RISCV::VWMULSU_VX:
+  // 11.13. Vector Single-Width Integer Multiply-Add Instructions
+  case RISCV::VMACC_VX:
+  case RISCV::VNMSAC_VX:
+  case RISCV::VMADD_VX:
+  case RISCV::VNMSUB_VX:
+  // 11.14. Vector Widening Integer Multiply-Add Instructions
+  case RISCV::VWMACCU_VX:
+  case RISCV::VWMACC_VX:
+  case RISCV::VWMACCSU_VX:
+  case RISCV::VWMACCUS_VX:
+  // 11.15. Vector Integer Merge Instructions
+  case RISCV::VMERGE_VXM:
+  // 11.16. Vector Integer Move Instructions
+  case RISCV::VMV_V_X:
+  // 12.1. Vector Single-Width Saturating Add and Subtract
+  case RISCV::VSADDU_VX:
+  case RISCV::VSADD_VX:
+  case RISCV::VSSUBU_VX:
+  case RISCV::VSSUB_VX:
+  // 12.2. Vector Single-Width Averaging Add and Subtract
+  case RISCV::VAADDU_VX:
+  case RISCV::VAADD_VX:
+  case RISCV::VASUBU_VX:
+  case RISCV::VASUB_VX:
+  // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
+  case RISCV::VSMUL_VX:
+  // 16.1. Integer Scalar Move Instructions
+  case RISCV::VMV_S_X:
+    return (1 << Log2SEW) <= Bits;
+  }
+}
+
 // Encode VTYPE into the binary format used by the the VSETVLI instruction which
 // is used by our MC layer representation.
 //

llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h

Lines changed: 5 additions & 0 deletions
@@ -241,6 +241,11 @@ static inline bool isFirstDefTiedToFirstUse(const MCInstrDesc &Desc) {
          Desc.getOperandConstraint(Desc.getNumDefs(), MCOI::TIED_TO) == 0;
 }
 
+// Returns true if the .vx vector instruction \p Opcode only uses the lower \p
+// Bits for a given SEW.
+bool vectorInstUsesNBitsOfScalarOp(uint16_t Opcode, unsigned Bits,
+                                   unsigned Log2SEW);
+
 // RISC-V Specific Machine Operand Flags
 enum {
   MO_None = 0,

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 25 additions & 1 deletion
@@ -2783,8 +2783,32 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
 
     // TODO: Add more opcodes?
     switch (User->getMachineOpcode()) {
-    default:
+    default: {
+      if (const RISCVVPseudosTable::PseudoInfo *PseudoInfo =
+              RISCVVPseudosTable::getPseudoInfo(User->getMachineOpcode())) {
+
+        const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
+        if (!RISCVII::hasSEWOp(MCID.TSFlags))
+          return false;
+        assert(RISCVII::hasVLOp(MCID.TSFlags));
+
+        bool HasGlueOp = User->getGluedNode() != nullptr;
+        unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1;
+        bool HasChainOp =
+            User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
+        bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(MCID.TSFlags);
+        unsigned VLIdx = User->getNumOperands() - HasVecPolicyOp - HasChainOp -
+                         HasGlueOp - 2;
+        const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
+
+        if (UI.getOperandNo() == VLIdx)
+          return false;
+        if (RISCVII::vectorInstUsesNBitsOfScalarOp(PseudoInfo->BaseInstr, Bits,
+                                                   Log2SEW))
+          break;
+      }
       return false;
+    }
     case RISCV::ADDW:
     case RISCV::ADDIW:
     case RISCV::SUBW:

llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp

Lines changed: 16 additions & 1 deletion
@@ -107,8 +107,23 @@ static bool hasAllNBitUsers(const MachineInstr &OrigMI,
     unsigned OpIdx = UserOp.getOperandNo();
 
     switch (UserMI->getOpcode()) {
-    default:
+    default: {
+      if (const RISCVVPseudosTable::PseudoInfo *PseudoInfo =
+              RISCVVPseudosTable::getPseudoInfo(UserMI->getOpcode())) {
+        const MCInstrDesc &MCID = UserMI->getDesc();
+        if (!RISCVII::hasSEWOp(MCID.TSFlags))
+          return false;
+        assert(RISCVII::hasVLOp(MCID.TSFlags));
+        const unsigned Log2SEW =
+            UserMI->getOperand(RISCVII::getSEWOpNum(MCID)).getImm();
+        if (UserOp.getOperandNo() == RISCVII::getVLOpNum(MCID))
+          return false;
+        if (RISCVII::vectorInstUsesNBitsOfScalarOp(PseudoInfo->BaseInstr,
+                                                   Bits, Log2SEW))
+          break;
+      }
       return false;
+    }
 
     case RISCV::ADDIW:
     case RISCV::ADDW:

llvm/test/CodeGen/RISCV/rvv/constant-folding.ll

Lines changed: 11 additions & 24 deletions
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
-; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
-; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 ; These tests check that the scalable-vector version of this series of
 ; instructions does not get into an infinite DAGCombine loop. This was
@@ -14,26 +12,15 @@
 ; a constant SPLAT_VECTOR didn't follow suit.
 
 define <2 x i16> @fixedlen(<2 x i32> %x) {
-; RV32-LABEL: fixedlen:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v8, v8, 16
-; RV32-NEXT: lui a0, 1048568
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; RV32-NEXT: vnsrl.wi v8, v8, 0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: fixedlen:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v8, v8, 16
-; RV64-NEXT: lui a0, 131071
-; RV64-NEXT: slli a0, a0, 3
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; RV64-NEXT: vnsrl.wi v8, v8, 0
-; RV64-NEXT: ret
+; CHECK-LABEL: fixedlen:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v8, 16
+; CHECK-NEXT: lui a0, 1048568
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: ret
 %v41 = insertelement <2 x i32> poison, i32 16, i32 0
 %v42 = shufflevector <2 x i32> %v41, <2 x i32> poison, <2 x i32> zeroinitializer
 %v43 = lshr <2 x i32> %x, %v42

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll

Lines changed: 31 additions & 63 deletions
@@ -1,71 +1,39 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfh | FileCheck %s -check-prefixes=CHECK,RV32
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh | FileCheck %s -check-prefixes=CHECK,RV64
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfh | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh | FileCheck %s
 
 ; Integers
 
 define {<16 x i1>, <16 x i1>} @vector_deinterleave_load_v16i1_v32i1(ptr %p) {
-; RV32-LABEL: vector_deinterleave_load_v16i1_v32i1:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; RV32-NEXT: vlm.v v0, (a0)
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT: vmv.v.i v8, 0
-; RV32-NEXT: vmerge.vim v10, v8, 1, v0
-; RV32-NEXT: vid.v v9
-; RV32-NEXT: vadd.vv v11, v9, v9
-; RV32-NEXT: vrgather.vv v9, v10, v11
-; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vi v0, v0, 2
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT: vmerge.vim v8, v8, 1, v0
-; RV32-NEXT: vadd.vi v12, v11, -16
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT: vmv.s.x v0, a0
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; RV32-NEXT: vrgather.vv v9, v8, v12, v0.t
-; RV32-NEXT: vmsne.vi v9, v9, 0
-; RV32-NEXT: vadd.vi v12, v11, 1
-; RV32-NEXT: vrgather.vv v13, v10, v12
-; RV32-NEXT: vadd.vi v10, v11, -15
-; RV32-NEXT: vrgather.vv v13, v8, v10, v0.t
-; RV32-NEXT: vmsne.vi v8, v13, 0
-; RV32-NEXT: vmv.v.v v0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vector_deinterleave_load_v16i1_v32i1:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 32
-; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; RV64-NEXT: vlm.v v0, (a0)
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT: vmv.v.i v8, 0
-; RV64-NEXT: vmerge.vim v10, v8, 1, v0
-; RV64-NEXT: vid.v v9
-; RV64-NEXT: vadd.vv v11, v9, v9
-; RV64-NEXT: vrgather.vv v9, v10, v11
-; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vi v0, v0, 2
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT: vmerge.vim v8, v8, 1, v0
-; RV64-NEXT: vadd.vi v12, v11, -16
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT: vmv.s.x v0, a0
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; RV64-NEXT: vrgather.vv v9, v8, v12, v0.t
-; RV64-NEXT: vmsne.vi v9, v9, 0
-; RV64-NEXT: vadd.vi v12, v11, 1
-; RV64-NEXT: vrgather.vv v13, v10, v12
-; RV64-NEXT: vadd.vi v10, v11, -15
-; RV64-NEXT: vrgather.vv v13, v8, v10, v0.t
-; RV64-NEXT: vmsne.vi v8, v13, 0
-; RV64-NEXT: vmv.v.v v0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vector_deinterleave_load_v16i1_v32i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 32
+; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: vlm.v v0, (a0)
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v10, v8, 1, v0
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vadd.vv v11, v9, v9
+; CHECK-NEXT: vrgather.vv v9, v10, v11
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vi v0, v0, 2
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: vadd.vi v12, v11, -16
+; CHECK-NEXT: li a0, -256
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT: vrgather.vv v9, v8, v12, v0.t
+; CHECK-NEXT: vmsne.vi v9, v9, 0
+; CHECK-NEXT: vadd.vi v12, v11, 1
+; CHECK-NEXT: vrgather.vv v13, v10, v12
+; CHECK-NEXT: vadd.vi v10, v11, -15
+; CHECK-NEXT: vrgather.vv v13, v8, v10, v0.t
+; CHECK-NEXT: vmsne.vi v8, v13, 0
+; CHECK-NEXT: vmv.v.v v0, v9
+; CHECK-NEXT: ret
 %vec = load <32 x i1>, ptr %p
 %retval = call {<16 x i1>, <16 x i1>} @llvm.experimental.vector.deinterleave2.v32i1(<32 x i1> %vec)
 ret {<16 x i1>, <16 x i1>} %retval

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll

Lines changed: 4 additions & 8 deletions
@@ -585,8 +585,7 @@ define signext i32 @vpreduce_umax_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %
 ;
 ; RV64-LABEL: vpreduce_umax_v2i32:
 ; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a0, a0, 32
+; RV64-NEXT: andi a0, a0, -1
 ; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
 ; RV64-NEXT: vmv.s.x v9, a0
 ; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
@@ -626,8 +625,7 @@ define signext i32 @vpreduce_umin_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %
 ;
 ; RV64-LABEL: vpreduce_umin_v2i32:
 ; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a0, a0, 32
+; RV64-NEXT: andi a0, a0, -1
 ; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
 ; RV64-NEXT: vmv.s.x v9, a0
 ; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
@@ -727,8 +725,7 @@ define signext i32 @vpreduce_umax_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %
 ;
 ; RV64-LABEL: vpreduce_umax_v4i32:
 ; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a0, a0, 32
+; RV64-NEXT: andi a0, a0, -1
 ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; RV64-NEXT: vmv.s.x v9, a0
 ; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
@@ -768,8 +765,7 @@ define signext i32 @vpreduce_umin_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %
 ;
 ; RV64-LABEL: vpreduce_umin_v4i32:
 ; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a0, a0, 32
+; RV64-NEXT: andi a0, a0, -1
 ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; RV64-NEXT: vmv.s.x v9, a0
 ; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
