Skip to content

Commit 9357712

Browse files
committed
[RISCV] Use vfslide1down for build_vectors of non-constant floats
This adds the vfslide1down (and vfslide1up for consistency) nodes. These mostly parallel the existing vslide1down/up nodes. (See note below on instruction semantics.) We then use the vfslide1down in build_vector lowering instead of going through the stack. The specification is more than a bit vague on the meaning of these instructions. All we're given is "The vfslide1down instruction is defined analogously, but sources its scalar argument from an f register." We have to combine this with a general note at the beginning of section 10. Vector Arithmetic Instruction Formats which reads: "For floating-point operations, the scalar can be taken from a scalar f register. If FLEN > SEW, the value in the f registers is checked for a valid NaN-boxed value, in which case the least-significant SEW bits of the f register are used, else the canonical NaN value is used. Vector instructions where any floating-point vector operand’s EEW is not a supported floating-point type width (which includes when FLEN < SEW) are reserved.". Note that floats are NaN-boxed when D is implemented. Combining that all together, we're fine as long as the element type matches the vector type - which it does by construction. We shouldn't have legal vectors which hit the reserved encoding case. An assert is included, just to be careful. Differential Revision: https://reviews.llvm.org/D151347
1 parent 74f15d9 commit 9357712

File tree

4 files changed

+65
-46
lines changed

4 files changed

+65
-46
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3218,15 +3218,13 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
32183218
// For constant vectors, use generic constant pool lowering. Otherwise,
32193219
// we'd have to materialize constants in GPRs just to move them into the
32203220
// vector.
3221-
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()))
3221+
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
3222+
ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
32223223
return SDValue();
32233224

3224-
// We can use a series of vslide1down instructions to move values in GPRs
3225-
// into the appropriate place in the result vector. We use slide1down
3226-
// to avoid the register group overlap constraint of vslide1up.
3227-
if (VT.isFloatingPoint())
3228-
// TODO: Use vfslide1down.
3229-
return SDValue();
3225+
assert((!VT.isFloatingPoint() ||
3226+
VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
3227+
"Illegal type which will result in reserved encoding");
32303228

32313229
const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
32323230

@@ -3243,8 +3241,10 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
32433241
Vec, Offset, Mask, VL, Policy);
32443242
UndefCount = 0;
32453243
}
3246-
Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, ContainerVT,
3247-
DAG.getUNDEF(ContainerVT), Vec, V, Mask, VL);
3244+
auto OpCode =
3245+
VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3246+
Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3247+
V, Mask, VL);
32483248
}
32493249
if (UndefCount) {
32503250
const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
@@ -15161,6 +15161,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
1516115161
NODE_NAME_CASE(VSLIDE1UP_VL)
1516215162
NODE_NAME_CASE(VSLIDEDOWN_VL)
1516315163
NODE_NAME_CASE(VSLIDE1DOWN_VL)
15164+
NODE_NAME_CASE(VFSLIDE1UP_VL)
15165+
NODE_NAME_CASE(VFSLIDE1DOWN_VL)
1516415166
NODE_NAME_CASE(VID_VL)
1516515167
NODE_NAME_CASE(VFNCVT_ROD_VL)
1516615168
NODE_NAME_CASE(VECREDUCE_ADD_VL)

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,12 @@ enum NodeType : unsigned {
164164
// value. The fourth and fifth operands are the mask and VL operands.
165165
VSLIDE1UP_VL,
166166
VSLIDE1DOWN_VL,
167+
// Matches the semantics of vfslide1up/vfslide1down. The first operand is
168+
// passthru operand, the second is source vector, third is a scalar value
169+
// whose type matches the element type of the vectors. The fourth and fifth
170+
// operands are the mask and VL operands.
171+
VFSLIDE1UP_VL,
172+
VFSLIDE1DOWN_VL,
167173
// Matches the semantics of the vid.v instruction, with a mask and VL
168174
// operand.
169175
VID_VL,

llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2456,11 +2456,18 @@ def SDTRVVSlide1 : SDTypeProfile<1, 5, [
24562456
SDTCisVT<3, XLenVT>, SDTCVecEltisVT<4, i1>, SDTCisSameNumEltsAs<0, 4>,
24572457
SDTCisVT<5, XLenVT>
24582458
]>;
2459+
def SDTRVVFSlide1 : SDTypeProfile<1, 5, [
2460+
SDTCisVec<0>, SDTCisSameAs<1, 0>, SDTCisSameAs<2, 0>, SDTCisFP<0>,
2461+
SDTCisEltOfVec<3, 0>, SDTCVecEltisVT<4, i1>, SDTCisSameNumEltsAs<0, 4>,
2462+
SDTCisVT<5, XLenVT>
2463+
]>;
24592464

24602465
def riscv_slideup_vl : SDNode<"RISCVISD::VSLIDEUP_VL", SDTRVVSlide, []>;
24612466
def riscv_slide1up_vl : SDNode<"RISCVISD::VSLIDE1UP_VL", SDTRVVSlide1, []>;
24622467
def riscv_slidedown_vl : SDNode<"RISCVISD::VSLIDEDOWN_VL", SDTRVVSlide, []>;
24632468
def riscv_slide1down_vl : SDNode<"RISCVISD::VSLIDE1DOWN_VL", SDTRVVSlide1, []>;
2469+
def riscv_fslide1up_vl : SDNode<"RISCVISD::VFSLIDE1UP_VL", SDTRVVFSlide1, []>;
2470+
def riscv_fslide1down_vl : SDNode<"RISCVISD::VFSLIDE1DOWN_VL", SDTRVVFSlide1, []>;
24642471

24652472
foreach vti = AllIntegerVectors in {
24662473
let Predicates = GetVTypePredicates<vti>.Predicates in {
@@ -2495,6 +2502,35 @@ foreach vti = AllIntegerVectors in {
24952502
}
24962503
}
24972504

2505+
foreach vti = AllFloatVectors in {
2506+
let Predicates = GetVTypePredicates<vti>.Predicates in {
2507+
def : Pat<(vti.Vector (riscv_fslide1up_vl (vti.Vector undef),
2508+
(vti.Vector vti.RegClass:$rs1),
2509+
vti.Scalar:$rs2, (vti.Mask true_mask),
2510+
VLOpFrag)),
2511+
(!cast<Instruction>("PseudoVFSLIDE1UP_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
2512+
vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, GPR:$vl, vti.Log2SEW)>;
2513+
def : Pat<(vti.Vector (riscv_fslide1up_vl (vti.Vector vti.RegClass:$rd),
2514+
(vti.Vector vti.RegClass:$rs1),
2515+
vti.Scalar:$rs2, (vti.Mask true_mask),
2516+
VLOpFrag)),
2517+
(!cast<Instruction>("PseudoVFSLIDE1UP_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_TU")
2518+
vti.RegClass:$rd, vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, GPR:$vl, vti.Log2SEW)>;
2519+
def : Pat<(vti.Vector (riscv_fslide1down_vl (vti.Vector undef),
2520+
(vti.Vector vti.RegClass:$rs1),
2521+
vti.Scalar:$rs2, (vti.Mask true_mask),
2522+
VLOpFrag)),
2523+
(!cast<Instruction>("PseudoVFSLIDE1DOWN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
2524+
vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, GPR:$vl, vti.Log2SEW)>;
2525+
def : Pat<(vti.Vector (riscv_fslide1down_vl (vti.Vector vti.RegClass:$rd),
2526+
(vti.Vector vti.RegClass:$rs1),
2527+
vti.Scalar:$rs2, (vti.Mask true_mask),
2528+
VLOpFrag)),
2529+
(!cast<Instruction>("PseudoVFSLIDE1DOWN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_TU")
2530+
vti.RegClass:$rd, vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, GPR:$vl, vti.Log2SEW)>;
2531+
}
2532+
}
2533+
24982534
foreach vti = AllVectors in {
24992535
let Predicates = GetVTypePredicates<vti>.Predicates in {
25002536
def : Pat<(vti.Vector (riscv_slideup_vl (vti.Vector vti.RegClass:$rs3),

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll

Lines changed: 12 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -291,14 +291,9 @@ define dso_local void @splat_load_licm(float* %0) {
291291
define <2 x half> @buildvec_v2f16(half %a, half %b) {
292292
; CHECK-LABEL: buildvec_v2f16:
293293
; CHECK: # %bb.0:
294-
; CHECK-NEXT: addi sp, sp, -16
295-
; CHECK-NEXT: .cfi_def_cfa_offset 16
296-
; CHECK-NEXT: fsh fa1, 14(sp)
297-
; CHECK-NEXT: fsh fa0, 12(sp)
298-
; CHECK-NEXT: addi a0, sp, 12
299294
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
300-
; CHECK-NEXT: vle16.v v8, (a0)
301-
; CHECK-NEXT: addi sp, sp, 16
295+
; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
296+
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
302297
; CHECK-NEXT: ret
303298
%v1 = insertelement <2 x half> poison, half %a, i64 0
304299
%v2 = insertelement <2 x half> %v1, half %b, i64 1
@@ -308,14 +303,9 @@ define <2 x half> @buildvec_v2f16(half %a, half %b) {
308303
define <2 x float> @buildvec_v2f32(float %a, float %b) {
309304
; CHECK-LABEL: buildvec_v2f32:
310305
; CHECK: # %bb.0:
311-
; CHECK-NEXT: addi sp, sp, -16
312-
; CHECK-NEXT: .cfi_def_cfa_offset 16
313-
; CHECK-NEXT: fsw fa1, 12(sp)
314-
; CHECK-NEXT: fsw fa0, 8(sp)
315-
; CHECK-NEXT: addi a0, sp, 8
316306
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
317-
; CHECK-NEXT: vle32.v v8, (a0)
318-
; CHECK-NEXT: addi sp, sp, 16
307+
; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
308+
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
319309
; CHECK-NEXT: ret
320310
%v1 = insertelement <2 x float> poison, float %a, i64 0
321311
%v2 = insertelement <2 x float> %v1, float %b, i64 1
@@ -325,14 +315,9 @@ define <2 x float> @buildvec_v2f32(float %a, float %b) {
325315
define <2 x double> @buildvec_v2f64(double %a, double %b) {
326316
; CHECK-LABEL: buildvec_v2f64:
327317
; CHECK: # %bb.0:
328-
; CHECK-NEXT: addi sp, sp, -16
329-
; CHECK-NEXT: .cfi_def_cfa_offset 16
330-
; CHECK-NEXT: fsd fa1, 8(sp)
331-
; CHECK-NEXT: fsd fa0, 0(sp)
332-
; CHECK-NEXT: mv a0, sp
333318
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
334-
; CHECK-NEXT: vle64.v v8, (a0)
335-
; CHECK-NEXT: addi sp, sp, 16
319+
; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
320+
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
336321
; CHECK-NEXT: ret
337322
%v1 = insertelement <2 x double> poison, double %a, i64 0
338323
%v2 = insertelement <2 x double> %v1, double %b, i64 1
@@ -342,14 +327,9 @@ define <2 x double> @buildvec_v2f64(double %a, double %b) {
342327
define <2 x double> @buildvec_v2f64_b(double %a, double %b) {
343328
; CHECK-LABEL: buildvec_v2f64_b:
344329
; CHECK: # %bb.0:
345-
; CHECK-NEXT: addi sp, sp, -16
346-
; CHECK-NEXT: .cfi_def_cfa_offset 16
347-
; CHECK-NEXT: fsd fa1, 8(sp)
348-
; CHECK-NEXT: fsd fa0, 0(sp)
349-
; CHECK-NEXT: mv a0, sp
350330
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
351-
; CHECK-NEXT: vle64.v v8, (a0)
352-
; CHECK-NEXT: addi sp, sp, 16
331+
; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
332+
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
353333
; CHECK-NEXT: ret
354334
%v1 = insertelement <2 x double> poison, double %b, i64 1
355335
%v2 = insertelement <2 x double> %v1, double %a, i64 0
@@ -359,16 +339,11 @@ define <2 x double> @buildvec_v2f64_b(double %a, double %b) {
359339
define <4 x float> @buildvec_v4f32(float %a, float %b, float %c, float %d) {
360340
; CHECK-LABEL: buildvec_v4f32:
361341
; CHECK: # %bb.0:
362-
; CHECK-NEXT: addi sp, sp, -16
363-
; CHECK-NEXT: .cfi_def_cfa_offset 16
364-
; CHECK-NEXT: fsw fa3, 12(sp)
365-
; CHECK-NEXT: fsw fa2, 8(sp)
366-
; CHECK-NEXT: fsw fa1, 4(sp)
367-
; CHECK-NEXT: fsw fa0, 0(sp)
368-
; CHECK-NEXT: mv a0, sp
369342
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
370-
; CHECK-NEXT: vle32.v v8, (a0)
371-
; CHECK-NEXT: addi sp, sp, 16
343+
; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
344+
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
345+
; CHECK-NEXT: vfslide1down.vf v8, v8, fa2
346+
; CHECK-NEXT: vfslide1down.vf v8, v8, fa3
372347
; CHECK-NEXT: ret
373348
%v1 = insertelement <4 x float> poison, float %a, i64 0
374349
%v2 = insertelement <4 x float> %v1, float %b, i64 1

0 commit comments

Comments
 (0)