Skip to content

Commit c6b7944

Browse files
[RISCV] Split single-width floating-point fused multiply-add pseudo instructions by SEW
Co-authored-by: Wang Pengcheng <[email protected]>
1 parent d309d7e commit c6b7944

File tree

7 files changed

+246
-150
lines changed

7 files changed

+246
-150
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 162 additions & 92 deletions
Large diffs are not rendered by default.

llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td

Lines changed: 56 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -3234,15 +3234,17 @@ multiclass VPseudoTernaryWithPolicyRoundingMode<VReg RetClass,
32343234
DAGOperand Op2Class,
32353235
LMULInfo MInfo,
32363236
string Constraint = "",
3237+
int sew = 0,
32373238
bit Commutable = 0,
32383239
int TargetConstraintType = 1> {
32393240
let VLMul = MInfo.value in {
3241+
defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX);
32403242
let isCommutable = Commutable in
3241-
def "_" # MInfo.MX :
3243+
def suffix :
32423244
VPseudoTernaryNoMaskWithPolicyRoundingMode<RetClass, Op1Class,
32433245
Op2Class, Constraint,
32443246
TargetConstraintType>;
3245-
def "_" # MInfo.MX # "_MASK" :
3247+
def suffix # "_MASK" :
32463248
VPseudoBinaryMaskPolicyRoundingMode<RetClass, Op1Class,
32473249
Op2Class, Constraint,
32483250
UsesVXRM_=0,
@@ -3256,9 +3258,9 @@ multiclass VPseudoTernaryV_VV_AAXA<LMULInfo m, string Constraint = ""> {
32563258
Constraint, Commutable=1>;
32573259
}
32583260

3259-
multiclass VPseudoTernaryV_VV_AAXA_RM<LMULInfo m, string Constraint = ""> {
3261+
multiclass VPseudoTernaryV_VV_AAXA_RM<LMULInfo m, string Constraint = "", int sew = 0> {
32603262
defm _VV : VPseudoTernaryWithPolicyRoundingMode<m.vrclass, m.vrclass, m.vrclass, m,
3261-
Constraint, Commutable=1>;
3263+
Constraint, sew, Commutable=1>;
32623264
}
32633265

32643266
multiclass VPseudoTernaryV_VX_AAXA<LMULInfo m, string Constraint = ""> {
@@ -3272,10 +3274,11 @@ multiclass VPseudoTernaryV_VF_AAXA<LMULInfo m, FPR_Info f, string Constraint = "
32723274
Commutable=1>;
32733275
}
32743276

3275-
multiclass VPseudoTernaryV_VF_AAXA_RM<LMULInfo m, FPR_Info f, string Constraint = ""> {
3277+
multiclass VPseudoTernaryV_VF_AAXA_RM<LMULInfo m, FPR_Info f,
3278+
string Constraint = "", int sew = 0> {
32763279
defm "_V" # f.FX : VPseudoTernaryWithPolicyRoundingMode<m.vrclass, f.fprclass,
32773280
m.vrclass, m, Constraint,
3278-
Commutable=1>;
3281+
sew, Commutable=1>;
32793282
}
32803283

32813284
multiclass VPseudoTernaryW_VV<LMULInfo m> {
@@ -3345,16 +3348,17 @@ multiclass VPseudoVMAC_VV_VX_AAXA<string Constraint = ""> {
33453348

33463349
multiclass VPseudoVMAC_VV_VF_AAXA_RM<string Constraint = ""> {
33473350
foreach m = MxListF in {
3348-
defm "" : VPseudoTernaryV_VV_AAXA_RM<m, Constraint>,
3349-
SchedTernary<"WriteVFMulAddV", "ReadVFMulAddV", "ReadVFMulAddV",
3350-
"ReadVFMulAddV", m.MX>;
3351+
foreach e = SchedSEWSet<m.MX, isF=1>.val in
3352+
defm "" : VPseudoTernaryV_VV_AAXA_RM<m, Constraint, sew=e>,
3353+
SchedTernary<"WriteVFMulAddV", "ReadVFMulAddV", "ReadVFMulAddV",
3354+
"ReadVFMulAddV", m.MX, e>;
33513355
}
33523356

33533357
foreach f = FPList in {
33543358
foreach m = f.MxList in {
3355-
defm "" : VPseudoTernaryV_VF_AAXA_RM<m, f, Constraint>,
3359+
defm "" : VPseudoTernaryV_VF_AAXA_RM<m, f, Constraint, sew=f.SEW>,
33563360
SchedTernary<"WriteVFMulAddF", "ReadVFMulAddV", "ReadVFMulAddF",
3357-
"ReadVFMulAddV", m.MX>;
3361+
"ReadVFMulAddV", m.MX, f.SEW>;
33583362
}
33593363
}
33603364
}
@@ -4461,23 +4465,26 @@ class VPatTernaryNoMaskWithPolicyRoundingMode<string intrinsic,
44614465
ValueType result_type,
44624466
ValueType op1_type,
44634467
ValueType op2_type,
4464-
int sew,
4468+
int log2sew,
44654469
LMULInfo vlmul,
44664470
VReg result_reg_class,
44674471
RegisterClass op1_reg_class,
4468-
DAGOperand op2_kind> :
4472+
DAGOperand op2_kind,
4473+
bit isSEWAware = 0> :
44694474
Pat<(result_type (!cast<Intrinsic>(intrinsic)
44704475
(result_type result_reg_class:$rs3),
44714476
(op1_type op1_reg_class:$rs1),
44724477
(op2_type op2_kind:$rs2),
44734478
(XLenVT timm:$round),
44744479
VLOpFrag, (XLenVT timm:$policy))),
4475-
(!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
4480+
(!cast<Instruction>(!if(isSEWAware,
4481+
inst#"_"#kind#"_"#vlmul.MX#"_E"#!shl(1, log2sew),
4482+
inst#"_"#kind#"_"#vlmul.MX))
44764483
result_reg_class:$rs3,
44774484
(op1_type op1_reg_class:$rs1),
44784485
op2_kind:$rs2,
44794486
(XLenVT timm:$round),
4480-
GPR:$vl, sew, (XLenVT timm:$policy))>;
4487+
GPR:$vl, log2sew, (XLenVT timm:$policy))>;
44814488

44824489
class VPatTernaryMask<string intrinsic,
44834490
string inst,
@@ -4536,25 +4543,28 @@ class VPatTernaryMaskPolicyRoundingMode<string intrinsic,
45364543
ValueType op1_type,
45374544
ValueType op2_type,
45384545
ValueType mask_type,
4539-
int sew,
4546+
int log2sew,
45404547
LMULInfo vlmul,
45414548
VReg result_reg_class,
45424549
RegisterClass op1_reg_class,
4543-
DAGOperand op2_kind> :
4550+
DAGOperand op2_kind,
4551+
bit isSEWAware = 0> :
45444552
Pat<(result_type (!cast<Intrinsic>(intrinsic#"_mask")
45454553
(result_type result_reg_class:$rs3),
45464554
(op1_type op1_reg_class:$rs1),
45474555
(op2_type op2_kind:$rs2),
45484556
(mask_type V0),
45494557
(XLenVT timm:$round),
45504558
VLOpFrag, (XLenVT timm:$policy))),
4551-
(!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX # "_MASK")
4559+
(!cast<Instruction>(!if(isSEWAware,
4560+
inst#"_"#kind#"_"#vlmul.MX#"_E"#!shl(1, log2sew) # "_MASK",
4561+
inst#"_"#kind#"_"#vlmul.MX # "_MASK"))
45524562
result_reg_class:$rs3,
45534563
(op1_type op1_reg_class:$rs1),
45544564
op2_kind:$rs2,
45554565
(mask_type V0),
45564566
(XLenVT timm:$round),
4557-
GPR:$vl, sew, (XLenVT timm:$policy))>;
4567+
GPR:$vl, log2sew, (XLenVT timm:$policy))>;
45584568

45594569
class VPatTernaryMaskTA<string intrinsic,
45604570
string inst,
@@ -5564,15 +5574,16 @@ multiclass VPatTernaryWithPolicyRoundingMode<string intrinsic,
55645574
LMULInfo vlmul,
55655575
VReg result_reg_class,
55665576
RegisterClass op1_reg_class,
5567-
DAGOperand op2_kind> {
5577+
DAGOperand op2_kind,
5578+
bit isSEWAware = 0> {
55685579
def : VPatTernaryNoMaskWithPolicyRoundingMode<intrinsic, inst, kind, result_type,
55695580
op1_type, op2_type, sew, vlmul,
55705581
result_reg_class, op1_reg_class,
5571-
op2_kind>;
5582+
op2_kind, isSEWAware>;
55725583
def : VPatTernaryMaskPolicyRoundingMode<intrinsic, inst, kind, result_type, op1_type,
55735584
op2_type, mask_type, sew, vlmul,
55745585
result_reg_class, op1_reg_class,
5575-
op2_kind>;
5586+
op2_kind, isSEWAware>;
55765587
}
55775588

55785589
multiclass VPatTernaryTA<string intrinsic,
@@ -5626,13 +5637,13 @@ multiclass VPatTernaryV_VV_AAXA<string intrinsic, string instruction,
56265637
}
56275638

56285639
multiclass VPatTernaryV_VV_AAXA_RM<string intrinsic, string instruction,
5629-
list<VTypeInfo> vtilist> {
5640+
list<VTypeInfo> vtilist, bit isSEWAware = 0> {
56305641
foreach vti = vtilist in
56315642
let Predicates = GetVTypePredicates<vti>.Predicates in
56325643
defm : VPatTernaryWithPolicyRoundingMode<intrinsic, instruction, "VV",
56335644
vti.Vector, vti.Vector, vti.Vector, vti.Mask,
56345645
vti.Log2SEW, vti.LMul, vti.RegClass,
5635-
vti.RegClass, vti.RegClass>;
5646+
vti.RegClass, vti.RegClass, isSEWAware>;
56365647
}
56375648

56385649
multiclass VPatTernaryV_VX<string intrinsic, string instruction,
@@ -5657,14 +5668,14 @@ multiclass VPatTernaryV_VX_AAXA<string intrinsic, string instruction,
56575668
}
56585669

56595670
multiclass VPatTernaryV_VX_AAXA_RM<string intrinsic, string instruction,
5660-
list<VTypeInfo> vtilist> {
5671+
list<VTypeInfo> vtilist, bit isSEWAware = 0> {
56615672
foreach vti = vtilist in
56625673
let Predicates = GetVTypePredicates<vti>.Predicates in
56635674
defm : VPatTernaryWithPolicyRoundingMode<intrinsic, instruction,
56645675
"V"#vti.ScalarSuffix,
56655676
vti.Vector, vti.Scalar, vti.Vector, vti.Mask,
56665677
vti.Log2SEW, vti.LMul, vti.RegClass,
5667-
vti.ScalarRegClass, vti.RegClass>;
5678+
vti.ScalarRegClass, vti.RegClass, isSEWAware>;
56685679
}
56695680

56705681
multiclass VPatTernaryV_VI<string intrinsic, string instruction,
@@ -5742,9 +5753,9 @@ multiclass VPatTernaryV_VV_VX_AAXA<string intrinsic, string instruction,
57425753
VPatTernaryV_VX_AAXA<intrinsic, instruction, vtilist>;
57435754

57445755
multiclass VPatTernaryV_VV_VX_AAXA_RM<string intrinsic, string instruction,
5745-
list<VTypeInfo> vtilist>
5746-
: VPatTernaryV_VV_AAXA_RM<intrinsic, instruction, vtilist>,
5747-
VPatTernaryV_VX_AAXA_RM<intrinsic, instruction, vtilist>;
5756+
list<VTypeInfo> vtilist, bit isSEWAware = 0>
5757+
: VPatTernaryV_VV_AAXA_RM<intrinsic, instruction, vtilist, isSEWAware>,
5758+
VPatTernaryV_VX_AAXA_RM<intrinsic, instruction, vtilist, isSEWAware>;
57485759

57495760
multiclass VPatTernaryV_VX_VI<string intrinsic, string instruction,
57505761
list<VTypeInfo> vtilist, Operand Imm_type = simm5>
@@ -7122,14 +7133,22 @@ defm : VPatBinaryW_VV_VX_RM<"int_riscv_vfwmul", "PseudoVFWMUL",
71227133
//===----------------------------------------------------------------------===//
71237134
// 13.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions
71247135
//===----------------------------------------------------------------------===//
7125-
defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmacc", "PseudoVFMACC", AllFloatVectors>;
7126-
defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmacc", "PseudoVFNMACC", AllFloatVectors>;
7127-
defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmsac", "PseudoVFMSAC", AllFloatVectors>;
7128-
defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmsac", "PseudoVFNMSAC", AllFloatVectors>;
7129-
defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmadd", "PseudoVFMADD", AllFloatVectors>;
7130-
defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmadd", "PseudoVFNMADD", AllFloatVectors>;
7131-
defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmsub", "PseudoVFMSUB", AllFloatVectors>;
7132-
defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmsub", "PseudoVFNMSUB", AllFloatVectors>;
7136+
defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmacc", "PseudoVFMACC",
7137+
AllFloatVectors, isSEWAware=1>;
7138+
defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmacc", "PseudoVFNMACC",
7139+
AllFloatVectors, isSEWAware=1>;
7140+
defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmsac", "PseudoVFMSAC",
7141+
AllFloatVectors, isSEWAware=1>;
7142+
defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmsac", "PseudoVFNMSAC",
7143+
AllFloatVectors, isSEWAware=1>;
7144+
defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmadd", "PseudoVFMADD",
7145+
AllFloatVectors, isSEWAware=1>;
7146+
defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmadd", "PseudoVFNMADD",
7147+
AllFloatVectors, isSEWAware=1>;
7148+
defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmsub", "PseudoVFMSUB",
7149+
AllFloatVectors, isSEWAware=1>;
7150+
defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmsub", "PseudoVFNMSUB",
7151+
AllFloatVectors, isSEWAware=1>;
71337152

71347153
//===----------------------------------------------------------------------===//
71357154
// 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions

llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1224,7 +1224,7 @@ defm : VPatWidenBinaryFPSDNode_VV_VF_RM<fmul, "PseudoVFWMUL">;
12241224
foreach fvti = AllFloatVectors in {
12251225
// NOTE: We choose VFMADD because it has the most commuting freedom. So it
12261226
// works best with how TwoAddressInstructionPass tries commuting.
1227-
defvar suffix = fvti.LMul.MX;
1227+
defvar suffix = fvti.LMul.MX # "_E" # fvti.SEW;
12281228
let Predicates = GetVTypePredicates<fvti>.Predicates in {
12291229
def : Pat<(fvti.Vector (any_fma fvti.RegClass:$rs1, fvti.RegClass:$rd,
12301230
fvti.RegClass:$rs2)),

llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1850,7 +1850,7 @@ multiclass VPatFPMulAddVL_VV_VF<SDPatternOperator vop, string instruction_name>
18501850

18511851
multiclass VPatFPMulAddVL_VV_VF_RM<SDPatternOperator vop, string instruction_name> {
18521852
foreach vti = AllFloatVectors in {
1853-
defvar suffix = vti.LMul.MX;
1853+
defvar suffix = vti.LMul.MX # "_E" # vti.SEW;
18541854
let Predicates = GetVTypePredicates<vti>.Predicates in {
18551855
def : Pat<(vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rd,
18561856
vti.RegClass:$rs2, (vti.Mask V0),
@@ -1916,7 +1916,7 @@ multiclass VPatFPMulAccVL_VV_VF<PatFrag vop, string instruction_name> {
19161916

19171917
multiclass VPatFPMulAccVL_VV_VF_RM<PatFrag vop, string instruction_name> {
19181918
foreach vti = AllFloatVectors in {
1919-
defvar suffix = vti.LMul.MX;
1919+
defvar suffix = vti.LMul.MX # "_E" # vti.SEW;
19201920
let Predicates = GetVTypePredicates<vti>.Predicates in {
19211921
def : Pat<(riscv_vmerge_vl (vti.Mask V0),
19221922
(vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2,

llvm/lib/Target/RISCV/RISCVSchedSiFive7.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -741,15 +741,15 @@ foreach mx = SchedMxListF in {
741741
defm "" : LMULSEWWriteResMXSEW<"WriteVFALUF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
742742
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
743743
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
744+
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
745+
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
744746
}
745747
}
746748
}
747749
foreach mx = SchedMxList in {
748750
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
749751
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
750752
let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
751-
defm "" : LMULWriteResMX<"WriteVFMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
752-
defm "" : LMULWriteResMX<"WriteVFMulAddF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
753753
defm "" : LMULWriteResMX<"WriteVFRecpV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
754754
defm "" : LMULWriteResMX<"WriteVFCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
755755
defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
@@ -1163,8 +1163,8 @@ defm "" : LMULSEWReadAdvanceF<"ReadVFDivV", 0>;
11631163
defm "" : LMULSEWReadAdvanceF<"ReadVFDivF", 0>;
11641164
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulV", 0>;
11651165
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulF", 0>;
1166-
defm "" : LMULReadAdvance<"ReadVFMulAddV", 0>;
1167-
defm "" : LMULReadAdvance<"ReadVFMulAddF", 0>;
1166+
defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddV", 0>;
1167+
defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddF", 0>;
11681168
defm "" : LMULReadAdvanceFW<"ReadVFWMulAddV", 0>;
11691169
defm "" : LMULReadAdvanceFW<"ReadVFWMulAddF", 0>;
11701170
defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;

llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -499,13 +499,20 @@ foreach mx = SchedMxListF in {
499499
}
500500
}
501501
}
502+
503+
foreach mx = SchedMxListF in {
504+
foreach sew = SchedSEWSet<mx, isF=1>.val in {
505+
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
506+
defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
507+
let Latency = 6, ReleaseAtCycles = [LMulLat] in {
508+
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
509+
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
510+
}
511+
}
512+
}
502513
foreach mx = SchedMxList in {
503514
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
504515
defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
505-
let Latency = 6, ReleaseAtCycles = [LMulLat] in {
506-
defm "" : LMULWriteResMX<"WriteVFMulAddV", [SiFiveP600VectorArith], mx, IsWorstCase>;
507-
defm "" : LMULWriteResMX<"WriteVFMulAddF", [SiFiveP600VectorArith], mx, IsWorstCase>;
508-
}
509516
let Latency = 3, ReleaseAtCycles = [LMulLat] in {
510517
defm "" : LMULWriteResMX<"WriteVFCvtIToFV", [SiFiveP600VectorArith], mx, IsWorstCase>;
511518
defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFiveP600VectorArith], mx, IsWorstCase>;
@@ -952,8 +959,8 @@ defm "" : LMULSEWReadAdvanceF<"ReadVFDivV", 0>;
952959
defm "" : LMULSEWReadAdvanceF<"ReadVFDivF", 0>;
953960
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulV", 0>;
954961
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulF", 0>;
955-
defm "" : LMULReadAdvance<"ReadVFMulAddV", 0>;
956-
defm "" : LMULReadAdvance<"ReadVFMulAddF", 0>;
962+
defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddV", 0>;
963+
defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddF", 0>;
957964
defm "" : LMULReadAdvanceFW<"ReadVFWMulAddV", 0>;
958965
defm "" : LMULReadAdvanceFW<"ReadVFWMulAddF", 0>;
959966
defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;

0 commit comments

Comments
 (0)