Skip to content

Commit 597f200

Browse files
mshockwave authored and smallp-o-p committed
[RISCV] Assign separate PseudoVSHA2MS_VV opcodes for each SEW (llvm#114317)
The vsha2ms.vv from Zvknh[ab] currently supports both SEW=32 and SEW=64. It might have different performance characteristics depending on the SEW on some processors. This patch splits these two different SEWs into their own VPseudo opcodes and scheduling classes. This is effectively an NFC change.
1 parent 67ff7f5 commit 597f200

File tree

5 files changed

+33
-24
lines changed

5 files changed

+33
-24
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4359,21 +4359,22 @@ class VPatTernaryNoMaskWithPolicy<string intrinsic,
43594359
ValueType result_type,
43604360
ValueType op1_type,
43614361
ValueType op2_type,
4362-
int sew,
4362+
int log2sew,
43634363
LMULInfo vlmul,
43644364
VReg result_reg_class,
43654365
RegisterClass op1_reg_class,
4366-
DAGOperand op2_kind> :
4366+
DAGOperand op2_kind,
4367+
bit isSEWAware = false> :
43674368
Pat<(result_type (!cast<Intrinsic>(intrinsic)
43684369
(result_type result_reg_class:$rs3),
43694370
(op1_type op1_reg_class:$rs1),
43704371
(op2_type op2_kind:$rs2),
43714372
VLOpFrag, (XLenVT timm:$policy))),
4372-
(!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
4373+
(!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#!if(isSEWAware, "_E"#!shl(1, log2sew), ""))
43734374
result_reg_class:$rs3,
43744375
(op1_type op1_reg_class:$rs1),
43754376
op2_kind:$rs2,
4376-
GPR:$vl, sew, (XLenVT timm:$policy))>;
4377+
GPR:$vl, log2sew, (XLenVT timm:$policy))>;
43774378

43784379
class VPatTernaryNoMaskWithPolicyRoundingMode<string intrinsic,
43794380
string inst,

llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td

Lines changed: 18 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -274,9 +274,11 @@ multiclass VPseudoBinaryNoMaskPolicy_Zvk<VReg RetClass,
274274
multiclass VPseudoTernaryNoMask_Zvk<VReg RetClass,
275275
VReg Op1Class,
276276
DAGOperand Op2Class,
277-
LMULInfo MInfo> {
278-
let VLMul = MInfo.value in
279-
def "_" # MInfo.MX : VPseudoTernaryNoMask_Zvk<RetClass, Op1Class, Op2Class>;
277+
LMULInfo MInfo, int sew = 0> {
278+
let VLMul = MInfo.value, SEW = sew in {
279+
defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX);
280+
def suffix : VPseudoTernaryNoMask_Zvk<RetClass, Op1Class, Op2Class>;
281+
}
280282
}
281283

282284
multiclass VPseudoBinaryV_V_NoMask_Zvk<LMULInfo m> {
@@ -348,12 +350,12 @@ multiclass VPseudoVSHA2CL {
348350
}
349351
}
350352

351-
multiclass VPseudoVSHA2MS {
352-
foreach m = MxListVF4 in {
353+
multiclass VPseudoVSHA2MS<int sew = 0> {
354+
foreach m = !if(!eq(sew, 64), MxListVF8, MxListVF4) in {
353355
defvar mx = m.MX;
354-
defm _VV : VPseudoTernaryNoMask_Zvk<m.vrclass, m.vrclass, m.vrclass, m>,
356+
defm _VV : VPseudoTernaryNoMask_Zvk<m.vrclass, m.vrclass, m.vrclass, m, sew = sew>,
355357
SchedTernary<"WriteVSHA2MSV", "ReadVSHA2MSV", "ReadVSHA2MSV",
356-
"ReadVSHA2MSV", mx>;
358+
"ReadVSHA2MSV", mx, sew>;
357359
}
358360
}
359361

@@ -564,7 +566,9 @@ let Predicates = [HasStdExtZvkned] in {
564566
let Predicates = [HasStdExtZvknhaOrZvknhb] in {
565567
defm PseudoVSHA2CH : VPseudoVSHA2CH;
566568
defm PseudoVSHA2CL : VPseudoVSHA2CL;
567-
defm PseudoVSHA2MS : VPseudoVSHA2MS;
569+
defm PseudoVSHA2MS : VPseudoVSHA2MS<sew=32>;
570+
let Predicates = [HasStdExtZvknhb] in
571+
defm PseudoVSHA2MS : VPseudoVSHA2MS<sew=64>;
568572
} // Predicates = [HasStdExtZvknhaOrZvknhb]
569573

570574
let Predicates = [HasStdExtZvksed] in {
@@ -944,12 +948,14 @@ multiclass VPatUnaryV_V_S_NoMask_Zvk<string intrinsic, string instruction,
944948
}
945949

946950
multiclass VPatBinaryV_VV_NoMask<string intrinsic, string instruction,
947-
list<VTypeInfo> vtilist> {
951+
list<VTypeInfo> vtilist,
952+
bit isSEWAware = false> {
948953
foreach vti = vtilist in
949954
def : VPatTernaryNoMaskWithPolicy<intrinsic, instruction, "VV",
950955
vti.Vector, vti.Vector, vti.Vector,
951956
vti.Log2SEW, vti.LMul, vti.RegClass,
952-
vti.RegClass, vti.RegClass>;
957+
vti.RegClass, vti.RegClass,
958+
isSEWAware = isSEWAware>;
953959
}
954960

955961
multiclass VPatBinaryV_VI_NoMask<string intrinsic, string instruction,
@@ -1101,13 +1107,13 @@ let Predicates = [HasStdExtZvkned] in {
11011107
let Predicates = [HasStdExtZvknha] in {
11021108
defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ch", "PseudoVSHA2CH", I32IntegerVectors>;
11031109
defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2cl", "PseudoVSHA2CH", I32IntegerVectors>;
1104-
defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I32IntegerVectors>;
1110+
defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I32IntegerVectors, isSEWAware=true>;
11051111
} // Predicates = [HasStdExtZvknha]
11061112

11071113
let Predicates = [HasStdExtZvknhb] in {
11081114
defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ch", "PseudoVSHA2CH", I32I64IntegerVectors>;
11091115
defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2cl", "PseudoVSHA2CH", I32I64IntegerVectors>;
1110-
defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I32I64IntegerVectors>;
1116+
defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I32I64IntegerVectors, isSEWAware=true>;
11111117
} // Predicates = [HasStdExtZvknhb]
11121118

11131119
let Predicates = [HasStdExtZvksed] in {

llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -883,7 +883,8 @@ foreach mx = SchedMxList in {
883883
let Latency = 3, ReleaseAtCycles = [LMulLat] in {
884884
defm "" : LMULWriteResMX<"WriteVSHA2CHV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
885885
defm "" : LMULWriteResMX<"WriteVSHA2CLV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
886-
defm "" : LMULWriteResMX<"WriteVSHA2MSV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
886+
foreach sew = !listremove(SchedSEWSet<mx>.val, [8, 16]) in
887+
defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
887888
}
888889
// Zvkned
889890
let Latency = 2, ReleaseAtCycles = [LMulLat] in {
@@ -1213,7 +1214,7 @@ defm "" : LMULReadAdvance<"ReadVGMULV", 0>;
12131214
// Zvknha or Zvknhb
12141215
defm "" : LMULReadAdvance<"ReadVSHA2CHV", 0>;
12151216
defm "" : LMULReadAdvance<"ReadVSHA2CLV", 0>;
1216-
defm "" : LMULReadAdvance<"ReadVSHA2MSV", 0>;
1217+
defm "" : LMULSEWReadAdvance<"ReadVSHA2MSV", 0>;
12171218
// Zvkned
12181219
defm "" : LMULReadAdvance<"ReadVAESMVV", 0>;
12191220
defm "" : LMULReadAdvance<"ReadVAESKF1V", 0>;

llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -791,7 +791,8 @@ foreach mx = SchedMxList in {
791791
let Latency = 3, ReleaseAtCycles = [LMulLat] in {
792792
defm "" : LMULWriteResMX<"WriteVSHA2CHV", [SiFiveP600VectorArith], mx, IsWorstCase>;
793793
defm "" : LMULWriteResMX<"WriteVSHA2CLV", [SiFiveP600VectorArith], mx, IsWorstCase>;
794-
defm "" : LMULWriteResMX<"WriteVSHA2MSV", [SiFiveP600VectorArith], mx, IsWorstCase>;
794+
foreach sew = !listremove(SchedSEWSet<mx>.val, [8, 16]) in
795+
defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
795796
}
796797
// Zvkned
797798
let Latency = 2, ReleaseAtCycles = [LMulLat] in {
@@ -1119,7 +1120,7 @@ defm "" : LMULReadAdvance<"ReadVGMULV", 0>;
11191120
// Zvknha or Zvknhb
11201121
defm "" : LMULReadAdvance<"ReadVSHA2CHV", 0>;
11211122
defm "" : LMULReadAdvance<"ReadVSHA2CLV", 0>;
1122-
defm "" : LMULReadAdvance<"ReadVSHA2MSV", 0>;
1123+
defm "" : LMULSEWReadAdvance<"ReadVSHA2MSV", 0>;
11231124
// Zvkned
11241125
defm "" : LMULReadAdvance<"ReadVAESMVV", 0>;
11251126
defm "" : LMULReadAdvance<"ReadVAESKF1V", 0>;

llvm/lib/Target/RISCV/RISCVScheduleZvk.td

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ defm "" : LMULSchedWrites<"WriteVGMULV">;
3636
/// Zvknha or Zvknhb extensions
3737
defm "" : LMULSchedWrites<"WriteVSHA2CHV">;
3838
defm "" : LMULSchedWrites<"WriteVSHA2CLV">;
39-
defm "" : LMULSchedWrites<"WriteVSHA2MSV">;
39+
defm "" : LMULSEWSchedWrites<"WriteVSHA2MSV">;
4040

4141
/// Zvkned extension
4242
defm "" : LMULSchedWrites<"WriteVAESMVV">;
@@ -79,7 +79,7 @@ defm "" : LMULSchedReads<"ReadVGMULV">;
7979
/// Zvknha or Zvknhb extensions
8080
defm "" : LMULSchedReads<"ReadVSHA2CHV">;
8181
defm "" : LMULSchedReads<"ReadVSHA2CLV">;
82-
defm "" : LMULSchedReads<"ReadVSHA2MSV">;
82+
defm "" : LMULSEWSchedReads<"ReadVSHA2MSV">;
8383

8484
/// Zvkned extension
8585
defm "" : LMULSchedReads<"ReadVAESMVV">;
@@ -153,11 +153,11 @@ multiclass UnsupportedSchedZvknhaOrZvknhb {
153153
let Unsupported = true in {
154154
defm "" : LMULWriteRes<"WriteVSHA2CHV", []>;
155155
defm "" : LMULWriteRes<"WriteVSHA2CLV", []>;
156-
defm "" : LMULWriteRes<"WriteVSHA2MSV", []>;
156+
defm "" : LMULSEWWriteRes<"WriteVSHA2MSV", []>;
157157

158158
defm "" : LMULReadAdvance<"ReadVSHA2CHV", 0>;
159159
defm "" : LMULReadAdvance<"ReadVSHA2CLV", 0>;
160-
defm "" : LMULReadAdvance<"ReadVSHA2MSV", 0>;
160+
defm "" : LMULSEWReadAdvance<"ReadVSHA2MSV", 0>;
161161
}
162162
}
163163

0 commit comments

Comments
 (0)