-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV] Assign separate PseudoVSHA2MS_VV opcodes for each SEW #114317
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Assign separate PseudoVSHA2MS_VV opcodes for each SEW #114317
Conversation
The vsha2ms.vv from Zvknh[ab] currently supports both SEW=32 and SEW=64. It might have different performance characteristics depending on the SEW on some processors. This patch splits these two different SEWs into their own VPsuedo opcodes and scheduling classes. This is effectively a NFC change.
@llvm/pr-subscribers-backend-risc-v Author: Min-Yih Hsu (mshockwave) ChangesThe vsha2ms.vv from Zvknh[ab] currently supports both SEW=32 and SEW=64. It might have different performance characteristics depending on the SEW on some processors. This patch splits these two different SEWs into their own VPsuedo opcodes and scheduling classes. This is effectively a NFC change. Full diff: https://github.com/llvm/llvm-project/pull/114317.diff 5 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index d5b0fa340684b4..be8bc94a76ff0a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -4454,21 +4454,22 @@ class VPatTernaryNoMaskWithPolicy<string intrinsic,
ValueType result_type,
ValueType op1_type,
ValueType op2_type,
- int sew,
+ int log2sew,
LMULInfo vlmul,
VReg result_reg_class,
RegisterClass op1_reg_class,
- DAGOperand op2_kind> :
+ DAGOperand op2_kind,
+ bit sew_aware = false> :
Pat<(result_type (!cast<Intrinsic>(intrinsic)
(result_type result_reg_class:$rs3),
(op1_type op1_reg_class:$rs1),
(op2_type op2_kind:$rs2),
VLOpFrag, (XLenVT timm:$policy))),
- (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#!if(sew_aware, "_E"#!shl(1, log2sew), ""))
result_reg_class:$rs3,
(op1_type op1_reg_class:$rs1),
op2_kind:$rs2,
- GPR:$vl, sew, (XLenVT timm:$policy))>;
+ GPR:$vl, log2sew, (XLenVT timm:$policy))>;
class VPatTernaryNoMaskWithPolicyRoundingMode<string intrinsic,
string inst,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
index 782651fd6d0197..93020d3fc5aecc 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
@@ -274,9 +274,11 @@ multiclass VPseudoBinaryNoMaskPolicy_Zvk<VReg RetClass,
multiclass VPseudoTernaryNoMask_Zvk<VReg RetClass,
VReg Op1Class,
DAGOperand Op2Class,
- LMULInfo MInfo> {
- let VLMul = MInfo.value in
- def "_" # MInfo.MX : VPseudoTernaryNoMask_Zvk<RetClass, Op1Class, Op2Class>;
+ LMULInfo MInfo, int sew = 0> {
+ let VLMul = MInfo.value, SEW = sew in {
+ defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX);
+ def suffix : VPseudoTernaryNoMask_Zvk<RetClass, Op1Class, Op2Class>;
+ }
}
multiclass VPseudoBinaryV_V_NoMask_Zvk<LMULInfo m> {
@@ -348,12 +350,12 @@ multiclass VPseudoVSHA2CL {
}
}
-multiclass VPseudoVSHA2MS {
- foreach m = MxListVF4 in {
+multiclass VPseudoVSHA2MS<int sew = 0> {
+ foreach m = !if(!eq(sew, 64), MxListVF8, MxListVF4) in {
defvar mx = m.MX;
- defm _VV : VPseudoTernaryNoMask_Zvk<m.vrclass, m.vrclass, m.vrclass, m>,
+ defm _VV : VPseudoTernaryNoMask_Zvk<m.vrclass, m.vrclass, m.vrclass, m, sew = sew>,
SchedTernary<"WriteVSHA2MSV", "ReadVSHA2MSV", "ReadVSHA2MSV",
- "ReadVSHA2MSV", mx>;
+ "ReadVSHA2MSV", mx, sew>;
}
}
@@ -564,7 +566,9 @@ let Predicates = [HasStdExtZvkned] in {
let Predicates = [HasStdExtZvknhaOrZvknhb] in {
defm PseudoVSHA2CH : VPseudoVSHA2CH;
defm PseudoVSHA2CL : VPseudoVSHA2CL;
- defm PseudoVSHA2MS : VPseudoVSHA2MS;
+ defm PseudoVSHA2MS : VPseudoVSHA2MS<sew=32>;
+ let Predicates = [HasStdExtZvknhb] in
+ defm PseudoVSHA2MS : VPseudoVSHA2MS<sew=64>;
} // Predicates = [HasStdExtZvknhaOrZvknhb]
let Predicates = [HasStdExtZvksed] in {
@@ -944,12 +948,14 @@ multiclass VPatUnaryV_V_S_NoMask_Zvk<string intrinsic, string instruction,
}
multiclass VPatBinaryV_VV_NoMask<string intrinsic, string instruction,
- list<VTypeInfo> vtilist> {
+ list<VTypeInfo> vtilist,
+ bit isSEWAware = false> {
foreach vti = vtilist in
def : VPatTernaryNoMaskWithPolicy<intrinsic, instruction, "VV",
vti.Vector, vti.Vector, vti.Vector,
vti.Log2SEW, vti.LMul, vti.RegClass,
- vti.RegClass, vti.RegClass>;
+ vti.RegClass, vti.RegClass,
+ sew_aware = isSEWAware>;
}
multiclass VPatBinaryV_VI_NoMask<string intrinsic, string instruction,
@@ -1101,13 +1107,13 @@ let Predicates = [HasStdExtZvkned] in {
let Predicates = [HasStdExtZvknha] in {
defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ch", "PseudoVSHA2CH", I32IntegerVectors>;
defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2cl", "PseudoVSHA2CH", I32IntegerVectors>;
- defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I32IntegerVectors>;
+ defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I32IntegerVectors, isSEWAware=true>;
} // Predicates = [HasStdExtZvknha]
let Predicates = [HasStdExtZvknhb] in {
defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ch", "PseudoVSHA2CH", I32I64IntegerVectors>;
defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2cl", "PseudoVSHA2CH", I32I64IntegerVectors>;
- defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I32I64IntegerVectors>;
+ defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I32I64IntegerVectors, isSEWAware=true>;
} // Predicates = [HasStdExtZvknhb]
let Predicates = [HasStdExtZvksed] in {
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
index 7a54d2fe108080..1af89903e0068c 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
@@ -883,7 +883,8 @@ foreach mx = SchedMxList in {
let Latency = 3, ReleaseAtCycles = [LMulLat] in {
defm "" : LMULWriteResMX<"WriteVSHA2CHV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVSHA2CLV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSHA2MSV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
+ foreach sew = !listremove(SchedSEWSet<mx>.val, [8, 16]) in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
}
// Zvkned
let Latency = 2, ReleaseAtCycles = [LMulLat] in {
@@ -1213,7 +1214,7 @@ defm "" : LMULReadAdvance<"ReadVGMULV", 0>;
// Zvknha or Zvknhb
defm "" : LMULReadAdvance<"ReadVSHA2CHV", 0>;
defm "" : LMULReadAdvance<"ReadVSHA2CLV", 0>;
-defm "" : LMULReadAdvance<"ReadVSHA2MSV", 0>;
+defm "" : LMULSEWReadAdvance<"ReadVSHA2MSV", 0>;
// Zvkned
defm "" : LMULReadAdvance<"ReadVAESMVV", 0>;
defm "" : LMULReadAdvance<"ReadVAESKF1V", 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
index c685a6d2b094be..51aa003139fbad 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
@@ -791,7 +791,8 @@ foreach mx = SchedMxList in {
let Latency = 3, ReleaseAtCycles = [LMulLat] in {
defm "" : LMULWriteResMX<"WriteVSHA2CHV", [SiFiveP600VectorArith], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVSHA2CLV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSHA2MSV", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ foreach sew = !listremove(SchedSEWSet<mx>.val, [8, 16]) in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
}
// Zvkned
let Latency = 2, ReleaseAtCycles = [LMulLat] in {
@@ -1119,7 +1120,7 @@ defm "" : LMULReadAdvance<"ReadVGMULV", 0>;
// Zvknha or Zvknhb
defm "" : LMULReadAdvance<"ReadVSHA2CHV", 0>;
defm "" : LMULReadAdvance<"ReadVSHA2CLV", 0>;
-defm "" : LMULReadAdvance<"ReadVSHA2MSV", 0>;
+defm "" : LMULSEWReadAdvance<"ReadVSHA2MSV", 0>;
// Zvkned
defm "" : LMULReadAdvance<"ReadVAESMVV", 0>;
defm "" : LMULReadAdvance<"ReadVAESKF1V", 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleZvk.td b/llvm/lib/Target/RISCV/RISCVScheduleZvk.td
index 640c456322f022..62d9bab0fac85f 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleZvk.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleZvk.td
@@ -36,7 +36,7 @@ defm "" : LMULSchedWrites<"WriteVGMULV">;
/// Zvknha or Zvknhb extensions
defm "" : LMULSchedWrites<"WriteVSHA2CHV">;
defm "" : LMULSchedWrites<"WriteVSHA2CLV">;
-defm "" : LMULSchedWrites<"WriteVSHA2MSV">;
+defm "" : LMULSEWSchedWrites<"WriteVSHA2MSV">;
/// Zvkned extension
defm "" : LMULSchedWrites<"WriteVAESMVV">;
@@ -79,7 +79,7 @@ defm "" : LMULSchedReads<"ReadVGMULV">;
/// Zvknha or Zvknhb extensions
defm "" : LMULSchedReads<"ReadVSHA2CHV">;
defm "" : LMULSchedReads<"ReadVSHA2CLV">;
-defm "" : LMULSchedReads<"ReadVSHA2MSV">;
+defm "" : LMULSEWSchedReads<"ReadVSHA2MSV">;
/// Zvkned extension
defm "" : LMULSchedReads<"ReadVAESMVV">;
@@ -153,11 +153,11 @@ multiclass UnsupportedSchedZvknhaOrZvknhb {
let Unsupported = true in {
defm "" : LMULWriteRes<"WriteVSHA2CHV", []>;
defm "" : LMULWriteRes<"WriteVSHA2CLV", []>;
-defm "" : LMULWriteRes<"WriteVSHA2MSV", []>;
+defm "" : LMULSEWWriteRes<"WriteVSHA2MSV", []>;
defm "" : LMULReadAdvance<"ReadVSHA2CHV", 0>;
defm "" : LMULReadAdvance<"ReadVSHA2CLV", 0>;
-defm "" : LMULReadAdvance<"ReadVSHA2MSV", 0>;
+defm "" : LMULSEWReadAdvance<"ReadVSHA2MSV", 0>;
}
}
|
@@ -564,7 +566,9 @@ let Predicates = [HasStdExtZvkned] in { | |||
let Predicates = [HasStdExtZvknhaOrZvknhb] in { | |||
defm PseudoVSHA2CH : VPseudoVSHA2CH; | |||
defm PseudoVSHA2CL : VPseudoVSHA2CL; | |||
defm PseudoVSHA2MS : VPseudoVSHA2MS; | |||
defm PseudoVSHA2MS : VPseudoVSHA2MS<sew=32>; | |||
let Predicates = [HasStdExtZvknhb] in |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Which let
wins here. This one or the one on line 566?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this one (line 570) wins.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
…14317) The vsha2ms.vv from Zvknh[ab] currently supports both SEW=32 and SEW=64. It might have different performance characteristics depending on the SEW on some processors. This patch splits these two different SEWs into their own VPsuedo opcodes and scheduling classes. This is effectively a NFC change.
…14317) The vsha2ms.vv from Zvknh[ab] currently supports both SEW=32 and SEW=64. It might have different performance characteristics depending on the SEW on some processors. This patch splits these two different SEWs into their own VPsuedo opcodes and scheduling classes. This is effectively a NFC change.
… models (#116893) For each RVV instruction we should have a single WriteRes assignment to the worst case scheduling class. This assignment is usually equal to that of the largest LMUL + smallest SEW. My #114317 accidentally made two of these assignments on `WriteVSHA2MSV_WorstCase`. This won't affect our MachineScheduler nor most of our llvm-mca use cases (assuming you populate the correct LMUL and SEW), yet it's not ideal either. This patch fixes this issue by assigning the correct numbers and resource mapping to `WriteVSHA2MSV_WorstCase`, which is equal to that of largest LMUL + _largest_ SEW (Zvknh's scheduling properties are special). I also added a MCA test to make sure we always pick up the correct worst case numbers for P600's scheduling model. Original issue was reported by @reidtatge
The vsha2ms.vv from Zvknh[ab] currently supports both SEW=32 and SEW=64. It might have different performance characteristics depending on the SEW on some processors. This patch splits these two different SEWs into their own VPsuedo opcodes and scheduling classes.
This is effectively a NFC change.