[RISCV][llvm-mca] Vector Unit Stride Loads and stores use EEW and EMUL based on instruction EEW #69409
@llvm/pr-subscribers-tools-llvm-mca @llvm/pr-subscribers-backend-risc-v

Author: Michael Maitland (michaelmaitland)

Changes

For vector unit-stride loads and stores, EEW and EMUL depend on the EEW given in the instruction name. llvm-mca needs some help to correctly report this information.

Patch is 87.98 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/69409.diff

2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
index 16f9c113e29a915..17eda5e68f796d5 100644
--- a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
+++ b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
@@ -17,6 +17,8 @@
#include "TargetInfo/RISCVTargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
+#include <numeric>
+#include <set>
#define DEBUG_TYPE "llvm-mca-riscv-custombehaviour"
@@ -185,6 +187,109 @@ RISCVInstrumentManager::createInstruments(const MCInst &Inst) {
return SmallVector<UniqueInstrument>();
}
+/// Return EMUL = (EEW / SEW) * LMUL
+inline static std::pair<unsigned, bool>
+getEMULEqualsEEWDivSEWTimesLMUL(unsigned EEW, unsigned SEW,
+ RISCVII::VLMUL VLMUL) {
+ // Calculate (EEW/SEW)*LMUL preserving fractions less than 1. Use GCD
+ // to put fraction in simplest form.
+ auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
+ unsigned Num = EEW, Denom = SEW;
+ int GCD =
+ Fractional ? std::gcd(Num, Denom * LMUL) : std::gcd(Num * LMUL, Denom);
+ Num = Fractional ? Num / GCD : Num * LMUL / GCD;
+ Denom = Fractional ? Denom * LMUL / GCD : Denom / GCD;
+ return std::make_pair(Num > Denom ? Num : Denom, Denom > Num);
+}
+
+static std::pair<uint8_t, uint8_t>
+getEEWAndEMULForUnitStrideLoadStore(unsigned Opcode, uint8_t LMUL,
+ uint8_t SEW) {
+ uint8_t EEW;
+ switch (Opcode) {
+ case RISCV::VLM_V:
+ case RISCV::VSM_V:
+ case RISCV::VLE8_V:
+ case RISCV::VSE8_V:
+ EEW = 8;
+ break;
+ case RISCV::VLE16_V:
+ case RISCV::VSE16_V:
+ EEW = 16;
+ break;
+ case RISCV::VLE32_V:
+ case RISCV::VSE32_V:
+ EEW = 32;
+ break;
+ case RISCV::VLE64_V:
+ case RISCV::VSE64_V:
+ EEW = 64;
+ break;
+ default:
+ llvm_unreachable("Opcode is not a vector unit stride load nor store");
+ }
+
+ RISCVII::VLMUL VLMUL;
+ switch (LMUL) {
+ case 0b000:
+ VLMUL = RISCVII::LMUL_1;
+ break;
+ case 0b001:
+ VLMUL = RISCVII::LMUL_2;
+ break;
+ case 0b010:
+ VLMUL = RISCVII::LMUL_4;
+ break;
+ case 0b011:
+ VLMUL = RISCVII::LMUL_8;
+ break;
+ case 0b111:
+ VLMUL = RISCVII::LMUL_F2;
+ break;
+ case 0b110:
+ VLMUL = RISCVII::LMUL_F4;
+ break;
+ case 0b101:
+ VLMUL = RISCVII::LMUL_F8;
+ break;
+ case RISCVII::LMUL_RESERVED:
+ llvm_unreachable("LMUL cannot be LMUL_RESERVED");
+ }
+
+ auto [EMULPart, Fractional] =
+ getEMULEqualsEEWDivSEWTimesLMUL(EEW, SEW, VLMUL);
+ assert(RISCVVType::isValidLMUL(EMULPart, Fractional) &&
+ "Unexpected EEW from instruction used with LMUL and SEW");
+
+ uint8_t EMUL;
+ switch (RISCVVType::encodeLMUL(EMULPart, Fractional)) {
+ case RISCVII::LMUL_1:
+ EMUL = 0b000;
+ break;
+ case RISCVII::LMUL_2:
+ EMUL = 0b001;
+ break;
+ case RISCVII::LMUL_4:
+ EMUL = 0b010;
+ break;
+ case RISCVII::LMUL_8:
+ EMUL = 0b011;
+ break;
+ case RISCVII::LMUL_F2:
+ EMUL = 0b111;
+ break;
+ case RISCVII::LMUL_F4:
+ EMUL = 0b110;
+ break;
+ case RISCVII::LMUL_F8:
+ EMUL = 0b101;
+ break;
+ case RISCVII::LMUL_RESERVED:
+ llvm_unreachable("Cannot create instrument for LMUL_RESERVED");
+ }
+ return std::make_pair(EEW, EMUL);
+}
+
unsigned RISCVInstrumentManager::getSchedClassID(
const MCInstrInfo &MCII, const MCInst &MCI,
const llvm::SmallVector<Instrument *> &IVec) const {
@@ -214,12 +319,22 @@ unsigned RISCVInstrumentManager::getSchedClassID(
// or (Opcode, LMUL, SEW) if SEW instrument is active, and depends on LMUL
// and SEW, or (Opcode, LMUL, 0) if does not depend on SEW.
uint8_t SEW = SI ? SI->getSEW() : 0;
- // Check if it depends on LMUL and SEW
- const RISCVVInversePseudosTable::PseudoInfo *RVV =
- RISCVVInversePseudosTable::getBaseInfo(Opcode, LMUL, SEW);
- // Check if it depends only on LMUL
- if (!RVV)
- RVV = RISCVVInversePseudosTable::getBaseInfo(Opcode, LMUL, 0);
+
+ const RISCVVInversePseudosTable::PseudoInfo *RVV = nullptr;
+ if (Opcode == RISCV::VLM_V || Opcode == RISCV::VSM_V ||
+ Opcode == RISCV::VLE8_V || Opcode == RISCV::VSE8_V ||
+ Opcode == RISCV::VLE16_V || Opcode == RISCV::VSE16_V ||
+ Opcode == RISCV::VLE32_V || Opcode == RISCV::VSE32_V ||
+ Opcode == RISCV::VLE64_V || Opcode == RISCV::VSE64_V) {
+ auto [EEW, EMUL] = getEEWAndEMULForUnitStrideLoadStore(Opcode, LMUL, SEW);
+ RVV = RISCVVInversePseudosTable::getBaseInfo(Opcode, EMUL, EEW);
+ } else {
+ // Check if it depends on LMUL and SEW
+ RVV = RISCVVInversePseudosTable::getBaseInfo(Opcode, LMUL, SEW);
+ // Check if it depends only on LMUL
+ if (!RVV)
+ RVV = RISCVVInversePseudosTable::getBaseInfo(Opcode, LMUL, 0);
+ }
// Not a RVV instr
if (!RVV) {
diff --git a/llvm/test/tools/llvm-mca/RISCV/vle-vse.s b/llvm/test/tools/llvm-mca/RISCV/vle-vse.s
new file mode 100644
index 000000000000000..15b8f854c587657
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/vle-vse.s
@@ -0,0 +1,1249 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-x280 -iterations=1 < %s | FileCheck %s
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vle8.v v1, (a0)
+vsetvli zero, zero, e8, mf4, tu, mu
+vle8.v v1, (a0)
+vsetvli zero, zero, e8, mf2, tu, mu
+vle8.v v1, (a0)
+vsetvli zero, zero, e8, m1, tu, mu
+vle8.v v1, (a0)
+vsetvli zero, zero, e8, m2, tu, mu
+vle8.v v1, (a0)
+vsetvli zero, zero, e8, m4, tu, mu
+vle8.v v1, (a0)
+vsetvli zero, zero, e8, m8, tu, mu
+vle8.v v1, (a0)
+vsetvli zero, zero, e16, mf4, tu, mu
+vle8.v v1, (a0)
+vsetvli zero, zero, e16, mf2, tu, mu
+vle8.v v1, (a0)
+vsetvli zero, zero, e16, m1, tu, mu
+vle8.v v1, (a0)
+vsetvli zero, zero, e16, m2, tu, mu
+vle8.v v1, (a0)
+vsetvli zero, zero, e16, m4, tu, mu
+vle8.v v1, (a0)
+vsetvli zero, zero, e16, m8, tu, mu
+vle8.v v1, (a0)
+vsetvli zero, zero, e32, mf2, tu, mu
+vle8.v v1, (a0)
+vsetvli zero, zero, e32, m1, tu, mu
+vle8.v v1, (a0)
+vsetvli zero, zero, e32, m2, tu, mu
+vle8.v v1, (a0)
+vsetvli zero, zero, e32, m4, tu, mu
+vle8.v v1, (a0)
+vsetvli zero, zero, e32, m8, tu, mu
+vle8.v v1, (a0)
+vsetvli zero, zero, e64, m1, tu, mu
+vle8.v v1, (a0)
+vsetvli zero, zero, e64, m2, tu, mu
+vle8.v v1, (a0)
+vsetvli zero, zero, e64, m4, tu, mu
+vle8.v v1, (a0)
+vsetvli zero, zero, e64, m8, tu, mu
+vle8.v v1, (a0)
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vle16.v v1, (a0)
+vsetvli zero, zero, e8, mf4, tu, mu
+vle16.v v1, (a0)
+vsetvli zero, zero, e8, mf2, tu, mu
+vle16.v v1, (a0)
+vsetvli zero, zero, e8, m1, tu, mu
+vle16.v v1, (a0)
+vsetvli zero, zero, e8, m2, tu, mu
+vle16.v v1, (a0)
+vsetvli zero, zero, e8, m4, tu, mu
+vle16.v v1, (a0)
+vsetvli zero, zero, e16, mf4, tu, mu
+vle16.v v1, (a0)
+vsetvli zero, zero, e16, mf2, tu, mu
+vle16.v v1, (a0)
+vsetvli zero, zero, e16, m1, tu, mu
+vle16.v v1, (a0)
+vsetvli zero, zero, e16, m2, tu, mu
+vle16.v v1, (a0)
+vsetvli zero, zero, e16, m4, tu, mu
+vle16.v v1, (a0)
+vsetvli zero, zero, e16, m8, tu, mu
+vle16.v v1, (a0)
+vsetvli zero, zero, e32, mf2, tu, mu
+vle16.v v1, (a0)
+vsetvli zero, zero, e32, m1, tu, mu
+vle16.v v1, (a0)
+vsetvli zero, zero, e32, m2, tu, mu
+vle16.v v1, (a0)
+vsetvli zero, zero, e32, m4, tu, mu
+vle16.v v1, (a0)
+vsetvli zero, zero, e32, m8, tu, mu
+vle16.v v1, (a0)
+vsetvli zero, zero, e64, m1, tu, mu
+vle16.v v1, (a0)
+vsetvli zero, zero, e64, m2, tu, mu
+vle16.v v1, (a0)
+vsetvli zero, zero, e64, m4, tu, mu
+vle16.v v1, (a0)
+vsetvli zero, zero, e64, m8, tu, mu
+vle16.v v1, (a0)
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vle32.v v1, (a0)
+vsetvli zero, zero, e8, mf4, tu, mu
+vle32.v v1, (a0)
+vsetvli zero, zero, e8, mf2, tu, mu
+vle32.v v1, (a0)
+vsetvli zero, zero, e8, m1, tu, mu
+vle32.v v1, (a0)
+vsetvli zero, zero, e8, m2, tu, mu
+vle32.v v1, (a0)
+vsetvli zero, zero, e16, mf4, tu, mu
+vle32.v v1, (a0)
+vsetvli zero, zero, e16, mf2, tu, mu
+vle32.v v1, (a0)
+vsetvli zero, zero, e16, m1, tu, mu
+vle32.v v1, (a0)
+vsetvli zero, zero, e16, m2, tu, mu
+vle32.v v1, (a0)
+vsetvli zero, zero, e16, m4, tu, mu
+vle32.v v1, (a0)
+vsetvli zero, zero, e32, mf2, tu, mu
+vle32.v v1, (a0)
+vsetvli zero, zero, e32, m1, tu, mu
+vle32.v v1, (a0)
+vsetvli zero, zero, e32, m2, tu, mu
+vle32.v v1, (a0)
+vsetvli zero, zero, e32, m4, tu, mu
+vle32.v v1, (a0)
+vsetvli zero, zero, e32, m8, tu, mu
+vle32.v v1, (a0)
+vsetvli zero, zero, e64, m1, tu, mu
+vle32.v v1, (a0)
+vsetvli zero, zero, e64, m2, tu, mu
+vle32.v v1, (a0)
+vsetvli zero, zero, e64, m4, tu, mu
+vle32.v v1, (a0)
+vsetvli zero, zero, e64, m8, tu, mu
+vle32.v v1, (a0)
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vle64.v v1, (a0)
+vsetvli zero, zero, e8, mf4, tu, mu
+vle64.v v1, (a0)
+vsetvli zero, zero, e8, mf2, tu, mu
+vle64.v v1, (a0)
+vsetvli zero, zero, e8, m1, tu, mu
+vle64.v v1, (a0)
+vsetvli zero, zero, e16, mf4, tu, mu
+vle64.v v1, (a0)
+vsetvli zero, zero, e16, mf2, tu, mu
+vle64.v v1, (a0)
+vsetvli zero, zero, e16, m1, tu, mu
+vle64.v v1, (a0)
+vsetvli zero, zero, e16, m2, tu, mu
+vle64.v v1, (a0)
+vsetvli zero, zero, e32, mf2, tu, mu
+vle64.v v1, (a0)
+vsetvli zero, zero, e32, m1, tu, mu
+vle64.v v1, (a0)
+vsetvli zero, zero, e32, m2, tu, mu
+vle64.v v1, (a0)
+vsetvli zero, zero, e32, m4, tu, mu
+vle64.v v1, (a0)
+vsetvli zero, zero, e64, m1, tu, mu
+vle64.v v1, (a0)
+vsetvli zero, zero, e64, m2, tu, mu
+vle64.v v1, (a0)
+vsetvli zero, zero, e64, m4, tu, mu
+vle64.v v1, (a0)
+vsetvli zero, zero, e64, m8, tu, mu
+vle64.v v1, (a0)
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vse8.v v1, (a0)
+vsetvli zero, zero, e8, mf4, tu, mu
+vse8.v v1, (a0)
+vsetvli zero, zero, e8, mf2, tu, mu
+vse8.v v1, (a0)
+vsetvli zero, zero, e8, m1, tu, mu
+vse8.v v1, (a0)
+vsetvli zero, zero, e8, m2, tu, mu
+vse8.v v1, (a0)
+vsetvli zero, zero, e8, m4, tu, mu
+vse8.v v1, (a0)
+vsetvli zero, zero, e8, m8, tu, mu
+vse8.v v1, (a0)
+vsetvli zero, zero, e16, mf4, tu, mu
+vse8.v v1, (a0)
+vsetvli zero, zero, e16, mf2, tu, mu
+vse8.v v1, (a0)
+vsetvli zero, zero, e16, m1, tu, mu
+vse8.v v1, (a0)
+vsetvli zero, zero, e16, m2, tu, mu
+vse8.v v1, (a0)
+vsetvli zero, zero, e16, m4, tu, mu
+vse8.v v1, (a0)
+vsetvli zero, zero, e16, m8, tu, mu
+vse8.v v1, (a0)
+vsetvli zero, zero, e32, mf2, tu, mu
+vse8.v v1, (a0)
+vsetvli zero, zero, e32, m1, tu, mu
+vse8.v v1, (a0)
+vsetvli zero, zero, e32, m2, tu, mu
+vse8.v v1, (a0)
+vsetvli zero, zero, e32, m4, tu, mu
+vse8.v v1, (a0)
+vsetvli zero, zero, e32, m8, tu, mu
+vse8.v v1, (a0)
+vsetvli zero, zero, e64, m1, tu, mu
+vse8.v v1, (a0)
+vsetvli zero, zero, e64, m2, tu, mu
+vse8.v v1, (a0)
+vsetvli zero, zero, e64, m4, tu, mu
+vse8.v v1, (a0)
+vsetvli zero, zero, e64, m8, tu, mu
+vse8.v v1, (a0)
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vse16.v v1, (a0)
+vsetvli zero, zero, e8, mf4, tu, mu
+vse16.v v1, (a0)
+vsetvli zero, zero, e8, mf2, tu, mu
+vse16.v v1, (a0)
+vsetvli zero, zero, e8, m1, tu, mu
+vse16.v v1, (a0)
+vsetvli zero, zero, e8, m2, tu, mu
+vse16.v v1, (a0)
+vsetvli zero, zero, e8, m4, tu, mu
+vse16.v v1, (a0)
+vsetvli zero, zero, e16, mf4, tu, mu
+vse16.v v1, (a0)
+vsetvli zero, zero, e16, mf2, tu, mu
+vse16.v v1, (a0)
+vsetvli zero, zero, e16, m1, tu, mu
+vse16.v v1, (a0)
+vsetvli zero, zero, e16, m2, tu, mu
+vse16.v v1, (a0)
+vsetvli zero, zero, e16, m4, tu, mu
+vse16.v v1, (a0)
+vsetvli zero, zero, e16, m8, tu, mu
+vse16.v v1, (a0)
+vsetvli zero, zero, e32, mf2, tu, mu
+vse16.v v1, (a0)
+vsetvli zero, zero, e32, m1, tu, mu
+vse16.v v1, (a0)
+vsetvli zero, zero, e32, m2, tu, mu
+vse16.v v1, (a0)
+vsetvli zero, zero, e32, m4, tu, mu
+vse16.v v1, (a0)
+vsetvli zero, zero, e32, m8, tu, mu
+vse16.v v1, (a0)
+vsetvli zero, zero, e64, m1, tu, mu
+vse16.v v1, (a0)
+vsetvli zero, zero, e64, m2, tu, mu
+vse16.v v1, (a0)
+vsetvli zero, zero, e64, m4, tu, mu
+vse16.v v1, (a0)
+vsetvli zero, zero, e64, m8, tu, mu
+vse16.v v1, (a0)
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vse32.v v1, (a0)
+vsetvli zero, zero, e8, mf4, tu, mu
+vse32.v v1, (a0)
+vsetvli zero, zero, e8, mf2, tu, mu
+vse32.v v1, (a0)
+vsetvli zero, zero, e8, m1, tu, mu
+vse32.v v1, (a0)
+vsetvli zero, zero, e8, m2, tu, mu
+vse32.v v1, (a0)
+vsetvli zero, zero, e16, mf4, tu, mu
+vse32.v v1, (a0)
+vsetvli zero, zero, e16, mf2, tu, mu
+vse32.v v1, (a0)
+vsetvli zero, zero, e16, m1, tu, mu
+vse32.v v1, (a0)
+vsetvli zero, zero, e16, m2, tu, mu
+vse32.v v1, (a0)
+vsetvli zero, zero, e16, m4, tu, mu
+vse32.v v1, (a0)
+vsetvli zero, zero, e32, mf2, tu, mu
+vse32.v v1, (a0)
+vsetvli zero, zero, e32, m1, tu, mu
+vse32.v v1, (a0)
+vsetvli zero, zero, e32, m2, tu, mu
+vse32.v v1, (a0)
+vsetvli zero, zero, e32, m4, tu, mu
+vse32.v v1, (a0)
+vsetvli zero, zero, e32, m8, tu, mu
+vse32.v v1, (a0)
+vsetvli zero, zero, e64, m1, tu, mu
+vse32.v v1, (a0)
+vsetvli zero, zero, e64, m2, tu, mu
+vse32.v v1, (a0)
+vsetvli zero, zero, e64, m4, tu, mu
+vse32.v v1, (a0)
+vsetvli zero, zero, e64, m8, tu, mu
+vse32.v v1, (a0)
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vse64.v v1, (a0)
+vsetvli zero, zero, e8, mf4, tu, mu
+vse64.v v1, (a0)
+vsetvli zero, zero, e8, mf2, tu, mu
+vse64.v v1, (a0)
+vsetvli zero, zero, e8, m1, tu, mu
+vse64.v v1, (a0)
+vsetvli zero, zero, e16, mf4, tu, mu
+vse64.v v1, (a0)
+vsetvli zero, zero, e16, mf2, tu, mu
+vse64.v v1, (a0)
+vsetvli zero, zero, e16, m1, tu, mu
+vse64.v v1, (a0)
+vsetvli zero, zero, e16, m2, tu, mu
+vse64.v v1, (a0)
+vsetvli zero, zero, e32, mf2, tu, mu
+vse64.v v1, (a0)
+vsetvli zero, zero, e32, m1, tu, mu
+vse64.v v1, (a0)
+vsetvli zero, zero, e32, m2, tu, mu
+vse64.v v1, (a0)
+vsetvli zero, zero, e32, m4, tu, mu
+vse64.v v1, (a0)
+vsetvli zero, zero, e64, m1, tu, mu
+vse64.v v1, (a0)
+vsetvli zero, zero, e64, m2, tu, mu
+vse64.v v1, (a0)
+vsetvli zero, zero, e64, m4, tu, mu
+vse64.v v1, (a0)
+vsetvli zero, zero, e64, m8, tu, mu
+vse64.v v1, (a0)
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vlm.v v1, (a0)
+vsetvli zero, zero, e8, mf4, tu, mu
+vlm.v v1, (a0)
+vsetvli zero, zero, e8, mf2, tu, mu
+vlm.v v1, (a0)
+vsetvli zero, zero, e8, m1, tu, mu
+vlm.v v1, (a0)
+vsetvli zero, zero, e8, m2, tu, mu
+vlm.v v1, (a0)
+vsetvli zero, zero, e8, m4, tu, mu
+vlm.v v1, (a0)
+vsetvli zero, zero, e8, m8, tu, mu
+vlm.v v1, (a0)
+vsetvli zero, zero, e16, mf4, tu, mu
+vlm.v v1, (a0)
+vsetvli zero, zero, e16, mf2, tu, mu
+vlm.v v1, (a0)
+vsetvli zero, zero, e16, m1, tu, mu
+vlm.v v1, (a0)
+vsetvli zero, zero, e16, m2, tu, mu
+vlm.v v1, (a0)
+vsetvli zero, zero, e16, m4, tu, mu
+vlm.v v1, (a0)
+vsetvli zero, zero, e16, m8, tu, mu
+vlm.v v1, (a0)
+vsetvli zero, zero, e32, mf2, tu, mu
+vlm.v v1, (a0)
+vsetvli zero, zero, e32, m1, tu, mu
+vlm.v v1, (a0)
+vsetvli zero, zero, e32, m2, tu, mu
+vlm.v v1, (a0)
+vsetvli zero, zero, e32, m4, tu, mu
+vlm.v v1, (a0)
+vsetvli zero, zero, e32, m8, tu, mu
+vlm.v v1, (a0)
+vsetvli zero, zero, e64, m1, tu, mu
+vlm.v v1, (a0)
+vsetvli zero, zero, e64, m2, tu, mu
+vlm.v v1, (a0)
+vsetvli zero, zero, e64, m4, tu, mu
+vlm.v v1, (a0)
+vsetvli zero, zero, e64, m8, tu, mu
+vlm.v v1, (a0)
+
+vsetvli zero, zero, e8, mf8, tu, mu
+vsm.v v1, (a0)
+vsetvli zero, zero, e8, mf4, tu, mu
+vsm.v v1, (a0)
+vsetvli zero, zero, e8, mf2, tu, mu
+vsm.v v1, (a0)
+vsetvli zero, zero, e8, m1, tu, mu
+vsm.v v1, (a0)
+vsetvli zero, zero, e8, m2, tu, mu
+vsm.v v1, (a0)
+vsetvli zero, zero, e8, m4, tu, mu
+vsm.v v1, (a0)
+vsetvli zero, zero, e8, m8, tu, mu
+vsm.v v1, (a0)
+vsetvli zero, zero, e16, mf4, tu, mu
+vsm.v v1, (a0)
+vsetvli zero, zero, e16, mf2, tu, mu
+vsm.v v1, (a0)
+vsetvli zero, zero, e16, m1, tu, mu
+vsm.v v1, (a0)
+vsetvli zero, zero, e16, m2, tu, mu
+vsm.v v1, (a0)
+vsetvli zero, zero, e16, m4, tu, mu
+vsm.v v1, (a0)
+vsetvli zero, zero, e16, m8, tu, mu
+vsm.v v1, (a0)
+vsetvli zero, zero, e32, mf2, tu, mu
+vsm.v v1, (a0)
+vsetvli zero, zero, e32, m1, tu, mu
+vsm.v v1, (a0)
+vsetvli zero, zero, e32, m2, tu, mu
+vsm.v v1, (a0)
+vsetvli zero, zero, e32, m4, tu, mu
+vsm.v v1, (a0)
+vsetvli zero, zero, e32, m8, tu, mu
+vsm.v v1, (a0)
+vsetvli zero, zero, e64, m1, tu, mu
+vsm.v v1, (a0)
+vsetvli zero, zero, e64, m2, tu, mu
+vsm.v v1, (a0)
+vsetvli zero, zero, e64, m4, tu, mu
+vsm.v v1, (a0)
+vsetvli zero, zero, e64, m8, tu, mu
+vsm.v v1, (a0)
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1084
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.37
+# CHECK-NEXT: Block RThroughput: 848.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e8, mf8, tu, mu
+# CHECK-NEXT: 1 4 1.00 * vle8.v v1, (a0)
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e8, mf4, tu, mu
+# CHECK-NEXT: 1 4 1.00 * vle8.v v1, (a0)
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e8, mf2, tu, mu
+# CHECK-NEXT: 1 4 1.00 * vle8.v v1, (a0)
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e8, m1, tu, mu
+# CHECK-NEXT: 1 4 2.00 * vle8.v v1, (a0)
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e8, m2, tu, mu
+# CHECK-NEXT: 1 4 4.00 * vle8.v v1, (a0)
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e8, m4, tu, mu
+# CHECK-NEXT: 1 4 8.00 * vle8.v v1, (a0)
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e8, m8, tu, mu
+# CHECK-NEXT: 1 4 16.00 * vle8.v v1, (a0)
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT: 1 4 1.00 * vle8.v v1, (a0)
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT: 1 4 1.00 * vle8.v v1, (a0)
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e16, m1, tu, mu
+# CHECK-NEXT: 1 4 1.00 * vle8.v v1, (a0)
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e16, m2, tu, mu
+# CHECK-NEXT: 1 4 2.00 * vle8.v v1, (a0)
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e16, m4, tu, mu
+# CHECK-NEXT: 1 4 4.00 * vle8.v v1, (a0)
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e16, m8, tu, mu
+# CHECK-NEXT: 1 4 8.00 * vle8.v v1, (a0)
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT: 1 4 1.00 * vle8.v v1, (a0)
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e32, m1, tu, mu
+# CHECK-NEXT: 1 4 1.00 * vle8.v v1, (a0)
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e32, m2, tu, mu
+# CHECK-NEXT: 1 4 1.00 * vle8.v v1, (a0)
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e32, m4, tu, mu
+# CHECK-NEXT: 1 4 2.00 * vle8.v v1, (a0)
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e32, m8, tu, mu
+# CHECK-NEXT: 1 4 4.00 * vle8.v v1, (a0)
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e64, m1, tu, mu
+# CHECK-NEXT: 1 4 1.00 * vle8.v v1, (a0)
+#...
[truncated]
LGTM with nit.
(Besides, what about strided/indexed load/store? Do we need to do the same thing?)
I believe that strided/indexed load/store Sched resources are already LMUL and SEW aware, since their scheduling depends on SEW. Unit-stride load/store is only LMUL aware and not SEW aware, since we have not had a need to differentiate behavior depending on SEW at this point.
Don't strided loads/stores still need to calculate their EMUL using (EEW/SEW)*LMUL?
Yes. We will need to do something similar for strided load/store. I think segmented loads/stores too.
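As a rough sketch of what that follow-up could look like (not part of this patch), the strided opcodes could be mapped to their EEW the same way the unit-stride opcodes are, and then share the existing (EEW/SEW)*LMUL conversion. The opcode names below (RISCV::VLSE8_V, RISCV::VSSE8_V, ...) are assumed here purely for illustration:

```cpp
// Hypothetical follow-up, not part of this patch: strided loads and stores
// also take their EEW from the mnemonic, so EMUL could be derived with the
// same (EEW/SEW)*LMUL helper used for the unit-stride case.
static uint8_t getEEWForStridedLoadStore(unsigned Opcode) {
  switch (Opcode) {
  case RISCV::VLSE8_V:
  case RISCV::VSSE8_V:
    return 8;
  case RISCV::VLSE16_V:
  case RISCV::VSSE16_V:
    return 16;
  case RISCV::VLSE32_V:
  case RISCV::VSSE32_V:
    return 32;
  case RISCV::VLSE64_V:
  case RISCV::VSSE64_V:
    return 64;
  default:
    llvm_unreachable("Opcode is not a vector strided load or store");
  }
}
// The remaining steps (decode LMUL, call getEMULEqualsEEWDivSEWTimesLMUL,
// re-encode the resulting EMUL) could be factored out of
// getEEWAndEMULForUnitStrideLoadStore and shared.
```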
[RISCV][llvm-mca] Vector Unit Stride Loads and stores use EEW and EMUL based on instruction EEW

For vector unit-stride loads and stores, EEW and EMUL depend on the EEW given in the instruction name. llvm-mca needs some help to correctly report this information.
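For a concrete feel for the EMUL = (EEW / SEW) * LMUL computation the patch relies on, here is a minimal, self-contained sketch that mirrors the arithmetic in getEMULEqualsEEWDivSEWTimesLMUL. The function name and driver below are made up for the example, and the LMUL encoding, assertions, and LLVM types of the real helper are omitted:

```cpp
#include <cstdio>
#include <numeric>
#include <utility>

// Compute EMUL = (EEW / SEW) * LMUL in lowest terms, preserving fractional
// results. Returns {value, isFractional}: {2, false} means EMUL = 2 and
// {4, true} means EMUL = 1/4. LMULFractional marks fractional LMUL (mf2, ...),
// in which case LMUL holds the denominator, mirroring decodeVLMUL.
static std::pair<unsigned, bool> computeEMUL(unsigned EEW, unsigned SEW,
                                             unsigned LMUL,
                                             bool LMULFractional) {
  unsigned Num = EEW, Denom = SEW;
  unsigned GCD = LMULFractional ? std::gcd(Num, Denom * LMUL)
                                : std::gcd(Num * LMUL, Denom);
  Num = LMULFractional ? Num / GCD : Num * LMUL / GCD;
  Denom = LMULFractional ? Denom * LMUL / GCD : Denom / GCD;
  return {Num > Denom ? Num : Denom, Denom > Num};
}

int main() {
  // vle8.v under vsetvli ..., e32, m4: EEW = 8, SEW = 32, LMUL = 4,
  // so EMUL = (8/32) * 4 = 1.
  auto [V1, F1] = computeEMUL(8, 32, 4, /*LMULFractional=*/false);
  std::printf("EMUL = %s%u\n", F1 ? "1/" : "", V1); // EMUL = 1

  // vle64.v under vsetvli ..., e16, mf2: EEW = 64, SEW = 16, LMUL = 1/2,
  // so EMUL = (64/16) * (1/2) = 2.
  auto [V2, F2] = computeEMUL(64, 16, 2, /*LMULFractional=*/true);
  std::printf("EMUL = %s%u\n", F2 ? "1/" : "", V2); // EMUL = 2
  return 0;
}
```

This is why, in the checks above, vle8.v under e32/m4 reports the same reciprocal throughput as vle8.v under e8/m1: both resolve to EMUL = 1 even though their LMUL settings differ.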