Skip to content

[Exegesis][RISCV] Add initial RVV support #128767

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Feb 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,44 @@ enum RoundingMode {
RNE = 1,
RDN = 2,
ROD = 3,
Invalid
};

// Maps a vector fixed-point rounding mode to its lowercase mnemonic
// ("rnu", "rne", "rdn" or "rod"). Any other value, including Invalid, is a
// programming error and reaches llvm_unreachable.
inline static StringRef roundingModeToString(RoundingMode RndMode) {
  switch (RndMode) {
  case RISCVVXRndMode::RNU:
    return "rnu";
  case RISCVVXRndMode::RNE:
    return "rne";
  case RISCVVXRndMode::RDN:
    return "rdn";
  case RISCVVXRndMode::ROD:
    return "rod";
  default:
    // Fall out of the switch; handled by the unreachable below.
    break;
  }
  llvm_unreachable("Unknown vector fixed-point rounding mode");
}

// Parses a rounding-mode mnemonic. The match is exact: only "rnu", "rne",
// "rdn" and "rod" are recognised; every other string yields Invalid.
inline static RoundingMode stringToRoundingMode(StringRef Str) {
  if (Str == "rnu")
    return RISCVVXRndMode::RNU;
  if (Str == "rne")
    return RISCVVXRndMode::RNE;
  if (Str == "rdn")
    return RISCVVXRndMode::RDN;
  if (Str == "rod")
    return RISCVVXRndMode::ROD;
  return RISCVVXRndMode::Invalid;
}

// Returns true only when Mode equals one of the four named rounding modes
// (RNU, RNE, RDN, ROD); the Invalid sentinel and any other value give false.
inline static bool isValidRoundingMode(unsigned Mode) {
  return Mode == RISCVVXRndMode::RNU || Mode == RISCVVXRndMode::RNE ||
         Mode == RISCVVXRndMode::RDN || Mode == RISCVVXRndMode::ROD;
}
} // namespace RISCVVXRndMode

//===----------------------------------------------------------------------===//
Expand Down
59 changes: 59 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/eligible-inst.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
# RUN: --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 | \
# RUN: FileCheck %s --allow-empty --check-prefix=LATENCY
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
# RUN: --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 \
# RUN: --min-instructions=100 | \
# RUN: FileCheck %s --check-prefix=RTHROUGHPUT1

# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
# RUN: --opcode-name=PseudoVRGATHEREI16_VV_M2_E32_M1,PseudoVRGATHER_VI_M2,PseudoVRGATHER_VV_M8_E32,PseudoVRGATHER_VX_M4 | \
# RUN: FileCheck %s --allow-empty --check-prefix=LATENCY
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
# RUN: --opcode-name=PseudoVRGATHEREI16_VV_M2_E32_M1,PseudoVRGATHER_VI_M2,PseudoVRGATHER_VV_M8_E32,PseudoVRGATHER_VX_M4 \
# RUN: --min-instructions=100 | \
# RUN: FileCheck %s --check-prefix=RTHROUGHPUT2

# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
# RUN: --opcode-name=PseudoVSLIDE1UP_VX_M1,PseudoVSLIDEUP_VI_M2,PseudoVSLIDEUP_VX_M2 | \
# RUN: FileCheck %s --allow-empty --check-prefix=LATENCY
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
# RUN: --opcode-name=PseudoVSLIDE1UP_VX_M1,PseudoVSLIDEUP_VI_M2,PseudoVSLIDEUP_VX_M2 \
# RUN: --min-instructions=100 | \
# RUN: FileCheck %s --check-prefix=RTHROUGHPUT3

# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
# RUN: --opcode-name=PseudoVNCLIPU_WI_M2,PseudoVNSRA_WI_M2,PseudoVNSRL_WI_M2 | \
# RUN: FileCheck %s --allow-empty --check-prefix=LATENCY
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
# RUN: --opcode-name=PseudoVNCLIPU_WI_M2,PseudoVNSRA_WI_M2,PseudoVNSRL_WI_M2 \
# RUN: --min-instructions=100 | \
# RUN: FileCheck %s --check-prefix=RTHROUGHPUT4

# These instructions are only eligible under the inverse throughput mode.

# LATENCY-NOT: PseudoVCOMPRESS_VM_M2_E8
# LATENCY-NOT: PseudoVCPOP_M_B32
# LATENCY-NOT: PseudoVRGATHEREI16_VV_M2_E32_M1
# LATENCY-NOT: PseudoVRGATHER_VI_M2
# LATENCY-NOT: PseudoVRGATHER_VV_M8_E32
# LATENCY-NOT: PseudoVRGATHER_VX_M4
# LATENCY-NOT: PseudoVSLIDE1UP_VX_M1
# LATENCY-NOT: PseudoVSLIDEUP_VI_M2
# LATENCY-NOT: PseudoVSLIDEUP_VX_M2
# LATENCY-NOT: PseudoVNCLIPU_WI_M2
# LATENCY-NOT: PseudoVNSRA_WI_M2
# LATENCY-NOT: PseudoVNSRL_WI_M2

# RTHROUGHPUT1: PseudoVCOMPRESS_VM_M2_E8
# RTHROUGHPUT1: PseudoVCPOP_M_B32
# RTHROUGHPUT2: PseudoVRGATHEREI16_VV_M2_E32_M1
# RTHROUGHPUT2: PseudoVRGATHER_VI_M2
# RTHROUGHPUT2: PseudoVRGATHER_VV_M8_E32
# RTHROUGHPUT2: PseudoVRGATHER_VX_M4
# RTHROUGHPUT3: PseudoVSLIDE1UP_VX_M1
# RTHROUGHPUT3: PseudoVSLIDEUP_VI_M2
# RTHROUGHPUT3: PseudoVSLIDEUP_VX_M2
# RTHROUGHPUT4: PseudoVNCLIPU_WI_M2
# RTHROUGHPUT4: PseudoVNSRA_WI_M2
# RTHROUGHPUT4: PseudoVNSRL_WI_M2
7 changes: 7 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/explicit-sew.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFWREDUSUM_VS_M1_E32 \
# RUN: --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s

# Make sure none of the configs has a SEW other than e32.
# CHECK: PseudoVFWREDUSUM_VS_M1_E32
# CHECK: SEW: e32
# CHECK-NOT: SEW: e{{(8|16|64)}}
6 changes: 6 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/filter.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput --opcode-name=PseudoVNCLIPU_WX_M1_MASK \
# RUN: --riscv-filter-config='vtype = {VXRM: rod, AVL: VLMAX, SEW: e(8|16), Policy: ta/mu}' --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s

# CHECK: config: 'vtype = {VXRM: rod, AVL: VLMAX, SEW: e8, Policy: ta/mu}'
# CHECK: config: 'vtype = {VXRM: rod, AVL: VLMAX, SEW: e16, Policy: ta/mu}'
# CHECK-NOT: config: 'vtype = {VXRM: rod, AVL: VLMAX, SEW: e(32|64), Policy: ta/mu}'
7 changes: 7 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/reduction.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVWREDSUMU_VS_M8_E32 --min-instructions=100 | \
# RUN: FileCheck %s

# Make sure reduction ops don't have aliasing between vd and vs1.
# CHECK: instructions:
# CHECK-NEXT: PseudoVWREDSUMU_VS_M8_E32
# CHECK-NOT: V[[REG:[0-9]+]] V[[REG]] V{{[0-9]+}}M8 V[[REG]]
6 changes: 6 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/self-aliasing.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVXOR_VX_M4 --min-instructions=100 | \
# RUN: FileCheck %s

# Make sure all def / use operands are the same in latency mode.
# CHECK: instructions:
# CHECK-NEXT: PseudoVXOR_VX_M4 V[[REG:[0-9]+]]M4 V[[REG]]M4 V[[REG]]M4 X{{.*}}
12 changes: 12 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/skip-rm.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVAADDU_VV_M1 \
# RUN: --riscv-enumerate-rounding-modes=false --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=VX
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFADD_VFPR16_M1_E16 \
# RUN: --riscv-enumerate-rounding-modes=false --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=FP

# VX: PseudoVAADDU_VV_M1
# VX: VXRM: rnu
# VX-NOT: VXRM: {{(rne|rdn|rod)}}

# FP: PseudoVFADD_VFPR16_M1_E16
# FP: FRM: dyn
# FP-NOT: FRM: {{(rtz|rdn|rup|rmm|rne)}}
33 changes: 33 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/valid-sew-zvk.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
# RUN: --opcode-name=PseudoVAESDF_VS_M1_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
# RUN: FileCheck %s --check-prefix=ZVK
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
# RUN: --opcode-name=PseudoVGHSH_VV_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
# RUN: FileCheck %s --check-prefix=ZVK
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
# RUN: --opcode-name=PseudoVSM4K_VI_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
# RUN: FileCheck %s --check-prefix=ZVK
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
# RUN: --opcode-name=PseudoVSM3C_VI_M2 --max-configs-per-opcode=1000 --min-instructions=100 | \
# RUN: FileCheck %s --check-prefix=ZVK
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
# RUN: --opcode-name=PseudoVSHA2MS_VV_M1_E32 --max-configs-per-opcode=1000 --min-instructions=100 | \
# RUN: FileCheck %s --allow-empty --check-prefix=ZVKNH
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
# RUN: --opcode-name=PseudoVSHA2MS_VV_M2_E64 --max-configs-per-opcode=1000 --min-instructions=100 | \
# RUN: FileCheck %s --allow-empty --check-prefix=ZVKNH
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
# RUN: --opcode-name=PseudoVSM3C_VI_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
# RUN: FileCheck %s --allow-empty --check-prefix=EMPTY

# Most vector crypto only supports SEW=32, except Zvknhb which also supports SEW=64
# ZVK-NOT: SEW: e{{(8|16)}}
# ZVK: SEW: e32
# ZVK-NOT: SEW: e64

# ZVKNH(A|B) can either have SEW=32 (EGW=128) or SEW=64 (EGW=256)

# ZVKNH-NOT: SEW: e{{(8|16)}}
# ZVKNH: SEW: e{{(32|64)}}

# EMPTY-NOT: SEW: e{{(8|16|32|64)}}
41 changes: 41 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/valid-sew.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVMUL_VV_MF4_MASK \
# RUN: --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=FRAC-LMUL
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
# RUN: --opcode-name=PseudoVFADD_VFPR16_M1_E16,PseudoVFADD_VV_M2_E16,PseudoVFCLASS_V_MF2 --max-configs-per-opcode=1000 --min-instructions=100 | \
# RUN: FileCheck %s --check-prefix=FP
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
# RUN: --opcode-name=PseudoVSEXT_VF8_M2,PseudoVZEXT_VF8_M2 --max-configs-per-opcode=1000 --min-instructions=100 | \
# RUN: FileCheck %s --check-prefix=VEXT
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 -benchmark-phase=assemble-measured-code --mode=latency \
# RUN: --opcode-name=PseudoVFREDUSUM_VS_M1_E16 --max-configs-per-opcode=1000 --min-instructions=100 | \
# RUN: FileCheck %s --check-prefix=VFRED --allow-empty

# Make sure only the supported SEWs are generated for fractional LMUL.
# FRAC-LMUL: PseudoVMUL_VV_MF4_MASK
# FRAC-LMUL: SEW: e8
# FRAC-LMUL: SEW: e16
# FRAC-LMUL-NOT: SEW: e{{(32|64)}}

# Make sure only SEWs that are equal to the supported FLEN are generated
# FP: PseudoVFADD_VFPR16_M1_E16
# FP-NOT: SEW: e8
# FP: PseudoVFADD_VV_M2_E16
# FP-NOT: SEW: e8
# FP: PseudoVFCLASS_V_MF2
# FP-NOT: SEW: e8

# VS/ZEXT can only operate on SEW that will not lead to invalid EEW on the
# source operand.
# VEXT: PseudoVSEXT_VF8_M2
# VEXT-NOT: SEW: e8
# VEXT-NOT: SEW: e16
# VEXT-NOT: SEW: e32
# VEXT: SEW: e64
# VEXT: PseudoVZEXT_VF8_M2
# VEXT-NOT: SEW: e8
# VEXT-NOT: SEW: e16
# VEXT-NOT: SEW: e32
# VEXT: SEW: e64

# P470 doesn't have Zvfh so 16-bit vfredusum shouldn't exist
# VFRED-NOT: PseudoVFREDUSUM_VS_M1_E16
7 changes: 7 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/vlmax-only.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFWREDUSUM_VS_M1_E32 \
# RUN: --riscv-vlmax-for-vl --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s

# Only allow VLMAX for AVL when -riscv-vlmax-for-vl is present
# CHECK: PseudoVFWREDUSUM_VS_M1_E32
# CHECK: AVL: VLMAX
# CHECK-NOT: AVL: {{(simm5|<MCOperand: .*>)}}
13 changes: 13 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/vtype-rm-setup.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFWREDUSUM_VS_M1_E32 \
# RUN: --max-configs-per-opcode=1 --min-instructions=100 --dump-object-to-disk=%t.o > %t.txt
# RUN: llvm-objdump --triple=riscv64 -d %t.o | FileCheck %s --check-prefix=VFWREDUSUM
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVSSRL_VX_MF4 \
# RUN: --max-configs-per-opcode=1 --min-instructions=100 --dump-object-to-disk=%t.o > %t.txt
# RUN: llvm-objdump --triple=riscv64 -d %t.o | FileCheck %s --check-prefix=VSSRL

# Make sure the correct VSETVL / VXRM write / FRM write instructions are generated
# VFWREDUSUM: vsetvli {{.*}}, zero, e32, m1, tu, ma
# VFWREDUSUM: fsrmi {{.*}}, 0x0

# VSSRL: vsetvli {{.*}}, zero, e8, mf4, tu, ma
# VSSRL: csrwi vxrm, 0x0
4 changes: 4 additions & 0 deletions llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ bool Operand::isTied() const { return TiedToIndex.has_value(); }

bool Operand::isVariable() const { return VariableIndex.has_value(); }

// Reports the stored early-clobber flag for this operand.
bool Operand::isEarlyClobber() const {
  return IsEarlyClobber;
}

bool Operand::isMemory() const {
return isExplicit() &&
getExplicitOperandInfo().OperandType == MCOI::OPERAND_MEMORY;
Expand Down Expand Up @@ -115,6 +117,8 @@ Instruction::create(const MCInstrInfo &InstrInfo,
Operand Operand;
Operand.Index = OpIndex;
Operand.IsDef = (OpIndex < Description->getNumDefs());
Operand.IsEarlyClobber =
(Description->getOperandConstraint(OpIndex, MCOI::EARLY_CLOBBER) != -1);
// TODO(gchatelet): Handle isLookupPtrRegClass.
if (OpInfo.RegClass >= 0)
Operand.Tracker = &RATC.getRegisterClass(OpInfo.RegClass);
Expand Down
4 changes: 4 additions & 0 deletions llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ struct Operand {
bool isVariable() const;
bool isMemory() const;
bool isImmediate() const;
bool isEarlyClobber() const;
unsigned getIndex() const;
unsigned getTiedToIndex() const;
unsigned getVariableIndex() const;
Expand All @@ -82,6 +83,7 @@ struct Operand {
// Please use the accessors above and not the following fields.
std::optional<uint8_t> Index;
bool IsDef = false;
bool IsEarlyClobber = false;
const RegisterAliasingTracker *Tracker = nullptr; // Set for Register Op.
const MCOperandInfo *Info = nullptr; // Set for Explicit Op.
std::optional<uint8_t> TiedToIndex; // Set for Reg&Explicit Op.
Expand Down Expand Up @@ -115,6 +117,8 @@ struct Instruction {
Instruction &operator=(const Instruction &) = delete;
Instruction &operator=(Instruction &&) = delete;

// Returns the opcode reported by the underlying instruction description.
unsigned getOpcode() const {
  return Description.getOpcode();
}

// Returns the Operand linked to this Variable.
// In case the Variable is tied, the primary (i.e. Def) Operand is returned.
const Operand &getPrimaryOperand(const Variable &Var) const;
Expand Down
2 changes: 2 additions & 0 deletions llvm/tools/llvm-exegesis/lib/RISCV/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ set(LLVM_LINK_COMPONENTS
add_llvm_library(LLVMExegesisRISCV
DISABLE_LLVM_LINK_LLVM_DYLIB
STATIC
RISCVExegesisPreprocessing.cpp
RISCVExegesisPostprocessing.cpp
Target.cpp

DEPENDS
Expand Down
19 changes: 19 additions & 0 deletions llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPasses.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
//===- RISCVExegesisPasses.h - RISC-V specific Exegesis Passes --*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TOOLS_EXEGESIS_LIB_RISCV_RISCVEXEGESISPASSES_H
#define LLVM_TOOLS_EXEGESIS_LIB_RISCV_RISCVEXEGESISPASSES_H
namespace llvm {
// Forward declaration: this header only uses FunctionPass through pointers.
class FunctionPass;

namespace exegesis {
// Factory for the RISC-V pre-processing pass.
// NOTE(review): presumably defined in RISCVExegesisPreprocessing.cpp; what the
// pass does and who owns the returned pointer is not visible here -- confirm.
FunctionPass *createRISCVPreprocessingPass();
// Factory for the RISC-V post-processing pass.
// NOTE(review): presumably defined in RISCVExegesisPostprocessing.cpp; what the
// pass does and who owns the returned pointer is not visible here -- confirm.
FunctionPass *createRISCVPostprocessingPass();
} // namespace exegesis
} // namespace llvm
#endif
Loading
Loading