[RISCV] Make Zicclsm imply unaligned scalar and vector access #108551

Closed · wants to merge 1 commit
llvm/lib/Target/RISCV/RISCVFeatures.td: 255 changes (128 additions, 127 deletions)
@@ -6,6 +6,132 @@
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// LLVM specific features and extensions
//===----------------------------------------------------------------------===//

// Feature32Bit exists to mark CPUs that support RV32 to distinguish them from
// tuning CPU names.
def Feature32Bit
: SubtargetFeature<"32bit", "IsRV32", "true", "Implements RV32">;
def Feature64Bit
: SubtargetFeature<"64bit", "IsRV64", "true", "Implements RV64">;
def IsRV64 : Predicate<"Subtarget->is64Bit()">,
AssemblerPredicate<(all_of Feature64Bit),
"RV64I Base Instruction Set">;
def IsRV32 : Predicate<"!Subtarget->is64Bit()">,
AssemblerPredicate<(all_of (not Feature64Bit)),
"RV32I Base Instruction Set">;

defvar RV32 = DefaultMode;
def RV64 : HwMode<"+64bit", [IsRV64]>;

def FeatureRelax
: SubtargetFeature<"relax", "EnableLinkerRelax", "true",
"Enable Linker relaxation.">;

foreach i = {1-31} in
def FeatureReserveX#i :
SubtargetFeature<"reserve-x"#i, "UserReservedRegister[RISCV::X"#i#"]",
"true", "Reserve X"#i>;

def FeatureSaveRestore : SubtargetFeature<"save-restore", "EnableSaveRestore",
"true", "Enable save/restore.">;

def FeatureNoTrailingSeqCstFence : SubtargetFeature<"no-trailing-seq-cst-fence",
"EnableTrailingSeqCstFence",
"false",
"Disable trailing fence for seq-cst store.">;

def FeatureUnalignedScalarMem
: SubtargetFeature<"unaligned-scalar-mem", "EnableUnalignedScalarMem",
"true", "Has reasonably performant unaligned scalar "
"loads and stores">;

def FeatureUnalignedVectorMem
: SubtargetFeature<"unaligned-vector-mem", "EnableUnalignedVectorMem",
"true", "Has reasonably performant unaligned vector "
"loads and stores">;

def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
"UsePostRAScheduler", "true", "Schedule again after register allocation">;

def FeaturePredictableSelectIsExpensive
: SubtargetFeature<"predictable-select-expensive", "PredictableSelectIsExpensive", "true",
"Prefer likely predicted branches over selects">;

def TuneOptimizedZeroStrideLoad
: SubtargetFeature<"optimized-zero-stride-load", "HasOptimizedZeroStrideLoad",
"true", "Optimized (perform fewer memory operations)"
"zero-stride vector load">;

def Experimental
: SubtargetFeature<"experimental", "HasExperimental",
"true", "Experimental intrinsics">;

// Some vector hardware implementations do not process all VLEN bits in parallel
// and instead split over multiple cycles. DLEN refers to the datapath width
// that can be done in parallel.
def TuneDLenFactor2
: SubtargetFeature<"dlen-factor-2", "DLenFactor2", "true",
"Vector unit DLEN(data path width) is half of VLEN">;

def TuneNoDefaultUnroll
: SubtargetFeature<"no-default-unroll", "EnableDefaultUnroll", "false",
"Disable default unroll preference.">;

// SiFive 7 is able to fuse integer ALU operations with a preceding branch
// instruction.
def TuneShortForwardBranchOpt
: SubtargetFeature<"short-forward-branch-opt", "HasShortForwardBranchOpt",
"true", "Enable short forward branch optimization">;
def HasShortForwardBranchOpt : Predicate<"Subtarget->hasShortForwardBranchOpt()">;
def NoShortForwardBranchOpt : Predicate<"!Subtarget->hasShortForwardBranchOpt()">;

// Some subtargets require a S2V transfer buffer to move scalars into vectors.
// FIXME: Forming .vx/.vf/.wx/.wf can reduce register pressure.
def TuneNoSinkSplatOperands
: SubtargetFeature<"no-sink-splat-operands", "SinkSplatOperands",
"false", "Disable sink splat operands to enable .vx, .vf,"
".wx, and .wf instructions">;

def TunePreferWInst
: SubtargetFeature<"prefer-w-inst", "PreferWInst", "true",
"Prefer instructions with W suffix">;

def TuneConditionalCompressedMoveFusion
: SubtargetFeature<"conditional-cmv-fusion", "HasConditionalCompressedMoveFusion",
"true", "Enable branch+c.mv fusion">;
def HasConditionalMoveFusion : Predicate<"Subtarget->hasConditionalMoveFusion()">;
def NoConditionalMoveFusion : Predicate<"!Subtarget->hasConditionalMoveFusion()">;

def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
"SiFive 7-Series processors">;

def TuneVentanaVeyron : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "VentanaVeyron",
"Ventana Veyron-Series processors">;

// Assume that lock-free native-width atomics are available, even if the target
// and operating system combination would not usually provide them. The user
// is responsible for providing any necessary __sync implementations. Code
// built with this feature is not ABI-compatible with code built without this
// feature, if atomic variables are exposed across the ABI boundary.
def FeatureForcedAtomics : SubtargetFeature<
"forced-atomics", "HasForcedAtomics", "true",
"Assume that lock-free native-width atomics are available">;
def HasAtomicLdSt
: Predicate<"Subtarget->hasStdExtA() || Subtarget->hasForcedAtomics()">;

def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals",
"AllowTaggedGlobals",
"true", "Use an instruction sequence for taking the address of a global "
"that allows a memory tag in the upper address bits">;

def FeatureForcedSWShadowStack : SubtargetFeature<
"forced-sw-shadow-stack", "HasForcedSWShadowStack", "true",
"Implement shadow stack with software.">;
def HasForcedSWShadowStack : Predicate<"Subtarget->hasForcedSWShadowStack()">;

//===----------------------------------------------------------------------===//
// RISC-V subtarget features and instruction predicates.
//===----------------------------------------------------------------------===//
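For context on two of the features defined in the block above: FeatureUnalignedScalarMem and FeatureUnalignedVectorMem are the flags the RISC-V backend consults when deciding whether a misaligned load or store may be emitted directly; the target's allowsMisalignedMemoryAccesses hook is expected to report misaligned scalar accesses as supported only when the scalar flag is set, and misaligned vector accesses only when the vector flag is set. The following is a minimal standalone C++ model of that decision, written for illustration; the struct and function names are assumptions, not LLVM's actual subtarget API.

// Simplified standalone model of how a misaligned-access legality query can
// consult the unaligned-scalar-mem / unaligned-vector-mem subtarget flags.
// Illustrative only: these are not LLVM's real RISCVSubtarget names.
#include <cstdio>

struct SubtargetFlags {
  bool EnableUnalignedScalarMem = false; // +unaligned-scalar-mem
  bool EnableUnalignedVectorMem = false; // +unaligned-vector-mem
};

// Returns true if a load/store of AccessBytes with the given alignment should
// be treated as legal rather than expanded into smaller aligned accesses.
bool allowsMisaligned(const SubtargetFlags &ST, bool IsVector,
                      unsigned AlignBytes, unsigned AccessBytes) {
  if (AlignBytes >= AccessBytes)
    return true; // naturally aligned accesses are always fine
  return IsVector ? ST.EnableUnalignedVectorMem
                  : ST.EnableUnalignedScalarMem;
}

int main() {
  SubtargetFlags Baseline;    // neither flag set
  SubtargetFlags WithZicclsm; // as if +zicclsm implied both flags (this patch)
  WithZicclsm.EnableUnalignedScalarMem = true;
  WithZicclsm.EnableUnalignedVectorMem = true;
  std::printf("baseline misaligned i32 load allowed: %d\n",
              allowsMisaligned(Baseline, /*IsVector=*/false, 1, 4));
  std::printf("+zicclsm misaligned i32 load allowed: %d\n",
              allowsMisaligned(WithZicclsm, /*IsVector=*/false, 1, 4));
  return 0;
}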
@@ -104,7 +230,8 @@ def FeatureStdExtZiccif

def FeatureStdExtZicclsm
: RISCVExtension<"zicclsm", 1, 0,
"'Zicclsm' (Main Memory Supports Misaligned Loads/Stores)">;
"'Zicclsm' (Main Memory Supports Misaligned Loads/Stores)",
[FeatureUnalignedScalarMem, FeatureUnalignedVectorMem]>;
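This is the substantive change of the patch: FeatureUnalignedScalarMem and FeatureUnalignedVectorMem are added to Zicclsm's implied-features list, so anything that enables zicclsm (a -march string containing _zicclsm, -mattr=+zicclsm, or a CPU definition that lists the extension) should also turn on both unaligned-access features. Below is a small standalone C++ sketch of how implied subtarget features propagate when one feature is enabled; it models the mechanism only, with illustrative names rather than LLVM's actual FeatureBitset machinery.

// Standalone model of subtarget-feature implication: enabling a feature also
// enables everything it implies, transitively. Illustrative names only.
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

using FeatureMap = std::map<std::string, std::vector<std::string>>;

void enableFeature(const std::string &Name, const FeatureMap &Implies,
                   std::set<std::string> &Enabled) {
  if (!Enabled.insert(Name).second)
    return; // already enabled; also guards against cyclic implications
  auto It = Implies.find(Name);
  if (It == Implies.end())
    return;
  for (const std::string &Dep : It->second)
    enableFeature(Dep, Implies, Enabled);
}

int main() {
  // After this patch, zicclsm implies both unaligned-access features.
  FeatureMap Implies = {
      {"zicclsm", {"unaligned-scalar-mem", "unaligned-vector-mem"}},
  };
  std::set<std::string> Enabled;
  enableFeature("zicclsm", Implies, Enabled);
  for (const std::string &F : Enabled)
    std::cout << F << "\n"; // prints all three feature names
  return 0;
}

Under that model, enabling zicclsm leaves all three features set, which is the behaviour this PR proposes: for example, llc -mtriple=riscv64 -mattr=+zicclsm would be expected to generate code as if +unaligned-scalar-mem and +unaligned-vector-mem had also been passed explicitly.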

def FeatureStdExtZiccrse
: RISCVExtension<"ziccrse", 1, 0,
@@ -1299,129 +1426,3 @@ def HasVendorXwchc
This final hunk removes the LLVM-specific feature block from its former location at the end of the file; the same definitions now appear near the top of the file, as added above.
: Predicate<"Subtarget->hasVendorXwchc()">,
AssemblerPredicate<(all_of FeatureVendorXwchc),
"'Xwchc' (WCH/QingKe additional compressed opcodes)">;

//===----------------------------------------------------------------------===//
// LLVM specific features and extensions
//===----------------------------------------------------------------------===//

// Feature32Bit exists to mark CPUs that support RV32 to distinguish them from
// tuning CPU names.
def Feature32Bit
: SubtargetFeature<"32bit", "IsRV32", "true", "Implements RV32">;
def Feature64Bit
: SubtargetFeature<"64bit", "IsRV64", "true", "Implements RV64">;
def IsRV64 : Predicate<"Subtarget->is64Bit()">,
AssemblerPredicate<(all_of Feature64Bit),
"RV64I Base Instruction Set">;
def IsRV32 : Predicate<"!Subtarget->is64Bit()">,
AssemblerPredicate<(all_of (not Feature64Bit)),
"RV32I Base Instruction Set">;

defvar RV32 = DefaultMode;
def RV64 : HwMode<"+64bit", [IsRV64]>;

def FeatureRelax
: SubtargetFeature<"relax", "EnableLinkerRelax", "true",
"Enable Linker relaxation.">;

foreach i = {1-31} in
def FeatureReserveX#i :
SubtargetFeature<"reserve-x"#i, "UserReservedRegister[RISCV::X"#i#"]",
"true", "Reserve X"#i>;

def FeatureSaveRestore : SubtargetFeature<"save-restore", "EnableSaveRestore",
"true", "Enable save/restore.">;

def FeatureNoTrailingSeqCstFence : SubtargetFeature<"no-trailing-seq-cst-fence",
"EnableTrailingSeqCstFence",
"false",
"Disable trailing fence for seq-cst store.">;

def FeatureUnalignedScalarMem
: SubtargetFeature<"unaligned-scalar-mem", "EnableUnalignedScalarMem",
"true", "Has reasonably performant unaligned scalar "
"loads and stores">;

def FeatureUnalignedVectorMem
: SubtargetFeature<"unaligned-vector-mem", "EnableUnalignedVectorMem",
"true", "Has reasonably performant unaligned vector "
"loads and stores">;

def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
"UsePostRAScheduler", "true", "Schedule again after register allocation">;

def FeaturePredictableSelectIsExpensive
: SubtargetFeature<"predictable-select-expensive", "PredictableSelectIsExpensive", "true",
"Prefer likely predicted branches over selects">;

def TuneOptimizedZeroStrideLoad
: SubtargetFeature<"optimized-zero-stride-load", "HasOptimizedZeroStrideLoad",
"true", "Optimized (perform fewer memory operations)"
"zero-stride vector load">;

def Experimental
: SubtargetFeature<"experimental", "HasExperimental",
"true", "Experimental intrinsics">;

// Some vector hardware implementations do not process all VLEN bits in parallel
// and instead split over multiple cycles. DLEN refers to the datapath width
// that can be done in parallel.
def TuneDLenFactor2
: SubtargetFeature<"dlen-factor-2", "DLenFactor2", "true",
"Vector unit DLEN(data path width) is half of VLEN">;

def TuneNoDefaultUnroll
: SubtargetFeature<"no-default-unroll", "EnableDefaultUnroll", "false",
"Disable default unroll preference.">;

// SiFive 7 is able to fuse integer ALU operations with a preceding branch
// instruction.
def TuneShortForwardBranchOpt
: SubtargetFeature<"short-forward-branch-opt", "HasShortForwardBranchOpt",
"true", "Enable short forward branch optimization">;
def HasShortForwardBranchOpt : Predicate<"Subtarget->hasShortForwardBranchOpt()">;
def NoShortForwardBranchOpt : Predicate<"!Subtarget->hasShortForwardBranchOpt()">;

// Some subtargets require a S2V transfer buffer to move scalars into vectors.
// FIXME: Forming .vx/.vf/.wx/.wf can reduce register pressure.
def TuneNoSinkSplatOperands
: SubtargetFeature<"no-sink-splat-operands", "SinkSplatOperands",
"false", "Disable sink splat operands to enable .vx, .vf,"
".wx, and .wf instructions">;

def TunePreferWInst
: SubtargetFeature<"prefer-w-inst", "PreferWInst", "true",
"Prefer instructions with W suffix">;

def TuneConditionalCompressedMoveFusion
: SubtargetFeature<"conditional-cmv-fusion", "HasConditionalCompressedMoveFusion",
"true", "Enable branch+c.mv fusion">;
def HasConditionalMoveFusion : Predicate<"Subtarget->hasConditionalMoveFusion()">;
def NoConditionalMoveFusion : Predicate<"!Subtarget->hasConditionalMoveFusion()">;

def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
"SiFive 7-Series processors">;

def TuneVentanaVeyron : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "VentanaVeyron",
"Ventana Veyron-Series processors">;

// Assume that lock-free native-width atomics are available, even if the target
// and operating system combination would not usually provide them. The user
// is responsible for providing any necessary __sync implementations. Code
// built with this feature is not ABI-compatible with code built without this
// feature, if atomic variables are exposed across the ABI boundary.
def FeatureForcedAtomics : SubtargetFeature<
"forced-atomics", "HasForcedAtomics", "true",
"Assume that lock-free native-width atomics are available">;
def HasAtomicLdSt
: Predicate<"Subtarget->hasStdExtA() || Subtarget->hasForcedAtomics()">;

def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals",
"AllowTaggedGlobals",
"true", "Use an instruction sequence for taking the address of a global "
"that allows a memory tag in the upper address bits">;

def FeatureForcedSWShadowStack : SubtargetFeature<
"forced-sw-shadow-stack", "HasForcedSWShadowStack", "true",
"Implement shadow stack with software.">;
def HasForcedSWShadowStack : Predicate<"Subtarget->hasForcedSWShadowStack()">;