[RISCV] Remove support for pre-RA vsetvli insertion #110796
Conversation
@llvm/pr-subscribers-backend-risc-v

Author: Luke Lau (lukel97)

Changes

Now that LLVM 19.1.1 has been out for a while with post-vector-RA vsetvli insertion enabled by default, this proposes to remove the flag that restores the old pre-RA behaviour so we only have one configuration going forward. That flag was mainly meant as a fallback in case users ran into issues, but I haven't seen anything reported so far.

Patch is 22.81 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/110796.diff

3 Files Affected:
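For reference, the RUN lines deleted from the test below show how the old pre-RA placement could previously be requested on the llc command line; a minimal sketch of such an invocation (standalone use of the test file is just illustrative):

llc -mtriple=riscv64 -mattr=+v -riscv-vsetvl-after-rvv-regalloc=false -verify-machineinstrs llvm/test/CodeGen/RISCV/rvv/remat.ll

Once the cl::opt is removed, llc will reject -riscv-vsetvl-after-rvv-regalloc as an unknown command line argument, and vsetvli insertion always runs after vector register allocation.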
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index b8539a5d1add14..7f7a20edad5a5a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -179,17 +179,10 @@ bool RISCVInstrInfo::isReallyTriviallyReMaterializable(
case RISCV::VMV_S_X:
case RISCV::VFMV_S_F:
case RISCV::VID_V:
- if (MI.getOperand(1).isUndef() &&
- /* After RISCVInsertVSETVLI most pseudos will have implicit uses on vl
- and vtype. Make sure we only rematerialize before RISCVInsertVSETVLI
- i.e. -riscv-vsetvl-after-rvv-regalloc=true */
- !MI.hasRegisterImplicitUseOperand(RISCV::VTYPE))
- return true;
- break;
+ return MI.getOperand(1).isUndef();
default:
- break;
+ return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
}
- return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
}
static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg,
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 2dcac1320417c2..488a11c1a58e0a 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -99,11 +99,6 @@ static cl::opt<bool> EnableMISchedLoadClustering(
cl::desc("Enable load clustering in the machine scheduler"),
cl::init(true));
-static cl::opt<bool> EnableVSETVLIAfterRVVRegAlloc(
- "riscv-vsetvl-after-rvv-regalloc", cl::Hidden,
- cl::desc("Insert vsetvls after vector register allocation"),
- cl::init(true));
-
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
@@ -405,8 +400,7 @@ FunctionPass *RISCVPassConfig::createRVVRegAllocPass(bool Optimized) {
bool RISCVPassConfig::addRegAssignAndRewriteFast() {
addPass(createRVVRegAllocPass(false));
- if (EnableVSETVLIAfterRVVRegAlloc)
- addPass(createRISCVInsertVSETVLIPass());
+ addPass(createRISCVInsertVSETVLIPass());
if (TM->getOptLevel() != CodeGenOptLevel::None &&
EnableRISCVDeadRegisterElimination)
addPass(createRISCVDeadRegisterDefinitionsPass());
@@ -416,8 +410,7 @@ bool RISCVPassConfig::addRegAssignAndRewriteFast() {
bool RISCVPassConfig::addRegAssignAndRewriteOptimized() {
addPass(createRVVRegAllocPass(true));
addPass(createVirtRegRewriter(false));
- if (EnableVSETVLIAfterRVVRegAlloc)
- addPass(createRISCVInsertVSETVLIPass());
+ addPass(createRISCVInsertVSETVLIPass());
if (TM->getOptLevel() != CodeGenOptLevel::None &&
EnableRISCVDeadRegisterElimination)
addPass(createRISCVDeadRegisterDefinitionsPass());
@@ -564,15 +557,6 @@ void RISCVPassConfig::addPreRegAlloc() {
addPass(createRISCVInsertReadWriteCSRPass());
addPass(createRISCVInsertWriteVXRMPass());
addPass(createRISCVLandingPadSetupPass());
-
- // Run RISCVInsertVSETVLI after PHI elimination. On O1 and above do it after
- // register coalescing so needVSETVLIPHI doesn't need to look through COPYs.
- if (!EnableVSETVLIAfterRVVRegAlloc) {
- if (TM->getOptLevel() == CodeGenOptLevel::None)
- insertPass(&PHIEliminationID, &RISCVInsertVSETVLIID);
- else
- insertPass(&RegisterCoalescerID, &RISCVInsertVSETVLIID);
- }
}
void RISCVPassConfig::addFastRegAlloc() {
diff --git a/llvm/test/CodeGen/RISCV/rvv/remat.ll b/llvm/test/CodeGen/RISCV/rvv/remat.ll
index 4f58ccb5188d31..64c59769546fb7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/remat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/remat.ll
@@ -1,53 +1,23 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,POSTRA
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v -riscv-vsetvl-after-rvv-regalloc=false -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,PRERA
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s
define void @vid(ptr %p) {
-; POSTRA-LABEL: vid:
-; POSTRA: # %bb.0:
-; POSTRA-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; POSTRA-NEXT: vid.v v8
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: vl8re64.v v16, (a0)
-; POSTRA-NEXT: vl8re64.v v24, (a0)
-; POSTRA-NEXT: vl8re64.v v0, (a0)
-; POSTRA-NEXT: vl8re64.v v8, (a0)
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: vs8r.v v0, (a0)
-; POSTRA-NEXT: vs8r.v v24, (a0)
-; POSTRA-NEXT: vs8r.v v16, (a0)
-; POSTRA-NEXT: vid.v v8
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: ret
-;
-; PRERA-LABEL: vid:
-; PRERA: # %bb.0:
-; PRERA-NEXT: addi sp, sp, -16
-; PRERA-NEXT: .cfi_def_cfa_offset 16
-; PRERA-NEXT: csrr a1, vlenb
-; PRERA-NEXT: slli a1, a1, 3
-; PRERA-NEXT: sub sp, sp, a1
-; PRERA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; PRERA-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; PRERA-NEXT: vid.v v8
-; PRERA-NEXT: vs8r.v v8, (a0)
-; PRERA-NEXT: vl8re64.v v16, (a0)
-; PRERA-NEXT: addi a1, sp, 16
-; PRERA-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; PRERA-NEXT: vl8re64.v v24, (a0)
-; PRERA-NEXT: vl8re64.v v0, (a0)
-; PRERA-NEXT: vl8re64.v v16, (a0)
-; PRERA-NEXT: vs8r.v v16, (a0)
-; PRERA-NEXT: vs8r.v v0, (a0)
-; PRERA-NEXT: vs8r.v v24, (a0)
-; PRERA-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; PRERA-NEXT: vs8r.v v16, (a0)
-; PRERA-NEXT: vs8r.v v8, (a0)
-; PRERA-NEXT: csrr a0, vlenb
-; PRERA-NEXT: slli a0, a0, 3
-; PRERA-NEXT: add sp, sp, a0
-; PRERA-NEXT: addi sp, sp, 16
-; PRERA-NEXT: ret
+; CHECK-LABEL: vid:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vl8re64.v v16, (a0)
+; CHECK-NEXT: vl8re64.v v24, (a0)
+; CHECK-NEXT: vl8re64.v v0, (a0)
+; CHECK-NEXT: vl8re64.v v8, (a0)
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vs8r.v v0, (a0)
+; CHECK-NEXT: vs8r.v v24, (a0)
+; CHECK-NEXT: vs8r.v v16, (a0)
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: ret
%vid = call <vscale x 8 x i64> @llvm.riscv.vid.nxv8i64(<vscale x 8 x i64> poison, i64 -1)
store volatile <vscale x 8 x i64> %vid, ptr %p
@@ -111,51 +81,22 @@ define void @vid_passthru(ptr %p, <vscale x 8 x i64> %v) {
}
define void @vmv.v.i(ptr %p) {
-; POSTRA-LABEL: vmv.v.i:
-; POSTRA: # %bb.0:
-; POSTRA-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; POSTRA-NEXT: vmv.v.i v8, 1
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: vl8re64.v v16, (a0)
-; POSTRA-NEXT: vl8re64.v v24, (a0)
-; POSTRA-NEXT: vl8re64.v v0, (a0)
-; POSTRA-NEXT: vl8re64.v v8, (a0)
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: vs8r.v v0, (a0)
-; POSTRA-NEXT: vs8r.v v24, (a0)
-; POSTRA-NEXT: vs8r.v v16, (a0)
-; POSTRA-NEXT: vmv.v.i v8, 1
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: ret
-;
-; PRERA-LABEL: vmv.v.i:
-; PRERA: # %bb.0:
-; PRERA-NEXT: addi sp, sp, -16
-; PRERA-NEXT: .cfi_def_cfa_offset 16
-; PRERA-NEXT: csrr a1, vlenb
-; PRERA-NEXT: slli a1, a1, 3
-; PRERA-NEXT: sub sp, sp, a1
-; PRERA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; PRERA-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; PRERA-NEXT: vmv.v.i v8, 1
-; PRERA-NEXT: vs8r.v v8, (a0)
-; PRERA-NEXT: vl8re64.v v16, (a0)
-; PRERA-NEXT: addi a1, sp, 16
-; PRERA-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; PRERA-NEXT: vl8re64.v v24, (a0)
-; PRERA-NEXT: vl8re64.v v0, (a0)
-; PRERA-NEXT: vl8re64.v v16, (a0)
-; PRERA-NEXT: vs8r.v v16, (a0)
-; PRERA-NEXT: vs8r.v v0, (a0)
-; PRERA-NEXT: vs8r.v v24, (a0)
-; PRERA-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; PRERA-NEXT: vs8r.v v16, (a0)
-; PRERA-NEXT: vs8r.v v8, (a0)
-; PRERA-NEXT: csrr a0, vlenb
-; PRERA-NEXT: slli a0, a0, 3
-; PRERA-NEXT: add sp, sp, a0
-; PRERA-NEXT: addi sp, sp, 16
-; PRERA-NEXT: ret
+; CHECK-LABEL: vmv.v.i:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 1
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vl8re64.v v16, (a0)
+; CHECK-NEXT: vl8re64.v v24, (a0)
+; CHECK-NEXT: vl8re64.v v0, (a0)
+; CHECK-NEXT: vl8re64.v v8, (a0)
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vs8r.v v0, (a0)
+; CHECK-NEXT: vs8r.v v24, (a0)
+; CHECK-NEXT: vs8r.v v16, (a0)
+; CHECK-NEXT: vmv.v.i v8, 1
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: ret
%vmv.v.i = call <vscale x 8 x i64> @llvm.riscv.vmv.v.x.nxv8i64(<vscale x 8 x i64> poison, i64 1, i64 -1)
store volatile <vscale x 8 x i64> %vmv.v.i, ptr %p
@@ -172,66 +113,35 @@ define void @vmv.v.i(ptr %p) {
ret void
}
-; The live range of %x needs extended down to the use of vmv.v.x at the end of
-; the block.
define void @vmv.v.x_needs_extended(ptr %p, i64 %x) {
-; POSTRA-LABEL: vmv.v.x_needs_extended:
-; POSTRA: # %bb.0:
-; POSTRA-NEXT: addi sp, sp, -16
-; POSTRA-NEXT: .cfi_def_cfa_offset 16
-; POSTRA-NEXT: csrr a2, vlenb
-; POSTRA-NEXT: slli a2, a2, 3
-; POSTRA-NEXT: sub sp, sp, a2
-; POSTRA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; POSTRA-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; POSTRA-NEXT: vmv.v.x v8, a1
-; POSTRA-NEXT: addi a1, sp, 16
-; POSTRA-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: vl8re64.v v16, (a0)
-; POSTRA-NEXT: vl8re64.v v24, (a0)
-; POSTRA-NEXT: vl8re64.v v0, (a0)
-; POSTRA-NEXT: vl8re64.v v8, (a0)
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: vs8r.v v0, (a0)
-; POSTRA-NEXT: vs8r.v v24, (a0)
-; POSTRA-NEXT: vs8r.v v16, (a0)
-; POSTRA-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: csrr a0, vlenb
-; POSTRA-NEXT: slli a0, a0, 3
-; POSTRA-NEXT: add sp, sp, a0
-; POSTRA-NEXT: addi sp, sp, 16
-; POSTRA-NEXT: ret
-;
-; PRERA-LABEL: vmv.v.x_needs_extended:
-; PRERA: # %bb.0:
-; PRERA-NEXT: addi sp, sp, -16
-; PRERA-NEXT: .cfi_def_cfa_offset 16
-; PRERA-NEXT: csrr a2, vlenb
-; PRERA-NEXT: slli a2, a2, 3
-; PRERA-NEXT: sub sp, sp, a2
-; PRERA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; PRERA-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; PRERA-NEXT: vmv.v.x v8, a1
-; PRERA-NEXT: vs8r.v v8, (a0)
-; PRERA-NEXT: vl8re64.v v16, (a0)
-; PRERA-NEXT: addi a1, sp, 16
-; PRERA-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; PRERA-NEXT: vl8re64.v v24, (a0)
-; PRERA-NEXT: vl8re64.v v0, (a0)
-; PRERA-NEXT: vl8re64.v v16, (a0)
-; PRERA-NEXT: vs8r.v v16, (a0)
-; PRERA-NEXT: vs8r.v v0, (a0)
-; PRERA-NEXT: vs8r.v v24, (a0)
-; PRERA-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; PRERA-NEXT: vs8r.v v16, (a0)
-; PRERA-NEXT: vs8r.v v8, (a0)
-; PRERA-NEXT: csrr a0, vlenb
-; PRERA-NEXT: slli a0, a0, 3
-; PRERA-NEXT: add sp, sp, a0
-; PRERA-NEXT: addi sp, sp, 16
-; PRERA-NEXT: ret
+; CHECK-LABEL: vmv.v.x_needs_extended:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a1
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vl8re64.v v16, (a0)
+; CHECK-NEXT: vl8re64.v v24, (a0)
+; CHECK-NEXT: vl8re64.v v0, (a0)
+; CHECK-NEXT: vl8re64.v v8, (a0)
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vs8r.v v0, (a0)
+; CHECK-NEXT: vs8r.v v24, (a0)
+; CHECK-NEXT: vs8r.v v16, (a0)
+; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
%vmv.v.x = call <vscale x 8 x i64> @llvm.riscv.vmv.v.x.nxv8i64(<vscale x 8 x i64> poison, i64 %x, i64 -1)
store volatile <vscale x 8 x i64> %vmv.v.x, ptr %p
@@ -249,53 +159,23 @@ define void @vmv.v.x_needs_extended(ptr %p, i64 %x) {
}
define void @vmv.v.x_live(ptr %p, i64 %x) {
-; POSTRA-LABEL: vmv.v.x_live:
-; POSTRA: # %bb.0:
-; POSTRA-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; POSTRA-NEXT: vmv.v.x v8, a1
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: vl8re64.v v16, (a0)
-; POSTRA-NEXT: vl8re64.v v24, (a0)
-; POSTRA-NEXT: vl8re64.v v0, (a0)
-; POSTRA-NEXT: vl8re64.v v8, (a0)
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: vs8r.v v0, (a0)
-; POSTRA-NEXT: vs8r.v v24, (a0)
-; POSTRA-NEXT: vs8r.v v16, (a0)
-; POSTRA-NEXT: vmv.v.x v8, a1
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: sd a1, 0(a0)
-; POSTRA-NEXT: ret
-;
-; PRERA-LABEL: vmv.v.x_live:
-; PRERA: # %bb.0:
-; PRERA-NEXT: addi sp, sp, -16
-; PRERA-NEXT: .cfi_def_cfa_offset 16
-; PRERA-NEXT: csrr a2, vlenb
-; PRERA-NEXT: slli a2, a2, 3
-; PRERA-NEXT: sub sp, sp, a2
-; PRERA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; PRERA-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; PRERA-NEXT: vmv.v.x v8, a1
-; PRERA-NEXT: vs8r.v v8, (a0)
-; PRERA-NEXT: vl8re64.v v16, (a0)
-; PRERA-NEXT: addi a2, sp, 16
-; PRERA-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; PRERA-NEXT: vl8re64.v v24, (a0)
-; PRERA-NEXT: vl8re64.v v0, (a0)
-; PRERA-NEXT: vl8re64.v v16, (a0)
-; PRERA-NEXT: vs8r.v v16, (a0)
-; PRERA-NEXT: vs8r.v v0, (a0)
-; PRERA-NEXT: vs8r.v v24, (a0)
-; PRERA-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
-; PRERA-NEXT: vs8r.v v16, (a0)
-; PRERA-NEXT: vs8r.v v8, (a0)
-; PRERA-NEXT: sd a1, 0(a0)
-; PRERA-NEXT: csrr a0, vlenb
-; PRERA-NEXT: slli a0, a0, 3
-; PRERA-NEXT: add sp, sp, a0
-; PRERA-NEXT: addi sp, sp, 16
-; PRERA-NEXT: ret
+; CHECK-LABEL: vmv.v.x_live:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a1
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vl8re64.v v16, (a0)
+; CHECK-NEXT: vl8re64.v v24, (a0)
+; CHECK-NEXT: vl8re64.v v0, (a0)
+; CHECK-NEXT: vl8re64.v v8, (a0)
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vs8r.v v0, (a0)
+; CHECK-NEXT: vs8r.v v24, (a0)
+; CHECK-NEXT: vs8r.v v16, (a0)
+; CHECK-NEXT: vmv.v.x v8, a1
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
%vmv.v.x = call <vscale x 8 x i64> @llvm.riscv.vmv.v.x.nxv8i64(<vscale x 8 x i64> poison, i64 %x, i64 -1)
store volatile <vscale x 8 x i64> %vmv.v.x, ptr %p
@@ -314,53 +194,23 @@ define void @vmv.v.x_live(ptr %p, i64 %x) {
}
define void @vfmv.v.f(ptr %p, double %x) {
-; POSTRA-LABEL: vfmv.v.f:
-; POSTRA: # %bb.0:
-; POSTRA-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; POSTRA-NEXT: vfmv.v.f v8, fa0
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: vl8re64.v v16, (a0)
-; POSTRA-NEXT: vl8re64.v v24, (a0)
-; POSTRA-NEXT: vl8re64.v v0, (a0)
-; POSTRA-NEXT: vl8re64.v v8, (a0)
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: vs8r.v v0, (a0)
-; POSTRA-NEXT: vs8r.v v24, (a0)
-; POSTRA-NEXT: vs8r.v v16, (a0)
-; POSTRA-NEXT: vfmv.v.f v8, fa0
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: fsd fa0, 0(a0)
-; POSTRA-NEXT: ret
-;
-; PRERA-LABEL: vfmv.v.f:
-; PRERA: # %bb.0:
-; PRERA-NEXT: addi sp, sp, -16
-; PRERA-NEXT: .cfi_def_cfa_offset 16
-; PRERA-NEXT: csrr a1, vlenb
-; PRERA-NEXT: slli a1, a1, 3
-; PRERA-NEXT: sub sp, sp, a1
-; PRERA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; PRERA-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; PRERA-NEXT: vfmv.v.f v8, fa0
-; PRERA-NEXT: vs8r.v v8, (a0)
-; PRERA-NEXT: vl8re64.v v16, (a0)
-; PRERA-NEXT: addi a1, sp, 16
-; PRERA-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; PRERA-NEXT: vl8re64.v v24, (a0)
-; PRERA-NEXT: vl8re64.v v0, (a0)
-; PRERA-NEXT: vl8re64.v v16, (a0)
-; PRERA-NEXT: vs8r.v v16, (a0)
-; PRERA-NEXT: vs8r.v v0, (a0)
-; PRERA-NEXT: vs8r.v v24, (a0)
-; PRERA-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; PRERA-NEXT: vs8r.v v16, (a0)
-; PRERA-NEXT: vs8r.v v8, (a0)
-; PRERA-NEXT: fsd fa0, 0(a0)
-; PRERA-NEXT: csrr a0, vlenb
-; PRERA-NEXT: slli a0, a0, 3
-; PRERA-NEXT: add sp, sp, a0
-; PRERA-NEXT: addi sp, sp, 16
-; PRERA-NEXT: ret
+; CHECK-LABEL: vfmv.v.f:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vl8re64.v v16, (a0)
+; CHECK-NEXT: vl8re64.v v24, (a0)
+; CHECK-NEXT: vl8re64.v v0, (a0)
+; CHECK-NEXT: vl8re64.v v8, (a0)
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vs8r.v v0, (a0)
+; CHECK-NEXT: vs8r.v v24, (a0)
+; CHECK-NEXT: vs8r.v v16, (a0)
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: fsd fa0, 0(a0)
+; CHECK-NEXT: ret
%vfmv.v.f = call <vscale x 8 x double> @llvm.riscv.vfmv.v.f.nxv8f64(<vscale x 8 x double> poison, double %x, i64 -1)
store volatile <vscale x 8 x double> %vfmv.v.f, ptr %p
@@ -379,53 +229,23 @@ define void @vfmv.v.f(ptr %p, double %x) {
}
define void @vmv.s.x(ptr %p, i64 %x) {
-; POSTRA-LABEL: vmv.s.x:
-; POSTRA: # %bb.0:
-; POSTRA-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; POSTRA-NEXT: vmv.s.x v8, a1
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: vl8re64.v v16, (a0)
-; POSTRA-NEXT: vl8re64.v v24, (a0)
-; POSTRA-NEXT: vl8re64.v v0, (a0)
-; POSTRA-NEXT: vl8re64.v v8, (a0)
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: vs8r.v v0, (a0)
-; POSTRA-NEXT: vs8r.v v24, (a0)
-; POSTRA-NEXT: vs8r.v v16, (a0)
-; POSTRA-NEXT: vmv.s.x v8, a1
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: sd a1, 0(a0)
-; POSTRA-NEXT: ret
-;
-; PRERA-LABEL: vmv.s.x:
-; PRERA: # %bb.0:
-; PRERA-NEXT: addi sp, sp, -16
-; PRERA-NEXT: .cfi_def_cfa_offset 16
-; PRERA-NEXT: csrr a2, vlenb
-; PRERA-NEXT: slli a2, a2, 3
-; PRERA-NEXT: sub sp, sp, a2
-; PRERA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; PRERA-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; PRERA-NEXT: vmv.s.x v8, a1
-; PRERA-NEXT: vs8r.v v8, (a0)
-; PRERA-NEXT: vl8re64.v v16, (a0)
-; PRERA-NEXT: addi a2, sp, 16
-; PRERA-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; PRERA-NEXT: vl8re64.v v24, (a0)
-; PRERA-NEXT: vl8re64.v v0, (a0)
-; PRERA-NEXT: vl8re64.v v16, (a0)
-; PRERA-NEXT: vs8r.v v16, (a0)
-; PRERA-NEXT: vs8r.v v0, (a0)
-; PRERA-NEXT: vs8r.v v24, (a0)
-; PRERA-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
-; PRERA-NEXT: vs8r.v v16, (a0)
-; PRERA-NEXT: vs8r.v v8, (a0)
-; PRERA-NEXT: sd a1, 0(a0)
-; PRERA-NEXT: csrr a0, vlenb
-; PRERA-NEXT: slli a0, a0, 3
-; PRERA-NEXT: add sp, sp, a0
-; PRERA-NEXT: addi sp, sp, 16
-; PRERA-NEXT: ret
+; CHECK-LABEL: vmv.s.x:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v8, a1
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vl8re64.v v16, (a0)
+; CHECK-NEXT: vl8re64.v v24, (a0)
+; CHECK-NEXT: vl8re64.v v0, (a0)
+; CHECK-NEXT: vl8re64.v v8, (a0)
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vs8r.v v0, (a0)
+; CHECK-NEXT: vs8r.v v24, (a0)
+; CHECK-NEXT: vs8r.v v16, (a0)
+; CHECK-NEXT: vmv.s.x v8, a1
+; CHECK-NEXT:...
[truncated]
19.1.1 was released yesterday. Maybe you meant 19.1.0?
LGTM
Woops yes, will fix the PR description
Force-pushed from b5ce483 to 1321450.
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/134/builds/7571
Here is the relevant piece of the build log for reference
Now that LLVM 19.1.0 has been out for a while with post-vector-RA vsetvli insertion enabled by default, this proposes to remove the flag that restores the old pre-RA behaviour so we only have one configuration going forward. That flag was mainly meant as a fallback in case users ran into issues, but I haven't seen anything reported so far.
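With the flag gone, RISCVInsertVSETVLI is scheduled unconditionally right after the RVV register allocator in both the fast and optimized register allocation paths. One way to confirm where the pass now sits (a sketch, not part of this PR; the input file name foo.ll is hypothetical) is to dump the legacy pass manager structure:

llc -mtriple=riscv64 -mattr=+v -O2 -debug-pass=Structure foo.ll -o /dev/null

The vsetvli insertion pass should appear in that listing after the vector register allocation passes rather than before register allocation.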