-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV] Rematerialize vfmv.v.f #108007
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Rematerialize vfmv.v.f #108007
Conversation
@llvm/pr-subscribers-backend-risc-v Author: Luke Lau (lukel97) ChangesThis is the same principle as vmv.v.x in #107993, but for floats.
Full diff: https://github.com/llvm/llvm-project/pull/108007.diff 3 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 325a50c9f48a1c..4d9bb401995fd9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -169,6 +169,7 @@ Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
bool RISCVInstrInfo::isReallyTriviallyReMaterializable(
const MachineInstr &MI) const {
switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
+ case RISCV::VFMV_V_F:
case RISCV::VMV_V_I:
case RISCV::VID_V:
if (MI.getOperand(1).isUndef() &&
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index e11f176bfe6041..572cf158e9c77f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -6557,6 +6557,7 @@ defm PseudoVFMERGE : VPseudoVMRG_FM;
//===----------------------------------------------------------------------===//
// 13.16. Vector Floating-Point Move Instruction
//===----------------------------------------------------------------------===//
+let isReMaterializable = 1 in
defm PseudoVFMV_V : VPseudoVMV_F;
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/RISCV/rvv/remat.ll b/llvm/test/CodeGen/RISCV/rvv/remat.ll
index 2b12249378eb1f..afaab9214fe92b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/remat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/remat.ll
@@ -171,3 +171,68 @@ define void @vmv.v.i(ptr %p) {
store volatile <vscale x 8 x i64> %vmv.v.i, ptr %p
ret void
}
+
+define void @vfmv.v.f(ptr %p, double %x) {
+; POSTRA-LABEL: vfmv.v.f:
+; POSTRA: # %bb.0:
+; POSTRA-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; POSTRA-NEXT: vfmv.v.f v8, fa0
+; POSTRA-NEXT: vs8r.v v8, (a0)
+; POSTRA-NEXT: vl8re64.v v16, (a0)
+; POSTRA-NEXT: vl8re64.v v24, (a0)
+; POSTRA-NEXT: vl8re64.v v0, (a0)
+; POSTRA-NEXT: vl8re64.v v8, (a0)
+; POSTRA-NEXT: vs8r.v v8, (a0)
+; POSTRA-NEXT: vs8r.v v0, (a0)
+; POSTRA-NEXT: vs8r.v v24, (a0)
+; POSTRA-NEXT: vs8r.v v16, (a0)
+; POSTRA-NEXT: vfmv.v.f v8, fa0
+; POSTRA-NEXT: vs8r.v v8, (a0)
+; POSTRA-NEXT: fsd fa0, 0(a0)
+; POSTRA-NEXT: ret
+;
+; PRERA-LABEL: vfmv.v.f:
+; PRERA: # %bb.0:
+; PRERA-NEXT: addi sp, sp, -16
+; PRERA-NEXT: .cfi_def_cfa_offset 16
+; PRERA-NEXT: csrr a1, vlenb
+; PRERA-NEXT: slli a1, a1, 3
+; PRERA-NEXT: sub sp, sp, a1
+; PRERA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; PRERA-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; PRERA-NEXT: vfmv.v.f v8, fa0
+; PRERA-NEXT: vs8r.v v8, (a0)
+; PRERA-NEXT: vl8re64.v v16, (a0)
+; PRERA-NEXT: addi a1, sp, 16
+; PRERA-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; PRERA-NEXT: vl8re64.v v24, (a0)
+; PRERA-NEXT: vl8re64.v v0, (a0)
+; PRERA-NEXT: vl8re64.v v16, (a0)
+; PRERA-NEXT: vs8r.v v16, (a0)
+; PRERA-NEXT: vs8r.v v0, (a0)
+; PRERA-NEXT: vs8r.v v24, (a0)
+; PRERA-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; PRERA-NEXT: vs8r.v v16, (a0)
+; PRERA-NEXT: vs8r.v v8, (a0)
+; PRERA-NEXT: fsd fa0, 0(a0)
+; PRERA-NEXT: csrr a0, vlenb
+; PRERA-NEXT: slli a0, a0, 3
+; PRERA-NEXT: add sp, sp, a0
+; PRERA-NEXT: addi sp, sp, 16
+; PRERA-NEXT: ret
+ %vfmv.v.f = call <vscale x 8 x double> @llvm.riscv.vfmv.v.f.nxv8f64(<vscale x 8 x double> poison, double %x, i64 -1)
+ store volatile <vscale x 8 x double> %vfmv.v.f, ptr %p
+
+ %a = load volatile <vscale x 8 x double>, ptr %p
+ %b = load volatile <vscale x 8 x double>, ptr %p
+ %c = load volatile <vscale x 8 x double>, ptr %p
+ %d = load volatile <vscale x 8 x double>, ptr %p
+ store volatile <vscale x 8 x double> %d, ptr %p
+ store volatile <vscale x 8 x double> %c, ptr %p
+ store volatile <vscale x 8 x double> %b, ptr %p
+ store volatile <vscale x 8 x double> %a, ptr %p
+
+ store volatile <vscale x 8 x double> %vfmv.v.f, ptr %p
+ store volatile double %x, ptr %p
+ ret void
+}
|
Continuing with llvm#107993 and llvm#108007, this handles the last of the main rematerializable vector instructions. Program regalloc.NumSpills regalloc.NumReloads regalloc.NumRemats lhs rhs diff lhs rhs diff lhs rhs diff 508.namd_r 6598.00 6598.00 0.0% 15509.00 15509.00 0.0% 2387.00 2387.00 0.0% 505.mcf_r 141.00 141.00 0.0% 372.00 372.00 0.0% 36.00 36.00 0.0% 641.leela_s 356.00 356.00 0.0% 525.00 525.00 0.0% 117.00 117.00 0.0% 631.deepsjeng_s 353.00 353.00 0.0% 682.00 682.00 0.0% 124.00 124.00 0.0% 623.xalancbmk_s 1548.00 1548.00 0.0% 2466.00 2466.00 0.0% 620.00 620.00 0.0% 620.omnetpp_s 946.00 946.00 0.0% 1485.00 1485.00 0.0% 1178.00 1178.00 0.0% 605.mcf_s 141.00 141.00 0.0% 372.00 372.00 0.0% 36.00 36.00 0.0% 557.xz_r 289.00 289.00 0.0% 505.00 505.00 0.0% 172.00 172.00 0.0% 541.leela_r 356.00 356.00 0.0% 525.00 525.00 0.0% 117.00 117.00 0.0% 531.deepsjeng_r 353.00 353.00 0.0% 682.00 682.00 0.0% 124.00 124.00 0.0% 520.omnetpp_r 946.00 946.00 0.0% 1485.00 1485.00 0.0% 1178.00 1178.00 0.0% 523.xalancbmk_r 1548.00 1548.00 0.0% 2466.00 2466.00 0.0% 620.00 620.00 0.0% 619.lbm_s 68.00 68.00 0.0% 70.00 70.00 0.0% 1.00 1.00 0.0% 519.lbm_r 73.00 73.00 0.0% 75.00 75.00 0.0% 1.00 1.00 0.0% 657.xz_s 289.00 289.00 0.0% 505.00 505.00 0.0% 172.00 172.00 0.0% 511.povray_r 1937.00 1936.00 -0.1% 3629.00 3628.00 -0.0% 517.00 518.00 0.2% 502.gcc_r 12450.00 12442.00 -0.1% 27328.00 27317.00 -0.0% 9409.00 9409.00 0.0% 602.gcc_s 12450.00 12442.00 -0.1% 27328.00 27317.00 -0.0% 9409.00 9409.00 0.0% 638.imagick_s 4181.00 4178.00 -0.1% 11342.00 11338.00 -0.0% 3366.00 3368.00 0.1% 538.imagick_r 4181.00 4178.00 -0.1% 11342.00 11338.00 -0.0% 3366.00 3368.00 0.1% 500.perlbench_r 4178.00 4175.00 -0.1% 9162.00 9159.00 -0.0% 2410.00 2410.00 0.0% 600.perlbench_s 4178.00 4175.00 -0.1% 9162.00 9159.00 -0.0% 2410.00 2410.00 0.0% 525.x264_r 1886.00 1884.00 -0.1% 4561.00 4559.00 -0.0% 471.00 471.00 0.0% 625.x264_s 1886.00 1884.00 -0.1% 4561.00 4559.00 -0.0% 471.00 471.00 0.0% 510.parest_r 42740.00 42689.00 -0.1% 82400.00 82252.00 -0.2% 5612.00 5620.00 0.1% 644.nab_s 753.00 752.00 -0.1% 1183.00 1182.00 -0.1% 318.00 318.00 0.0% 544.nab_r 753.00 752.00 -0.1% 1183.00 1182.00 -0.1% 318.00 318.00 0.0% 526.blender_r 13105.00 13084.00 -0.2% 26478.00 26442.00 -0.1% 18991.00 18989.00 -0.0% Geomean difference -0.0% -0.0% 0.0% There's an extra spill in one of the test cases, but it's likely noise from the spill weights and isn't an issue in practice.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM~
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This is the same principle as vmv.v.x in llvm#107993, but for floats. Program regalloc.NumSpills regalloc.NumReloads regalloc.NumRemats lhs rhs diff lhs rhs diff lhs rhs diff 519.lbm_r 73.00 73.00 0.0% 75.00 75.00 0.0% 1.00 1.00 0.0% 544.nab_r 753.00 753.00 0.0% 1183.00 1183.00 0.0% 318.00 318.00 0.0% 619.lbm_s 68.00 68.00 0.0% 70.00 70.00 0.0% 1.00 1.00 0.0% 644.nab_s 753.00 753.00 0.0% 1183.00 1183.00 0.0% 318.00 318.00 0.0% 508.namd_r 6598.00 6597.00 -0.0% 15509.00 15503.00 -0.0% 2387.00 2393.00 0.3% 526.blender_r 13105.00 13084.00 -0.2% 26478.00 26443.00 -0.1% 18991.00 18996.00 0.0% 510.parest_r 42740.00 42665.00 -0.2% 82400.00 82309.00 -0.1% 5612.00 5648.00 0.6% 511.povray_r 1937.00 1929.00 -0.4% 3629.00 3620.00 -0.2% 517.00 525.00 1.5% 538.imagick_r 4181.00 4150.00 -0.7% 11342.00 11125.00 -1.9% 3366.00 3366.00 0.0% 638.imagick_s 4181.00 4150.00 -0.7% 11342.00 11125.00 -1.9% 3366.00 3366.00 0.0% Geomean difference -0.2% -0.4% 0.2%
287fba4
to
b19c5df
Compare
Continuing with llvm#107993 and llvm#108007, this handles the last of the main rematerializable vector instructions. Program regalloc.NumSpills regalloc.NumReloads regalloc.NumRemats lhs rhs diff lhs rhs diff lhs rhs diff 508.namd_r 6598.00 6598.00 0.0% 15509.00 15509.00 0.0% 2387.00 2387.00 0.0% 505.mcf_r 141.00 141.00 0.0% 372.00 372.00 0.0% 36.00 36.00 0.0% 641.leela_s 356.00 356.00 0.0% 525.00 525.00 0.0% 117.00 117.00 0.0% 631.deepsjeng_s 353.00 353.00 0.0% 682.00 682.00 0.0% 124.00 124.00 0.0% 623.xalancbmk_s 1548.00 1548.00 0.0% 2466.00 2466.00 0.0% 620.00 620.00 0.0% 620.omnetpp_s 946.00 946.00 0.0% 1485.00 1485.00 0.0% 1178.00 1178.00 0.0% 605.mcf_s 141.00 141.00 0.0% 372.00 372.00 0.0% 36.00 36.00 0.0% 557.xz_r 289.00 289.00 0.0% 505.00 505.00 0.0% 172.00 172.00 0.0% 541.leela_r 356.00 356.00 0.0% 525.00 525.00 0.0% 117.00 117.00 0.0% 531.deepsjeng_r 353.00 353.00 0.0% 682.00 682.00 0.0% 124.00 124.00 0.0% 520.omnetpp_r 946.00 946.00 0.0% 1485.00 1485.00 0.0% 1178.00 1178.00 0.0% 523.xalancbmk_r 1548.00 1548.00 0.0% 2466.00 2466.00 0.0% 620.00 620.00 0.0% 619.lbm_s 68.00 68.00 0.0% 70.00 70.00 0.0% 1.00 1.00 0.0% 519.lbm_r 73.00 73.00 0.0% 75.00 75.00 0.0% 1.00 1.00 0.0% 657.xz_s 289.00 289.00 0.0% 505.00 505.00 0.0% 172.00 172.00 0.0% 511.povray_r 1937.00 1936.00 -0.1% 3629.00 3628.00 -0.0% 517.00 518.00 0.2% 502.gcc_r 12450.00 12442.00 -0.1% 27328.00 27317.00 -0.0% 9409.00 9409.00 0.0% 602.gcc_s 12450.00 12442.00 -0.1% 27328.00 27317.00 -0.0% 9409.00 9409.00 0.0% 638.imagick_s 4181.00 4178.00 -0.1% 11342.00 11338.00 -0.0% 3366.00 3368.00 0.1% 538.imagick_r 4181.00 4178.00 -0.1% 11342.00 11338.00 -0.0% 3366.00 3368.00 0.1% 500.perlbench_r 4178.00 4175.00 -0.1% 9162.00 9159.00 -0.0% 2410.00 2410.00 0.0% 600.perlbench_s 4178.00 4175.00 -0.1% 9162.00 9159.00 -0.0% 2410.00 2410.00 0.0% 525.x264_r 1886.00 1884.00 -0.1% 4561.00 4559.00 -0.0% 471.00 471.00 0.0% 625.x264_s 1886.00 1884.00 -0.1% 4561.00 4559.00 -0.0% 471.00 471.00 0.0% 510.parest_r 42740.00 42689.00 -0.1% 82400.00 82252.00 -0.2% 5612.00 5620.00 0.1% 644.nab_s 753.00 752.00 -0.1% 1183.00 1182.00 -0.1% 318.00 318.00 0.0% 544.nab_r 753.00 752.00 -0.1% 1183.00 1182.00 -0.1% 318.00 318.00 0.0% 526.blender_r 13105.00 13084.00 -0.2% 26478.00 26442.00 -0.1% 18991.00 18989.00 -0.0% Geomean difference -0.0% -0.0% 0.0% There's an extra spill in one of the test cases, but it's likely noise from the spill weights and isn't an issue in practice.
This is the same principle as vmv.v.x in #107993, but for floats.