-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV] Remove x7 from fastcc list. #96729
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-risc-v Author: Yeting Kuo (yetingk) Changes: Like #93321, this patch also tries to solve the conflicting usage of x7 for fastcc and Zicfilp, but this patch removes x7 from fastcc directly. Its purpose is to reduce the code complexity of #93321; we also found that it at most increases the instruction count by 0.02% for most benchmarks, and it might even be beneficial for some benchmarks. Patch is 69.69 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/96729.diff 5 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index af3950773e4d0..bdb0ecdb27b64 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -18696,13 +18696,12 @@ static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
// for save-restore libcall, so we don't use them.
static const MCPhysReg FastCCIGPRs[] = {
RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
- RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28,
- RISCV::X29, RISCV::X30, RISCV::X31};
+ RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X28, RISCV::X29,
+ RISCV::X30, RISCV::X31};
// The GPRs used for passing arguments in the FastCC when using ILP32E/ILP64E.
static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
- RISCV::X13, RISCV::X14, RISCV::X15,
- RISCV::X7};
+ RISCV::X13, RISCV::X14, RISCV::X15};
if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
return ArrayRef(FastCCEGPRs);
diff --git a/llvm/test/CodeGen/RISCV/fastcc-int.ll b/llvm/test/CodeGen/RISCV/fastcc-int.ll
index e4c41a1aa890f..75046b701b235 100644
--- a/llvm/test/CodeGen/RISCV/fastcc-int.ll
+++ b/llvm/test/CodeGen/RISCV/fastcc-int.ll
@@ -32,16 +32,17 @@ define i32 @caller(<16 x i32> %A) nounwind {
; RV32-NEXT: lw a5, 20(a0)
; RV32-NEXT: lw a6, 24(a0)
; RV32-NEXT: lw a7, 28(a0)
-; RV32-NEXT: lw t2, 32(a0)
-; RV32-NEXT: lw t3, 36(a0)
-; RV32-NEXT: lw t4, 40(a0)
-; RV32-NEXT: lw t5, 44(a0)
-; RV32-NEXT: lw t6, 48(a0)
-; RV32-NEXT: lw t1, 52(a0)
+; RV32-NEXT: lw t3, 32(a0)
+; RV32-NEXT: lw t4, 36(a0)
+; RV32-NEXT: lw t5, 40(a0)
+; RV32-NEXT: lw t6, 44(a0)
+; RV32-NEXT: lw t1, 48(a0)
+; RV32-NEXT: lw t2, 52(a0)
; RV32-NEXT: lw s0, 56(a0)
; RV32-NEXT: lw a0, 60(a0)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: sw s0, 4(sp)
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: sw s0, 8(sp)
+; RV32-NEXT: sw t2, 4(sp)
; RV32-NEXT: sw t1, 0(sp)
; RV32-NEXT: mv a0, t0
; RV32-NEXT: call callee
@@ -63,16 +64,17 @@ define i32 @caller(<16 x i32> %A) nounwind {
; RV64-NEXT: ld a5, 40(a0)
; RV64-NEXT: ld a6, 48(a0)
; RV64-NEXT: ld a7, 56(a0)
-; RV64-NEXT: ld t2, 64(a0)
-; RV64-NEXT: ld t3, 72(a0)
-; RV64-NEXT: ld t4, 80(a0)
-; RV64-NEXT: ld t5, 88(a0)
-; RV64-NEXT: ld t6, 96(a0)
-; RV64-NEXT: ld t1, 104(a0)
+; RV64-NEXT: ld t3, 64(a0)
+; RV64-NEXT: ld t4, 72(a0)
+; RV64-NEXT: ld t5, 80(a0)
+; RV64-NEXT: ld t6, 88(a0)
+; RV64-NEXT: ld t1, 96(a0)
+; RV64-NEXT: ld t2, 104(a0)
; RV64-NEXT: ld s0, 112(a0)
; RV64-NEXT: ld a0, 120(a0)
-; RV64-NEXT: sd a0, 16(sp)
-; RV64-NEXT: sd s0, 8(sp)
+; RV64-NEXT: sd a0, 24(sp)
+; RV64-NEXT: sd s0, 16(sp)
+; RV64-NEXT: sd t2, 8(sp)
; RV64-NEXT: sd t1, 0(sp)
; RV64-NEXT: mv a0, t0
; RV64-NEXT: call callee
diff --git a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
index a44d31dff09cc..1dbb060fc35fa 100644
--- a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
+++ b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
@@ -288,29 +288,30 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZHINX32-NEXT: lh t2, 196(sp)
; ZHINX32-NEXT: lh t1, 200(sp)
; ZHINX32-NEXT: lh t0, 204(sp)
-; ZHINX32-NEXT: sh t0, 36(sp)
-; ZHINX32-NEXT: sh t1, 34(sp)
-; ZHINX32-NEXT: sh t2, 32(sp)
-; ZHINX32-NEXT: sh t3, 30(sp)
-; ZHINX32-NEXT: sh ra, 28(sp)
-; ZHINX32-NEXT: sh s11, 26(sp)
-; ZHINX32-NEXT: sh s10, 24(sp)
-; ZHINX32-NEXT: sh s9, 22(sp)
-; ZHINX32-NEXT: sh s8, 20(sp)
-; ZHINX32-NEXT: sh s7, 18(sp)
-; ZHINX32-NEXT: sh s6, 16(sp)
-; ZHINX32-NEXT: sh s5, 14(sp)
-; ZHINX32-NEXT: sh s4, 12(sp)
-; ZHINX32-NEXT: sh s3, 10(sp)
-; ZHINX32-NEXT: sh s2, 8(sp)
-; ZHINX32-NEXT: sh s1, 6(sp)
-; ZHINX32-NEXT: sh s0, 4(sp)
-; ZHINX32-NEXT: sh t4, 2(sp)
-; ZHINX32-NEXT: sh t5, 0(sp)
-; ZHINX32-NEXT: lw t2, 56(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT: lw t3, 52(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT: lw t4, 48(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT: lw t5, 44(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT: sh t0, 38(sp)
+; ZHINX32-NEXT: sh t1, 36(sp)
+; ZHINX32-NEXT: sh t2, 34(sp)
+; ZHINX32-NEXT: sh t3, 32(sp)
+; ZHINX32-NEXT: sh ra, 30(sp)
+; ZHINX32-NEXT: sh s11, 28(sp)
+; ZHINX32-NEXT: sh s10, 26(sp)
+; ZHINX32-NEXT: sh s9, 24(sp)
+; ZHINX32-NEXT: sh s8, 22(sp)
+; ZHINX32-NEXT: sh s7, 20(sp)
+; ZHINX32-NEXT: sh s6, 18(sp)
+; ZHINX32-NEXT: sh s5, 16(sp)
+; ZHINX32-NEXT: sh s4, 14(sp)
+; ZHINX32-NEXT: sh s3, 12(sp)
+; ZHINX32-NEXT: sh s2, 10(sp)
+; ZHINX32-NEXT: sh s1, 8(sp)
+; ZHINX32-NEXT: sh s0, 6(sp)
+; ZHINX32-NEXT: sh t4, 4(sp)
+; ZHINX32-NEXT: sh t5, 2(sp)
+; ZHINX32-NEXT: sh t6, 0(sp)
+; ZHINX32-NEXT: lw t3, 56(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT: lw t4, 52(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT: lw t5, 48(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT: lw t6, 44(sp) # 4-byte Folded Reload
; ZHINX32-NEXT: call callee_half_32
; ZHINX32-NEXT: lw ra, 108(sp) # 4-byte Folded Reload
; ZHINX32-NEXT: lw s0, 104(sp) # 4-byte Folded Reload
@@ -372,29 +373,30 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZHINX64-NEXT: lh t2, 344(sp)
; ZHINX64-NEXT: lh t1, 352(sp)
; ZHINX64-NEXT: lh t0, 360(sp)
-; ZHINX64-NEXT: sh t0, 36(sp)
-; ZHINX64-NEXT: sh t1, 34(sp)
-; ZHINX64-NEXT: sh t2, 32(sp)
-; ZHINX64-NEXT: sh t3, 30(sp)
-; ZHINX64-NEXT: sh ra, 28(sp)
-; ZHINX64-NEXT: sh s11, 26(sp)
-; ZHINX64-NEXT: sh s10, 24(sp)
-; ZHINX64-NEXT: sh s9, 22(sp)
-; ZHINX64-NEXT: sh s8, 20(sp)
-; ZHINX64-NEXT: sh s7, 18(sp)
-; ZHINX64-NEXT: sh s6, 16(sp)
-; ZHINX64-NEXT: sh s5, 14(sp)
-; ZHINX64-NEXT: sh s4, 12(sp)
-; ZHINX64-NEXT: sh s3, 10(sp)
-; ZHINX64-NEXT: sh s2, 8(sp)
-; ZHINX64-NEXT: sh s1, 6(sp)
-; ZHINX64-NEXT: sh s0, 4(sp)
-; ZHINX64-NEXT: sh t4, 2(sp)
-; ZHINX64-NEXT: sh t5, 0(sp)
-; ZHINX64-NEXT: ld t2, 64(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld t3, 56(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld t4, 48(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld t5, 40(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: sh t0, 38(sp)
+; ZHINX64-NEXT: sh t1, 36(sp)
+; ZHINX64-NEXT: sh t2, 34(sp)
+; ZHINX64-NEXT: sh t3, 32(sp)
+; ZHINX64-NEXT: sh ra, 30(sp)
+; ZHINX64-NEXT: sh s11, 28(sp)
+; ZHINX64-NEXT: sh s10, 26(sp)
+; ZHINX64-NEXT: sh s9, 24(sp)
+; ZHINX64-NEXT: sh s8, 22(sp)
+; ZHINX64-NEXT: sh s7, 20(sp)
+; ZHINX64-NEXT: sh s6, 18(sp)
+; ZHINX64-NEXT: sh s5, 16(sp)
+; ZHINX64-NEXT: sh s4, 14(sp)
+; ZHINX64-NEXT: sh s3, 12(sp)
+; ZHINX64-NEXT: sh s2, 10(sp)
+; ZHINX64-NEXT: sh s1, 8(sp)
+; ZHINX64-NEXT: sh s0, 6(sp)
+; ZHINX64-NEXT: sh t4, 4(sp)
+; ZHINX64-NEXT: sh t5, 2(sp)
+; ZHINX64-NEXT: sh t6, 0(sp)
+; ZHINX64-NEXT: ld t3, 64(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld t4, 56(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld t5, 48(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld t6, 40(sp) # 8-byte Folded Reload
; ZHINX64-NEXT: call callee_half_32
; ZHINX64-NEXT: ld ra, 168(sp) # 8-byte Folded Reload
; ZHINX64-NEXT: ld s0, 160(sp) # 8-byte Folded Reload
@@ -414,38 +416,38 @@ define half @caller_half_32(<32 x half> %A) nounwind {
;
; ZFINX32-LABEL: caller_half_32:
; ZFINX32: # %bb.0:
-; ZFINX32-NEXT: addi sp, sp, -144
-; ZFINX32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s0, 136(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s1, 132(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s2, 128(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s3, 124(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s4, 120(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s5, 116(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s6, 112(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s7, 108(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s8, 104(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s9, 100(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s10, 96(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s11, 92(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: addi sp, sp, -160
+; ZFINX32-NEXT: sw ra, 156(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s0, 152(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s1, 148(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s2, 144(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s3, 140(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s4, 136(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s5, 132(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s6, 128(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s7, 124(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s8, 120(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s9, 116(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s10, 112(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s11, 108(sp) # 4-byte Folded Spill
; ZFINX32-NEXT: lw t0, 0(a0)
; ZFINX32-NEXT: lw a1, 4(a0)
-; ZFINX32-NEXT: sw a1, 88(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw a1, 104(sp) # 4-byte Folded Spill
; ZFINX32-NEXT: lw a1, 8(a0)
-; ZFINX32-NEXT: sw a1, 84(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw a1, 100(sp) # 4-byte Folded Spill
; ZFINX32-NEXT: lw a1, 12(a0)
-; ZFINX32-NEXT: sw a1, 80(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw a1, 96(sp) # 4-byte Folded Spill
; ZFINX32-NEXT: lw a1, 16(a0)
-; ZFINX32-NEXT: sw a1, 76(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw a1, 92(sp) # 4-byte Folded Spill
; ZFINX32-NEXT: lw a5, 20(a0)
; ZFINX32-NEXT: lw a6, 24(a0)
; ZFINX32-NEXT: lw a7, 28(a0)
-; ZFINX32-NEXT: lw t2, 32(a0)
-; ZFINX32-NEXT: lw t3, 36(a0)
-; ZFINX32-NEXT: lw t4, 40(a0)
-; ZFINX32-NEXT: lw t5, 44(a0)
-; ZFINX32-NEXT: lw t6, 48(a0)
-; ZFINX32-NEXT: lw t1, 52(a0)
+; ZFINX32-NEXT: lw t3, 32(a0)
+; ZFINX32-NEXT: lw t4, 36(a0)
+; ZFINX32-NEXT: lw t5, 40(a0)
+; ZFINX32-NEXT: lw t6, 44(a0)
+; ZFINX32-NEXT: lw t1, 48(a0)
+; ZFINX32-NEXT: lw t2, 52(a0)
; ZFINX32-NEXT: lw s0, 56(a0)
; ZFINX32-NEXT: lw s1, 60(a0)
; ZFINX32-NEXT: lw s2, 64(a0)
@@ -464,83 +466,84 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZFINX32-NEXT: lw a2, 116(a0)
; ZFINX32-NEXT: lw a1, 120(a0)
; ZFINX32-NEXT: lw a0, 124(a0)
-; ZFINX32-NEXT: sw a0, 72(sp)
-; ZFINX32-NEXT: sw a1, 68(sp)
-; ZFINX32-NEXT: sw a2, 64(sp)
-; ZFINX32-NEXT: sw a3, 60(sp)
-; ZFINX32-NEXT: sw a4, 56(sp)
-; ZFINX32-NEXT: sw ra, 52(sp)
-; ZFINX32-NEXT: sw s11, 48(sp)
-; ZFINX32-NEXT: sw s10, 44(sp)
-; ZFINX32-NEXT: sw s9, 40(sp)
-; ZFINX32-NEXT: sw s8, 36(sp)
-; ZFINX32-NEXT: sw s7, 32(sp)
-; ZFINX32-NEXT: sw s6, 28(sp)
-; ZFINX32-NEXT: sw s5, 24(sp)
-; ZFINX32-NEXT: sw s4, 20(sp)
-; ZFINX32-NEXT: sw s3, 16(sp)
-; ZFINX32-NEXT: sw s2, 12(sp)
-; ZFINX32-NEXT: sw s1, 8(sp)
-; ZFINX32-NEXT: sw s0, 4(sp)
+; ZFINX32-NEXT: sw a0, 76(sp)
+; ZFINX32-NEXT: sw a1, 72(sp)
+; ZFINX32-NEXT: sw a2, 68(sp)
+; ZFINX32-NEXT: sw a3, 64(sp)
+; ZFINX32-NEXT: sw a4, 60(sp)
+; ZFINX32-NEXT: sw ra, 56(sp)
+; ZFINX32-NEXT: sw s11, 52(sp)
+; ZFINX32-NEXT: sw s10, 48(sp)
+; ZFINX32-NEXT: sw s9, 44(sp)
+; ZFINX32-NEXT: sw s8, 40(sp)
+; ZFINX32-NEXT: sw s7, 36(sp)
+; ZFINX32-NEXT: sw s6, 32(sp)
+; ZFINX32-NEXT: sw s5, 28(sp)
+; ZFINX32-NEXT: sw s4, 24(sp)
+; ZFINX32-NEXT: sw s3, 20(sp)
+; ZFINX32-NEXT: sw s2, 16(sp)
+; ZFINX32-NEXT: sw s1, 12(sp)
+; ZFINX32-NEXT: sw s0, 8(sp)
+; ZFINX32-NEXT: sw t2, 4(sp)
; ZFINX32-NEXT: sw t1, 0(sp)
; ZFINX32-NEXT: mv a0, t0
-; ZFINX32-NEXT: lw a1, 88(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw a2, 84(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw a3, 80(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw a4, 76(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw a1, 104(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw a2, 100(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw a3, 96(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw a4, 92(sp) # 4-byte Folded Reload
; ZFINX32-NEXT: call callee_half_32
; ZFINX32-NEXT: lui a1, 1048560
; ZFINX32-NEXT: or a0, a0, a1
-; ZFINX32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s0, 136(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s1, 132(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s2, 128(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s3, 124(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s4, 120(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s5, 116(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s6, 112(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s7, 108(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s8, 104(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s9, 100(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s10, 96(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s11, 92(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: addi sp, sp, 144
+; ZFINX32-NEXT: lw ra, 156(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s0, 152(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s1, 148(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s2, 144(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s3, 140(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s4, 136(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s5, 132(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s6, 128(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s7, 124(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s8, 120(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s9, 116(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s10, 112(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s11, 108(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: addi sp, sp, 160
; ZFINX32-NEXT: ret
;
; ZFINX64-LABEL: caller_half_32:
; ZFINX64: # %bb.0:
-; ZFINX64-NEXT: addi sp, sp, -288
-; ZFINX64-NEXT: sd ra, 280(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s0, 272(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s1, 264(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s2, 256(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s3, 248(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s4, 240(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s5, 232(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s6, 224(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s7, 216(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s8, 208(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s9, 200(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s10, 192(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s11, 184(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: addi sp, sp, -304
+; ZFINX64-NEXT: sd ra, 296(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s0, 288(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s1, 280(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s2, 272(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s3, 264(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s4, 256(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s5, 248(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s6, 240(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s7, 232(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s8, 224(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s9, 216(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s10, 208(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s11, 200(sp) # 8-byte Folded Spill
; ZFINX64-NEXT: ld t0, 0(a0)
; ZFINX64-NEXT: ld a1, 8(a0)
-; ZFINX64-NEXT: sd a1, 176(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd a1, 192(sp) # 8-byte Folded Spill
; ZFINX64-NEXT: ld a1, 16(a0)
-; ZFINX64-NEXT: sd a1, 168(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd a1, 184(sp) # 8-byte Folded Spill
; ZFINX64-NEXT: ld a1, 24(a0)
-; ZFINX64-NEXT: sd a1, 160(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd a1, 176(sp) # 8-byte Folded Spill
; ZFINX64-NEXT: ld a1, 32(a0)
-; ZFINX64-NEXT: sd a1, 152(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd a1, 168(sp) # 8-byte Folded Spill
; ZFINX64-NEXT: ld a5, 40(a0)
; ZFINX64-NEXT: ld a6, 48(a0)
; ZFINX64-NEXT: ld a7, 56(a0)
-; ZFINX64-NEXT: ld t2, 64(a0)
-; ZFINX64-NEXT: ld t3, 72(a0)
-; ZFINX64-NEXT: ld t4, 80(a0)
-; ZFINX64-NEXT: ld t5, 88(a0)
-; ZFINX64-NEXT: ld t6, 96(a0)
-; ZFINX64-NEXT: ld t1, 104(a0)
+; ZFINX64-NEXT: ld t3, 64(a0)
+; ZFINX64-NEXT: ld t4, 72(a0)
+; ZFINX64-NEXT: ld t5, 80(a0)
+; ZFINX64-NEXT: ld t6, 88(a0)
+; ZFINX64-NEXT: ld t1, 96(a0)
+; ZFINX64-NEXT: ld t2, 104(a0)
; ZFINX64-NEXT: ld s0, 112(a0)
; ZFINX64-NEXT: ld s1, 120(a0)
; ZFINX64-NEXT: ld s2, 128(a0)
@@ -559,83 +562,84 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZFINX64-NEXT: ld a2, 232(a0)
; ZFINX64-NEXT: ld a1, 240(a0)
; ZFINX64-NEXT: ld a0, 248(a0)
-; ZFINX64-NEXT: sd a0, 144(sp)
-; ZFINX64-NEXT: sd a1, 136(sp)
-; ZFINX64-NEXT: sd a2, 128(sp)
-; ZFINX64-NEXT: sd a3, 120(sp)
-; ZFINX64-NEXT: sd a4, 112(sp)
-; ZFINX64-NEXT: sd ra, 104(sp)
-; ZFINX64-NEXT: sd s11, 96(sp)
-; ZFINX64-NEXT: sd s10, 88(sp)
-; ZFINX64-NEXT: sd s9, 80(sp)
-; ZFINX64-NEXT: sd s8, 72(sp)
-; ZFINX64-NEXT: sd s7, 64(sp)
-; ZFINX64-NEXT: sd s6, 56(sp)
-; ZFINX64-NEXT: sd s5, 48(sp)
-; ZFINX64-NEXT: sd s4, 40(sp)
-; ZFINX64-NEXT: sd s3, 32(sp)
-; ZFINX64-NEXT: sd s2, 24(sp)
-; ZFINX64-NEXT: sd s1, 16(sp)
-; ZFINX64-NEXT: sd s0, 8(sp)
+; ZFINX64-NEXT: sd a0, 152(sp)
+; ZFINX64-NEXT: sd a1, 144(sp)
+; ZFINX64-NEXT: sd a2, 136(sp)
+; ZFINX64-NEXT: sd a3, 128(sp)
+; ZFINX64-NEXT: sd a4, 120(sp)
+; ZFINX64-NEXT: sd ra, 112(sp)
+; ZFINX64-NEXT: sd s11, 104(sp)
+; ZFINX64-NEXT: sd s10, 96(sp)
+; ZFINX64-NEXT: sd s9, 88(sp)
+; ZFINX64-NEXT: sd s8, 80(sp)
+; ZFINX64-NEXT: sd s7, 72(sp)
+; ZFINX64-NEXT: sd s6, 64(sp)
+; ZFINX64-NEXT: sd s5, 56(sp)
+; ZFINX64-NEXT: sd s4, 48(sp)
+; ZFINX64-NEXT: sd s3, 40(sp)
+; ZFINX64-NEXT: sd s2, 32(sp)
+; ZFINX64-NEXT: sd s1, 24(sp)
+; ZFINX64-NEXT: sd s0, 16(sp)
+; ZFINX64-NEXT: sd t2, 8(sp)
; ZFINX64-NEXT: sd t1, 0(sp)
; ZFINX64-NEXT: mv a0, t0
-; ZFINX64-NEXT: ld a1, 176(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld a2, 168(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld a3, 160(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld a4, 152(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld a1, 192(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld a2, 184(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld a3, 176(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld a4, 168(sp) # 8-byte Folded Reload
; ZFINX64-NEXT: call callee_half_32
; ZFINX64-NEXT: lui a1, 1048560
; ZFINX64-NEXT: or a0, a0, a1
-; ZFINX64-NEXT: ld ra, 280(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s0, 272(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s1, 264(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s2, 256(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s3, 248(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s4, 240(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s5, 232(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s6, 224(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s7, 216(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s8, 208(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s9, 200(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s10, 192(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s11, 184(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: addi sp, sp, 288
+; ZFINX64-NEXT: ld ra, 296(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s0, 288(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s1, 280(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s2, 272(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s3, 264(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s4, 256(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s5, 248(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s6, 240(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s7, 232(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s8,...
[truncated]
|
@@ -18696,13 +18696,12 @@ static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) { | |||
// for save-restore libcall, so we don't use them. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please update the comment to indicate why we don't use X7.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated.
Like llvm#93321, this patch also tries to solve the conflicting usage of x7 for fastcc and Zicfilp, but this patch removes x7 from fastcc directly. Its purpose is to reduce the code complexity of llvm#93321; we also found that it at most increases the instruction count by 0.02% for most benchmarks, and it might even be beneficial for overall benchmarks.
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Summary: Like #93321, this patch also tries to solve the conflicting usage of x7 for fastcc and Zicfilp, but this patch removes x7 from fastcc directly. Its purpose is to reduce the code complexity of #93321; we also found that it at most increases the instruction count by 0.02% for most benchmarks, and it might even be beneficial for some benchmarks. Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60250997
Like #93321, this patch also tries to solve the conflicting usage of x7 for fastcc and Zicfilp, but this patch removes x7 from fastcc directly. Its purpose is to reduce the code complexity of #93321; we also found that it at most increases the instruction count by 0.02% for most benchmarks, and it might even be beneficial for some benchmarks.