Skip to content

[RISCV] Don't use x7 as input argument for fastcc when Zicfilp enabled. #93321

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 19 additions & 8 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18402,7 +18402,8 @@ ArrayRef<MCPhysReg> RISCV::getArgGPRs(const RISCVABI::ABI ABI) {
return ArrayRef(ArgIGPRs);
}

static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI,
bool HasZicfilp) {
// The GPRs used for passing arguments in the FastCC, X5 and X6 might be used
// for save-restore libcall, so we don't use them.
static const MCPhysReg FastCCIGPRs[] = {
Expand All @@ -18415,10 +18416,18 @@ static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
RISCV::X13, RISCV::X14, RISCV::X15,
RISCV::X7};

// Zicfilp needs needs x7(t2) as the landing pad label register.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How much do we lose if we just never use X7 for fastcc?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will do some test in sifive internal.

static const MCPhysReg FastCCIGPRsNonX7[] = {
RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15,
RISCV::X16, RISCV::X17, RISCV::X28, RISCV::X29, RISCV::X30, RISCV::X31};

static const MCPhysReg FastCCEGPRsNonX7[] = {
RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15};

if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
return ArrayRef(FastCCEGPRs);
return HasZicfilp ? ArrayRef(FastCCEGPRsNonX7) : ArrayRef(FastCCEGPRs);

return ArrayRef(FastCCIGPRs);
return HasZicfilp ? ArrayRef(FastCCIGPRsNonX7) : ArrayRef(FastCCIGPRs);
}

// Pass a 2*XLEN argument that has been split into two XLEN values through
Expand Down Expand Up @@ -18962,15 +18971,16 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
bool IsFixed, bool IsRet, Type *OrigTy,
const RISCVTargetLowering &TLI,
RVVArgDispatcher &RVVDispatcher) {
const RISCVSubtarget &Subtarget = TLI.getSubtarget();
bool HasZicfilp = Subtarget.hasStdExtZicfilp();

if (LocVT == MVT::i32 || LocVT == MVT::i64) {
if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI, HasZicfilp))) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
}

const RISCVSubtarget &Subtarget = TLI.getSubtarget();

if (LocVT == MVT::f16 &&
(Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {
static const MCPhysReg FPR16List[] = {
Expand Down Expand Up @@ -19014,7 +19024,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
(LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
(LocVT == MVT::f64 && Subtarget.is64Bit() &&
Subtarget.hasStdExtZdinx())) {
if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI, HasZicfilp))) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
Expand Down Expand Up @@ -19049,7 +19059,8 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
CCValAssign::getReg(ValNo, ValVT, AllocatedVReg, LocVT, LocInfo));
} else {
// Try and pass the address via a "fast" GPR.
if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
if (unsigned GPRReg =
State.AllocateReg(getFastCCArgGPRs(ABI, HasZicfilp))) {
LocInfo = CCValAssign::Indirect;
LocVT = TLI.getSubtarget().getXLenVT();
State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
Expand Down
192 changes: 130 additions & 62 deletions llvm/test/CodeGen/RISCV/fastcc-int.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV32 %s
; RUN: | FileCheck -check-prefixes=RV32,RV32-NOCFILP %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV64 %s
; RUN: | FileCheck -check-prefixes=RV64,RV64-NOCFILP %s
; RUN: llc -mtriple=riscv32 -mattr=+experimental-zicfilp -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV32,RV32-CFILP %s
; RUN: llc -mtriple=riscv64 -mattr=+experimental-zicfilp -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV64,RV64-CFILP %s

define fastcc i32 @callee(<16 x i32> %A) nounwind {
; RV32-LABEL: callee:
Expand All @@ -19,67 +23,131 @@ define fastcc i32 @callee(<16 x i32> %A) nounwind {
; With the fastcc, arguments will be passed by a0-a7 and t2-t6 (when Zicfilp
; is enabled, t2 is reserved as the landing-pad label register, so a0-a7 and
; t3-t6 are used instead).
; The rest will be pushed on the stack.
define i32 @caller(<16 x i32> %A) nounwind {
; RV32-NOCFILP-LABEL: caller:
; RV32-NOCFILP: # %bb.0:
; RV32-NOCFILP-NEXT: addi sp, sp, -32
; RV32-NOCFILP-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32-NOCFILP-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32-NOCFILP-NEXT: lw t0, 0(a0)
; RV32-NOCFILP-NEXT: lw a1, 4(a0)
; RV32-NOCFILP-NEXT: lw a2, 8(a0)
; RV32-NOCFILP-NEXT: lw a3, 12(a0)
; RV32-NOCFILP-NEXT: lw a4, 16(a0)
; RV32-NOCFILP-NEXT: lw a5, 20(a0)
; RV32-NOCFILP-NEXT: lw a6, 24(a0)
; RV32-NOCFILP-NEXT: lw a7, 28(a0)
; RV32-NOCFILP-NEXT: lw t2, 32(a0)
; RV32-NOCFILP-NEXT: lw t3, 36(a0)
; RV32-NOCFILP-NEXT: lw t4, 40(a0)
; RV32-NOCFILP-NEXT: lw t5, 44(a0)
; RV32-NOCFILP-NEXT: lw t6, 48(a0)
; RV32-NOCFILP-NEXT: lw t1, 52(a0)
; RV32-NOCFILP-NEXT: lw s0, 56(a0)
; RV32-NOCFILP-NEXT: lw a0, 60(a0)
; RV32-NOCFILP-NEXT: sw a0, 8(sp)
; RV32-NOCFILP-NEXT: sw s0, 4(sp)
; RV32-NOCFILP-NEXT: sw t1, 0(sp)
; RV32-NOCFILP-NEXT: mv a0, t0
; RV32-NOCFILP-NEXT: call callee
; RV32-NOCFILP-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NOCFILP-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NOCFILP-NEXT: addi sp, sp, 32
; RV32-NOCFILP-NEXT: ret
;
; RV64-NOCFILP-LABEL: caller:
; RV64-NOCFILP: # %bb.0:
; RV64-NOCFILP-NEXT: addi sp, sp, -48
; RV64-NOCFILP-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64-NOCFILP-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; RV64-NOCFILP-NEXT: ld t0, 0(a0)
; RV64-NOCFILP-NEXT: ld a1, 8(a0)
; RV64-NOCFILP-NEXT: ld a2, 16(a0)
; RV64-NOCFILP-NEXT: ld a3, 24(a0)
; RV64-NOCFILP-NEXT: ld a4, 32(a0)
; RV64-NOCFILP-NEXT: ld a5, 40(a0)
; RV64-NOCFILP-NEXT: ld a6, 48(a0)
; RV64-NOCFILP-NEXT: ld a7, 56(a0)
; RV64-NOCFILP-NEXT: ld t2, 64(a0)
; RV64-NOCFILP-NEXT: ld t3, 72(a0)
; RV64-NOCFILP-NEXT: ld t4, 80(a0)
; RV64-NOCFILP-NEXT: ld t5, 88(a0)
; RV64-NOCFILP-NEXT: ld t6, 96(a0)
; RV64-NOCFILP-NEXT: ld t1, 104(a0)
; RV64-NOCFILP-NEXT: ld s0, 112(a0)
; RV64-NOCFILP-NEXT: ld a0, 120(a0)
; RV64-NOCFILP-NEXT: sd a0, 16(sp)
; RV64-NOCFILP-NEXT: sd s0, 8(sp)
; RV64-NOCFILP-NEXT: sd t1, 0(sp)
; RV64-NOCFILP-NEXT: mv a0, t0
; RV64-NOCFILP-NEXT: call callee
; RV64-NOCFILP-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64-NOCFILP-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64-NOCFILP-NEXT: addi sp, sp, 48
; RV64-NOCFILP-NEXT: ret
;
; RV32-CFILP-LABEL: caller:
; RV32-CFILP: # %bb.0:
; RV32-CFILP-NEXT: addi sp, sp, -32
; RV32-CFILP-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32-CFILP-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32-CFILP-NEXT: lw t0, 0(a0)
; RV32-CFILP-NEXT: lw a1, 4(a0)
; RV32-CFILP-NEXT: lw a2, 8(a0)
; RV32-CFILP-NEXT: lw a3, 12(a0)
; RV32-CFILP-NEXT: lw a4, 16(a0)
; RV32-CFILP-NEXT: lw a5, 20(a0)
; RV32-CFILP-NEXT: lw a6, 24(a0)
; RV32-CFILP-NEXT: lw a7, 28(a0)
; RV32-CFILP-NEXT: lw t3, 32(a0)
; RV32-CFILP-NEXT: lw t4, 36(a0)
; RV32-CFILP-NEXT: lw t5, 40(a0)
; RV32-CFILP-NEXT: lw t6, 44(a0)
; RV32-CFILP-NEXT: lw t1, 48(a0)
; RV32-CFILP-NEXT: lw t2, 52(a0)
; RV32-CFILP-NEXT: lw s0, 56(a0)
; RV32-CFILP-NEXT: lw a0, 60(a0)
; RV32-CFILP-NEXT: sw a0, 12(sp)
; RV32-CFILP-NEXT: sw s0, 8(sp)
; RV32-CFILP-NEXT: sw t2, 4(sp)
; RV32-CFILP-NEXT: sw t1, 0(sp)
; RV32-CFILP-NEXT: mv a0, t0
; RV32-CFILP-NEXT: call callee
; RV32-CFILP-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-CFILP-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32-CFILP-NEXT: addi sp, sp, 32
; RV32-CFILP-NEXT: ret
;
; RV64-CFILP-LABEL: caller:
; RV64-CFILP: # %bb.0:
; RV64-CFILP-NEXT: addi sp, sp, -48
; RV64-CFILP-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64-CFILP-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; RV64-CFILP-NEXT: ld t0, 0(a0)
; RV64-CFILP-NEXT: ld a1, 8(a0)
; RV64-CFILP-NEXT: ld a2, 16(a0)
; RV64-CFILP-NEXT: ld a3, 24(a0)
; RV64-CFILP-NEXT: ld a4, 32(a0)
; RV64-CFILP-NEXT: ld a5, 40(a0)
; RV64-CFILP-NEXT: ld a6, 48(a0)
; RV64-CFILP-NEXT: ld a7, 56(a0)
; RV64-CFILP-NEXT: ld t3, 64(a0)
; RV64-CFILP-NEXT: ld t4, 72(a0)
; RV64-CFILP-NEXT: ld t5, 80(a0)
; RV64-CFILP-NEXT: ld t6, 88(a0)
; RV64-CFILP-NEXT: ld t1, 96(a0)
; RV64-CFILP-NEXT: ld t2, 104(a0)
; RV64-CFILP-NEXT: ld s0, 112(a0)
; RV64-CFILP-NEXT: ld a0, 120(a0)
; RV64-CFILP-NEXT: sd a0, 24(sp)
; RV64-CFILP-NEXT: sd s0, 16(sp)
; RV64-CFILP-NEXT: sd t2, 8(sp)
; RV64-CFILP-NEXT: sd t1, 0(sp)
; RV64-CFILP-NEXT: mv a0, t0
; RV64-CFILP-NEXT: call callee
; RV64-CFILP-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64-CFILP-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64-CFILP-NEXT: addi sp, sp, 48
; RV64-CFILP-NEXT: ret
  %C = call fastcc i32 @callee(<16 x i32> %A)
  ret i32 %C
}
Loading