
Commit d5ef3f8

[RISCV] Support fastcc passing scalable vectors indirectly with no free GPRs. (#107623)
We can still pass the vector indirectly by putting a pointer to it on the stack. This is what we already do in the normal calling convention.
1 parent adf44d5 · commit d5ef3f8
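As a rough illustration of the case this commit enables (a hypothetical sketch, not part of the patch: the function name @no_free_gprs and the exact argument mix are made up, and the committed test vector_arg_indirect_stack_no_gpr below exercises the same shape): the leading i32 arguments occupy the fastcc argument GPRs and the first two LMUL-8 vector arguments take the available vector register groups, so the last scalable vector has to be passed indirectly. Previously this case was rejected; with this change the pointer to the vector is simply placed in the outgoing-argument area on the stack.

; Hypothetical sketch: the twelve i32 arguments use up the fastcc argument GPRs,
; %x and %y take the two available LMUL-8 vector register groups, and %z is
; passed indirectly, with its address spilled to the caller's stack.
define fastcc <vscale x 16 x i32> @no_free_gprs(
    i32 %g0, i32 %g1, i32 %g2, i32 %g3, i32 %g4, i32 %g5,
    i32 %g6, i32 %g7, i32 %g8, i32 %g9, i32 %g10, i32 %g11,
    <vscale x 16 x i32> %x, <vscale x 16 x i32> %y, <vscale x 16 x i32> %z) {
  %sum = add <vscale x 16 x i32> %x, %z
  ret <vscale x 16 x i32> %sum
}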

File tree

2 files changed: 142 additions & 17 deletions

llvm/lib/Target/RISCV/RISCVCallingConv.cpp

Lines changed: 26 additions & 17 deletions
@@ -610,25 +610,34 @@ bool llvm::CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
     if (ValVT.isFixedLengthVector())
       LocVT = TLI.getContainerForFixedLengthVector(LocVT);
     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
-  } else {
-    // Try and pass the address via a "fast" GPR.
-    if (MCRegister GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
-      LocInfo = CCValAssign::Indirect;
-      LocVT = Subtarget.getXLenVT();
-      State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
-    } else if (ValVT.isFixedLengthVector()) {
-      auto StackAlign =
-          MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
-      unsigned StackOffset =
-          State.AllocateStack(ValVT.getStoreSize(), StackAlign);
-      State.addLoc(
-          CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
-    } else {
-      // Can't pass scalable vectors on the stack.
-      return true;
-    }
+    return false;
   }
 
+  // Try and pass the address via a "fast" GPR.
+  if (MCRegister GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
+    LocInfo = CCValAssign::Indirect;
+    LocVT = Subtarget.getXLenVT();
+    State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
+    return false;
+  }
+
+  // Pass scalable vectors indirectly by storing the pointer on the stack.
+  if (ValVT.isScalableVector()) {
+    LocInfo = CCValAssign::Indirect;
+    LocVT = Subtarget.getXLenVT();
+    unsigned XLen = Subtarget.getXLen();
+    unsigned StackOffset = State.AllocateStack(XLen / 8, Align(XLen / 8));
+    State.addLoc(
+        CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
+    return false;
+  }
+
+  // Pass fixed-length vectors on the stack.
+  auto StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
+  unsigned StackOffset =
+      State.AllocateStack(ValVT.getStoreSize(), StackAlign);
+  State.addLoc(
+      CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
   return false;
 }

llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll

Lines changed: 116 additions & 0 deletions
@@ -633,3 +633,119 @@ define fastcc <vscale x 32 x i32> @pass_vector_arg_indirect_stack(<vscale x 32 x
   %s = call fastcc <vscale x 32 x i32> @vector_arg_indirect_stack(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, <vscale x 32 x i32> zeroinitializer, <vscale x 32 x i32> zeroinitializer, <vscale x 32 x i32> zeroinitializer, i32 8)
   ret <vscale x 32 x i32> %s
 }
+
+; Test case where we are out of registers for the vector and all GPRs are used.
+define fastcc <vscale x 16 x i32> @vector_arg_indirect_stack_no_gpr(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, <vscale x 16 x i32> %x, <vscale x 16 x i32> %y, <vscale x 16 x i32> %z) {
+; RV32-LABEL: vector_arg_indirect_stack_no_gpr:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lw a0, 0(sp)
+; RV32-NEXT:    vl8re32.v v16, (a0)
+; RV32-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; RV32-NEXT:    vadd.vv v8, v8, v16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vector_arg_indirect_stack_no_gpr:
+; RV64:       # %bb.0:
+; RV64-NEXT:    ld a0, 0(sp)
+; RV64-NEXT:    vl8re32.v v16, (a0)
+; RV64-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; RV64-NEXT:    vadd.vv v8, v8, v16
+; RV64-NEXT:    ret
+  %s = add <vscale x 16 x i32> %x, %z
+  ret <vscale x 16 x i32> %s
+}
+
+; Calling the function above. Ensure we pass the arguments correctly.
+define fastcc <vscale x 16 x i32> @pass_vector_arg_indirect_stack_no_gpr(<vscale x 16 x i32> %x, <vscale x 16 x i32> %y, <vscale x 16 x i32> %z) {
+; RV32-LABEL: pass_vector_arg_indirect_stack_no_gpr:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -80
+; RV32-NEXT:    .cfi_def_cfa_offset 80
+; RV32-NEXT:    sw ra, 76(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 72(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 68(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    .cfi_offset s0, -8
+; RV32-NEXT:    .cfi_offset s1, -12
+; RV32-NEXT:    addi s0, sp, 80
+; RV32-NEXT:    .cfi_def_cfa s0, 0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    andi sp, sp, -64
+; RV32-NEXT:    mv s1, sp
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; RV32-NEXT:    vmv.v.i v8, 0
+; RV32-NEXT:    addi a0, s1, 64
+; RV32-NEXT:    vs8r.v v8, (a0)
+; RV32-NEXT:    li a1, 1
+; RV32-NEXT:    li a2, 2
+; RV32-NEXT:    li a3, 3
+; RV32-NEXT:    li a4, 4
+; RV32-NEXT:    li a5, 5
+; RV32-NEXT:    li a6, 6
+; RV32-NEXT:    li a7, 7
+; RV32-NEXT:    li t3, 8
+; RV32-NEXT:    li t4, 9
+; RV32-NEXT:    li t5, 10
+; RV32-NEXT:    li t6, 11
+; RV32-NEXT:    sw a0, 0(sp)
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:    vmv.v.i v16, 0
+; RV32-NEXT:    call vector_arg_indirect_stack_no_gpr
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    addi sp, s0, -80
+; RV32-NEXT:    lw ra, 76(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 72(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 68(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 80
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: pass_vector_arg_indirect_stack_no_gpr:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -96
+; RV64-NEXT:    .cfi_def_cfa_offset 96
+; RV64-NEXT:    sd ra, 88(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 80(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s1, 72(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    .cfi_offset s0, -16
+; RV64-NEXT:    .cfi_offset s1, -24
+; RV64-NEXT:    addi s0, sp, 96
+; RV64-NEXT:    .cfi_def_cfa s0, 0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    sub sp, sp, a0
+; RV64-NEXT:    andi sp, sp, -64
+; RV64-NEXT:    mv s1, sp
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; RV64-NEXT:    vmv.v.i v8, 0
+; RV64-NEXT:    addi a0, s1, 64
+; RV64-NEXT:    vs8r.v v8, (a0)
+; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    li a2, 2
+; RV64-NEXT:    li a3, 3
+; RV64-NEXT:    li a4, 4
+; RV64-NEXT:    li a5, 5
+; RV64-NEXT:    li a6, 6
+; RV64-NEXT:    li a7, 7
+; RV64-NEXT:    li t3, 8
+; RV64-NEXT:    li t4, 9
+; RV64-NEXT:    li t5, 10
+; RV64-NEXT:    li t6, 11
+; RV64-NEXT:    sd a0, 0(sp)
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    vmv.v.i v16, 0
+; RV64-NEXT:    call vector_arg_indirect_stack_no_gpr
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    addi sp, s0, -96
+; RV64-NEXT:    ld ra, 88(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s1, 72(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 96
+; RV64-NEXT:    ret
+  %s = call fastcc <vscale x 16 x i32> @vector_arg_indirect_stack_no_gpr(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, <vscale x 16 x i32> zeroinitializer, <vscale x 16 x i32> zeroinitializer, <vscale x 16 x i32> zeroinitializer)
+  ret <vscale x 16 x i32> %s
+}
