Skip to content

[PowerPC][AIX] Support ByVals with greater alignment than pointer size #93341

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 30 additions & 18 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6855,7 +6855,9 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
State.getMachineFunction().getSubtarget());
const bool IsPPC64 = Subtarget.isPPC64();
const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
const unsigned PtrSize = IsPPC64 ? 8 : 4;
const Align PtrAlign(PtrSize);
const Align StackAlign(16);
const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;

if (ValVT == MVT::f128)
Expand All @@ -6876,12 +6878,16 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
PPC::V6, PPC::V7, PPC::V8, PPC::V9,
PPC::V10, PPC::V11, PPC::V12, PPC::V13};

const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;

if (ArgFlags.isByVal()) {
if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
if (ByValAlign > StackAlign)
report_fatal_error("Pass-by-value arguments with alignment greater than "
"register width are not supported.");
"16 are not supported.");

const unsigned ByValSize = ArgFlags.getByValSize();
const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;

// An empty aggregate parameter takes up no storage and no registers,
// but needs a MemLoc for a stack slot for the formal arguments side.
Expand All @@ -6891,11 +6897,23 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
return false;
}

const unsigned StackSize = alignTo(ByValSize, PtrAlign);
unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
for (const unsigned E = Offset + StackSize; Offset < E;
Offset += PtrAlign.value()) {
if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
// Shadow allocate any registers that are not properly aligned.
unsigned NextReg = State.getFirstUnallocated(GPRs);
while (NextReg != GPRs.size() &&
!isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
// Shadow allocate the next register since its alignment is not strict enough.
unsigned Reg = State.AllocateReg(GPRs);
// Allocate the stack space shadowed by said register.
State.AllocateStack(PtrSize, PtrAlign);
assert(Reg && "Alocating register unexpectedly failed.");
(void)Reg;
NextReg = State.getFirstUnallocated(GPRs);
}

const unsigned StackSize = alignTo(ByValSize, ObjAlign);
unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
if (unsigned Reg = State.AllocateReg(GPRs))
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
else {
State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
Expand All @@ -6917,12 +6935,12 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
[[fallthrough]];
case MVT::i1:
case MVT::i32: {
const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
// AIX integer arguments are always passed in register width.
if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
: CCValAssign::LocInfo::ZExt;
if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
if (unsigned Reg = State.AllocateReg(GPRs))
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
else
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
Expand All @@ -6942,8 +6960,8 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));

// Reserve and initialize GPRs or initialize the PSA as required.
for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
for (unsigned I = 0; I < StoreSize; I += PtrSize) {
if (unsigned Reg = State.AllocateReg(GPRs)) {
assert(FReg && "An FPR should be available when a GPR is reserved.");
if (State.isVarArg()) {
// Successfully reserved GPRs are only initialized for vararg calls.
Expand Down Expand Up @@ -6995,9 +7013,6 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
return false;
}

const unsigned PtrSize = IsPPC64 ? 8 : 4;
ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;

unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
// Burn any underaligned registers and their shadowed stack space until
// we reach the required alignment.
Expand Down Expand Up @@ -7347,9 +7362,6 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
const MCPhysReg ArgReg = VA.getLocReg();
const PPCFrameLowering *FL = Subtarget.getFrameLowering();

if (Flags.getNonZeroByValAlign() > PtrByteSize)
report_fatal_error("Over aligned byvals not supported yet.");

const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
const int FI = MF.getFrameInfo().CreateFixedObject(
StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation3.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ entry:

declare void @foo(i32, i32, i32, i32, i32, i32, i32, i32, ptr byval(%struct.S) align 32)

; CHECK: LLVM ERROR: Pass-by-value arguments with alignment greater than register width are not supported.
; CHECK: LLVM ERROR: Pass-by-value arguments with alignment greater than 16 are not supported.
40 changes: 40 additions & 0 deletions llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp -mcpu=pwr7 \
; RUN: -mattr=-altivec -verify-machineinstrs < %s | \
; RUN: FileCheck --check-prefix=32BIT %s

; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp -mcpu=pwr7 \
; RUN: -mattr=-altivec -verify-machineinstrs < %s | \
; RUN: FileCheck --check-prefix=64BIT %s

%struct.vec_struct = type { <4 x i32> }

; Function Attrs: norecurse nounwind readonly
; Callee-side test: a byval struct with 16-byte alignment (greater than the
; pointer size) is received in GPRs and spilled to a fixed stack slot that the
; CHECK lines below verify is 16-byte aligned (`align 16` on the base stores).
define i32 @vec_struct_test(i32 %i, ptr nocapture readonly byval(%struct.vec_struct) align 16 %vs) {
; 32BIT-LABEL: name: vec_struct_test
; 32BIT: bb.0.entry:
; 32BIT-NEXT: liveins: $r3, $r5, $r6, $r7, $r8
; 32BIT-NEXT: {{ $}}
; 32BIT-NEXT: STW killed renamable $r7, 8, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 8, align 8)
; 32BIT-NEXT: STW killed renamable $r6, 4, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 4)
; 32BIT-NEXT: STW renamable $r5, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0, align 16)
; 32BIT-NEXT: STW killed renamable $r8, 12, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 12)
; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r5, killed renamable $r3
; 32BIT-NEXT: BLR implicit $lr, implicit $rm, implicit $r3
;
; 64BIT-LABEL: name: vec_struct_test
; 64BIT: bb.0.entry:
; 64BIT-NEXT: liveins: $x3, $x5, $x6
; 64BIT-NEXT: {{ $}}
; 64BIT-NEXT: STD renamable $x5, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0, align 16)
; 64BIT-NEXT: STD killed renamable $x6, 8, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 8)
; 64BIT-NEXT: renamable $x4 = RLDICL killed renamable $x5, 32, 32
; 64BIT-NEXT: renamable $r3 = nsw ADD4 renamable $r4, renamable $r3, implicit killed $x3, implicit killed $x4, implicit-def $x3
; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3
entry:
; Load the first vector element of the byval struct and add the scalar arg,
; forcing a 16-byte-aligned access to the materialized stack copy.
%vsi = getelementptr inbounds i8, ptr %vs, i32 0
%0 = load <4 x i32>, ptr %vsi, align 16
%vecext = extractelement <4 x i32> %0, i32 0
%add = add nsw i32 %vecext, %i
ret i32 %add
}
49 changes: 49 additions & 0 deletions llvm/test/CodeGen/PowerPC/aix-vector-byval.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp -mcpu=pwr7 \
; RUN: -mattr=-altivec -verify-machineinstrs < %s | \
; RUN: FileCheck --check-prefix=32BIT %s

; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp -mcpu=pwr7 \
; RUN: -mattr=-altivec -verify-machineinstrs < %s | \
; RUN: FileCheck --check-prefix=64BIT %s

%struct.B = type { <8 x i16>, i32, i32, [8 x i8] }

; Function Attrs: nounwind
; Caller-side test: passing a 32-byte, 16-byte-aligned byval struct. The CHECK
; lines verify the argument-passing registers start at r5/x5 (r3/x3 carries the
; i32, and one register is skipped so the aggregate begins on an aligned
; GPR/parameter-save-area boundary), with the tail written past the registers
; onto the outgoing stack in the 32-bit case.
define i32 @caller() {
; 32BIT-LABEL: name: caller
; 32BIT: bb.0.entry:
; 32BIT-NEXT: renamable $r3 = LWZ 28, %stack.0.vs :: (load (s32) from unknown-address + 4)
; 32BIT-NEXT: STW killed renamable $r3, 60, $r1 :: (store (s32) into unknown-address + 4, basealign 16)
; 32BIT-NEXT: renamable $r3 = LWZ 24, %stack.0.vs :: (load (s32) from %stack.0.vs + 24, align 8, basealign 16)
; 32BIT-NEXT: STW killed renamable $r3, 56, $r1 :: (store (s32), align 16)
; 32BIT-NEXT: ADJCALLSTACKDOWN 64, 0, implicit-def dead $r1, implicit $r1
; 32BIT-NEXT: renamable $r10 = LWZ 20, %stack.0.vs :: (load (s32) from %stack.0.vs + 20)
; 32BIT-NEXT: renamable $r9 = LWZ 16, %stack.0.vs :: (load (s32) from %stack.0.vs + 16, align 16)
; 32BIT-NEXT: renamable $r8 = LWZ 12, %stack.0.vs :: (load (s32) from %stack.0.vs + 12)
; 32BIT-NEXT: renamable $r7 = LWZ 8, %stack.0.vs :: (load (s32) from %stack.0.vs + 8, align 8)
; 32BIT-NEXT: renamable $r6 = LWZ 4, %stack.0.vs :: (load (s32) from %stack.0.vs + 4)
; 32BIT-NEXT: renamable $r5 = LWZ 0, %stack.0.vs :: (load (s32) from %stack.0.vs, align 16)
; 32BIT-NEXT: $r3 = LI 0
; 32BIT-NEXT: BL_NOP <mcsymbol .vec_struct_test[PR]>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1, implicit-def $r3
; 32BIT-NEXT: ADJCALLSTACKUP 64, 0, implicit-def dead $r1, implicit $r1
; 32BIT-NEXT: BLR implicit $lr, implicit $rm, implicit $r3
;
; 64BIT-LABEL: name: caller
; 64BIT: bb.0.entry:
; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
; 64BIT-NEXT: renamable $x8 = LD 24, %stack.0.vs :: (load (s64) from %stack.0.vs + 24)
; 64BIT-NEXT: renamable $x7 = LD 16, %stack.0.vs :: (load (s64) from %stack.0.vs + 16, align 16)
; 64BIT-NEXT: renamable $x6 = LD 8, %stack.0.vs :: (load (s64) from %stack.0.vs + 8)
; 64BIT-NEXT: renamable $x5 = LD 0, %stack.0.vs :: (load (s64) from %stack.0.vs, align 16)
; 64BIT-NEXT: $x3 = LI8 0
; 64BIT-NEXT: BL8_NOP <mcsymbol .vec_struct_test[PR]>, csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x2, implicit-def $r1, implicit-def $x3
; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3
entry:
; 16-byte-aligned local aggregate passed by value to the callee.
%vs = alloca %struct.B, align 16
%call = tail call i32 @vec_struct_test(i32 0, ptr nonnull byval(%struct.B) align 16 %vs)
ret i32 %call
}

declare i32 @vec_struct_test(i32, ptr byval(%struct.B) align 16)
Loading