Skip to content

[Hexagon] Order objects on the stack by their alignments #81280

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2688,3 +2688,67 @@ bool HexagonFrameLowering::mayOverflowFrameOffset(MachineFunction &MF) const {

return false;
}

namespace {
// Per-object record that orderFrameObjects sorts to decide the stack layout.
struct HexagonFrameSortingObject {
  // False for a default-constructed placeholder; true for a filled-in record.
  bool IsValid = false;
  // Index of Object into MFI list.
  unsigned Index = 0;
  // Size of Object as reported by MFI.
  unsigned Size = 0;
  // Alignment of Object in bytes.
  Align ObjectAlignment = Align(1);
};

// Strict weak ordering over sorting records: valid records come before
// invalid ones, then records are ordered by increasing alignment, and records
// with equal alignment by increasing size.
struct HexagonFrameSortingComparator {
  bool operator()(const HexagonFrameSortingObject &LHS,
                  const HexagonFrameSortingObject &RHS) const {
    // A valid record always precedes an invalid one.
    if (LHS.IsValid != RHS.IsValid)
      return LHS.IsValid;
    // Smaller alignment first.
    if (LHS.ObjectAlignment != RHS.ObjectAlignment)
      return LHS.ObjectAlignment < RHS.ObjectAlignment;
    // Same alignment: smaller object first.
    return LHS.Size < RHS.Size;
  }
};
} // namespace

// Sort objects on the stack by alignment value and then by size to minimize
// padding.
void HexagonFrameLowering::orderFrameObjects(
const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {

if (ObjectsToAllocate.empty())
return;

const MachineFrameInfo &MFI = MF.getFrameInfo();
int NObjects = ObjectsToAllocate.size();

// Create an array of all MFI objects.
SmallVector<HexagonFrameSortingObject> SortingObjects(
MFI.getObjectIndexEnd());

for (int i = 0, j = 0, e = MFI.getObjectIndexEnd(); i < e && j != NObjects;
++i) {
if (i != ObjectsToAllocate[j])
continue;
j++;

// A variable size object has size equal to 0. Since Hexagon sets
// getUseLocalStackAllocationBlock() to true, a local block is allocated
// earlier. This case is not handled here for now.
int Size = MFI.getObjectSize(i);
if (Size == 0)
return;

SortingObjects[i].IsValid = true;
SortingObjects[i].Index = i;
SortingObjects[i].Size = Size;
SortingObjects[i].ObjectAlignment = MFI.getObjectAlign(i);
}

// Sort objects by alignment and then by size.
llvm::stable_sort(SortingObjects, HexagonFrameSortingComparator());

// Modify the original list to represent the final order.
int i = NObjects;
for (auto &Obj : SortingObjects) {
if (i == 0)
break;
ObjectsToAllocate[--i] = Obj.Index;
}
}
4 changes: 4 additions & 0 deletions llvm/lib/Target/Hexagon/HexagonFrameLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ class HexagonFrameLowering : public TargetFrameLowering {
explicit HexagonFrameLowering()
: TargetFrameLowering(StackGrowsDown, Align(8), 0, Align(1), true) {}

// Reorder the frame indices in ObjectsToAllocate by the alignment and then
// the size of the objects they refer to, to minimize padding on the stack.
void
orderFrameObjects(const MachineFunction &MF,
SmallVectorImpl<int> &ObjectsToAllocate) const override;

// All of the prolog/epilog functionality, including saving and restoring
// callee-saved registers is handled in emitPrologue. This is to have the
// logic for shrink-wrapping in one place.
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/Hexagon/hvx-reuse-fi-base.ll
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ define dso_local void @f2() #0 {
; CHECK-NEXT: } // 8-byte Folded Spill
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vsplat(r16)
; CHECK-NEXT: vmem(r29+#6) = v0.new
; CHECK-NEXT: vmem(r29+#2) = v0.new
; CHECK-NEXT: } // 128-byte Folded Spill
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vand(v0,r0)
Expand All @@ -56,7 +56,7 @@ define dso_local void @f2() #0 {
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vand(q0,r17)
; CHECK-NEXT: r19 = ##g0+128
; CHECK-NEXT: vmem(r29+#7) = v0.new
; CHECK-NEXT: vmem(r29+#1) = v0.new
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r20 = ##g0
Expand All @@ -78,15 +78,15 @@ define dso_local void @f2() #0 {
; CHECK-NEXT: vmem(r20+#0) = v30
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmem(r29+#6)
; CHECK-NEXT: v0 = vmem(r29+#2)
; CHECK-NEXT: } // 128-byte Folded Reload
; CHECK-NEXT: {
; CHECK-NEXT: v1:0.h = vadd(v0.ub,v0.ub)
; CHECK-NEXT: r0 = ##g2
; CHECK-NEXT: vmem(r29+#1) = v0.new
; CHECK-NEXT: vmem(r29+#6) = v0.new
; CHECK-NEXT: } // 256-byte Folded Spill
; CHECK-NEXT: {
; CHECK-NEXT: vmem(r29+#2) = v1
; CHECK-NEXT: vmem(r29+#7) = v1
; CHECK-NEXT: } // 256-byte Folded Spill
; CHECK-NEXT: {
; CHECK-NEXT: v1:0.uw = vrmpy(v1:0.ub,r17.ub,#0)
Expand All @@ -98,10 +98,10 @@ define dso_local void @f2() #0 {
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r0 = ##2147483647
; CHECK-NEXT: v0 = vmem(r29+#1)
; CHECK-NEXT: v0 = vmem(r29+#6)
; CHECK-NEXT: } // 256-byte Folded Reload
; CHECK-NEXT: {
; CHECK-NEXT: v1 = vmem(r29+#2)
; CHECK-NEXT: v1 = vmem(r29+#7)
; CHECK-NEXT: } // 256-byte Folded Reload
; CHECK-NEXT: {
; CHECK-NEXT: v1:0.uw = vrmpy(v1:0.ub,r0.ub,#1)
Expand Down Expand Up @@ -142,7 +142,7 @@ define dso_local void @f2() #0 {
; CHECK-NEXT: vmem(r20+#0) = v0
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmem(r29+#6)
; CHECK-NEXT: v0 = vmem(r29+#2)
; CHECK-NEXT: } // 128-byte Folded Reload
; CHECK-NEXT: {
; CHECK-NEXT: v1 = vmem(r29+#3)
Expand Down
42 changes: 42 additions & 0 deletions llvm/test/CodeGen/Hexagon/order-stack-object.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
; RUN: llc -march=hexagon -mattr=+hvxv68,+hvx-length128b < %s | FileCheck %s

; Check that ordering stack objects from the largest to the smallest (by
; alignment, then by size) decreases the allocated stack space by 512 bytes.

; CHECK: allocframe(r29,#2432):raw

define void @test(ptr nocapture readonly %arg, ptr nocapture writeonly %arg1, i32 %arg2) local_unnamed_addr {
bb:
%shl = shl i32 %arg2, 5
br label %bb3

bb3:
%phi = phi i32 [ 0, %bb ], [ %add13, %bb3 ]
%add = add i32 %phi, %shl
%sext = sext i32 %add to i64
%getelementptr = getelementptr float, ptr %arg, i64 %sext
%load = load <32 x float>, ptr %getelementptr, align 4
%fmul = fmul <32 x float> %load, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>
%fmul4 = fmul <32 x float> %load, <float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000, float 0x3FE9884620000000>
%fmul5 = fmul <32 x float> %load, <float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000, float 0x3FA2444180000000>
%fmul6 = fmul <32 x float> %load, %fmul5
%fmul7 = fmul <32 x float> %load, %fmul6
%fadd = fadd <32 x float> %fmul4, %fmul7
%fmul8 = fmul <32 x float> %fadd, <float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00>
%call = tail call <32 x float> @llvm.exp.v32f32(<32 x float> %fmul8)
%fsub = fsub <32 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %call
%fadd9 = fadd <32 x float> %call, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
%fdiv = fdiv <32 x float> %fsub, %fadd9
%fadd10 = fadd <32 x float> %fdiv, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
%fmul11 = fmul <32 x float> %fmul, %fadd10
%getelementptr12 = getelementptr float, ptr %arg1, i64 %sext
store <32 x float> %fmul11, ptr %getelementptr12, align 128
%add13 = add nuw nsw i32 %phi, 128
%icmp = icmp ult i32 %phi, 8064
br i1 %icmp, label %bb3, label %bb14

bb14:
ret void
}

declare <32 x float> @llvm.exp.v32f32(<32 x float>)
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/Hexagon/store-imm-stack-object.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
target triple = "hexagon"

; CHECK-LABEL: test1:
; CHECK-DAG: memw(r29+#4) = ##875770417
; CHECK-DAG: memw(r29+#12) = ##875770417
; CHECK-DAG: memw(r29+#8) = #51
; CHECK-DAG: memh(r29+#12) = #50
; CHECK-DAG: memb(r29+#15) = #49
; CHECK-DAG: memh(r29+#6) = #50
; CHECK-DAG: memb(r29+#5) = #49
define void @test1() {
b0:
%v1 = alloca [1 x i8], align 1
Expand All @@ -30,9 +30,9 @@ b0:
}

; CHECK-LABEL: test2:
; CHECK-DAG: memw(r29+#208) = #51
; CHECK-DAG: memh(r29+#212) = r{{[0-9]+}}
; CHECK-DAG: memb(r29+#215) = r{{[0-9]+}}
; CHECK-DAG: memw(r29+#8) = #51
; CHECK-DAG: memh(r29+#6) = r{{[0-9]+}}
; CHECK-DAG: memb(r29+#5) = r{{[0-9]+}}
define void @test2() {
b0:
%v1 = alloca [1 x i8], align 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,31 +75,31 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" }
; CHECK-NEXT: .cfi_offset r31, -4
; CHECK-NEXT: .cfi_offset r30, -8
; CHECK-NEXT: {
; CHECK-NEXT: memw(r29+#20) = #0
; CHECK-NEXT: memw(r29+#4) = #0
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: memw(r29+#16) = #0
; CHECK-NEXT: memw(r29+#16) = #1
; CHECK-NEXT: memw(r29+#8) = #0
; CHECK-NEXT: memw(r29+#8) = #1
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r1 = memw(r29+#16)
; CHECK-NEXT: r1 = memw(r29+#8)
; CHECK-NEXT: memw(r29+#12) = #2
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: memw(r29+#8) = #3
; CHECK-NEXT: memw(r29+#4) = #4
; CHECK-NEXT: memw(r29+#16) = #3
; CHECK-NEXT: memw(r29+#20) = #4
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: p0 = cmp.eq(r1,#0)
; CHECK-NEXT: if (p0.new) memw(r29+#8) = #3
; CHECK-NEXT: if (p0.new) memw(r29+#16) = #3
; CHECK-NEXT: if (p0.new) memw(r29+#12) = #2
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: if (p0) memw(r29+#4) = #4
; CHECK-NEXT: if (p0) memw(r29+#16) = #1
; CHECK-NEXT: if (p0) memw(r29+#20) = #4
; CHECK-NEXT: if (p0) memw(r29+#8) = #1
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: if (!p0) memw(r29+#8) = #1
; CHECK-NEXT: if (!p0) memw(r29+#16) = #1
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
Expand All @@ -116,27 +116,27 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" }
; CHECK-NEXT: .cfi_offset r31, -4
; CHECK-NEXT: .cfi_offset r30, -8
; CHECK-NEXT: {
; CHECK-NEXT: memw(r29+#20) = #0
; CHECK-NEXT: memw(r29+#4) = #0
; CHECK-NEXT: memw(r0+#0) = #1
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: memw(r29+#16) = #1
; CHECK-NEXT: memw(r29+#8) = #1
; CHECK-NEXT: memw(r29+#12) = #2
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: memw(r29+#8) = #3
; CHECK-NEXT: memw(r29+#4) = #4
; CHECK-NEXT: memw(r29+#16) = #3
; CHECK-NEXT: memw(r29+#20) = #4
; CHECK-NEXT: }
; CHECK-NEXT: //# InlineAsm Start
; CHECK-NEXT: //# InlineAsm End
; CHECK-NEXT: {
; CHECK-NEXT: r0 = #0
; CHECK-NEXT: memw(r29+#16) = #1
; CHECK-NEXT: memw(r29+#8) = #1
; CHECK-NEXT: memw(r29+#12) = #2
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: memw(r29+#8) = #3
; CHECK-NEXT: memw(r29+#4) = #4
; CHECK-NEXT: memw(r29+#16) = #3
; CHECK-NEXT: memw(r29+#20) = #4
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,31 +16,31 @@ define dso_local i32 @check_boundaries() #0 {
; CHECK-NEXT: .cfi_offset r31, -4
; CHECK-NEXT: .cfi_offset r30, -8
; CHECK-NEXT: {
; CHECK-NEXT: memw(r29+#20) = #0
; CHECK-NEXT: memw(r29+#4) = #0
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: memw(r29+#16) = #0
; CHECK-NEXT: memw(r29+#16) = #1
; CHECK-NEXT: memw(r29+#8) = #0
; CHECK-NEXT: memw(r29+#8) = #1
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r1 = memw(r29+#16)
; CHECK-NEXT: r1 = memw(r29+#8)
; CHECK-NEXT: memw(r29+#12) = #2
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: memw(r29+#8) = #3
; CHECK-NEXT: memw(r29+#4) = #4
; CHECK-NEXT: memw(r29+#16) = #3
; CHECK-NEXT: memw(r29+#20) = #4
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: p0 = cmp.eq(r1,#0)
; CHECK-NEXT: if (p0.new) memw(r29+#8) = #3
; CHECK-NEXT: if (p0.new) memw(r29+#16) = #3
; CHECK-NEXT: if (p0.new) memw(r29+#12) = #2
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: if (p0) memw(r29+#4) = #4
; CHECK-NEXT: if (p0) memw(r29+#16) = #1
; CHECK-NEXT: if (p0) memw(r29+#20) = #4
; CHECK-NEXT: if (p0) memw(r29+#8) = #1
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: if (!p0) memw(r29+#8) = #1
; CHECK-NEXT: if (!p0) memw(r29+#16) = #1
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
Expand Down Expand Up @@ -93,27 +93,27 @@ define dso_local i32 @main() #0 {
; CHECK-NEXT: .cfi_offset r31, -4
; CHECK-NEXT: .cfi_offset r30, -8
; CHECK-NEXT: {
; CHECK-NEXT: memw(r29+#20) = #0
; CHECK-NEXT: memw(r29+#4) = #0
; CHECK-NEXT: memw(r0+#0) = #1
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: memw(r29+#16) = #1
; CHECK-NEXT: memw(r29+#8) = #1
; CHECK-NEXT: memw(r29+#12) = #2
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: memw(r29+#8) = #3
; CHECK-NEXT: memw(r29+#4) = #4
; CHECK-NEXT: memw(r29+#16) = #3
; CHECK-NEXT: memw(r29+#20) = #4
; CHECK-NEXT: }
; CHECK-NEXT: //# InlineAsm Start
; CHECK-NEXT: //# InlineAsm End
; CHECK-NEXT: {
; CHECK-NEXT: r0 = #0
; CHECK-NEXT: memw(r29+#16) = #1
; CHECK-NEXT: memw(r29+#8) = #1
; CHECK-NEXT: memw(r29+#12) = #2
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: memw(r29+#8) = #3
; CHECK-NEXT: memw(r29+#4) = #4
; CHECK-NEXT: memw(r29+#16) = #3
; CHECK-NEXT: memw(r29+#20) = #4
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
Expand Down