Skip to content

[MachineBlockPlacement][X86] Use max of MDAlign and TLIAlign to align Loops. #71026

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Nov 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ Changes to the CodeGen infrastructure

Changes to the Metadata Info
---------------------------------
* Added a new loop metadata `!{!"llvm.loop.align", i32 64}`

Changes to the Debug Info
---------------------------------
Expand Down
11 changes: 10 additions & 1 deletion llvm/include/llvm/CodeGen/MachineLoopInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Support/GenericLoopInfo.h"

Expand All @@ -57,7 +58,7 @@ class MachineLoop : public LoopBase<MachineBasicBlock, MachineLoop> {
/// loop test. This will return the latch block if it's one of the exiting
/// blocks. Otherwise, return the exiting block. Return 'null' when
/// multiple exiting blocks are present.
MachineBasicBlock *findLoopControlBlock();
MachineBasicBlock *findLoopControlBlock() const;

/// Return the debug location of the start of this loop.
/// This looks for a BB terminating instruction with a known debug
Expand All @@ -66,6 +67,14 @@ class MachineLoop : public LoopBase<MachineBasicBlock, MachineLoop> {
/// it returns an unknown location.
DebugLoc getStartLoc() const;

/// Find the llvm.loop metadata for this loop.
/// If each branch to the header of this loop contains the same llvm.loop
/// metadata, then this metadata node is returned. Otherwise, if any
/// latch instruction does not contain the llvm.loop metadata or
/// multiple latch instructions contain different llvm.loop metadata nodes,
/// then null is returned.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why null? Should we use the max value of alignment if multiple latch instructions contain different llvm.loop metadata nodes.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it is allowed to not set this loop metadata node from front end and patch wants to set a default alignment for such situation.

MDNode *getLoopID() const;

/// Returns true if the instruction is loop invariant.
/// I.e., all virtual register operands are defined outside of the loop,
/// physical registers aren't accessed explicitly, and there are no side
Expand Down
30 changes: 26 additions & 4 deletions llvm/lib/CodeGen/MachineBlockPlacement.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2919,8 +2919,30 @@ void MachineBlockPlacement::alignBlocks() {
if (!L)
continue;

const Align Align = TLI->getPrefLoopAlignment(L);
if (Align == 1)
const Align TLIAlign = TLI->getPrefLoopAlignment(L);
unsigned MDAlign = 1;
MDNode *LoopID = L->getLoopID();
if (LoopID) {
for (unsigned I = 1, E = LoopID->getNumOperands(); I < E; ++I) {
MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(I));
if (MD == nullptr)
continue;
MDString *S = dyn_cast<MDString>(MD->getOperand(0));
if (S == nullptr)
continue;
if (S->getString() == "llvm.loop.align") {
assert(MD->getNumOperands() == 2 &&
"per-loop align metadata should have two operands.");
MDAlign =
mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
assert(MDAlign >= 1 && "per-loop align value must be positive.");
}
}
}

// Use max of the TLIAlign and MDAlign
const Align LoopAlign = std::max(TLIAlign, Align(MDAlign));
if (LoopAlign == 1)
continue; // Don't care about loop alignment.

// If the block is cold relative to the function entry don't waste space
Expand Down Expand Up @@ -2959,7 +2981,7 @@ void MachineBlockPlacement::alignBlocks() {
// Force alignment if all the predecessors are jumps. We already checked
// that the block isn't cold above.
if (!LayoutPred->isSuccessor(ChainBB)) {
ChainBB->setAlignment(Align);
ChainBB->setAlignment(LoopAlign);
DetermineMaxAlignmentPadding();
continue;
}
Expand All @@ -2972,7 +2994,7 @@ void MachineBlockPlacement::alignBlocks() {
MBPI->getEdgeProbability(LayoutPred, ChainBB);
BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
if (LayoutEdgeFreq <= (Freq * ColdProb)) {
ChainBB->setAlignment(Align);
ChainBB->setAlignment(LoopAlign);
DetermineMaxAlignmentPadding();
}
}
Expand Down
49 changes: 48 additions & 1 deletion llvm/lib/CodeGen/MachineLoopInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ MachineBasicBlock *MachineLoop::getBottomBlock() {
return BotMBB;
}

MachineBasicBlock *MachineLoop::findLoopControlBlock() {
MachineBasicBlock *MachineLoop::findLoopControlBlock() const {
if (MachineBasicBlock *Latch = getLoopLatch()) {
if (isLoopExiting(Latch))
return Latch;
Expand Down Expand Up @@ -151,6 +151,53 @@ MachineLoopInfo::findLoopPreheader(MachineLoop *L, bool SpeculativePreheader,
return Preheader;
}

MDNode *MachineLoop::getLoopID() const {
MDNode *LoopID = nullptr;
if (const auto *MBB = findLoopControlBlock()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Downstream we noticed that the LoopID can't be found for some loops because findLoopControlBlock() returns the exiting block. Should this not be getLoopLatch()?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From my understanding, if the Loop has one existing BB, the loop metadata is supposed to be attached there?

Copy link
Contributor

@kortbeek-snps kortbeek-snps Mar 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the quick response, @FreddyLeaf!

I don't see this reflected in the Loop::setLoopID function in the middle-end llvm/lib/Analysis/LoopInfo.cpp#L526, and the llvm.loop documentation also only mentions that the loop metadata is connected to the branch instruction in the loop latch (https://llvm.org/docs/LangRef.html#llvm-loop). Is this exception documented somewhere?

Edit: It might be best to mirror the Loop::getLoopID() implementation: llvm/lib/Analysis/LoopInfo.cpp#L502

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Emm, looks like my understanding is outdated. I agree to mirror the middle end's implementation. Would you help do a refinement on this?

// If there is a single latch block, then the metadata
// node is attached to its terminating instruction.
const auto *BB = MBB->getBasicBlock();
if (!BB)
return nullptr;
if (const auto *TI = BB->getTerminator())
LoopID = TI->getMetadata(LLVMContext::MD_loop);
} else if (const auto *MBB = getHeader()) {
// There seem to be multiple latch blocks, so we have to
// visit all predecessors of the loop header and check
// their terminating instructions for the metadata.
if (const auto *Header = MBB->getBasicBlock()) {
// Walk over all blocks in the loop.
for (const auto *MBB : this->blocks()) {
const auto *BB = MBB->getBasicBlock();
if (!BB)
return nullptr;
const auto *TI = BB->getTerminator();
if (!TI)
return nullptr;
MDNode *MD = nullptr;
// Check if this terminating instruction jumps to the loop header.
for (const auto *Succ : successors(TI)) {
if (Succ == Header) {
// This is a jump to the header - gather the metadata from it.
MD = TI->getMetadata(LLVMContext::MD_loop);
break;
}
}
if (!MD)
return nullptr;
if (!LoopID)
LoopID = MD;
else if (MD != LoopID)
return nullptr;
}
}
}
if (LoopID &&
(LoopID->getNumOperands() == 0 || LoopID->getOperand(0) != LoopID))
LoopID = nullptr;
return LoopID;
}

bool MachineLoop::isLoopInvariant(MachineInstr &I) const {
MachineFunction *MF = I.getParent()->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
Expand Down
105 changes: 105 additions & 0 deletions llvm/test/CodeGen/X86/code-align-loops.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu | FileCheck %s -check-prefixes=CHECK,ALIGN
; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -align-loops=32 | FileCheck %s -check-prefixes=CHECK,ALIGN32
; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -align-loops=256 | FileCheck %s -check-prefixes=CHECK,ALIGN256

; This test is to check if .p2align can be correctly generated by considerring
; 1. -align-loops=N from llc option
; 2. loop metadata node !{!"llvm.loop.align", i32 64}
; The test IR is generated from below simple C file:
; $ clang -S -emit-llvm loop.c
; $ cat loop.c
; void bar(void);
; void var(void);
; void foo(int a) {
; for (int i = 0; i < a; ++i)
; bar();
; for (int i = 0; i < a; ++i)
; var();
; }
; The difference between test1 and test2 is test2 only set one loop metadata node for the second loop.

; CHECK-LABEL: test1:
; ALIGN: .p2align 6, 0x90
; ALIGN-NEXT: .LBB0_2: # %for.body
; ALIGN: .p2align 9, 0x90
; ALIGN-NEXT: .LBB0_3: # %for.body

; ALIGN32: .p2align 6, 0x90
; ALIGN32-NEXT: .LBB0_2: # %for.body
; ALIGN32: .p2align 9, 0x90
; ALIGN32-NEXT: .LBB0_3: # %for.body

; ALIGN256: .p2align 8, 0x90
; ALIGN256-NEXT: .LBB0_2: # %for.body
; ALIGN256: .p2align 9, 0x90
; ALIGN256-NEXT: .LBB0_3: # %for.body

define void @test1(i32 %a) nounwind {
entry:
%cmp12 = icmp sgt i32 %a, 0
br i1 %cmp12, label %for.body, label %for.cond.cleanup4

for.body: ; preds = %entry, %for.body
%i.013 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
tail call void @bar()
%inc = add nuw nsw i32 %i.013, 1
%exitcond.not = icmp eq i32 %inc, %a
br i1 %exitcond.not, label %for.body5, label %for.body, !llvm.loop !0

for.cond.cleanup4: ; preds = %for.body5, %entry
ret void

for.body5: ; preds = %for.body, %for.body5
%i1.015 = phi i32 [ %inc7, %for.body5 ], [ 0, %for.body ]
tail call void @var()
%inc7 = add nuw nsw i32 %i1.015, 1
%exitcond16.not = icmp eq i32 %inc7, %a
br i1 %exitcond16.not, label %for.cond.cleanup4, label %for.body5, !llvm.loop !2
}

; CHECK-LABEL: test2:
; ALIGN: .p2align 4, 0x90
; ALIGN-NEXT: .LBB1_2: # %for.body
; ALIGN: .p2align 9, 0x90
; ALIGN-NEXT: .LBB1_3: # %for.body

; ALIGN32: .p2align 5, 0x90
; ALIGN32-NEXT: .LBB1_2: # %for.body
; ALIGN32: .p2align 9, 0x90
; ALIGN32-NEXT: .LBB1_3: # %for.body

; ALIGN256: .p2align 8, 0x90
; ALIGN256-NEXT: .LBB1_2: # %for.body
; ALIGN256: .p2align 9, 0x90
; ALIGN256-NEXT: .LBB1_3: # %for.body
define void @test2(i32 %a) nounwind {
entry:
%cmp12 = icmp sgt i32 %a, 0
br i1 %cmp12, label %for.body, label %for.cond.cleanup4

for.body: ; preds = %entry, %for.body
%i.013 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
tail call void @bar()
%inc = add nuw nsw i32 %i.013, 1
%exitcond.not = icmp eq i32 %inc, %a
br i1 %exitcond.not, label %for.body5, label %for.body

for.cond.cleanup4: ; preds = %for.body5, %entry
ret void

for.body5: ; preds = %for.body, %for.body5
%i1.015 = phi i32 [ %inc7, %for.body5 ], [ 0, %for.body ]
tail call void @var()
%inc7 = add nuw nsw i32 %i1.015, 1
%exitcond16.not = icmp eq i32 %inc7, %a
br i1 %exitcond16.not, label %for.cond.cleanup4, label %for.body5, !llvm.loop !2
}

declare void @bar()
declare void @var()

!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.align", i32 64}
!2 = distinct !{!2, !3}
!3 = !{!"llvm.loop.align", i32 512}