Skip to content

Commit 82e7581

Browse files
committed
[MachineBlockPlacement][X86] Use max of MDAlign and TLIAlign to align Loops.
This patch adds backend support for consuming a new loop metadata node, !1 = !{!"llvm.loop.align", i32 64}, which is generated from clang's new loop attribute [[clang::code_align()]] (clang patch: #70762).
1 parent c0d78c4 commit 82e7581

File tree

4 files changed

+190
-6
lines changed

4 files changed

+190
-6
lines changed

llvm/include/llvm/CodeGen/MachineLoopInfo.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131

3232
#include "llvm/CodeGen/MachineBasicBlock.h"
3333
#include "llvm/CodeGen/MachineFunctionPass.h"
34+
#include "llvm/IR/CFG.h"
3435
#include "llvm/IR/DebugLoc.h"
3536
#include "llvm/Support/GenericLoopInfo.h"
3637

@@ -57,7 +58,7 @@ class MachineLoop : public LoopBase<MachineBasicBlock, MachineLoop> {
5758
/// loop test. This will return the latch block if it's one of the exiting
5859
/// blocks. Otherwise, return the exiting block. Return 'null' when
5960
/// multiple exiting blocks are present.
60-
MachineBasicBlock *findLoopControlBlock();
61+
MachineBasicBlock *findLoopControlBlock() const;
6162

6263
/// Return the debug location of the start of this loop.
6364
/// This looks for a BB terminating instruction with a known debug
@@ -66,6 +67,14 @@ class MachineLoop : public LoopBase<MachineBasicBlock, MachineLoop> {
6667
/// it returns an unknown location.
6768
DebugLoc getStartLoc() const;
6869

70+
/// \brief Find the llvm.loop metadata for this loop.
71+
/// If each branch to the header of this loop contains the same llvm.loop
72+
/// metadata, then this metadata node is returned. Otherwise, if any
73+
/// latch instruction does not contain the llvm.loop metadata or
74+
/// multiple latch instructions contain different llvm.loop metadata nodes,
75+
/// then null is returned.
76+
MDNode *getLoopID() const;
77+
6978
/// Returns true if the instruction is loop invariant.
7079
/// I.e., all virtual register operands are defined outside of the loop,
7180
/// physical registers aren't accessed explicitly, and there are no side

llvm/lib/CodeGen/MachineBlockPlacement.cpp

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2919,8 +2919,30 @@ void MachineBlockPlacement::alignBlocks() {
29192919
if (!L)
29202920
continue;
29212921

2922-
const Align Align = TLI->getPrefLoopAlignment(L);
2923-
if (Align == 1)
2922+
const Align TLIAlign = TLI->getPrefLoopAlignment(L);
2923+
unsigned MDAlign = 1;
2924+
MDNode *LoopID = L->getLoopID();
2925+
if (LoopID) {
2926+
for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
2927+
MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
2928+
if (MD == nullptr)
2929+
continue;
2930+
MDString *S = dyn_cast<MDString>(MD->getOperand(0));
2931+
if (S == nullptr)
2932+
continue;
2933+
if (S->getString() == "llvm.loop.align") {
2934+
assert(MD->getNumOperands() == 2 &&
2935+
"per-loop align metadata should have two operands.");
2936+
MDAlign =
2937+
mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
2938+
assert(MDAlign >= 1 && "per-loop align value must be positive.");
2939+
}
2940+
}
2941+
}
2942+
2943+
// Use max of the TLIAlign and MDAlign
2944+
const Align LoopAlign = std::max(TLIAlign, Align(MDAlign));
2945+
if (LoopAlign == 1)
29242946
continue; // Don't care about loop alignment.
29252947

29262948
// If the block is cold relative to the function entry don't waste space
@@ -2959,7 +2981,7 @@ void MachineBlockPlacement::alignBlocks() {
29592981
// Force alignment if all the predecessors are jumps. We already checked
29602982
// that the block isn't cold above.
29612983
if (!LayoutPred->isSuccessor(ChainBB)) {
2962-
ChainBB->setAlignment(Align);
2984+
ChainBB->setAlignment(LoopAlign);
29632985
DetermineMaxAlignmentPadding();
29642986
continue;
29652987
}
@@ -2972,7 +2994,7 @@ void MachineBlockPlacement::alignBlocks() {
29722994
MBPI->getEdgeProbability(LayoutPred, ChainBB);
29732995
BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
29742996
if (LayoutEdgeFreq <= (Freq * ColdProb)) {
2975-
ChainBB->setAlignment(Align);
2997+
ChainBB->setAlignment(LoopAlign);
29762998
DetermineMaxAlignmentPadding();
29772999
}
29783000
}

llvm/lib/CodeGen/MachineLoopInfo.cpp

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ MachineBasicBlock *MachineLoop::getBottomBlock() {
8888
return BotMBB;
8989
}
9090

91-
MachineBasicBlock *MachineLoop::findLoopControlBlock() {
91+
MachineBasicBlock *MachineLoop::findLoopControlBlock() const {
9292
if (MachineBasicBlock *Latch = getLoopLatch()) {
9393
if (isLoopExiting(Latch))
9494
return Latch;
@@ -151,6 +151,54 @@ MachineLoopInfo::findLoopPreheader(MachineLoop *L, bool SpeculativePreheader,
151151
return Preheader;
152152
}
153153

154+
/// Return the llvm.loop metadata node for this loop, or null if no single
/// node can be determined.
///
/// The node is read from the terminators of the IR basic blocks returned by
/// MachineBasicBlock::getBasicBlock() for the loop's machine blocks; null is
/// returned as soon as an inspected machine block has no IR basic block.
/// A candidate node is only returned if it has at least one operand and its
/// first operand is the node itself.
MDNode *MachineLoop::getLoopID() const {
155+
MDNode *LoopID = nullptr;
156+
if (auto *MBB = findLoopControlBlock()) {
157+
// A loop control block was found: take the llvm.loop node from the
158+
// terminator of its IR basic block, if that block has a terminator.
// NOTE(review): the original comment called this the "single latch block"
// case, but findLoopControlBlock() is documented as returning the latch only
// when it is an exiting block and otherwise the exiting block - confirm the
// metadata is expected on that block's terminator.
159+
const auto *BB = MBB->getBasicBlock();
160+
if (!BB)
161+
return nullptr;
162+
if (const auto *TI = BB->getTerminator())
163+
LoopID = TI->getMetadata(LLVMContext::MD_loop);
164+
} else if (auto *MBB = getHeader()) {
165+
// No loop control block was found. Fall back to scanning the blocks of
166+
// the loop and reading the metadata from every terminator that branches
167+
// to the header; all of them must carry the same node.
168+
if (const auto *H = MBB->getBasicBlock()) {
169+
// Walk over all blocks in the loop.
// (The loop variable MBB shadows the header MBB declared by the else-if.)
170+
for (auto *MBB : this->blocks()) {
171+
const auto *BB = MBB->getBasicBlock();
172+
if (!BB)
173+
return nullptr;
174+
const auto *TI = BB->getTerminator();
175+
if (!TI)
176+
return nullptr;
177+
MDNode *MD = nullptr;
178+
// Check if this terminating instruction jumps to the loop header.
179+
for (const auto *S : successors(TI)) {
180+
if (S == H) {
181+
// This is a jump to the header - gather the metadata from it.
182+
MD = TI->getMetadata(LLVMContext::MD_loop);
183+
break;
184+
}
185+
}
// MD is still null here if this block does not branch to the header or if
// its terminator has no llvm.loop metadata; both cases give up.
// NOTE(review): as written, a loop block that is not a predecessor of the
// header also makes the whole query return null - confirm this is intended.
186+
if (!MD)
187+
return nullptr;
// Remember the first node seen; any later, different node is a conflict.
188+
if (!LoopID)
189+
LoopID = MD;
190+
else if (MD != LoopID)
191+
return nullptr;
192+
}
193+
}
194+
}
// Only accept a node that has operands and whose first operand refers back
// to the node itself; anything else is discarded.
195+
if (LoopID &&
196+
(LoopID->getNumOperands() == 0 || LoopID->getOperand(0) != LoopID)) {
197+
LoopID = nullptr;
198+
}
199+
return LoopID;
200+
}
201+
154202
bool MachineLoop::isLoopInvariant(MachineInstr &I) const {
155203
MachineFunction *MF = I.getParent()->getParent();
156204
MachineRegisterInfo *MRI = &MF->getRegInfo();
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu | FileCheck %s -check-prefixes=CHECK,ALIGN
3+
; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -align-loops=32 | FileCheck %s -check-prefixes=CHECK,ALIGN32
4+
; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -align-loops=256 | FileCheck %s -check-prefixes=CHECK,ALIGN256
5+
6+
; This test checks that .p2align is generated correctly when considering
7+
; 1. -align-loops=N from llc option
8+
; 2. loop metadata node !{!"llvm.loop.align", i32 64}
9+
; The test IR is generated from below simple C file:
10+
; $ clang -S -emit-llvm loop.c
11+
; $ cat loop.c
12+
; void bar();
13+
; void var();
14+
; void foo(int a) {
15+
; for (int i = 0; i < a; ++i)
16+
; bar();
17+
; for (int i = 0; i < a; ++i)
18+
; var();
19+
; }
20+
; The difference between test1 and test2 is that test2 only attaches loop metadata to the second loop.
21+
22+
; CHECK-LABEL: test1:
23+
; ALIGN: .p2align 6, 0x90
24+
; ALIGN-NEXT: .LBB0_2: # %for.body
25+
; ALIGN: .p2align 9, 0x90
26+
; ALIGN-NEXT: .LBB0_3: # %for.body
27+
28+
; ALIGN32: .p2align 6, 0x90
29+
; ALIGN32-NEXT: .LBB0_2: # %for.body
30+
; ALIGN32: .p2align 9, 0x90
31+
; ALIGN32-NEXT: .LBB0_3: # %for.body
32+
33+
; ALIGN256: .p2align 8, 0x90
34+
; ALIGN256-NEXT: .LBB0_2: # %for.body
35+
; ALIGN256: .p2align 9, 0x90
36+
; ALIGN256-NEXT: .LBB0_3: # %for.body
37+
38+
; test1: two consecutive counted loops. The back-edge branch of each loop
; carries its own llvm.loop node: !0 on for.body and !2 on for.body5, whose
; llvm.loop.align operands (!1 and !3 at the end of this file) are 64 and 512.
define void @test1(i32 %a) nounwind {
39+
entry:
40+
%cmp12 = icmp sgt i32 %a, 0
41+
br i1 %cmp12, label %for.body, label %for.cond.cleanup4
42+
43+
for.body: ; preds = %entry, %for.body
44+
%i.013 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
45+
tail call void (...) @bar()
46+
%inc = add nuw nsw i32 %i.013, 1
47+
%exitcond.not = icmp eq i32 %inc, %a
48+
; First loop back edge, tagged with loop metadata !0.
br i1 %exitcond.not, label %for.body5, label %for.body, !llvm.loop !0
49+
50+
for.cond.cleanup4: ; preds = %for.body5, %entry
51+
ret void
52+
53+
for.body5: ; preds = %for.body, %for.body5
54+
%i1.015 = phi i32 [ %inc7, %for.body5 ], [ 0, %for.body ]
55+
tail call void (...) @var()
56+
%inc7 = add nuw nsw i32 %i1.015, 1
57+
%exitcond16.not = icmp eq i32 %inc7, %a
58+
; Second loop back edge, tagged with loop metadata !2.
br i1 %exitcond16.not, label %for.cond.cleanup4, label %for.body5, !llvm.loop !2
59+
}
60+
61+
; CHECK-LABEL: test2:
62+
; ALIGN: .p2align 4, 0x90
63+
; ALIGN-NEXT: .LBB1_2: # %for.body
64+
; ALIGN: .p2align 9, 0x90
65+
; ALIGN-NEXT: .LBB1_3: # %for.body
66+
67+
; ALIGN32: .p2align 5, 0x90
68+
; ALIGN32-NEXT: .LBB1_2: # %for.body
69+
; ALIGN32: .p2align 9, 0x90
70+
; ALIGN32-NEXT: .LBB1_3: # %for.body
71+
72+
; ALIGN256: .p2align 8, 0x90
73+
; ALIGN256-NEXT: .LBB1_2: # %for.body
74+
; ALIGN256: .p2align 9, 0x90
75+
; ALIGN256-NEXT: .LBB1_3: # %for.body
76+
; test2: same control flow as test1, but only the second loop's back-edge
; branch carries llvm.loop metadata (!2); the first loop's branch has none.
define void @test2(i32 %a) nounwind {
77+
entry:
78+
%cmp12 = icmp sgt i32 %a, 0
79+
br i1 %cmp12, label %for.body, label %for.cond.cleanup4
80+
81+
for.body: ; preds = %entry, %for.body
82+
%i.013 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
83+
tail call void (...) @bar()
84+
%inc = add nuw nsw i32 %i.013, 1
85+
%exitcond.not = icmp eq i32 %inc, %a
86+
; First loop back edge: no llvm.loop metadata attached.
br i1 %exitcond.not, label %for.body5, label %for.body
87+
88+
for.cond.cleanup4: ; preds = %for.body5, %entry
89+
ret void
90+
91+
for.body5: ; preds = %for.body, %for.body5
92+
%i1.015 = phi i32 [ %inc7, %for.body5 ], [ 0, %for.body ]
93+
tail call void (...) @var()
94+
%inc7 = add nuw nsw i32 %i1.015, 1
95+
%exitcond16.not = icmp eq i32 %inc7, %a
96+
; Second loop back edge, tagged with loop metadata !2.
br i1 %exitcond16.not, label %for.cond.cleanup4, label %for.body5, !llvm.loop !2
97+
}
98+
99+
declare void @bar(...)
100+
declare void @var(...)
101+
102+
!0 = distinct !{!0, !1}
103+
!1 = !{!"llvm.loop.align", i32 64}
104+
!2 = distinct !{!2, !3}
105+
!3 = !{!"llvm.loop.align", i32 512}

0 commit comments

Comments
 (0)