Skip to content

Commit d6bb96e

Browse files
committed
[X86] Add experimental option to separately tune alignment of innermost loops
We already have an experimental option to tune loop alignment. Its impact is very wide (and there is a suspicion that it's not always profitable). We want to have something narrower to play with. This patch adds a similar option that overrides the preferred alignment for innermost loops only. This is for experimental purposes; the default values do not change the existing behavior. Differential Revision: https://reviews.llvm.org/D94895 Reviewed By: pengfei
1 parent a8b96ea commit d6bb96e

File tree

3 files changed

+77
-0
lines changed

3 files changed

+77
-0
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "llvm/CodeGen/MachineFunction.h"
3636
#include "llvm/CodeGen/MachineInstrBuilder.h"
3737
#include "llvm/CodeGen/MachineJumpTableInfo.h"
38+
#include "llvm/CodeGen/MachineLoopInfo.h"
3839
#include "llvm/CodeGen/MachineModuleInfo.h"
3940
#include "llvm/CodeGen/MachineRegisterInfo.h"
4041
#include "llvm/CodeGen/TargetLowering.h"
@@ -76,6 +77,14 @@ static cl::opt<int> ExperimentalPrefLoopAlignment(
7677
" of the loop header PC will be 0)."),
7778
cl::Hidden);
7879

80+
// Hidden, experimental command-line knob giving the preferred alignment of
// innermost loops as a log2 byte count (e.g. 5 means 32-byte alignment).
// NOTE: the cl::init(4) value is not applied by itself; in this file
// X86TargetLowering::getPrefLoopAlignment only reads the option when it was
// passed explicitly (getNumOccurrences() is non-zero), so leaving the flag
// off keeps the existing alignment behavior.
static cl::opt<int> ExperimentalPrefInnermostLoopAlignment(
81+
"x86-experimental-pref-innermost-loop-alignment", cl::init(4),
82+
cl::desc(
83+
"Sets the preferable loop alignment for experiments (as log2 bytes) "
84+
"for innermost loops only. If specified, this option overrides "
85+
"alignment set by x86-experimental-pref-loop-alignment."),
86+
cl::Hidden);
87+
7988
static cl::opt<bool> MulConstantOptimization(
8089
"mul-constant-optimization", cl::init(true),
8190
cl::desc("Replace 'mul x, Const' with more effective instructions like "
@@ -51696,3 +51705,10 @@ X86TargetLowering::getStackProbeSize(MachineFunction &MF) const {
5169651705
.getAsInteger(0, StackProbeSize);
5169751706
return StackProbeSize;
5169851707
}
51708+
51709+
/// Returns the preferred alignment for the header of loop \p ML.
///
/// If \p ML is an innermost loop and the experimental option
/// -x86-experimental-pref-innermost-loop-alignment was given explicitly on the
/// command line, the result is 2^N bytes, where N is the option's value.
/// In every other case the generic TargetLowering preference is returned, so
/// behavior is unchanged unless the flag is passed.
///
/// \param ML the loop being queried; may be null, in which case the generic
///           preference is returned.
Align X86TargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
  // The base-class hook is invoked without an argument just below, i.e. the
  // loop parameter is defaulted, so a caller may legitimately reach this
  // override with no loop at all. Guard the dereference instead of crashing.
  if (ML && ML->isInnermost() &&
      // getNumOccurrences() distinguishes "flag passed" from "flag left at its
      // cl::init default", so the default never overrides anything.
      ExperimentalPrefInnermostLoopAlignment.getNumOccurrences())
    return Align(1ULL << ExperimentalPrefInnermostLoopAlignment);
  return TargetLowering::getPrefLoopAlignment();
}

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1408,6 +1408,8 @@ namespace llvm {
14081408
SDValue Addr, SelectionDAG &DAG)
14091409
const override;
14101410

1411+
Align getPrefLoopAlignment(MachineLoop *ML) const override;
1412+
14111413
protected:
14121414
std::pair<const TargetRegisterClass *, uint8_t>
14131415
findRepresentativeClass(const TargetRegisterInfo *TRI,
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=i686-pc-linux-gnu | FileCheck %s -check-prefix=DEFAULT
3+
; RUN: llc < %s -mtriple=i686-pc-linux-gnu -x86-experimental-pref-innermost-loop-alignment=5 | FileCheck %s -check-prefix=ALIGN32
4+
; RUN: llc < %s -mtriple=i686-pc-linux-gnu -x86-experimental-pref-loop-alignment=5 -x86-experimental-pref-innermost-loop-alignment=6 | FileCheck %s -check-prefix=ALIGN64
5+
6+
declare void @foo()
7+
8+
define void @test(i32 %n, i32 %m) {
9+
; DEFAULT-LABEL: test:
10+
; DEFAULT: .p2align 4, 0x90
11+
; DEFAULT-NEXT: .LBB0_1: # %outer
12+
; DEFAULT-NEXT: # =>This Loop Header: Depth=1
13+
; DEFAULT-NEXT: # Child Loop BB0_2 Depth 2
14+
; DEFAULT: .p2align 4, 0x90
15+
; DEFAULT-NEXT: .LBB0_2: # %inner
16+
; DEFAULT-NEXT: # Parent Loop BB0_1 Depth=1
17+
18+
; ALIGN32-LABEL: test:
19+
; ALIGN32: .p2align 4, 0x90
20+
; ALIGN32-NEXT: .LBB0_1: # %outer
21+
; ALIGN32-NEXT: # =>This Loop Header: Depth=1
22+
; ALIGN32-NEXT: # Child Loop BB0_2 Depth 2
23+
; ALIGN32: .p2align 5, 0x90
24+
; ALIGN32-NEXT: .LBB0_2: # %inner
25+
; ALIGN32-NEXT: # Parent Loop BB0_1 Depth=1
26+
; ALIGN32-NEXT: # => This Inner Loop Header: Depth=2
27+
28+
; ALIGN64-LABEL: test:
29+
; ALIGN64: .p2align 5, 0x90
30+
; ALIGN64-NEXT: .LBB0_1: # %outer
31+
; ALIGN64-NEXT: # =>This Loop Header: Depth=1
32+
; ALIGN64-NEXT: # Child Loop BB0_2 Depth 2
33+
; ALIGN64: .p2align 6, 0x90
34+
; ALIGN64-NEXT: .LBB0_2: # %inner
35+
; ALIGN64-NEXT: # Parent Loop BB0_1 Depth=1
36+
; ALIGN64-NEXT: # => This Inner Loop Header: Depth=2
37+
38+
entry:
39+
br label %outer
40+
41+
outer:
42+
%outer.iv = phi i32 [0, %entry], [%outer.iv.next, %outer_bb]
43+
br label %inner
44+
45+
inner:
46+
%inner.iv = phi i32 [0, %outer], [%inner.iv.next, %inner]
47+
call void @foo()
48+
%inner.iv.next = add i32 %inner.iv, 1
49+
%inner.cond = icmp ne i32 %inner.iv.next, %m
50+
br i1 %inner.cond, label %inner, label %outer_bb
51+
52+
outer_bb:
53+
%outer.iv.next = add i32 %outer.iv, 1
54+
%outer.cond = icmp ne i32 %outer.iv.next, %n
55+
br i1 %outer.cond, label %outer, label %exit
56+
57+
exit:
58+
ret void
59+
}

0 commit comments

Comments
 (0)