Skip to content

Commit 29ee66f

Browse files
authored
[RISCV] Macro-fusion support for veyron-v1 CPU. (#70012)
Support was added for the following fusions: auipc-addi, slli-srli, and ld-add. Some parts of the code became repetitive, so a small refactoring of the existing lui-addi fusion was done.
1 parent 86fa4b2 commit 29ee66f

File tree

5 files changed

+285
-21
lines changed

5 files changed

+285
-21
lines changed

llvm/lib/Target/RISCV/RISCVFeatures.td

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -970,6 +970,16 @@ def TuneLUIADDIFusion
970970
: SubtargetFeature<"lui-addi-fusion", "HasLUIADDIFusion",
971971
"true", "Enable LUI+ADDI macrofusion">;
972972

973+
def TuneAUIPCADDIFusion
974+
: SubtargetFeature<"auipc-addi-fusion", "HasAUIPCADDIFusion",
975+
"true", "Enable AUIPC+ADDI macrofusion">;
976+
def TuneShiftedZExtFusion
977+
: SubtargetFeature<"shifted-zext-fusion", "HasShiftedZExtFusion",
978+
"true", "Enable SLLI+SRLI to be fused when computing (shifted) zero extension">;
979+
def TuneLDADDFusion
980+
: SubtargetFeature<"ld-add-fusion", "HasLDADDFusion",
981+
"true", "Enable LD+ADD macrofusion.">;
982+
973983
def TuneNoDefaultUnroll
974984
: SubtargetFeature<"no-default-unroll", "EnableDefaultUnroll", "false",
975985
"Disable default unroll preference.">;
@@ -987,9 +997,12 @@ def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
987997
[TuneNoDefaultUnroll,
988998
TuneShortForwardBranchOpt]>;
989999

990-
def TuneVentanaVeyron : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "VentanaVeyron",
991-
"Ventana-Veyron Series processors",
992-
[TuneLUIADDIFusion]>;
1000+
def TuneVeyronFusions : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "VentanaVeyron",
1001+
"Ventana Veyron-Series processors",
1002+
[TuneLUIADDIFusion,
1003+
TuneAUIPCADDIFusion,
1004+
TuneShiftedZExtFusion,
1005+
TuneLDADDFusion]>;
9931006

9941007
// Assume that lock-free native-width atomics are available, even if the target
9951008
// and operating system combination would not usually provide them. The user

llvm/lib/Target/RISCV/RISCVMacroFusion.cpp

Lines changed: 105 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,101 @@
1818

1919
using namespace llvm;
2020

21+
// Shared register constraint for all two-instruction fusion patterns.
//
// FirstDest is the register written by the leading instruction. SecondMI is
// accepted only when its operand 1 is a register equal to FirstDest and:
//  - FirstDest is virtual and has exactly one non-debug use, or
//  - FirstDest is not virtual and SecondMI's operand 0 is the same register.
static bool checkRegisters(Register FirstDest, const MachineInstr &SecondMI) {
  const auto &SrcOperand = SecondMI.getOperand(1);
  if (!SrcOperand.isReg() || SrcOperand.getReg() != FirstDest)
    return false;

  // Non-virtual destination: the second instruction has to overwrite the very
  // same register it reads.
  if (!FirstDest.isVirtual())
    return SecondMI.getOperand(0).getReg() == FirstDest;

  // Virtual destination: SecondMI must be its only (non-debug) user.
  return SecondMI.getMF()->getRegInfo().hasOneNonDBGUse(FirstDest);
}
36+
37+
// Fuse load with add:
38+
// add rd, rs1, rs2
39+
// ld rd, 0(rd)
40+
static bool isLDADD(const MachineInstr *FirstMI, const MachineInstr &SecondMI) {
41+
if (SecondMI.getOpcode() != RISCV::LD)
42+
return false;
43+
44+
if (!SecondMI.getOperand(2).isImm())
45+
return false;
46+
47+
if (SecondMI.getOperand(2).getImm() != 0)
48+
return false;
49+
50+
// Given SecondMI, when FirstMI is unspecified, we must return
51+
// if SecondMI may be part of a fused pair at all.
52+
if (!FirstMI)
53+
return true;
54+
55+
if (FirstMI->getOpcode() != RISCV::ADD)
56+
return true;
57+
58+
return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
59+
}
60+
61+
// Fuse these patterns:
62+
//
63+
// slli rd, rs1, 32
64+
// srli rd, rd, x
65+
// where 0 <= x <= 32
66+
//
67+
// and
68+
//
69+
// slli rd, rs1, 48
70+
// srli rd, rd, x
71+
static bool isShiftedZExt(const MachineInstr *FirstMI,
72+
const MachineInstr &SecondMI) {
73+
if (SecondMI.getOpcode() != RISCV::SRLI)
74+
return false;
75+
76+
if (!SecondMI.getOperand(2).isImm())
77+
return false;
78+
79+
unsigned SRLIImm = SecondMI.getOperand(2).getImm();
80+
bool IsShiftBy48 = SRLIImm == 48;
81+
if (SRLIImm > 32 && !IsShiftBy48)
82+
return false;
83+
84+
// Given SecondMI, when FirstMI is unspecified, we must return
85+
// if SecondMI may be part of a fused pair at all.
86+
if (!FirstMI)
87+
return true;
88+
89+
if (FirstMI->getOpcode() != RISCV::SLLI)
90+
return false;
91+
92+
unsigned SLLIImm = FirstMI->getOperand(2).getImm();
93+
if (IsShiftBy48 ? (SLLIImm != 48) : (SLLIImm != 32))
94+
return false;
95+
96+
return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
97+
}
98+
99+
// Fuse AUIPC followed by ADDI
100+
// auipc rd, imm20
101+
// addi rd, rd, imm12
102+
static bool isAUIPCADDI(const MachineInstr *FirstMI,
103+
const MachineInstr &SecondMI) {
104+
if (SecondMI.getOpcode() != RISCV::ADDI)
105+
return false;
106+
// Assume the 1st instr to be a wildcard if it is unspecified.
107+
if (!FirstMI)
108+
return true;
109+
110+
if (FirstMI->getOpcode() != RISCV::AUIPC)
111+
return false;
112+
113+
return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
114+
}
115+
21116
// Fuse LUI followed by ADDI or ADDIW.
22117
// rd = imm[31:0] which decomposes to
23118
// lui rd, imm[31:12]
@@ -27,29 +122,14 @@ static bool isLUIADDI(const MachineInstr *FirstMI,
27122
if (SecondMI.getOpcode() != RISCV::ADDI &&
28123
SecondMI.getOpcode() != RISCV::ADDIW)
29124
return false;
30-
31125
// Assume the 1st instr to be a wildcard if it is unspecified.
32126
if (!FirstMI)
33127
return true;
34128

35129
if (FirstMI->getOpcode() != RISCV::LUI)
36130
return false;
37131

38-
Register FirstDest = FirstMI->getOperand(0).getReg();
39-
40-
// Destination of LUI should be the ADDI(W) source register.
41-
if (SecondMI.getOperand(1).getReg() != FirstDest)
42-
return false;
43-
44-
// If the input is virtual make sure this is the only user.
45-
if (FirstDest.isVirtual()) {
46-
auto &MRI = SecondMI.getMF()->getRegInfo();
47-
return MRI.hasOneNonDBGUse(FirstDest);
48-
}
49-
50-
// If the FirstMI destination is non-virtual, it should match the SecondMI
51-
// destination.
52-
return SecondMI.getOperand(0).getReg() == FirstDest;
132+
return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
53133
}
54134

55135
static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
@@ -61,6 +141,15 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
61141
if (ST.hasLUIADDIFusion() && isLUIADDI(FirstMI, SecondMI))
62142
return true;
63143

144+
if (ST.hasAUIPCADDIFusion() && isAUIPCADDI(FirstMI, SecondMI))
145+
return true;
146+
147+
if (ST.hasShiftedZExtFusion() && isShiftedZExt(FirstMI, SecondMI))
148+
return true;
149+
150+
if (ST.hasLDADDFusion() && isLDADD(FirstMI, SecondMI))
151+
return true;
152+
64153
return false;
65154
}
66155

llvm/lib/Target/RISCV/RISCVProcessors.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,7 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1",
254254
FeatureStdExtZicbop,
255255
FeatureStdExtZicboz,
256256
FeatureVendorXVentanaCondOps],
257-
[TuneVentanaVeyron]>;
257+
[TuneVeyronFusions]>;
258258

259259
def XIANGSHAN_NANHU : RISCVProcessorModel<"xiangshan-nanhu",
260260
NoSchedModel,

llvm/lib/Target/RISCV/RISCVSubtarget.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,10 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
193193
return UserReservedRegister[i];
194194
}
195195

196-
bool hasMacroFusion() const { return hasLUIADDIFusion(); }
196+
bool hasMacroFusion() const {
197+
return hasLUIADDIFusion() || hasAUIPCADDIFusion() ||
198+
hasShiftedZExtFusion() || hasLDADDFusion();
199+
}
197200

198201
// Vector codegen related methods.
199202
bool hasVInstructions() const { return HasStdExtZve32x; }
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
# REQUIRES: asserts
2+
# RUN: llc -mtriple=riscv64-linux-gnu -mcpu=veyron-v1 -x=mir < %s \
3+
# RUN: -debug-only=machine-scheduler -start-before=machine-scheduler 2>&1 \
4+
# RUN: -mattr=+lui-addi-fusion,+auipc-addi-fusion,+shifted-zext-fusion,+ld-add-fusion \
5+
# RUN: | FileCheck %s
6+
7+
# CHECK: lui_addi:%bb.0
8+
# CHECK: Macro fuse: {{.*}}LUI - ADDI
9+
---
10+
name: lui_addi
11+
tracksRegLiveness: true
12+
body: |
13+
bb.0.entry:
14+
liveins: $x10
15+
%1:gpr = COPY $x10
16+
%2:gpr = LUI 1
17+
%3:gpr = XORI %1, 2
18+
%4:gpr = ADDI %2, 3
19+
$x10 = COPY %3
20+
$x11 = COPY %4
21+
PseudoRET
22+
...
23+
24+
# CHECK: auipc_addi
25+
# CHECK: Macro fuse: {{.*}}AUIPC - ADDI
26+
---
27+
name: auipc_addi
28+
tracksRegLiveness: true
29+
body: |
30+
bb.0.entry:
31+
liveins: $x10
32+
%1:gpr = COPY $x10
33+
%2:gpr = AUIPC 1
34+
%3:gpr = XORI %1, 2
35+
%4:gpr = ADDI %2, 3
36+
$x10 = COPY %3
37+
$x11 = COPY %4
38+
PseudoRET
39+
...
40+
41+
# CHECK: slli_srli
42+
# CHECK: Macro fuse: {{.*}}SLLI - SRLI
43+
---
44+
name: slli_srli
45+
tracksRegLiveness: true
46+
body: |
47+
bb.0.entry:
48+
liveins: $x10
49+
%1:gpr = COPY $x10
50+
%2:gpr = SLLI %1, 32
51+
%3:gpr = XORI %1, 3
52+
%4:gpr = SRLI %2, 4
53+
$x10 = COPY %3
54+
$x11 = COPY %4
55+
PseudoRET
56+
...
57+
58+
# CHECK: slli_srli_48
59+
# CHECK: Macro fuse: {{.*}}SLLI - SRLI
60+
---
61+
name: slli_srli_48
62+
tracksRegLiveness: true
63+
body: |
64+
bb.0.entry:
65+
liveins: $x10
66+
%1:gpr = COPY $x10
67+
%2:gpr = SLLI %1, 48
68+
%3:gpr = XORI %1, 3
69+
%4:gpr = SRLI %2, 48
70+
$x10 = COPY %3
71+
$x11 = COPY %4
72+
PseudoRET
73+
...
74+
75+
# CHECK: slli_srli_no_fusion_0
76+
# CHECK-NOT: Macro fuse: {{.*}}SLLI - SRLI
77+
---
78+
name: slli_srli_no_fusion_0
79+
tracksRegLiveness: true
80+
body: |
81+
bb.0.entry:
82+
liveins: $x10
83+
%1:gpr = COPY $x10
84+
%2:gpr = SLLI %1, 32
85+
%3:gpr = XORI %1, 3
86+
%4:gpr = SRLI %2, 33
87+
$x10 = COPY %3
88+
$x11 = COPY %4
89+
PseudoRET
90+
...
91+
92+
# CHECK: slli_srli_no_fusion_1
93+
# CHECK-NOT: Macro fuse: {{.*}}SLLI - SRLI
94+
---
95+
name: slli_srli_no_fusion_1
96+
tracksRegLiveness: true
97+
body: |
98+
bb.0.entry:
99+
liveins: $x10
100+
%1:gpr = COPY $x10
101+
%2:gpr = SLLI %1, 48
102+
%3:gpr = XORI %1, 3
103+
%4:gpr = SRLI %2, 4
104+
$x10 = COPY %3
105+
$x11 = COPY %4
106+
PseudoRET
107+
...
108+
109+
# CHECK: slli_srli_no_fusion_2
110+
# CHECK-NOT: Macro fuse: {{.*}}SLLI - SRLI
111+
---
112+
name: slli_srli_no_fusion_2
113+
tracksRegLiveness: true
114+
body: |
115+
bb.0.entry:
116+
liveins: $x10
117+
%1:gpr = COPY $x10
118+
%2:gpr = SLLI %1, 31
119+
%3:gpr = XORI %1, 3
120+
%4:gpr = SRLI %2, 4
121+
$x10 = COPY %3
122+
$x11 = COPY %4
123+
PseudoRET
124+
...
125+
126+
# CHECK: slli_srli_no_fusion_3
127+
# CHECK-NOT: Macro fuse: {{.*}}SLLI - SRLI
128+
---
129+
name: slli_srli_no_fusion_3
130+
tracksRegLiveness: true
131+
body: |
132+
bb.0.entry:
133+
liveins: $x10
134+
%1:gpr = COPY $x10
135+
%2:gpr = SLLI %1, 31
136+
%3:gpr = XORI %1, 3
137+
%4:gpr = SRLI %2, 48
138+
$x10 = COPY %3
139+
$x11 = COPY %4
140+
PseudoRET
141+
...
142+
143+
# CHECK: ld_add
144+
# CHECK: Macro fuse: {{.*}}ADD - LD
145+
---
146+
name: ld_add
147+
tracksRegLiveness: true
148+
body: |
149+
bb.0.entry:
150+
liveins: $x10, $x11
151+
%1:gpr = COPY $x10
152+
%2:gpr = COPY $x11
153+
%3:gpr = ADD %1, %2
154+
%4:gpr = XORI %2, 3
155+
%5:gpr = LD %3, 0
156+
$x10 = COPY %4
157+
$x11 = COPY %5
158+
PseudoRET
159+
...

0 commit comments

Comments
 (0)