Skip to content

Commit 4c0fdcd

Browse files
author
Sumanth Gundapaneni
authored
[Hexagon] Generate absolute-set load/store instructions. (#82034)
The optimization finds loads/stores of a specific form and translates the first load/store to an absolute-set form, thereby optimizing out the transfer and eliminating the constant extenders.
1 parent f204aee commit 4c0fdcd

File tree

5 files changed

+424
-0
lines changed

5 files changed

+424
-0
lines changed

llvm/lib/Target/Hexagon/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ add_llvm_target(HexagonCodeGen
3333
HexagonFrameLowering.cpp
3434
HexagonGenExtract.cpp
3535
HexagonGenInsert.cpp
36+
HexagonGenMemAbsolute.cpp
3637
HexagonGenMux.cpp
3738
HexagonGenPredicate.cpp
3839
HexagonHardwareLoops.cpp
Lines changed: 274 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,274 @@
1+
//===--- HexagonGenMemAbsolute.cpp - Generate Load/Store Set Absolute ---===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// This pass traverses through all the basic blocks in a function and converts
10+
// an indexed load/store with offset "0" to an absolute-set load/store
11+
// instruction as long as the use of the register in the new instruction
12+
// dominates the rest of the uses and there are more than 2 uses.
13+
14+
#include "HexagonTargetMachine.h"
15+
#include "llvm/ADT/Statistic.h"
16+
#include "llvm/CodeGen/MachineDominators.h"
17+
#include "llvm/CodeGen/MachineFunctionPass.h"
18+
#include "llvm/CodeGen/MachineInstrBuilder.h"
19+
#include "llvm/CodeGen/MachineRegisterInfo.h"
20+
#include "llvm/CodeGen/Passes.h"
21+
#include "llvm/CodeGen/TargetInstrInfo.h"
22+
#include "llvm/Support/Debug.h"
23+
#include "llvm/Support/raw_ostream.h"
24+
#include "llvm/Target/TargetMachine.h"
25+
26+
#define DEBUG_TYPE "hexagon-abs"
27+
28+
using namespace llvm;
29+
30+
STATISTIC(HexagonNumLoadAbsConversions,
31+
"Number of Load instructions converted to absolute-set form");
32+
STATISTIC(HexagonNumStoreAbsConversions,
33+
"Number of Store instructions converted to absolute-set form");
34+
35+
namespace llvm {
36+
FunctionPass *createHexagonGenMemAbsolute();
37+
void initializeHexagonGenMemAbsolutePass(PassRegistry &Registry);
38+
} // namespace llvm
39+
40+
namespace {
41+
42+
class HexagonGenMemAbsolute : public MachineFunctionPass {
43+
const HexagonInstrInfo *TII;
44+
MachineRegisterInfo *MRI;
45+
const TargetRegisterInfo *TRI;
46+
47+
public:
48+
static char ID;
49+
HexagonGenMemAbsolute() : MachineFunctionPass(ID), TII(0), MRI(0), TRI(0) {
50+
initializeHexagonGenMemAbsolutePass(*PassRegistry::getPassRegistry());
51+
}
52+
53+
StringRef getPassName() const override {
54+
return "Hexagon Generate Load/Store Set Absolute Address Instruction";
55+
}
56+
57+
void getAnalysisUsage(AnalysisUsage &AU) const override {
58+
MachineFunctionPass::getAnalysisUsage(AU);
59+
AU.addRequired<MachineDominatorTree>();
60+
AU.addPreserved<MachineDominatorTree>();
61+
}
62+
63+
bool runOnMachineFunction(MachineFunction &Fn) override;
64+
65+
private:
66+
static bool isValidIndexedLoad(int &Opcode, int &NewOpcode);
67+
static bool isValidIndexedStore(int &Opcode, int &NewOpcode);
68+
};
69+
} // namespace
70+
71+
char HexagonGenMemAbsolute::ID = 0;
72+
73+
INITIALIZE_PASS(HexagonGenMemAbsolute, "hexagon-gen-load-absolute",
74+
"Hexagon Generate Load/Store Set Absolute Address Instruction",
75+
false, false)
76+
77+
/// Converts "constant transfer + zero-offset indexed load/store" pairs into a
/// single absolute-set load/store.
///
/// For every CONST32 / A2_tfrsi whose result register is the base register of
/// a supported indexed load/store with offset 0, that load/store is replaced
/// by its absolute-set form, which also defines the base register, and the
/// constant transfer is erased. The rewrite only happens when the candidate
/// load/store dominates every non-debug use of the register and the register
/// has at least three non-debug uses.
///
/// \returns true if at least one instruction pair was rewritten, false if the
/// function was left untouched.
bool HexagonGenMemAbsolute::runOnMachineFunction(MachineFunction &Fn) {
  if (skipFunction(Fn.getFunction()))
    return false;

  TII = Fn.getSubtarget<HexagonSubtarget>().getInstrInfo();
  MRI = &Fn.getRegInfo();
  TRI = Fn.getRegInfo().getTargetRegisterInfo();

  MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();

  // Tries to fold the constant produced by MI into the instruction that owns
  // the first entry of the result register's use list. On success the new
  // absolute-set instruction has been inserted and the old load/store erased;
  // the caller is responsible for erasing MI. On failure nothing is modified.
  auto TryConvert = [&](MachineInstr &MI) -> bool {
    const unsigned Opc = MI.getOpcode();
    if (Opc != Hexagon::CONST32 && Opc != Hexagon::A2_tfrsi)
      return false;

    const MachineOperand &DefOp = MI.getOperand(0);
    if (!DefOp.isReg() || !DefOp.isDef())
      return false;

    // Only plain immediates and global addresses can be folded.
    const MachineOperand &AddrOp = MI.getOperand(1);
    const bool IsGlobal = AddrOp.isGlobal();
    if (!AddrOp.isImm() && !IsGlobal)
      return false;

    const Register DstReg = DefOp.getReg();
    if (MRI->use_nodbg_empty(DstReg))
      return false;

    MachineInstr *NextMI = MRI->use_nodbg_begin(DstReg)->getParent();
    int NextOpc = NextMI->getOpcode();
    int NewOpc = 0;
    const bool IsLoad = isValidIndexedLoad(NextOpc, NewOpc);
    if (!IsLoad && !isValidIndexedStore(NextOpc, NewOpc))
      return false;

    // Base and Offset positions for load and store instructions
    // Load R(dest), R(base), Imm -> R(dest) = mem(R(base) + Imm)
    // Store R(base), Imm, R (src) -> mem(R(base) + Imm) = R(src)
    unsigned BaseRegPos = 0, ImmPos = 0;
    if (!TII->getBaseAndOffsetPosition(*NextMI, BaseRegPos, ImmPos))
      return false;
    const unsigned RegPos = IsLoad ? 0 : 2;

    const MachineOperand *BaseOp = nullptr;
    int64_t Offset = 0;
    bool Scalable = false;
    if (!TII->getMemOperandWithOffset(*NextMI, BaseOp, Offset, Scalable, TRI))
      return false;

    // Ensure BaseOp is non-null and register type, with a fixed offset.
    if (!BaseOp || !BaseOp->isReg() || Scalable)
      return false;

    // The access must be exactly mem(DstReg + 0).
    if (BaseOp->getReg() != DstReg || Offset != 0)
      return false;

    const MachineOperand &ValOp = NextMI->getOperand(RegPos);
    if (!ValOp.isReg())
      return false;

    const Register LoadStoreReg = ValOp.getReg();
    const unsigned SubReg = ValOp.getSubReg();

    // Bail out if the loaded/stored register and the base are the same (def
    // and use on same register).
    if (LoadStoreReg == DstReg)
      return false;

    // Insert the absolute-set instruction only if the candidate dominates
    // every non-debug use of the base register and there are more than 2
    // such uses (uses are counted per operand).
    unsigned NumUses = 0;
    for (const MachineOperand &Use : MRI->use_nodbg_operands(DstReg)) {
      if (!MDT.dominates(NextMI, Use.getParent()))
        return false;
      ++NumUses;
    }
    if (NumUses < 3)
      return false;

    // If we reach here, we have met all the conditions required for the
    // replacement of the absolute instruction.
    LLVM_DEBUG({
      dbgs() << "Found a pair of instructions for absolute-set "
             << (IsLoad ? "load" : "store") << "\n";
      dbgs() << MI;
      dbgs() << *NextMI;
    });

    MachineBasicBlock *ParentBlock = NextMI->getParent();
    MachineInstrBuilder MIB;
    if (IsLoad) { // Insert absolute-set load instruction
      ++HexagonNumLoadAbsConversions;
      MIB = BuildMI(*ParentBlock, NextMI, NextMI->getDebugLoc(),
                    TII->get(NewOpc), LoadStoreReg)
                .addReg(DstReg, RegState::Define);
    } else { // Insert absolute-set store instruction
      ++HexagonNumStoreAbsConversions;
      MIB = BuildMI(*ParentBlock, NextMI, NextMI->getDebugLoc(),
                    TII->get(NewOpc), DstReg);
    }

    if (IsGlobal)
      MIB.addGlobalAddress(AddrOp.getGlobal(), AddrOp.getOffset(),
                           AddrOp.getTargetFlags());
    else
      MIB.addImm(AddrOp.getImm());

    if (IsLoad)
      MIB->getOperand(0).setSubReg(SubReg);
    else
      MIB.addReg(LoadStoreReg, 0, SubReg);

    // The new instruction accesses the same location as the one it replaces,
    // so carry over its memory operands instead of dropping them.
    MIB.cloneMemRefs(*NextMI);

    LLVM_DEBUG(dbgs() << "Replaced with " << *MIB << "\n");

    // Erase the load/store that got replaced; MI is erased by the caller.
    ParentBlock->erase(NextMI);
    return true;
  };

  bool Changed = false;
  for (MachineBasicBlock &MBB : Fn) {
    for (MachineBasicBlock::iterator MII = MBB.begin(); MII != MBB.end();) {
      if (!TryConvert(*MII)) {
        ++MII;
        continue;
      }
      // MII still refers to the constant transfer. The replaced load/store was
      // erased above and the new instruction sits where it was, so the
      // iterator returned by erase() is always valid.
      MII = MBB.erase(MII);
      Changed = true;
    }
  }

  return Changed;
}
215+
216+
bool HexagonGenMemAbsolute::isValidIndexedLoad(int &Opc, int &NewOpc) {
217+
218+
bool Result = true;
219+
switch (Opc) {
220+
case Hexagon::L2_loadrb_io:
221+
NewOpc = Hexagon::L4_loadrb_ap;
222+
break;
223+
case Hexagon::L2_loadrh_io:
224+
NewOpc = Hexagon::L4_loadrh_ap;
225+
break;
226+
case Hexagon::L2_loadri_io:
227+
NewOpc = Hexagon::L4_loadri_ap;
228+
break;
229+
case Hexagon::L2_loadrd_io:
230+
NewOpc = Hexagon::L4_loadrd_ap;
231+
break;
232+
case Hexagon::L2_loadruh_io:
233+
NewOpc = Hexagon::L4_loadruh_ap;
234+
break;
235+
case Hexagon::L2_loadrub_io:
236+
NewOpc = Hexagon::L4_loadrub_ap;
237+
break;
238+
default:
239+
Result = false;
240+
}
241+
242+
return Result;
243+
}
244+
245+
bool HexagonGenMemAbsolute::isValidIndexedStore(int &Opc, int &NewOpc) {
246+
247+
bool Result = true;
248+
switch (Opc) {
249+
case Hexagon::S2_storerd_io:
250+
NewOpc = Hexagon::S4_storerd_ap;
251+
break;
252+
case Hexagon::S2_storeri_io:
253+
NewOpc = Hexagon::S4_storeri_ap;
254+
break;
255+
case Hexagon::S2_storerh_io:
256+
NewOpc = Hexagon::S4_storerh_ap;
257+
break;
258+
case Hexagon::S2_storerb_io:
259+
NewOpc = Hexagon::S4_storerb_ap;
260+
break;
261+
default:
262+
Result = false;
263+
}
264+
265+
return Result;
266+
}
267+
268+
//===----------------------------------------------------------------------===//
269+
// Public Constructor Functions
270+
//===----------------------------------------------------------------------===//
271+
272+
/// Returns a newly allocated instance of the HexagonGenMemAbsolute pass.
FunctionPass *llvm::createHexagonGenMemAbsolute() {
  FunctionPass *const Pass = new HexagonGenMemAbsolute();
  return Pass;
}

llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,10 @@ static cl::opt<bool>
9292
static cl::opt<bool> DisableHSDR("disable-hsdr", cl::init(false), cl::Hidden,
9393
cl::desc("Disable splitting double registers"));
9494

95+
// Hidden option "hexagon-mem-abs" (initialised to true): when set,
// addPreRegAlloc() adds the HexagonGenMemAbsolute pass.
static cl::opt<bool>
96+
EnableGenMemAbs("hexagon-mem-abs", cl::init(true), cl::Hidden,
97+
cl::desc("Generate absolute set instructions"));
98+
9599
static cl::opt<bool> EnableBitSimplify("hexagon-bit", cl::init(true),
96100
cl::Hidden, cl::desc("Bit simplification"));
97101

@@ -151,6 +155,7 @@ namespace llvm {
151155
void initializeHexagonCopyToCombinePass(PassRegistry&);
152156
void initializeHexagonEarlyIfConversionPass(PassRegistry&);
153157
void initializeHexagonExpandCondsetsPass(PassRegistry&);
158+
void initializeHexagonGenMemAbsolutePass(PassRegistry &);
154159
void initializeHexagonGenMuxPass(PassRegistry&);
155160
void initializeHexagonHardwareLoopsPass(PassRegistry&);
156161
void initializeHexagonLoopIdiomRecognizeLegacyPassPass(PassRegistry &);
@@ -177,6 +182,7 @@ namespace llvm {
177182
FunctionPass *createHexagonFixupHwLoops();
178183
FunctionPass *createHexagonGenExtract();
179184
FunctionPass *createHexagonGenInsert();
185+
FunctionPass *createHexagonGenMemAbsolute();
180186
FunctionPass *createHexagonGenMux();
181187
FunctionPass *createHexagonGenPredicate();
182188
FunctionPass *createHexagonHardwareLoops();
@@ -211,6 +217,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeHexagonTarget() {
211217
initializeHexagonConstPropagationPass(PR);
212218
initializeHexagonCopyToCombinePass(PR);
213219
initializeHexagonEarlyIfConversionPass(PR);
220+
initializeHexagonGenMemAbsolutePass(PR);
214221
initializeHexagonGenMuxPass(PR);
215222
initializeHexagonHardwareLoopsPass(PR);
216223
initializeHexagonLoopIdiomRecognizeLegacyPassPass(PR);
@@ -413,6 +420,8 @@ void HexagonPassConfig::addPreRegAlloc() {
413420
insertPass(&RegisterCoalescerID, &HexagonExpandCondsetsID);
414421
if (!DisableStoreWidening)
415422
addPass(createHexagonStoreWidening());
423+
if (EnableGenMemAbs)
424+
addPass(createHexagonGenMemAbsolute());
416425
if (!DisableHardwareLoops)
417426
addPass(createHexagonHardwareLoops());
418427
}
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
; RUN: llc -march=hexagon -O3 -hexagon-small-data-threshold=0 < %s | FileCheck %s
2+
; This test checks the case if there are more than 2 uses of a constant address, move the
3+
; value in to a register and replace all instances of constant with the register.
4+
; The GenMemAbsolute pass generates an absolute-set instruction if there are more
5+
; than 2 uses of this register.
6+
7+
; CHECK: loadi32_3
8+
; CHECK-NOT: r{{[0-9]+}} = memw(##441652)
9+
; CHECK-NOT: r{{[0-9]+}} = memw(r{{[0-9]+}}+#0)
10+
; CHECK:r{{[0-9]+}} = memw(r[[REG:[0-9]+]]=##441652)
11+
; CHECK-NOT: r{{[0-9]+}} = memw(##441652)
12+
; CHECK:r{{[0-9]+}} = memw(r[[REG]]+#0)
13+
; CHECK-NOT: r{{[0-9]+}} = memw(##441652)
14+
; CHECK:r{{[0-9]+}} = memw(r[[REG]]+#0)
15+
; CHECK-NOT: r{{[0-9]+}} = memw(##441652)
16+
17+
; Three volatile i32 loads from the fixed address 441652; the loaded values
; are unused. Exercises the "more than 2 uses" case with an immediate address.
define void @loadi32_3() #0 {
18+
entry:
19+
%0 = load volatile i32, ptr inttoptr (i32 441652 to ptr), align 4
20+
%1 = load volatile i32, ptr inttoptr (i32 441652 to ptr), align 4
21+
%2 = load volatile i32, ptr inttoptr (i32 441652 to ptr), align 4
22+
ret void
23+
}
24+
25+
; CHECK: loadi32_2
26+
; CHECK-NOT: r{{[0-9]+}} = ##441652
27+
; CHECK: r{{[0-9]+}} = memw(##441652)
28+
; CHECK: r{{[0-9]+}} = memw(##441652)
29+
30+
; Only two volatile i32 loads from the fixed address 441652; per the CHECK
; lines above, both are expected to stay in absolute-addressing form.
define void @loadi32_2() #0 {
31+
entry:
32+
%0 = load volatile i32, ptr inttoptr (i32 441652 to ptr), align 4
33+
%1 = load volatile i32, ptr inttoptr (i32 441652 to ptr), align 4
34+
ret void
35+
}
36+
37+
; CHECK: loadi32_abs_global_3
38+
; CHECK-NOT: r{{[0-9]+}} = memw(##globalInt)
39+
; CHECK-NOT: r{{[0-9]+}} = memw(r{{[0-9]+}}+#0)
40+
; CHECK:r{{[0-9]+}} = memw(r[[REG:[0-9]+]]=##globalInt)
41+
; CHECK-NOT: r{{[0-9]+}} = memw(##globalInt)
42+
; CHECK:r{{[0-9]+}} = memw(r[[REG]]+#0)
43+
; CHECK-NOT: r{{[0-9]+}} = memw(##globalInt)
44+
; CHECK:r{{[0-9]+}} = memw(r[[REG]]+#0)
45+
; CHECK-NOT: r{{[0-9]+}} = memw(##globalInt)
46+
47+
@globalInt = external global i32, align 8
48+
; Three volatile i32 loads from the external global @globalInt; the loaded
; values are unused. Exercises the "more than 2 uses" case with a global.
define void @loadi32_abs_global_3() #0 {
49+
entry:
50+
%0 = load volatile i32, ptr @globalInt, align 4
51+
%1 = load volatile i32, ptr @globalInt, align 4
52+
%2 = load volatile i32, ptr @globalInt, align 4
53+
ret void
54+
}
55+
56+
; CHECK: loadi32_abs_global_2
57+
; CHECK-NOT:r[[REG:[0-9]+]] = ##globalInt
58+
; CHECK:r{{[0-9]+}} = memw(##globalInt)
59+
; CHECK:r{{[0-9]+}} = memw(##globalInt)
60+
61+
; Only two volatile i32 loads from @globalInt; per the CHECK lines above,
; both are expected to stay in absolute-addressing form.
define void @loadi32_abs_global_2() #0 {
62+
entry:
63+
%0 = load volatile i32, ptr @globalInt, align 4
64+
%1 = load volatile i32, ptr @globalInt, align 4
65+
ret void
66+
}
67+
68+
attributes #0 = { nounwind }

0 commit comments

Comments
 (0)