Skip to content

[Hexagon] Generate absolute-set load/store instructions. #82034

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/lib/Target/Hexagon/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ add_llvm_target(HexagonCodeGen
HexagonFrameLowering.cpp
HexagonGenExtract.cpp
HexagonGenInsert.cpp
HexagonGenMemAbsolute.cpp
HexagonGenMux.cpp
HexagonGenPredicate.cpp
HexagonHardwareLoops.cpp
Expand Down
274 changes: 274 additions & 0 deletions llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,274 @@
//===--- HexagonGenMemAbsolute.cpp - Generate Load/Store Set Absolute ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// This pass traverses all the basic blocks in a function and converts an
// indexed load/store with offset "0" to an absolute-set load/store
// instruction, as long as the use of the register in the new instruction
// dominates the rest of the uses and there are more than 2 uses.

#include "HexagonTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "hexagon-abs"

using namespace llvm;

STATISTIC(HexagonNumLoadAbsConversions,
"Number of Load instructions converted to absolute-set form");
STATISTIC(HexagonNumStoreAbsConversions,
"Number of Store instructions converted to absolute-set form");

namespace llvm {
// Factory returning a new instance of the pass; defined at the end of this
// file.
FunctionPass *createHexagonGenMemAbsolute();
// Pass-registry initializer; called from the HexagonGenMemAbsolute
// constructor below.
void initializeHexagonGenMemAbsolutePass(PassRegistry &Registry);
} // namespace llvm

namespace {

/// Machine-function pass that folds a constant/global address materialization
/// and a following offset-0 indexed load/store into a single absolute-set
/// load/store instruction.
class HexagonGenMemAbsolute : public MachineFunctionPass {
  // Per-function state; (re)assigned at the top of runOnMachineFunction().
  const HexagonInstrInfo *TII = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  const TargetRegisterInfo *TRI = nullptr;

public:
  static char ID;

  HexagonGenMemAbsolute() : MachineFunctionPass(ID) {
    initializeHexagonGenMemAbsolutePass(*PassRegistry::getPassRegistry());
  }

  StringRef getPassName() const override {
    return "Hexagon Generate Load/Store Set Absolute Address Instruction";
  }

  /// The pass needs the machine dominator tree to check that the rewritten
  /// access dominates all other uses, and it declares the tree preserved.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    MachineFunctionPass::getAnalysisUsage(AU);
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
  }

  bool runOnMachineFunction(MachineFunction &Fn) override;

private:
  /// Opcode classifiers: return true and set \p NewOpcode to the absolute-set
  /// opcode when \p Opcode is a supported indexed load (resp. store).
  static bool isValidIndexedLoad(int &Opcode, int &NewOpcode);
  static bool isValidIndexedStore(int &Opcode, int &NewOpcode);
};
} // namespace

// Storage for the pass's static ID member, which the constructor hands to
// MachineFunctionPass.
char HexagonGenMemAbsolute::ID = 0;

// Register the pass under the name "hexagon-gen-load-absolute" with the same
// description string that getPassName() returns.
// NOTE(review): the pass requires MachineDominatorTree (see getAnalysisUsage)
// but no dependency is declared here; confirm whether the
// INITIALIZE_PASS_BEGIN/INITIALIZE_PASS_DEPENDENCY form is wanted.
INITIALIZE_PASS(HexagonGenMemAbsolute, "hexagon-gen-load-absolute",
"Hexagon Generate Load/Store Set Absolute Address Instruction",
false, false)

/// Scan every basic block of \p Fn for a CONST32 / A2_tfrsi instruction whose
/// destination register is then used as the base of an offset-0 indexed
/// load/store, and replace the pair with one absolute-set load/store.
///
/// The rewrite is only performed when
///   * the immediate operand of the transfer is an immediate or a global,
///   * the load/store uses the defined register as base with offset 0,
///   * the loaded/stored register differs from the base register,
///   * the load/store dominates every non-debug use of the register, and
///   * the register has at least three non-debug uses.
///
/// \returns true if at least one pair of instructions was rewritten, false
/// otherwise (including when the function is skipped).
bool HexagonGenMemAbsolute::runOnMachineFunction(MachineFunction &Fn) {
  if (skipFunction(Fn.getFunction()))
    return false;

  TII = Fn.getSubtarget<HexagonSubtarget>().getInstrInfo();
  MRI = &Fn.getRegInfo();
  TRI = Fn.getRegInfo().getTargetRegisterInfo();

  MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();

  using use_iterator = MachineRegisterInfo::use_nodbg_iterator;

  // Only report a modification when something was actually rewritten.
  bool Changed = false;

  // Loop over all of the basic blocks
  for (MachineBasicBlock &MBB : Fn) {
    // Traverse the basic block. The iterator is advanced before MI is
    // examined so that erasing MI never requires stepping backwards (which
    // would walk off the front of the block when MI is its first
    // instruction).
    for (MachineBasicBlock::iterator MII = MBB.begin(); MII != MBB.end();) {
      MachineInstr *MI = &*MII;
      ++MII;

      int Opc = MI->getOpcode();
      if (Opc != Hexagon::CONST32 && Opc != Hexagon::A2_tfrsi)
        continue;

      const MachineOperand &MO = MI->getOperand(0);
      if (!MO.isReg() || !MO.isDef())
        continue;

      unsigned DstReg = MO.getReg();
      if (MRI->use_nodbg_empty(DstReg))
        continue;

      // Candidate load/store: the first entry of the register's use list.
      use_iterator NextUseMI = MRI->use_nodbg_begin(DstReg);

      MachineInstr *NextMI = NextUseMI->getParent();
      int NextOpc = NextMI->getOpcode();
      int NewOpc = 0;
      bool IsLoad = isValidIndexedLoad(NextOpc, NewOpc);

      if (!IsLoad && !isValidIndexedStore(NextOpc, NewOpc))
        continue;

      // Base and Offset positions for load and store instructions
      // Load R(dest), R(base), Imm -> R(dest) = mem(R(base) + Imm)
      // Store R(base), Imm, R (src) -> mem(R(base) + Imm) = R(src)
      // The positions themselves are not needed; the call is used as a
      // validity check on NextMI.
      unsigned BaseRegPos, ImmPos;
      if (!TII->getBaseAndOffsetPosition(*NextMI, BaseRegPos, ImmPos))
        continue;
      const unsigned RegPos = IsLoad ? 0 : 2;

      const MachineOperand &ImmOperand = MI->getOperand(1);
      bool IsGlobal = ImmOperand.isGlobal();
      if (!ImmOperand.isImm() && !IsGlobal)
        continue;

      const MachineOperand *BaseOp = nullptr;
      int64_t Offset = 0;
      bool Scalable = false;
      // Ensure the query succeeded and BaseOp is non-null and register type.
      if (!TII->getMemOperandWithOffset(*NextMI, BaseOp, Offset, Scalable,
                                        TRI) ||
          !BaseOp || !BaseOp->isReg())
        continue;

      if (Scalable)
        continue;

      unsigned BaseReg = BaseOp->getReg();
      if ((DstReg != BaseReg) || (Offset != 0))
        continue;

      const MachineOperand &MO0 = NextMI->getOperand(RegPos);

      if (!MO0.isReg())
        continue;

      unsigned LoadStoreReg = MO0.getReg();

      // Store: Bail out if the src and base are same (def and use on same
      // register).
      if (LoadStoreReg == BaseReg)
        continue;

      // Insert the absolute-set instruction "I" only if the use of the
      // BaseReg in "I" dominates the rest of the uses of BaseReg and if
      // there are more than 2 uses of this BaseReg.
      bool Dominates = true;
      unsigned Counter = 0;
      for (use_iterator I = NextUseMI, E = MRI->use_nodbg_end(); I != E; ++I) {
        Counter++;
        if (!MDT.dominates(NextMI, I->getParent())) {
          // One non-dominated use is enough to reject the candidate.
          Dominates = false;
          break;
        }
      }

      if ((!Dominates) || (Counter < 3))
        continue;

      // If we reach here, we have met all the conditions required for the
      // replacement of the absolute instruction.
      LLVM_DEBUG({
        dbgs() << "Found a pair of instructions for absolute-set "
               << (IsLoad ? "load" : "store") << "\n";
        dbgs() << *MI;
        dbgs() << *NextMI;
      });
      MachineBasicBlock *ParentBlock = NextMI->getParent();
      MachineInstrBuilder MIB;
      if (IsLoad) { // Insert absolute-set load instruction
        ++HexagonNumLoadAbsConversions;
        MIB = BuildMI(*ParentBlock, NextMI, NextMI->getDebugLoc(),
                      TII->get(NewOpc), LoadStoreReg)
                  .addReg(DstReg, RegState::Define);
      } else { // Insert absolute-set store instruction
        ++HexagonNumStoreAbsConversions;
        MIB = BuildMI(*ParentBlock, NextMI, NextMI->getDebugLoc(),
                      TII->get(NewOpc), DstReg);
      }

      if (IsGlobal)
        MIB.addGlobalAddress(ImmOperand.getGlobal(), ImmOperand.getOffset(),
                             ImmOperand.getTargetFlags());
      else
        MIB.addImm(ImmOperand.getImm());

      if (IsLoad)
        MIB->getOperand(0).setSubReg(MO0.getSubReg());
      else
        MIB.addReg(LoadStoreReg, 0, MO0.getSubReg());

      LLVM_DEBUG(dbgs() << "Replaced with " << *MIB << "\n");

      // MII already points past MI. If it designates NextMI (the load/store
      // directly followed the transfer), step over it before it is erased so
      // the iterator remains valid.
      if (MII != MBB.end() && &*MII == NextMI)
        ++MII;

      // Erase the instructions that got replaced.
      MBB.erase(MI);
      ParentBlock->erase(NextMI);
      Changed = true;
    }
  }

  return Changed;
}

/// Map an indexed (`_io`) load opcode to its absolute-set (`_ap`) counterpart.
///
/// \param Opc     Opcode to classify (only read).
/// \param NewOpc  Receives the absolute-set opcode on success; left untouched
///                when \p Opc is not one of the supported loads.
/// \returns true if \p Opc is a supported indexed load.
bool HexagonGenMemAbsolute::isValidIndexedLoad(int &Opc, int &NewOpc) {
  struct OpcodePair {
    int Indexed;
    int AbsoluteSet;
  };
  static const OpcodePair LoadMap[] = {
      {Hexagon::L2_loadrb_io, Hexagon::L4_loadrb_ap},
      {Hexagon::L2_loadrh_io, Hexagon::L4_loadrh_ap},
      {Hexagon::L2_loadri_io, Hexagon::L4_loadri_ap},
      {Hexagon::L2_loadrd_io, Hexagon::L4_loadrd_ap},
      {Hexagon::L2_loadruh_io, Hexagon::L4_loadruh_ap},
      {Hexagon::L2_loadrub_io, Hexagon::L4_loadrub_ap},
  };

  for (const OpcodePair &Entry : LoadMap) {
    if (Entry.Indexed != Opc)
      continue;
    NewOpc = Entry.AbsoluteSet;
    return true;
  }
  return false;
}

/// Map an indexed (`_io`) store opcode to its absolute-set (`_ap`) counterpart.
///
/// \param Opc     Opcode to classify (only read).
/// \param NewOpc  Receives the absolute-set opcode on success; left untouched
///                when \p Opc is not one of the supported stores.
/// \returns true if \p Opc is a supported indexed store.
bool HexagonGenMemAbsolute::isValidIndexedStore(int &Opc, int &NewOpc) {
  struct OpcodePair {
    int Indexed;
    int AbsoluteSet;
  };
  static const OpcodePair StoreMap[] = {
      {Hexagon::S2_storerd_io, Hexagon::S4_storerd_ap},
      {Hexagon::S2_storeri_io, Hexagon::S4_storeri_ap},
      {Hexagon::S2_storerh_io, Hexagon::S4_storerh_ap},
      {Hexagon::S2_storerb_io, Hexagon::S4_storerb_ap},
  };

  for (const OpcodePair &Entry : StoreMap) {
    if (Entry.Indexed != Opc)
      continue;
    NewOpc = Entry.AbsoluteSet;
    return true;
  }
  return false;
}

//===----------------------------------------------------------------------===//
// Public Constructor Functions
//===----------------------------------------------------------------------===//

/// Allocate a new HexagonGenMemAbsolute pass and return it as a FunctionPass
/// pointer. The object is created with `new`; ownership passes to the caller.
FunctionPass *llvm::createHexagonGenMemAbsolute() {
return new HexagonGenMemAbsolute();
}
9 changes: 9 additions & 0 deletions llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ static cl::opt<bool>
static cl::opt<bool> DisableHSDR("disable-hsdr", cl::init(false), cl::Hidden,
cl::desc("Disable splitting double registers"));

static cl::opt<bool>
EnableGenMemAbs("hexagon-mem-abs", cl::init(true), cl::Hidden,
cl::desc("Generate absolute set instructions"));

static cl::opt<bool> EnableBitSimplify("hexagon-bit", cl::init(true),
cl::Hidden, cl::desc("Bit simplification"));

Expand Down Expand Up @@ -151,6 +155,7 @@ namespace llvm {
void initializeHexagonCopyToCombinePass(PassRegistry&);
void initializeHexagonEarlyIfConversionPass(PassRegistry&);
void initializeHexagonExpandCondsetsPass(PassRegistry&);
void initializeHexagonGenMemAbsolutePass(PassRegistry &);
void initializeHexagonGenMuxPass(PassRegistry&);
void initializeHexagonHardwareLoopsPass(PassRegistry&);
void initializeHexagonLoopIdiomRecognizeLegacyPassPass(PassRegistry &);
Expand All @@ -177,6 +182,7 @@ namespace llvm {
FunctionPass *createHexagonFixupHwLoops();
FunctionPass *createHexagonGenExtract();
FunctionPass *createHexagonGenInsert();
FunctionPass *createHexagonGenMemAbsolute();
FunctionPass *createHexagonGenMux();
FunctionPass *createHexagonGenPredicate();
FunctionPass *createHexagonHardwareLoops();
Expand Down Expand Up @@ -211,6 +217,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeHexagonTarget() {
initializeHexagonConstPropagationPass(PR);
initializeHexagonCopyToCombinePass(PR);
initializeHexagonEarlyIfConversionPass(PR);
initializeHexagonGenMemAbsolutePass(PR);
initializeHexagonGenMuxPass(PR);
initializeHexagonHardwareLoopsPass(PR);
initializeHexagonLoopIdiomRecognizeLegacyPassPass(PR);
Expand Down Expand Up @@ -413,6 +420,8 @@ void HexagonPassConfig::addPreRegAlloc() {
insertPass(&RegisterCoalescerID, &HexagonExpandCondsetsID);
if (!DisableStoreWidening)
addPass(createHexagonStoreWidening());
if (EnableGenMemAbs)
addPass(createHexagonGenMemAbsolute());
if (!DisableHardwareLoops)
addPass(createHexagonHardwareLoops());
}
Expand Down
68 changes: 68 additions & 0 deletions llvm/test/CodeGen/Hexagon/load-const-extend-opt.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
; RUN: llc -march=hexagon -O3 -hexagon-small-data-threshold=0 < %s | FileCheck %s
; This test checks that if there are more than 2 uses of a constant address, the
; value is moved into a register and all instances of the constant are replaced
; with the register.
; The GenMemAbsolute pass generates an absolute-set instruction if there are more
; than 2 uses of this register.

; CHECK: loadi32_3
; CHECK-NOT: r{{[0-9]+}} = memw(##441652)
; CHECK-NOT: r{{[0-9]+}} = memw(r{{[0-9]+}}+#0)
; CHECK:r{{[0-9]+}} = memw(r[[REG:[0-9]+]]=##441652)
; CHECK-NOT: r{{[0-9]+}} = memw(##441652)
; CHECK:r{{[0-9]+}} = memw(r[[REG]]+#0)
; CHECK-NOT: r{{[0-9]+}} = memw(##441652)
; CHECK:r{{[0-9]+}} = memw(r[[REG]]+#0)
; CHECK-NOT: r{{[0-9]+}} = memw(##441652)

; Three volatile i32 loads from the same constant address (441652).
define void @loadi32_3() #0 {
entry:
%0 = load volatile i32, ptr inttoptr (i32 441652 to ptr), align 4
%1 = load volatile i32, ptr inttoptr (i32 441652 to ptr), align 4
%2 = load volatile i32, ptr inttoptr (i32 441652 to ptr), align 4
ret void
}

; CHECK: loadi32_2
; CHECK-NOT: r{{[0-9]+}} = ##441652
; CHECK: r{{[0-9]+}} = memw(##441652)
; CHECK: r{{[0-9]+}} = memw(##441652)

; Only two volatile i32 loads from the constant address (441652).
define void @loadi32_2() #0 {
entry:
%0 = load volatile i32, ptr inttoptr (i32 441652 to ptr), align 4
%1 = load volatile i32, ptr inttoptr (i32 441652 to ptr), align 4
ret void
}

; CHECK: loadi32_abs_global_3
; CHECK-NOT: r{{[0-9]+}} = memw(##globalInt)
; CHECK-NOT: r{{[0-9]+}} = memw(r{{[0-9]+}}+#0)
; CHECK:r{{[0-9]+}} = memw(r[[REG:[0-9]+]]=##globalInt)
; CHECK-NOT: r{{[0-9]+}} = memw(##globalInt)
; CHECK:r{{[0-9]+}} = memw(r[[REG]]+#0)
; CHECK-NOT: r{{[0-9]+}} = memw(##globalInt)
; CHECK:r{{[0-9]+}} = memw(r[[REG]]+#0)
; CHECK-NOT: r{{[0-9]+}} = memw(##globalInt)

@globalInt = external global i32, align 8
; Three volatile i32 loads from the external global @globalInt.
define void @loadi32_abs_global_3() #0 {
entry:
%0 = load volatile i32, ptr @globalInt, align 4
%1 = load volatile i32, ptr @globalInt, align 4
%2 = load volatile i32, ptr @globalInt, align 4
ret void
}

; CHECK: loadi32_abs_global_2
; CHECK-NOT:r[[REG:[0-9]+]] = ##globalInt
; CHECK:r{{[0-9]+}} = memw(##globalInt)
; CHECK:r{{[0-9]+}} = memw(##globalInt)

; Only two volatile i32 loads from the external global @globalInt.
define void @loadi32_abs_global_2() #0 {
entry:
%0 = load volatile i32, ptr @globalInt, align 4
%1 = load volatile i32, ptr @globalInt, align 4
ret void
}

attributes #0 = { nounwind }
Loading