Skip to content

[AMDGPU][NewPM] Port SIPeepholeSDWA pass to NPM #107049

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ void initializeAMDGPURegBankSelectPass(PassRegistry &);
FunctionPass *createGCNDPPCombinePass();
FunctionPass *createSIAnnotateControlFlowLegacyPass();
FunctionPass *createSIFoldOperandsLegacyPass();
FunctionPass *createSIPeepholeSDWAPass();
FunctionPass *createSIPeepholeSDWALegacyPass();
FunctionPass *createSILowerI1CopiesLegacyPass();
FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass();
FunctionPass *createSIShrinkInstructionsLegacyPass();
Expand Down Expand Up @@ -163,8 +163,8 @@ extern char &GCNDPPCombineLegacyID;
void initializeSIFoldOperandsLegacyPass(PassRegistry &);
extern char &SIFoldOperandsLegacyID;

void initializeSIPeepholeSDWAPass(PassRegistry &);
extern char &SIPeepholeSDWAID;
void initializeSIPeepholeSDWALegacyPass(PassRegistry &);
extern char &SIPeepholeSDWALegacyID;

void initializeSIShrinkInstructionsLegacyPass(PassRegistry &);
extern char &SIShrinkInstructionsLegacyID;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -100,5 +100,6 @@ MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass())
MACHINE_FUNCTION_PASS("si-fold-operands", SIFoldOperandsPass());
MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass())
MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
#undef MACHINE_FUNCTION_PASS
5 changes: 3 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include "SILoadStoreOptimizer.h"
#include "SIMachineFunctionInfo.h"
#include "SIMachineScheduler.h"
#include "SIPeepholeSDWA.h"
#include "SIShrinkInstructions.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
Expand Down Expand Up @@ -415,7 +416,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSIFixSGPRCopiesLegacyPass(*PR);
initializeSIFixVGPRCopiesPass(*PR);
initializeSIFoldOperandsLegacyPass(*PR);
initializeSIPeepholeSDWAPass(*PR);
initializeSIPeepholeSDWALegacyPass(*PR);
initializeSIShrinkInstructionsLegacyPass(*PR);
initializeSIOptimizeExecMaskingPreRAPass(*PR);
initializeSIOptimizeVGPRLiveRangePass(*PR);
Expand Down Expand Up @@ -1275,7 +1276,7 @@ void GCNPassConfig::addMachineSSAOptimization() {
addPass(&GCNDPPCombineLegacyID);
addPass(&SILoadStoreOptimizerLegacyID);
if (isPassEnabled(EnableSDWAPeephole)) {
addPass(&SIPeepholeSDWAID);
addPass(&SIPeepholeSDWALegacyID);
addPass(&EarlyMachineLICMID);
addPass(&MachineCSELegacyID);
addPass(&SIFoldOperandsLegacyID);
Expand Down
56 changes: 39 additions & 17 deletions llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
///
//===----------------------------------------------------------------------===//

#include "SIPeepholeSDWA.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
Expand All @@ -45,7 +46,7 @@ class SDWADstOperand;
using SDWAOperandsVector = SmallVector<SDWAOperand *, 4>;
using SDWAOperandsMap = MapVector<MachineInstr *, SDWAOperandsVector>;

class SIPeepholeSDWA : public MachineFunctionPass {
class SIPeepholeSDWA {
private:
MachineRegisterInfo *MRI;
const SIRegisterInfo *TRI;
Expand All @@ -57,23 +58,27 @@ class SIPeepholeSDWA : public MachineFunctionPass {

std::optional<int64_t> foldToImm(const MachineOperand &Op) const;

public:
static char ID;

SIPeepholeSDWA() : MachineFunctionPass(ID) {
initializeSIPeepholeSDWAPass(*PassRegistry::getPassRegistry());
}

bool runOnMachineFunction(MachineFunction &MF) override;
void matchSDWAOperands(MachineBasicBlock &MBB);
std::unique_ptr<SDWAOperand> matchSDWAOperand(MachineInstr &MI);
void pseudoOpConvertToVOP2(MachineInstr &MI,
const GCNSubtarget &ST) const;
bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
void legalizeScalarOperands(MachineInstr &MI, const GCNSubtarget &ST) const;

public:
bool run(MachineFunction &MF);
};

class SIPeepholeSDWALegacy : public MachineFunctionPass {
public:
static char ID;

SIPeepholeSDWALegacy() : MachineFunctionPass(ID) {}

StringRef getPassName() const override { return "SI Peephole SDWA"; }

bool runOnMachineFunction(MachineFunction &MF) override;

void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
Expand Down Expand Up @@ -192,17 +197,17 @@ class SDWADstPreserveOperand : public SDWADstOperand {

} // end anonymous namespace

INITIALIZE_PASS(SIPeepholeSDWA, DEBUG_TYPE, "SI Peephole SDWA", false, false)
INITIALIZE_PASS(SIPeepholeSDWALegacy, DEBUG_TYPE, "SI Peephole SDWA", false,
false)

char SIPeepholeSDWA::ID = 0;
char SIPeepholeSDWALegacy::ID = 0;

char &llvm::SIPeepholeSDWAID = SIPeepholeSDWA::ID;
char &llvm::SIPeepholeSDWALegacyID = SIPeepholeSDWALegacy::ID;

FunctionPass *llvm::createSIPeepholeSDWAPass() {
return new SIPeepholeSDWA();
/// Factory for the legacy pass manager flavour of the SDWA peephole pass.
/// Returns a newly heap-allocated SIPeepholeSDWALegacy instance.
FunctionPass *llvm::createSIPeepholeSDWALegacyPass() {
return new SIPeepholeSDWALegacy();
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
static raw_ostream& operator<<(raw_ostream &OS, SdwaSel Sel) {
switch(Sel) {
Expand Down Expand Up @@ -1235,10 +1240,17 @@ void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI,
}
}

bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
/// Legacy pass manager entry point. Honours skipFunction() first and only
/// then delegates to a freshly constructed SIPeepholeSDWA implementation
/// object, forwarding its boolean result unchanged.
bool SIPeepholeSDWALegacy::runOnMachineFunction(MachineFunction &MF) {
  const bool ShouldSkip = skipFunction(MF.getFunction());
  return ShouldSkip ? false : SIPeepholeSDWA().run(MF);
}

bool SIPeepholeSDWA::run(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

if (!ST.hasSDWA() || skipFunction(MF.getFunction()))
if (!ST.hasSDWA())
return false;

MRI = &MF.getRegInfo();
Expand Down Expand Up @@ -1295,3 +1307,13 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {

return Ret;
}

/// New pass manager entry point for the SDWA peephole.
///
/// Functions marked optnone are left untouched without invoking the
/// implementation at all. Otherwise a fresh SIPeepholeSDWA object is run on
/// \p MF; if it reports false, every analysis is preserved. When it reports
/// true, the default machine-function-pass preserved set is returned with the
/// CFG analyses additionally marked as preserved.
PreservedAnalyses SIPeepholeSDWAPass::run(MachineFunction &MF,
                                          MachineFunctionAnalysisManager &) {
  // Checked first so the implementation never runs on optnone functions.
  if (MF.getFunction().hasOptNone())
    return PreservedAnalyses::all();

  const bool Changed = SIPeepholeSDWA().run(MF);
  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses Preserved = getMachineFunctionPassPreservedAnalyses();
  Preserved.preserveSet<CFGAnalyses>();
  return Preserved;
}
24 changes: 24 additions & 0 deletions llvm/lib/Target/AMDGPU/SIPeepholeSDWA.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
//===- SIPeepholeSDWA.h -----------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIPEEPHOLESDWA_H
#define LLVM_LIB_TARGET_AMDGPU_SIPEEPHOLESDWA_H

#include "llvm/CodeGen/MachinePassManager.h"

namespace llvm {

/// New pass manager wrapper for the SI peephole SDWA machine-function pass.
/// It is registered as "si-peephole-sdwa" in AMDGPUPassRegistry.def; the
/// run() method is defined in SIPeepholeSDWA.cpp.
class SIPeepholeSDWAPass : public PassInfoMixin<SIPeepholeSDWAPass> {
public:
/// Runs the pass on \p MF and returns the set of analyses that remain valid
/// afterwards.
PreservedAnalyses run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM);
};

} // namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIPEEPHOLESDWA_H
3 changes: 3 additions & 0 deletions llvm/test/CodeGen/AMDGPU/sdwa-gfx9.mir
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# RUN: llc -mtriple=amdgcn -mcpu=kaveri -run-pass=si-peephole-sdwa -o - %s | FileCheck -check-prefix=CI -check-prefix=GCN %s
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=si-peephole-sdwa -o - %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=si-peephole-sdwa -o - %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s
# RUN: llc -mtriple=amdgcn -mcpu=kaveri -passes=si-peephole-sdwa -o - %s | FileCheck -check-prefix=CI -check-prefix=GCN %s
# RUN: llc -mtriple=amdgcn -mcpu=fiji -passes=si-peephole-sdwa -o - %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes=si-peephole-sdwa -o - %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s

# GCN-LABEL: {{^}}name: add_shr_i32
# GCN: [[SMOV:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 123
Expand Down
2 changes: 2 additions & 0 deletions llvm/test/CodeGen/AMDGPU/sdwa-ops.mir
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=si-peephole-sdwa -o - %s | FileCheck -check-prefix=GFX9 %s
# RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass=si-peephole-sdwa -o - %s | FileCheck -check-prefix=GFX9 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes=si-peephole-sdwa -o - %s | FileCheck -check-prefix=GFX9 %s
# RUN: llc -mtriple=amdgcn -mcpu=fiji -passes=si-peephole-sdwa -o - %s | FileCheck -check-prefix=GFX9 %s
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better duplicate the line for new subtargets. Here gfx900.


# test for 3 consecutive _sdwa's
# GFX9-LABEL: name: test1_add_co_sdwa
Expand Down
2 changes: 2 additions & 0 deletions llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=SDWA %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=SDWA %s
# RUN: llc -mtriple=amdgcn -mcpu=fiji -passes=si-peephole-sdwa -o - %s | FileCheck -check-prefix=SDWA %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes=si-peephole-sdwa -o - %s | FileCheck -check-prefix=SDWA %s
---
name: add_f16_u32_preserve
tracksRegLiveness: true
Expand Down
Loading