Skip to content

[AMDGPU][NewPM] Port SIPeepholeSDWA pass to NPM #107049

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 11, 2024

Conversation

optimisan
Copy link
Contributor

No description provided.

Copy link

github-actions bot commented Sep 3, 2024

✅ With the latest revision this PR passed the C/C++ code formatter.

@optimisan optimisan force-pushed the port-peephole-sdwa branch 3 times, most recently from c6d42ac to a11d7ab Compare September 3, 2024 05:37
@optimisan optimisan marked this pull request as ready for review September 3, 2024 05:38
@llvmbot
Copy link
Member

llvmbot commented Sep 3, 2024

@llvm/pr-subscribers-backend-amdgpu

Author: Akshat Oke (Akshat-Oke)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/107049.diff

8 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+3-3)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def (+1)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+3-2)
  • (modified) llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp (+39-17)
  • (added) llvm/lib/Target/AMDGPU/SIPeepholeSDWA.h (+25)
  • (modified) llvm/test/CodeGen/AMDGPU/sdwa-gfx9.mir (+1)
  • (modified) llvm/test/CodeGen/AMDGPU/sdwa-ops.mir (+1)
  • (modified) llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir (+1)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 6cc6863841a4c5..64bed06c13bdb9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -36,7 +36,7 @@ void initializeAMDGPURegBankSelectPass(PassRegistry &);
 FunctionPass *createGCNDPPCombinePass();
 FunctionPass *createSIAnnotateControlFlowLegacyPass();
 FunctionPass *createSIFoldOperandsLegacyPass();
-FunctionPass *createSIPeepholeSDWAPass();
+FunctionPass *createSIPeepholeSDWALegacyPass();
 FunctionPass *createSILowerI1CopiesLegacyPass();
 FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass();
 FunctionPass *createSIShrinkInstructionsPass();
@@ -163,8 +163,8 @@ extern char &GCNDPPCombineLegacyID;
 void initializeSIFoldOperandsLegacyPass(PassRegistry &);
 extern char &SIFoldOperandsLegacyID;
 
-void initializeSIPeepholeSDWAPass(PassRegistry &);
-extern char &SIPeepholeSDWAID;
+void initializeSIPeepholeSDWALegacyPass(PassRegistry &);
+extern char &SIPeepholeSDWALegacyID;
 
 void initializeSIShrinkInstructionsPass(PassRegistry&);
 extern char &SIShrinkInstructionsID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index d01e3f0b97ddd1..689e51e2940a3d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -100,4 +100,5 @@ MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass())
 MACHINE_FUNCTION_PASS("si-fold-operands", SIFoldOperandsPass());
 MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
 MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass())
+MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
 #undef MACHINE_FUNCTION_PASS
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 757a575841e61a..d1b203c993a71e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -39,6 +39,7 @@
 #include "SILoadStoreOptimizer.h"
 #include "SIMachineFunctionInfo.h"
 #include "SIMachineScheduler.h"
+#include "SIPeepholeSDWA.h"
 #include "TargetInfo/AMDGPUTargetInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/Analysis/CGSCCPassManager.h"
@@ -414,7 +415,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeSIFixSGPRCopiesLegacyPass(*PR);
   initializeSIFixVGPRCopiesPass(*PR);
   initializeSIFoldOperandsLegacyPass(*PR);
-  initializeSIPeepholeSDWAPass(*PR);
+  initializeSIPeepholeSDWALegacyPass(*PR);
   initializeSIShrinkInstructionsPass(*PR);
   initializeSIOptimizeExecMaskingPreRAPass(*PR);
   initializeSIOptimizeVGPRLiveRangePass(*PR);
@@ -1274,7 +1275,7 @@ void GCNPassConfig::addMachineSSAOptimization() {
     addPass(&GCNDPPCombineLegacyID);
   addPass(&SILoadStoreOptimizerLegacyID);
   if (isPassEnabled(EnableSDWAPeephole)) {
-    addPass(&SIPeepholeSDWAID);
+    addPass(&SIPeepholeSDWALegacyID);
     addPass(&EarlyMachineLICMID);
     addPass(&MachineCSEID);
     addPass(&SIFoldOperandsLegacyID);
diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index d80e1277b2a8a0..7f564fa101fa0c 100644
--- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -19,6 +19,7 @@
 ///
 //===----------------------------------------------------------------------===//
 
+#include "SIPeepholeSDWA.h"
 #include "AMDGPU.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -45,7 +46,7 @@ class SDWADstOperand;
 using SDWAOperandsVector = SmallVector<SDWAOperand *, 4>;
 using SDWAOperandsMap = MapVector<MachineInstr *, SDWAOperandsVector>;
 
-class SIPeepholeSDWA : public MachineFunctionPass {
+class SIPeepholeSDWA {
 private:
   MachineRegisterInfo *MRI;
   const SIRegisterInfo *TRI;
@@ -57,14 +58,6 @@ class SIPeepholeSDWA : public MachineFunctionPass {
 
   std::optional<int64_t> foldToImm(const MachineOperand &Op) const;
 
-public:
-  static char ID;
-
-  SIPeepholeSDWA() : MachineFunctionPass(ID) {
-    initializeSIPeepholeSDWAPass(*PassRegistry::getPassRegistry());
-  }
-
-  bool runOnMachineFunction(MachineFunction &MF) override;
   void matchSDWAOperands(MachineBasicBlock &MBB);
   std::unique_ptr<SDWAOperand> matchSDWAOperand(MachineInstr &MI);
   void pseudoOpConvertToVOP2(MachineInstr &MI,
@@ -72,8 +65,20 @@ class SIPeepholeSDWA : public MachineFunctionPass {
   bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
   void legalizeScalarOperands(MachineInstr &MI, const GCNSubtarget &ST) const;
 
+public:
+  bool run(MachineFunction &MF);
+};
+
+class SIPeepholeSDWALegacy : public MachineFunctionPass {
+public:
+  static char ID;
+
+  SIPeepholeSDWALegacy() : MachineFunctionPass(ID) {}
+
   StringRef getPassName() const override { return "SI Peephole SDWA"; }
 
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
     MachineFunctionPass::getAnalysisUsage(AU);
@@ -192,17 +197,17 @@ class SDWADstPreserveOperand : public SDWADstOperand {
 
 } // end anonymous namespace
 
-INITIALIZE_PASS(SIPeepholeSDWA, DEBUG_TYPE, "SI Peephole SDWA", false, false)
+INITIALIZE_PASS(SIPeepholeSDWALegacy, DEBUG_TYPE, "SI Peephole SDWA", false,
+                false)
 
-char SIPeepholeSDWA::ID = 0;
+char SIPeepholeSDWALegacy::ID = 0;
 
-char &llvm::SIPeepholeSDWAID = SIPeepholeSDWA::ID;
+char &llvm::SIPeepholeSDWALegacyID = SIPeepholeSDWALegacy::ID;
 
-FunctionPass *llvm::createSIPeepholeSDWAPass() {
-  return new SIPeepholeSDWA();
+FunctionPass *llvm::createSIPeepholeSDWALegacyPass() {
+  return new SIPeepholeSDWALegacy();
 }
 
-
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 static raw_ostream& operator<<(raw_ostream &OS, SdwaSel Sel) {
   switch(Sel) {
@@ -1235,10 +1240,17 @@ void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI,
   }
 }
 
-bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
+bool SIPeepholeSDWALegacy::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction())) {
+    return false;
+  }
+  return SIPeepholeSDWA().run(MF);
+}
+
+bool SIPeepholeSDWA::run(MachineFunction &MF) {
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
 
-  if (!ST.hasSDWA() || skipFunction(MF.getFunction()))
+  if (!ST.hasSDWA())
     return false;
 
   MRI = &MF.getRegInfo();
@@ -1295,3 +1307,13 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
 
   return Ret;
 }
+
+PreservedAnalyses SIPeepholeSDWAPass::run(MachineFunction &MF,
+                                          MachineFunctionAnalysisManager &) {
+  if (MF.getFunction().hasOptNone() || !SIPeepholeSDWA().run(MF))
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
+  PA.preserveSet<CFGAnalyses>();
+  return PA;
+}
diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.h b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.h
new file mode 100644
index 00000000000000..8d4a2efd934f37
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.h
@@ -0,0 +1,25 @@
+
+//===--------- SIPeepholeSDWA.h -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_SIPEEPHOLESDWA_H
+#define LLVM_LIB_TARGET_AMDGPU_SIPEEPHOLESDWA_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class SIPeepholeSDWAPass : public PassInfoMixin<SIPeepholeSDWAPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_SIPEEPHOLESDWA_H
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-gfx9.mir b/llvm/test/CodeGen/AMDGPU/sdwa-gfx9.mir
index 4ff43024ae8cca..5e01772cd205f8 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-gfx9.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-gfx9.mir
@@ -1,6 +1,7 @@
 # RUN: llc -mtriple=amdgcn -mcpu=kaveri -run-pass=si-peephole-sdwa -o - %s | FileCheck -check-prefix=CI -check-prefix=GCN %s
 # RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=si-peephole-sdwa -o - %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
 # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=si-peephole-sdwa -o - %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes=si-peephole-sdwa -o - %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s
 
 # GCN-LABEL: {{^}}name: add_shr_i32
 # GCN: [[SMOV:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 123
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-ops.mir b/llvm/test/CodeGen/AMDGPU/sdwa-ops.mir
index ef986f8c9d2a36..540fd52ca6747f 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-ops.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-ops.mir
@@ -1,5 +1,6 @@
 # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=si-peephole-sdwa -o - %s | FileCheck -check-prefix=GFX9 %s
 # RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass=si-peephole-sdwa -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -passes=si-peephole-sdwa -o - %s | FileCheck -check-prefix=GFX9 %s
 
 # test for 3 consecutive _sdwa's
 # GFX9-LABEL: name:            test1_add_co_sdwa
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir b/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir
index 4ca39ecc7a0aea..8886772f649fbf 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir
@@ -1,6 +1,7 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
 # RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=SDWA %s
 # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=SDWA %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes=si-peephole-sdwa -o - %s | FileCheck -check-prefix=SDWA %s
 ---
 name:            add_f16_u32_preserve
 tracksRegLiveness: true

@@ -1,5 +1,6 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=si-peephole-sdwa -o - %s | FileCheck -check-prefix=GFX9 %s
# RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass=si-peephole-sdwa -o - %s | FileCheck -check-prefix=GFX9 %s
# RUN: llc -mtriple=amdgcn -mcpu=fiji -passes=si-peephole-sdwa -o - %s | FileCheck -check-prefix=GFX9 %s
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better duplicate the line for new subtargets. Here gfx900.

@@ -1235,10 +1240,17 @@ void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI,
}
}

bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
bool SIPeepholeSDWALegacy::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction())) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't need braces

@@ -0,0 +1,25 @@

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Extra blank line here

@arsenm arsenm merged commit e1ee07d into llvm:main Sep 11, 2024
8 checks passed
@optimisan optimisan deleted the port-peephole-sdwa branch September 11, 2024 16:35
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

5 participants