Skip to content

AMDGPU/NewPM Port GCNDPPCombine to NPM #105816

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Aug 29, 2024

Conversation

optimisan
Copy link
Contributor

No description provided.

@optimisan optimisan force-pushed the port-gcn-dpp-combine branch from e135548 to d9f944a Compare August 26, 2024 05:51
@optimisan optimisan marked this pull request as ready for review August 26, 2024 05:51
@llvmbot
Copy link
Member

llvmbot commented Aug 26, 2024

@llvm/pr-subscribers-backend-amdgpu

Author: Akshat Oke (Akshat-Oke)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/105816.diff

10 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+2-2)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def (+1)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+3-2)
  • (modified) llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp (+37-14)
  • (added) llvm/lib/Target/AMDGPU/GCNDPPCombine.h (+55)
  • (modified) llvm/test/CodeGen/AMDGPU/dpp64_combine.mir (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/dpp_combine.mir (+1)
  • (modified) llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir (+3-3)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir (+1-1)
  • (modified) llvm/test/CodeGen/AMDGPU/vopc_dpp.mir (+1-1)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index c50474893eb7d5..ae12244df5cf9a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -157,8 +157,8 @@ struct AMDGPULowerBufferFatPointersPass
 void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
 extern char &AMDGPURewriteOutArgumentsID;
 
-void initializeGCNDPPCombinePass(PassRegistry &);
-extern char &GCNDPPCombineID;
+void initializeGCNDPPCombineLegacyPass(PassRegistry &);
+extern char &GCNDPPCombineLegacyID;
 
 void initializeSIFoldOperandsPass(PassRegistry &);
 extern char &SIFoldOperandsID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index d8741b4b06a984..52faf145516bbd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -97,4 +97,5 @@ FUNCTION_PASS_WITH_PARAMS(
 MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this))
 MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass())
 MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass())
+MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
 #undef MACHINE_FUNCTION_PASS
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 570f089e914699..f5a7f2d7cc48db 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -28,6 +28,7 @@
 #include "AMDGPUTargetObjectFile.h"
 #include "AMDGPUTargetTransformInfo.h"
 #include "AMDGPUUnifyDivergentExitNodes.h"
+#include "GCNDPPCombine.h"
 #include "GCNIterativeScheduler.h"
 #include "GCNSchedStrategy.h"
 #include "GCNVOPDUtils.h"
@@ -396,7 +397,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeR600VectorRegMergerPass(*PR);
   initializeGlobalISel(*PR);
   initializeAMDGPUDAGToDAGISelLegacyPass(*PR);
-  initializeGCNDPPCombinePass(*PR);
+  initializeGCNDPPCombineLegacyPass(*PR);
   initializeSILowerI1CopiesLegacyPass(*PR);
   initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR);
   initializeSILowerWWMCopiesPass(*PR);
@@ -1260,7 +1261,7 @@ void GCNPassConfig::addMachineSSAOptimization() {
   // XXX - Can we get away without running DeadMachineInstructionElim again?
   addPass(&SIFoldOperandsID);
   if (EnableDPPCombine)
-    addPass(&GCNDPPCombineID);
+    addPass(&GCNDPPCombineLegacyID);
   addPass(&SILoadStoreOptimizerID);
   if (isPassEnabled(EnableSDWAPeephole)) {
     addPass(&SIPeepholeSDWAID);
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index 0ac079c69e605f..e167a581bd08ea 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -37,6 +37,7 @@
 // The mov_dpp instruction should reside in the same BB as all its uses
 //===----------------------------------------------------------------------===//
 
+#include "GCNDPPCombine.h"
 #include "AMDGPU.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -51,7 +52,7 @@ STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");
 
 namespace {
 
-class GCNDPPCombine : public MachineFunctionPass {
+class GCNDPPCombine {
   MachineRegisterInfo *MRI;
   const SIInstrInfo *TII;
   const GCNSubtarget *ST;
@@ -76,12 +77,17 @@ class GCNDPPCombine : public MachineFunctionPass {
 
   bool combineDPPMov(MachineInstr &MI) const;
 
+  int getDPPOp(unsigned Op, bool IsShrinkable) const;
+  bool isShrinkable(MachineInstr &MI) const;
+
+public:
+  bool run(MachineFunction &MF);
+};
+class GCNDPPCombineLegacy : public MachineFunctionPass {
 public:
   static char ID;
 
-  GCNDPPCombine() : MachineFunctionPass(ID) {
-    initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry());
-  }
+  GCNDPPCombineLegacy() : MachineFunctionPass(ID) {}
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 
@@ -96,22 +102,19 @@ class GCNDPPCombine : public MachineFunctionPass {
     return MachineFunctionProperties()
       .set(MachineFunctionProperties::Property::IsSSA);
   }
-
-private:
-  int getDPPOp(unsigned Op, bool IsShrinkable) const;
-  bool isShrinkable(MachineInstr &MI) const;
 };
 
 } // end anonymous namespace
 
-INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)
+INITIALIZE_PASS(GCNDPPCombineLegacy, DEBUG_TYPE, "GCN DPP Combine", false,
+                false)
 
-char GCNDPPCombine::ID = 0;
+char GCNDPPCombineLegacy::ID = 0;
 
-char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;
+char &llvm::GCNDPPCombineLegacyID = GCNDPPCombineLegacy::ID;
 
 FunctionPass *llvm::createGCNDPPCombinePass() {
-  return new GCNDPPCombine();
+  return new GCNDPPCombineLegacy();
 }
 
 bool GCNDPPCombine::isShrinkable(MachineInstr &MI) const {
@@ -749,9 +752,17 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
   return !Rollback;
 }
 
-bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
+bool GCNDPPCombineLegacy::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction())) {
+    return false;
+  }
+  GCNDPPCombine Impl;
+  return Impl.run(MF);
+}
+
+bool GCNDPPCombine::run(MachineFunction &MF) {
   ST = &MF.getSubtarget<GCNSubtarget>();
-  if (!ST->hasDPP() || skipFunction(MF.getFunction()))
+  if (!ST->hasDPP())
     return false;
 
   MRI = &MF.getRegInfo();
@@ -781,3 +792,15 @@ bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
   }
   return Changed;
 }
+
+PreservedAnalyses GCNDPPCombinePass::run(MachineFunction &MF,
+                                         MachineFunctionAnalysisManager &) {
+  GCNDPPCombine Impl;
+  bool Changed = Impl.run(MF);
+  if (!Changed) {
+    return PreservedAnalyses::all();
+  }
+  auto PA = getMachineFunctionPassPreservedAnalyses();
+  PA.preserveSet<CFGAnalyses>();
+  return PA;
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.h b/llvm/lib/Target/AMDGPU/GCNDPPCombine.h
new file mode 100644
index 00000000000000..8caccf119bdb19
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.h
@@ -0,0 +1,55 @@
+//=======- GCNDPPCombine.h - optimization for DPP instructions ---==========//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// The pass combines V_MOV_B32_dpp instruction with its VALU uses as a DPP src0
+// operand. If any of the use instruction cannot be combined with the mov the
+// whole sequence is reverted.
+//
+// $old = ...
+// $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
+//                            dpp_controls..., $row_mask, $bank_mask,
+//                            $bound_ctrl
+// $res = VALU $dpp_value [, src1]
+//
+// to
+//
+// $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,]
+//                 dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl
+//
+// Combining rules :
+//
+// if $row_mask and $bank_mask are fully enabled (0xF) and
+//    $bound_ctrl==DPP_BOUND_ZERO or $old==0
+// -> $combined_old = undef,
+//    $combined_bound_ctrl = DPP_BOUND_ZERO
+//
+// if the VALU op is binary and
+//    $bound_ctrl==DPP_BOUND_OFF and
+//    $old==identity value (immediate) for the VALU op
+// -> $combined_old = src1,
+//    $combined_bound_ctrl = DPP_BOUND_OFF
+//
+// Otherwise cancel.
+//
+// The mov_dpp instruction should reside in the same BB as all its uses
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_GCNDPPCOMBINE_H
+#define LLVM_LIB_TARGET_AMDGPU_GCNDPPCOMBINE_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+class GCNDPPCombinePass : public PassInfoMixin<GCNDPPCombinePass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MAM);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
index 9a6a54bbc4e497..d16d45eef1e897 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
@@ -1,5 +1,5 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
-# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
+# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
+# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
 
 ---
 # GCN-LABEL: name: dpp64_old_impdef
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
index a1c3970a5bae90..179d0becf6693a 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
 
 ---
 # old is undefined: only combine when masks are fully enabled and
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
index 1151bde02ef62c..43355dc694dc62 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
@@ -1,6 +1,6 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1100
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1100
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
 
 ---
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
index 7e286a4dd678eb..324c0037511e1c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass=gcn-dpp-combine %s -o - | FileCheck -check-prefix=GFX12 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -passes=gcn-dpp-combine %s -o - | FileCheck -check-prefix=GFX12 %s
 
 ---
 name:            test_cvt_f32_bf8_byte0
diff --git a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
index 123893674ff5e9..3c1b3c95513169 100644
--- a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
 
 ---
 

@optimisan optimisan marked this pull request as draft August 27, 2024 06:56
@@ -1,5 +1,5 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Retain the test behavior for legacy pm. Either duplicate an additional line for NPM or add no change. Rectify the same for other tests below.

@optimisan optimisan force-pushed the port-gcn-dpp-combine branch from e43bdb8 to facb98f Compare August 28, 2024 09:04
@optimisan optimisan marked this pull request as ready for review August 28, 2024 09:12
@@ -749,9 +753,16 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
return !Rollback;
}

bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
bool GCNDPPCombineLegacy::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction())) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't need braces


} // end namespace llvm

#endif
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
#endif
#endif // LLVM_LIB_TARGET_AMDGPU_GCNDPPCOMBINE_H

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think clang-format does this for you

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FixNamespaceComments adds it for namespaces but there's probably nothing for #endifs.

@optimisan optimisan force-pushed the port-gcn-dpp-combine branch from facb98f to eecdcd6 Compare August 29, 2024 06:44
@optimisan optimisan merged commit fdca2c3 into llvm:main Aug 29, 2024
8 checks passed
@optimisan optimisan deleted the port-gcn-dpp-combine branch August 29, 2024 09:19
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

5 participants