AMDGPU/NewPM Port GCNDPPCombine to NPM #105816

optimisan · 2024-08-23T11:40:28Z

No description provided.

llvmbot · 2024-08-26T05:52:24Z

@llvm/pr-subscribers-backend-amdgpu

Author: Akshat Oke (Akshat-Oke)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/105816.diff

10 Files Affected:

(modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+2-2)
(modified) llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def (+1)
(modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+3-2)
(modified) llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp (+37-14)
(added) llvm/lib/Target/AMDGPU/GCNDPPCombine.h (+55)
(modified) llvm/test/CodeGen/AMDGPU/dpp64_combine.mir (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/dpp_combine.mir (+1)
(modified) llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/vopc_dpp.mir (+1-1)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index c50474893eb7d5..ae12244df5cf9a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -157,8 +157,8 @@ struct AMDGPULowerBufferFatPointersPass
 void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
 extern char &AMDGPURewriteOutArgumentsID;
 
-void initializeGCNDPPCombinePass(PassRegistry &);
-extern char &GCNDPPCombineID;
+void initializeGCNDPPCombineLegacyPass(PassRegistry &);
+extern char &GCNDPPCombineLegacyID;
 
 void initializeSIFoldOperandsPass(PassRegistry &);
 extern char &SIFoldOperandsID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index d8741b4b06a984..52faf145516bbd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -97,4 +97,5 @@ FUNCTION_PASS_WITH_PARAMS(
 MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this))
 MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass())
 MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass())
+MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
 #undef MACHINE_FUNCTION_PASS
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 570f089e914699..f5a7f2d7cc48db 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -28,6 +28,7 @@
 #include "AMDGPUTargetObjectFile.h"
 #include "AMDGPUTargetTransformInfo.h"
 #include "AMDGPUUnifyDivergentExitNodes.h"
+#include "GCNDPPCombine.h"
 #include "GCNIterativeScheduler.h"
 #include "GCNSchedStrategy.h"
 #include "GCNVOPDUtils.h"
@@ -396,7 +397,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeR600VectorRegMergerPass(*PR);
   initializeGlobalISel(*PR);
   initializeAMDGPUDAGToDAGISelLegacyPass(*PR);
-  initializeGCNDPPCombinePass(*PR);
+  initializeGCNDPPCombineLegacyPass(*PR);
   initializeSILowerI1CopiesLegacyPass(*PR);
   initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR);
   initializeSILowerWWMCopiesPass(*PR);
@@ -1260,7 +1261,7 @@ void GCNPassConfig::addMachineSSAOptimization() {
   // XXX - Can we get away without running DeadMachineInstructionElim again?
   addPass(&SIFoldOperandsID);
   if (EnableDPPCombine)
-    addPass(&GCNDPPCombineID);
+    addPass(&GCNDPPCombineLegacyID);
   addPass(&SILoadStoreOptimizerID);
   if (isPassEnabled(EnableSDWAPeephole)) {
     addPass(&SIPeepholeSDWAID);
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index 0ac079c69e605f..e167a581bd08ea 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -37,6 +37,7 @@
 // The mov_dpp instruction should reside in the same BB as all its uses
 //===----------------------------------------------------------------------===//
 
+#include "GCNDPPCombine.h"
 #include "AMDGPU.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -51,7 +52,7 @@ STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");
 
 namespace {
 
-class GCNDPPCombine : public MachineFunctionPass {
+class GCNDPPCombine {
   MachineRegisterInfo *MRI;
   const SIInstrInfo *TII;
   const GCNSubtarget *ST;
@@ -76,12 +77,17 @@ class GCNDPPCombine : public MachineFunctionPass {
 
   bool combineDPPMov(MachineInstr &MI) const;
 
+  int getDPPOp(unsigned Op, bool IsShrinkable) const;
+  bool isShrinkable(MachineInstr &MI) const;
+
+public:
+  bool run(MachineFunction &MF);
+};
+class GCNDPPCombineLegacy : public MachineFunctionPass {
 public:
   static char ID;
 
-  GCNDPPCombine() : MachineFunctionPass(ID) {
-    initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry());
-  }
+  GCNDPPCombineLegacy() : MachineFunctionPass(ID) {}
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 
@@ -96,22 +102,19 @@ class GCNDPPCombine : public MachineFunctionPass {
     return MachineFunctionProperties()
       .set(MachineFunctionProperties::Property::IsSSA);
   }
-
-private:
-  int getDPPOp(unsigned Op, bool IsShrinkable) const;
-  bool isShrinkable(MachineInstr &MI) const;
 };
 
 } // end anonymous namespace
 
-INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)
+INITIALIZE_PASS(GCNDPPCombineLegacy, DEBUG_TYPE, "GCN DPP Combine", false,
+                false)
 
-char GCNDPPCombine::ID = 0;
+char GCNDPPCombineLegacy::ID = 0;
 
-char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;
+char &llvm::GCNDPPCombineLegacyID = GCNDPPCombineLegacy::ID;
 
 FunctionPass *llvm::createGCNDPPCombinePass() {
-  return new GCNDPPCombine();
+  return new GCNDPPCombineLegacy();
 }
 
 bool GCNDPPCombine::isShrinkable(MachineInstr &MI) const {
@@ -749,9 +752,17 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
   return !Rollback;
 }
 
-bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
+bool GCNDPPCombineLegacy::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction())) {
+    return false;
+  }
+  GCNDPPCombine Impl;
+  return Impl.run(MF);
+}
+
+bool GCNDPPCombine::run(MachineFunction &MF) {
   ST = &MF.getSubtarget<GCNSubtarget>();
-  if (!ST->hasDPP() || skipFunction(MF.getFunction()))
+  if (!ST->hasDPP())
     return false;
 
   MRI = &MF.getRegInfo();
@@ -781,3 +792,15 @@ bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
   }
   return Changed;
 }
+
+PreservedAnalyses GCNDPPCombinePass::run(MachineFunction &MF,
+                                         MachineFunctionAnalysisManager &) {
+  GCNDPPCombine Impl;
+  bool Changed = Impl.run(MF);
+  if (!Changed) {
+    return PreservedAnalyses::all();
+  }
+  auto PA = getMachineFunctionPassPreservedAnalyses();
+  PA.preserveSet<CFGAnalyses>();
+  return PA;
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.h b/llvm/lib/Target/AMDGPU/GCNDPPCombine.h
new file mode 100644
index 00000000000000..8caccf119bdb19
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.h
@@ -0,0 +1,55 @@
+//=======- GCNDPPCombine.h - optimization for DPP instructions ---==========//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// The pass combines V_MOV_B32_dpp instruction with its VALU uses as a DPP src0
+// operand. If any of the use instruction cannot be combined with the mov the
+// whole sequence is reverted.
+//
+// $old = ...
+// $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
+//                            dpp_controls..., $row_mask, $bank_mask,
+//                            $bound_ctrl
+// $res = VALU $dpp_value [, src1]
+//
+// to
+//
+// $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,]
+//                 dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl
+//
+// Combining rules :
+//
+// if $row_mask and $bank_mask are fully enabled (0xF) and
+//    $bound_ctrl==DPP_BOUND_ZERO or $old==0
+// -> $combined_old = undef,
+//    $combined_bound_ctrl = DPP_BOUND_ZERO
+//
+// if the VALU op is binary and
+//    $bound_ctrl==DPP_BOUND_OFF and
+//    $old==identity value (immediate) for the VALU op
+// -> $combined_old = src1,
+//    $combined_bound_ctrl = DPP_BOUND_OFF
+//
+// Otherwise cancel.
+//
+// The mov_dpp instruction should reside in the same BB as all its uses
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_GCNDPPCOMBINE_H
+#define LLVM_LIB_TARGET_AMDGPU_GCNDPPCOMBINE_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+class GCNDPPCombinePass : public PassInfoMixin<GCNDPPCombinePass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MAM);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
index 9a6a54bbc4e497..d16d45eef1e897 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
@@ -1,5 +1,5 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
-# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
+# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
+# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
 
 ---
 # GCN-LABEL: name: dpp64_old_impdef
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
index a1c3970a5bae90..179d0becf6693a 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
 
 ---
 # old is undefined: only combine when masks are fully enabled and
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
index 1151bde02ef62c..43355dc694dc62 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
@@ -1,6 +1,6 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1100
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1100
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
 
 ---
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
index 7e286a4dd678eb..324c0037511e1c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass=gcn-dpp-combine %s -o - | FileCheck -check-prefix=GFX12 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -passes=gcn-dpp-combine %s -o - | FileCheck -check-prefix=GFX12 %s
 
 ---
 name:            test_cvt_f32_bf8_byte0
diff --git a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
index 123893674ff5e9..3c1b3c95513169 100644
--- a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
 
 ---

llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp

cdevadas · 2024-08-27T09:49:37Z

llvm/test/CodeGen/AMDGPU/dpp64_combine.mir

@@ -1,5 +1,5 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
-# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
+# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN


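Retain the existing legacy pass manager coverage (`-run-pass`) in this test. Either add a second RUN line for the new pass manager (`-passes`) alongside the existing one, or leave the test unchanged. Please fix the other tests below in the same way.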

llvm/lib/Target/AMDGPU/GCNDPPCombine.h

llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp

llvm/lib/Target/AMDGPU/GCNDPPCombine.h

arsenm · 2024-08-28T13:46:28Z

llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp

@@ -749,9 +753,16 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
  return !Rollback;
 }

-bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
+bool GCNDPPCombineLegacy::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction())) {


These braces are not needed; LLVM style omits braces around a simple single-statement `if` body.

llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp

cdevadas · 2024-08-28T14:00:16Z

llvm/lib/Target/AMDGPU/GCNDPPCombine.h

+
+} // end namespace llvm
+
+#endif


Suggested change

#endif

#endif // LLVM_LIB_TARGET_AMDGPU_GCNDPPCOMBINE_H

I think clang-format adds this trailing `#endif` comment for you.

FixNamespaceComments adds the closing comment for namespaces, but there is probably no equivalent option for `#endif` directives.

optimisan force-pushed the port-gcn-dpp-combine branch from e135548 to d9f944a Compare August 26, 2024 05:51

optimisan marked this pull request as ready for review August 26, 2024 05:51

llvmbot added the backend:AMDGPU label Aug 26, 2024

optimisan requested review from cdevadas, arsenm, MaskRay and Pierre-vh August 26, 2024 06:06

optimisan marked this pull request as draft August 27, 2024 06:56

cdevadas reviewed Aug 27, 2024

View reviewed changes

pravinjagtap reviewed Aug 27, 2024

View reviewed changes

llvm/lib/Target/AMDGPU/GCNDPPCombine.h Show resolved Hide resolved

pravinjagtap reviewed Aug 27, 2024

View reviewed changes

llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp Show resolved Hide resolved

arsenm reviewed Aug 27, 2024

View reviewed changes

llvm/lib/Target/AMDGPU/GCNDPPCombine.h Outdated Show resolved Hide resolved

optimisan force-pushed the port-gcn-dpp-combine branch from e43bdb8 to facb98f Compare August 28, 2024 09:04

optimisan marked this pull request as ready for review August 28, 2024 09:12

arsenm approved these changes Aug 28, 2024

View reviewed changes

arsenm requested review from aeubanks and paperchalice August 28, 2024 13:46

cdevadas reviewed Aug 28, 2024

View reviewed changes

llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp Show resolved Hide resolved

llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp Outdated Show resolved Hide resolved

cdevadas reviewed Aug 28, 2024

View reviewed changes

optimisan and others added 5 commits August 29, 2024 06:43

AMDGPU/NewPM Port GCNDPPCombine to NPM

28fee48

Replace -run-pass with -passes in all but one test

51eb2d1

newlines and PassInfoMixin

b3354db

Apply suggestions

59d603d

Add MFPropsModifier and apply suggestions

eecdcd6

optimisan force-pushed the port-gcn-dpp-combine branch from facb98f to eecdcd6 Compare August 29, 2024 06:44

cdevadas approved these changes Aug 29, 2024

View reviewed changes

optimisan merged commit fdca2c3 into llvm:main Aug 29, 2024
8 checks passed

optimisan deleted the port-gcn-dpp-combine branch August 29, 2024 09:19

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

AMDGPU/NewPM Port GCNDPPCombine to NPM #105816

AMDGPU/NewPM Port GCNDPPCombine to NPM #105816

Uh oh!

optimisan commented Aug 23, 2024

Uh oh!

llvmbot commented Aug 26, 2024

Uh oh!

Uh oh!

Uh oh!

Uh oh!

cdevadas Aug 27, 2024

Uh oh!

Uh oh!

Uh oh!

Uh oh!

arsenm Aug 28, 2024

Uh oh!

Uh oh!

Uh oh!

cdevadas Aug 28, 2024

Uh oh!

arsenm Aug 28, 2024

Uh oh!

optimisan Aug 29, 2024

Uh oh!

Uh oh!

Uh oh!

AMDGPU/NewPM Port GCNDPPCombine to NPM #105816

AMDGPU/NewPM Port GCNDPPCombine to NPM #105816

Uh oh!

Conversation

optimisan commented Aug 23, 2024

Uh oh!

llvmbot commented Aug 26, 2024

Uh oh!

Uh oh!

Uh oh!

Uh oh!

cdevadas Aug 27, 2024

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

arsenm Aug 28, 2024

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

cdevadas Aug 28, 2024

Choose a reason for hiding this comment

Uh oh!

arsenm Aug 28, 2024

Choose a reason for hiding this comment

Uh oh!

optimisan Aug 29, 2024

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!