Skip to content

Commit 31f215a

Browse files
committed
[AMDGPU] Support v_mov_b64 in dpp combine
Differential Revision: https://reviews.llvm.org/D121411
1 parent 6181458 commit 31f215a

File tree

4 files changed, with 19 additions and 3 deletions.

llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,9 @@ MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
167167
return nullptr;
168168
case AMDGPU::COPY:
169169
case AMDGPU::V_MOV_B32_e32:
170-
case AMDGPU::V_MOV_B64_PSEUDO: {
170+
case AMDGPU::V_MOV_B64_PSEUDO:
171+
case AMDGPU::V_MOV_B64_e32:
172+
case AMDGPU::V_MOV_B64_e64: {
171173
auto &Op1 = Def->getOperand(1);
172174
if (Op1.isImm())
173175
return &Op1;
@@ -183,6 +185,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
183185
bool CombBCZ,
184186
bool IsShrinkable) const {
185187
assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||
188+
MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp ||
186189
MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
187190

188191
auto OrigOp = OrigMI.getOpcode();
@@ -383,6 +386,7 @@ bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
383386

384387
bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
385388
assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||
389+
MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp ||
386390
MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
387391
LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);
388392

@@ -399,7 +403,8 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
399403
return false;
400404
}
401405

402-
if (MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO) {
406+
if (MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
407+
MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp) {
403408
auto *DppCtrl = TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl);
404409
assert(DppCtrl && DppCtrl->isImm());
405410
if (!AMDGPU::isLegal64BitDPPControl(DppCtrl->getImm())) {
@@ -616,7 +621,8 @@ bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
616621
if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
617622
Changed = true;
618623
++NumDPPMovsCombined;
619-
} else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO) {
624+
} else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
625+
MI.getOpcode() == AMDGPU::V_MOV_B64_dpp) {
620626
if (ST->has64BitDPP() && combineDPPMov(MI)) {
621627
Changed = true;
622628
++NumDPPMovsCombined;

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2157,6 +2157,13 @@ std::pair<MachineInstr*, MachineInstr*>
21572157
SIInstrInfo::expandMovDPP64(MachineInstr &MI) const {
21582158
assert (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
21592159

2160+
if (ST.hasMovB64() &&
2161+
AMDGPU::isLegal64BitDPPControl(
2162+
getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl)->getImm())) {
2163+
MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
2164+
return std::make_pair(&MI, nullptr);
2165+
}
2166+
21602167
MachineBasicBlock &MBB = *MI.getParent();
21612168
DebugLoc DL = MBB.findDebugLoc(MI);
21622169
MachineFunction *MF = MBB.getParent();

llvm/test/CodeGen/AMDGPU/dpp64_combine.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP64,GFX90A
2+
; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP64,DPPMOV64
23
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX10
34

45
; GCN-LABEL: {{^}}dpp64_ceil:
@@ -50,6 +51,7 @@ define amdgpu_kernel void @dpp64_rcp_unsupported_ctl(i64 addrspace(1)* %arg, i64
5051

5152
; GCN-LABEL: {{^}}dpp64_div:
5253
; GCN: global_load_dwordx2 [[V:v\[[0-9:]+\]]],
54+
; DPPMOV64: v_mov_b64_dpp v[{{[0-9:]+}}], [[V]] row_newbcast:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
5355
; GFX90A-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_newbcast:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
5456
; GFX10-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_share:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
5557
; GCN: v_div_scale_f64

llvm/test/CodeGen/AMDGPU/dpp64_combine.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
2+
# RUN: llc -march=amdgcn -mcpu=gfx940 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
23

34
---
45
# GCN-LABEL: name: dpp64_old_impdef

0 commit comments

Comments
 (0)