Skip to content

Commit 6a21e00

Browse files
authored
[AMDGPU][AsmParser] Allow v_writelane_b32 to use SGPR and M0 as source operands at the same time (#78827)
Currently the asm parser rejects `v_writelane_b32 v1, s13, m0` as an illegal instruction for pre-gfx11 targets because it appears to use the constant bus twice, while the hardware allows only one use. However, according to the comment on `AMDGPUInstructionSelector::selectWritelane`, it is legal to have M0 as the lane selector and an SGPR as SRC0, because the lane selector doesn't count as a use of the constant bus. In fact, codegen can already generate this form, but the inconsistency was never exposed: the constant bus limitation is only validated when parsing assembly, and there was no test case in which both an SGPR and M0 are used as source operands of the instruction.
1 parent 30b9140 commit 6a21e00

File tree

3 files changed

+55
-0
lines changed

3 files changed

+55
-0
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3514,6 +3514,24 @@ bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
35143514
}
35153515
}
35163516

3517+
// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3518+
// Writelane is special in that it can use SGPR and M0 (which would normally
3519+
// count as using the constant bus twice - but in this case it is allowed since
3520+
// the lane selector doesn't count as a use of the constant bus). However, it is
3521+
// still required to abide by the 1 SGPR rule.
3522+
static bool checkWriteLane(const MCInst &Inst) {
3523+
const unsigned Opcode = Inst.getOpcode();
3524+
if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3525+
return false;
3526+
const MCOperand &LaneSelOp = Inst.getOperand(2);
3527+
if (!LaneSelOp.isReg())
3528+
return false;
3529+
auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3530+
if (LaneSelReg == M0 || LaneSelReg == M0_gfxpre11)
3531+
return true;
3532+
return false;
3533+
}
3534+
35173535
bool AMDGPUAsmParser::validateConstantBusLimitations(
35183536
const MCInst &Inst, const OperandVector &Operands) {
35193537
const unsigned Opcode = Inst.getOpcode();
@@ -3529,6 +3547,9 @@ bool AMDGPUAsmParser::validateConstantBusLimitations(
35293547
!isVOPD(Opcode))
35303548
return true;
35313549

3550+
if (checkWriteLane(Inst))
3551+
return true;
3552+
35323553
// Check special imm operands (used by madmk, etc)
35333554
if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
35343555
++NumLiterals;

llvm/test/MC/AMDGPU/writelane_m0.s

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// RUN: llvm-mc --triple=amdgcn --mcpu=gfx600 -show-encoding %s | FileCheck %s -check-prefix=GFX6
2+
// RUN: llvm-mc --triple=amdgcn --mcpu=gfx700 -show-encoding %s | FileCheck %s -check-prefix=GFX7
3+
// RUN: llvm-mc --triple=amdgcn --mcpu=gfx904 -show-encoding %s | FileCheck %s -check-prefix=GFX9
4+
// RUN: llvm-mc --triple=amdgcn --mcpu=gfx940 -show-encoding %s | FileCheck %s -check-prefix=GFX9
5+
// RUN: llvm-mc --triple=amdgcn --mcpu=gfx1010 -show-encoding %s | FileCheck %s -check-prefix=GFX10
6+
// RUN: llvm-mc --triple=amdgcn --mcpu=gfx1030 -show-encoding %s | FileCheck %s -check-prefix=GFX10
7+
// RUN: llvm-mc --triple=amdgcn --mcpu=gfx1100 -show-encoding %s | FileCheck %s -check-prefix=GFX11
8+
// RUN: llvm-mc --triple=amdgcn --mcpu=gfx1200 -show-encoding %s | FileCheck %s -check-prefix=GFX12
9+
10+
.text
11+
v_writelane_b32 v1, s13, m0
12+
13+
// GFX6: v_writelane_b32 v1, s13, m0 ; encoding: [0x0d,0xf8,0x02,0x04]
14+
// GFX7: v_writelane_b32 v1, s13, m0 ; encoding: [0x0d,0xf8,0x02,0x04]
15+
// GFX9: v_writelane_b32 v1, s13, m0 ; encoding: [0x01,0x00,0x8a,0xd2,0x0d,0xf8,0x00,0x00]
16+
// GFX10: v_writelane_b32 v1, s13, m0 ; encoding: [0x01,0x00,0x61,0xd7,0x0d,0xf8,0x00,0x00]
17+
// GFX11: v_writelane_b32 v1, s13, m0 ; encoding: [0x01,0x00,0x61,0xd7,0x0d,0xfa,0x00,0x00]
18+
// GFX12: v_writelane_b32 v1, s13, m0 ; encoding: [0x01,0x00,0x61,0xd7,0x0d,0xfa,0x00,0x00]
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s
3+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s
4+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s
5+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s
6+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s
7+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s
8+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s
9+
10+
---
11+
12+
name: writelane_m0
13+
body: |
14+
bb.0:
15+
; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr0, $m0, $vgpr0
16+
$vgpr0 = V_WRITELANE_B32 $sgpr0, $m0, $vgpr0

0 commit comments

Comments
 (0)