Skip to content

Commit 99c1c83

Browse files
shiltian authored and searlmc1 committed
[AMDGPU][AsmParser] Allow v_writelane_b32 to use SGPR and M0 as source operands at the same time (llvm#78827)
Currently the asm parser rejects `v_writelane_b32 v1, s13, m0` as an illegal instruction for pre-gfx11 targets because it appears to use the constant bus twice while the hardware allows only one use. However, based on the comment on `AMDGPUInstructionSelector::selectWritelane`, it is allowed to have M0 as the lane selector and an SGPR as SRC0, because the lane selector doesn't count as a use of the constant bus. In fact, codegen can already generate this form, but the inconsistency was never exposed: the constant bus limitation is only validated when parsing assembly, and we had no test case in which both an SGPR and M0 are used as source operands of the instruction. Change-Id: I288a92b8ae6305c4c9a7ba51f4ddba4623fc899f
1 parent 7b7572f commit 99c1c83

File tree

3 files changed

+55
-0
lines changed

3 files changed

+55
-0
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3541,6 +3541,24 @@ bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
35413541
}
35423542
}
35433543

3544+
// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3545+
// Writelane is special in that it can use SGPR and M0 (which would normally
3546+
// count as using the constant bus twice - but in this case it is allowed since
3547+
// the lane selector doesn't count as a use of the constant bus). However, it is
3548+
// still required to abide by the 1 SGPR rule.
3549+
// Returns true when Inst is a V_WRITELANE_B32 (gfx6/gfx7 or VI opcode) whose
// operand 2 is a register that resolves to M0; returns false otherwise.
static bool checkWriteLane(const MCInst &Inst) {
3550+
const unsigned Opcode = Inst.getOpcode();
3551+
// Only these two V_WRITELANE_B32 opcodes are recognized here; every other
// opcode is reported as "not the special case".
if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3552+
return false;
3553+
// Operand 2 is treated as the lane selector.
const MCOperand &LaneSelOp = Inst.getOperand(2);
3554+
// A non-register lane selector (e.g. an immediate) cannot be M0.
if (!LaneSelOp.isReg())
3555+
return false;
3556+
// NOTE(review): mc2PseudoReg presumably maps a subtarget-specific MC register
// to its pseudo register; both M0 spellings are still compared below.
auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3557+
if (LaneSelReg == M0 || LaneSelReg == M0_gfxpre11)
3558+
return true;
3559+
return false;
3560+
}
3561+
35443562
bool AMDGPUAsmParser::validateConstantBusLimitations(
35453563
const MCInst &Inst, const OperandVector &Operands) {
35463564
const unsigned Opcode = Inst.getOpcode();
@@ -3556,6 +3574,9 @@ bool AMDGPUAsmParser::validateConstantBusLimitations(
35563574
!isVOPD(Opcode))
35573575
return true;
35583576

3577+
if (checkWriteLane(Inst))
3578+
return true;
3579+
35593580
// Check special imm operands (used by madmk, etc)
35603581
if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
35613582
++NumLiterals;

llvm/test/MC/AMDGPU/writelane_m0.s

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// RUN: llvm-mc --triple=amdgcn --mcpu=gfx600 -show-encoding %s | FileCheck %s -check-prefix=GFX6
2+
// RUN: llvm-mc --triple=amdgcn --mcpu=gfx700 -show-encoding %s | FileCheck %s -check-prefix=GFX7
3+
// RUN: llvm-mc --triple=amdgcn --mcpu=gfx904 -show-encoding %s | FileCheck %s -check-prefix=GFX9
4+
// RUN: llvm-mc --triple=amdgcn --mcpu=gfx940 -show-encoding %s | FileCheck %s -check-prefix=GFX9
5+
// RUN: llvm-mc --triple=amdgcn --mcpu=gfx1010 -show-encoding %s | FileCheck %s -check-prefix=GFX10
6+
// RUN: llvm-mc --triple=amdgcn --mcpu=gfx1030 -show-encoding %s | FileCheck %s -check-prefix=GFX10
7+
// RUN: llvm-mc --triple=amdgcn --mcpu=gfx1100 -show-encoding %s | FileCheck %s -check-prefix=GFX11
8+
// RUN: llvm-mc --triple=amdgcn --mcpu=gfx1200 -show-encoding %s | FileCheck %s -check-prefix=GFX12
9+
10+
.text
11+
v_writelane_b32 v1, s13, m0
12+
13+
// GFX6: v_writelane_b32 v1, s13, m0 ; encoding: [0x0d,0xf8,0x02,0x04]
14+
// GFX7: v_writelane_b32 v1, s13, m0 ; encoding: [0x0d,0xf8,0x02,0x04]
15+
// GFX9: v_writelane_b32 v1, s13, m0 ; encoding: [0x01,0x00,0x8a,0xd2,0x0d,0xf8,0x00,0x00]
16+
// GFX10: v_writelane_b32 v1, s13, m0 ; encoding: [0x01,0x00,0x61,0xd7,0x0d,0xf8,0x00,0x00]
17+
// GFX11: v_writelane_b32 v1, s13, m0 ; encoding: [0x01,0x00,0x61,0xd7,0x0d,0xfa,0x00,0x00]
18+
// GFX12: v_writelane_b32 v1, s13, m0 ; encoding: [0x01,0x00,0x61,0xd7,0x0d,0xfa,0x00,0x00]
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s
3+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s
4+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s
5+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s
6+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s
7+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s
8+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s
9+
10+
---
11+
12+
name: writelane_m0
13+
body: |
14+
bb.0:
15+
; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr0, $m0, $vgpr0
16+
$vgpr0 = V_WRITELANE_B32 $sgpr0, $m0, $vgpr0

0 commit comments

Comments
 (0)