Skip to content

Commit e6d16f9

Browse files
authored
[AMDGPU] Allow unaligned VGPR for ds_read_b96_tr_b6 (llvm#125169)
All load transpose instructions follow the gfx950 standard of even-aligned VGPRs, except ds_read_b96_tr_b6, which allows unaligned VGPRs. Co-authored-by: Sirish Pande <[email protected]>
1 parent 51b1230 commit e6d16f9

File tree

4 files changed

+18
-5
lines changed

4 files changed

+18
-5
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4853,7 +4853,10 @@ bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
48534853

48544854
bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
48554855
auto FB = getFeatureBits();
4856-
if (!FB[AMDGPU::FeatureGFX90AInsts])
4856+
unsigned Opc = Inst.getOpcode();
4857+
// DS_READ_B96_TR_B6 is the only DS instruction in GFX950 that allows
4858+
// unaligned VGPRs. All others only allow even-aligned VGPRs.
4859+
if (!(FB[AMDGPU::FeatureGFX90AInsts]) || Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
48574860
return true;
48584861

48594862
const MCRegisterInfo *MRI = getMRI();

llvm/test/MC/AMDGPU/gfx950-unsupported.s

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -239,10 +239,6 @@ ds_read_b64_tr_b16 v[2:3], v2 offset:-64
239239
//===----------------------------------------------------------------------===//
240240
// ds_read_b96_tr_b6
241241
//===----------------------------------------------------------------------===//
242-
ds_read_b96_tr_b6 v[1:3], v0
243-
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
244-
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
245-
246242
ds_read_b96_tr_b6 v1, v0
247243
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
248244
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

llvm/test/MC/AMDGPU/gfx950_asm_read_tr.s

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -32,3 +32,11 @@ ds_read_b96_tr_b6 v[0:2], v0
3232
ds_read_b96_tr_b6 v[2:4], v2 offset:64
3333
// GFX940-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
3434
// GFX950: encoding: [0x40,0x00,0xc2,0xd9,0x02,0x00,0x00,0x02]
35+
36+
ds_read_b96_tr_b6 v[1:3], v0
37+
// GFX940-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
38+
// GFX950: encoding: [0x00,0x00,0xc2,0xd9,0x00,0x00,0x00,0x01]
39+
40+
ds_read_b96_tr_b6 v[1:3], v2 offset:64
41+
// GFX940-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
42+
// GFX950: encoding: [0x40,0x00,0xc2,0xd9,0x02,0x00,0x00,0x01]

llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_ds_read_tr.txt

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -35,3 +35,9 @@
3535

3636
# GFX950: ds_read_b96_tr_b6 v[2:4], v2 offset:64 ; encoding: [0x40,0x00,0xc2,0xd9,0x02,0x00,0x00,0x02]
3737
0x40,0x00,0xc2,0xd9,0x02,0x00,0x00,0x02
38+
39+
# GFX950: ds_read_b96_tr_b6 v[1:3], v0 ; encoding: [0x00,0x00,0xc2,0xd9,0x00,0x00,0x00,0x01]
40+
0x00,0x00,0xc2,0xd9,0x00,0x00,0x00,0x01
41+
42+
# GFX950: ds_read_b96_tr_b6 v[1:3], v2 offset:64 ; encoding: [0x40,0x00,0xc2,0xd9,0x02,0x00,0x00,0x01]
43+
0x40,0x00,0xc2,0xd9,0x02,0x00,0x00,0x01

0 commit comments

Comments
 (0)