Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 8fef4bc

Browse files
committed
[AMDGPU] Disassembler: support for DPP
Review: http://reviews.llvm.org/D18642
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@265015 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 2693fe3 commit 8fef4bc

File tree

3 files changed

+112
-7
lines changed

3 files changed

+112
-7
lines changed

lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -83,10 +83,10 @@ DECODE_OPERAND(SReg_512)
8383
//
8484
//===----------------------------------------------------------------------===//
8585

86-
static inline uint32_t eatB32(ArrayRef<uint8_t>& Bytes) {
87-
assert(Bytes.size() >= sizeof eatB32(Bytes));
88-
const auto Res = support::endian::read32le(Bytes.data());
89-
Bytes = Bytes.slice(sizeof Res);
86+
template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
87+
assert(Bytes.size() >= sizeof(T));
88+
const auto Res = support::endian::read<T, support::endianness::little>(Bytes.data());
89+
Bytes = Bytes.slice(sizeof(T));
9090
return Res;
9191
}
9292

@@ -123,16 +123,28 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
123123
do {
124124
// ToDo: better to switch encoding length using some bit predicate
125125
// but it is unknown yet, so try all we can
126+
127+
// Try to decode DPP first to solve conflict with VOP1 and VOP2 encodings
128+
if (Bytes.size() >= 8) {
129+
const uint64_t QW = eatBytes<uint64_t>(Bytes);
130+
Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address);
131+
if (Res) break;
132+
}
133+
134+
// Reinitialize Bytes as DPP64 could have eaten too much
135+
Bytes = Bytes_.slice(0, MaxInstBytesNum);
136+
137+
// Try decode 32-bit instruction
126138
if (Bytes.size() < 4) break;
127-
const uint32_t DW = eatB32(Bytes);
139+
const uint32_t DW = eatBytes<uint32_t>(Bytes);
128140
Res = tryDecodeInst(DecoderTableVI32, MI, DW, Address);
129141
if (Res) break;
130142

131143
Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address);
132144
if (Res) break;
133145

134146
if (Bytes.size() < 4) break;
135-
const uint64_t QW = ((uint64_t)eatB32(Bytes) << 32) | DW;
147+
const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
136148
Res = tryDecodeInst(DecoderTableVI64, MI, QW, Address);
137149
if (Res) break;
138150

@@ -261,7 +273,7 @@ MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
261273
if (Bytes.size() < 4)
262274
return errOperand(0, "cannot read literal, inst bytes left " +
263275
Twine(Bytes.size()));
264-
return MCOperand::createImm(eatB32(Bytes));
276+
return MCOperand::createImm(eatBytes<uint32_t>(Bytes));
265277
}
266278

267279
MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {

lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1705,6 +1705,8 @@ class VOP1_DPP <vop1 op, string opName, VOPProfile p> :
17051705
VOP1_DPPe <op.VI>,
17061706
VOP_DPP <p.OutsDPP, p.InsDPP, opName#p.AsmDPP, [], p.HasModifiers> {
17071707
let AssemblerPredicates = [isVI];
1708+
let DecoderNamespace = "DPP";
1709+
let DisableDecoder = DisableVIDecoder;
17081710
let src0_modifiers = !if(p.HasModifiers, ?, 0);
17091711
let src1_modifiers = 0;
17101712
}
@@ -1767,6 +1769,8 @@ class VOP2_DPP <vop2 op, string opName, VOPProfile p> :
17671769
VOP2_DPPe <op.VI>,
17681770
VOP_DPP <p.OutsDPP, p.InsDPP, opName#p.AsmDPP, [], p.HasModifiers> {
17691771
let AssemblerPredicates = [isVI];
1772+
let DecoderNamespace = "DPP";
1773+
let DisableDecoder = DisableVIDecoder;
17701774
let src0_modifiers = !if(p.HasModifiers, ?, 0);
17711775
let src1_modifiers = !if(p.HasModifiers, ?, 0);
17721776
}
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# RUN: llvm-mc -arch=amdgcn -mcpu=tonga -disassemble -show-encoding < %s | FileCheck %s -check-prefix=VI
2+
3+
# VI: v_mov_b32_dpp v0, v0 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x58,0x00,0xff]
4+
0xfa 0x02 0x00 0x7e 0x00 0x58 0x00 0xff
5+
6+
# VI: v_mov_b32_dpp v0, v0 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x01,0xff]
7+
0xfa 0x02 0x00 0x7e 0x00 0x01 0x01 0xff
8+
9+
# VI: v_mov_b32_dpp v0, v0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x1f,0x01,0xff]
10+
0xfa 0x02 0x00 0x7e 0x00 0x1f 0x01 0xff
11+
12+
# VI: v_mov_b32_dpp v0, v0 row_ror:12 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x2c,0x01,0xff]
13+
0xfa 0x02 0x00 0x7e 0x00 0x2c 0x01 0xff
14+
15+
# VI: v_mov_b32_dpp v0, v0 wave_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x30,0x01,0xff]
16+
0xfa 0x02 0x00 0x7e 0x00 0x30 0x01 0xff
17+
18+
# VI: v_mov_b32_dpp v0, v0 wave_rol:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x34,0x01,0xff]
19+
0xfa 0x02 0x00 0x7e 0x00 0x34 0x01 0xff
20+
21+
# VI: v_mov_b32_dpp v0, v0 wave_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x38,0x01,0xff]
22+
0xfa 0x02 0x00 0x7e 0x00 0x38 0x01 0xff
23+
24+
# VI: v_mov_b32_dpp v0, v0 wave_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x3c,0x01,0xff]
25+
0xfa 0x02 0x00 0x7e 0x00 0x3c 0x01 0xff
26+
27+
# VI: v_mov_b32_dpp v0, v0 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x40,0x01,0xff]
28+
0xfa 0x02 0x00 0x7e 0x00 0x40 0x01 0xff
29+
30+
# VI: v_mov_b32_dpp v0, v0 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x41,0x01,0xff]
31+
0xfa 0x02 0x00 0x7e 0x00 0x41 0x01 0xff
32+
33+
# VI: v_mov_b32_dpp v0, v0 row_bcast:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x42,0x01,0xff]
34+
0xfa 0x02 0x00 0x7e 0x00 0x42 0x01 0xff
35+
36+
# VI: v_mov_b32_dpp v0, v0 row_bcast:31 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x43,0x01,0xff]
37+
0xfa 0x02 0x00 0x7e 0x00 0x43 0x01 0xff
38+
39+
# VI: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xa1]
40+
0xfa 0x02 0x00 0x7e 0x00 0x4d 0x08 0xa1
41+
42+
# VI: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x00,0xaf]
43+
0xfa 0x02 0x00 0x7e 0x00 0x4d 0x00 0xaf
44+
45+
# VI: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xf bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x00,0xf1]
46+
0xfa 0x02 0x00 0x7e 0x00 0x4d 0x00 0xf1
47+
48+
# VI: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xf bank_mask:0xf bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xff]
49+
0xfa 0x02 0x00 0x7e 0x00 0x4d 0x08 0xff
50+
51+
# VI: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x00,0xa1]
52+
0xfa 0x02 0x00 0x7e 0x00 0x4d 0x00 0xa1
53+
54+
# VI: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0xf bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xaf]
55+
0xfa 0x02 0x00 0x7e 0x00 0x4d 0x08 0xaf
56+
57+
# VI: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xf bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xf1]
58+
0xfa 0x02 0x00 0x7e 0x00 0x4d 0x08 0xf1
59+
60+
# VI: v_cvt_u32_f32_dpp v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x0e,0x00,0x7e,0x00,0x01,0x09,0xa1]
61+
0xfa 0x0e 0x00 0x7e 0x00 0x01 0x09 0xa1
62+
63+
# VI: v_fract_f32_dpp v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x36,0x00,0x7e,0x00,0x01,0x09,0xa1]
64+
0xfa 0x36 0x00 0x7e 0x00 0x01 0x09 0xa1
65+
66+
# VI: v_sin_f32_dpp v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x52,0x00,0x7e,0x00,0x01,0x09,0xa1]
67+
0xfa 0x52 0x00 0x7e 0x00 0x01 0x09 0xa1
68+
69+
# VI: v_add_f32_dpp v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x09,0xa1]
70+
0xfa 0x00 0x00 0x02 0x00 0x01 0x09 0xa1
71+
72+
# VI: v_min_f32_dpp v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x14,0x00,0x01,0x09,0xa1]
73+
0xfa 0x00 0x00 0x14 0x00 0x01 0x09 0xa1
74+
75+
# VI: v_and_b32_dpp v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x26,0x00,0x01,0x09,0xa1]
76+
0xfa 0x00 0x00 0x26 0x00 0x01 0x09 0xa1
77+
78+
# VI: v_add_f32_dpp v0, -v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x19,0xa1]
79+
0xfa 0x00 0x00 0x02 0x00 0x01 0x19 0xa1
80+
81+
# VI: v_add_f32_dpp v0, v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x89,0xa1]
82+
0xfa 0x00 0x00 0x02 0x00 0x01 0x89 0xa1
83+
84+
# VI: v_add_f32_dpp v0, -v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x99,0xa1]
85+
0xfa 0x00 0x00 0x02 0x00 0x01 0x99 0xa1
86+
87+
# VI: v_add_f32_dpp v0, |v0|, -v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x69,0xa1]
88+
89+
0xfa 0x00 0x00 0x02 0x00 0x01 0x69 0xa1

0 commit comments

Comments
 (0)