Skip to content

[AMDGPU][True16][MC] Support v_swap_b16. #100442

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,12 @@ def VOPDstOperand_t16Lo128 : VOPDstOperand <VGPR_16_Lo128> {
let DecoderMethod = "DecodeVGPR_16_Lo128RegisterClass";
}

// Source-encoded destination operand for instructions like v_swap_b16.
// It is a VOPDstOperand over the VGPR_16_Lo128 register class, but its
// encoder and decoder methods are copied from the VSrcT_b16_Lo128 source
// operand, so the operand is encoded and decoded by the same methods that
// source operand names.
def VOPSrcEncodedDstOperand_t16Lo128 : VOPDstOperand <VGPR_16_Lo128> {
let EncoderMethod = VSrcT_b16_Lo128.EncoderMethod;
let DecoderMethod = VSrcT_b16_Lo128.DecoderMethod;
}

class VINTRPe <bits<2> op> : Enc32 {
bits<8> vdst;
bits<8> vsrc;
Expand Down
17 changes: 16 additions & 1 deletion llvm/lib/Target/AMDGPU/VOP1Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -730,7 +730,21 @@ def V_ACCVGPR_MOV_B32 : VOP1_Pseudo<"v_accvgpr_mov_b32", VOPProfileAccMov, [], 1
let isAsCheapAsAMove = 1;
}

// Operand profile used by v_swap_b16. It derives from the True16 profile of
// VOP_I16_I16 and overrides Outs32, Ins32 and Asm32 so that the instruction
// has two outputs and two inputs.
def VOP_SWAP_I16 : VOPProfile_True16<VOP_I16_I16> {
// $vdst uses the plain destination operand, while $vdst1 uses the
// source-encoded destination operand.
let Outs32 = (outs VOPDstOperand_t16Lo128:$vdst,
VOPSrcEncodedDstOperand_t16Lo128:$vdst1);
// The inputs use the same two operand kinds in the opposite order.
let Ins32 = (ins VOPSrcEncodedDstOperand_t16Lo128:$src0,
VOPDstOperand_t16Lo128:$src1);
// Only $vdst and $src0 are referenced by the assembly string.
let Asm32 = " $vdst, $src0";
}

let SubtargetPredicate = isGFX11Plus in {
// Pseudo for v_swap_b16, built on the VOP_SWAP_I16 profile with an empty
// pattern list and the VOP1Only flag set.
def V_SWAP_B16 : VOP1_Pseudo<"v_swap_b16", VOP_SWAP_I16, [], /* VOP1Only= */true> {
// Each output is tied to the opposite input operand.
let Constraints = "$vdst = $src1, $vdst1 = $src0";
// $vdst1 and $src1 are left out of the encoding.
// NOTE(review): presumably because they are the tied duplicates of $src0
// and $vdst - confirm against the encoding class.
let DisableEncoding = "$vdst1, $src1";
let SchedRW = [Write64Bit, Write64Bit];
// Gated on the UseRealTrue16Insts predicate.
let True16Predicate = UseRealTrue16Insts;
}
// Restrict src0 to be VGPR
def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_MOVRELS,
[], /*VOP1Only=*/ 1> {
Expand Down Expand Up @@ -953,7 +967,8 @@ defm V_CTZ_I32_B32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x03a,
"V_FFBL_B32", "v_ctz_i32_b32">;
defm V_CLS_I32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x03b,
"V_FFBH_I32", "v_cls_i32">;
defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11_gfx12<0x067>;
defm V_SWAP_B16 : VOP1Only_Real_gfx11_gfx12<0x066>;
defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11_gfx12<0x067>;
defm V_MOV_B16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x01c, "v_mov_b16">;
defm V_NOT_B16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x069, "v_not_b16">;
defm V_CVT_I32_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">;
Expand Down
3 changes: 3 additions & 0 deletions llvm/test/MC/AMDGPU/gfx10_unsupported.s
Original file line number Diff line number Diff line change
Expand Up @@ -3287,6 +3287,9 @@ v_subrev_u32_e64 v255, s[12:13], v1, v2
v_subrev_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

// v_swap_b16 is expected to be diagnosed as unsupported here.
v_swap_b16 v0.l, v0.l
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_wmma_bf16_16x16x16_bf16 v[16:19], 1.0, v[8:15], v[16:19]
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

Expand Down
9 changes: 9 additions & 0 deletions llvm/test/MC/AMDGPU/gfx11_asm_vop1.s
Original file line number Diff line number Diff line change
Expand Up @@ -3448,6 +3448,15 @@ v_sqrt_f64 v[5:6], src_scc
v_sqrt_f64 v[254:255], 0xaf123456
// GFX11: encoding: [0xff,0x68,0xfc,0x7f,0x56,0x34,0x12,0xaf]

// v_swap_b16 encodings, covering a low/high half mix in both orders and
// v127.l used for both operands.
v_swap_b16 v5.l, v1.h
// GFX11: encoding: [0x81,0xcd,0x0a,0x7e]

v_swap_b16 v5.h, v1.l
// GFX11: encoding: [0x01,0xcd,0x0a,0x7f]

v_swap_b16 v127.l, v127.l
// GFX11: encoding: [0x7f,0xcd,0xfe,0x7e]

v_swap_b32 v5, v1
// GFX11: encoding: [0x01,0xcb,0x0a,0x7e]

Expand Down
12 changes: 12 additions & 0 deletions llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,18 @@ v_sqrt_f16_e32 v255.l, v1.l
v_sqrt_f16_e32 v5.l, v199.l
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction

// v_swap_b16_e32 operands that must be rejected as invalid. The cases are
// v128.l as the first operand, then s0, 0 and 0xfe0b as the second operand.
v_swap_b16_e32 v128.l, v0.l
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction

v_swap_b16_e32 v0.l, s0
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction

v_swap_b16_e32 v0.l, 0
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction

v_swap_b16_e32 v0.l, 0xfe0b
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction

v_trunc_f16_e32 v128, 0xfe0b
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

Expand Down
9 changes: 9 additions & 0 deletions llvm/test/MC/AMDGPU/gfx12_asm_vop1.s
Original file line number Diff line number Diff line change
Expand Up @@ -3460,6 +3460,15 @@ v_sqrt_f64 v[5:6], src_scc
v_sqrt_f64 v[254:255], 0xaf123456
// GFX12: v_sqrt_f64_e32 v[254:255], 0xaf123456 ; encoding: [0xff,0x68,0xfc,0x7f,0x56,0x34,0x12,0xaf]

// v_swap_b16 encodings, covering a low/high half mix in both orders and
// v127.l used for both operands. The checks also match the printed
// instruction text.
v_swap_b16 v5.l, v1.h
// GFX12: v_swap_b16 v5.l, v1.h ; encoding: [0x81,0xcd,0x0a,0x7e]

v_swap_b16 v5.h, v1.l
// GFX12: v_swap_b16 v5.h, v1.l ; encoding: [0x01,0xcd,0x0a,0x7f]

v_swap_b16 v127.l, v127.l
// GFX12: v_swap_b16 v127.l, v127.l ; encoding: [0x7f,0xcd,0xfe,0x7e]

v_swap_b32 v5, v1
// GFX12: v_swap_b32 v5, v1 ; encoding: [0x01,0xcb,0x0a,0x7e]

Expand Down
15 changes: 15 additions & 0 deletions llvm/test/MC/AMDGPU/gfx12_asm_vop1_t16_err.s
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,21 @@ v_sqrt_f16_e32 v255, v1
v_sqrt_f16_e32 v5, v199
// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

// v_swap_b16_e32 operands that must be rejected as invalid. The cases are
// v128.l as the first operand, then v255.l, s0, 0 and 0xfe0b as the second
// operand.
v_swap_b16_e32 v128.l, v0.l
// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction

v_swap_b16_e32 v0.l, v255.l
// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction

v_swap_b16_e32 v0.l, s0
// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction

v_swap_b16_e32 v0.l, 0
// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction

v_swap_b16_e32 v0.l, 0xfe0b
// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction

v_trunc_f16_e32 v128, 0xfe0b
// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

Expand Down
12 changes: 12 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/decode-err.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,18 @@
# W64: [[@LINE+1]]:1: warning: invalid instruction encoding
0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf

# v_swap_b16 byte sequences whose expected disassembly still prints the
# instruction but annotates the second operand as an invalid register or an
# invalid immediate.
# GFX11: v_swap_b16 v5.h, s1/*Invalid register, operand has 'VGPR_16_Lo128' register class*/ ; encoding: [0x01,0xcc,0x0a,0x7f]
# GFX12: v_swap_b16 v5.h, s1/*Invalid register, operand has 'VGPR_16_Lo128' register class*/ ; encoding: [0x01,0xcc,0x0a,0x7f]
0x01,0xcc,0x0a,0x7f

# GFX11: v_swap_b16 v5.h, 0x3c00/*Invalid immediate*/ ; encoding: [0x00,0xcc,0x0a,0x7f]
# GFX12: v_swap_b16 v5.h, 0x3c00/*Invalid immediate*/ ; encoding: [0x00,0xcc,0x0a,0x7f]
0xf2,0xcc,0x0a,0x7f

# GFX11: v_swap_b16 v5.h, 0x78563412/*Invalid immediate*/ ; encoding: [0x12,0xcc,0x0a,0x7f]
# GFX12: v_swap_b16 v5.h, 0x78563412/*Invalid immediate*/ ; encoding: [0x12,0xcc,0x0a,0x7f]
0xff,0xcc,0x0a,0x7f,0x12,0x34,0x56,0x78

# W32: v_wmma_f32_16x16x16_f16 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x40,0x40,0xcc,0x00,0x11,0x42,0x1c]
# W64: v_wmma_f32_16x16x16_f16 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x40,0x40,0xcc,0x00,0x11,0x42,0x1c]
0x10,0x40,0x40,0xcc,0x00,0x11,0x42,0x1c
Expand Down
6 changes: 6 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3344,6 +3344,12 @@
# GFX11: v_sqrt_f64_e32 v[254:255], 0xaf123456 ; encoding: [0xff,0x68,0xfc,0x7f,0x56,0x34,0x12,0xaf]
0xff,0x68,0xfc,0x7f,0x56,0x34,0x12,0xaf

# v_swap_b16 decodes with a low/high half mix in both orders.
# GFX11-TRUE16: v_swap_b16 v5.l, v1.h ; encoding: [0x81,0xcd,0x0a,0x7e]
0x81,0xcd,0x0a,0x7e

# GFX11-TRUE16: v_swap_b16 v5.h, v1.l ; encoding: [0x01,0xcd,0x0a,0x7f]
0x01,0xcd,0x0a,0x7f

# GFX11: v_swap_b32 v5, v1 ; encoding: [0x01,0xcb,0x0a,0x7e]
0x01,0xcb,0x0a,0x7e

Expand Down
Loading