Skip to content

Commit 605578f

Browse files
committed
True16 for v_alignbyte_b32 in MC
1 parent a1d71c3 commit 605578f

16 files changed

+192
-45
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19567,6 +19567,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1956719567
llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
1956819568
llvm::SyncScope::ID SSID;
1956919569
switch (BuiltinID) {
19570+
case AMDGPU::BI__builtin_amdgcn_alignbyte: {
19571+
llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19572+
llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19573+
llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19574+
llvm::Function *F =
19575+
CGM.getIntrinsic(Intrinsic::amdgcn_alignbyte, Src2->getType());
19576+
return Builder.CreateCall(F, {Src0, Src1, Src2});
19577+
}
1957019578
case AMDGPU::BI__builtin_amdgcn_div_scale:
1957119579
case AMDGPU::BI__builtin_amdgcn_div_scalef: {
1957219580
// Translate from the intrinsic's struct return to the builtin's out

clang/test/CodeGenOpenCL/builtins-amdgcn.cl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -734,7 +734,7 @@ kernel void test_alignbit(global uint* out, uint src0, uint src1, uint src2) {
734734
}
735735

736736
// CHECK-LABEL: @test_alignbyte(
737-
// CHECK: tail call{{.*}} i32 @llvm.amdgcn.alignbyte(i32 %src0, i32 %src1, i32 %src2)
737+
// CHECK: tail call{{.*}} i32 @llvm.amdgcn.alignbyte.i32(i32 %src0, i32 %src1, i32 %src2)
738738
kernel void test_alignbyte(global uint* out, uint src0, uint src1, uint src2) {
739739
*out = __builtin_amdgcn_alignbyte(src0, src1, src2);
740740
}

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2353,8 +2353,8 @@ def int_amdgcn_writelane :
23532353
[IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]
23542354
>;
23552355

2356-
def int_amdgcn_alignbyte : ClangBuiltin<"__builtin_amdgcn_alignbyte">,
2357-
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2356+
def int_amdgcn_alignbyte : DefaultAttrsIntrinsic<[llvm_i32_ty],
2357+
[llvm_i32_ty, llvm_i32_ty, llvm_anyint_ty],
23582358
[IntrNoMem, IntrSpeculatable]
23592359
>;
23602360

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,11 @@ defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGP
212212
defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>;
213213
defm V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfi>;
214214
defm V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, fshr>;
215-
defm V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbyte>;
215+
defm V_ALIGNBYTE_B32 : VOP3Inst_t16_with_profiles <"v_alignbyte_b32",
216+
VOP3_Profile<VOP_I32_I32_I32_I32>,
217+
VOP3_Profile_True16<VOP_I32_I32_I32_I16, VOP3_OPSEL>,
218+
VOP3_Profile_Fake16<VOP_I32_I32_I32_I16, VOP3_OPSEL>,
219+
int_amdgcn_alignbyte>;
216220

217221
// XXX - No FPException seems suspect but manual doesn't say it does
218222
let mayRaiseFPException = 0 in {
@@ -1676,7 +1680,7 @@ defm V_FMA_F32 : VOP3_Realtriple_gfx11_gfx12<0x213>;
16761680
defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12<0x214>;
16771681
defm V_LERP_U8 : VOP3_Realtriple_gfx11_gfx12<0x215>;
16781682
defm V_ALIGNBIT_B32 : VOP3_Realtriple_gfx11_gfx12<0x216>;
1679-
defm V_ALIGNBYTE_B32 : VOP3_Realtriple_gfx11_gfx12<0x217>;
1683+
defm V_ALIGNBYTE_B32 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x217, "v_alignbyte_b32">;
16801684
defm V_MULLIT_F32 : VOP3_Realtriple_gfx11_gfx12<0x218>;
16811685
defm V_MIN3_F32 : VOP3_Realtriple_gfx11<0x219>;
16821686
defm V_MIN3_I32 : VOP3_Realtriple_gfx11_gfx12<0x21a>;

llvm/test/MC/AMDGPU/gfx11_asm_vop3.s

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -461,11 +461,11 @@ v_alignbyte_b32 v5, s1, v255, s3
461461
v_alignbyte_b32 v5, s105, s105, s105
462462
// GFX11: v_alignbyte_b32 v5, s105, s105, s105 ; encoding: [0x05,0x00,0x17,0xd6,0x69,0xd2,0xa4,0x01]
463463

464-
v_alignbyte_b32 v5, vcc_lo, ttmp15, v3
465-
// GFX11: v_alignbyte_b32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04]
464+
v_alignbyte_b32 v5, vcc_lo, ttmp15, v3.l
465+
// GFX11: v_alignbyte_b32 v5, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04]
466466

467-
v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255
468-
// GFX11: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
467+
v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.l
468+
// GFX11: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.l ; encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
469469

470470
v_alignbyte_b32 v5, ttmp15, src_scc, ttmp15
471471
// GFX11: v_alignbyte_b32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x17,0xd6,0x7b,0xfa,0xed,0x01]
@@ -494,6 +494,9 @@ v_alignbyte_b32 v5, src_scc, vcc_lo, -1
494494
v_alignbyte_b32 v255, 0xaf123456, vcc_hi, null
495495
// GFX11: v_alignbyte_b32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x17,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
496496

497+
v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.h
498+
// GFX11: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
499+
497500
v_and_b16 v5, v1, v2
498501
// GFX11: v_and_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00]
499502

llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -363,22 +363,22 @@ v_alignbit_b32_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bou
363363
v_alignbit_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
364364
// GFX11: v_alignbit_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x16,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
365365

366-
v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
367-
// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
366+
v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0]
367+
// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
368368

369-
v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
370-
// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
369+
v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[0,1,2,3]
370+
// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
371371

372-
v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_mirror
373-
// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
372+
v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_mirror row_mask:0xf bank_mask:0xf
373+
// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
374374

375-
v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_half_mirror
376-
// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff]
375+
v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_half_mirror row_mask:0xf bank_mask:0xf
376+
// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff]
377377

378-
v_alignbyte_b32_e64_dpp v5, v1, v2, v255 row_shl:1
379-
// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff]
378+
v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l row_shl:1 row_mask:0xf bank_mask:0xf
379+
// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff]
380380

381-
v_alignbyte_b32_e64_dpp v5, v1, v2, s105 row_shl:15
381+
v_alignbyte_b32_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf
382382
// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff]
383383

384384
v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_hi row_shr:1
@@ -387,7 +387,7 @@ v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_hi row_shr:1
387387
v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:15
388388
// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff]
389389

390-
v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 row_ror:1
390+
v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf
391391
// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff]
392392

393393
v_alignbyte_b32_e64_dpp v5, v1, v2, exec_hi row_ror:15
@@ -405,6 +405,24 @@ v_alignbyte_b32_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bo
405405
v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
406406
// GFX11: v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x17,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
407407

408+
v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l row_mirror
409+
// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff]
410+
411+
v_alignbyte_b32_e64_dpp v5, v1, v2, s3 row_half_mirror
412+
// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff]
413+
414+
v_alignbyte_b32_e64_dpp v5, v1, v2, s105 row_shl:1
415+
// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
416+
417+
v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 row_shl:15
418+
// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff]
419+
420+
v_alignbyte_b32_e64_dpp v5, v1, v2, m0 row_ror:1
421+
// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff]
422+
423+
v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h row_mirror
424+
// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff]
425+
408426
v_and_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
409427
// GFX11: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
410428

llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -187,11 +187,11 @@ v_alignbit_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1
187187
v_alignbit_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
188188
// GFX11: v_alignbit_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x16,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
189189

190-
v_alignbyte_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
191-
// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
190+
v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l dpp8:[7,6,5,4,3,2,1,0]
191+
// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
192192

193-
v_alignbyte_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
194-
// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
193+
v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l dpp8:[7,6,5,4,3,2,1,0]
194+
// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
195195

196196
v_alignbyte_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
197197
// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
@@ -220,6 +220,15 @@ v_alignbyte_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1
220220
v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
221221
// GFX11: v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x17,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
222222

223+
v_alignbyte_b32_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0]
224+
// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
225+
226+
v_alignbyte_b32_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0]
227+
// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05]
228+
229+
v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h dpp8:[7,6,5,4,3,2,1,0]
230+
// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
231+
223232
v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
224233
// GFX11: v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
225234

llvm/test/MC/AMDGPU/gfx12_asm_vop3.s

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -452,6 +452,9 @@ v_alignbyte_b32 v5, src_scc, vcc_lo, -1
452452
v_alignbyte_b32 v255, 0xaf123456, vcc_hi, null
453453
// GFX12: v_alignbyte_b32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x17,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
454454

455+
v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.h
456+
// GFX12: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
457+
455458
v_and_b16 v5, v1, v2
456459
// GFX12: v_and_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00]
457460

llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -485,6 +485,9 @@ v_alignbyte_b32_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bo
485485
v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
486486
// GFX12: v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x17,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
487487

488+
v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h row_mirror
489+
// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff]
490+
488491
v_and_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
489492
// GFX12: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
490493

llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,9 @@ v_alignbyte_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1
288288
v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
289289
// GFX12: v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x17,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
290290

291+
v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h dpp8:[7,6,5,4,3,2,1,0]
292+
// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
293+
291294
v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
292295
// GFX12: v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
293296

llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -508,10 +508,16 @@
508508
# GFX11: v_alignbyte_b32 v5, s105, s105, s105 ; encoding: [0x05,0x00,0x17,0xd6,0x69,0xd2,0xa4,0x01]
509509

510510
0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04
511-
# GFX11: v_alignbyte_b32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04]
511+
# W32-REAL16: v_alignbyte_b32 v5, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04]
512+
# W32-FAKE16: v_alignbyte_b32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04]
513+
# W64-REAL16: v_alignbyte_b32 v5, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04]
514+
# W64-FAKE16: v_alignbyte_b32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04]
512515

513516
0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf
514-
# GFX11: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
517+
# W32-REAL16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.l ; encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
518+
# W32-FAKE16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
519+
# W64-REAL16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.l ; encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
520+
# W64-FAKE16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
515521

516522
0x05,0x00,0x17,0xd6,0x7b,0xfa,0xed,0x01
517523
# GFX11: v_alignbyte_b32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x17,0xd6,0x7b,0xfa,0xed,0x01]
@@ -540,6 +546,12 @@
540546
0xff,0x00,0x17,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf
541547
# GFX11: v_alignbyte_b32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x17,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
542548

549+
0x05,0x20,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf
550+
# W32-REAL16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
551+
# W32-FAKE16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
552+
# W64-REAL16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
553+
# W64-FAKE16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
554+
543555
0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00
544556
# W32-REAL16: v_and_b16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00]
545557
# W32-FAKE16: v_and_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00]

0 commit comments

Comments
 (0)