Skip to content

Commit 2753fd1

Browse files
committed
[AMDGPU][MC] Disallow op_sel in v_dot4 and v_dot8 with 4- or 8-bit
packed data In v_dot4 and v_dot8 instructions with 4- or 8-bit packed data (e.g., v_dot4_u32_u8, v_dot8_u32_u4), the op_sel modifier should not be allowed for GFX9 and GFX10. For GFX11 and GFX12, section 7.5.1 of the RDNS3 ISA doc seems to indicate that OPSEL is allowed.
1 parent 1fe64fe commit 2753fd1

File tree

11 files changed

+1687
-484
lines changed

11 files changed

+1687
-484
lines changed

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -383,16 +383,16 @@ defm V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
383383

384384
let OtherPredicates = [HasDot7Insts] in {
385385
defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8",
386-
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
386+
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_udot4, 1>;
387387
defm V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4",
388-
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8, 1>;
388+
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_udot8, 1>;
389389
} // End OtherPredicates = [HasDot7Insts]
390390

391391
let OtherPredicates = [HasDot1Insts] in {
392392
defm V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8",
393-
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot4, 1>;
393+
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_sdot4, 1>;
394394
defm V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4",
395-
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot8, 1>;
395+
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_sdot8, 1>;
396396
} // End OtherPredicates = [HasDot1Insts]
397397

398398
def DOT2_BF16_Profile
@@ -411,7 +411,7 @@ defm V_DOT2_F32_BF16 : VOP3PInst<"v_dot2_f32_bf16", DOT2_BF16_Profile,
411411

412412
multiclass VOP3PDOTIUInst <string OpName, SDPatternOperator intrinsic_node> {
413413
let IsDOT = 1 in
414-
defm NAME : VOP3PInst<OpName, VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>,
414+
defm NAME : VOP3PInst<OpName, VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>,
415415
null_frag, 1>;
416416
// Dot-iu instructions consider input as signed if imod neg bits are set. Thus
417417
// Dot-iu Intrinsics have extra operands and require separate codegen pattern.

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1360,11 +1360,12 @@ class VOP3Features<bit Clamp, bit OpSel, bit Packed, bit MAI> {
13601360
bit IsMAI = MAI;
13611361
}
13621362

1363-
def VOP3_REGULAR : VOP3Features<0, 0, 0, 0>;
1364-
def VOP3_CLAMP : VOP3Features<1, 0, 0, 0>;
1365-
def VOP3_OPSEL : VOP3Features<1, 1, 0, 0>;
1366-
def VOP3_PACKED : VOP3Features<1, 1, 1, 0>;
1367-
def VOP3_MAI : VOP3Features<0, 0, 0, 1>;
1363+
def VOP3_REGULAR : VOP3Features<0, 0, 0, 0>;
1364+
def VOP3_CLAMP : VOP3Features<1, 0, 0, 0>;
1365+
def VOP3_OPSEL : VOP3Features<1, 1, 0, 0>;
1366+
def VOP3_PACKED : VOP3Features<1, 1, 1, 0>;
1367+
def VOP3_PACKED_NO_OPSEL : VOP3Features<1, 0, 1, 0>;
1368+
def VOP3_MAI : VOP3Features<0, 0, 0, 1>;
13681369

13691370
class VOP3_Profile_Base<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOPProfile<P.ArgVT> {
13701371

llvm/test/CodeGen/AMDGPU/mai-hazards-gfx90a.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -746,7 +746,7 @@ name: smfma4x4_write_vgpr_dot_write
746746
body: |
747747
bb.0:
748748
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
749-
$vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
749+
$vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec
750750
...
751751
# GCN-LABEL: name: smfma4x4_read_srcc_vgpr_valu_write
752752
# GCN: V_MFMA
@@ -945,7 +945,7 @@ name: dot_write_vgpr_different_dot_read_srcc
945945
body: |
946946
bb.0:
947947
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
948-
$vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
948+
$vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec
949949
...
950950
# GCN-LABEL: name: dot_write_vgpr_different_dot_write
951951
# GCN: V_DOT
@@ -955,7 +955,7 @@ name: dot_write_vgpr_different_dot_write
955955
body: |
956956
bb.0:
957957
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
958-
$vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
958+
$vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
959959
...
960960
# GCN-LABEL: name: dot_write_vgpr_different_valu_read
961961
# GCN: V_DOT

llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1014,7 +1014,7 @@ name: xdl_smfma4x4_write_vgpr_dot_write
10141014
body: |
10151015
bb.0:
10161016
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
1017-
$vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1017+
$vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec
10181018
...
10191019
# GCN-LABEL: name: nonxdl_smfma4x4_read_srcc_vgpr_valu_write
10201020
# GCN: V_MFMA
@@ -1208,7 +1208,7 @@ name: dot_write_vgpr_different_dot_read_srcc
12081208
body: |
12091209
bb.0:
12101210
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
1211-
$vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1211+
$vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec
12121212
...
12131213
# GCN-LABEL: name: dot_write_vgpr_different_dot_write
12141214
# GCN: V_DOT
@@ -1218,7 +1218,7 @@ name: dot_write_vgpr_different_dot_write
12181218
body: |
12191219
bb.0:
12201220
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
1221-
$vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1221+
$vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
12221222
...
12231223
# GCN-LABEL: name: dot_write_vgpr_different_valu_read
12241224
# GCN: V_DOT

llvm/test/MC/AMDGPU/dl-insts.s

Lines changed: 0 additions & 192 deletions
Original file line numberDiff line numberDiff line change
@@ -536,198 +536,6 @@ v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
536536
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
537537
// CHECK: encoding: [0x00,0x18,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
538538
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
539-
// CHECK: encoding: [0x00,0x40,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
540-
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0]
541-
// CHECK: encoding: [0x00,0x50,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
542-
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1]
543-
// CHECK: encoding: [0x00,0x48,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
544-
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0]
545-
// CHECK: encoding: [0x00,0x58,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
546-
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1]
547-
// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x04]
548-
v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,0]
549-
// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x14]
550-
v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,1]
551-
// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x0c]
552-
v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,0]
553-
// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
554-
v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,1]
555-
// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x04]
556-
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
557-
// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x14]
558-
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
559-
// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x0c]
560-
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
561-
// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
562-
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
563-
// CHECK: encoding: [0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x04]
564-
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
565-
// CHECK: encoding: [0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x14]
566-
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
567-
// CHECK: encoding: [0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x0c]
568-
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
569-
// CHECK: encoding: [0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
570-
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
571-
// CHECK: encoding: [0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x04]
572-
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
573-
// CHECK: encoding: [0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x14]
574-
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
575-
// CHECK: encoding: [0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x0c]
576-
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
577-
// CHECK: encoding: [0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
578-
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
579-
// CHECK: encoding: [0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x04]
580-
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
581-
// CHECK: encoding: [0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x14]
582-
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
583-
// CHECK: encoding: [0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x0c]
584-
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
585-
// CHECK: encoding: [0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
586-
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
587-
// CHECK: encoding: [0x00,0x40,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
588-
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0]
589-
// CHECK: encoding: [0x00,0x50,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
590-
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1]
591-
// CHECK: encoding: [0x00,0x48,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
592-
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0]
593-
// CHECK: encoding: [0x00,0x58,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
594-
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1]
595-
// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x04]
596-
v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,0]
597-
// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x14]
598-
v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,1]
599-
// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x0c]
600-
v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,0]
601-
// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
602-
v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,1]
603-
// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x04]
604-
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
605-
// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x14]
606-
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
607-
// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x0c]
608-
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
609-
// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
610-
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
611-
// CHECK: encoding: [0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x04]
612-
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
613-
// CHECK: encoding: [0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x14]
614-
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
615-
// CHECK: encoding: [0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x0c]
616-
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
617-
// CHECK: encoding: [0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
618-
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
619-
// CHECK: encoding: [0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x04]
620-
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
621-
// CHECK: encoding: [0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x14]
622-
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
623-
// CHECK: encoding: [0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x0c]
624-
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
625-
// CHECK: encoding: [0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
626-
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
627-
// CHECK: encoding: [0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x04]
628-
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
629-
// CHECK: encoding: [0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x14]
630-
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
631-
// CHECK: encoding: [0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x0c]
632-
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
633-
// CHECK: encoding: [0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
634-
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
635-
// CHECK: encoding: [0x00,0x40,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
636-
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0]
637-
// CHECK: encoding: [0x00,0x50,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
638-
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1]
639-
// CHECK: encoding: [0x00,0x48,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
640-
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0]
641-
// CHECK: encoding: [0x00,0x58,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
642-
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1]
643-
// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x04]
644-
v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,0]
645-
// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x14]
646-
v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,1]
647-
// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x0c]
648-
v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,0]
649-
// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
650-
v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,1]
651-
// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x04]
652-
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
653-
// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x14]
654-
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
655-
// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x0c]
656-
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
657-
// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
658-
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
659-
// CHECK: encoding: [0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x04]
660-
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
661-
// CHECK: encoding: [0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x14]
662-
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
663-
// CHECK: encoding: [0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x0c]
664-
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
665-
// CHECK: encoding: [0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
666-
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
667-
// CHECK: encoding: [0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x04]
668-
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
669-
// CHECK: encoding: [0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x14]
670-
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
671-
// CHECK: encoding: [0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x0c]
672-
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
673-
// CHECK: encoding: [0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
674-
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
675-
// CHECK: encoding: [0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x04]
676-
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
677-
// CHECK: encoding: [0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x14]
678-
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
679-
// CHECK: encoding: [0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x0c]
680-
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
681-
// CHECK: encoding: [0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
682-
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
683-
// CHECK: encoding: [0x00,0x40,0xab,0xd3,0x01,0x05,0x0e,0x1c]
684-
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0]
685-
// CHECK: encoding: [0x00,0x50,0xab,0xd3,0x01,0x05,0x0e,0x1c]
686-
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1]
687-
// CHECK: encoding: [0x00,0x48,0xab,0xd3,0x01,0x05,0x0e,0x1c]
688-
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0]
689-
// CHECK: encoding: [0x00,0x58,0xab,0xd3,0x01,0x05,0x0e,0x1c]
690-
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1]
691-
// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x04]
692-
v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,0]
693-
// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x14]
694-
v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,1]
695-
// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x0c]
696-
v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,0]
697-
// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x1c]
698-
v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,1]
699-
// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x04]
700-
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
701-
// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x14]
702-
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
703-
// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x0c]
704-
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
705-
// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x1c]
706-
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
707-
// CHECK: encoding: [0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x04]
708-
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
709-
// CHECK: encoding: [0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x14]
710-
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
711-
// CHECK: encoding: [0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x0c]
712-
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
713-
// CHECK: encoding: [0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x1c]
714-
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
715-
// CHECK: encoding: [0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x04]
716-
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
717-
// CHECK: encoding: [0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x14]
718-
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
719-
// CHECK: encoding: [0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x0c]
720-
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
721-
// CHECK: encoding: [0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x1c]
722-
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
723-
// CHECK: encoding: [0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x04]
724-
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
725-
// CHECK: encoding: [0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x14]
726-
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
727-
// CHECK: encoding: [0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x0c]
728-
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
729-
// CHECK: encoding: [0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x1c]
730-
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
731539

732540
//
733541
// Test clamp.

0 commit comments

Comments
 (0)