Skip to content

Commit f5d32b2

Browse files
committed
Allow op_sel on VOP3P DOT instructions, but the value of op_sel must
be 0 if it is present.
1 parent 8893ea2 commit f5d32b2

File tree

12 files changed

+2037
-841
lines changed

12 files changed

+2037
-841
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4626,14 +4626,17 @@ bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
46264626

46274627
uint64_t TSFlags = MII.get(Opc).TSFlags;
46284628

4629-
if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4629+
// For VOP3P DOT instructions, op_sel must be 0 if present, and op_sel_hi
4630+
// cannot be present.
4631+
if ((TSFlags & SIInstrFlags::IsDOT) && (TSFlags & SIInstrFlags::VOP3P)) {
46304632
int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
46314633
if (OpSelIdx != -1) {
46324634
if (Inst.getOperand(OpSelIdx).getImm() != 0)
46334635
return false;
46344636
}
46354637
int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
46364638
if (OpSelHiIdx != -1) {
4639+
// -1 is the default value for op_sel_hi
46374640
if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
46384641
return false;
46394642
}

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -406,22 +406,15 @@ defm V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
406406
let OtherPredicates = [HasDot7Insts] in {
407407
defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8",
408408
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
409-
// v_dot4_u32_u8 does not allow op_sel Pre-GFX11
410-
defm V_DOT4_U32_U8_PREGFX11 : VOP3PInst<"v_dot4_u32_u8",
411-
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_udot4, 1>;
412-
413409
defm V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4",
414410
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8, 1>;
415-
// v_dot8_u32_u4 does not allow op_sel Pre-GFX11
416-
defm V_DOT8_U32_U4_PREGFX11 : VOP3PInst<"v_dot8_u32_u4",
417-
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_udot8, 1>;
418411
} // End OtherPredicates = [HasDot7Insts]
419412

420413
let OtherPredicates = [HasDot1Insts] in {
421414
defm V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8",
422-
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_sdot4, 1>;
415+
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot4, 1>;
423416
defm V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4",
424-
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_sdot8, 1>;
417+
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot8, 1>;
425418
} // End OtherPredicates = [HasDot1Insts]
426419

427420
def DOT2_BF16_Profile
@@ -2104,8 +2097,8 @@ defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x26>;
21042097
defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x27>;
21052098

21062099
defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x23>;
2107-
defm V_DOT4_U32_U8_PREGFX11 : VOP3P_Real_vi <0x29>;
2108-
defm V_DOT8_U32_U4_PREGFX11 : VOP3P_Real_vi <0x2b>;
2100+
defm V_DOT4_U32_U8 : VOP3P_Real_vi <0x29>;
2101+
defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x2b>;
21092102

21102103
defm V_DOT4_I32_I8 : VOP3P_Real_vi <0x28>;
21112104
defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x2a>;
@@ -2263,10 +2256,8 @@ defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x14>;
22632256
defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x15>;
22642257

22652258
defm V_DOT2_F32_F16 : VOP3P_Real_gfx10_gfx11_gfx12_Triple<0x13>;
2266-
defm V_DOT4_U32_U8 : VOP3P_Real_gfx11_gfx12<0x17>;
2267-
defm V_DOT4_U32_U8_PREGFX11 : VOP3P_Real_gfx10<0x17>;
2268-
defm V_DOT8_U32_U4 : VOP3P_Real_gfx11_gfx12<0x19>;
2269-
defm V_DOT8_U32_U4_PREGFX11 : VOP3P_Real_gfx10<0x19>;
2259+
defm V_DOT4_U32_U8 : VOP3P_Real_gfx10_gfx11_gfx12<0x17>;
2260+
defm V_DOT8_U32_U4 : VOP3P_Real_gfx10_gfx11_gfx12<0x19>;
22702261

22712262
defm V_DOT4_I32_I8 : VOP3P_Real_gfx10 <0x16>;
22722263
defm V_DOT8_I32_I4 : VOP3P_Real_gfx10 <0x18>;

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1454,12 +1454,11 @@ class VOP3Features<bit Clamp, bit OpSel, bit Packed, bit MAI> {
14541454
bit IsMAI = MAI;
14551455
}
14561456

1457-
def VOP3_REGULAR : VOP3Features<0, 0, 0, 0>;
1458-
def VOP3_CLAMP : VOP3Features<1, 0, 0, 0>;
1459-
def VOP3_OPSEL : VOP3Features<1, 1, 0, 0>;
1460-
def VOP3_PACKED : VOP3Features<1, 1, 1, 0>;
1461-
def VOP3_PACKED_NO_OPSEL : VOP3Features<1, 0, 1, 0>;
1462-
def VOP3_MAI : VOP3Features<0, 0, 0, 1>;
1457+
def VOP3_REGULAR : VOP3Features<0, 0, 0, 0>;
1458+
def VOP3_CLAMP : VOP3Features<1, 0, 0, 0>;
1459+
def VOP3_OPSEL : VOP3Features<1, 1, 0, 0>;
1460+
def VOP3_PACKED : VOP3Features<1, 1, 1, 0>;
1461+
def VOP3_MAI : VOP3Features<0, 0, 0, 1>;
14631462
def VOP3_OPSEL_ONLY : VOP3Features<0, 1, 0, 0>;
14641463

14651464
// Packed is misleading, but it enables the appropriate op_sel

llvm/test/CodeGen/AMDGPU/mai-hazards-gfx90a.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -746,7 +746,7 @@ name: smfma4x4_write_vgpr_dot_write
746746
body: |
747747
bb.0:
748748
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
749-
$vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec
749+
$vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
750750
...
751751
# GCN-LABEL: name: smfma4x4_read_srcc_vgpr_valu_write
752752
# GCN: V_MFMA
@@ -945,7 +945,7 @@ name: dot_write_vgpr_different_dot_read_srcc
945945
body: |
946946
bb.0:
947947
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
948-
$vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec
948+
$vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
949949
...
950950
# GCN-LABEL: name: dot_write_vgpr_different_dot_write
951951
# GCN: V_DOT
@@ -955,7 +955,7 @@ name: dot_write_vgpr_different_dot_write
955955
body: |
956956
bb.0:
957957
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
958-
$vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
958+
$vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
959959
...
960960
# GCN-LABEL: name: dot_write_vgpr_different_valu_read
961961
# GCN: V_DOT

llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1071,7 +1071,7 @@ name: xdl_smfma4x4_write_vgpr_dot_write
10711071
body: |
10721072
bb.0:
10731073
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
1074-
$vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec
1074+
$vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
10751075
...
10761076
# GCN-LABEL: name: nonxdl_smfma4x4_read_srcc_vgpr_valu_write
10771077
# GCN: V_MFMA
@@ -1265,7 +1265,7 @@ name: dot_write_vgpr_different_dot_read_srcc
12651265
body: |
12661266
bb.0:
12671267
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
1268-
$vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec
1268+
$vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
12691269
...
12701270
# GCN-LABEL: name: dot_write_vgpr_different_dot_write
12711271
# GCN: V_DOT
@@ -1275,7 +1275,7 @@ name: dot_write_vgpr_different_dot_write
12751275
body: |
12761276
bb.0:
12771277
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
1278-
$vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
1278+
$vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
12791279
...
12801280
# GCN-LABEL: name: dot_write_vgpr_different_valu_read
12811281
# GCN: V_DOT

llvm/test/MC/AMDGPU/dl-insts.s

Lines changed: 9 additions & 139 deletions
Original file line numberDiff line numberDiff line change
@@ -389,153 +389,23 @@ v_dot8_i32_i4 v0, v1, v2, v3
389389
v_dot8_u32_u4 v0, v1, v2, v3
390390

391391
//
392-
// Test op_sel/op_sel_hi.
392+
// Test op_sel/op_sel_hi: in VOP3P dot, op_sel must be 0, op_sel_hi cannot appear
393393
//
394394

395395
// CHECK: encoding: [0x00,0x40,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
396396
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,0]
397-
// CHECK: encoding: [0x00,0x50,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
398-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,1]
399-
// CHECK: encoding: [0x00,0x48,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
400-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,0]
401-
// CHECK: encoding: [0x00,0x58,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
402-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,1]
403-
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x04]
404-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[0,0]
405-
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x14]
406-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[0,1]
407-
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x0c]
408-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[1,0]
409-
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
410-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[1,1]
411-
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x04]
412-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
413-
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x14]
414-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
415-
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x0c]
416-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
417-
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
418-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
419-
// CHECK: encoding: [0x00,0x10,0xa3,0xd3,0x01,0x05,0x0e,0x04]
420-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
421-
// CHECK: encoding: [0x00,0x10,0xa3,0xd3,0x01,0x05,0x0e,0x14]
422-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
423-
// CHECK: encoding: [0x00,0x10,0xa3,0xd3,0x01,0x05,0x0e,0x0c]
424-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
425-
// CHECK: encoding: [0x00,0x10,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
426-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
427-
// CHECK: encoding: [0x00,0x08,0xa3,0xd3,0x01,0x05,0x0e,0x04]
428-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
429-
// CHECK: encoding: [0x00,0x08,0xa3,0xd3,0x01,0x05,0x0e,0x14]
430-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
431-
// CHECK: encoding: [0x00,0x08,0xa3,0xd3,0x01,0x05,0x0e,0x0c]
432-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
433-
// CHECK: encoding: [0x00,0x08,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
434-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
435-
// CHECK: encoding: [0x00,0x18,0xa3,0xd3,0x01,0x05,0x0e,0x04]
436-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
437-
// CHECK: encoding: [0x00,0x18,0xa3,0xd3,0x01,0x05,0x0e,0x14]
438-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
439-
// CHECK: encoding: [0x00,0x18,0xa3,0xd3,0x01,0x05,0x0e,0x0c]
440-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
441-
// CHECK: encoding: [0x00,0x18,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
442-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
443397
// CHECK: encoding: [0x00,0x40,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
444398
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,0]
445-
// CHECK: encoding: [0x00,0x50,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
446-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,1]
447-
// CHECK: encoding: [0x00,0x48,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
448-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,0]
449-
// CHECK: encoding: [0x00,0x58,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
450-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,1]
451-
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x04]
452-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[0,0]
453-
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x14]
454-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[0,1]
455-
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x0c]
456-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[1,0]
457-
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
458-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[1,1]
459-
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x04]
460-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
461-
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x14]
462-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
463-
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x0c]
464-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
465-
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
466-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
467-
// CHECK: encoding: [0x00,0x10,0xa6,0xd3,0x01,0x05,0x0e,0x04]
468-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
469-
// CHECK: encoding: [0x00,0x10,0xa6,0xd3,0x01,0x05,0x0e,0x14]
470-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
471-
// CHECK: encoding: [0x00,0x10,0xa6,0xd3,0x01,0x05,0x0e,0x0c]
472-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
473-
// CHECK: encoding: [0x00,0x10,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
474-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
475-
// CHECK: encoding: [0x00,0x08,0xa6,0xd3,0x01,0x05,0x0e,0x04]
476-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
477-
// CHECK: encoding: [0x00,0x08,0xa6,0xd3,0x01,0x05,0x0e,0x14]
478-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
479-
// CHECK: encoding: [0x00,0x08,0xa6,0xd3,0x01,0x05,0x0e,0x0c]
480-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
481-
// CHECK: encoding: [0x00,0x08,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
482-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
483-
// CHECK: encoding: [0x00,0x18,0xa6,0xd3,0x01,0x05,0x0e,0x04]
484-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
485-
// CHECK: encoding: [0x00,0x18,0xa6,0xd3,0x01,0x05,0x0e,0x14]
486-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
487-
// CHECK: encoding: [0x00,0x18,0xa6,0xd3,0x01,0x05,0x0e,0x0c]
488-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
489-
// CHECK: encoding: [0x00,0x18,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
490-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
491399
// CHECK: encoding: [0x00,0x40,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
492400
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,0]
493-
// CHECK: encoding: [0x00,0x50,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
494-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,1]
495-
// CHECK: encoding: [0x00,0x48,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
496-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,0]
497-
// CHECK: encoding: [0x00,0x58,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
498-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1]
499-
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x04]
500-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[0,0]
501-
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x14]
502-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[0,1]
503-
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x0c]
504-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[1,0]
505-
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
506-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[1,1]
507-
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x04]
508-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
509-
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x14]
510-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
511-
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x0c]
512-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
513-
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
514-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
515-
// CHECK: encoding: [0x00,0x10,0xa7,0xd3,0x01,0x05,0x0e,0x04]
516-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
517-
// CHECK: encoding: [0x00,0x10,0xa7,0xd3,0x01,0x05,0x0e,0x14]
518-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
519-
// CHECK: encoding: [0x00,0x10,0xa7,0xd3,0x01,0x05,0x0e,0x0c]
520-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
521-
// CHECK: encoding: [0x00,0x10,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
522-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
523-
// CHECK: encoding: [0x00,0x08,0xa7,0xd3,0x01,0x05,0x0e,0x04]
524-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
525-
// CHECK: encoding: [0x00,0x08,0xa7,0xd3,0x01,0x05,0x0e,0x14]
526-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
527-
// CHECK: encoding: [0x00,0x08,0xa7,0xd3,0x01,0x05,0x0e,0x0c]
528-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
529-
// CHECK: encoding: [0x00,0x08,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
530-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
531-
// CHECK: encoding: [0x00,0x18,0xa7,0xd3,0x01,0x05,0x0e,0x04]
532-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
533-
// CHECK: encoding: [0x00,0x18,0xa7,0xd3,0x01,0x05,0x0e,0x14]
534-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
535-
// CHECK: encoding: [0x00,0x18,0xa7,0xd3,0x01,0x05,0x0e,0x0c]
536-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
537-
// CHECK: encoding: [0x00,0x18,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
538-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
401+
// CHECK: encoding: [0x00,0x40,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
402+
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0]
403+
// CHECK: encoding: [0x00,0x40,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
404+
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0]
405+
// CHECK: encoding: [0x00,0x40,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
406+
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0]
407+
// CHECK: encoding: [0x00,0x40,0xab,0xd3,0x01,0x05,0x0e,0x1c]
408+
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0]
539409

540410
//
541411
// Test clamp.

0 commit comments

Comments
 (0)