Skip to content

Commit a3c2493

Browse files
committed
Allow op_sel for VOP3P DOT instructions, but the value of op_sel must
be 0 if present.
1 parent 7ae70f0 commit a3c2493

File tree

12 files changed

+2037
-841
lines changed

12 files changed

+2037
-841
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4588,14 +4588,17 @@ bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
45884588

45894589
uint64_t TSFlags = MII.get(Opc).TSFlags;
45904590

4591-
if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4591+
// For VOP3P DOT instructions, op_sel must be 0 if present, and op_sel_hi
4592+
// cannot be present.
4593+
if ((TSFlags & SIInstrFlags::IsDOT) && (TSFlags & SIInstrFlags::VOP3P)) {
45924594
int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
45934595
if (OpSelIdx != -1) {
45944596
if (Inst.getOperand(OpSelIdx).getImm() != 0)
45954597
return false;
45964598
}
45974599
int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
45984600
if (OpSelHiIdx != -1) {
4601+
// -1 is the default value for op_sel_hi
45994602
if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
46004603
return false;
46014604
}

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -384,22 +384,15 @@ defm V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
384384
let OtherPredicates = [HasDot7Insts] in {
385385
defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8",
386386
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
387-
// v_dot4_u32_u8 does not allow op_sel Pre-GFX11
388-
defm V_DOT4_U32_U8_PREGFX11 : VOP3PInst<"v_dot4_u32_u8",
389-
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_udot4, 1>;
390-
391387
defm V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4",
392388
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8, 1>;
393-
// v_dot8_u32_u4 does not allow op_sel Pre-GFX11
394-
defm V_DOT8_U32_U4_PREGFX11 : VOP3PInst<"v_dot8_u32_u4",
395-
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_udot8, 1>;
396389
} // End OtherPredicates = [HasDot7Insts]
397390

398391
let OtherPredicates = [HasDot1Insts] in {
399392
defm V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8",
400-
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_sdot4, 1>;
393+
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot4, 1>;
401394
defm V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4",
402-
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_sdot8, 1>;
395+
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot8, 1>;
403396
} // End OtherPredicates = [HasDot1Insts]
404397

405398
def DOT2_BF16_Profile
@@ -1719,8 +1712,8 @@ defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x26>;
17191712
defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x27>;
17201713

17211714
defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x23>;
1722-
defm V_DOT4_U32_U8_PREGFX11 : VOP3P_Real_vi <0x29>;
1723-
defm V_DOT8_U32_U4_PREGFX11 : VOP3P_Real_vi <0x2b>;
1715+
defm V_DOT4_U32_U8 : VOP3P_Real_vi <0x29>;
1716+
defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x2b>;
17241717

17251718
defm V_DOT4_I32_I8 : VOP3P_Real_vi <0x28>;
17261719
defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x2a>;
@@ -1852,10 +1845,8 @@ defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x14>;
18521845
defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x15>;
18531846

18541847
defm V_DOT2_F32_F16 : VOP3P_Real_gfx10_gfx11_gfx12_Triple<0x13>;
1855-
defm V_DOT4_U32_U8 : VOP3P_Real_gfx11_gfx12<0x17>;
1856-
defm V_DOT4_U32_U8_PREGFX11 : VOP3P_Real_gfx10<0x17>;
1857-
defm V_DOT8_U32_U4 : VOP3P_Real_gfx11_gfx12<0x19>;
1858-
defm V_DOT8_U32_U4_PREGFX11 : VOP3P_Real_gfx10<0x19>;
1848+
defm V_DOT4_U32_U8 : VOP3P_Real_gfx10_gfx11_gfx12<0x17>;
1849+
defm V_DOT8_U32_U4 : VOP3P_Real_gfx10_gfx11_gfx12<0x19>;
18591850

18601851
defm V_DOT4_I32_I8 : VOP3P_Real_gfx10 <0x16>;
18611852
defm V_DOT8_I32_I4 : VOP3P_Real_gfx10 <0x18>;

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1360,12 +1360,11 @@ class VOP3Features<bit Clamp, bit OpSel, bit Packed, bit MAI> {
13601360
bit IsMAI = MAI;
13611361
}
13621362

1363-
def VOP3_REGULAR : VOP3Features<0, 0, 0, 0>;
1364-
def VOP3_CLAMP : VOP3Features<1, 0, 0, 0>;
1365-
def VOP3_OPSEL : VOP3Features<1, 1, 0, 0>;
1366-
def VOP3_PACKED : VOP3Features<1, 1, 1, 0>;
1367-
def VOP3_PACKED_NO_OPSEL : VOP3Features<1, 0, 1, 0>;
1368-
def VOP3_MAI : VOP3Features<0, 0, 0, 1>;
1363+
def VOP3_REGULAR : VOP3Features<0, 0, 0, 0>;
1364+
def VOP3_CLAMP : VOP3Features<1, 0, 0, 0>;
1365+
def VOP3_OPSEL : VOP3Features<1, 1, 0, 0>;
1366+
def VOP3_PACKED : VOP3Features<1, 1, 1, 0>;
1367+
def VOP3_MAI : VOP3Features<0, 0, 0, 1>;
13691368

13701369
class VOP3_Profile_Base<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOPProfile<P.ArgVT> {
13711370

llvm/test/CodeGen/AMDGPU/mai-hazards-gfx90a.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -746,7 +746,7 @@ name: smfma4x4_write_vgpr_dot_write
746746
body: |
747747
bb.0:
748748
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
749-
$vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec
749+
$vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
750750
...
751751
# GCN-LABEL: name: smfma4x4_read_srcc_vgpr_valu_write
752752
# GCN: V_MFMA
@@ -945,7 +945,7 @@ name: dot_write_vgpr_different_dot_read_srcc
945945
body: |
946946
bb.0:
947947
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
948-
$vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec
948+
$vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
949949
...
950950
# GCN-LABEL: name: dot_write_vgpr_different_dot_write
951951
# GCN: V_DOT
@@ -955,7 +955,7 @@ name: dot_write_vgpr_different_dot_write
955955
body: |
956956
bb.0:
957957
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
958-
$vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
958+
$vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
959959
...
960960
# GCN-LABEL: name: dot_write_vgpr_different_valu_read
961961
# GCN: V_DOT

llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1014,7 +1014,7 @@ name: xdl_smfma4x4_write_vgpr_dot_write
10141014
body: |
10151015
bb.0:
10161016
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
1017-
$vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec
1017+
$vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
10181018
...
10191019
# GCN-LABEL: name: nonxdl_smfma4x4_read_srcc_vgpr_valu_write
10201020
# GCN: V_MFMA
@@ -1208,7 +1208,7 @@ name: dot_write_vgpr_different_dot_read_srcc
12081208
body: |
12091209
bb.0:
12101210
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
1211-
$vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec
1211+
$vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
12121212
...
12131213
# GCN-LABEL: name: dot_write_vgpr_different_dot_write
12141214
# GCN: V_DOT
@@ -1218,7 +1218,7 @@ name: dot_write_vgpr_different_dot_write
12181218
body: |
12191219
bb.0:
12201220
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
1221-
$vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
1221+
$vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
12221222
...
12231223
# GCN-LABEL: name: dot_write_vgpr_different_valu_read
12241224
# GCN: V_DOT

llvm/test/MC/AMDGPU/dl-insts.s

Lines changed: 9 additions & 139 deletions
Original file line numberDiff line numberDiff line change
@@ -389,153 +389,23 @@ v_dot8_i32_i4 v0, v1, v2, v3
389389
v_dot8_u32_u4 v0, v1, v2, v3
390390

391391
//
392-
// Test op_sel/op_sel_hi.
392+
// Test op_sel/op_sel_hi: in VOP3P dot, op_sel must be 0, op_sel_hi cannot appear
393393
//
394394

395395
// CHECK: encoding: [0x00,0x40,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
396396
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,0]
397-
// CHECK: encoding: [0x00,0x50,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
398-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,1]
399-
// CHECK: encoding: [0x00,0x48,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
400-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,0]
401-
// CHECK: encoding: [0x00,0x58,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
402-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,1]
403-
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x04]
404-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[0,0]
405-
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x14]
406-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[0,1]
407-
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x0c]
408-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[1,0]
409-
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
410-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[1,1]
411-
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x04]
412-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
413-
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x14]
414-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
415-
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x0c]
416-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
417-
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
418-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
419-
// CHECK: encoding: [0x00,0x10,0xa3,0xd3,0x01,0x05,0x0e,0x04]
420-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
421-
// CHECK: encoding: [0x00,0x10,0xa3,0xd3,0x01,0x05,0x0e,0x14]
422-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
423-
// CHECK: encoding: [0x00,0x10,0xa3,0xd3,0x01,0x05,0x0e,0x0c]
424-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
425-
// CHECK: encoding: [0x00,0x10,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
426-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
427-
// CHECK: encoding: [0x00,0x08,0xa3,0xd3,0x01,0x05,0x0e,0x04]
428-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
429-
// CHECK: encoding: [0x00,0x08,0xa3,0xd3,0x01,0x05,0x0e,0x14]
430-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
431-
// CHECK: encoding: [0x00,0x08,0xa3,0xd3,0x01,0x05,0x0e,0x0c]
432-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
433-
// CHECK: encoding: [0x00,0x08,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
434-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
435-
// CHECK: encoding: [0x00,0x18,0xa3,0xd3,0x01,0x05,0x0e,0x04]
436-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
437-
// CHECK: encoding: [0x00,0x18,0xa3,0xd3,0x01,0x05,0x0e,0x14]
438-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
439-
// CHECK: encoding: [0x00,0x18,0xa3,0xd3,0x01,0x05,0x0e,0x0c]
440-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
441-
// CHECK: encoding: [0x00,0x18,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
442-
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
443397
// CHECK: encoding: [0x00,0x40,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
444398
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,0]
445-
// CHECK: encoding: [0x00,0x50,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
446-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,1]
447-
// CHECK: encoding: [0x00,0x48,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
448-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,0]
449-
// CHECK: encoding: [0x00,0x58,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
450-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,1]
451-
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x04]
452-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[0,0]
453-
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x14]
454-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[0,1]
455-
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x0c]
456-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[1,0]
457-
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
458-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[1,1]
459-
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x04]
460-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
461-
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x14]
462-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
463-
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x0c]
464-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
465-
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
466-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
467-
// CHECK: encoding: [0x00,0x10,0xa6,0xd3,0x01,0x05,0x0e,0x04]
468-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
469-
// CHECK: encoding: [0x00,0x10,0xa6,0xd3,0x01,0x05,0x0e,0x14]
470-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
471-
// CHECK: encoding: [0x00,0x10,0xa6,0xd3,0x01,0x05,0x0e,0x0c]
472-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
473-
// CHECK: encoding: [0x00,0x10,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
474-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
475-
// CHECK: encoding: [0x00,0x08,0xa6,0xd3,0x01,0x05,0x0e,0x04]
476-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
477-
// CHECK: encoding: [0x00,0x08,0xa6,0xd3,0x01,0x05,0x0e,0x14]
478-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
479-
// CHECK: encoding: [0x00,0x08,0xa6,0xd3,0x01,0x05,0x0e,0x0c]
480-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
481-
// CHECK: encoding: [0x00,0x08,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
482-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
483-
// CHECK: encoding: [0x00,0x18,0xa6,0xd3,0x01,0x05,0x0e,0x04]
484-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
485-
// CHECK: encoding: [0x00,0x18,0xa6,0xd3,0x01,0x05,0x0e,0x14]
486-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
487-
// CHECK: encoding: [0x00,0x18,0xa6,0xd3,0x01,0x05,0x0e,0x0c]
488-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
489-
// CHECK: encoding: [0x00,0x18,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
490-
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
491399
// CHECK: encoding: [0x00,0x40,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
492400
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,0]
493-
// CHECK: encoding: [0x00,0x50,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
494-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,1]
495-
// CHECK: encoding: [0x00,0x48,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
496-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,0]
497-
// CHECK: encoding: [0x00,0x58,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
498-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1]
499-
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x04]
500-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[0,0]
501-
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x14]
502-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[0,1]
503-
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x0c]
504-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[1,0]
505-
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
506-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[1,1]
507-
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x04]
508-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
509-
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x14]
510-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
511-
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x0c]
512-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
513-
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
514-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
515-
// CHECK: encoding: [0x00,0x10,0xa7,0xd3,0x01,0x05,0x0e,0x04]
516-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
517-
// CHECK: encoding: [0x00,0x10,0xa7,0xd3,0x01,0x05,0x0e,0x14]
518-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
519-
// CHECK: encoding: [0x00,0x10,0xa7,0xd3,0x01,0x05,0x0e,0x0c]
520-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
521-
// CHECK: encoding: [0x00,0x10,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
522-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
523-
// CHECK: encoding: [0x00,0x08,0xa7,0xd3,0x01,0x05,0x0e,0x04]
524-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
525-
// CHECK: encoding: [0x00,0x08,0xa7,0xd3,0x01,0x05,0x0e,0x14]
526-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
527-
// CHECK: encoding: [0x00,0x08,0xa7,0xd3,0x01,0x05,0x0e,0x0c]
528-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
529-
// CHECK: encoding: [0x00,0x08,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
530-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
531-
// CHECK: encoding: [0x00,0x18,0xa7,0xd3,0x01,0x05,0x0e,0x04]
532-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
533-
// CHECK: encoding: [0x00,0x18,0xa7,0xd3,0x01,0x05,0x0e,0x14]
534-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
535-
// CHECK: encoding: [0x00,0x18,0xa7,0xd3,0x01,0x05,0x0e,0x0c]
536-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
537-
// CHECK: encoding: [0x00,0x18,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
538-
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
401+
// CHECK: encoding: [0x00,0x40,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
402+
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0]
403+
// CHECK: encoding: [0x00,0x40,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
404+
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0]
405+
// CHECK: encoding: [0x00,0x40,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
406+
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0]
407+
// CHECK: encoding: [0x00,0x40,0xab,0xd3,0x01,0x05,0x0e,0x1c]
408+
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0]
539409

540410
//
541411
// Test clamp.

0 commit comments

Comments
 (0)