@@ -383,8 +383,15 @@ defm V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
383
383
384
384
let OtherPredicates = [HasDot7Insts] in {
385
385
defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8",
386
+ VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
387
+ // Pre-GFX11 targets do not allow op_sel on v_dot4_u32_u8, so this variant uses the NO_OPSEL profile.
388
+ defm V_DOT4_U32_U8_PREGFX11 : VOP3PInst<"v_dot4_u32_u8",
386
389
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_udot4, 1>;
390
+
387
391
defm V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4",
392
+ VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8, 1>;
393
+ // Pre-GFX11 targets do not allow op_sel on v_dot8_u32_u4, so this variant uses the NO_OPSEL profile.
394
+ defm V_DOT8_U32_U4_PREGFX11 : VOP3PInst<"v_dot8_u32_u4",
388
395
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_udot8, 1>;
389
396
} // End OtherPredicates = [HasDot7Insts]
390
397
@@ -411,7 +418,7 @@ defm V_DOT2_F32_BF16 : VOP3PInst<"v_dot2_f32_bf16", DOT2_BF16_Profile,
411
418
412
419
multiclass VOP3PDOTIUInst <string OpName, SDPatternOperator intrinsic_node> {
413
420
let IsDOT = 1 in
414
- defm NAME : VOP3PInst<OpName, VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL >,
421
+ defm NAME : VOP3PInst<OpName, VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED >,
415
422
null_frag, 1>;
416
423
// Dot-iu instructions treat inputs as signed if imod neg bits are set. Thus
417
424
// Dot-iu intrinsics have extra operands and require a separate codegen pattern.
@@ -1712,8 +1719,8 @@ defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x26>;
1712
1719
defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x27>;
1713
1720
1714
1721
defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x23>;
1715
- defm V_DOT4_U32_U8 : VOP3P_Real_vi <0x29>;
1716
- defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x2b>;
1722
+ defm V_DOT4_U32_U8_PREGFX11 : VOP3P_Real_vi <0x29>;
1723
+ defm V_DOT8_U32_U4_PREGFX11 : VOP3P_Real_vi <0x2b>;
1717
1724
1718
1725
defm V_DOT4_I32_I8 : VOP3P_Real_vi <0x28>;
1719
1726
defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x2a>;
@@ -1845,8 +1852,10 @@ defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x14>;
1845
1852
defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x15>;
1846
1853
1847
1854
defm V_DOT2_F32_F16 : VOP3P_Real_gfx10_gfx11_gfx12_Triple<0x13>;
1848
- defm V_DOT4_U32_U8 : VOP3P_Real_gfx10_gfx11_gfx12<0x17>;
1849
- defm V_DOT8_U32_U4 : VOP3P_Real_gfx10_gfx11_gfx12<0x19>;
1855
+ defm V_DOT4_U32_U8 : VOP3P_Real_gfx11_gfx12<0x17>;
1856
+ defm V_DOT4_U32_U8_PREGFX11 : VOP3P_Real_gfx10<0x17>;
1857
+ defm V_DOT8_U32_U4 : VOP3P_Real_gfx11_gfx12<0x19>;
1858
+ defm V_DOT8_U32_U4_PREGFX11 : VOP3P_Real_gfx10<0x19>;
1850
1859
1851
1860
defm V_DOT4_I32_I8 : VOP3P_Real_gfx10 <0x16>;
1852
1861
defm V_DOT8_I32_I4 : VOP3P_Real_gfx10 <0x18>;
0 commit comments