@@ -998,39 +998,14 @@ class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
998
998
bit NegLoAny = !or(NegLo01, NegLo2);
999
999
bit NegHiAny = !or(NegHi01, NegHi2);
1000
1000
1001
- let DstRC = !cond(!eq(ArgTy[0], v8f32): VDst_256,
1002
- !eq(ArgTy[0], v8i32): VDst_256,
1003
- !eq(ArgTy[0], v8f16): VDst_128,
1004
- !eq(ArgTy[0], v8i16): VDst_128,
1005
- !eq(ArgTy[0], v4f32): VDst_128,
1006
- !eq(ArgTy[0], v4i32): VDst_128,
1007
- !eq(ArgTy[0], v4f16): VDst_64,
1008
- !eq(ArgTy[0], v4i16): VDst_64);
1009
- let Src0RC64 = !cond(!eq(ArgTy[1], v8f16): VRegSrc_128,
1010
- !eq(ArgTy[1], v4f16): VRegSrc_64,
1011
- !eq(ArgTy[1], v4i16): VRegSrc_64,
1012
- !eq(ArgTy[1], v8i16): VRegSrc_128,
1013
- !eq(ArgTy[1], v4i32): VRegSrc_128,
1014
- !eq(ArgTy[1], v2i32): VRegSrc_64,
1015
- !eq(ArgTy[1], i32) : VRegSrc_32);
1016
- let Src1RC64 = !cond(!eq(ArgTy[2], v16f16): VRegSrc_256,
1017
- !eq(ArgTy[2], v16i16): VRegSrc_256,
1018
- !eq(ArgTy[2], v8f16): VRegSrc_128,
1019
- !eq(ArgTy[2], v8i16): VRegSrc_128,
1020
- !eq(ArgTy[2], v4i32): VRegSrc_128,
1021
- !eq(ArgTy[1], v4i16): VRegSrc_64,
1022
- !eq(ArgTy[1], v4f16): VRegSrc_64,
1023
- !eq(ArgTy[2], v2i32): VRegSrc_64,
1024
- !eq(ArgTy[2], i32) : VRegSrc_32);
1001
+ let DstRC = !cast<RegisterOperand>("VDst_"#ArgTy[0].Size);
1002
+ let Src0RC64 = !cast<RegisterOperand>("VRegSrc_"#ArgTy[1].Size);
1003
+ let Src1RC64 = !cast<RegisterOperand>("VRegSrc_"#ArgTy[2].Size);
1025
1004
let Src2RC64 = !if(IsSWMMAC, DstRC,
1026
- !cond(!eq(ArgTy[3], v8f32): VISrc_256_f32,
1027
- !eq(ArgTy[3], v8i32): VISrc_256_b32,
1028
- !eq(ArgTy[3], v8f16): VISrc_128_f16,
1029
- !eq(ArgTy[3], v8i16): VISrc_128_f32, // bf16
1030
- !eq(ArgTy[3], v4f16): VISrc_64_f16,
1031
- !eq(ArgTy[3], v4i16): VISrc_64_b32,
1032
- !eq(ArgTy[3], v4i32): VISrc_128_b32,
1033
- !eq(ArgTy[3], v4f32): VISrc_128_f32));
1005
+ !cast<RegisterOperand>("VISrc_"#ArgTy[3].Size#
1006
+ !cond(IsC_F32: "_f32",
1007
+ IsC_F16: "_f16",
1008
+ 1: "_b32")));
1034
1009
1035
1010
// For f16 and bf16 matrices A and B, each element can be modified by
1036
1011
// fneg(neg_lo,neg_hi = 1). For iu4 and iu8 matrices A and B neg_lo is
0 commit comments