Skip to content

Commit 201f4f6

Browse files
authored
AMDGPU: Add v_mfma_ld_scale_b32 for gfx950 (#116722)
1 parent b170ab2 commit 201f4f6

File tree

7 files changed

+286
-45
lines changed

7 files changed

+286
-45
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1991,13 +1991,14 @@ class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC,
19911991

19921992
class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
19931993
RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasOpSel,
1994+
bit HasNeg,
19941995
Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
19951996
dag base = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs,
19961997
HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/,
19971998
0/*HasOMod*/, Src0Mod, Src1Mod, Src2Mod, HasOpSel>.ret;
19981999

19992000
dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi);
2000-
dag vop3p_neg = (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi);
2001+
dag vop3p_neg = !if(HasNeg, (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi), (ins));
20012002

20022003
dag vop3pFields = !con(!if(HasOpSel, vop3pOpsel, (ins)), vop3p_neg);
20032004
dag ret = !con(base, vop3pFields);
@@ -2191,22 +2192,22 @@ class getAsmVOPDPart <int NumSrcArgs, string XorY> {
21912192

21922193
// Returns the assembly string for the inputs and outputs of a VOP3P
21932194
// instruction.
2194-
class getAsmVOP3P <int NumSrcArgs, bit HasModifiers,
2195+
class getAsmVOP3P <bit HasDst, int NumSrcArgs, bit HasNeg,
21952196
bit HasClamp, bit HasOpSel> {
2196-
string dst = "$vdst";
2197-
string src0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
2197+
string dst = !if(HasDst, "$vdst"# !if(!gt(NumSrcArgs, 0), ",", ""), "");
2198+
string src0 = !if(!eq(NumSrcArgs, 1), " $src0", " $src0,");
21982199
string src1 = !if(!eq(NumSrcArgs, 1), "",
21992200
!if(!eq(NumSrcArgs, 2), " $src1",
22002201
" $src1,"));
22012202
string src2 = !if(!eq(NumSrcArgs, 3), " $src2", "");
22022203

2203-
string mods = !if(HasModifiers, "$neg_lo$neg_hi", "");
2204+
string mods = !if(HasNeg, "$neg_lo$neg_hi", "");
22042205
string clamp = !if(HasClamp, "$clamp", "");
22052206
string opsel = !if(HasOpSel, "$op_sel$op_sel_hi", "");
22062207

22072208
// Each modifier is printed as an array of bits for each operand, so
22082209
// all operands are printed as part of src0_modifiers.
2209-
string ret = dst#", "#src0#src1#src2#opsel#mods#clamp;
2210+
string ret = dst#src0#src1#src2#opsel#mods#clamp;
22102211
}
22112212

22122213
// FIXME-TRUE16 AsmVOP3OpSel will be deprecated after all
@@ -2267,7 +2268,7 @@ class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT
22672268

22682269
class getAsmVOP3Base <int NumSrcArgs, bit HasDst, bit HasClamp,
22692270
bit HasOpSel, bit HasOMod, bit IsVOP3P,
2270-
bit HasModifiers, bit Src0HasMods,
2271+
bit HasNeg, bit Src0HasMods,
22712272
bit Src1HasMods, bit Src2HasMods, ValueType DstVT = i32,
22722273
bit HasByteSel = 0> {
22732274
string dst = !if(HasDst,
@@ -2294,7 +2295,7 @@ class getAsmVOP3Base <int NumSrcArgs, bit HasDst, bit HasClamp,
22942295
string bytesel = !if(HasByteSel, "$byte_sel", "");
22952296
string 3PMods = !if(IsVOP3P,
22962297
!if(HasOpSel, "$op_sel_hi", "")
2297-
#!if(HasModifiers, "$neg_lo$neg_hi", ""),
2298+
#!if(HasNeg, "$neg_lo$neg_hi", ""),
22982299
"");
22992300
string clamp = !if(HasClamp, "$clamp", "");
23002301
string omod = !if(HasOMod, "$omod", "");
@@ -2554,6 +2555,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
25542555
isModifierType<Src1VT>.ret,
25552556
isModifierType<Src2VT>.ret,
25562557
HasOMod);
2558+
field bit HasNeg = HasModifiers;
25572559

25582560
field bit HasSrc0Mods = HasModifiers;
25592561
field bit HasSrc1Mods = !if(HasModifiers, !or(HasSrc1FloatMods, HasSrc1IntMods), 0);
@@ -2589,7 +2591,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
25892591
HasClamp, HasModifiers, HasSrc2Mods,
25902592
HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
25912593
field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
2592-
NumSrcArgs, HasClamp, HasOpSel,
2594+
NumSrcArgs, HasClamp, HasOpSel, HasNeg,
25932595
Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
25942596
field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
25952597
NumSrcArgs, HasClamp, HasOMod,
@@ -2607,7 +2609,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
26072609
Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
26082610
Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel>.ret;
26092611
defvar InsVOP3PDPPBase = getInsVOP3P<Src0VOP3DPP, Src1VOP3DPP,
2610-
Src2VOP3DPP, NumSrcArgs, HasClamp, HasOpSel,
2612+
Src2VOP3DPP, NumSrcArgs, HasClamp, HasOpSel, HasNeg,
26112613
Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP>.ret;
26122614

26132615
field dag InsVOP3Base = !if(IsVOP3P, InsVOP3PDPPBase, InsVOP3DPPBase);
@@ -2635,10 +2637,10 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
26352637
// the asm operand name via this HasModifiers flag
26362638
field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0 /*HasModifiers*/, DstVT>.ret;
26372639
field string AsmVOP3Base = getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp,
2638-
HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasModifiers, HasModifiers,
2640+
HasOpSel, HasOMod, IsVOP3P, HasNeg, HasModifiers, HasModifiers,
26392641
HasModifiers, DstVT, IsFP8ByteSel>.ret;
26402642
field string Asm64 = AsmVOP3Base;
2641-
field string AsmVOP3P = getAsmVOP3P<NumSrcArgs, HasModifiers, HasClamp, HasOpSel>.ret;
2643+
field string AsmVOP3P = getAsmVOP3P<HasDst, NumSrcArgs, HasNeg, HasClamp, HasOpSel>.ret;
26422644
field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs,
26432645
HasClamp,
26442646
HasOMod,

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@ class VOP3P_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR,
2020
let AsmVOP3Base = AsmVOP3P;
2121
}
2222

23+
def VOP_MFMA_LD_SCALE : VOP3P_Profile<VOPProfile<[untyped, i32, i32, untyped]>, VOP3P_LD_SCALE> {
24+
let HasModifiers = 1;
25+
let HasNeg = 0;
26+
}
27+
2328
// Used for FMA_MIX* and MAD_MIX* insts
2429
// Their operands are only sort of f16 operands. Depending on
2530
// op_sel_hi, these may be interpreted as f32. The inline immediate
@@ -750,6 +755,10 @@ defm V_MFMA_F32_32X32X16_F16 : MAIInst<"v_mfma_f32_32x32x16f16", "F32_V8F16
750755
defm V_MFMA_F32_32X32X16_BF16 : MAIInst<"v_mfma_f32_32x32x16bf16", "F32_V8BF16_X16", int_amdgcn_mfma_f32_32x32x16_bf16>;
751756
}
752757

758+
let SubtargetPredicate = HasGFX950Insts in {
759+
defm V_MFMA_LD_SCALE_B32 : VOP3PInst<"v_mfma_ld_scale_b32", VOP_MFMA_LD_SCALE>;
760+
}
761+
753762
let SubtargetPredicate = isGFX90APlus in {
754763
let is_gfx940_xdl = 1 in {
755764
defm V_MFMA_F32_32X32X4BF16_1K : MAIInst<"v_mfma_f32_32x32x4bf16_1k", "F32_V4I16_X32", int_amdgcn_mfma_f32_32x32x4bf16_1k>;
@@ -1787,6 +1796,8 @@ defm V_MFMA_F32_16X16X32_F16 : VOP3P_Real_MFMA_gfx950 <0x54, "v_mfma_f32_16x
17871796
defm V_MFMA_F32_32X32X16_F16 : VOP3P_Real_MFMA_gfx950 <0x55, "v_mfma_f32_32x32x16_f16">;
17881797
defm V_MFMA_F32_32X32X16_BF16 : VOP3P_Real_MFMA_gfx950 <0x37, "v_mfma_f32_32x32x16_bf16">;
17891798

1799+
defm V_MFMA_LD_SCALE_B32 : VOP3P_Real_vi <0x2c>;
1800+
17901801
defm V_MFMA_I32_32X32X16I8 : VOP3P_Real_MFMA_gfx940 <0x56, "v_mfma_i32_32x32x16_i8">;
17911802
defm V_MFMA_I32_16X16X32I8 : VOP3P_Real_MFMA_gfx940 <0x57, "v_mfma_i32_16x16x32_i8">;
17921803
defm V_MFMA_F32_16X16X8XF32 : VOP3P_Real_MFMA_gfx940 <0x3e, "v_mfma_f32_16x16x8_xf32">;

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,7 @@ class VOP3Pe <bits<7> op, VOPProfile P> : Enc64 {
423423
bits<2> index_key_8bit;
424424
bits<1> index_key_16bit;
425425

426-
let Inst{7-0} = vdst;
426+
let Inst{7-0} = !if(P.HasDst, vdst, 0);
427427
let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // neg_hi src0
428428
let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // neg_hi src1
429429
let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0); // neg_hi src2
@@ -1365,6 +1365,10 @@ def VOP3_OPSEL : VOP3Features<1, 1, 0, 0>;
13651365
def VOP3_PACKED : VOP3Features<1, 1, 1, 0>;
13661366
def VOP3_MAI : VOP3Features<0, 0, 0, 1>;
13671367

1368+
// Packed is misleading, but it enables the appropriate op_sel
1369+
// modifiers.
1370+
def VOP3P_LD_SCALE : VOP3Features<0, 1, 1, 0>;
1371+
13681372
class VOP3_Profile_Base<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOPProfile<P.ArgVT> {
13691373

13701374
let HasClamp = !if(Features.HasClamp, 1, P.HasClamp);

llvm/test/MC/AMDGPU/mai-gfx950-err.s

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx950 %s 2>&1 | FileCheck --implicit-check-not=error: %s
2+
3+
v_mfma_ld_scale_b32 v0, 65
4+
// CHECK: :[[@LINE-1]]:25: error: literal operands are not supported
5+
6+
v_mfma_ld_scale_b32 65, v0
7+
// CHECK: :[[@LINE-1]]:21: error: literal operands are not supported
8+
9+
v_mfma_ld_scale_b32 65, 65
10+
// CHECK: :[[@LINE-1]]:25: error: literal operands are not supported
11+
12+
v_mfma_ld_scale_b32 s0, s1
13+
// CHECK: :[[@LINE-1]]:25: error: invalid operand (violates constant bus restrictions)
14+
15+
v_mfma_ld_scale_b32 v0, v0 clamp
16+
// CHECK: :[[@LINE-1]]:28: error: invalid operand for instruction
17+
18+
v_mfma_ld_scale_b32 v0, v0 neg_lo:[0,1]
19+
// CHECK: :[[@LINE-1]]:28: error: not a valid operand
20+
21+
v_mfma_ld_scale_b32 v0, v0 neg_lo:[1,1]
22+
// CHECK: :[[@LINE-1]]:28: error: not a valid operand
23+
24+
v_mfma_ld_scale_b32 v0, v0 neg_hi:[1,1]
25+
// CHECK: :[[@LINE-1]]:28: error: not a valid operand
26+
27+
v_mfma_ld_scale_b32 v0, v0 neg_hi:[0,1]
28+
// CHECK: :[[@LINE-1]]:28: error: not a valid operand
29+
30+
v_mfma_ld_scale_b32 v0, v0 neg_lo:[0,1] neg_hi:[0,1]
31+
// CHECK: :[[@LINE-1]]:28: error: not a valid operand

llvm/test/MC/AMDGPU/mai-gfx950.s

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,3 +158,120 @@ v_mfma_f32_32x32x16_bf16 v[0:15], v[0:3], v[0:3], v[0:15] abid:1
158158
// GFX950: v_mfma_f32_32x32x16_bf16 a[0:15], a[0:3], a[0:3], a[0:15] cbsz:3 abid:1 ; encoding: [0x00,0x8b,0xb7,0xd3,0x00,0x01,0x02,0x1c]
159159
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
160160
v_mfma_f32_32x32x16_bf16 a[0:15], a[0:3], a[0:3], a[0:15] cbsz:3 abid:1
161+
162+
//===----------------------------------------------------------------------===//
163+
// v_mfma_ld_scale_b32
164+
//===----------------------------------------------------------------------===//
165+
166+
// GFX950: v_mfma_ld_scale_b32 v0, 64 ; encoding: [0x00,0x40,0xac,0xd3,0x00,0x81,0x01,0x18]
167+
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
168+
v_mfma_ld_scale_b32 v0, 64
169+
170+
// GFX950: v_mfma_ld_scale_b32 64, v0 ; encoding: [0x00,0x40,0xac,0xd3,0xc0,0x00,0x02,0x18]
171+
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
172+
v_mfma_ld_scale_b32 64, v0
173+
174+
// GFX950: v_mfma_ld_scale_b32 64, 64 ; encoding: [0x00,0x40,0xac,0xd3,0xc0,0x80,0x01,0x18]
175+
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
176+
v_mfma_ld_scale_b32 64, 64
177+
178+
// GFX950: v_mfma_ld_scale_b32 s0, s0 ; encoding: [0x00,0x40,0xac,0xd3,0x00,0x00,0x00,0x18]
179+
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
180+
v_mfma_ld_scale_b32 s0, s0
181+
182+
// GFX950: v_mfma_ld_scale_b32 s0, v0 ; encoding: [0x00,0x40,0xac,0xd3,0x00,0x00,0x02,0x18]
183+
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
184+
v_mfma_ld_scale_b32 s0, v0
185+
186+
// GFX950: v_mfma_ld_scale_b32 v0, s0 ; encoding: [0x00,0x40,0xac,0xd3,0x00,0x01,0x00,0x18]
187+
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
188+
v_mfma_ld_scale_b32 v0, s0
189+
190+
// GFX950: v_mfma_ld_scale_b32 vcc_lo, vcc_lo ; encoding: [0x00,0x40,0xac,0xd3,0x6a,0xd4,0x00,0x18]
191+
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
192+
v_mfma_ld_scale_b32 vcc_lo, vcc_lo
193+
194+
// GFX950: v_mfma_ld_scale_b32 m0, m0 ; encoding: [0x00,0x40,0xac,0xd3,0x7c,0xf8,0x00,0x18]
195+
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
196+
v_mfma_ld_scale_b32 m0, m0
197+
198+
// GFX950: v_mfma_ld_scale_b32 src_vccz, src_vccz ; encoding: [0x00,0x40,0xac,0xd3,0xfb,0xf6,0x01,0x18]
199+
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
200+
v_mfma_ld_scale_b32 vccz, vccz
201+
202+
// GFX950: v_mfma_ld_scale_b32 src_execz, src_execz ; encoding: [0x00,0x40,0xac,0xd3,0xfc,0xf8,0x01,0x18]
203+
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
204+
v_mfma_ld_scale_b32 execz, execz
205+
206+
// GFX950: v_mfma_ld_scale_b32 v0, v0 ; encoding: [0x00,0x40,0xac,0xd3,0x00,0x01,0x02,0x18]
207+
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
208+
v_mfma_ld_scale_b32 v0, v0
209+
210+
// GFX950: v_mfma_ld_scale_b32 v1, v1 ; encoding: [0x00,0x40,0xac,0xd3,0x01,0x03,0x02,0x18]
211+
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
212+
v_mfma_ld_scale_b32 v1, v1
213+
214+
// GFX950: v_mfma_ld_scale_b32 0, 0 ; encoding: [0x00,0x40,0xac,0xd3,0x80,0x00,0x01,0x18]
215+
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
216+
v_mfma_ld_scale_b32 0, 0
217+
218+
// GFX950: v_mfma_ld_scale_b32 1, 0 ; encoding: [0x00,0x40,0xac,0xd3,0x81,0x00,0x01,0x18]
219+
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
220+
v_mfma_ld_scale_b32 1, 0
221+
222+
// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[1,0] ; encoding: [0x00,0x48,0xac,0xd3,0x01,0x03,0x02,0x18]
223+
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
224+
v_mfma_ld_scale_b32 v1, v1 op_sel:[1, 0]
225+
226+
// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] ; encoding: [0x00,0x50,0xac,0xd3,0x01,0x03,0x02,0x18]
227+
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
228+
v_mfma_ld_scale_b32 v1, v1 op_sel:[0, 1]
229+
230+
// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[1,1] ; encoding: [0x00,0x58,0xac,0xd3,0x01,0x03,0x02,0x18]
231+
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
232+
v_mfma_ld_scale_b32 v1, v1 op_sel:[1, 1]
233+
234+
// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel_hi:[1,0] ; encoding: [0x00,0x40,0xac,0xd3,0x01,0x03,0x02,0x08]
235+
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
236+
v_mfma_ld_scale_b32 v1, v1 op_sel_hi:[1, 0]
237+
238+
// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel_hi:[0,1] ; encoding: [0x00,0x40,0xac,0xd3,0x01,0x03,0x02,0x10]
239+
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
240+
v_mfma_ld_scale_b32 v1, v1 op_sel_hi:[0, 1]
241+
242+
// GFX950: v_mfma_ld_scale_b32 v1, v1 ; encoding: [0x00,0x40,0xac,0xd3,0x01,0x03,0x02,0x18]
243+
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
244+
v_mfma_ld_scale_b32 v1, v1 op_sel_hi:[1, 1]
245+
246+
// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel_hi:[0,0] ; encoding: [0x00,0x40,0xac,0xd3,0x01,0x03,0x02,0x00]
247+
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
248+
v_mfma_ld_scale_b32 v1, v1 op_sel:[0,0] op_sel_hi:[0,0]
249+
250+
// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[1,0] op_sel_hi:[1,0] ; encoding: [0x00,0x48,0xac,0xd3,0x01,0x03,0x02,0x08]
251+
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
252+
v_mfma_ld_scale_b32 v1, v1 op_sel:[1,0] op_sel_hi:[1,0]
253+
254+
// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] op_sel_hi:[0,1] ; encoding: [0x00,0x50,0xac,0xd3,0x01,0x03,0x02,0x10]
255+
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
256+
v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] op_sel_hi:[0,1]
257+
258+
// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] ; encoding: [0x00,0x50,0xac,0xd3,0x01,0x03,0x02,0x18]
259+
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
260+
v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] op_sel_hi:[1,1]
261+
262+
// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] ; encoding: [0x00,0x50,0xac,0xd3,0x01,0x03,0x02,0x18]
263+
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
264+
v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] op_sel_hi:[1,1]
265+
266+
// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[1,1] ; encoding: [0x00,0x58,0xac,0xd3,0x01,0x03,0x02,0x18]
267+
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
268+
v_mfma_ld_scale_b32 v1, v1 op_sel:[1,1] op_sel_hi:[1,1]
269+
270+
// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x00,0x48,0xac,0xd3,0x01,0x03,0x02,0x10]
271+
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
272+
v_mfma_ld_scale_b32 v1, v1 op_sel:[1,0] op_sel_hi:[0,1]
273+
274+
// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] op_sel_hi:[1,0] ; encoding: [0x00,0x50,0xac,0xd3,0x01,0x03,0x02,0x08]
275+
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
276+
v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] op_sel_hi:[1,0]
277+

0 commit comments

Comments
 (0)