Skip to content

Commit a5174de

Browse files
authored
AMDGPU: Add encodings for minimum3/maximum3 f32 for gfx950 (#117600)
1 parent 7fc71f7 commit a5174de

File tree

5 files changed

+96 additions, -1 deletion

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,9 @@ def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts",
431431
FeatureBF8ConversionScaleInsts,
432432
FeatureFP4ConversionScaleInsts,
433433
FeatureFP6BF6ConversionScaleInsts,
434-
FeatureF16BF16ToFP6BF6ConversionScaleInsts]
434+
FeatureF16BF16ToFP6BF6ConversionScaleInsts,
435+
FeatureMinimum3Maximum3F32
436+
]
435437
>;
436438

437439
def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1937,6 +1937,9 @@ defm V_CVT_PK_BF16_F32: VOP3OpSel_Real_gfx9 <0x268>;
19371937
defm V_CVT_SR_FP8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a4>;
19381938
defm V_CVT_SR_BF8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a5>;
19391939

1940+
defm V_MINIMUM3_F32 : VOP3_Real_vi <0x2a8>;
1941+
defm V_MAXIMUM3_F32 : VOP3_Real_vi <0x2a9>;
1942+
19401943
defm V_BITOP3_B16 : VOP3_Real_BITOP3_gfx9<0x233, "v_bitop3_b16">;
19411944
defm V_BITOP3_B32 : VOP3_Real_BITOP3_gfx9<0x234, "v_bitop3_b32">;
19421945
let OtherPredicates = [HasFP8ConversionScaleInsts] in {

llvm/test/MC/AMDGPU/gfx950_asm_features.s

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1149,3 +1149,36 @@ buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0.5 offset:4095
11491149
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
11501150
// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0xf7]
11511151
buffer_atomic_pk_add_bf16 v5, off, s[8:11], -4.0 offset:4095
1152+
1153+
1154+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1155+
// GFX950: v_maximum3_f32 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0x04]
1156+
v_maximum3_f32 v1, v2, v3, v4
1157+
1158+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1159+
// GFX950: v_maximum3_f32 v1, -v2, -v3, -v4 ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0xe4]
1160+
v_maximum3_f32 v1, -v2, -v3, -v4
1161+
1162+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1163+
// GFX950: v_maximum3_f32 v1, -|v2|, -|v3|, -|v4| ; encoding: [0x01,0x07,0xa9,0xd2,0x02,0x07,0x12,0xe4]
1164+
v_maximum3_f32 v1, -|v2|, -|v3|, -|v4|
1165+
1166+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1167+
// GFX950: v_maximum3_f32 v1, 0, 1.0, v3 ; encoding: [0x01,0x00,0xa9,0xd2,0x80,0xe4,0x0d,0x04]
1168+
v_maximum3_f32 v1, 0.0, 1.0, v3
1169+
1170+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1171+
// GFX950: v_maximum3_f32 v2, 0, v3, 1.0 ; encoding: [0x02,0x00,0xa9,0xd2,0x80,0x06,0xca,0x03]
1172+
v_maximum3_f32 v2, 0.0, v3, 1.0
1173+
1174+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1175+
// GFX950: v_maximum3_f32 v1, s8, v3, 1.0 ; encoding: [0x01,0x00,0xa9,0xd2,0x08,0x06,0xca,0x03]
1176+
v_maximum3_f32 v1, s8, v3, 1.0
1177+
1178+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1179+
// GFX950: v_maximum3_f32 v1, v2, s8, v3 ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x11,0x0c,0x04]
1180+
v_maximum3_f32 v1, v2, s8, v3
1181+
1182+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1183+
// GFX950: v_minimum3_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04]
1184+
v_minimum3_f32 v0, v1, v2, v3

llvm/test/MC/AMDGPU/gfx950_err.s

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,3 +353,36 @@ buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 dlc
353353

354354
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
355355
buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 glc slc dlc
356+
357+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
358+
v_maximum3_f16 v0, v1, v2, v3
359+
360+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
361+
v_minimum3_f16 v0, v1, v2, v3
362+
363+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
364+
v_maximum_f16 v0, v1, v2
365+
366+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
367+
v_minimum_f16 v0, v1, v2
368+
369+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
370+
v_maximum_f32 v0, v1, v2
371+
372+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
373+
v_minimum_f32 v0, v1, v2
374+
375+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
376+
v_maximum3_f32 v0, s1, s2, v3
377+
378+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
379+
v_maximum3_f32 v0, v3, s1, s2
380+
381+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
382+
v_maximum3_f32 v0, s1, v3, s2
383+
384+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
385+
v_minimum3_f32 v0, s1, s2, v3
386+
387+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: literal operands are not supported
388+
v_minimum3_f32 v0, v1, v2, 0xdeadbeef

llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -857,3 +857,27 @@
857857

858858
# GFX950: v_cvt_scalef32_2xpk16_bf6_f32 v[20:25], v[10:25], v[10:25], 11 ; encoding: [0x14,0x00,0x53,0xd2,0x0a,0x15,0x2e,0x02]
859859
0x14,0x00,0x53,0xd2,0x0a,0x15,0x2e,0x02
860+
861+
# GFX950: v_maximum3_f32 v1, -v2, -v3, -v4 ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0xe4]
862+
0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0xe4
863+
864+
# GFX950: v_maximum3_f32 v1, -|v2|, -|v3|, -|v4| ; encoding: [0x01,0x07,0xa9,0xd2,0x02,0x07,0x12,0xe4]
865+
0x01,0x07,0xa9,0xd2,0x02,0x07,0x12,0xe4
866+
867+
# GFX950: v_maximum3_f32 v1, 0, 1.0, v3 ; encoding: [0x01,0x00,0xa9,0xd2,0x80,0xe4,0x0d,0x04]
868+
0x01,0x00,0xa9,0xd2,0x80,0xe4,0x0d,0x04
869+
870+
# GFX950: v_maximum3_f32 v1, s8, v3, 1.0 ; encoding: [0x01,0x00,0xa9,0xd2,0x08,0x06,0xca,0x03]
871+
0x01,0x00,0xa9,0xd2,0x08,0x06,0xca,0x03
872+
873+
# GFX950: v_maximum3_f32 v1, v2, s8, v3 ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x11,0x0c,0x04]
874+
0x01,0x00,0xa9,0xd2,0x02,0x11,0x0c,0x04
875+
876+
# GFX950: v_maximum3_f32 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0x04]
877+
0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0x04
878+
879+
# GFX950: v_maximum3_f32 v2, 0, v3, 1.0 ; encoding: [0x02,0x00,0xa9,0xd2,0x80,0x06,0xca,0x03]
880+
0x02,0x00,0xa9,0xd2,0x80,0x06,0xca,0x03
881+
882+
# GFX950: v_minimum3_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04]
883+
0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04

0 commit comments

Comments
 (0)