Skip to content

Commit fc17de9

Browse files
committed
V_FMA_F16 true16 in MC
1 parent: a1d71c3; commit: fc17de9

17 files changed

+922
-320
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -199,7 +199,7 @@ static unsigned macToMad(unsigned Opc) {
199199
case AMDGPU::V_FMAC_F16_e64:
200200
return AMDGPU::V_FMA_F16_gfx9_e64;
201201
case AMDGPU::V_FMAC_F16_fake16_e64:
202-
return AMDGPU::V_FMA_F16_gfx9_e64;
202+
return AMDGPU::V_FMA_F16_gfx9_fake16_e64;
203203
case AMDGPU::V_FMAC_LEGACY_F32_e64:
204204
return AMDGPU::V_FMA_LEGACY_F32_e64;
205205
case AMDGPU::V_FMAC_F64_e64:

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -4033,14 +4033,15 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
40334033
if (Src0Literal && !ST.hasVOP3Literal())
40344034
return nullptr;
40354035

4036-
unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
4037-
: IsF64 ? AMDGPU::V_FMA_F64_e64
4038-
: IsLegacy
4039-
? AMDGPU::V_FMA_LEGACY_F32_e64
4040-
: AMDGPU::V_FMA_F32_e64
4041-
: IsF16 ? AMDGPU::V_MAD_F16_e64
4042-
: IsLegacy ? AMDGPU::V_MAD_LEGACY_F32_e64
4043-
: AMDGPU::V_MAD_F32_e64;
4036+
unsigned NewOpc = IsFMA ? IsF16 ? ST.hasTrue16BitInsts()
4037+
? AMDGPU::V_FMA_F16_gfx9_fake16_e64
4038+
: AMDGPU::V_FMA_F16_gfx9_e64
4039+
: IsF64 ? AMDGPU::V_FMA_F64_e64
4040+
: IsLegacy ? AMDGPU::V_FMA_LEGACY_F32_e64
4041+
: AMDGPU::V_FMA_F32_e64
4042+
: IsF16 ? AMDGPU::V_MAD_F16_e64
4043+
: IsLegacy ? AMDGPU::V_MAD_LEGACY_F32_e64
4044+
: AMDGPU::V_MAD_F32_e64;
40444045
if (pseudoToMCOpcode(NewOpc) == -1)
40454046
return nullptr;
40464047

@@ -9285,6 +9286,7 @@ static bool isRenamedInGFX9(int Opcode) {
92859286
//
92869287
case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
92879288
case AMDGPU::V_FMA_F16_gfx9_e64:
9289+
case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
92889290
case AMDGPU::V_INTERP_P2_F16:
92899291
case AMDGPU::V_MAD_F16_e64:
92909292
case AMDGPU::V_MAD_U16_e64:

llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -455,6 +455,7 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
455455
break;
456456
case AMDGPU::V_FMA_F16_e64:
457457
case AMDGPU::V_FMA_F16_gfx9_e64:
458+
case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
458459
NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16
459460
: AMDGPU::V_FMAAK_F16;
460461
break;
@@ -484,6 +485,7 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
484485
break;
485486
case AMDGPU::V_FMA_F16_e64:
486487
case AMDGPU::V_FMA_F16_gfx9_e64:
488+
case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
487489
NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16
488490
: AMDGPU::V_FMAMK_F16;
489491
break;
@@ -956,7 +958,8 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
956958
MI.getOpcode() == AMDGPU::V_FMA_F32_e64 ||
957959
MI.getOpcode() == AMDGPU::V_MAD_F16_e64 ||
958960
MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
959-
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64) {
961+
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64 ||
962+
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64) {
960963
shrinkMadFma(MI);
961964
continue;
962965
}

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -341,7 +341,7 @@ let FPDPRounding = 1 in {
341341
let SubtargetPredicate = isGFX9Plus in {
342342
defm V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9",
343343
VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUdiv_fixup>;
344-
defm V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, any_fma>;
344+
defm V_FMA_F16_gfx9 : VOP3Inst_t16 <"v_fma_f16_gfx9", VOP_F16_F16_F16_F16, any_fma>;
345345
} // End SubtargetPredicate = isGFX9Plus
346346
} // End FPDPRounding = 1
347347

@@ -1706,7 +1706,7 @@ defm V_PERM_B32 : VOP3_Realtriple_gfx11_gfx12<0x244>;
17061706
defm V_XAD_U32 : VOP3_Realtriple_gfx11_gfx12<0x245>;
17071707
defm V_LSHL_ADD_U32 : VOP3_Realtriple_gfx11_gfx12<0x246>;
17081708
defm V_ADD_LSHL_U32 : VOP3_Realtriple_gfx11_gfx12<0x247>;
1709-
defm V_FMA_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x248, "V_FMA_F16_gfx9", "v_fma_f16">;
1709+
defm V_FMA_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x248, "v_fma_f16", "V_FMA_F16_gfx9">;
17101710
defm V_MIN3_F16 : VOP3_Realtriple_gfx11<0x249>;
17111711
defm V_MIN3_I16 : VOP3_Realtriple_gfx11_gfx12<0x24a>;
17121712
defm V_MIN3_U16 : VOP3_Realtriple_gfx11_gfx12<0x24b>;

llvm/test/CodeGen/AMDGPU/gfx11-twoaddr-fma.mir

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ body: |
1818
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1
1919
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0
2020
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
21-
; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
21+
; GFX11-NEXT: [[V_FMA_F16_gfx9_fake16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_fake16_e64 0, killed [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
2222
%0 = IMPLICIT_DEF
2323
%1 = COPY %0.sub1
2424
%2 = COPY %0.sub0
@@ -43,7 +43,7 @@ body: |
4343
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1
4444
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0
4545
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
46-
; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, [[COPY1]], 0, killed [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
46+
; GFX11-NEXT: [[V_FMA_F16_gfx9_fake16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_fake16_e64 0, [[COPY1]], 0, killed [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
4747
%0 = IMPLICIT_DEF
4848
%1 = COPY %0.sub1
4949
%2 = COPY %0.sub0
@@ -68,7 +68,7 @@ body: |
6868
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0
6969
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1
7070
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
71-
; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed [[COPY]], 0, [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, 0, 0, implicit $mode, implicit $exec
71+
; GFX11-NEXT: [[V_FMA_F16_gfx9_fake16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_fake16_e64 0, killed [[COPY]], 0, [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, 0, 0, implicit $mode, implicit $exec
7272
%0 = IMPLICIT_DEF
7373
%1 = COPY %0.sub0
7474
%2 = COPY %0.sub1
@@ -90,7 +90,7 @@ body: |
9090
; GFX11-NEXT: {{ $}}
9191
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
9292
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 49664, implicit $exec
93-
; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, 16384, 0, killed [[COPY]], 0, [[V_MOV_B32_e32_]], 0, 0, 0, implicit $mode, implicit $exec
93+
; GFX11-NEXT: [[V_FMA_F16_gfx9_fake16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_fake16_e64 0, 16384, 0, killed [[COPY]], 0, [[V_MOV_B32_e32_]], 0, 0, 0, implicit $mode, implicit $exec
9494
; GFX11-NEXT: S_ENDPGM 0
9595
%0:vgpr_32 = COPY killed $vgpr0
9696

llvm/test/MC/AMDGPU/gfx11_asm_vop3.s

Lines changed: 56 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -2174,53 +2174,77 @@ v_fma_dx9_zero_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
21742174
v_fma_dx9_zero_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
21752175
// GFX11: v_fma_dx9_zero_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x09,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
21762176

2177-
v_fma_f16 v5, v1, v2, s3
2178-
// GFX11: v_fma_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00]
2177+
v_fma_f16 v5.l, v1.l, v2.l, s3
2178+
// GFX11: v_fma_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00]
21792179

2180-
v_fma_f16 v5, v255, s2, s105
2181-
// GFX11: v_fma_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01]
2180+
v_fma_f16 v5.l, v255.l, s2, s105
2181+
// GFX11: v_fma_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01]
21822182

2183-
v_fma_f16 v5, s1, v255, exec_hi
2184-
// GFX11: v_fma_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01]
2183+
v_fma_f16 v5.l, s1, v255.l, exec_hi
2184+
// GFX11: v_fma_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01]
21852185

2186-
v_fma_f16 v5, s105, s105, exec_lo
2187-
// GFX11: v_fma_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01]
2186+
v_fma_f16 v5.l, s105, s105, exec_lo
2187+
// GFX11: v_fma_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01]
21882188

2189-
v_fma_f16 v5, vcc_lo, ttmp15, v3
2190-
// GFX11: v_fma_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04]
2189+
v_fma_f16 v5.l, vcc_lo, ttmp15, v3.l
2190+
// GFX11: v_fma_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04]
21912191

2192-
v_fma_f16 v5, vcc_hi, 0xfe0b, v255
2193-
// GFX11: v_fma_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
2192+
v_fma_f16 v5.l, vcc_hi, 0xfe0b, v255.l
2193+
// GFX11: v_fma_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
21942194

2195-
v_fma_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
2196-
// GFX11: v_fma_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1]
2195+
v_fma_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15|
2196+
// GFX11: v_fma_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1]
21972197

2198-
v_fma_f16 v5, m0, 0.5, m0
2199-
// GFX11: v_fma_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01]
2198+
v_fma_f16 v5.l, m0, 0.5, m0
2199+
// GFX11: v_fma_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01]
22002200

2201-
v_fma_f16 v5, |exec_lo|, -1, vcc_hi
2202-
// GFX11: v_fma_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01]
2201+
v_fma_f16 v5.l, |exec_lo|, -1, vcc_hi
2202+
// GFX11: v_fma_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01]
22032203

2204-
v_fma_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1]
2205-
// GFX11: v_fma_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x48,0xd6,0x7f,0xf8,0xa8,0xa1]
2204+
v_fma_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1]
2205+
// GFX11: v_fma_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x48,0xd6,0x7f,0xf8,0xa8,0xa1]
22062206

2207-
v_fma_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0]
2208-
// GFX11: v_fma_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
2207+
v_fma_f16 v5.l, null, exec_lo, -|0xfe0b|
2208+
// GFX11: v_fma_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
22092209

2210-
v_fma_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0]
2211-
// GFX11: v_fma_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3]
2210+
v_fma_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0]
2211+
// GFX11: v_fma_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3]
22122212

2213-
v_fma_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0]
2214-
// GFX11: v_fma_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43]
2213+
v_fma_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0]
2214+
// GFX11: v_fma_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43]
22152215

2216-
v_fma_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
2217-
// GFX11: v_fma_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23]
2216+
v_fma_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
2217+
// GFX11: v_fma_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23]
22182218

2219-
v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp
2220-
// GFX11: v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
2219+
v_fma_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null clamp
2220+
// GFX11: v_fma_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
22212221

2222-
v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2
2223-
// GFX11: v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
2222+
v_fma_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2
2223+
// GFX11: v_fma_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
2224+
2225+
v_fma_f16 v5.l, v255.h, s2, s105
2226+
// GFX11: v_fma_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x48,0xd6,0xff,0x05,0xa4,0x01]
2227+
2228+
v_fma_f16 v5.l, s1, v255.h, exec_hi
2229+
// GFX11: v_fma_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0x01,0xfe,0xff,0x01]
2230+
2231+
v_fma_f16 v5.l, vcc_hi, 0xfe0b, v255.h
2232+
// GFX11: v_fma_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
2233+
2234+
v_fma_f16 v5.l, -|exec_hi|, null, -|vcc_lo|
2235+
// GFX11: v_fma_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x48,0xd6,0x7f,0xf8,0xa8,0xa1]
2236+
2237+
v_fma_f16 v5.l, -1, -|exec_hi|, -|src_scc|
2238+
// GFX11: v_fma_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x48,0xd6,0xc1,0xfe,0xf4,0xc3]
2239+
2240+
v_fma_f16 v5.l, 0.5, -m0, 0.5
2241+
// GFX11: v_fma_f16 v5.l, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x48,0xd6,0xf0,0xfa,0xc0,0x43]
2242+
2243+
v_fma_f16 v5.l, -src_scc, |vcc_lo|, -1
2244+
// GFX11: v_fma_f16 v5.l, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x48,0xd6,0xfd,0xd4,0x04,0x23]
2245+
2246+
v_fma_f16 v255.l, -|0xfe0b|, -|vcc_hi|, null clamp div:2
2247+
// GFX11: v_fma_f16 v255.l, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x48,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
22242248

22252249
v_fma_f32 v5, v1, v2, s3
22262250
// GFX11: v_fma_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00]

0 commit comments

Comments
 (0)