Skip to content

Commit 7aa0aca

Browse files
authored
[AMDGPU] Handle hazard in v_scalef32_sr_fp4_* conversions (llvm#118589) (llvm#1627)
2 parents b013540 + e88c80b commit 7aa0aca

File tree

7 files changed

+192
-43
lines changed

7 files changed

+192
-43
lines changed

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -911,21 +911,30 @@ getDstSelForwardingOperand(const MachineInstr &MI, const GCNSubtarget &ST) {
911911
if (SIInstrInfo::isSDWA(MI)) {
912912
// Type 1: SDWA with dst_sel != DWORD
913913
if (auto *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel))
914-
if (DstSel->getImm() == AMDGPU::SDWA::DWORD)
915-
return nullptr;
916-
} else {
917-
// Type 2 && Type 3: (VOP3 which write the hi bits) || (FP8DstSelInst
918-
// with op_sel[3:2] != 0)
919-
if (!AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::op_sel) ||
920-
!(TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm() &
921-
SISrcMods::DST_OP_SEL ||
922-
(AMDGPU::isFP8DstSelInst(Opcode) &&
923-
(TII->getNamedOperand(MI, AMDGPU::OpName::src2_modifiers)->getImm() &
924-
SISrcMods::OP_SEL_0))))
925-
return nullptr;
914+
if (DstSel->getImm() != AMDGPU::SDWA::DWORD)
915+
return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
916+
}
917+
918+
AMDGPU::FPType IsFP4OrFP8ConvOpc = AMDGPU::getFPDstSelType(Opcode);
919+
if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::op_sel)) {
920+
// Type 2: VOP3 which writes the hi bits
921+
if (TII->getNamedImmOperand(MI, AMDGPU::OpName::src0_modifiers) &
922+
SISrcMods::DST_OP_SEL)
923+
return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
924+
925+
// Type 3: FP8DstSelInst with op_sel[3:2] != 0
926+
if (IsFP4OrFP8ConvOpc == AMDGPU::FPType::FP8 &&
927+
(TII->getNamedImmOperand(MI, AMDGPU::OpName::src2_modifiers) &
928+
SISrcMods::OP_SEL_0))
929+
return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
926930
}
927931

928-
return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
932+
// Special case: nop is required for all the opsel values for fp4 sr variant
933+
// cvt scale instructions
934+
if (IsFP4OrFP8ConvOpc == AMDGPU::FPType::FP4)
935+
return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
936+
937+
return nullptr;
929938
}
930939

931940
/// Checks whether the provided \p MI "consumes" the operand with a Dest sel

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2392,6 +2392,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
23922392
field bit IsFP8SrcByteSel = 0;
23932393
field bit IsFP8DstByteSel = 0;
23942394
field bit HasFP8DstByteSel = 0;
2395+
field bit HasFP4DstByteSel = 0;
23952396
field bit IsFP8ByteSel = !or(IsFP8SrcByteSel, IsFP8DstByteSel);
23962397

23972398
field bit HasDst = !ne(DstVT.Value, untyped.Value);
@@ -3058,13 +3059,13 @@ def isMFMA_F8F6F4Table : GenericTable {
30583059
let PrimaryKeyName = "isMFMA_F8F6F4" ;
30593060
}
30603061

3061-
def FP8DstByteSelTable : GenericTable {
3062+
def FP4FP8DstByteSelTable : GenericTable {
30623063
let FilterClass = "VOP3_Pseudo";
3063-
let CppTypeName = "FP8DstByteSelInfo";
3064-
let Fields = ["Opcode", "HasFP8DstByteSel"];
3064+
let CppTypeName = "FP4FP8DstByteSelInfo";
3065+
let Fields = ["Opcode", "HasFP8DstByteSel", "HasFP4DstByteSel"];
30653066

30663067
let PrimaryKey = ["Opcode"];
3067-
let PrimaryKeyName = "getFP8DstByteSelHelper";
3068+
let PrimaryKeyName = "getFP4FP8DstByteSelHelper";
30683069
}
30693070

30703071
def VOPDComponentTable : GenericTable {

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -397,17 +397,18 @@ struct SingleUseExceptionInfo {
397397
bool IsInvalidSingleUseProducer;
398398
};
399399

400-
#define GET_FP8DstByteSelTable_DECL
401-
#define GET_FP8DstByteSelTable_IMPL
400+
#define GET_FP4FP8DstByteSelTable_DECL
401+
#define GET_FP4FP8DstByteSelTable_IMPL
402402

403403
struct DPMACCInstructionInfo {
404404
uint16_t Opcode;
405405
bool IsDPMACCInstruction;
406406
};
407407

408-
struct FP8DstByteSelInfo {
408+
struct FP4FP8DstByteSelInfo {
409409
uint16_t Opcode;
410410
bool HasFP8DstByteSel;
411+
bool HasFP4DstByteSel;
411412
};
412413

413414
#define GET_FP8DstByteSelTable_DECL
@@ -686,9 +687,16 @@ bool isInvalidSingleUseProducerInst(unsigned Opc) {
686687
return Info && Info->IsInvalidSingleUseProducer;
687688
}
688689

689-
bool isFP8DstSelInst(unsigned Opc) {
690-
const FP8DstByteSelInfo *Info = getFP8DstByteSelHelper(Opc);
691-
return Info ? Info->HasFP8DstByteSel : false;
690+
FPType getFPDstSelType(unsigned Opc) {
691+
const FP4FP8DstByteSelInfo *Info = getFP4FP8DstByteSelHelper(Opc);
692+
if (!Info)
693+
return FPType::None;
694+
if (Info->HasFP8DstByteSel)
695+
return FPType::FP8;
696+
if (Info->HasFP4DstByteSel)
697+
return FPType::FP4;
698+
699+
return FPType::None;
692700
}
693701

694702
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ static constexpr unsigned GFX12 = 1;
5555

5656
enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };
5757

58+
enum class FPType { None, FP4, FP8 };
59+
5860
/// \returns True if \p STI is AMDHSA.
5961
bool isHsaAbi(const MCSubtargetInfo &STI);
6062

@@ -885,7 +887,7 @@ LLVM_READONLY
885887
bool isTrue16Inst(unsigned Opc);
886888

887889
LLVM_READONLY
888-
bool isFP8DstSelInst(unsigned Opc);
890+
FPType getFPDstSelType(unsigned Opc);
889891

890892
LLVM_READONLY
891893
bool isInvalidSingleUseConsumerInst(unsigned Opc);

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1013,7 +1013,7 @@ class VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<ValueType Src0Ty> :
10131013
let HasExtVOP3DPP = 0;
10141014
let HasOpSel = 1;
10151015
let HasOMod = 0;
1016-
let HasFP8DstByteSel = 1;
1016+
let HasFP4DstByteSel = 1;
10171017
}
10181018

10191019
class VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile<VOPProfile P>
@@ -1029,7 +1029,7 @@ class VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile<VOPProfile P>
10291029
let HasExtVOP3DPP = 0;
10301030
let HasOpSel = 1;
10311031
let HasOMod = 0;
1032-
let HasFP8DstByteSel = 1;
1032+
let HasFP4DstByteSel = 1;
10331033
}
10341034

10351035
class VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<ValueType DstTy> : VOP3_Profile<VOPProfile<[DstTy, i32, f32, untyped]>,

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
114114
let IsSWMMAC = P.IsSWMMAC;
115115

116116
bit HasFP8DstByteSel = P.HasFP8DstByteSel;
117+
bit HasFP4DstByteSel = P.HasFP4DstByteSel;
117118

118119
let AsmOperands = !if(isVop3OpSel,
119120
P.AsmVOP3OpSel,

llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir

Lines changed: 145 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -469,14 +469,43 @@ body: |
469469
...
470470

471471
---
472-
# GCN-LABEL: test_scalef32_sr_pk_fp4_bf16_hazard
473-
# GCN: V_CVT_SCALEF32_SR_PK_FP4_BF16_e64
474-
# GCN: S_NOP 0
475-
# GCN: V_ADD_U32_e32
476-
name: test_scalef32_sr_pk_fp4_bf16_hazard
472+
name: test_scalef32_sr_pk_fp4_bf16_opsel0_hazard
473+
body: |
474+
bb.0:
475+
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
476+
; GCN-LABEL: name: test_scalef32_sr_pk_fp4_bf16_opsel0_hazard
477+
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
478+
; GCN-NEXT: {{ $}}
479+
; GCN-NEXT: S_WAITCNT 0
480+
; GCN-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec
481+
; GCN-NEXT: S_WAITCNT 3952
482+
; GCN-NEXT: early-clobber renamable $vgpr1 = V_CVT_SCALEF32_SR_PK_FP4_BF16_e64 0, killed $vgpr2, 0, killed $vgpr3, 0, killed $vgpr4, killed $vgpr0, 0, implicit $mode, implicit $exec
483+
; GCN-NEXT: S_NOP 0
484+
; GCN-NEXT: renamable $vgpr0 = V_ADD_U32_e32 killed $vgpr1, $vgpr1, implicit $exec
485+
; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit killed $vgpr0
486+
S_WAITCNT 0
487+
renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec
488+
S_WAITCNT 3952
489+
early-clobber renamable $vgpr1 = V_CVT_SCALEF32_SR_PK_FP4_BF16_e64 0, killed $vgpr2, 0, killed $vgpr3, 0, killed $vgpr4, killed $vgpr0, 0, implicit $mode, implicit $exec
490+
renamable $vgpr0 = V_ADD_U32_e32 killed $vgpr1, $vgpr1, implicit $exec
491+
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit killed $vgpr0
492+
...
493+
494+
---
495+
name: test_scalef32_sr_pk_fp4_bf16_opsel3_hazard
477496
body: |
478497
bb.0:
479498
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
499+
; GCN-LABEL: name: test_scalef32_sr_pk_fp4_bf16_opsel3_hazard
500+
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
501+
; GCN-NEXT: {{ $}}
502+
; GCN-NEXT: S_WAITCNT 0
503+
; GCN-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec
504+
; GCN-NEXT: S_WAITCNT 3952
505+
; GCN-NEXT: early-clobber renamable $vgpr1 = V_CVT_SCALEF32_SR_PK_FP4_BF16_e64 8, killed $vgpr2, 0, killed $vgpr3, 4, killed $vgpr4, killed $vgpr0, 0, implicit $mode, implicit $exec
506+
; GCN-NEXT: S_NOP 0
507+
; GCN-NEXT: renamable $vgpr0 = V_ADD_U32_e32 killed $vgpr1, $vgpr1, implicit $exec
508+
; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit killed $vgpr0
480509
S_WAITCNT 0
481510
renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec
482511
S_WAITCNT 3952
@@ -486,14 +515,87 @@ body: |
486515
...
487516

488517
---
489-
# GCN-LABEL: test_scalef32_sr_pk_fp4_f32_hazard
490-
# GCN: V_CVT_SCALEF32_SR_PK_FP4_F32_e64
491-
# GCN: S_NOP 0
492-
# GCN: V_ADD_U32_e32
493-
name: test_scalef32_sr_pk_fp4_f32_hazard
518+
name: test_scalef32_sr_pk_fp4_bf16_opsel0_neg_fp4_as_src_hazard
519+
body: |
520+
bb.0:
521+
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
522+
; GCN-LABEL: name: test_scalef32_sr_pk_fp4_bf16_opsel0_neg_fp4_as_src_hazard
523+
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
524+
; GCN-NEXT: {{ $}}
525+
; GCN-NEXT: S_WAITCNT 0
526+
; GCN-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec
527+
; GCN-NEXT: S_WAITCNT 3952
528+
; GCN-NEXT: early-clobber renamable $vgpr1 = V_CVT_SCALEF32_SR_PK_FP4_BF16_e64 0, killed $vgpr2, 0, killed $vgpr3, 0, killed $vgpr4, killed $vgpr0, 0, implicit $mode, implicit $exec
529+
; GCN-NEXT: renamable $vgpr0 = V_ADD_U32_e32 killed $vgpr0, $vgpr0, implicit $exec
530+
; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit killed $vgpr0
531+
S_WAITCNT 0
532+
renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec
533+
S_WAITCNT 3952
534+
early-clobber renamable $vgpr1 = V_CVT_SCALEF32_SR_PK_FP4_BF16_e64 0, killed $vgpr2, 0, killed $vgpr3, 0, killed $vgpr4, killed $vgpr0, 0, implicit $mode, implicit $exec
535+
renamable $vgpr0 = V_ADD_U32_e32 killed $vgpr0, $vgpr0, implicit $exec
536+
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit killed $vgpr0
537+
...
538+
539+
---
540+
name: test_scalef32_sr_pk_fp4_bf16_opsel3_neg_fp4_as_src_hazard
541+
body: |
542+
bb.0:
543+
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
544+
; GCN-LABEL: name: test_scalef32_sr_pk_fp4_bf16_opsel3_neg_fp4_as_src_hazard
545+
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
546+
; GCN-NEXT: {{ $}}
547+
; GCN-NEXT: S_WAITCNT 0
548+
; GCN-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec
549+
; GCN-NEXT: S_WAITCNT 3952
550+
; GCN-NEXT: early-clobber renamable $vgpr1 = V_CVT_SCALEF32_SR_PK_FP4_BF16_e64 8, killed $vgpr2, 0, killed $vgpr3, 4, killed $vgpr4, killed $vgpr0, 0, implicit $mode, implicit $exec
551+
; GCN-NEXT: renamable $vgpr0 = V_ADD_U32_e32 killed $vgpr0, $vgpr0, implicit $exec
552+
; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit killed $vgpr0
553+
S_WAITCNT 0
554+
renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec
555+
S_WAITCNT 3952
556+
early-clobber renamable $vgpr1 = V_CVT_SCALEF32_SR_PK_FP4_BF16_e64 8, killed $vgpr2, 0, killed $vgpr3, 4, killed $vgpr4, killed $vgpr0, 0, implicit $mode, implicit $exec
557+
renamable $vgpr0 = V_ADD_U32_e32 killed $vgpr0, $vgpr0, implicit $exec
558+
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit killed $vgpr0
559+
...
560+
561+
---
562+
name: test_scalef32_sr_pk_fp4_f32_opsel0_hazard
563+
body: |
564+
bb.0:
565+
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
566+
; GCN-LABEL: name: test_scalef32_sr_pk_fp4_f32_opsel0_hazard
567+
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
568+
; GCN-NEXT: {{ $}}
569+
; GCN-NEXT: S_WAITCNT 0
570+
; GCN-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec
571+
; GCN-NEXT: S_WAITCNT 3952
572+
; GCN-NEXT: early-clobber renamable $vgpr1 = V_CVT_SCALEF32_SR_PK_FP4_F32_e64 0, killed $vgpr2_vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, killed $vgpr0, 0, implicit $mode, implicit $exec
573+
; GCN-NEXT: S_NOP 0
574+
; GCN-NEXT: renamable $vgpr0 = V_ADD_U32_e32 killed $vgpr1, $vgpr1, implicit $exec
575+
; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit killed $vgpr0
576+
S_WAITCNT 0
577+
renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec
578+
S_WAITCNT 3952
579+
early-clobber renamable $vgpr1 = V_CVT_SCALEF32_SR_PK_FP4_F32_e64 0, killed $vgpr2_vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, killed $vgpr0, 0, implicit $mode, implicit $exec
580+
renamable $vgpr0 = V_ADD_U32_e32 killed $vgpr1, $vgpr1, implicit $exec
581+
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit killed $vgpr0
582+
...
583+
584+
---
585+
name: test_scalef32_sr_pk_fp4_f32_opsel3_hazard
494586
body: |
495587
bb.0:
496588
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
589+
; GCN-LABEL: name: test_scalef32_sr_pk_fp4_f32_opsel3_hazard
590+
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
591+
; GCN-NEXT: {{ $}}
592+
; GCN-NEXT: S_WAITCNT 0
593+
; GCN-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec
594+
; GCN-NEXT: S_WAITCNT 3952
595+
; GCN-NEXT: early-clobber renamable $vgpr1 = V_CVT_SCALEF32_SR_PK_FP4_F32_e64 8, killed $vgpr2_vgpr3, 0, killed $vgpr4, 4, killed $vgpr5, killed $vgpr0, 0, implicit $mode, implicit $exec
596+
; GCN-NEXT: S_NOP 0
597+
; GCN-NEXT: renamable $vgpr0 = V_ADD_U32_e32 killed $vgpr1, $vgpr1, implicit $exec
598+
; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit killed $vgpr0
497599
S_WAITCNT 0
498600
renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec
499601
S_WAITCNT 3952
@@ -619,16 +721,42 @@ body: |
619721
...
620722

621723
---
622-
# GCN-LABEL: test_cvt_scale_cvt_scale_hazard
623-
# GCN: V_CVT_SCALEF32_PK_FP4_F16_e64
624-
# GCN: S_NOP 0
625-
# GCN: V_CVT_SCALEF32_SR_PK_FP4_F16_e64
626-
# GCN: S_NOP 0
627-
# GCN: S_SETPC_B64_return
628-
name: test_cvt_scale_cvt_scale_hazard
724+
name: test_cvt_scale_cvt_scalef32_sr_pk_fp4_f16_opsel0_hazard
725+
body: |
726+
bb.0:
727+
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
728+
; GCN-LABEL: name: test_cvt_scale_cvt_scalef32_sr_pk_fp4_f16_opsel0_hazard
729+
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
730+
; GCN-NEXT: {{ $}}
731+
; GCN-NEXT: S_WAITCNT 0
732+
; GCN-NEXT: renamable $vgpr2 = V_CVT_SCALEF32_PK_FP4_F16_e64 8, $vgpr0, 0, $vgpr1, 4, killed $vgpr2, 0, implicit $mode, implicit $exec
733+
; GCN-NEXT: S_NOP 0
734+
; GCN-NEXT: early-clobber renamable $vgpr4 = V_CVT_SCALEF32_SR_PK_FP4_F16_e64 0, killed $vgpr0, 0, killed $vgpr3, 0, killed $vgpr1, killed $vgpr2, 0, implicit $mode, implicit $exec
735+
; GCN-NEXT: S_NOP 0
736+
; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr4, implicit $exec, implicit $exec
737+
; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit killed $vgpr0
738+
S_WAITCNT 0
739+
renamable $vgpr2 = V_CVT_SCALEF32_PK_FP4_F16_e64 8, $vgpr0, 0, $vgpr1, 4, killed $vgpr2, 0, implicit $mode, implicit $exec
740+
early-clobber renamable $vgpr4 = V_CVT_SCALEF32_SR_PK_FP4_F16_e64 0, killed $vgpr0, 0, killed $vgpr3, 0, killed $vgpr1, killed $vgpr2, 0, implicit $mode, implicit $exec
741+
$vgpr0 = V_MOV_B32_e32 killed $vgpr4, implicit $exec, implicit $exec
742+
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit killed $vgpr0
743+
...
744+
745+
---
746+
name: test_cvt_scale_cvt_scalef32_sr_pk_fp4_f16_opsel3_hazard
629747
body: |
630748
bb.0:
631749
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
750+
; GCN-LABEL: name: test_cvt_scale_cvt_scalef32_sr_pk_fp4_f16_opsel3_hazard
751+
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
752+
; GCN-NEXT: {{ $}}
753+
; GCN-NEXT: S_WAITCNT 0
754+
; GCN-NEXT: renamable $vgpr2 = V_CVT_SCALEF32_PK_FP4_F16_e64 8, $vgpr0, 0, $vgpr1, 4, killed $vgpr2, 0, implicit $mode, implicit $exec
755+
; GCN-NEXT: S_NOP 0
756+
; GCN-NEXT: early-clobber renamable $vgpr4 = V_CVT_SCALEF32_SR_PK_FP4_F16_e64 8, killed $vgpr0, 0, killed $vgpr3, 4, killed $vgpr1, killed $vgpr2, 0, implicit $mode, implicit $exec
757+
; GCN-NEXT: S_NOP 0
758+
; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr4, implicit $exec, implicit $exec
759+
; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit killed $vgpr0
632760
S_WAITCNT 0
633761
renamable $vgpr2 = V_CVT_SCALEF32_PK_FP4_F16_e64 8, $vgpr0, 0, $vgpr1, 4, killed $vgpr2, 0, implicit $mode, implicit $exec
634762
early-clobber renamable $vgpr4 = V_CVT_SCALEF32_SR_PK_FP4_F16_e64 8, killed $vgpr0, 0, killed $vgpr3, 4, killed $vgpr1, killed $vgpr2, 0, implicit $mode, implicit $exec

0 commit comments

Comments
 (0)