@@ -923,6 +923,30 @@ getDstSelForwardingOperand(const MachineInstr &MI, const GCNSubtarget &ST) {
923
923
return TII->getNamedOperand (MI, AMDGPU::OpName::vdst);
924
924
}
925
925
926
+ // / Checks whether the provided \p MI "consumes" the operand with a Dest sel
927
+ // / fowarding issue \p Dst . We may "consume" the Dst via a standard explicit
928
+ // / RAW, or through irregular ways (e.g implicit RAW, certain types of WAW)
929
+ static bool consumesDstSelForwardingOperand (const MachineInstr *VALU,
930
+ const MachineOperand *Dst,
931
+ const SIRegisterInfo *TRI) {
932
+ // We must consider implicit reads of the VALU. SDWA with dst_sel and
933
+ // UNUSED_PRESERVE will implicitly read the result from forwarded dest,
934
+ // and we must account for that hazard.
935
+ // We also must account for WAW hazards. In particular, WAW with dest
936
+ // preserve semantics (e.g. VOP3 with op_sel, VOP2 &&
937
+ // !zeroesHigh16BitsOfDest) will read the forwarded dest for parity
938
+ // check for ECC. Without accounting for this hazard, the ECC will be
939
+ // wrong.
940
+ // TODO: limit to RAW (including implicit reads) + problematic WAW (i.e.
941
+ // complete zeroesHigh16BitsOfDest)
942
+ for (auto &Operand : VALU->operands ()) {
943
+ if (Operand.isReg () && TRI->regsOverlap (Dst->getReg (), Operand.getReg ())) {
944
+ return true ;
945
+ }
946
+ }
947
+ return false ;
948
+ }
949
+
926
950
int GCNHazardRecognizer::checkVALUHazards (MachineInstr *VALU) {
927
951
int WaitStatesNeeded = 0 ;
928
952
@@ -955,38 +979,17 @@ int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
955
979
956
980
auto IsShift16BitDefFn = [this , VALU](const MachineInstr &MI) {
957
981
const SIRegisterInfo *TRI = ST.getRegisterInfo ();
958
- SmallVector<const MachineOperand *, 4 > Dsts;
959
982
const MachineOperand *ForwardedDst = getDstSelForwardingOperand (MI, ST);
960
983
if (ForwardedDst) {
961
- Dsts. push_back ( ForwardedDst);
984
+ return consumesDstSelForwardingOperand (VALU, ForwardedDst, TRI );
962
985
} else if (MI.isInlineAsm ()) {
963
986
// Assume inline asm has dst forwarding hazard
964
- for (auto &Op :
965
- drop_begin (MI.operands (), InlineAsm::MIOp_FirstOperand)) {
966
- if (Op.isReg () && Op.isDef ()) {
967
- Dsts.push_back (&Op);
968
- }
969
- }
970
- }
971
-
972
- for (auto Dst : Dsts) {
973
- // We must consider implicit reads of the VALU. SDWA with dst_sel and
974
- // UNUSED_PRESERVE will implicitly read the result from forwarded dest,
975
- // and we must account for that hazard.
976
- // We also must account for WAW hazards. In particular, WAW with dest
977
- // preserve semantics (e.g. VOP3 with op_sel, VOP2 &&
978
- // !zeroesHigh16BitsOfDest) will read the forwarded dest for parity
979
- // check for ECC. Without accounting for this hazard, the ECC will be
980
- // wrong.
981
- // TODO: limit to RAW (including implicit reads) + problematic WAW (i.e.
982
- // complete zeroesHigh16BitsOfDest)
983
- for (auto &Operand : VALU->operands ()) {
984
- if (Operand.isReg () &&
985
- TRI->regsOverlap (Dst->getReg (), Operand.getReg ())) {
987
+ for (auto &Def : MI.all_defs ()) {
988
+ if (consumesDstSelForwardingOperand (VALU, &Def, TRI))
986
989
return true ;
987
- }
988
990
}
989
991
}
992
+
990
993
return false ;
991
994
};
992
995
0 commit comments