@@ -690,22 +690,28 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchCooperati
690
690
691
691
template <GFXCORE_FAMILY gfxCoreFamily>
692
692
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint64_t timeout) {
693
+ auto syncTaskCount = this ->csr ->peekTaskCount ();
694
+ ze_result_t status = ZE_RESULT_SUCCESS;
693
695
694
- if (this ->isFlushTaskSubmissionEnabled && !this ->isSyncModeQueue ) {
696
+ if (isInOrderExecutionEnabled ()) {
697
+ status = synchronizeInOrderExecution (timeout);
698
+ } else if (this ->isFlushTaskSubmissionEnabled && !this ->isSyncModeQueue ) {
695
699
const int64_t timeoutInMicroSeconds = timeout / 1000 ;
696
- auto syncTaskCount = this ->csr ->peekTaskCount ();
697
700
const auto waitStatus = this ->csr ->waitForCompletionWithTimeout (NEO::WaitParams{false , false , timeoutInMicroSeconds},
698
701
syncTaskCount);
699
702
if (waitStatus == NEO::WaitStatus::GpuHang) {
700
- this ->printKernelsPrintfOutput (true );
701
- this ->checkAssert ();
702
- return ZE_RESULT_ERROR_DEVICE_LOST;
703
+ status = ZE_RESULT_ERROR_DEVICE_LOST;
703
704
}
705
+ }
706
+
707
+ if (status == ZE_RESULT_SUCCESS) {
704
708
this ->csr ->getInternalAllocationStorage ()->cleanAllocationList (syncTaskCount, NEO::AllocationUsage::TEMPORARY_ALLOCATION);
705
- this ->printKernelsPrintfOutput (false );
706
- this ->checkAssert ();
707
709
}
708
- return ZE_RESULT_SUCCESS;
710
+
711
+ this ->printKernelsPrintfOutput (status == ZE_RESULT_ERROR_DEVICE_LOST);
712
+ this ->checkAssert ();
713
+
714
+ return status;
709
715
}
710
716
711
717
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -852,7 +858,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(cons
852
858
}
853
859
854
860
if (isInOrderExecutionEnabled ()) {
855
- auto status = synchronizeInOrderExecution ();
861
+ auto status = synchronizeInOrderExecution (std::numeric_limits< uint64_t >:: max () );
856
862
if (status != ZE_RESULT_SUCCESS) {
857
863
return status;
858
864
}
@@ -1061,23 +1067,42 @@ bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isRelaxedOrderingDispatchAll
1061
1067
}
1062
1068
1063
1069
template <GFXCORE_FAMILY gfxCoreFamily>
1064
- ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::synchronizeInOrderExecution() const {
1070
+ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::synchronizeInOrderExecution(uint64_t timeout) const {
1071
+ std::chrono::high_resolution_clock::time_point waitStartTime, lastHangCheckTime, now;
1072
+ uint64_t timeDiff = 0 ;
1073
+
1074
+ ze_result_t status = ZE_RESULT_NOT_READY;
1075
+
1065
1076
auto hostAddress = static_cast <uint32_t *>(this ->inOrderDependencyCounterAllocation ->getUnderlyingBuffer ());
1066
1077
auto waitValue = this ->inOrderDependencyCounter ;
1067
1078
1068
- auto lastHangCheckTime = std::chrono::high_resolution_clock::now ();
1079
+ lastHangCheckTime = std::chrono::high_resolution_clock::now ();
1080
+ waitStartTime = lastHangCheckTime;
1069
1081
1070
- while (*hostAddress < waitValue) {
1082
+ do {
1071
1083
this ->csr ->downloadAllocation (*this ->inOrderDependencyCounterAllocation );
1072
1084
1073
- bool status = NEO::WaitUtils::waitFunctionWithPredicate<const uint32_t >(hostAddress, waitValue, std::greater_equal<uint32_t >());
1085
+ if (NEO::WaitUtils::waitFunctionWithPredicate<const uint32_t >(hostAddress, waitValue, std::greater_equal<uint32_t >())) {
1086
+ status = ZE_RESULT_SUCCESS;
1087
+ break ;
1088
+ }
1074
1089
1075
- if (!status && this ->csr ->checkGpuHangDetected (std::chrono::high_resolution_clock::now (), lastHangCheckTime)) {
1076
- return ZE_RESULT_ERROR_DEVICE_LOST;
1090
+ if (this ->csr ->checkGpuHangDetected (std::chrono::high_resolution_clock::now (), lastHangCheckTime)) {
1091
+ status = ZE_RESULT_ERROR_DEVICE_LOST;
1092
+ break ;
1077
1093
}
1078
- }
1079
1094
1080
- return ZE_RESULT_SUCCESS;
1095
+ if (timeout == std::numeric_limits<uint64_t >::max ()) {
1096
+ continue ;
1097
+ } else if (timeout == 0 ) {
1098
+ break ;
1099
+ }
1100
+
1101
+ now = std::chrono::high_resolution_clock::now ();
1102
+ timeDiff = std::chrono::duration_cast<std::chrono::nanoseconds>(now - waitStartTime).count ();
1103
+ } while (timeDiff < timeout);
1104
+
1105
+ return status;
1081
1106
}
1082
1107
1083
1108
} // namespace L0
0 commit comments