61
61
62
62
namespace triton { namespace backend { namespace pytorch {
63
63
64
- namespace {
65
-
66
- #ifdef TRITON_ENABLE_GPU
67
- void CUDART_CB
68
- CaptureTimestampCallback (void * data)
69
- {
70
- auto * timestamp = reinterpret_cast <std::atomic<uint64_t >*>(data);
71
- SET_TIMESTAMP (*timestamp);
72
- }
73
- #endif
74
-
75
- } // namespace
76
-
77
64
//
78
65
// ModelState
79
66
//
@@ -1304,16 +1291,22 @@ ModelInstanceState::ProcessRequests(
1304
1291
}
1305
1292
}
1306
1293
1294
+ #ifdef TRITON_ENABLE_GPU
1295
+ if (Kind () == TRITONSERVER_INSTANCEGROUPKIND_MODEL) {
1296
+ // For 'KIND_MODEL', multiple streams will be involved, so we need to call
1297
+ // 'cudaStreamSynchronize' before reading the output tensors.
1298
+ for (auto & stream : stream_vec_) {
1299
+ cudaStreamSynchronize (stream);
1300
+ }
1301
+ }
1302
+ #endif
1303
+
1307
1304
uint64_t compute_end_ns = 0 ;
1308
- std::atomic< uint64_t > compute_output_start{ 0 } ;
1305
+ uint64_t compute_output_start = 0 ;
1309
1306
1310
1307
if ((Kind () == TRITONSERVER_INSTANCEGROUPKIND_MODEL) && (device_cnt_ > 0 )) {
1311
1308
#ifdef TRITON_ENABLE_GPU
1312
- // For the compute infer duration, multiple streams will be involved, so we
1313
- // need to launch a CUDA callback function for timestamp capturing.
1314
- cudaLaunchHostFunc (
1315
- GetCudaStreamByInstanceKind (), CaptureTimestampCallback,
1316
- reinterpret_cast <void *>(&compute_output_start));
1309
+ SET_TIMESTAMP (compute_output_start);
1317
1310
#endif
1318
1311
} else {
1319
1312
RESPOND_ALL_AND_SET_TRUE_IF_ERROR (
@@ -1323,14 +1316,6 @@ ModelInstanceState::ProcessRequests(
1323
1316
reinterpret_cast <void *>(&compute_output_start_event_)));
1324
1317
}
1325
1318
1326
- #ifdef TRITON_ENABLE_GPU
1327
- if (Kind () == TRITONSERVER_INSTANCEGROUPKIND_MODEL) {
1328
- for (auto & stream : stream_vec_) {
1329
- cudaStreamSynchronize (stream);
1330
- }
1331
- }
1332
- #endif
1333
-
1334
1319
if (!all_response_failed) {
1335
1320
if (!invalid_index) {
1336
1321
RESPOND_ALL_AND_SET_TRUE_IF_ERROR (
0 commit comments