@@ -290,8 +290,8 @@ ModelInstanceState::SaveRequestsToSharedMemory(
290
290
request, &request_timeout));
291
291
292
292
std::unique_ptr<InferRequest> infer_request;
293
- TRITONBACKEND_ResponseFactory* factory_ptr;
294
- RETURN_IF_ERROR (TRITONBACKEND_ResponseFactoryNew (&factory_ptr, request));
293
+ TRITONBACKEND_ResponseFactory* factory_ptr = nullptr ;
294
+ // RETURN_IF_ERROR(TRITONBACKEND_ResponseFactoryNew(&factory_ptr, request));
295
295
296
296
infer_request = std::make_unique<InferRequest>(
297
297
id, correlation_id, pb_input_tensors, requested_output_names,
@@ -322,8 +322,6 @@ ModelInstanceState::LaunchStubProcess()
322
322
thread_pool_ = std::make_unique<boost::asio::thread_pool>(
323
323
model_state->StateForBackend ()->thread_pool_size );
324
324
325
- queue_monitor_thread_ = true ;
326
- queue_monitor_ = std::thread (&ModelInstanceState::MessageQueueMonitor, this );
327
325
request_executor_ = std::make_unique<RequestExecutor>(
328
326
Stub ()->ShmPool (), model_state->TritonServer ());
329
327
@@ -685,44 +683,6 @@ ModelInstanceState::ExecuteBLSRequest(
685
683
}
686
684
}
687
685
688
- void
689
- ModelInstanceState::MessageQueueMonitor ()
690
- {
691
- while (queue_monitor_thread_) {
692
- bi::managed_external_buffer::handle_t handle =
693
- Stub ()->ParentMessageQueue ()->Pop ();
694
- if (handle == DUMMY_MESSAGE) {
695
- break ;
696
- }
697
- std::unique_ptr<IPCMessage> message =
698
- IPCMessage::LoadFromSharedMemory (Stub ()->ShmPool (), handle);
699
-
700
- // Need to notify the model instance thread that the execute response has
701
- // been received.
702
- if (message->Command () == PYTHONSTUB_ExecuteResponse) {
703
- std::lock_guard<std::mutex> guard{mu_};
704
- received_message_ = std::move (message);
705
- cv_.notify_one ();
706
- } else if (message->Command () == PYTHONSTUB_ResponseSend) {
707
- std::shared_ptr<IPCMessage> response_send_message = std::move (message);
708
- std::packaged_task<void ()> task ([this , response_send_message] {
709
- ResponseSendDecoupled (response_send_message);
710
- });
711
- boost::asio::post (*thread_pool_, std::move (task));
712
- } else if (
713
- message->Command () == PYTHONSTUB_InferExecRequest ||
714
- message->Command () == PYTHONSTUB_InferStreamExecRequest) {
715
- std::shared_ptr<IPCMessage> bls_execute = std::move (message);
716
- std::packaged_task<void ()> task ([this , bls_execute] {
717
- ExecuteBLSRequest (
718
- bls_execute,
719
- (bls_execute->Command () == PYTHONSTUB_InferStreamExecRequest));
720
- });
721
- boost::asio::post (*thread_pool_, std::move (task));
722
- }
723
- }
724
- }
725
-
726
686
void
727
687
ModelInstanceState::StubToParentMQMonitor ()
728
688
{
@@ -769,6 +729,25 @@ ModelInstanceState::StubToParentMQMonitor()
769
729
ProcessModelControlRequest (message);
770
730
break ;
771
731
}
732
+ case PYTHONSTUB_ResponseSend: {
733
+ std::shared_ptr<IPCMessage> response_send_message = std::move (message);
734
+ std::packaged_task<void ()> task ([this , response_send_message] {
735
+ ResponseSendDecoupled (response_send_message);
736
+ });
737
+ boost::asio::post (*thread_pool_, std::move (task));
738
+ break ;
739
+ }
740
+ case PYTHONSTUB_InferExecRequest:
741
+ case PYTHONSTUB_InferStreamExecRequest: {
742
+ std::shared_ptr<IPCMessage> bls_execute = std::move (message);
743
+ std::packaged_task<void ()> task ([this , bls_execute] {
744
+ ExecuteBLSRequest (
745
+ bls_execute,
746
+ (bls_execute->Command () == PYTHONSTUB_InferStreamExecRequest));
747
+ });
748
+ boost::asio::post (*thread_pool_, std::move (task));
749
+ break ;
750
+ }
772
751
default : {
773
752
LOG_MESSAGE (
774
753
TRITONSERVER_LOG_ERROR, " Unexpected message type received." );
@@ -1228,26 +1207,23 @@ ModelInstanceState::ProcessRequests(
1228
1207
IPCMessage::Create (Stub ()->ShmPool (), false /* inline_response*/ ));
1229
1208
ipc_message->Command () = PYTHONSTUB_CommandType::PYTHONSTUB_ExecuteRequest;
1230
1209
ipc_message->Args () = request_batch.handle_ ;
1231
- received_message_ = nullptr ;
1210
+
1232
1211
ScopedDefer execute_finalize ([this ] {
1233
1212
// Push a dummy message to signal the thread to terminate.
1234
1213
Stub ()->StubMessageQueue ()->Push (DUMMY_MESSAGE);
1235
1214
});
1236
1215
1216
+ std::unique_ptr<IPCMessage> response;
1237
1217
{
1238
- std::unique_lock<std::mutex> guard{mu_};
1239
1218
Stub ()->StubMessageQueue ()->Push (ipc_message->ShmHandle ());
1240
- cv_.wait (guard, [this ] { return received_message_ != nullptr ; });
1219
+ bi::managed_external_buffer::handle_t response_message;
1220
+ Stub ()->ReceiveMessageFromStub (response_message);
1221
+ response = IPCMessage::LoadFromSharedMemory (Stub ()->ShmPool (), response_message);
1241
1222
}
1242
-
1243
-
1244
- AllocatedSharedMemory<char > response_batch = Stub ()->ShmPool ()->Load <char >(received_message_->Args ());
1245
-
1223
+ char * ipc_message_shm = reinterpret_cast <char *>(response->GetAllocatedSharedMemory ().data_ .get ());;
1246
1224
ResponseBatch* response_batch_shm_ptr =
1247
- reinterpret_cast <ResponseBatch*>(response_batch. data_ . get ( ));
1225
+ reinterpret_cast <ResponseBatch*>(ipc_message_shm + sizeof (IPCMessageShm ));
1248
1226
1249
- received_message_.reset ();
1250
-
1251
1227
uint64_t compute_end_ns = 0 ;
1252
1228
SET_TIMESTAMP (compute_end_ns);
1253
1229
reporter.SetComputeEndNs (compute_end_ns);
@@ -1282,7 +1258,7 @@ ModelInstanceState::ProcessRequests(
1282
1258
}
1283
1259
bi::managed_external_buffer::handle_t * response_shm_handle =
1284
1260
reinterpret_cast <bi::managed_external_buffer::handle_t *>(
1285
- response_batch. data_ . get ( ) + sizeof (ResponseBatch ));
1261
+ ipc_message_shm + sizeof (ResponseBatch ) + sizeof (IPCMessageShm ));
1286
1262
1287
1263
// If the output provided by the model is in GPU, we will pass the list of
1288
1264
// buffers provided by Triton to the stub process.
@@ -1390,8 +1366,6 @@ ModelInstanceState::ProcessRequests(
1390
1366
}
1391
1367
}
1392
1368
1393
- // Finalize the execute.
1394
- execute_finalize.Complete ();
1395
1369
}
1396
1370
1397
1371
// If the output tensor is in GPU, there will be a second round trip
@@ -1610,7 +1584,6 @@ ModelInstanceState::~ModelInstanceState()
1610
1584
Stub ()->TerminateStub ();
1611
1585
TerminateMonitor ();
1612
1586
Stub ()->ClearQueues ();
1613
- received_message_.reset ();
1614
1587
Stub ().reset ();
1615
1588
}
1616
1589
0 commit comments