@@ -1089,6 +1089,17 @@ ModelInstanceState::ResponseSendDecoupled(
1089
1089
ResponseSendMessage* send_message_payload =
1090
1090
reinterpret_cast <ResponseSendMessage*>(send_message.data_ .get ());
1091
1091
std::unique_ptr<PbString> error_message;
1092
+ ScopedDefer response_factory_deleter ([send_message_payload] {
1093
+ if (send_message_payload->flags == TRITONSERVER_RESPONSE_COMPLETE_FINAL) {
1094
+ TRITONBACKEND_ResponseFactory* response_factory =
1095
+ reinterpret_cast <TRITONBACKEND_ResponseFactory*>(
1096
+ send_message_payload->response_factory_address );
1097
+ std::unique_ptr<
1098
+ TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
1099
+ lresponse_factory (reinterpret_cast <TRITONBACKEND_ResponseFactory*>(
1100
+ response_factory));
1101
+ }
1102
+ });
1092
1103
ScopedDefer _ ([send_message_payload] {
1093
1104
{
1094
1105
bi::scoped_lock<bi::interprocess_mutex> guard{send_message_payload->mu };
@@ -1214,13 +1225,6 @@ ModelInstanceState::ResponseSendDecoupled(
1214
1225
SetErrorForResponseSendMessage (
1215
1226
send_message_payload, WrapTritonErrorInSharedPtr (error), error_message);
1216
1227
}
1217
-
1218
- if (send_message_payload->flags == TRITONSERVER_RESPONSE_COMPLETE_FINAL) {
1219
- std::unique_ptr<
1220
- TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
1221
- lresponse_factory (
1222
- reinterpret_cast <TRITONBACKEND_ResponseFactory*>(response_factory));
1223
- }
1224
1228
}
1225
1229
1226
1230
TRITONSERVER_Error*
@@ -1357,6 +1361,16 @@ ModelInstanceState::ProcessRequests(
1357
1361
(*responses)[r] = nullptr ;
1358
1362
continue ;
1359
1363
}
1364
+ {
1365
+ TRITONBACKEND_ResponseFactory* response_factory =
1366
+ reinterpret_cast <TRITONBACKEND_ResponseFactory*>(
1367
+ pb_infer_requests[r]->GetResponseFactoryAddress ());
1368
+ std::unique_ptr<
1369
+ TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
1370
+ lresponse_factory (
1371
+ reinterpret_cast <TRITONBACKEND_ResponseFactory*>(
1372
+ response_factory));
1373
+ }
1360
1374
infer_response = InferResponse::LoadFromSharedMemory (
1361
1375
Stub ()->ShmPool (), response_shm_handle[r],
1362
1376
false /* open_cuda_handle */ );
0 commit comments