Skip to content

Commit 1c7ff5a

Browse files
committed
Clean up response_factory when the final flag is set
1 parent c426243 commit 1c7ff5a

File tree

4 files changed

+23
-8
lines changed

4 files changed

+23
-8
lines changed

src/pb_utils.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,7 @@ struct ResponseSenderBase {
225225
bi::managed_external_buffer::handle_t error;
226226
intptr_t request_address;
227227
intptr_t response_factory_address;
228+
bool is_response_factory_cleaned;
228229
};
229230

230231
struct ResponseSendMessage : ResponseSenderBase {

src/python_be.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1310,6 +1310,7 @@ ModelInstanceState::ResponseSendDecoupled(
13101310
TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
13111311
response_factory(reinterpret_cast<TRITONBACKEND_ResponseFactory*>(
13121312
send_message_payload->response_factory_address));
1313+
send_message_payload->is_response_factory_cleaned = true;
13131314
}
13141315
}
13151316

src/response_sender.cc

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
//
33
// Redistribution and use in source and binary forms, with or without
44
// modification, are permitted provided that the following conditions
@@ -41,17 +41,18 @@ ResponseSender::ResponseSender(
4141
const std::shared_ptr<PbCancel>& pb_cancel)
4242
: request_address_(request_address),
4343
response_factory_address_(response_factory_address), shm_pool_(shm_pool),
44-
closed_(false), pb_cancel_(pb_cancel)
44+
closed_(false), pb_cancel_(pb_cancel), is_response_factory_cleaned_(false)
4545
{
4646
}
4747

4848
ResponseSender::~ResponseSender()
4949
{
50-
// std::cerr << "===== ResponseSender::~ResponseSender() =====" << std::endl;
51-
// std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
52-
// stub->EnqueueCleanupId(
53-
// reinterpret_cast<void*>(response_factory_address_),
54-
// PYTHONSTUB_DecoupledResponseFactoryCleanup);
50+
if (!is_response_factory_cleaned_) {
51+
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
52+
stub->EnqueueCleanupId(
53+
reinterpret_cast<void*>(response_factory_address_),
54+
PYTHONSTUB_DecoupledResponseFactoryCleanup);
55+
}
5556
}
5657

5758
void
@@ -189,6 +190,9 @@ ResponseSender::Send(
189190
}
190191
}
191192

193+
is_response_factory_cleaned_ =
194+
send_message_payload->is_response_factory_cleaned;
195+
192196
if (send_message_payload->has_error) {
193197
if (send_message_payload->is_error_set) {
194198
std::unique_ptr<PbString> error = PbString::LoadFromSharedMemory(

src/response_sender.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
//
33
// Redistribution and use in source and binary forms, with or without
44
// modification, are permitted provided that the following conditions
@@ -46,7 +46,16 @@ class ResponseSender {
4646
intptr_t request_address_;
4747
intptr_t response_factory_address_;
4848
std::unique_ptr<SharedMemoryManager>& shm_pool_;
49+
// The flag to indicate if the response sender is closed. It is set to true
50+
// once the TRITONSERVER_RESPONSE_COMPLETE_FINAL flag is set, meaning that the
51+
// response_sender should not be used anymore. This flag is separate from the
52+
// `is_response_factory_cleaned_` flag because errors might occur after the
53+
// complete_final flag is set but before the response_factory gets cleaned up.
4954
bool closed_;
5055
std::shared_ptr<PbCancel> pb_cancel_;
56+
// The flag to indicate if the response_factory is already cleaned up on the
57+
// Python backend side. If not, the response_factory will be cleaned up in the
58+
// destructor.
59+
bool is_response_factory_cleaned_;
5160
};
5261
}}} // namespace triton::backend::python

0 commit comments

Comments
 (0)