
Commit 95519a1

Remove extra logs
1 parent 8b17fbd commit 95519a1

3 files changed: +31 -61 lines changed


src/pb_stub.cc

Lines changed: 30 additions & 23 deletions
@@ -719,22 +719,12 @@ Stub::ProcessRequests(RequestBatch* request_batch_shm_ptr)
     ResponseBatch* response_batch_shm_ptr = reinterpret_cast<ResponseBatch*>(
         response_batch.value().data_.get() + sizeof(IPCMessageShm));

-    // Handle two special cases:
-    // 1. For default(non-decoupled) mode, where the response
-    // factory should already be cleaned up with the previous response sent
-    // from response sender, and yet the model tries to return another
-    // response from `execute()` function. Notify the backend to NOT to
+
+    // If the response sender is already closed, notify the backend NOT to
     // delete the response factory again during error handling.
-    // 2.The response sender is already closed, need to notify the backend to
-    // NOT to delete the response factory again during error handling.
-    // std::string error_string = pb_exception.what();
-    if ((err_message.find(
-             "Non-decoupled model cannot send more than one response") !=
-         std::string::npos) ||
-        (err_message.find("Response sender has been closed") !=
-         std::string::npos)) {
+    if (err_message.find("Response sender has been closed") !=
+        std::string::npos) {
       response_batch_shm_ptr->is_response_factory_deleted = true;
-      LOG_ERROR << "=== caught error: " << err_message;
     }

     response_batch_shm_ptr->has_error = true;
@@ -752,8 +742,6 @@ Stub::ProcessRequests(RequestBatch* request_batch_shm_ptr)
     }
   } else {
     if (!response_batch) {
-      // No response is returned from `execute()`.
-      std::cerr << "===== response_batch is not set" << std::endl;
       response_batch = shm_pool_->Construct<char>(
           sizeof(ResponseBatch) + sizeof(IPCMessageShm));
       ResponseBatch* response_batch_shm_ptr = reinterpret_cast<ResponseBatch*>(
@@ -764,8 +752,6 @@ Stub::ProcessRequests(RequestBatch* request_batch_shm_ptr)
          response_batch.value().data_.get() + sizeof(IPCMessageShm));
      response_batch_shm_ptr->has_error = false;
      response_batch_shm_ptr->is_error_set = false;
-      std::cerr << "===== response_batch_shm_ptr->batch_size: "
-                << response_batch_shm_ptr->batch_size << std::endl;
    }

    execute_response = IPCMessage::Create(
@@ -865,8 +851,32 @@ Stub::ProcessReturnedResponses(
    }

    InferResponse* response = py_responses[i].cast<InferResponse*>();
-    request->GetResponseSender()->UpdateStateAndCounters(
-        response, TRITONSERVER_RESPONSE_COMPLETE_FINAL);
+    try {
+      request->GetResponseSender()->UpdateStateAndCounters(
+          response, TRITONSERVER_RESPONSE_COMPLETE_FINAL);
+    }
+    catch (const PythonBackendException& pb_exception) {
+      // Handle the exception here to catch the error when there's a response
+      // returned from `execute()`, and the below error message is thrown.
+      // In default (non-decoupled) mode, the response factory should already
+      // have been cleaned up when the previous response was sent by the
+      // response sender. However, if the model attempts to return another
+      // response from the `execute()` function, notify the backend NOT to
+      // delete the response factory again during error handling.
+      std::string err_message = pb_exception.what();
+      if (err_message.find(
+              "Non-decoupled model cannot send more than one response") !=
+          std::string::npos) {
+        response_batch = std::move(shm_pool_->Construct<char>(
+            sizeof(ResponseBatch) + sizeof(IPCMessageShm)));
+        ResponseBatch* response_batch_shm_ptr =
+            reinterpret_cast<ResponseBatch*>(
+                response_batch.value().data_.get() + sizeof(IPCMessageShm));
+        response_batch_shm_ptr->batch_size = 0;
+        response_batch_shm_ptr->is_response_factory_deleted = true;
+      }
+      throw pb_exception;
+    }
  }
 }
 // Return all the created responses using response_batch. The reason
@@ -883,18 +893,15 @@ Stub::ProcessReturnedResponses(
      reinterpret_cast<bi::managed_external_buffer::handle_t*>(
          response_batch.value().data_.get() + sizeof(ResponseBatch) +
          sizeof(IPCMessageShm));
-  std::cerr << "===== response_size: " << responses_size << std::endl;
  for (size_t i = 0; i < responses_size; i++) {
    // Check the return type of execute function.
    InferRequest* infer_request = py_requests[i].cast<InferRequest*>();
    InferResponse* infer_response = py_responses[i].cast<InferResponse*>();
    if (!py::isinstance<py::none>(py_responses[i])) {
-      std::cerr << "===== response is NOT None" << std::endl;
      infer_response->PruneOutputTensors(infer_request->RequestedOutputNames());
      ProcessResponse(infer_response);
      responses_shm_handle[i] = infer_response->ShmHandle();
    } else {
-      std::cerr << "===== response is None" << std::endl;
      responses_shm_handle[i] = 0;
    }
  }
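Note on the ProcessReturnedResponses change: the new try/catch follows a catch-inspect-rethrow pattern: trap the backend exception, match its message against the known error string, record a flag for the caller's error handling, then propagate the exception unchanged. Below is a minimal standalone sketch of that pattern; KnownError, mark_factory_deleted, and the driver in main are hypothetical stand-ins, not the backend's real types.

#include <iostream>
#include <stdexcept>
#include <string>

// Illustrative stand-ins for PythonBackendException and the shared-memory
// "is_response_factory_deleted" flag touched by the real commit.
struct KnownError : std::runtime_error {
  using std::runtime_error::runtime_error;
};

static bool mark_factory_deleted = false;

void UpdateStateAndCounters(bool second_response)
{
  if (second_response) {
    throw KnownError("Non-decoupled model cannot send more than one response");
  }
}

void ProcessReturnedResponse(bool second_response)
{
  try {
    UpdateStateAndCounters(second_response);
  }
  catch (const KnownError& e) {
    // Only this specific error implies the response factory was already
    // cleaned up, so record that before rethrowing for generic handling.
    std::string err_message = e.what();
    if (err_message.find("cannot send more than one response") !=
        std::string::npos) {
      mark_factory_deleted = true;
    }
    throw;
  }
}

int main()
{
  try {
    ProcessReturnedResponse(/*second_response=*/true);
  }
  catch (const KnownError&) {
    std::cout << "factory already deleted: " << std::boolalpha
              << mark_factory_deleted << std::endl;  // prints "true"
  }
  return 0;
}

The sketch uses throw; to rethrow the original exception object, while the diff rethrows a copy via throw pb_exception; either way the caller's error path is unchanged.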

src/python_be.cc

Lines changed: 1 addition & 33 deletions
@@ -843,8 +843,6 @@ ModelInstanceState::ProcessCleanupRequest(
    infer_payload_.erase(id);
  } else if (message->Command() == PYTHONSTUB_DecoupledResponseFactoryCleanup) {
    // Delete response factory
-    std::cerr << "=== ResponseFactoryDeleter -> ProcessCleanupRequest ==="
-              << std::endl;
    std::unique_ptr<
        TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
        response_factory(reinterpret_cast<TRITONBACKEND_ResponseFactory*>(id));
@@ -1165,8 +1163,6 @@ ModelInstanceState::ResponseSendDecoupled(
    TRITONBACKEND_ResponseFactory* response_factory =
        reinterpret_cast<TRITONBACKEND_ResponseFactory*>(
            send_message_payload->response_factory_address);
-    std::cerr << "=== ResponseFactoryDeleter -> ResponseSendDecoupled ==="
-              << std::endl;
    std::unique_ptr<
        TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
        lresponse_factory(reinterpret_cast<TRITONBACKEND_ResponseFactory*>(
@@ -1366,20 +1362,11 @@ ModelInstanceState::ProcessRequests(
  reporter.SetBatchStatistics(total_batch_size);

  if (response_batch_shm_ptr->has_error) {
-    // The "is_response_factory_deleted" flag indicates whether the response
-    // factory has been deleted. The flag is used in a corner case
-    // where after the response sender sends a response and complete final flag,
-    // and closes the response factory, the model returns a response from
-    // `execute()`. For both default and decoupled mode, upon handling that
-    // error, no need to delete the response factory.
    if (!response_batch_shm_ptr->is_response_factory_deleted) {
      for (uint32_t r = 0; r < request_count; r++) {
        TRITONBACKEND_ResponseFactory* response_factory =
            reinterpret_cast<TRITONBACKEND_ResponseFactory*>(
                pb_infer_requests[r]->GetResponseFactoryAddress());
-        std::cerr << "=== ResponseFactoryDeleter -> "
-                     "response_batch_shm_ptr->has_error ==="
-                  << std::endl;
        std::unique_ptr<
            TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
            lresponse_factory(reinterpret_cast<TRITONBACKEND_ResponseFactory*>(
@@ -1411,7 +1398,6 @@ ModelInstanceState::ProcessRequests(
    // usage of response sender, so only create a TRITONBACKEND_Response
    // object for the valid responses, and skip the None responses later.
    if (response_shm_handle[i] == 0) {
-      std::cerr << "=== PYBE response_shm_handle is 0 ===" << std::endl;
      responses->emplace_back(nullptr);
    } else {
      TRITONBACKEND_Response* response;
@@ -1434,18 +1420,15 @@ ModelInstanceState::ProcessRequests(
      gpu_output_buffers(request_count);
  GPUBuffersHelper gpu_buffer_helper;

-  std::cerr << "=== PYBE request_count: " << request_count << std::endl;
  for (uint32_t r = 0; r < request_count; ++r) {
    NVTX_RANGE(nvtx_, "LoadingResponse " + Name());
+    requires_deferred_callback.push_back(false);
    if (response_shm_handle[r] == 0) {
-      std::cerr << "=== PYBE skip the response_shm_handle is 0 ==="
-                << std::endl;
      continue;
    }
    TRITONBACKEND_Response* response = (*responses)[r];
    TRITONBACKEND_Request* request = requests[r];
    uint32_t requested_output_count = 0;
-    requires_deferred_callback.push_back(false);

    shm_responses.emplace_back(nullptr);
    std::unique_ptr<InferResponse>& infer_response = shm_responses.back();
@@ -1459,21 +1442,10 @@ ModelInstanceState::ProcessRequests(
      (*responses)[r] = nullptr;
      continue;
    }
-
-    // if (response_shm_handle[r] == 0) {
-    //   std::cerr << "=== PYBE response_shm_handle is 0 ===" << std::endl;
-    //   LOG_IF_ERROR(
-    //       TRITONBACKEND_ResponseDelete((*responses)[r]),
-    //       "failed to delete response");
-    //   (*responses)[r] = nullptr;
-    //   continue;
-    // }
    {
      TRITONBACKEND_ResponseFactory* response_factory =
          reinterpret_cast<TRITONBACKEND_ResponseFactory*>(
              pb_infer_requests[r]->GetResponseFactoryAddress());
-      std::cerr << "=== ResponseFactoryDeleter -> regular workflow ==="
-                << std::endl;
      std::unique_ptr<
          TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
          lresponse_factory(
@@ -1522,17 +1494,13 @@ ModelInstanceState::ProcessRequests(
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_RequestOutputCount(request, &requested_output_count));
-    std::cerr << "=== PYBE requested_output_count: " << requested_output_count
-              << std::endl;
    std::set<std::string> requested_output_names;
    for (size_t j = 0; j < requested_output_count; ++j) {
      const char* output_name;
      GUARDED_RESPOND_IF_ERROR(
          responses, r,
          TRITONBACKEND_RequestOutputName(request, j, &output_name));
      requested_output_names.insert(output_name);
-      std::cerr << "=== PYBE requested_output_name: " << output_name
-                << std::endl;
    }

    bool require_deferred_callback = false;
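Note on the loop change above: besides removing logs, this file moves requires_deferred_callback.push_back(false) ahead of the early continue, so the flags vector stays index-aligned with the request array even when a response is skipped. A small self-contained sketch of why the ordering matters; the handle values below are made up for illustration.

#include <cassert>
#include <cstdint>
#include <vector>

int main()
{
  // Handle value 0 stands for a "None" response that gets skipped.
  std::vector<uint64_t> response_shm_handle = {10, 0, 30};
  std::vector<bool> requires_deferred_callback;

  for (uint32_t r = 0; r < response_shm_handle.size(); ++r) {
    // Pushing before the early `continue` gives every request a slot,
    // keeping requires_deferred_callback[r] aligned with request r.
    requires_deferred_callback.push_back(false);
    if (response_shm_handle[r] == 0) {
      continue;  // skipped responses still occupy index r above
    }
    // ... load the response; may later set requires_deferred_callback[r] ...
  }

  assert(requires_deferred_callback.size() == response_shm_handle.size());
  return 0;
}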

src/response_sender.cc

Lines changed: 0 additions & 5 deletions
@@ -106,8 +106,6 @@ ResponseSender::UpdateStateAndCounters(
  }

  if (flags == TRITONSERVER_RESPONSE_COMPLETE_FINAL) {
-    std::cerr << "=== ResponseSender -> UpdateStateAndCounters closing RF ==="
-              << std::endl;
    response_factory_deleted_.exchange(true);
    closed_ = true;
  }
@@ -177,7 +175,6 @@ ResponseSender::Send(
    bi::scoped_lock<bi::interprocess_mutex> guard{send_message_payload->mu};
    // The server will destruct the response factory if the final flag is set.
    if (flags == TRITONSERVER_RESPONSE_COMPLETE_FINAL) {
-      std::cerr << "====== scoped_defer -> closing RF =====" << std::endl;
      response_factory_deleted_.exchange(true);
    }
    stub->SendIPCUtilsMessage(ipc_message);
@@ -280,8 +277,6 @@ ResponseSender::Close()
 void
 ResponseSender::DeleteResponseFactory()
 {
-  std::cerr << "=== ResponseSender -> DeleteResponseFactory, "
-            << response_factory_deleted_ << " ===" << std::endl;
  bool already_deleted = response_factory_deleted_.exchange(true);
  if (!already_deleted) {
    std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
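Note on DeleteResponseFactory: it relies on std::atomic<bool>::exchange, which atomically sets the flag and returns the previous value, so whichever path reaches it first performs the cleanup exactly once. A minimal sketch of the idiom; the cleanup body below is a placeholder, not the backend's real cleanup.

#include <atomic>
#include <iostream>

std::atomic<bool> response_factory_deleted{false};

void DeleteResponseFactoryOnce()
{
  // exchange() writes `true` and returns the prior value atomically, so
  // only the first caller observes `false` and runs the cleanup branch.
  bool already_deleted = response_factory_deleted.exchange(true);
  if (!already_deleted) {
    std::cout << "deleting response factory" << std::endl;  // placeholder
  }
}

int main()
{
  DeleteResponseFactoryOnce();  // performs the cleanup
  DeleteResponseFactoryOnce();  // no-op: flag already set
  return 0;
}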
