@@ -843,8 +843,6 @@ ModelInstanceState::ProcessCleanupRequest(
     infer_payload_.erase(id);
   } else if (message->Command() == PYTHONSTUB_DecoupledResponseFactoryCleanup) {
     // Delete response factory
-    std::cerr << "=== ResponseFactoryDeleter -> ProcessCleanupRequest ==="
-              << std::endl;
     std::unique_ptr<
         TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
         response_factory(reinterpret_cast<TRITONBACKEND_ResponseFactory*>(id));
@@ -1165,8 +1163,6 @@ ModelInstanceState::ResponseSendDecoupled(
     TRITONBACKEND_ResponseFactory* response_factory =
         reinterpret_cast<TRITONBACKEND_ResponseFactory*>(
             send_message_payload->response_factory_address);
-    std::cerr << "=== ResponseFactoryDeleter -> ResponseSendDecoupled ==="
-              << std::endl;
     std::unique_ptr<
         TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
         lresponse_factory(reinterpret_cast<TRITONBACKEND_ResponseFactory*>(
@@ -1366,20 +1362,11 @@ ModelInstanceState::ProcessRequests(
   reporter.SetBatchStatistics(total_batch_size);

   if (response_batch_shm_ptr->has_error) {
-    // The "is_response_factory_deleted" flag indicates whether the response
-    // factory has been deleted. The flag is used in a corner case
-    // where after the response sender sends a response and complete final flag,
-    // and closes the response factory, the model returns a response from
-    // `execute()`. For both default and decoupled mode, upon handling that
-    // error, no need to delete the response factory.
     if (!response_batch_shm_ptr->is_response_factory_deleted) {
       for (uint32_t r = 0; r < request_count; r++) {
         TRITONBACKEND_ResponseFactory* response_factory =
             reinterpret_cast<TRITONBACKEND_ResponseFactory*>(
                 pb_infer_requests[r]->GetResponseFactoryAddress());
-        std::cerr << "=== ResponseFactoryDeleter -> "
-                     "response_batch_shm_ptr->has_error ==="
-                  << std::endl;
         std::unique_ptr<
             TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
             lresponse_factory(reinterpret_cast<TRITONBACKEND_ResponseFactory*>(
@@ -1411,7 +1398,6 @@ ModelInstanceState::ProcessRequests(
       // usage of response sender, so only create a TRITONBACKEND_Response
       // object for the valid responses, and skip the None responses later.
       if (response_shm_handle[i] == 0) {
-        std::cerr << "=== PYBE response_shm_handle is 0 ===" << std::endl;
         responses->emplace_back(nullptr);
       } else {
         TRITONBACKEND_Response* response;
@@ -1434,18 +1420,15 @@ ModelInstanceState::ProcessRequests(
       gpu_output_buffers(request_count);
   GPUBuffersHelper gpu_buffer_helper;

-  std::cerr << "=== PYBE request_count: " << request_count << std::endl;
   for (uint32_t r = 0; r < request_count; ++r) {
     NVTX_RANGE(nvtx_, "LoadingResponse " + Name());
+    requires_deferred_callback.push_back(false);
     if (response_shm_handle[r] == 0) {
-      std::cerr << "=== PYBE skip the response_shm_handle is 0 ==="
-                << std::endl;
       continue;
     }
     TRITONBACKEND_Response* response = (*responses)[r];
     TRITONBACKEND_Request* request = requests[r];
     uint32_t requested_output_count = 0;
-    requires_deferred_callback.push_back(false);

     shm_responses.emplace_back(nullptr);
     std::unique_ptr<InferResponse>& infer_response = shm_responses.back();
@@ -1459,21 +1442,10 @@ ModelInstanceState::ProcessRequests(
       (*responses)[r] = nullptr;
       continue;
     }
-
-    // if (response_shm_handle[r] == 0) {
-    //   std::cerr << "=== PYBE response_shm_handle is 0 ===" << std::endl;
-    //   LOG_IF_ERROR(
-    //       TRITONBACKEND_ResponseDelete((*responses)[r]),
-    //       "failed to delete response");
-    //   (*responses)[r] = nullptr;
-    //   continue;
-    // }
     {
       TRITONBACKEND_ResponseFactory* response_factory =
           reinterpret_cast<TRITONBACKEND_ResponseFactory*>(
               pb_infer_requests[r]->GetResponseFactoryAddress());
-      std::cerr << "=== ResponseFactoryDeleter -> regular workflow ==="
-                << std::endl;
       std::unique_ptr<
           TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
           lresponse_factory(
@@ -1522,17 +1494,13 @@ ModelInstanceState::ProcessRequests(
     GUARDED_RESPOND_IF_ERROR(
         responses, r,
         TRITONBACKEND_RequestOutputCount(request, &requested_output_count));
-    std::cerr << "=== PYBE requested_output_count: " << requested_output_count
-              << std::endl;
     std::set<std::string> requested_output_names;
     for (size_t j = 0; j < requested_output_count; ++j) {
       const char* output_name;
       GUARDED_RESPOND_IF_ERROR(
           responses, r,
           TRITONBACKEND_RequestOutputName(request, j, &output_name));
       requested_output_names.insert(output_name);
-      std::cerr << "=== PYBE requested_output_name: " << output_name
-                << std::endl;
     }

     bool require_deferred_callback = false;
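
Every hunk above leans on the same RAII pattern: the raw TRITONBACKEND_ResponseFactory* is wrapped in a std::unique_ptr with a custom deleter, so the factory is released automatically when the pointer leaves scope, which is why the std::cerr tracing around each deletion site could be dropped. The sketch below shows that pattern in isolation, assuming a stubbed TRITONBACKEND_ResponseFactoryDelete so it compiles standalone; the ResponseFactoryDeleter here is a stand-in for backend::ResponseFactoryDeleter, and CleanupResponseFactory is a hypothetical helper, not a function from the diff.

#include <cstdint>
#include <memory>

// Opaque Triton types, forward-declared so this sketch is self-contained.
struct TRITONBACKEND_ResponseFactory;
struct TRITONSERVER_Error;

// Stand-in for the real TRITONBACKEND_ResponseFactoryDelete from
// tritonbackend.h, stubbed here so the sketch compiles without linking
// against Triton. The real call destroys the factory and may return an error.
TRITONSERVER_Error* TRITONBACKEND_ResponseFactoryDelete(
    TRITONBACKEND_ResponseFactory* /*factory*/)
{
  return nullptr;
}

// Sketch of a custom deleter in the spirit of backend::ResponseFactoryDeleter:
// std::unique_ptr invokes operator() when the owning pointer goes out of scope.
struct ResponseFactoryDeleter {
  void operator()(TRITONBACKEND_ResponseFactory* factory) const
  {
    if (factory != nullptr) {
      // The real deleter logs the returned error; this sketch discards it.
      TRITONBACKEND_ResponseFactoryDelete(factory);
    }
  }
};

// Hypothetical helper mirroring the hunks above: reclaim ownership of a
// factory stored as a raw address and let RAII release it on scope exit.
void CleanupResponseFactory(std::uintptr_t response_factory_address)
{
  std::unique_ptr<TRITONBACKEND_ResponseFactory, ResponseFactoryDeleter>
      response_factory(reinterpret_cast<TRITONBACKEND_ResponseFactory*>(
          response_factory_address));
}  // ~unique_ptr runs ResponseFactoryDeleter here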