 namespace triton { namespace backend { namespace python {
 
+void
+AssertResponseSenderArgumentsWellFormed(
+    const std::shared_ptr<InferResponse>& response, const uint32_t flags)
+{
+  // Check the correctness of the provided flags.
+  if (flags != TRITONSERVER_RESPONSE_COMPLETE_FINAL && flags != 0) {
+    throw PythonBackendException(
+        "Unable to send response. Unsupported flag provided.");
+  }
+
+  if (flags == 0 && response == nullptr) {
+    throw PythonBackendException(
+        "Inference Response object must be provided when the response flags is "
+        "set to zero.");
+  }
+}
+
 ResponseSender::ResponseSender(
     intptr_t request_address, intptr_t response_factory_address,
-    std::unique_ptr<SharedMemoryManager>& shm_pool,
+    bool const* is_decoupled, std::unique_ptr<SharedMemoryManager>& shm_pool,
     const std::shared_ptr<PbCancel>& pb_cancel)
     : request_address_(request_address),
-      response_factory_address_(response_factory_address), shm_pool_(shm_pool),
-      closed_(false), pb_cancel_(pb_cancel)
+      response_factory_address_(response_factory_address),
+      is_decoupled_(is_decoupled), shm_pool_(shm_pool), pb_cancel_(pb_cancel),
+      closed_(false), number_of_response_sent_(0)
 {
 }
 
@@ -54,15 +72,32 @@ ResponseSender::~ResponseSender()
 }
 
 void
-ResponseSender::Send(
-    std::shared_ptr<InferResponse> infer_response, const uint32_t flags)
+ResponseSender::UpdateStateAndCounters(
+    const std::shared_ptr<InferResponse>& response, const uint32_t flags)
 {
-  // Release the GIL. This avoids a potential deadlock situation in the parent
-  // process, where every thread in the thread pool is indirectly waiting for a
-  // function in the stub process that acquires the GIL. Meanwhile, the current
-  // thread, which holds the GIL, is also waiting for the parent side to have
-  // the next available thread to pick up the job during resource contention.
-  py::gil_scoped_release release;
+  if (is_decoupled_ == nullptr) {
+    // TODO: Can a model access the response sender on a BLS infer request?
+    throw PythonBackendException(
+        "Unable to send response. Response sender has no reference to the "
+        "decoupled state of the model.");
+  }
+  bool is_decoupled = *is_decoupled_;
+
+  std::lock_guard<std::mutex> lk(mu_);
+
+  if (!is_decoupled) {
+    if (response != nullptr && number_of_response_sent_ > 0) {
+      throw PythonBackendException(
+          "Unable to send response. Non-decoupled model cannot send more than "
+          "one response.");
+    }
+    if (response == nullptr && flags == TRITONSERVER_RESPONSE_COMPLETE_FINAL &&
+        number_of_response_sent_ == 0) {
+      throw PythonBackendException(
+          "Unable to send response. Non-decoupled model cannot send complete "
+          "final before sending a response.");
+    }
+  }
 
   if (closed_) {
     throw PythonBackendException(
@@ -72,18 +107,22 @@ ResponseSender::Send(
   if (flags == TRITONSERVER_RESPONSE_COMPLETE_FINAL) {
     closed_ = true;
   }
+  number_of_response_sent_++;
+}
 
-  // Check the correctness of the provided flags.
-  if (flags != TRITONSERVER_RESPONSE_COMPLETE_FINAL && flags != 0) {
-    throw PythonBackendException(
-        "Unable to send response. Unsupported flag provided.");
-  }
+void
+ResponseSender::Send(
+    std::shared_ptr<InferResponse> infer_response, const uint32_t flags)
+{
+  // Release the GIL. This avoids a potential deadlock situation in the parent
+  // process, where every thread in the thread pool is indirectly waiting for a
+  // function in the stub process that acquires the GIL. Meanwhile, the current
+  // thread, which holds the GIL, is also waiting for the parent side to have
+  // the next available thread to pick up the job during resource contention.
+  py::gil_scoped_release release;
 
-  if (flags == 0 && infer_response == nullptr) {
-    throw PythonBackendException(
-        "Inference Response object must be provided when the response flags is "
-        "set to zero.");
-  }
+  AssertResponseSenderArgumentsWellFormed(infer_response, flags);
+  UpdateStateAndCounters(infer_response, flags);
 
   std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
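
For reference, below is a minimal sketch of the Python-side usage these checks guard, assuming the standard triton_python_backend_utils decoupled API (request.get_response_sender(), pb_utils.InferenceResponse, pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL) and a hypothetical output tensor named OUT. It is illustrative only, not part of this change.

# Hypothetical decoupled model: exercises the path validated by
# AssertResponseSenderArgumentsWellFormed and UpdateStateAndCounters.
import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    def execute(self, requests):
        for request in requests:
            sender = request.get_response_sender()

            # Decoupled model: any number of responses may be sent, followed
            # by a bare final flag (flags != 0 with no response is accepted
            # by AssertResponseSenderArgumentsWellFormed).
            for i in range(3):
                out = pb_utils.Tensor("OUT", np.array([i], dtype=np.int32))
                sender.send(pb_utils.InferenceResponse(output_tensors=[out]))
            sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)

        # Decoupled models return None; responses were already sent above.
        return None

For a non-decoupled model the same sender may deliver exactly one response: a second send(), or a final flag issued before any response, now raises the corresponding errors added in UpdateStateAndCounters.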