@@ -109,7 +109,10 @@ class ModelState : public BackendModel {
   bool EnabledCacheCleaning() { return enable_cache_cleaning_; }

   bool EnabledWeightSharing() { return enable_weight_sharing_; }
-  const std::map<std::string, std::pair<int64_t, int64_t>>& ModelOutputs() { return model_outputs_; }
+  const std::map<std::string, std::pair<int64_t, int64_t>>& ModelOutputs()
+  {
+    return model_outputs_;
+  }

  private:
   ModelState(TRITONBACKEND_Model* triton_model);
@@ -538,8 +541,7 @@ class ModelInstanceState : public BackendModelInstance {
       const std::string& control_kind, bool required, bool* have_control);
   void AddInputToMap(
       NamingConvention naming_convention,
-      const std::vector<std::string> allowed_inputs,
-      const std::string &io_name,
+      const std::vector<std::string> allowed_inputs, const std::string& io_name,
       const uint32_t index);
   TRITONSERVER_Error* ValidateInputs(const size_t expected_input_cnt);
   void AddInputToMap(
@@ -812,7 +814,12 @@ ModelInstanceState::ValidateTypedSequenceControl(

   return nullptr;  // success
 }
-void ModelInstanceState::AddInputToMap(NamingConvention naming_convention, const std::vector<std::string> allowed_inputs, const std::string &io_name, const uint32_t index) {
+void
+ModelInstanceState::AddInputToMap(
+    NamingConvention naming_convention,
+    const std::vector<std::string> allowed_inputs, const std::string& io_name,
+    const uint32_t index)
+{
   std::string deliminator = "__";

   if (is_dict_input_) {
@@ -1006,7 +1013,7 @@ ModelInstanceState::ValidateInputs(const size_t expected_input_cnt)
   }
   triton::common::TritonJson::Value sequence_batching;
   if (model_state_->ModelConfig().Find(
-          "sequence_batching", &sequence_batching)){
+          "sequence_batching", &sequence_batching)) {
     triton::common::TritonJson::Value states;
     if (sequence_batching.Find("state", &states)) {
       for (size_t i = 0; i < states.ArraySize(); i++) {
@@ -1023,8 +1030,8 @@ ModelInstanceState::ValidateInputs(const size_t expected_input_cnt)
         if (!pr.first && (state_dtype != "TYPE_STRING")) {
           return TRITONSERVER_ErrorNew(
               TRITONSERVER_ERROR_INTERNAL,
-              ("unsupported datatype " + state_dtype + " for input state '" + state_name +
-               "' for model '" + model_state_->Name() + "'")
+              ("unsupported datatype " + state_dtype + " for input state '" +
+               state_name + "' for model '" + model_state_->Name() + "'")
                   .c_str());
         }

@@ -1035,10 +1042,11 @@ ModelInstanceState::ValidateInputs(const size_t expected_input_cnt)
           if ((dims.size() + (supports_batching_ ? 1 : 0)) > 1) {
             return TRITONSERVER_ErrorNew(
                 TRITONSERVER_ERROR_INTERNAL,
-                ("Triton only supports 1 dimensional List of String as input for "
+                ("Triton only supports 1 dimensional List of String as input "
+                 "for "
                  "'" +
-                 std::string(state_name) + "' for model '" + model_state_->Name() +
-                 "'")
+                 std::string(state_name) + "' for model '" +
+                 model_state_->Name() + "'")
                     .c_str());
           }
         }
@@ -1162,8 +1170,8 @@ ModelInstanceState::ValidateOutputs()
       if (!pr.first && (state_dtype != "TYPE_STRING")) {
         return TRITONSERVER_ErrorNew(
             TRITONSERVER_ERROR_INTERNAL,
-            ("unsupported datatype " + state_dtype + " for state '" + state_name +
-             "' for model '" + model_state_->Name() + "'")
+            ("unsupported datatype " + state_dtype + " for state '" +
+             state_name + "' for model '" + model_state_->Name() + "'")
                 .c_str());
       }

@@ -1172,10 +1180,11 @@ ModelInstanceState::ValidateOutputs()
         if ((dims.size() + (supports_batching_ ? 1 : 0)) > 1) {
           return TRITONSERVER_ErrorNew(
               TRITONSERVER_ERROR_INTERNAL,
-              ("Triton only supports 1 dimensional List of String as output for "
+              ("Triton only supports 1 dimensional List of String as output "
+               "for "
                "'" +
-               std::string(state_name) + "' for model '" + model_state_->Name() +
-               "'")
+               std::string(state_name) + "' for model '" +
+               model_state_->Name() + "'")
                   .c_str());
         }
       }
@@ -1678,7 +1687,8 @@ ModelInstanceState::GetNamingConvention(
   }

   triton::common::TritonJson::Value sequence_batching;
-  if (model_state_->ModelConfig().Find("sequence_batching", &sequence_batching)) {
+  if (model_state_->ModelConfig().Find(
+          "sequence_batching", &sequence_batching)) {
     // If we need to manage state for the model, then we need to check
     // the naming of the state adheres to both the input and output conventions
     triton::common::TritonJson::Value states;
@@ -1696,16 +1706,17 @@ ModelInstanceState::GetNamingConvention(
       for (size_t i = 0; i < states.ArraySize(); i++) {
         triton::common::TritonJson::Value state;
         RETURN_IF_ERROR(states.IndexAsObject(i, &state));
-        std::string name_entry = io_kind == "input" ? "input_name" : "output_name";
+        std::string name_entry =
+            io_kind == "input" ? "input_name" : "output_name";
         std::string state_name;
-        RETURN_IF_ERROR(
-            state.MemberAsString(name_entry.c_str(), &state_name));
+        RETURN_IF_ERROR(state.MemberAsString(name_entry.c_str(), &state_name));
         int start_pos = state_name.find(deliminator);
         if (start_pos == -1) {
           return TRITONSERVER_ErrorNew(
               TRITONSERVER_ERROR_INVALID_ARG,
               ("PyTorch model '" + model_state_->Name() +
-               "' is using sequence batching with state but state '" + state_name +
+               "' is using sequence batching with state but state '" +
+               state_name +
                "' does not follow the <name>__<index> naming convention. ")
                   .c_str());
         } else {
@@ -1721,7 +1732,8 @@ ModelInstanceState::GetNamingConvention(
           return TRITONSERVER_ErrorNew(
               TRITONSERVER_ERROR_INVALID_ARG,
               ("PyTorch model '" + model_state_->Name() +
-               "' is using sequence batching with state but state '" + state_name +
+               "' is using sequence batching with state but state '" +
+               state_name +
                "' does not follow the <name>__<index> naming convention. ")
                   .c_str());
         }
@@ -1912,8 +1924,9 @@ SetStringInputTensor(
 bool
 SetStringBuffer(
     torch::List<torch::jit::IValue>* tensor, TRITONBACKEND_Response** response,
-    TRITONBACKEND_Output* response_output, TRITONBACKEND_State* response_state, const size_t tensor_element_count,
-    cudaStream_t stream, std::string* serialized, bool state)
+    TRITONBACKEND_Output* response_output, TRITONBACKEND_State* response_state,
+    const size_t tensor_element_count, cudaStream_t stream,
+    std::string* serialized, bool state)
 {
   bool cuda_copy = false;

@@ -1938,7 +1951,7 @@ SetStringBuffer(
   TRITONSERVER_Error* err;
   void* buffer;

-  if (!state){
+  if (!state) {
     auto err = TRITONBACKEND_OutputBuffer(
         response_output, &buffer, serialized->size(), &actual_memory_type,
         &actual_memory_type_id);
@@ -1984,19 +1997,20 @@ SetStringOutputBuffer(
     TRITONBACKEND_Output* response_output, const size_t tensor_element_count,
     cudaStream_t stream, std::string* serialized)
 {
-  return SetStringBuffer(tensor, response, response_output, nullptr /* response_state */, tensor_element_count,
-      stream, serialized, false /* state */);
-
+  return SetStringBuffer(
+      tensor, response, response_output, nullptr /* response_state */,
+      tensor_element_count, stream, serialized, false /* state */);
 }

 bool
 SetStringStateBuffer(
-    torch::List<torch::jit::IValue>* tensor, TRITONBACKEND_Response** response,
+    torch::List<torch::jit::IValue>* tensor, TRITONBACKEND_Response** response,
     TRITONBACKEND_State* response_state, const size_t tensor_element_count,
     cudaStream_t stream, std::string* serialized)
 {
-  return SetStringBuffer(tensor, response, nullptr /* response_output */, response_state, tensor_element_count,
-      stream, serialized, true /* state */);
+  return SetStringBuffer(
+      tensor, response, nullptr /* response_output */, response_state,
+      tensor_element_count, stream, serialized, true /* state */);
 }

@@ -2063,8 +2077,8 @@ ModelInstanceState::SetInputTensors(
   // The input must be in contiguous CPU/GPU memory.
   std::vector<std::pair<TRITONSERVER_MemoryType, int64_t>> alloc_perference;
   if (device_.is_cpu()) {
-    alloc_perference = {{TRITONSERVER_MEMORY_CPU_PINNED, 0},
-                        {TRITONSERVER_MEMORY_CPU, 0}};
+    alloc_perference = {
+        {TRITONSERVER_MEMORY_CPU_PINNED, 0}, {TRITONSERVER_MEMORY_CPU, 0}};
   } else {
     alloc_perference = {{TRITONSERVER_MEMORY_GPU, device_.index()}};
   }
@@ -2176,7 +2190,7 @@ ModelInstanceState::ReadOutputTensors(
   bool cuda_copy = false;
   // The serialized string buffer must be valid until output copies are done
   std::vector<std::unique_ptr<std::string>> string_buffer;
-  for (auto & output : model_state_->ModelOutputs()) {
+  for (auto& output : model_state_->ModelOutputs()) {
     int op_index = output_index_map_[output.first];
     auto name = output.first;
    auto output_tensor_pair = output.second;
@@ -2239,14 +2253,14 @@ ModelInstanceState::ReadOutputTensors(
       }
       if (output_tensor_pair.first != -1) {
         responder.ProcessTensor(
-            name, output_dtype, batchn_shape, output_buffer,
-            memory_type, memory_id);
+            name, output_dtype, batchn_shape, output_buffer, memory_type,
+            memory_id);
       }
       if (output_tensor_pair.second != -1) {
         std::vector<TRITONBACKEND_State*> states;
         states = responder.ProcessStateTensor(
-            name, output_dtype, batchn_shape, output_buffer,
-            memory_type, memory_id);
+            name, output_dtype, batchn_shape, output_buffer, memory_type,
+            memory_id);
         // Update the states
         for (auto& state : states) {
           RETURN_IF_ERROR(TRITONBACKEND_StateUpdate(state));
@@ -2297,9 +2311,11 @@ ModelInstanceState::ReadOutputTensors(
       }
       if (output_tensor_pair.second != -1) {
         TRITONBACKEND_State* response_state;
-        RESPOND_AND_SET_NULL_IF_ERROR(&response, TRITONBACKEND_StateNew(
-            &response_state, request, name.c_str(), TRITONSERVER_TYPE_BYTES,
-            batchn_shape.data(), batchn_shape.size()));
+        RESPOND_AND_SET_NULL_IF_ERROR(
+            &response, TRITONBACKEND_StateNew(
+                           &response_state, request, name.c_str(),
+                           TRITONSERVER_TYPE_BYTES, batchn_shape.data(),
+                           batchn_shape.size()));

         string_buffer.emplace_back(new std::string());
         cuda_copy |= SetStringStateBuffer(