Skip to content

Commit ea76722

Browse files
gedoensmax
authored and krishung5 committed
dynamic CUDA and TRT options updating
1 parent e2061b7 commit ea76722

File tree

1 file changed

+63
-35
lines changed

1 file changed

+63
-35
lines changed

src/onnxruntime.cc

Lines changed: 63 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -673,13 +673,8 @@ ModelState::LoadModel(
673673
key = "trt_ep_context_embed_mode";
674674
value = value_string;
675675
} else {
676-
return TRITONSERVER_ErrorNew(
677-
TRITONSERVER_ERROR_INVALID_ARG,
678-
std::string(
679-
"unknown parameter '" + param_key +
680-
"' is provided for TensorRT Execution "
681-
"Accelerator")
682-
.c_str());
676+
key = param_key;
677+
params.MemberAsString(param_key.c_str(), &value);
683678
}
684679
if (!key.empty() && !value.empty()) {
685680
keys.push_back(key);
@@ -692,9 +687,25 @@ ModelState::LoadModel(
692687
c_keys.push_back(keys[i].c_str());
693688
c_values.push_back(values[i].c_str());
694689
}
695-
RETURN_IF_ORT_ERROR(ort_api->UpdateTensorRTProviderOptions(
690+
auto status = ort_api->UpdateTensorRTProviderOptions(
696691
rel_trt_options.get(), c_keys.data(), c_values.data(),
697-
keys.size()));
692+
keys.size());
693+
if (status != nullptr) {
694+
OrtAllocator* allocator;
695+
char* options;
696+
RETURN_IF_ORT_ERROR(
697+
ort_api->GetAllocatorWithDefaultOptions(&allocator));
698+
RETURN_IF_ORT_ERROR(
699+
ort_api->GetTensorRTProviderOptionsAsString(
700+
rel_trt_options.get(), allocator, &options));
701+
return TRITONSERVER_ErrorNew(
702+
TRITONSERVER_ERROR_INVALID_ARG,
703+
(std::string("unknown parameters in config following "
704+
"options are supported for TensorRT "
705+
"Execution Provider: ") +
706+
std::string(options))
707+
.c_str());
708+
}
698709
}
699710
}
700711

@@ -724,39 +735,56 @@ ModelState::LoadModel(
724735
// Default GPU execution provider.
725736
// Using default values for everything other than device id and cuda
726737
// stream
727-
OrtCUDAProviderOptions cuda_options;
728-
cuda_options.device_id = instance_group_device_id;
729-
cuda_options.has_user_compute_stream = stream != nullptr ? 1 : 0;
730-
cuda_options.user_compute_stream =
731-
stream != nullptr ? (void*)stream : nullptr,
732-
cuda_options.default_memory_arena_cfg = nullptr;
733-
738+
OrtCUDAProviderOptionsV2* cuda_options;
739+
RETURN_IF_ORT_ERROR(ort_api->CreateCUDAProviderOptions(&cuda_options));
740+
std::unique_ptr<
741+
OrtCUDAProviderOptionsV2, decltype(ort_api->ReleaseCUDAProviderOptions)>
742+
rel_cuda_options(cuda_options, ort_api->ReleaseCUDAProviderOptions);
743+
std::map<std::string, std::string> options;
744+
options["device_id"] = std::to_string(instance_group_device_id);
734745
{
735746
// Parse CUDA EP configurations
736747
triton::common::TritonJson::Value params;
737748
if (model_config_.Find("parameters", &params)) {
738-
int cudnn_conv_algo_search = 0;
739-
RETURN_IF_ERROR(TryParseModelStringParameter(
740-
params, "cudnn_conv_algo_search", &cudnn_conv_algo_search, 0));
741-
cuda_options.cudnn_conv_algo_search =
742-
static_cast<OrtCudnnConvAlgoSearch>(cudnn_conv_algo_search);
743-
744-
RETURN_IF_ERROR(TryParseModelStringParameter(
745-
params, "gpu_mem_limit", &cuda_options.gpu_mem_limit,
746-
std::numeric_limits<size_t>::max()));
747-
748-
RETURN_IF_ERROR(TryParseModelStringParameter(
749-
params, "arena_extend_strategy",
750-
&cuda_options.arena_extend_strategy, 0));
751-
752-
RETURN_IF_ERROR(TryParseModelStringParameter(
753-
params, "do_copy_in_default_stream",
754-
&cuda_options.do_copy_in_default_stream, true));
749+
std::vector<std::string> members;
750+
RETURN_IF_ERROR(params.Members(&members));
751+
for (auto& m : members) {
752+
const auto [it_value, success] = options.insert({m, ""});
753+
if (success) {
754+
params.MemberAsString(m.c_str(), &it_value->second);
755+
}
756+
}
755757
}
756758
}
757759

758-
RETURN_IF_ORT_ERROR(ort_api->SessionOptionsAppendExecutionProvider_CUDA(
759-
soptions, &cuda_options));
760+
std::vector<const char*> option_names, option_values;
761+
for (const auto& [key, value] : options) {
762+
option_names.push_back(key.c_str());
763+
option_values.push_back(value.c_str());
764+
}
765+
auto status = ort_api->UpdateCUDAProviderOptions(
766+
rel_cuda_options.get(), option_names.data(), option_values.data(),
767+
option_values.size());
768+
if (status != nullptr) {
769+
OrtAllocator* allocator;
770+
char* options;
771+
RETURN_IF_ORT_ERROR(ort_api->GetAllocatorWithDefaultOptions(&allocator));
772+
RETURN_IF_ORT_ERROR(ort_api->GetCUDAProviderOptionsAsString(
773+
rel_cuda_options.get(), allocator, &options));
774+
return TRITONSERVER_ErrorNew(
775+
TRITONSERVER_ERROR_INVALID_ARG,
776+
(std::string("unknown parameters in config following options are "
777+
"supported for CUDA Execution Provider: ") +
778+
std::string(options))
779+
.c_str());
780+
}
781+
782+
if (stream != nullptr) {
783+
RETURN_IF_ORT_ERROR(ort_api->UpdateCUDAProviderOptionsWithValue(
784+
rel_cuda_options.get(), "user_compute_stream", stream));
785+
}
786+
RETURN_IF_ORT_ERROR(ort_api->SessionOptionsAppendExecutionProvider_CUDA_V2(
787+
soptions, cuda_options));
760788
LOG_MESSAGE(
761789
TRITONSERVER_LOG_VERBOSE,
762790
(std::string("CUDA Execution Accelerator is set for '") + Name() +

0 commit comments

Comments
 (0)