@@ -673,13 +673,8 @@ ModelState::LoadModel(
673
673
key = " trt_ep_context_embed_mode" ;
674
674
value = value_string;
675
675
} else {
676
- return TRITONSERVER_ErrorNew (
677
- TRITONSERVER_ERROR_INVALID_ARG,
678
- std::string (
679
- " unknown parameter '" + param_key +
680
- " ' is provided for TensorRT Execution "
681
- " Accelerator" )
682
- .c_str ());
676
+ key = param_key;
677
+ params.MemberAsString (param_key.c_str (), &value);
683
678
}
684
679
if (!key.empty () && !value.empty ()) {
685
680
keys.push_back (key);
@@ -692,9 +687,25 @@ ModelState::LoadModel(
692
687
c_keys.push_back (keys[i].c_str ());
693
688
c_values.push_back (values[i].c_str ());
694
689
}
695
- RETURN_IF_ORT_ERROR ( ort_api->UpdateTensorRTProviderOptions (
690
+ auto status = ort_api->UpdateTensorRTProviderOptions (
696
691
rel_trt_options.get (), c_keys.data (), c_values.data (),
697
- keys.size ()));
692
+ keys.size ());
693
+ if (status != nullptr ) {
694
+ OrtAllocator* allocator;
695
+ char * options;
696
+ RETURN_IF_ORT_ERROR (
697
+ ort_api->GetAllocatorWithDefaultOptions (&allocator));
698
+ RETURN_IF_ORT_ERROR (
699
+ ort_api->GetTensorRTProviderOptionsAsString (
700
+ rel_trt_options.get (), allocator, &options));
701
+ return TRITONSERVER_ErrorNew (
702
+ TRITONSERVER_ERROR_INVALID_ARG,
703
+ (std::string (" unknown parameters in config following "
704
+ " options are supported for TensorRT "
705
+ " Execution Provider: " ) +
706
+ std::string (options))
707
+ .c_str ());
708
+ }
698
709
}
699
710
}
700
711
@@ -724,39 +735,56 @@ ModelState::LoadModel(
724
735
// Default GPU execution provider.
725
736
// Using default values for everything other than device id and cuda
726
737
// stream
727
- OrtCUDAProviderOptions cuda_options;
728
- cuda_options. device_id = instance_group_device_id ;
729
- cuda_options. has_user_compute_stream = stream != nullptr ? 1 : 0 ;
730
- cuda_options. user_compute_stream =
731
- stream != nullptr ? ( void *)stream : nullptr ,
732
- cuda_options. default_memory_arena_cfg = nullptr ;
733
-
738
+ OrtCUDAProviderOptionsV2* cuda_options;
739
+ RETURN_IF_ORT_ERROR (ort_api-> CreateCUDAProviderOptions (& cuda_options)) ;
740
+ std::unique_ptr<
741
+ OrtCUDAProviderOptionsV2, decltype (ort_api-> ReleaseCUDAProviderOptions )>
742
+ rel_cuda_options (cuda_options, ort_api-> ReleaseCUDAProviderOptions );
743
+ std::map<std::string, std::string> options ;
744
+ options[ " device_id " ] = std::to_string (instance_group_device_id);
734
745
{
735
746
// Parse CUDA EP configurations
736
747
triton::common::TritonJson::Value params;
737
748
if (model_config_.Find (" parameters" , ¶ms)) {
738
- int cudnn_conv_algo_search = 0 ;
739
- RETURN_IF_ERROR (TryParseModelStringParameter (
740
- params, " cudnn_conv_algo_search" , &cudnn_conv_algo_search, 0 ));
741
- cuda_options.cudnn_conv_algo_search =
742
- static_cast <OrtCudnnConvAlgoSearch>(cudnn_conv_algo_search);
743
-
744
- RETURN_IF_ERROR (TryParseModelStringParameter (
745
- params, " gpu_mem_limit" , &cuda_options.gpu_mem_limit ,
746
- std::numeric_limits<size_t >::max ()));
747
-
748
- RETURN_IF_ERROR (TryParseModelStringParameter (
749
- params, " arena_extend_strategy" ,
750
- &cuda_options.arena_extend_strategy , 0 ));
751
-
752
- RETURN_IF_ERROR (TryParseModelStringParameter (
753
- params, " do_copy_in_default_stream" ,
754
- &cuda_options.do_copy_in_default_stream , true ));
749
+ std::vector<std::string> members;
750
+ RETURN_IF_ERROR (params.Members (&members));
751
+ for (auto & m : members) {
752
+ const auto [it_value, success] = options.insert ({m, " " });
753
+ if (success) {
754
+ params.MemberAsString (m.c_str (), &it_value->second );
755
+ }
756
+ }
755
757
}
756
758
}
757
759
758
- RETURN_IF_ORT_ERROR (ort_api->SessionOptionsAppendExecutionProvider_CUDA (
759
- soptions, &cuda_options));
760
+ std::vector<const char *> option_names, option_values;
761
+ for (const auto & [key, value] : options) {
762
+ option_names.push_back (key.c_str ());
763
+ option_values.push_back (value.c_str ());
764
+ }
765
+ auto status = ort_api->UpdateCUDAProviderOptions (
766
+ rel_cuda_options.get (), option_names.data (), option_values.data (),
767
+ option_values.size ());
768
+ if (status != nullptr ) {
769
+ OrtAllocator* allocator;
770
+ char * options;
771
+ RETURN_IF_ORT_ERROR (ort_api->GetAllocatorWithDefaultOptions (&allocator));
772
+ RETURN_IF_ORT_ERROR (ort_api->GetCUDAProviderOptionsAsString (
773
+ rel_cuda_options.get (), allocator, &options));
774
+ return TRITONSERVER_ErrorNew (
775
+ TRITONSERVER_ERROR_INVALID_ARG,
776
+ (std::string (" unknown parameters in config following options are "
777
+ " supported for CUDA Execution Provider: " ) +
778
+ std::string (options))
779
+ .c_str ());
780
+ }
781
+
782
+ if (stream != nullptr ) {
783
+ RETURN_IF_ORT_ERROR (ort_api->UpdateCUDAProviderOptionsWithValue (
784
+ rel_cuda_options.get (), " user_compute_stream" , stream));
785
+ }
786
+ RETURN_IF_ORT_ERROR (ort_api->SessionOptionsAppendExecutionProvider_CUDA_V2 (
787
+ soptions, cuda_options));
760
788
LOG_MESSAGE (
761
789
TRITONSERVER_LOG_VERBOSE,
762
790
(std::string (" CUDA Execution Accelerator is set for '" ) + Name () +
0 commit comments