@@ -627,25 +627,17 @@ ModelInstanceState::ModelInstanceState(
 #ifdef TRITON_ENABLE_GPU
   if (Kind() == TRITONSERVER_INSTANCEGROUPKIND_MODEL) {
     // Only set the torch device and create a CUDA stream if the model uses GPU.
+    for (auto it = device_id_set_.begin(); it != device_id_set_.end(); ++it) {
+      cudaStream_t stream;
+      THROW_IF_BACKEND_INSTANCE_ERROR(
+          CreateCudaStream(*it, 0 /* cuda_stream_priority */, &stream));
+      stream_map_.insert({*it, stream});
+    }
     if (!device_id_set_.empty()) {
-      auto it = device_id_set_.begin();
       // Use the first device to create the default stream.
-      THROW_IF_BACKEND_INSTANCE_ERROR(
-          CreateCudaStream(*it, 0 /* cuda_stream_priority */, &stream_));
+      auto it = device_id_set_.begin();
       device_ = torch::Device(torch::kCUDA, *it);
-
-      // Create a CUDA stream for other devices so that they can be synchronized
-      // later. Skip the first device since it is used to create the default
-      // stream.
-      if (it != device_id_set_.end()) {
-        ++it;
-      }
-      for (; it != device_id_set_.end(); ++it) {
-        cudaStream_t stream;
-        THROW_IF_BACKEND_INSTANCE_ERROR(
-            CreateCudaStream(*it, 0 /* cuda_stream_priority */, &stream));
-        stream_map_.insert({*it, stream});
-      }
+      stream_ = stream_map_[*it];
     }
   }
 #endif  // TRITON_ENABLE_GPU
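Net effect of the hunk: every device in device_id_set_ gets its stream created up front into stream_map_, and the default stream_ then becomes an alias for the first device's entry instead of being created through a separate code path. Below is a minimal standalone sketch of that pattern against the raw CUDA runtime API; the stream_map/device_ids names and the use of cudaStreamCreateWithPriority in place of the backend's CreateCudaStream helper are illustrative assumptions, not the backend's actual code.

#include <cuda_runtime.h>
#include <cstdio>
#include <map>
#include <set>

int main() {
  std::set<int> device_ids{0};             // stand-in for device_id_set_
  std::map<int, cudaStream_t> stream_map;  // stand-in for stream_map_

  // Create one stream per device, keyed by device ID, as in the added loop.
  for (int id : device_ids) {
    cudaSetDevice(id);
    cudaStream_t stream;
    if (cudaStreamCreateWithPriority(
            &stream, cudaStreamDefault, 0 /* priority */) != cudaSuccess) {
      std::fprintf(stderr, "stream creation failed on device %d\n", id);
      return 1;
    }
    stream_map.insert({id, stream});
  }

  // The "default" stream is simply the first device's map entry,
  // mirroring stream_ = stream_map_[*it] above.
  if (!device_ids.empty()) {
    cudaStream_t default_stream = stream_map[*device_ids.begin()];
    (void)default_stream;  // would carry the instance's default work
  }

  // Teardown: destroy each stream on the device that owns it.
  for (auto& [id, stream] : stream_map) {
    cudaSetDevice(id);
    cudaStreamDestroy(stream);
  }
  return 0;
}

The appeal of this shape is that there is exactly one creation site and one ownership map to tear down; the first device is no longer a special case whose stream lives outside the map.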