triton-inference-server · tanmayv25 · Apr 18, 2024 · Feb 19, 2024 · Apr 18, 2024
diff --git a/src/libtorch.cc b/src/libtorch.cc
@@ -56,6 +56,12 @@
 #include <cuda_runtime_api.h>
 #endif  // TRITON_ENABLE_GPU
 
+// for thread control
+// https://pytorch.org/docs/stable/notes/cpu_threading_torchscript_inference.html#runtime-api
+// https://github.com/pytorch/pytorch/blob/v2.2.1-rc3/aten/src/ATen/Parallel.h#L133
+#include <ATen/Parallel.h>
+
+
 //
 // PyTorch C++ (LibTorch) Backend that implements the TRITONBACKEND API.
 //
@@ -465,6 +471,54 @@ ModelState::ParseParameters()
            " for model instance '" + Name() + "'")
               .c_str());
     }
+
+    // If "INTRA_OP_THREAD_COUNT" is not present in 'parameters' then
+    // 'intra_op_thread_count' stays at -1 and at::set_num_threads() is not
+    // called, so PyTorch's default intra-op thread count remains in effect.
+    int intra_op_thread_count = -1;
+    err = ParseParameterInt(
+        params, "INTRA_OP_THREAD_COUNT", &intra_op_thread_count);
+    if (err != nullptr) {
+      if (TRITONSERVER_ErrorCode(err) != TRITONSERVER_ERROR_NOT_FOUND) {
+        return err;
+      } else {
+        TRITONSERVER_ErrorDelete(err);
+      }
+    } else {
+      if (intra_op_thread_count > 0) {
+        at::set_num_threads(intra_op_thread_count);
+        LOG_MESSAGE(
+            TRITONSERVER_LOG_INFO,
+            (std::string("Intra op thread count is set to ") +
+             std::to_string(intra_op_thread_count) + " for model instance '" +
+             Name() + "'")
+                .c_str());
+      }
+    }
+
+    // If "INTER_OP_THREAD_COUNT" is not present in 'parameters' then
+    // 'inter_op_thread_count' stays at -1 and at::set_num_interop_threads()
+    // is not called, so PyTorch's default inter-op thread count is kept.
+    int inter_op_thread_count = -1;
+    err = ParseParameterInt(
+        params, "INTER_OP_THREAD_COUNT", &inter_op_thread_count);
+    if (err != nullptr) {
+      if (TRITONSERVER_ErrorCode(err) != TRITONSERVER_ERROR_NOT_FOUND) {
+        return err;
+      } else {
+        TRITONSERVER_ErrorDelete(err);
+      }
+    } else {
+      if (inter_op_thread_count > 0) {
+        at::set_num_interop_threads(inter_op_thread_count);
+        LOG_MESSAGE(
+            TRITONSERVER_LOG_INFO,
+            (std::string("Inter op thread count is set to ") +
+             std::to_string(inter_op_thread_count) + " for model instance '" +
+             Name() + "'")
+                .c_str());
+      }
+    }
   }
 
   return nullptr;

diff --git a/src/libtorch_utils.cc b/src/libtorch_utils.cc
@@ -149,6 +149,19 @@ ParseParameter(
   return nullptr;
 }
 
+TRITONSERVER_Error*
+ParseParameterInt(
+    triton::common::TritonJson::Value& params, const std::string& mkey,
+    int* value)
+{
+  // Look up the raw string first; a failed lookup returns before parsing.
+  std::string raw_text;
+  RETURN_IF_ERROR(GetParameterValue(params, mkey, &raw_text));
+
+  return ParseIntValue(raw_text, value);
+}
+
+
 #ifdef TRITON_ENABLE_GPU
 TRITONSERVER_Error*
 ConvertCUDAStatusToTritonError(

diff --git a/src/libtorch_utils.h b/src/libtorch_utils.h
@@ -62,4 +62,11 @@ TRITONSERVER_Error* ParseParameter(
     triton::common::TritonJson::Value& params, const std::string& mkey,
     bool* value);
 
+// If the key 'mkey' is present in 'params' then parse the associated value
+// as an integer and store it in 'value'. If 'mkey' is not present, the
+// lookup error is returned and 'value' is left untouched.
+TRITONSERVER_Error* ParseParameterInt(
+    triton::common::TritonJson::Value& params, const std::string& mkey,
+    int* value);
+
 }}}  // namespace triton::backend::pytorch