12
12
#include " torch/csrc/jit/frontend/function_schema_parser.h"
13
13
#include " torch/csrc/jit/ir/ir.h"
14
14
#include " torch/csrc/jit/passes/graph_fuser.h"
15
+ #include " torch/csrc/jit/passes/loop_unrolling.h"
15
16
#include " torch/csrc/jit/passes/lower_graph.h"
16
17
#include " torch/csrc/jit/passes/pass_manager.h"
17
18
#include " torch/custom_class.h"
18
19
19
20
#include " core/compiler.h"
20
- #include " core/util/prelude.h"
21
21
22
22
#include " core/conversion/conversion.h"
23
23
#include " core/lowering/lowering.h"
24
+ #include " core/partitioning/partitioning.h"
24
25
#include " core/runtime/runtime.h"
25
26
26
27
namespace trtorch {
27
28
namespace core {
28
29
29
- c10::FunctionSchema GenerateGraphSchema (
30
- torch::jit::script::Module mod,
31
- std::string method_name,
32
- std::shared_ptr<torch::jit::Graph>& g) {
33
- std::vector<c10::Argument> args;
34
- for (auto in : g->inputs ()) {
35
- args.push_back (c10::Argument (in->debugName (), in->type ()));
36
- }
37
-
38
- std::vector<c10::Argument> returns;
39
- for (auto out : g->outputs ()) {
40
- returns.push_back (c10::Argument (out->debugName (), out->type ()));
41
- }
42
-
43
- return c10::FunctionSchema (method_name, method_name, args, returns);
44
- }
45
-
46
30
void AddEngineToGraph (
47
31
torch::jit::script::Module mod,
48
32
std::shared_ptr<torch::jit::Graph>& g,
49
- const std::string& serialized_engine) {
50
- auto engine_ptr = c10::make_intrusive<runtime::TRTEngine>(mod._ivalue ()->name (), serialized_engine);
33
+ const std::string& serialized_engine,
34
+ std::string engine_id = " " ,
35
+ bool fallback = false ) {
36
+ auto engine_ptr = c10::make_intrusive<runtime::TRTEngine>(mod._ivalue ()->name () + engine_id, serialized_engine);
51
37
// Get required metadata about the engine out
52
38
auto num_io = engine_ptr->num_io ;
53
39
auto name = engine_ptr->name ;
54
40
41
+ // ..
55
42
// Add the engine as an attribute of the module, this will let the engine be
56
43
// serialized and deserialized
57
44
mod.register_attribute (
@@ -108,17 +95,19 @@ void AddEngineToGraph(
108
95
g->block ()->appendNode (unpack_node);
109
96
110
97
// If there are multiple output tensors from TensorRT we wrap them in a tuple
111
- // to return
112
- if (unpack_node->outputs ().size () > 1 ) {
98
+ // to return, convert to tuple only when we only have 1 segmented graph
99
+ if (!fallback && unpack_node->outputs ().size () > 1 ) {
113
100
// Creates prim::TupleConstruct(<output tensors>) using outputs of the
114
101
// unpack node
115
102
auto return_tuple_node = g->createTuple (unpack_node->outputs ());
116
103
g->block ()->appendNode (return_tuple_node);
117
104
// Set the output as the produced tuple
118
105
g->registerOutput (return_tuple_node->outputs ()[0 ]);
119
106
} else {
120
- // Set the output as the sole output tensor
121
- g->registerOutput (unpack_node->outputs ()[0 ]);
107
+ // if fallback is enabled, multiple outputs will be registered
108
+ for (size_t i = 0 ; i < unpack_node->outputs ().size (); ++i) {
109
+ g->registerOutput (unpack_node->outputs ()[i]);
110
+ }
122
111
}
123
112
124
113
LOG_DEBUG (*g << " (AddEngineToGraph)\n " );
@@ -142,6 +131,7 @@ std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& mod, std::
142
131
143
132
auto convert_cfg = std::move (cfg.convert_info );
144
133
auto g = graph_and_parameters.first ;
134
+
145
135
auto params = graph_and_parameters.second ;
146
136
auto named_params = conversion::get_named_params (g->inputs (), params);
147
137
@@ -151,7 +141,115 @@ std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& mod, std::
151
141
return std::move (engine);
152
142
}
153
143
144
// Splices one partitioned segment (`seg`) into the global fallback graph `g`.
// Values are remapped through two tables:
//   old_to_new_g : original global graph value => new global graph value
//   mini_to_new_g: mini (segment) graph value  => new global graph value
// Side effect: extends old_to_new_g with this segment's outputs so later
// segments (and the final registerOutput pass) can resolve them.
void AddSegmentedBlockToGraph(
    std::shared_ptr<torch::jit::Graph>& g,
    partitioning::SegmentedBlock& seg,
    std::unordered_map<torch::jit::Value*, torch::jit::Value*>& old_to_new_g) {
  std::unordered_map<torch::jit::Value*, torch::jit::Value*> mini_to_new_g;
  size_t input_idx = 0;
  if (seg.target() == partitioning::SegmentedBlock::kTensorRT && g->inputs().size() > 0) {
    // TensorRT segments carry a module "self" as their first input; if the
    // global graph doesn't already start with a module-typed ("__torch__")
    // input, insert one so the engine's self argument has something to map to.
    if (g->inputs()[0]->type()->str().find("__torch__") == std::string::npos) {
      auto self = g->insertInput(0, "self_1");
      self->setType(seg.inputs()[0]->type());
    }
    // Map the segment's self input to the global graph's first input and
    // advance past it; the remaining segment inputs are data values.
    mini_to_new_g[seg.inputs()[input_idx++]] = g->inputs()[0];
  }

  // Wire each of the segment's raw (original-graph) inputs to the value
  // already created for it in the new global graph, in order.
  for (auto& raw_input : seg.raw_inputs()) {
    if (old_to_new_g.count(raw_input)) {
      mini_to_new_g[seg.inputs()[input_idx++]] = old_to_new_g[raw_input];
    }
  }

  // Copy every node of the segment into the global graph, rewriting value
  // uses via mini_to_new_g as we go.
  for (const auto n : seg.nodes()) {
    util::cloneNode(n, g, mini_to_new_g);
  }

  // Publish the segment's outputs: original graph value => new global graph value.
  for (size_t i = 0; i < seg.raw_outputs().size(); ++i) {
    old_to_new_g[seg.raw_outputs()[i]] = mini_to_new_g[seg.outputs()[i]];
  }

  return;
}
177
+
178
// Compiles `mod` with Torch fallback enabled: each method's graph is lowered,
// partitioned into TensorRT-convertible and Torch-only segments, TensorRT
// segments are converted to engines, and all segments are stitched back into
// one new graph on a fresh module. Returns the original `mod` unchanged if
// partitioning produced no TensorRT segment at all.
torch::jit::script::Module CompileGraphWithFallback(const torch::jit::script::Module& mod, CompileSpec cfg) {
  // TODO: Should be doing a functional transform but need PR #31978
  // [jit] More robust mangling
  // torch::jit::script::Module new_mod = mod.clone();
  torch::jit::script::Module new_mod(mod._ivalue()->name() + "_trt");
  std::vector<std::shared_ptr<torch::jit::Graph>> graphs;
  for (const torch::jit::script::Method& method : mod.get_methods()) {
    // Don't convert hidden methods (names beginning with "_"); rfind(s, 0)
    // returning nonzero means the name does NOT start with "_".
    if (method.name().rfind("_", 0)) {
      auto new_g = std::make_shared<torch::jit::Graph>();
      auto graph_and_parameters = lowering::Lower(mod, method.name());

      auto g = graph_and_parameters.first;
      auto params = graph_and_parameters.second;
      auto named_params = conversion::get_named_params(g->inputs(), params);
      // NOTE(review): convert_info is moved out of cfg here; cfg must not be
      // relied on for convert_info after the first method iteration.
      auto convert_cfg = std::move(cfg.convert_info);
      LOG_INFO(*g << "(LoweringGraph)\n");

      // segment the graph and convert segmented TensorRT block
      auto segmented_blocks = partitioning::Partition(g, convert_cfg.input_ranges, cfg.partition_info);
      if (segmented_blocks.size() == 1 && segmented_blocks[0].target() == partitioning::SegmentedBlock::kTorch) {
        // Nothing was convertible; hand back the untouched input module.
        LOG_WARNING("Didn't generate any TensorRT engines, the compiler did nothing\n");
        return mod;
      }

      std::unordered_map<torch::jit::Value*, torch::jit::Value*> old_to_new_g;
      // add global graph's input to old_to_new_g mapping
      for (auto input : g->inputs()) {
        util::getOrAddInputForValue(input, new_g, old_to_new_g);
      }
      for (auto& seg_block : segmented_blocks) {
        std::string cur_block_target =
            seg_block.target() == partitioning::SegmentedBlock::kTensorRT ? "TensorRT" : "Torch";
        LOG_INFO(*g << "(MiniGraphIn" << cur_block_target << "Block\n");
        // Use the segment's address as a unique suffix for the engine name,
        // disambiguating multiple engines registered on the same module.
        std::ostringstream trt_engine_id;
        trt_engine_id << reinterpret_cast<const int*>(&seg_block);
        if (seg_block.target() == partitioning::SegmentedBlock::kTensorRT) {
          std::vector<ir::InputRange> input_ranges;
          for (auto& shape : seg_block.in_shape()) {
            input_ranges.push_back(ir::InputRange(shape));
          }
          // update the input ranges for each segments
          convert_cfg.input_ranges = input_ranges;
          auto engine = conversion::ConvertBlockToEngine(seg_block.block(), convert_cfg, named_params);
          auto temp_g = std::make_shared<torch::jit::Graph>();
          // fallback=true so multiple engine outputs stay flat instead of
          // being wrapped in a tuple (see AddEngineToGraph).
          AddEngineToGraph(new_mod, temp_g, engine, trt_engine_id.str(), true);

          // Replace the segment's mini graph with the engine-invoking graph,
          // then splice it into the global graph.
          seg_block.update_graph(temp_g);
          AddSegmentedBlockToGraph(new_g, seg_block, old_to_new_g);
        } else {
          // Torch segment: splice its original nodes through unchanged.
          AddSegmentedBlockToGraph(new_g, seg_block, old_to_new_g);
        }
      }

      // Register the stitched graph's outputs via the value-remapping table.
      for (auto& output : g->outputs()) {
        new_g->registerOutput(old_to_new_g[output]);
      }

      LOG_INFO(*new_g << "(FallbackGraph)\n");

      // Install the new graph as a method (same name) on the new module.
      auto new_method = new_mod._ivalue()->compilation_unit()->create_function(method.name(), new_g);
      auto schema = util::GenerateGraphSchema(new_method->name(), new_g);
      new_mod.type()->addMethod(new_method);
      new_method->setSchema(schema);
    }
  }

  return new_mod;
}
247
+
154
248
torch::jit::script::Module CompileGraph (const torch::jit::script::Module& mod, CompileSpec cfg) {
249
+ // TODO: not sure how to deal with duplicated code here, so just cut out a branch temporally
250
+ if (cfg.partition_info .enabled ) {
251
+ return CompileGraphWithFallback (mod, cfg);
252
+ }
155
253
// TODO: Should be doing a functional transform but need PR #31978
156
254
// [jit] More robust mangling
157
255
// torch::jit::script::Module new_mod = mod.clone();
@@ -164,7 +262,7 @@ torch::jit::script::Module CompileGraph(const torch::jit::script::Module& mod, C
164
262
auto new_g = std::make_shared<torch::jit::Graph>();
165
263
AddEngineToGraph (new_mod, new_g, engine);
166
264
auto new_method = new_mod._ivalue ()->compilation_unit ()->create_function (method.name (), new_g);
167
- auto schema = GenerateGraphSchema (new_mod, new_method->name (), new_g);
265
+ auto schema = util:: GenerateGraphSchema (new_method->name (), new_g);
168
266
new_mod.type ()->addMethod (new_method);
169
267
new_method->setSchema (schema);
170
268
}
@@ -180,7 +278,7 @@ torch::jit::script::Module EmbedEngineInNewModule(const std::string& engine) {
180
278
auto new_g = std::make_shared<torch::jit::Graph>();
181
279
AddEngineToGraph (new_mod, new_g, engine);
182
280
auto new_method = new_mod._ivalue ()->compilation_unit ()->create_function (" forward" , new_g);
183
- auto schema = GenerateGraphSchema (new_mod, new_method->name (), new_g);
281
+ auto schema = util:: GenerateGraphSchema (new_method->name (), new_g);
184
282
new_mod.type ()->addMethod (new_method);
185
283
new_method->setSchema (schema);
186
284
0 commit comments