Skip to content

Commit fd8548d

Browse files
authored
Merge pull request #1471 from pytorch/master
Update release branch
2 parents a9a4bb2 + 0471f2d commit fd8548d

File tree

214 files changed

+4425
-572
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

214 files changed

+4425
-572
lines changed

.github/code-owners.yml

Lines changed: 0 additions & 12 deletions
Original file line number · Diff line number · Diff line change
@@ -9,7 +9,6 @@
99

1010
"component: build system":
1111
- "narendasan"
12-
- "andi4191"
1312

1413
"component: conversion":
1514
- "narendasan"
@@ -29,7 +28,6 @@
2928
- "peri044"
3029

3130
"component: execution":
32-
- "andi4191"
3331
- "narendasan"
3432

3533
"component: lowering":
@@ -48,15 +46,12 @@
4846
- "peri044"
4947

5048
"component: runtime":
51-
- "andi4191"
5249
- "narendasan"
5350

5451
"component: tests":
55-
- "andi4191"
5652
- "narendasan"
5753

5854
"component: torchtrtc":
59-
- "andi4191"
6055
- "narendasan"
6156

6257
"component: dependencies":
@@ -74,24 +69,20 @@
7469
- "tanayvarshney"
7570

7671
"infrastructre":
77-
- "andi4191"
7872
- "narendasan"
7973

8074
"component: packaging":
8175
- "narendasan"
82-
- "andi4191"
8376
- "peri044"
8477

8578
"channel: NGC":
86-
- "andi4191"
8779
- "peri044"
8880

8981
"channel: linux-x86":
9082
- "narendasan"
9183
- "peri044"
9284

9385
"channel: linux-sbsa":
94-
- "andi4191"
9586
- "bowang007"
9687

9788
"channel: windows":
@@ -102,16 +93,13 @@
10293
- "bowang007"
10394

10495
"component: tooling":
105-
- "andi4191"
10696
- "narendasan"
10797

10898
"performance":
109-
- "andi4191"
11099
- "peri044"
111100
- "bowang007"
112101

113102
"channel: docker":
114-
- "andi4191"
115103
- "narendasan"
116104

117105
"ux":

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -2,8 +2,8 @@
22
cmake_minimum_required(VERSION 3.17)
33
project(Torch-TensorRT LANGUAGES CXX)
44

5-
# use c++17
6-
set(CMAKE_CXX_STANDARD 17)
5+
# use c++14 like PyTorch
6+
set(CMAKE_CXX_STANDARD 14)
77

88
# Build the libraries with -fPIC
99
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

core/compiler.cpp

Lines changed: 33 additions & 19 deletions
Original file line number · Diff line number · Diff line change
@@ -31,11 +31,17 @@ void AddEngineToGraph(
3131
torch::jit::script::Module mod,
3232
std::shared_ptr<torch::jit::Graph>& g,
3333
const std::string& serialized_engine,
34-
runtime::CudaDevice& device_info,
34+
runtime::RTDevice& device_info,
35+
const std::vector<std::string>& input_binding_names,
36+
const std::vector<std::string>& output_binding_names,
3537
std::string engine_id = "",
3638
bool fallback = false) {
3739
auto engine_ptr = c10::make_intrusive<runtime::TRTEngine>(
38-
mod._ivalue()->name() + "_engine_" + engine_id, serialized_engine, device_info);
40+
mod._ivalue()->name() + "_engine_" + engine_id,
41+
serialized_engine,
42+
device_info,
43+
input_binding_names,
44+
output_binding_names);
3945
// Get required metadata about the engine out
4046
auto num_io = engine_ptr->num_io;
4147
auto name = engine_ptr->name;
@@ -137,10 +143,13 @@ partitioning::GraphAndMapping BuildHybridGraph(
137143
auto partitioning_info = cfg.partitioning_info;
138144

139145
auto partitioning_ctx = partitioning::PartitioningCtx(block, partitioning_info);
140-
auto collection_input_ivalues_map =
141-
partitioning::generateRandomInputs(partitioning_info.collection_input_spec_map, first_use_types);
146+
partitioning_ctx.input_types_map = first_use_types;
142147

143-
partitioning::partition(&partitioning_ctx, collection_input_ivalues_map);
148+
// Generate a dictionary of input torch::jit::Value's to their min, opt, max tensors and store in ctx
149+
// TODO: Combine this within partition call
150+
partitioning::populateInputIValues(&partitioning_ctx);
151+
152+
partitioning::partition(&partitioning_ctx);
144153

145154
for (auto& partitioned_block : partitioning_ctx.partitioned_blocks) {
146155
partitioning::PartitionedGraph& segmented_blocks = partitioned_block.second;
@@ -151,23 +160,24 @@ partitioning::GraphAndMapping BuildHybridGraph(
151160
trt_engine_id << reinterpret_cast<const int*>(&seg_block);
152161

153162
if (seg_block.target() == partitioning::SegmentedBlock::kTensorRT) {
154-
auto shapes = seg_block.in_shapes();
155-
auto types = seg_block.in_types();
156-
std::vector<ir::Input> inputs;
157-
for (size_t i = 0; i < shapes.size(); i++) {
158-
auto in = ir::Input(shapes[i]);
159-
in.dtype = util::ScalarTypeToTRTDataType(types[i]);
160-
inputs.push_back(in);
161-
}
163+
auto inputs = seg_block.construct_inputs_spec();
162164
// update the input ranges for each segments
163165
convert_info.inputs = ir::associate_specs_with_inputs(seg_block.g(), inputs, static_params);
164166

165167
// TODO mapping Inputs Ivalue to flatten one here
166168
auto engine = conversion::ConvertBlockToEngine(seg_block.block(), convert_info, static_params);
167169
auto temp_g = std::make_shared<torch::jit::Graph>();
168170
auto device_spec = convert_info.engine_settings.device;
169-
auto cuda_device = runtime::CudaDevice(device_spec.gpu_id, device_spec.device_type);
170-
AddEngineToGraph(new_mod, temp_g, engine, cuda_device, trt_engine_id.str(), true);
171+
auto cuda_device = runtime::RTDevice(device_spec.gpu_id, device_spec.device_type);
172+
AddEngineToGraph(
173+
new_mod,
174+
temp_g,
175+
engine,
176+
cuda_device,
177+
std::vector<std::string>(),
178+
std::vector<std::string>(),
179+
trt_engine_id.str(),
180+
true);
171181

172182
seg_block.update_graph(temp_g);
173183
}
@@ -283,7 +293,7 @@ torch::jit::Module CompileGraph(const torch::jit::Module& mod, CompileSpec cfg)
283293
torch::jit::Module new_mod(mod._ivalue()->name() + "_trt");
284294

285295
auto device_spec = cfg.convert_info.engine_settings.device;
286-
auto cuda_device = runtime::CudaDevice(device_spec.gpu_id, device_spec.device_type);
296+
auto cuda_device = runtime::RTDevice(device_spec.gpu_id, device_spec.device_type);
287297

288298
for (const torch::jit::Method& method : mod.get_methods()) {
289299
if (method.name().compare("forward") == 0) {
@@ -331,7 +341,7 @@ torch::jit::Module CompileGraph(const torch::jit::Module& mod, CompileSpec cfg)
331341
"Not all operations in graph are supported by the compiler");
332342
// TODO find the right
333343
auto engine = conversion::ConvertBlockToEngine(g->block(), cfg.convert_info, static_params);
334-
AddEngineToGraph(new_mod, new_g, engine, cuda_device);
344+
AddEngineToGraph(new_mod, new_g, engine, cuda_device, std::vector<std::string>(), std::vector<std::string>());
335345
}
336346
auto new_method = new_mod._ivalue()->compilation_unit()->create_function(method.name(), new_g);
337347
auto schema = util::GenerateGraphSchema(new_method->name(), new_g);
@@ -342,12 +352,16 @@ torch::jit::Module CompileGraph(const torch::jit::Module& mod, CompileSpec cfg)
342352
return new_mod;
343353
}
344354

345-
torch::jit::script::Module EmbedEngineInNewModule(const std::string& engine, runtime::CudaDevice cuda_device) {
355+
torch::jit::script::Module EmbedEngineInNewModule(
356+
const std::string& engine,
357+
runtime::RTDevice cuda_device,
358+
const std::vector<std::string>& input_binding_names,
359+
const std::vector<std::string>& output_binding_names) {
346360
std::ostringstream engine_id;
347361
engine_id << reinterpret_cast<const int*>(&engine);
348362
torch::jit::script::Module new_mod("tensorrt_engine_mod_" + engine_id.str());
349363
auto new_g = std::make_shared<torch::jit::Graph>();
350-
AddEngineToGraph(new_mod, new_g, engine, cuda_device);
364+
AddEngineToGraph(new_mod, new_g, engine, cuda_device, input_binding_names, output_binding_names);
351365
auto new_method = new_mod._ivalue()->compilation_unit()->create_function("forward", new_g);
352366
auto schema = util::GenerateGraphSchema(new_method->name(), new_g);
353367
new_mod.type()->addMethod(new_method);

core/compiler.h

Lines changed: 5 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -28,7 +28,11 @@ std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& mod, std::
2828

2929
torch::jit::script::Module CompileGraph(const torch::jit::script::Module& module, CompileSpec cfg);
3030

31-
torch::jit::script::Module EmbedEngineInNewModule(const std::string& engine, runtime::CudaDevice cuda_device);
31+
torch::jit::script::Module EmbedEngineInNewModule(
32+
const std::string& engine,
33+
runtime::RTDevice cuda_device,
34+
const std::vector<std::string>& input_binding_names,
35+
const std::vector<std::string>& output_binding_names);
3236

3337
void set_device(const int gpu_id);
3438

core/conversion/conversionctx/BUILD

Lines changed: 1 addition & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -21,6 +21,7 @@ cc_library(
2121
deps = [
2222
"@tensorrt//:nvinfer",
2323
"//core/util:prelude",
24+
"//core/ir",
2425
] + select({
2526
":use_pre_cxx11_abi": ["@libtorch_pre_cxx11_abi//:libtorch"],
2627
"//conditions:default": ["@libtorch//:libtorch"],

core/conversion/conversionctx/ConversionCtx.h

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,28 +9,21 @@
99
#include "torch/csrc/jit/ir/ir.h"
1010

1111
#include <cuda_runtime.h>
12+
#include "core/ir/ir.h"
1213
#include "core/util/prelude.h"
1314

1415
namespace torch_tensorrt {
1516
namespace core {
1617
namespace conversion {
1718

18-
struct Device {
19-
nvinfer1::DeviceType device_type;
20-
int64_t gpu_id;
21-
int64_t dla_core;
22-
bool allow_gpu_fallback;
23-
Device() : device_type(nvinfer1::DeviceType::kGPU), gpu_id(0), dla_core(0), allow_gpu_fallback(false) {}
24-
};
25-
2619
struct BuilderSettings {
2720
std::set<nvinfer1::DataType> enabled_precisions = {};
2821
bool sparse_weights = false;
2922
bool disable_tf32 = false;
3023
bool refit = false;
3124
bool debug = false;
3225
bool truncate_long_and_double = false;
33-
Device device;
26+
ir::Device device;
3427
nvinfer1::EngineCapability capability = TRT_ENGINE_CAPABILITY_STANDARD;
3528
nvinfer1::IInt8Calibrator* calibrator = nullptr;
3629
uint64_t num_avg_timing_iters = 1;

core/conversion/converters/converter_util.cpp

Lines changed: 32 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -156,6 +156,38 @@ nvinfer1::ILayer* add_elementwise(
156156
return ele;
157157
}
158158

159+
nvinfer1::ITensor* add_abs(
160+
ConversionCtx* ctx,
161+
const torch::jit::Node* n,
162+
nvinfer1::ITensor* self,
163+
const std::string& name) {
164+
nvinfer1::ILayer* absolute_value_layer;
165+
166+
// Check if TRT Unary ops support the input type
167+
bool unary_supported_input = (self->getType() == nvinfer1::DataType::kFLOAT) ||
168+
(self->getType() == nvinfer1::DataType::kHALF) || (self->getType() == nvinfer1::DataType::kINT8);
169+
if (unary_supported_input) {
170+
absolute_value_layer = ctx->net->addUnary(*self, nvinfer1::UnaryOperation::kABS);
171+
TORCHTRT_CHECK(absolute_value_layer, "Unable to create abs layer from node: " << *n);
172+
absolute_value_layer->setName(name.c_str());
173+
} else {
174+
LOG_GRAPH(
175+
"Tensor is of unsupported type "
176+
<< self->getType() << " for IUnaryLayer::kABS. Using backup implementation via IElementWise (max(x, -x)");
177+
// For types not supported by kABS, use an elementwise implementation abs(x) = max(x, -1 * x)
178+
at::Tensor neg_one = torch::full({1}, -1).to(util::TRTDataTypeToScalarType(self->getType()));
179+
auto neg_one_const = tensor_to_const(ctx, neg_one);
180+
auto neg_layer = add_elementwise(
181+
ctx, nvinfer1::ElementWiseOperation::kPROD, self, neg_one_const, util::node_info(n) + std::string("_Negation"));
182+
TORCHTRT_CHECK(neg_layer, "Unable to create prod layer from node: " << *n);
183+
absolute_value_layer =
184+
add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMAX, self, neg_layer->getOutput(0), name);
185+
TORCHTRT_CHECK(absolute_value_layer, "Unable to create max layer from node: " << *n);
186+
}
187+
188+
return absolute_value_layer->getOutput(0);
189+
}
190+
159191
nvinfer1::ITensor* applyIdentityOp(ConversionCtx* ctx, nvinfer1::ITensor* tensor, const std::string& tensor_name) {
160192
auto id_layer = ctx->net->addIdentity(*tensor);
161193
auto id_out_tensor = id_layer->getOutput(0);

core/conversion/converters/converter_util.h

Lines changed: 8 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -35,13 +35,21 @@ nvinfer1::ITensor* addUnpadding(
3535
bool trailing = true,
3636
bool use_zeros = true);
3737

38+
// TODO: Change add_elementwise schema to output nvinfer1::ITensor* instead of nvinfer1::ILayer*,
39+
// for consistency with other utils. Need to change schema and usage in all calling contexts
3840
nvinfer1::ILayer* add_elementwise(
3941
ConversionCtx* ctx,
4042
nvinfer1::ElementWiseOperation op,
4143
nvinfer1::ITensor* self,
4244
nvinfer1::ITensor* other,
4345
const std::string& name);
4446

47+
nvinfer1::ITensor* add_abs(
48+
ConversionCtx* ctx,
49+
const torch::jit::Node* n,
50+
nvinfer1::ITensor* self,
51+
const std::string& name);
52+
4553
// Apply an identity operation on a tensor. Used in the case where an input is an output to a network.
4654
nvinfer1::ITensor* applyIdentityOp(ConversionCtx* ctx, nvinfer1::ITensor* tensor, const std::string& name);
4755

core/conversion/converters/impl/einsum.cpp

Lines changed: 7 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -18,6 +18,13 @@ auto einsum_registrations TORCHTRT_UNUSED = RegisterNodeConversionPatterns().pat
1818
auto equation = args[0].unwrapToString();
1919
auto in = args[1].IValue()->toListRef();
2020

21+
TORCHTRT_CHECK(
22+
in.size() <= 2,
23+
"TensorRT currently supports up to 2 input tensors "
24+
<< "to einsum but operation had " << in.size()
25+
<< " input tensors, please specify torch_executed_ops=[\"aten::einsum\"] "
26+
<< "at compilation time to avoid this error.");
27+
2128
std::vector<nvinfer1::ITensor*> tensors;
2229

2330
// Populate vector of ITensor pointers

0 commit comments

Comments (0)