Skip to content

Commit 09fa2ad

Browse files
committed
Merge branch 'master' into pyt2.0
2 parents 925c76b + 5fa6374 commit 09fa2ad

File tree

117 files changed

+363
-229
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

117 files changed

+363
-229
lines changed

core/partitioning/partitioninginfo/PartitioningInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ struct PartitioningInfo {
1717
std::vector<std::string> forced_fallback_operators;
1818
bool truncate_long_and_double;
1919
ir::Device target_device;
20+
bool cast_int8_inputs = false;
2021

2122
std::string getGPUDeviceString() const {
2223
return "cuda:" + std::to_string(target_device.gpu_id);

core/partitioning/shape_analysis.cpp

Lines changed: 56 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -99,18 +99,24 @@ torch::jit::Node* getUpstreamCastNode(torch::jit::Value* val) {
9999
return nullptr;
100100
}
101101

102-
torch::jit::Node* createCastNode(SegmentedBlock& seg_block, size_t index, bool is_input, std::string device) {
102+
torch::jit::Node* createCastNode(
103+
SegmentedBlock& seg_block,
104+
size_t index,
105+
bool is_input,
106+
at::ScalarType dtype,
107+
std::string device,
108+
bool force_create_node = false) {
103109
auto cast_raw_value = is_input ? seg_block.raw_inputs()[index] : seg_block.raw_outputs()[index];
104110
auto cast_subgraph_value = is_input ? seg_block.inputs()[index] : seg_block.outputs()[index];
105111
torch::jit::Node* cast_node = getUpstreamCastNode(cast_raw_value);
106112
auto g = seg_block.g();
107113
// if we can find an upstream aten::to node, we use its parameters for creating the new cast node
108-
if (cast_node) {
114+
if (cast_node && !force_create_node) {
109115
std::unordered_map<torch::jit::Value*, torch::jit::Value*> value_map;
110116
value_map.insert({cast_node->inputs()[0], cast_subgraph_value});
111117
if (!is_input) {
112118
// if this value is output, we need to cast it to int32
113-
auto const_val = g->insertConstant(3);
119+
auto const_val = g->insertConstant(dtype);
114120
if (cast_node->inputs()[1]->node()->output()->type()->kind() == torch::jit::TypeKind::DeviceObjType) {
115121
value_map.insert({cast_node->inputs()[2], const_val});
116122
} else {
@@ -122,7 +128,7 @@ torch::jit::Node* createCastNode(SegmentedBlock& seg_block, size_t index, bool i
122128
// auto cast_node = g->prependNode(g->createClone(cast_node, env));
123129
} else {
124130
// if there is no explicit cast aten::to operation, we need to create a node
125-
auto const_type = is_input ? g->insertConstant(4) : g->insertConstant(3);
131+
auto const_type = g->insertConstant(dtype);
126132
auto const_zero = g->insertConstant(0);
127133
const_zero->setType(torch::jit::BoolType::get());
128134
auto cuda = g->insertConstant(device);
@@ -222,27 +228,56 @@ void getSegmentsOutputByRunning(
222228

223229
auto target_device = partitioning_info.getGPUDeviceString();
224230

225-
// auto int64 <=> int32 conversion
226-
if (seg_block.target() == SegmentedBlock::kTorch && partitioning_info.truncate_long_and_double) {
231+
// auto int64 <=> int32 conversion + int8 <=> int32 conversion for non-quantized models
232+
if (seg_block.target() == SegmentedBlock::kTorch) {
227233
// First, check if there is Int64 input
228234
for (size_t i = 0; i < seg_block.inputs().size(); ++i) {
229235
if (ivalues_maps[seg_block.raw_inputs()[i]].isTensor()) {
230236
auto cur_ivalue = ivalues_maps[seg_block.raw_inputs()[i]];
231237
at::ScalarType t = cur_ivalue.toTensor().scalar_type();
232-
if (t == at::kLong) {
238+
if (t == at::kLong && partitioning_info.truncate_long_and_double) {
239+
LOG_DEBUG(
240+
"Detected graph Long tensor input type during shape analysis, "
241+
<< "inserting aten::to cast to Long to ensure this Torch block receives "
242+
<< "a Long-type tensor input.");
233243
// we add a cast operation to cast the type to Int64
234-
auto cast_node = createCastNode(seg_block, i, true, target_device);
244+
auto cast_node = createCastNode(seg_block, i, true, at::kLong, target_device);
245+
seg_block.g()->prependNode(cast_node);
246+
seg_block.inputs()[i]->replaceAllUsesAfterNodeWith(cast_node, cast_node->outputs()[0]);
247+
} else if (t == at::kByte && partitioning_info.cast_int8_inputs) {
248+
LOG_DEBUG(
249+
"Detected graph Byte tensor input type during shape analysis, "
250+
<< "inserting aten::to cast to Byte to ensure this Torch block receives "
251+
<< "a Byte-type tensor input.");
252+
// If the input has type Byte, ensure it is cast to the correct type
253+
auto cast_node = createCastNode(seg_block, i, true, at::kByte, target_device, /*force_create_node=*/true);
235254
seg_block.g()->prependNode(cast_node);
236255
seg_block.inputs()[i]->replaceAllUsesAfterNodeWith(cast_node, cast_node->outputs()[0]);
237256
}
238257
}
239258
}
259+
240260
for (size_t i = 0; i < seg_block.outputs().size(); ++i) {
241261
if (ivalues_maps[seg_block.raw_outputs()[i]].isTensor()) {
242262
auto cur_ivalue = ivalues_maps[seg_block.raw_outputs()[i]];
243263
at::ScalarType t = cur_ivalue.toTensor().scalar_type();
244-
if (t == at::kLong) {
245-
auto cast_node = createCastNode(seg_block, i, false, target_device);
264+
265+
// If the output has type Long and truncation was requested, insert a truncating cast
266+
if (t == at::kLong && partitioning_info.truncate_long_and_double) {
267+
LOG_DEBUG(
268+
"Detected graph Long tensor output type during shape analysis, "
269+
<< "inserting aten::to cast to Int to ensure the subsequent TensorRT block "
270+
<< "receives an Int-type tensor input.");
271+
auto cast_node = createCastNode(seg_block, i, false, at::kInt, target_device);
272+
seg_block.g()->appendNode(cast_node);
273+
seg_block.g()->block()->replaceOutput(i, cast_node->outputs()[0]);
274+
} else if (t == at::kByte && partitioning_info.cast_int8_inputs) {
275+
LOG_DEBUG(
276+
"Detected graph Byte tensor output type during shape analysis, "
277+
<< "inserting aten::to cast to Int to ensure the subsequent TensorRT block "
278+
<< "receives an Int-type tensor input.");
279+
// If the output has type Byte and casting was requested, insert Integer cast
280+
auto cast_node = createCastNode(seg_block, i, false, at::kInt, target_device, /*force_create_node=*/true);
246281
seg_block.g()->appendNode(cast_node);
247282
seg_block.g()->block()->replaceOutput(i, cast_node->outputs()[0]);
248283
}
@@ -254,11 +289,13 @@ void getSegmentsOutputByRunning(
254289
std::vector<std::vector<int64_t>> input_shapes;
255290
std::vector<at::ScalarType> input_types;
256291
for (size_t i = 0; i < seg_block.inputs().size(); ++i) {
257-
if (ivalues_maps[seg_block.raw_inputs()[i]].isTensor()) {
292+
auto current_input = seg_block.raw_inputs()[i];
293+
294+
if (ivalues_maps[current_input].isTensor()) {
258295
// set the input_shape and data_type
259296
// we can use a temp value here instead of replacing the values in ivalues_map since we only use ivalues_map for
260297
// shape inference
261-
auto cur_ivalue = ivalues_maps[seg_block.raw_inputs()[i]];
298+
auto cur_ivalue = ivalues_maps[current_input];
262299
at::ScalarType t = cur_ivalue.toTensor().scalar_type();
263300

264301
if (!partitioning_info.truncate_long_and_double && (t == at::kLong || t == at::kDouble)) {
@@ -271,10 +308,16 @@ void getSegmentsOutputByRunning(
271308
cur_ivalue = cur_ivalue.toTensor().to(at::kFloat);
272309
LOG_WARNING("Truncating graph input type from at::kDouble to at::kFloat");
273310
}
311+
274312
c10::optional<nvinfer1::DataType> dtype = util::optTypeMetaToTRTDataType(cur_ivalue.toTensor().dtype());
275313
if (dtype == c10::nullopt) {
276314
TORCHTRT_THROW_ERROR("Unsupported input data type " << cur_ivalue.toTensor().dtype());
315+
} else if (dtype && dtype.value() == nvinfer1::DataType::kINT8 && partitioning_info.cast_int8_inputs) {
316+
// Special case to ensure input IValues to TensorRT engine are not Int8 type if the
317+
// model itself is not quantized
318+
cur_ivalue = cur_ivalue.toTensor().to(at::kInt);
277319
}
320+
278321
if (cur_ivalue.toTensor().sizes().size() == 0) {
279322
// handle Scalar types, which has sizes of []
280323
input_shapes.push_back(util::toVec(util::toDims(c10::List<int64_t>({1}))));
@@ -297,6 +340,7 @@ void runShapeAnalysis(
297340
const ir::ShapeMode& shape_mode) {
298341
// register every segment's input shape, and it's running output IValues
299342
for (auto& seg_block : ctx->partitioned_blocks[block]) {
343+
LOG_GRAPH("Running shape analysis on block " << seg_block);
300344
torch::jit::ConstantPooling(seg_block.g());
301345
getSegmentsOutputByRunning(seg_block, example_tensor_map, ctx->settings, shape_mode);
302346
}

core/util/trt_util.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@ const std::unordered_map<at::ScalarType, nvinfer1::DataType>& get_at_trt_type_ma
252252
{at::kHalf, nvinfer1::DataType::kHALF},
253253
{at::kInt, nvinfer1::DataType::kINT32},
254254
{at::kChar, nvinfer1::DataType::kINT8},
255+
{at::kByte, nvinfer1::DataType::kINT8},
255256
{at::kBool, nvinfer1::DataType::kBOOL}};
256257
return at_trt_type_map;
257258
}

cpp/src/compile_spec.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,8 +167,11 @@ torchtrt::core::CompileSpec to_internal_compile_spec(CompileSpec external) {
167167
internal.convert_info.engine_settings.dla_local_dram_size = external.dla_local_dram_size;
168168
internal.convert_info.engine_settings.dla_global_dram_size = external.dla_global_dram_size;
169169

170+
internal.partitioning_info.cast_int8_inputs = true;
171+
170172
if (internal.convert_info.engine_settings.enabled_precisions.find(nvinfer1::DataType::kINT8) !=
171173
internal.convert_info.engine_settings.enabled_precisions.end()) {
174+
internal.partitioning_info.cast_int8_inputs = false;
172175
if (external.ptq_calibrator) {
173176
internal.convert_info.engine_settings.calibrator = external.ptq_calibrator;
174177
} else {

docs/_cpp_api/classtorch__tensorrt_1_1DataType.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
<meta name="viewport" content="width=device-width, initial-scale=1.0">
1212

13-
<title>Class DataType &mdash; Torch-TensorRT v1.4.0dev0+b638e78 documentation</title>
13+
<title>Class DataType &mdash; Torch-TensorRT v1.4.0dev0+544654f documentation</title>
1414

1515

1616

@@ -215,7 +215,7 @@
215215

216216

217217
<div class="version">
218-
v1.4.0dev0+b638e78
218+
v1.4.0dev0+544654f
219219
</div>
220220

221221

docs/_cpp_api/classtorch__tensorrt_1_1Device_1_1DeviceType.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
<meta name="viewport" content="width=device-width, initial-scale=1.0">
1212

13-
<title>Class Device::DeviceType &mdash; Torch-TensorRT v1.4.0dev0+b638e78 documentation</title>
13+
<title>Class Device::DeviceType &mdash; Torch-TensorRT v1.4.0dev0+544654f documentation</title>
1414

1515

1616

@@ -215,7 +215,7 @@
215215

216216

217217
<div class="version">
218-
v1.4.0dev0+b638e78
218+
v1.4.0dev0+544654f
219219
</div>
220220

221221

docs/_cpp_api/classtorch__tensorrt_1_1TensorFormat.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
<meta name="viewport" content="width=device-width, initial-scale=1.0">
1212

13-
<title>Class TensorFormat &mdash; Torch-TensorRT v1.4.0dev0+b638e78 documentation</title>
13+
<title>Class TensorFormat &mdash; Torch-TensorRT v1.4.0dev0+544654f documentation</title>
1414

1515

1616

@@ -215,7 +215,7 @@
215215

216216

217217
<div class="version">
218-
v1.4.0dev0+b638e78
218+
v1.4.0dev0+544654f
219219
</div>
220220

221221

docs/_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8CacheCalibrator.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
<meta name="viewport" content="width=device-width, initial-scale=1.0">
1212

13-
<title>Template Class Int8CacheCalibrator &mdash; Torch-TensorRT v1.4.0dev0+b638e78 documentation</title>
13+
<title>Template Class Int8CacheCalibrator &mdash; Torch-TensorRT v1.4.0dev0+544654f documentation</title>
1414

1515

1616

@@ -215,7 +215,7 @@
215215

216216

217217
<div class="version">
218-
v1.4.0dev0+b638e78
218+
v1.4.0dev0+544654f
219219
</div>
220220

221221

docs/_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8Calibrator.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
<meta name="viewport" content="width=device-width, initial-scale=1.0">
1212

13-
<title>Template Class Int8Calibrator &mdash; Torch-TensorRT v1.4.0dev0+b638e78 documentation</title>
13+
<title>Template Class Int8Calibrator &mdash; Torch-TensorRT v1.4.0dev0+544654f documentation</title>
1414

1515

1616

@@ -215,7 +215,7 @@
215215

216216

217217
<div class="version">
218-
v1.4.0dev0+b638e78
218+
v1.4.0dev0+544654f
219219
</div>
220220

221221

docs/_cpp_api/define_macros_8h_1a18d295a837ac71add5578860b55e5502.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
<meta name="viewport" content="width=device-width, initial-scale=1.0">
1212

13-
<title>Define STR &mdash; Torch-TensorRT v1.4.0dev0+b638e78 documentation</title>
13+
<title>Define STR &mdash; Torch-TensorRT v1.4.0dev0+544654f documentation</title>
1414

1515

1616

@@ -215,7 +215,7 @@
215215

216216

217217
<div class="version">
218-
v1.4.0dev0+b638e78
218+
v1.4.0dev0+544654f
219219
</div>
220220

221221

docs/_cpp_api/define_macros_8h_1a282fd3c0b1c3a215148ae372070e1268.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
<meta name="viewport" content="width=device-width, initial-scale=1.0">
1212

13-
<title>Define TORCH_TENSORRT_PATCH_VERSION &mdash; Torch-TensorRT v1.4.0dev0+b638e78 documentation</title>
13+
<title>Define TORCH_TENSORRT_PATCH_VERSION &mdash; Torch-TensorRT v1.4.0dev0+544654f documentation</title>
1414

1515

1616

@@ -215,7 +215,7 @@
215215

216216

217217
<div class="version">
218-
v1.4.0dev0+b638e78
218+
v1.4.0dev0+544654f
219219
</div>
220220

221221

docs/_cpp_api/define_macros_8h_1a31398a6d4d27e28817afb0f0139e909e.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
<meta name="viewport" content="width=device-width, initial-scale=1.0">
1212

13-
<title>Define TORCH_TENSORRT_MAJOR_VERSION &mdash; Torch-TensorRT v1.4.0dev0+b638e78 documentation</title>
13+
<title>Define TORCH_TENSORRT_MAJOR_VERSION &mdash; Torch-TensorRT v1.4.0dev0+544654f documentation</title>
1414

1515

1616

@@ -215,7 +215,7 @@
215215

216216

217217
<div class="version">
218-
v1.4.0dev0+b638e78
218+
v1.4.0dev0+544654f
219219
</div>
220220

221221

docs/_cpp_api/define_macros_8h_1a35703561b26b1a9d2738ad7d58b27827.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
<meta name="viewport" content="width=device-width, initial-scale=1.0">
1212

13-
<title>Define TORCH_TENSORRT_MINOR_VERSION &mdash; Torch-TensorRT v1.4.0dev0+b638e78 documentation</title>
13+
<title>Define TORCH_TENSORRT_MINOR_VERSION &mdash; Torch-TensorRT v1.4.0dev0+544654f documentation</title>
1414

1515

1616

@@ -215,7 +215,7 @@
215215

216216

217217
<div class="version">
218-
v1.4.0dev0+b638e78
218+
v1.4.0dev0+544654f
219219
</div>
220220

221221

docs/_cpp_api/define_macros_8h_1abd1465eb38256d3f22cc1426b23d516b.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
<meta name="viewport" content="width=device-width, initial-scale=1.0">
1212

13-
<title>Define TORCHTRT_API &mdash; Torch-TensorRT v1.4.0dev0+b638e78 documentation</title>
13+
<title>Define TORCHTRT_API &mdash; Torch-TensorRT v1.4.0dev0+544654f documentation</title>
1414

1515

1616

@@ -215,7 +215,7 @@
215215

216216

217217
<div class="version">
218-
v1.4.0dev0+b638e78
218+
v1.4.0dev0+544654f
219219
</div>
220220

221221

docs/_cpp_api/define_macros_8h_1abe87b341f562fd1cf40b7672e4d759da.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
<meta name="viewport" content="width=device-width, initial-scale=1.0">
1212

13-
<title>Define XSTR &mdash; Torch-TensorRT v1.4.0dev0+b638e78 documentation</title>
13+
<title>Define XSTR &mdash; Torch-TensorRT v1.4.0dev0+544654f documentation</title>
1414

1515

1616

@@ -215,7 +215,7 @@
215215

216216

217217
<div class="version">
218-
v1.4.0dev0+b638e78
218+
v1.4.0dev0+544654f
219219
</div>
220220

221221

docs/_cpp_api/define_macros_8h_1ad19939408f7be171a74a89928b36eb59.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
<meta name="viewport" content="width=device-width, initial-scale=1.0">
1212

13-
<title>Define TORCHTRT_HIDDEN &mdash; Torch-TensorRT v1.4.0dev0+b638e78 documentation</title>
13+
<title>Define TORCHTRT_HIDDEN &mdash; Torch-TensorRT v1.4.0dev0+544654f documentation</title>
1414

1515

1616

@@ -215,7 +215,7 @@
215215

216216

217217
<div class="version">
218-
v1.4.0dev0+b638e78
218+
v1.4.0dev0+544654f
219219
</div>
220220

221221

0 commit comments

Comments
 (0)