Commit bce8464

feat(element_wise): Auto cast to higher precision for mismatched types

Signed-off-by: Naren Dasan <[email protected]>

1 parent: 5fadfd4

4 files changed (+47, -4 lines)

core/conversion/converters/converter_util.cpp
Lines changed: 7 additions & 0 deletions

@@ -65,6 +65,13 @@ nvinfer1::ILayer* add_elementwise(
     nvinfer1::ITensor* self,
     nvinfer1::ITensor* other,
     const std::string& name) {
+  if (self->getType() == nvinfer1::DataType::kFLOAT && other->getType() == nvinfer1::DataType::kINT32) {
+    LOG_DEBUG("Type mismatch, casting other to " << self->getType());
+    other = castITensor(ctx, other, self->getType());
+  } else if (self->getType() == nvinfer1::DataType::kINT32 && other->getType() == nvinfer1::DataType::kFLOAT) {
+    LOG_DEBUG("Type mismatch, casting self to " << other->getType());
+    self = castITensor(ctx, self, other->getType());
+  }
   // ensure self to have larger number of dimension
   bool swapSelfOther = false;
   if (self->getDimensions().nbDims < other->getDimensions().nbDims) {
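The new branch promotes whichever operand is INT32 to the other operand's FLOAT type before the elementwise layer is built, reusing the existing castITensor helper from converter_util. As background only: a cast like this is typically implemented in TensorRT with an identity layer whose output type is overridden. The sketch below is an illustration under that assumption, not Torch-TensorRT's actual castITensor (which takes the ConversionCtx and names the layer); the function name and the raw network pointer are made up for the example.

// Illustrative only: cast an ITensor to a target dtype via an identity layer.
// `network` stands in for the nvinfer1::INetworkDefinition being built; the real
// helper goes through ctx and also labels the layer for debugging.
nvinfer1::ITensor* cast_itensor_sketch(
    nvinfer1::INetworkDefinition* network,
    nvinfer1::ITensor* tensor,
    nvinfer1::DataType dtype) {
  if (tensor->getType() == dtype) {
    return tensor; // already the requested type, nothing to insert
  }
  auto* id_layer = network->addIdentity(*tensor);
  id_layer->setOutputType(0, dtype); // ask TensorRT to cast the identity output
  return id_layer->getOutput(0);
}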

core/conversion/converters/impl/element_wise.cpp
Lines changed: 2 additions & 0 deletions

@@ -412,6 +412,7 @@ auto element_wise_registrations TORCHTRT_UNUSED =
   // Should implement self * other
   auto self = args[0].ITensorOrFreeze(ctx);
   auto other = args[1].ITensorOrFreeze(ctx);
+
   auto mul =
       add_elementwise(ctx, nvinfer1::ElementWiseOperation::kPROD, self, other, util::node_info(n));
   TORCHTRT_CHECK(mul, "Unable to create mul layer from node: " << *n);
@@ -426,6 +427,7 @@ auto element_wise_registrations TORCHTRT_UNUSED =
   // TODO: Remove with functionalization
   auto self = args[0].ITensorOrFreeze(ctx);
   auto other = scalar_to_tensor(ctx, args[1].unwrapToScalar());
+
   auto mul =
       add_elementwise(ctx, nvinfer1::ElementWiseOperation::kPROD, self, other, util::node_info(n));
   TORCHTRT_CHECK(mul, "Unable to create mul layer from node: " << *n);
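The two hunks above only add blank lines; the behavioural change for aten::mul comes from add_elementwise in converter_util.cpp. To make the effect concrete: a graph along the lines of the raw-string literal below (a representative reconstruction, since the actual IR literal used by ATenMulConvertsCorrectly is not part of this diff) can now be converted with one float input and one int input, because the converter casts the int operand to float before the kPROD layer instead of handing TensorRT mismatched input types.

// Representative test-style IR (assumed shape of the mul test graph):
const auto graph = R"IR(
    graph(%0 : Tensor, %1 : Tensor):
      %2 : Tensor = aten::mul(%0, %1)
      return (%2))IR";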

tests/core/conversion/converters/test_element_wise.cpp
Lines changed: 35 additions & 2 deletions

@@ -12,7 +12,9 @@ void pointwise_test_helper(
     std::vector<int64_t> shape1 = {5},
     std::vector<int64_t> shape2 = {5},
     bool negative_input = false,
-    bool int_tensors = false) {
+    bool int_tensors = false,
+    bool float_int_tensors = false,
+    bool int_float_tensors = false) {
   auto g = std::make_shared<torch::jit::Graph>();
   torch::jit::parseIR(graph_ir, g.get());

@@ -27,11 +29,24 @@ void pointwise_test_helper(
   if (!singleInput) {
     torch_inputs.push_back(at::randint(1, 5, shape2, {at::kCUDA}));
   }
+
+  TORCHTRT_CHECK(!((int_tensors && (float_int_tensors || int_float_tensors)) || (float_int_tensors && int_float_tensors)),
+      "Invalid test configuration, only one of int_tensors, float_int_tensors, int_float_tensors can be true");
+
   if(int_tensors){
     for(size_t i = 0UL; i < torch_inputs.size(); ++i){
       torch_inputs[i] = torch_inputs[i].to(at::kInt);
     }
+  } else if(float_int_tensors) {
+    TORCHTRT_CHECK(!singleInput, "float_int_tensors tests require two inputs");
+    torch_inputs[0] = torch_inputs[0].to(at::kFloat);
+    torch_inputs[1] = torch_inputs[1].to(at::kInt);
+  } else if (int_float_tensors) {
+    TORCHTRT_CHECK(!singleInput, "int_float_tensors tests require two inputs");
+    torch_inputs[0] = torch_inputs[0].to(at::kInt);
+    torch_inputs[1] = torch_inputs[1].to(at::kFloat);
   }
+
   auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
   auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, torch_inputs);
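The two new flags are exercised by the test updates below; for readability, here are the two call shapes with the trailing booleans annotated (negative_input, int_tensors, float_int_tensors, int_float_tensors, matching the signature above):

// float self (input 0), int other (input 1): hits the "cast other" branch in add_elementwise
pointwise_test_helper(graph, false, true, {5}, {5}, false, false, true);
// int self, float other: hits the "cast self" branch
pointwise_test_helper(graph, false, true, {5}, {5}, false, false, false, true);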

@@ -62,6 +77,8 @@ TEST(Converters, ATenAddConvertsCorrectly) {
   pointwise_test_helper(graph, false, false, {4}, {3, 4});
   pointwise_test_helper(graph, false, true, {3, 4, 3}, {4, 3});
   pointwise_test_helper(graph, false, true, {4, 3}, {3, 4, 3});
+  pointwise_test_helper(graph, false, true, {5}, {5}, false, false, true);
+  pointwise_test_helper(graph, false, true, {5}, {5}, false, false, false, true);
 }
 
 TEST(Converters, ATenAddWithAlphaConvertsCorrectly) {
@@ -75,9 +92,11 @@ TEST(Converters, ATenAddWithAlphaConvertsCorrectly) {
   pointwise_test_helper(graph, false, false, {4}, {3, 4});
   pointwise_test_helper(graph, false, true, {3, 4, 3}, {4, 3});
   pointwise_test_helper(graph, false, true, {4, 3}, {3, 4, 3});
+  pointwise_test_helper(graph, false, true, {5}, {5}, false, false, true);
+  pointwise_test_helper(graph, false, true, {5}, {5}, false, false, false, true);
 }
 
-TEST(Converters, ATenAddImplicitWithAlphaConvertsCorrectly) {
+TEST(Converters, ATenAddInplaceWithAlphaConvertsCorrectly) {
   const auto graph = R"IR(
       graph(%0 : Tensor, %1 : Tensor):
         %2 : float = prim::Constant[value=7.6]()
@@ -109,6 +128,8 @@ TEST(Converters, ATenSubConvertsCorrectly) {
   pointwise_test_helper(graph, false, false, {4}, {3, 4});
   pointwise_test_helper(graph, false, true, {3, 4, 3}, {4, 3});
   pointwise_test_helper(graph, false, true, {4, 3}, {3, 4, 3});
+  pointwise_test_helper(graph, false, true, {5}, {5}, false, false, true);
+  pointwise_test_helper(graph, false, true, {5}, {5}, false, false, false, true);
 }
 
 TEST(Converters, ATenMulConvertsCorrectly) {
@@ -121,6 +142,8 @@ TEST(Converters, ATenMulConvertsCorrectly) {
   pointwise_test_helper(graph, false, false, {4}, {3, 4});
   pointwise_test_helper(graph, false, true, {3, 4, 3}, {4, 3});
   pointwise_test_helper(graph, false, true, {4, 3}, {3, 4, 3});
+  pointwise_test_helper(graph, false, true, {5}, {5}, false, false, true);
+  pointwise_test_helper(graph, false, true, {5}, {5}, false, false, false, true);
 }
 
 TEST(Converters, ATenMulWithScalarConvertsCorrectly) {
@@ -151,6 +174,8 @@ TEST(Converters, ATenDivConvertsCorrectly) {
   pointwise_test_helper(graph, false, false, {4}, {3, 4});
   pointwise_test_helper(graph, false, true, {3, 4, 3}, {4, 3});
   pointwise_test_helper(graph, false, true, {4, 3}, {3, 4, 3});
+  pointwise_test_helper(graph, false, true, {5}, {5}, false, false, true);
+  pointwise_test_helper(graph, false, true, {5}, {5}, false, false, false, true);
 }
 
 TEST(Converters, ATenDivWithScalarConvertsCorrectly) {
@@ -173,6 +198,8 @@ TEST(Converters, ATenDivRoundingFloorConvertsCorrectly) {
   pointwise_test_helper(graph, false, false, {4}, {3, 4}, true);
   pointwise_test_helper(graph, false, true, {3, 4, 3}, {4, 3}, true);
   pointwise_test_helper(graph, false, true, {4, 3}, {3, 4, 3}, true);
+  pointwise_test_helper(graph, false, true, {5}, {5}, false, false, true);
+  pointwise_test_helper(graph, false, true, {5}, {5}, false, false, false, true);
 }
 
 TEST(Converters, ATenDivRoundingTruncConvertsCorrectly) {
@@ -186,6 +213,8 @@ TEST(Converters, ATenDivRoundingTruncConvertsCorrectly) {
   pointwise_test_helper(graph, false, false, {4}, {3, 4}, true);
   pointwise_test_helper(graph, false, true, {3, 4, 3}, {4, 3}, true);
   pointwise_test_helper(graph, false, true, {4, 3}, {3, 4, 3}, true);
+  pointwise_test_helper(graph, false, true, {5}, {5}, false, false, true);
+  pointwise_test_helper(graph, false, true, {5}, {5}, false, false, false, true);
 }
 
 TEST(Converters, ATenDivRoundingNoneConvertsCorrectly) {
@@ -211,6 +240,8 @@ TEST(Converters, ATenPowTensorConvertsCorrectly) {
   pointwise_test_helper(graph, false, false, {4}, {3, 4});
   pointwise_test_helper(graph, false, true, {3, 4, 3}, {4, 3});
   pointwise_test_helper(graph, false, true, {4, 3}, {3, 4, 3});
+  pointwise_test_helper(graph, false, true, {5}, {5}, false, false, true);
+  pointwise_test_helper(graph, false, true, {5}, {5}, false, false, false, true);
 }
 
 TEST(Converters, ATenPowScalarConvertsCorrectly) {
@@ -251,6 +282,8 @@ TEST(Converters, ATenFloorDivideConvertsCorrectly) {
   pointwise_test_helper(graph, false, false, {4}, {3, 4});
   pointwise_test_helper(graph, false, true, {3, 4, 3}, {4, 3});
   pointwise_test_helper(graph, false, true, {4, 3}, {3, 4, 3});
+  pointwise_test_helper(graph, false, true, {5}, {5}, false, false, true);
+  pointwise_test_helper(graph, false, true, {5}, {5}, false, false, false, true);
 }
 
 TEST(Converters, ATenFloorDivideWithScalarConvertsCorrectly) {

tests/util/run_graph_engine.cpp
Lines changed: 3 additions & 2 deletions

@@ -30,6 +30,7 @@ std::vector<core::ir::Input> toInputsDynamic(std::vector<at::Tensor> ten, bool dynamic_batch) {
 
   for (auto i : ten) {
     auto opt = core::util::toVec(i.sizes());
+    auto dtype = core::util::ScalarTypeToTRTDataType(i.scalar_type());
 
     if (dynamic_batch) {
       std::vector<int64_t> min_range(opt);
@@ -38,15 +39,15 @@ std::vector<core::ir::Input> toInputsDynamic(std::vector<at::Tensor> ten, bool dynamic_batch) {
       min_range[0] = ceil(opt[0] / 2.0);
       max_range[0] = 2 * opt[0];
 
-      a.push_back(core::ir::Input(min_range, opt, max_range));
+      a.push_back(core::ir::Input(min_range, opt, max_range, dtype));
     } else {
       std::vector<int64_t> min_range(opt);
       std::vector<int64_t> max_range(opt);
 
       min_range[1] = ceil(opt[1] / 2.0);
       max_range[1] = 2 * opt[1];
 
-      a.push_back(core::ir::Input(min_range, opt, max_range));
+      a.push_back(core::ir::Input(min_range, opt, max_range, dtype));
     }
   }
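Passing the tensor's dtype into core::ir::Input is what lets the mixed-type tests actually reach the new cast path: the engine inputs now carry a TensorRT type derived from each test tensor rather than an implicit default (presumably float). Roughly, the mapping this relies on is the following; this is illustrative, as ScalarTypeToTRTDataType's full behaviour is not shown in this diff.

// Assumed mapping for the types used by these tests:
//   at::kFloat -> nvinfer1::DataType::kFLOAT
//   at::kInt   -> nvinfer1::DataType::kINT32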
