fix: Addressing some bugs with TS lowering, disabling BERT test

Naren Dasan · Naren Dasan · commit 8dce1e6b8a1f · 2024-08-15T07:29:00.000-06:00
BERT test is failing because of data dependent intermediate values
which aren't supported in the TS frontend

Signed-off-by: Naren Dasan &lt;naren@nvidia.com&gt;
Signed-off-by: Naren Dasan &lt;naren@narendasan.com&gt;
diff --git a/core/lowering/lowering.cpp b/core/lowering/lowering.cpp
@@ -142,11 +142,11 @@ void LowerGraph(std::shared_ptr<torch::jit::Graph>& g, std::vector<torch::jit::I
   passes::SiluToSigmoidMultipication(g);
   passes::RemoveSingleUse0DTensors(g);
   passes::RemoveUnnecessaryCasts(g);
+  passes::UnpackScaledDotProductAttention(g);
   passes::ReplaceAtenInt(g);
   if (lower_info.converting_to_trt_engine) {
     passes::RemoveCollectionCast(g);
   }
-  passes::UnpackScaledDotProductAttention(g);
   passes::UnpackAndCastMaskedFill(g, lower_info.getGPUDeviceString());
   passes::UnpackAndCastNumToTensor(g, lower_info.getGPUDeviceString());
   passes::UnpackAndCastFull(g, lower_info.getGPUDeviceString());
diff --git a/core/lowering/passes/remove_unnecessary_casts.cpp b/core/lowering/passes/remove_unnecessary_casts.cpp
@@ -117,77 +117,68 @@ void RemoveSingleUse0DTensors(std::shared_ptr<torch::jit::Graph>& g) {
 
                           // Change intermediate op output type
                           LOG_GRAPH(user->schema());
-
                           torch::jit::Node* new_node;
-                          switch (user->kind()) {
-                            // Use this to handle special cases where the scalar version of the intermediate operator
-                            // has a different schema than the original
-                            case c10::aten::add:
-                              new_node = g->create(
-                                  user->kind(),
-                                  torch::jit::ArrayRef<torch::jit::Value*>({user->inputs()[0], user->inputs()[1]}),
-                                  1);
-                              new_node->insertAfter(user);
-                              new_node->outputs()[0]->setType(c10::IntType::get());
-                              user->outputs()[0]->replaceAllUsesWith(new_node->outputs()[0]);
-                              user->destroy();
-                              break;
-                            case c10::aten::floor_divide:
-                              new_node = g->create(c10::aten::floordiv, user->inputs(), 1);
-                              new_node->insertAfter(user);
-                              new_node->outputs()[0]->setType(c10::IntType::get());
-                              user->outputs()[0]->replaceAllUsesWith(new_node->outputs()[0]);
-                              user->destroy();
-                              break;
-                            case c10::aten::div:
-                              // If the first two entries to aten::div are non-Tensors,
-                              // there cannot be a rounding mode specified (3rd entry)
-                              if (!user->inputs()[0]->type()->isSubtypeOf(c10::TensorType::get()) &&
-                                  !user->inputs()[1]->type()->isSubtypeOf(c10::TensorType::get()) &&
-                                  user->inputs().size() == 3 &&
-                                  user->inputs()[2]->type()->isSubtypeOf(c10::StringType::get()) &&
-                                  torch::jit::toIValue(user->inputs()[2]).has_value()) {
-                                // Select the first 2 entries of the inputs, corresponding to the values
-                                auto div_args = user->inputs().slice(0, 2);
-
-                                // Depending on the rounding mode, create the appropriate nodes
-                                if (torch::jit::toIValue(user->inputs()[2]).value().toStringRef() == "trunc") {
-                                  // Truncate case (round result towards 0)
-                                  torch::jit::Node* new_node_div;
-                                  // Create node which simply divides the two entries
-                                  new_node_div = g->create(c10::aten::div, div_args, 1);
-                                  new_node_div->insertAfter(user);
-                                  new_node_div->outputs()[0]->setType(c10::FloatType::get());
-
-                                  // Create node which casts the result to an integer, effectively truncating
-                                  new_node = g->create(c10::aten::Int, new_node_div->outputs(), 1);
-                                  new_node->insertAfter(new_node_div);
-                                  new_node->outputs()[0]->setType(c10::IntType::get());
-
-                                  user->outputs()[0]->replaceAllUsesWith(new_node->outputs()[0]);
-                                  user->destroy();
-                                  break;
-
-                                } else if (torch::jit::toIValue(user->inputs()[2]).value().toStringRef() == "floor") {
-                                  // Floor case (round result down)
-                                  // Replace aten::div with aten::floordiv
-                                  new_node = g->create(c10::aten::floordiv, div_args, 1);
-                                  new_node->insertAfter(user);
-                                  new_node->outputs()[0]->setType(c10::IntType::get());
-
-                                  user->outputs()[0]->replaceAllUsesWith(new_node->outputs()[0]);
-                                  user->destroy();
-                                  break;
-                                }
+                          // Use this to handle special cases where the scalar version of the intermediate operator
+                          // has a different schema than the original
+                          if (user->kind() == c10::Symbol::fromQualString("aten::add")) {
+                            new_node = g->create(
+                                c10::Symbol::fromQualString("aten::add"),
+                                torch::jit::ArrayRef<torch::jit::Value*>({user->inputs()[0], user->inputs()[1]}),
+                                1);
+                            new_node->insertAfter(user);
+                            new_node->outputs()[0]->setType(c10::IntType::get());
+                            user->outputs()[0]->replaceAllUsesWith(new_node->outputs()[0]);
+                            user->destroy();
+                          } else if (user->kind() == c10::Symbol::fromQualString("aten::floordiv")) {
+                            new_node = g->create(c10::aten::floordiv, user->inputs(), 1);
+                            new_node->insertAfter(user);
+                            new_node->outputs()[0]->setType(c10::IntType::get());
+                            user->outputs()[0]->replaceAllUsesWith(new_node->outputs()[0]);
+                            user->destroy();
+                          } else if (user->kind() == c10::Symbol::fromQualString("aten::div")) {
+                            // If the first two entries to aten::div are non-Tensors,
+                            // there cannot be a rounding mode specified (3rd entry)
+                            if (!user->inputs()[0]->type()->isSubtypeOf(c10::TensorType::get()) &&
+                                !user->inputs()[1]->type()->isSubtypeOf(c10::TensorType::get()) &&
+                                user->inputs().size() == 3 &&
+                                user->inputs()[2]->type()->isSubtypeOf(c10::StringType::get()) &&
+                                torch::jit::toIValue(user->inputs()[2]).has_value()) {
+                              // Select the first 2 entries of the inputs, corresponding to the values
+                              auto div_args = user->inputs().slice(0, 2);
+
+                              // Depending on the rounding mode, create the appropriate nodes
+                              if (torch::jit::toIValue(user->inputs()[2]).value().toStringRef() == "trunc") {
+                                // Truncate case (round result towards 0)
+                                torch::jit::Node* new_node_div;
+                                // Create node which simply divides the two entries
+                                new_node_div = g->create(c10::aten::div, div_args, 1);
+                                new_node_div->insertAfter(user);
+                                new_node_div->outputs()[0]->setType(c10::FloatType::get());
+
+                                // Create node which casts the result to an integer, effectively truncating
+                                new_node = g->create(c10::aten::Int, new_node_div->outputs(), 1);
+                                new_node->insertAfter(new_node_div);
+                                new_node->outputs()[0]->setType(c10::IntType::get());
+
+                                user->outputs()[0]->replaceAllUsesWith(new_node->outputs()[0]);
+                                user->destroy();
+                              } else if (torch::jit::toIValue(user->inputs()[2]).value().toStringRef() == "floor") {
+                                // Floor case (round result down)
+                                // Replace aten::div with aten::floordiv
+                                new_node = g->create(c10::aten::floordiv, div_args, 1);
+                                new_node->insertAfter(user);
+                                new_node->outputs()[0]->setType(c10::IntType::get());
+
+                                user->outputs()[0]->replaceAllUsesWith(new_node->outputs()[0]);
+                                user->destroy();
                               }
-
-                            default:
-                              new_node = g->create(user->kind(), user->inputs(), 1);
-                              new_node->insertAfter(user);
-                              new_node->outputs()[0]->setType(c10::IntType::get());
-                              user->outputs()[0]->replaceAllUsesWith(new_node->outputs()[0]);
-                              user->destroy();
-                              break;
+                            }
+                          } else {
+                            new_node = g->create(user->kind(), user->inputs(), 1);
+                            new_node->insertAfter(user);
+                            new_node->outputs()[0]->setType(c10::IntType::get());
+                            user->outputs()[0]->replaceAllUsesWith(new_node->outputs()[0]);
+                            user->destroy();
                           }
 
                           LOG_GRAPH("New intermediate operation: " << *new_node);
diff --git a/tests/cpp/test_compiled_modules.cpp b/tests/cpp/test_compiled_modules.cpp
@@ -62,9 +62,6 @@ INSTANTIATE_TEST_SUITE_P(
         PathAndInput({"tests/modules/resnet18_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}}),
         PathAndInput({"tests/modules/mobilenet_v2_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}}),
         PathAndInput({"tests/modules/efficientnet_b0_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}}),
-        PathAndInput({"tests/modules/bert_base_uncased_traced.jit.pt", {{1, 14}, {1, 14}}, {at::kInt, at::kInt}})));
-// NOTE: ViT tests are disabled until Python 3.11 issue is resolved
-// https://github.com/huggingface/pytorch-image-models/issues/1946 PathAndInput({"tests/modules/vit_scripted.jit.pt",
-// {{1, 3, 224, 224}}, {at::kFloat}})));
+        PathAndInput({"tests/modules/vit_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}})));
 
 #endif
diff --git a/tests/modules/hub.py b/tests/modules/hub.py
@@ -51,11 +51,10 @@
         "model": timm.create_model("efficientnet_b0", pretrained=True),
         "path": "script",
     },
-    # NOTE: Disabling ViT until support in 3.11 is fixed https://github.com/huggingface/pytorch-image-models/issues/1946
-    # "vit": {
-    #     "model": timm.create_model("vit_base_patch16_224", pretrained=True),
-    #     "path": "script",
-    # },
+    "vit": {
+        "model": timm.create_model("vit_base_patch16_224", pretrained=True),
+        "path": "script",
+    },
     "pooling": {"model": cm.Pool(), "path": "trace"},
     "module_fallback": {"model": cm.ModuleFallbackMain(), "path": "script"},
     "loop_fallback_eval": {"model": cm.LoopFallbackEval(), "path": "script"},
@@ -68,7 +67,7 @@
     "tuple_input_output": {"model": cm.TupleInputOutput(), "path": "script"},
     "list_input_output": {"model": cm.ListInputOutput(), "path": "script"},
     "list_input_tuple_output": {"model": cm.ListInputTupleOutput(), "path": "script"},
-    "bert_base_uncased": {"model": cm.BertModule(), "path": "trace"},
+    # "bert_base_uncased": {"model": cm.BertModule(), "path": "trace"},
 }
 
 
diff --git a/tests/py/ts/models/custom_models.py b/tests/py/ts/models/custom_models.py
diff --git a/tests/py/ts/models/test_models.py b/tests/py/ts/models/test_models.py
@@ -2,7 +2,6 @@
 import unittest
 from typing import Dict
 
-import custom_models as cm
 import timm
 import torch
 import torch_tensorrt as torchtrt
@@ -92,42 +91,6 @@ def test_efficientnet_b0(self):
             msg=f"EfficientNet-B0 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
         )
 
-    def test_bert_base_uncased(self):
-        self.model = cm.BertModule()
-        self.input = torch.randint(0, 2, (1, 14), dtype=torch.int32).to("cuda")
-
-        compile_spec = {
-            "inputs": [
-                torchtrt.Input(
-                    self.input.shape,
-                    dtype=self.input.dtype,
-                    format=torch.contiguous_format,
-                ),
-                torchtrt.Input(
-                    self.input.shape,
-                    dtype=self.input.dtype,
-                    format=torch.contiguous_format,
-                ),
-            ],
-            "device": {
-                "device_type": torchtrt.DeviceType.GPU,
-                "gpu_id": 0,
-            },
-            "enabled_precisions": {torch.float},
-            "truncate_long_and_double": True,
-        }
-        with torchtrt.logging.debug():
-            trt_mod = torchtrt.ts.compile(self.model, **compile_spec)
-
-        model_outputs = self.model(self.input, self.input)
-        trt_model_outputs = trt_mod(self.input, self.input)
-        for out, trt_out in zip(model_outputs, trt_model_outputs):
-            cos_sim = cosine_similarity(out, trt_out)
-            self.assertTrue(
-                cos_sim > COSINE_THRESHOLD,
-                msg=f"HF BERT base-uncased TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
-            )
-
     def test_resnet18_half(self):
         self.model = models.resnet18(pretrained=True).eval().to("cuda")
         self.input = torch.randn((1, 3, 224, 224)).to("cuda")
diff --git a/tests/py/ts/models/test_multiple_registered_engines.py b/tests/py/ts/models/test_multiple_registered_engines.py
@@ -2,7 +2,6 @@
 import unittest
 from typing import Dict
 
-import custom_models as cm
 import timm
 import torch
 import torch_tensorrt as torchtrt