Commit 4b67dc9
Arm backend: Do not delegate casting to FP dtypes with BI profile (#10906)
- Casting to floating-point dtypes should be rejected for delegation. The ToCopySupported class should guarantee this. However, the shallow copy used in _merge_supported_types unintentionally modified the SUPPORTED_INT_TYPES dict, merging SUPPORTED_FLOAT_TYPES into it. As a result, casts to floating-point dtypes could also pass the check under the BI profile.
- Fix this by using deepcopy.
- Add a unit test in test_to_copy.py to check that casts to FP dtypes are not delegated.

Signed-off-by: Yufeng Shi <[email protected]>
1 parent d069d65 commit 4b67dc9
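For context, here is a minimal standalone sketch of the aliasing bug this commit fixes. The dict contents are simplified stand-ins for SUPPORTED_INT_TYPES and SUPPORTED_FLOAT_TYPES in to_copy_support.py; the merge logic mirrors _merge_supported_types:

import copy

# Simplified stand-ins for the real SUPPORTED_INT_TYPES / SUPPORTED_FLOAT_TYPES,
# which map an input dtype to the list of target dtypes a cast may produce.
SUPPORTED_INT_TYPES = {"int8": ["int16", "int32"]}
SUPPORTED_FLOAT_TYPES = {"int8": ["float32"]}

def merge_shallow(dtypes1, dtypes2):
    merged = dtypes1  # BUG: `merged` aliases dtypes1; updates leak into the caller's dict
    for k, v in dtypes2.items():
        merged[k] = merged.get(k, []) + v
    return merged

def merge_deep(dtypes1, dtypes2):
    merged = copy.deepcopy(dtypes1)  # fix: mutate an independent copy
    for k, v in dtypes2.items():
        merged[k] = merged.get(k, []) + v
    return merged

merge_shallow(SUPPORTED_INT_TYPES, SUPPORTED_FLOAT_TYPES)
print(SUPPORTED_INT_TYPES)  # {'int8': ['int16', 'int32', 'float32']} -- float dtype leaked in

SUPPORTED_INT_TYPES = {"int8": ["int16", "int32"]}  # reset for the fixed version
merge_deep(SUPPORTED_INT_TYPES, SUPPORTED_FLOAT_TYPES)
print(SUPPORTED_INT_TYPES)  # {'int8': ['int16', 'int32']} -- unchanged

Worth noting: because the merge loop builds new lists (merged.get(k, []) + v) rather than mutating them in place, a shallow dict(dtypes1) copy would also have fixed this particular bug; deepcopy is the more defensive choice should the merge logic ever mutate values in place.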

File tree

2 files changed: +59 −4 lines

backends/arm/operator_support/to_copy_support.py (4 additions, 1 deletion)

@@ -4,6 +4,7 @@
 # LICENSE file in the root directory of this source tree.

 # pyre-unsafe
+import copy
 import logging

 import torch
@@ -42,7 +43,9 @@ def _merge_supported_types(
     dtypes1: SupportedTypeDict,
     dtypes2: SupportedTypeDict,
 ) -> SupportedTypeDict:
-    merged_dtypes = dtypes1
+    merged_dtypes = copy.deepcopy(
+        dtypes1
+    )  # Use deepcopy to avoid unintentionally modifying SUPPORTED_INT_TYPES
     for k, v in dtypes2.items():
         merged_dtypes[k] = merged_dtypes.get(k, []) + v
     return merged_dtypes

backends/arm/test/ops/test_to_copy.py (55 additions, 3 deletions)

@@ -12,7 +12,10 @@
 import torch

 from executorch.backends.arm.test import common
-from executorch.backends.arm.test.tester.test_pipeline import TosaPipelineMI
+from executorch.backends.arm.test.tester.test_pipeline import (
+    OpNotSupportedPipeline,
+    TosaPipelineMI,
+)

 input_t1 = Tuple[torch.Tensor]  # Input x

@@ -31,11 +34,14 @@ def forward(self, x: torch.Tensor):

 Only test unquantized graphs as explicit casting of dtypes messes with the
 quantization.
+However, the model being exported may contain explicit casts to floating-point
+dtypes. These casts, or their decompositions, should be rejected during
+partitioning. This case is covered by TestToCopy_BI.

 Note: This is also covered by test_scalars.py.
 """

-_TO_COPY_TEST_DATA = {
+_TO_COPY_TEST_DATA_MI = {
     "rand_fp16": lambda: (torch.rand((1, 2, 3, 4), dtype=torch.float16), torch.float32),
     "rand_fp32": lambda: (torch.rand((1, 2, 3, 4), dtype=torch.float32), torch.float16),
     "rand_int8": lambda: (
@@ -53,7 +59,7 @@ def forward(self, x: torch.Tensor):
 }


-@common.parametrize("test_data", _TO_COPY_TEST_DATA)
+@common.parametrize("test_data", _TO_COPY_TEST_DATA_MI)
 def test_copy_tosa_MI(test_data: Tuple):
     test_tensor, new_dtype = test_data()

@@ -64,3 +70,49 @@ def test_copy_tosa_MI(test_data: Tuple):
         exir_op=[],
     )
     pipeline.run()
+
+
+"""
+Casting operations that output floating-point dtypes should be rejected under the
+BI profile, rather than introducing an invalid dtype into the TOSA graph.
+For example, x.to(dtype=torch.float32) will eventually be lowered to
+exir_ops.edge.dim_order_ops._to_dim_order_copy.default. We should reject this
+operation in ToCopySupported::is_node_tosa_supported() before it enters the
+delegated graph.
+"""
+_TO_COPY_TEST_DATA_BI = {
+    "rand_int8_fp32": lambda: (
+        torch.randint(-127, 128, (1, 2, 3, 4), dtype=torch.int8),
+        torch.float32,
+    ),
+    "rand_int16_fp32": lambda: (
+        torch.randint(-127, 128, (1, 2, 3, 4), dtype=torch.int16),
+        torch.float32,
+    ),
+    "rand_int32_fp32": lambda: (
+        torch.randint(-127, 128, (1, 2, 3, 4), dtype=torch.int32),
+        torch.float32,
+    ),
+    "rand_int32_fp16": lambda: (
+        torch.randint(-127, 128, (1, 2, 3, 4), dtype=torch.int32),
+        torch.float16,
+    ),
+    "rand_int32_bf16": lambda: (
+        torch.randint(-127, 128, (1, 2, 3, 4), dtype=torch.int32),
+        torch.bfloat16,
+    ),
+}
+
+
+@common.parametrize("test_data", _TO_COPY_TEST_DATA_BI)
+def test_copy_tosa_BI(test_data: Tuple):
+    test_tensor, new_dtype = test_data()
+
+    pipeline = OpNotSupportedPipeline[input_t1](
+        Cast(new_dtype),
+        (test_tensor,),
+        {
+            "executorch_exir_dialects_edge__ops_dim_order_ops__to_dim_order_copy_default": 1
+        },
+        quantize=True,
+    )
+    pipeline.run()
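To make the intended behavior concrete, below is an illustrative sketch of the kind of check the new test exercises. This is not the actual ExecuTorch API: Profile, is_cast_supported, and the dtype tables here are hypothetical simplifications. The point is that under the integer-only BI profile, any cast whose output dtype is floating point must be rejected so it never enters the delegated graph, while the MI profile may accept it:

from enum import Enum, auto

import torch

class Profile(Enum):
    BI = auto()  # base inference: integer-only TOSA profile
    MI = auto()  # main inference: floating point allowed

# Hypothetical, simplified dtype tables for illustration only.
SUPPORTED_INT_TYPES = (torch.int8, torch.int16, torch.int32)
SUPPORTED_FLOAT_TYPES = (torch.float16, torch.bfloat16, torch.float32)

def is_cast_supported(output_dtype: torch.dtype, profile: Profile) -> bool:
    """Reject casts whose output dtype is not allowed by the active profile."""
    if profile is Profile.BI:
        return output_dtype in SUPPORTED_INT_TYPES
    return output_dtype in SUPPORTED_INT_TYPES + SUPPORTED_FLOAT_TYPES

assert not is_cast_supported(torch.float32, Profile.BI)  # rejected: FP output under BI
assert is_cast_supported(torch.int16, Profile.BI)        # allowed: int output under BI
assert is_cast_supported(torch.float32, Profile.MI)      # allowed: FP output under MI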
