Commit 0ce221e

mcremon-meta authored and facebook-github-bot committed

Allow int8 type in quantized_linear and quantized_fully_connected (#5900)

Summary:
Pull Request resolved: #5900

As titled.

Reviewed By: zonglinpeng

Differential Revision: D63659950

fbshipit-source-id: 1c28a797ef2a02ba86a2d860bf6ad44194f38ca0
1 parent f5f6969 commit 0ce221e

File tree

1 file changed (+1, -1 lines)


backends/cadence/aot/ops_registrations.py (1 addition, 1 deletion)

@@ -111,7 +111,7 @@ def quantized_linear_meta(
     weight_size = list(weight.size())
     assert len(weight_size) == 2
     out_size[-1] = weight_size[0]
-    return src.new_empty(out_size, dtype=torch.uint8)
+    return src.new_empty(out_size, dtype=src.dtype)


 @register_fake("cadence::quantized_conv")
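The one-line change above makes the fake/meta kernel propagate the source tensor's dtype instead of hardcoding uint8, so int8-quantized graphs shape-check correctly. A minimal sketch of the effect, using a toy Tensor stand-in (this class and its string dtypes are illustrative assumptions, not torch.Tensor):

```python
from dataclasses import dataclass


@dataclass
class Tensor:
    """Toy stand-in for a tensor: just a shape and a dtype string."""
    shape: tuple
    dtype: str

    def new_empty(self, shape, dtype):
        # Mirrors torch.Tensor.new_empty: new tensor, given shape and dtype.
        return Tensor(tuple(shape), dtype)


def quantized_linear_meta(src: Tensor, weight: Tensor) -> Tensor:
    # Output shape: src's leading dims, with the last dim replaced by
    # weight's output features (weight is [out_features, in_features]).
    out_size = list(src.shape)
    assert len(weight.shape) == 2
    out_size[-1] = weight.shape[0]
    # Before the fix the dtype was hardcoded to uint8, which broke
    # int8-quantized graphs; after the fix, the source dtype propagates.
    return src.new_empty(out_size, dtype=src.dtype)


# An int8 input now yields an int8 meta output with shape (4, 8).
out = quantized_linear_meta(Tensor((4, 16), "int8"), Tensor((8, 16), "int8"))
```

The same kernel still works for uint8 inputs, since the dtype is simply passed through rather than overwritten.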
