
Commit 1342c71

mcremon-meta authored and facebook-github-bot committed
Allow int8 type in quantized_linear and quantized_fully_connected (#5900)
Summary: As titled.
Reviewed By: zonglinpeng
Differential Revision: D63659950
1 parent 12b9819 commit 1342c71

File tree

1 file changed: +1 −1 lines changed


backends/cadence/aot/ops_registrations.py

Lines changed: 1 addition & 1 deletion
@@ -111,7 +111,7 @@ def quantized_linear_meta(
     weight_size = list(weight.size())
     assert len(weight_size) == 2
     out_size[-1] = weight_size[0]
-    return src.new_empty(out_size, dtype=torch.uint8)
+    return src.new_empty(out_size, dtype=src.dtype)


 @register_fake("cadence::quantized_conv")
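A meta ("fake") kernel like `quantized_linear_meta` does no arithmetic; it only reports the output tensor's shape and dtype so the compiler can trace the graph. The fix propagates the source tensor's dtype instead of hard-coding `torch.uint8`, which is what allows int8 inputs through. A minimal torch-free sketch of that shape/dtype logic (the function name and the plain-tuple inputs here are illustrative, not the actual op signature, and the weight is assumed to be 2-D with shape `(out_features, in_features)` as the `assert` in the diff implies):

```python
def quantized_linear_out_meta(src_shape, src_dtype, weight_shape):
    """Sketch of the meta-kernel logic after the fix.

    The output keeps the source's leading dimensions, its last dimension
    becomes the weight's out_features (weight_shape[0]), and the dtype now
    follows the source instead of being fixed to uint8.
    """
    weight_size = list(weight_shape)
    assert len(weight_size) == 2  # (out_features, in_features)
    out_size = list(src_shape)
    out_size[-1] = weight_size[0]
    return out_size, src_dtype
```

With a uint8 source the behavior is unchanged, while an int8 source now yields an int8 output shape descriptor instead of an incorrect uint8 one.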
