You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
# These quantizers require float32 input weights. Note that after quantization,
122
122
# the weights will no longer be float32, but lowbit integers
123
123
if get_precision() != torch.float32:
124
124
print(f"Quantizer {quantizer} requires float32 inputs, but received {get_precision()}. Changing dtype to float32. Note that after quantization, the weights will be lowbit integers, not float32.")
125
125
set_precision(torch.float32)
126
-
127
-
# We set global precision from quantize options if it is specified at cli.py:485
126
+
127
+
ifquantizer=="linear:fpaxw"anddevice!="mps":
128
+
raiseRuntimeError("linear:fpaxw quantization can only run on mps device!")
129
+
130
+
# We set global precision from quantize options if it is specified at cli.py:485
128
131
# so the precision returned by get_precision() is always the authoritative precision/dtype in torchchat
0 commit comments