1 parent 66dcd40 commit 1e2efa3
backends/vulkan/_passes/int4_weight_only_quantizer.py
@@ -226,6 +226,12 @@ def _create_quantized_state_dict(
         self.groupsize,
         self.precision,  # dtype for scales_and_zeros
     )
+    # If the packing of 2 4-bit values into a single 8-bit value was not
+    # performed in the previous function call, then do it manually now.
+    if w_int4x8.shape == weight.shape:
+        w_int4x8 = (w_int4x8[::, ::2] << 4 | w_int4x8[::, 1::2]).to(
+            torch.uint8
+        )
     # In the original implementation, w_int4x8 is packed via calling the
     # _convert_weight_to_int4pack operator before storing the weight. However
     # the Vulkan implementation does not expect the weights to be packed, so
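For context, the sketch below (not part of the commit; the helper names pack_int4 and unpack_int4 are illustrative) demonstrates the same nibble-packing scheme the added lines perform: each pair of adjacent 4-bit values along the last dimension is combined into one uint8, with the even-indexed value in the high nibble, halving the last dimension.

    # Illustrative sketch of the nibble-packing scheme; helper names are
    # hypothetical and not part of the ExecuTorch/Vulkan codebase.
    import torch

    def pack_int4(w: torch.Tensor) -> torch.Tensor:
        # Each element of `w` holds a 4-bit value (0..15) in an 8-bit
        # container. Pair adjacent columns: the even-indexed value goes to
        # the high nibble, the odd-indexed value to the low nibble.
        return (w[:, ::2] << 4 | w[:, 1::2]).to(torch.uint8)

    def unpack_int4(packed: torch.Tensor) -> torch.Tensor:
        # Recover the original tensor by splitting each byte back into its
        # high and low nibbles and interleaving them.
        out = torch.empty(
            packed.shape[0], packed.shape[1] * 2, dtype=torch.uint8
        )
        out[:, ::2] = packed >> 4
        out[:, 1::2] = packed & 0x0F
        return out

    # Round-trip check on random 4-bit values.
    w = torch.randint(0, 16, (2, 8), dtype=torch.uint8)
    assert pack_int4(w).shape == (2, 4)
    assert torch.equal(unpack_int4(pack_int4(w)), w)

This also explains the guard in the diff: if the upstream quantization call already packed the weights, the tensor's last dimension is half the original width, so `w_int4x8.shape == weight.shape` only holds when packing still needs to be done.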