
Convert: Support DT_BF16 tensors #1309


Merged · 4 commits · May 4, 2023
convert.py: 18 changes (17 additions & 1 deletion)
@@ -67,6 +67,7 @@ class QuantizedDataType:
 {ftype: dtype for (dtype, ftype) in DATA_TYPE_TO_FTYPE.items()}
 
 DATA_TYPE_TO_NUMPY: Dict[DataType, 'np.dtype[Any]'] = {
+    DT_BF16: np.dtype(np.uint16),
     DT_F16: np.dtype(np.float16),
     DT_F32: np.dtype(np.float32),
     DT_I32: np.dtype(np.int32),
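A note on the uint16 carrier (not part of the diff): NumPy has no native bfloat16 dtype, so the converter keeps the raw 16-bit patterns in uint16 arrays and reinterprets them only when widening to float32. A quick check:

```python
import numpy as np

# NumPy has no built-in bfloat16, hence the uint16 stand-in above.
try:
    np.dtype("bfloat16")
except TypeError as e:
    print(e)  # data type 'bfloat16' not understood
```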
@@ -276,6 +277,12 @@ def permute(self, n_head: int) -> 'Tensor': ...
     def to_ggml(self) -> 'GGMLCompatibleTensor': ...
 
 
+def bf16_to_fp32(bf16_arr: np.ndarray) -> np.ndarray:
+    assert bf16_arr.dtype == np.uint16, f"Input array should be of dtype uint16, but got {bf16_arr.dtype}"
+    fp32_arr = bf16_arr.astype(np.uint32) << 16
+    return fp32_arr.view(np.float32)
+
+
 class UnquantizedTensor(Tensor):
     def __init__(self, ndarray: NDArray) -> None:
         assert isinstance(ndarray, np.ndarray)
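Why the shift works (reviewer's gloss, not part of the diff): bfloat16 is exactly the upper half of an IEEE-754 float32 (same sign bit and 8-bit exponent, mantissa truncated to 7 bits), so zero-padding the low 16 bits yields a valid float32. A round-trip sketch in NumPy:

```python
import numpy as np

x = np.array([3.14159, -1.0, 1e-3], dtype=np.float32)

# Truncate to bfloat16 by keeping the upper 16 bits (sign, exponent, 7 mantissa bits).
bf16 = (x.view(np.uint32) >> 16).astype(np.uint16)

# Invert it the same way bf16_to_fp32 does: widen, shift back up, reinterpret.
restored = (bf16.astype(np.uint32) << 16).view(np.float32)
print(restored)  # close to x; only the low 16 mantissa bits were lost
```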
@@ -284,6 +291,8 @@ def __init__(self, ndarray: NDArray) -> None:
 
     def astype(self, data_type: DataType) -> Tensor:
         dtype = DATA_TYPE_TO_NUMPY[data_type]
+        if self.data_type == DT_BF16:
+            self.ndarray = bf16_to_fp32(self.ndarray)
         return UnquantizedTensor(self.ndarray.astype(dtype))
 
     def to_ggml(self) -> 'UnquantizedTensor':
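The new branch in astype matters because ndarray.astype converts values, not bit patterns: casting the uint16 carrier straight to float32 would produce the integers encoded by the bit patterns. A small demonstration of the failure mode it avoids (illustrative values, not from the PR):

```python
import numpy as np

bits = np.array([0x4049], dtype=np.uint16)  # bfloat16 bit pattern for ~3.14

print(bits.astype(np.float32))                          # [16457.] -- value cast, wrong
print((bits.astype(np.uint32) << 16).view(np.float32))  # [3.140625] -- bit reinterpret, right
```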
@@ -686,6 +695,7 @@ def load(offset: int, elm_count: int) -> NDArray:
         description = f'storage data_type={data_type} path-in-zip={filename} path={self.zip_file.filename}'
         return LazyStorage(load=load, kind=pid[1], description=description)
 
+    @staticmethod
     def lazy_rebuild_tensor_v2(storage: Any, storage_offset: Any, size: Any, stride: Any,  # pyright: ignore[reportSelfClsParameterName]
                                requires_grad: Any, backward_hooks: Any, metadata: Any = None) -> LazyTensor:
         assert isinstance(storage, LazyStorage)
@@ -696,12 +706,18 @@ def load() -> UnquantizedTensor:
         description = f'pickled storage_offset={storage_offset} in {storage.description}'
         return LazyTensor(load, list(size), storage.kind.data_type, description)
 
+    @staticmethod
+    def rebuild_from_type_v2(func, new_type, args, state):
+        return func(*args)
+
     CLASSES: Dict[Any, Any] = {
+        ('torch._tensor', '_rebuild_from_type_v2'): rebuild_from_type_v2,
         ('torch._utils', '_rebuild_tensor_v2'): lazy_rebuild_tensor_v2,
+        ('torch', 'BFloat16Storage'): LazyStorageKind(DT_BF16),
         ('torch', 'HalfStorage'): LazyStorageKind(DT_F16),
         ('torch', 'FloatStorage'): LazyStorageKind(DT_F32),
         ('torch', 'IntStorage'): LazyStorageKind(DT_I32),
         ('torch', 'Tensor'): LazyTensor,
     }
 
     def find_class(self, module: str, name: str) -> Any:
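For context: find_class resolves the class names in the pickle stream through CLASSES, so torch storage types map to lightweight stand-ins and torch itself never has to be imported; the new BFloat16Storage entry is what routes bf16 checkpoints to DT_BF16. The rebuild_from_type_v2 shim handles newer PyTorch checkpoints, which can wrap a tensor rebuild in torch._tensor._rebuild_from_type_v2; forwarding to the inner rebuild function and ignoring new_type and state is enough here. A toy sketch of the unwrapping, with fake_rebuild as a hypothetical stand-in for lazy_rebuild_tensor_v2:

```python
def rebuild_from_type_v2(func, new_type, args, state):
    # Drop the subclass type and extra state; just run the inner rebuild.
    return func(*args)

def fake_rebuild(storage, offset, size, stride):  # hypothetical stand-in
    return f"tensor(size={size}, stride={stride})"

print(rebuild_from_type_v2(fake_rebuild, None, ("S", 0, (2, 3), (3, 1)), None))
# tensor(size=(2, 3), stride=(3, 1))
```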
@@ -961,7 +977,7 @@ def do_item(item: Tuple[str, LazyTensor]) -> NDArray:
 
 def pick_output_type(model: LazyModel, output_type_str: Optional[str]) -> GGMLFileType:
     wq_type = model["layers.0.attention.wq.weight"].data_type
-    if output_type_str == "f32" or (output_type_str is None and wq_type == DT_F32):
+    if output_type_str == "f32" or (output_type_str is None and wq_type in (DT_F32, DT_BF16)):
         return GGMLFileType.AllF32
     if output_type_str == "f16" or (output_type_str is None and wq_type == DT_F16):
         return GGMLFileType.MostlyF16
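On the pick_output_type change (my reading, not stated in the diff): when the weights are bf16 and no output type is requested, the converter defaults to f32 rather than f16 because bf16 shares float32's 8-bit exponent; widening to f32 is exact, while a cast to f16 with its 5-bit exponent can overflow large magnitudes to inf:

```python
import numpy as np

big = np.array([1e20], dtype=np.float32)  # representable in bfloat16
print(big.astype(np.float16))             # [inf]: float16 overflows past ~65504
```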