Skip to content

Commit 58b515c

Browse files
committed
convert-hf : add --outtype auto-f16
This option exists for model quantizers who want an initial GGUF with the highest fidelity to the original model while still using a 16-bit float type instead of 32-bit floats.
1 parent 95930da commit 58b515c

File tree

2 files changed

+14
-2
lines changed

2 files changed

+14
-2
lines changed

convert-hf-to-gguf.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,15 @@ def __init__(self, dir_model: Path, ftype: int, fname_out: Path, is_big_endian:
83   83          self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer"])
84   84          self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
85   85          self.tensor_names = None
     86 +        if self.ftype == gguf.LlamaFileType.GUESSED:
     87 +            # NOTE: can't use field "torch_dtype" in config.json, because some finetunes lie.
     88 +            _, first_tensor = next(self.get_tensors())
     89 +            if first_tensor.dtype == torch.float16:
     90 +                logger.info(f"choosing --outtype f16 from first tensor type ({first_tensor.dtype})")
     91 +                self.ftype = gguf.LlamaFileType.MOSTLY_F16
     92 +            else:
     93 +                logger.info(f"choosing --outtype bf16 from first tensor type ({first_tensor.dtype})")
     94 +                self.ftype = gguf.LlamaFileType.MOSTLY_BF16
86   95
87   96      @classmethod
88   97      def __init_subclass__(cls):
@@ -2394,8 +2403,8 @@ def parse_args() -> argparse.Namespace:
2394 2403          help="path to write to; default: based on input",
2395 2404      )
2396 2405      parser.add_argument(
2397      -        "--outtype", type=str, choices=["f32", "f16", "bf16"], default="f16",
2398      -        help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16",
     2406 +        "--outtype", type=str, choices=["f32", "f16", "bf16", "auto-f16"], default="f16",
     2407 +        help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, auto-f16 for the highest-fidelity 16-bit float type depending on the first loaded tensor type",
2399 2408      )
2400 2409      parser.add_argument(
2401 2410          "--bigendian", action="store_true",
@@ -2453,6 +2462,7 @@ def main() -> None:
2453 2462          "f32": gguf.LlamaFileType.ALL_F32,
2454 2463          "f16": gguf.LlamaFileType.MOSTLY_F16,
2455 2464          "bf16": gguf.LlamaFileType.MOSTLY_BF16,
     2465 +        "auto-f16": gguf.LlamaFileType.GUESSED, # TODO: use a more appropriate "auto" type
2456 2466      }
2457 2467
2458 2468      if args.outfile is not None:

gguf-py/gguf/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -861,6 +861,8 @@ class LlamaFileType(IntEnum):
861 861      MOSTLY_IQ1_M = 31 # except 1d tensors
862 862      MOSTLY_BF16  = 32 # except 1d tensors
863 863
    864 +    GUESSED = 1024 # not specified in the model file
    865 +
864 866
865 867  class GGUFEndian(IntEnum):
866 868      LITTLE = 0

0 commit comments

Comments (0)