Skip to content

Commit eb57fee

Browse files
authored
gguf-py : Add tokenizer.ggml.pre to gguf-new-metadata.py (#7627)
1 parent 55d6226 commit eb57fee

File tree

1 file changed: 4 additions (+4), 0 deletions (-0)

1 file changed: 4 additions (+4), 0 deletions (-0)

gguf-py/scripts/gguf-new-metadata.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -144,6 +144,7 @@ def main() -> None:
144144
parser.add_argument("--general-description", type=str, help="The models general.description", metavar='"Description ..."')
145145
parser.add_argument("--chat-template", type=str, help="Chat template string (or JSON string containing templates)", metavar='"{% ... %} ..."')
146146
parser.add_argument("--chat-template-config", type=Path, help="Config file containing chat template(s)", metavar='tokenizer_config.json')
147+
parser.add_argument("--pre-tokenizer", type=str, help="The models tokenizer.ggml.pre", metavar='"pre tokenizer"')
147148
parser.add_argument("--remove-metadata", action="append", type=str, help="Remove metadata (by key name) from output model", metavar='general.url')
148149
parser.add_argument("--special-token", action="append", type=str, help="Special token by value", nargs=2, metavar=(' | '.join(token_names.keys()), '"<token>"'))
149150
parser.add_argument("--special-token-by-id", action="append", type=str, help="Special token by id", nargs=2, metavar=(' | '.join(token_names.keys()), '0'))
@@ -172,6 +173,9 @@ def main() -> None:
172173
if template:
173174
new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE] = MetadataDetails(gguf.GGUFValueType.STRING, template)
174175

176+
if args.pre_tokenizer:
177+
new_metadata[gguf.Keys.Tokenizer.PRE] = MetadataDetails(gguf.GGUFValueType.STRING, args.pre_tokenizer)
178+
175179
if remove_metadata:
176180
logger.warning('*** Warning *** Warning *** Warning **')
177181
logger.warning('* Most metadata is required for a fully functional GGUF file,')

0 commit comments

Comments (0)