@@ -164,38 +164,23 @@ def count_model_parts(dir_model: str) -> int:
 gguf_writer.add_token_scores(scores)
 gguf_writer.add_token_types(toktypes)
 
-if "added_tokens" in tokenizer_json and Path(dir_model + "/tokenizer_config.json").is_file():
-    print("gguf: get special token ids")
-
-    with open(dir_model + "/tokenizer_config.json", "r", encoding="utf-8") as f:
-        tokenizer_config = json.load(f)
-
-    # find special token ids
-
-    if "bos_token" in tokenizer_config:
-        for key in tokenizer_json["added_tokens"]:
-            if key["content"] == tokenizer_config["bos_token"]:
-                gguf_writer.add_bos_token_id(key["id"])
-
-    if "eos_token" in tokenizer_config:
-        for key in tokenizer_json["added_tokens"]:
-            if key["content"] == tokenizer_config["eos_token"]:
-                gguf_writer.add_eos_token_id(key["id"])
-
-    if "unk_token" in tokenizer_config:
-        for key in tokenizer_json["added_tokens"]:
-            if key["content"] == tokenizer_config["unk_token"]:
-                gguf_writer.add_unk_token_id(key["id"])
-
-    if "sep_token" in tokenizer_config:
-        for key in tokenizer_json["added_tokens"]:
-            if key["content"] == tokenizer_config["sep_token"]:
-                gguf_writer.add_sep_token_id(key["id"])
-
-    if "pad_token" in tokenizer_config:
-        for key in tokenizer_json["added_tokens"]:
-            if key["content"] == tokenizer_config["pad_token"]:
-                gguf_writer.add_pad_token_id(key["id"])
+print("gguf: get special token ids")
+# Look for special tokens in config.json
+
+if "bos_token_id" in hparams and hparams["bos_token_id"] != None:
+    gguf_writer.add_bos_token_id(hparams["bos_token_id"])
+
+if "eos_token_id" in hparams and hparams["eos_token_id"] != None:
+    gguf_writer.add_eos_token_id(hparams["eos_token_id"])
+
+if "unk_token_id" in hparams and hparams["unk_token_id"] != None:
+    gguf_writer.add_unk_token_id(hparams["unk_token_id"])
+
+if "sep_token_id" in hparams and hparams["sep_token_id"] != None:
+    gguf_writer.add_sep_token_id(hparams["sep_token_id"])
+
+if "pad_token_id" in hparams and hparams["pad_token_id"] != None:
+    gguf_writer.add_pad_token_id(hparams["pad_token_id"])
 
 
 # TENSORS
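
A minimal sketch of the lookup the new `+` lines perform, assuming `hparams` holds the parsed config.json of the model directory; the `special_token_ids` helper below is hypothetical, introduced only to illustrate the pattern:

import json
from pathlib import Path

def special_token_ids(dir_model: str) -> dict:
    """Return the special token ids that config.json explicitly declares.
    Hypothetical helper mirroring the lookup in the diff above."""
    with open(Path(dir_model) / "config.json", "r", encoding="utf-8") as f:
        hparams = json.load(f)

    ids = {}
    for name in ("bos_token_id", "eos_token_id", "unk_token_id",
                 "sep_token_id", "pad_token_id"):
        # config.json often declares a key with an explicit null, so test
        # the value, not just the key's presence (same intent as the
        # `!= None` checks in the diff)
        if hparams.get(name) is not None:
            ids[name] = hparams[name]
    return ids

With the ids collected this way, the script can pass each one to the matching gguf_writer.add_*_token_id setter, as the added lines do, instead of cross-referencing tokenizer_config.json against the tokenizer's added_tokens list.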