Skip to content

Commit 41a2ed0

Browse files
committed
Ignore unusable json values
1 parent ca1fc20 commit 41a2ed0

File tree

1 file changed

+7
-6
lines changed

1 file changed

+7
-6
lines changed

convert.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -283,21 +283,22 @@ def __init__(self, fname_tokenizer: Path, fname_added_tokens: Optional[Path], fn
283283
else:
284284
tokenizer_config = {}
285285
for key, value in tokenizer_config.items():
286-
assert isinstance(value, dict) or isinstance(value, str)
287-
if key not in TOKEN_NAME_TO_ID or TOKEN_NAME_TO_ID[key] == -1:
286+
if not isinstance(value, dict) and not isinstance(value, str):
288287
continue
289-
self.special_tokens_map[TOKEN_NAME_TO_ID[key]] = value["content"] if isinstance(value, dict) else value
288+
token_id = TOKEN_NAME_TO_ID.get(key, -1)
289+
if token_id == -1:
290+
continue
291+
self.special_tokens_map[token_id] = value["content"] if isinstance(value, dict) else value
290292

291293
special_tokens: Dict[str, Any]
292294
if fname_special_tokens is not None:
293295
special_tokens = json.load(open(fname_special_tokens))
294296
else:
295297
special_tokens = {}
296298
for key, value in special_tokens.items():
297-
assert isinstance(value, dict) or isinstance(value, str)
298-
if key not in TOKEN_NAME_TO_ID:
299+
if not isinstance(value, dict) and not isinstance(value, str):
299300
continue
300-
token_id = TOKEN_NAME_TO_ID[key]
301+
token_id = TOKEN_NAME_TO_ID.get(key, -1)
301302
if token_id == -1 or token_id in self.special_tokens_map:
302303
continue
303304
self.special_tokens_map[token_id] = value["content"] if isinstance(value, dict) else value

0 commit comments

Comments
 (0)