@@ -55,8 +55,8 @@ class TOKENIZER_TYPE(IntEnum):
55
55
56
56
# TODO: add models here, base models preferred
57
57
models = [
58
- {"name" : "llama-spm" , "tokt" : TOKENIZER_TYPE .SPM , "repo" : "https://huggingface.co/meta-llama/Llama-2-7b-hf" , },
59
- {"name" : "llama-bpe" , "tokt" : TOKENIZER_TYPE .BPE , "repo" : "https://huggingface.co/meta-llama/Meta-Llama-3-8B" , },
58
+ # {"name": "llama-spm", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/meta-llama/Llama-2-7b-hf", },
59
+ # {"name": "llama-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Meta-Llama-3-8B", },
60
60
{"name" : "phi-3" , "tokt" : TOKENIZER_TYPE .SPM , "repo" : "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct" , },
61
61
{"name" : "deepseek-llm" , "tokt" : TOKENIZER_TYPE .BPE , "repo" : "https://huggingface.co/deepseek-ai/deepseek-llm-7b-base" , },
62
62
{"name" : "deepseek-coder" , "tokt" : TOKENIZER_TYPE .BPE , "repo" : "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base" , },
@@ -67,6 +67,8 @@ class TOKENIZER_TYPE(IntEnum):
67
67
{"name" : "gpt-2" , "tokt" : TOKENIZER_TYPE .BPE , "repo" : "https://huggingface.co/openai-community/gpt2" , },
68
68
{"name" : "refact" , "tokt" : TOKENIZER_TYPE .BPE , "repo" : "https://huggingface.co/smallcloudai/Refact-1_6-base" , },
69
69
{"name" : "command-r" , "tokt" : TOKENIZER_TYPE .BPE , "repo" : "https://huggingface.co/CohereForAI/c4ai-command-r-v01" , },
70
+ {"name" : "jina-embeddings-v2-base-es" , "tokt" : TOKENIZER_TYPE .BPE , "repo" : "https://huggingface.co/jinaai/jina-embeddings-v2-base-es" , },
71
+ {"name" : "jina-embeddings-v2-base-de" , "tokt" : TOKENIZER_TYPE .BPE , "repo" : "https://huggingface.co/jinaai/jina-embeddings-v2-base-de" , },
70
72
]
71
73
72
74
# make directory "models/tokenizers" if it doesn't exist
0 commit comments