|
1 | 1 | # llama.cpp for CANN
|
2 | 2 |
|
3 |
| -- [Background](#background) |
4 |
| -- [News](#news) |
5 |
| -- [OS](#os) |
6 |
| -- [Hardware](#hardware) |
7 |
| -- [Model Supports](#model-supports) |
8 |
| -- [Datatype Supports](#datatype-supports) |
9 |
| -- [Linux](#linux) |
10 |
| -- [TODO](#todo) |
| 3 | + - [Background](#background) |
| 4 | + - [News](#news) |
| 5 | + - [OS](#os) |
| 6 | + - [Hardware](#hardware) |
| 7 | + - [Model Supports](#model-supports) |
| 8 | + - [DataType Supports](#datatype-supports) |
| 9 | + - [Docker](#docker) |
| 10 | + - [Linux](#linux) |
| 11 | + - [TODO](#todo) |
| 12 | + |
| 13 | + |
11 | 14 |
|
12 | 15 | ## Background
|
13 | 16 |
|
@@ -50,24 +53,61 @@ The llama.cpp CANN backend is designed to support Ascend NPU. It utilizes the abi
|
50 | 53 |
|
51 | 54 | ## Model Supports
|
52 | 55 |
|
53 |
| -| Model Name | Status | |
54 |
| -|:-----------------------------:|:-------:| |
55 |
| -| Baichuan | Support | |
56 |
| -| Baichuan 2 | Support | |
57 |
| -| Bloom | Support | |
58 |
| -| Falcon 2 | Support | |
59 |
| -| Gpt 2 | Support | |
60 |
| -| InternLM 2 | Support | |
61 |
| -| Llama 2 | Support | |
62 |
| -| Llama 3 | Support | |
63 |
| -| Mamba | Support | |
64 |
| -| Mistral 7B | Support | |
65 |
| -| OLMo | Support | |
66 |
| -| Phi 3 | Support | |
67 |
| -| Qwen 2 | Support | |
68 |
| -| Refact | Support | |
69 |
| -| Starcoder | Support | |
70 |
| -| Yi | Support | |
| 56 | +| Model Name | FP16 | Q8_0 | Q4_0 | |
| 57 | +|:----------------------------|:-----:|:----:|:----:| |
| 58 | +| AquilaChat2-7B | √ | √ | √ | |
| 59 | +| Baichuan-7b | √ | √ | √ | |
| 60 | +| Baichuan2-7B-Chat | √ | √ | √ | |
| 61 | +| bitnet_b1_58-large | √ | √ | √ | |
| 62 | +| bloom-560m | √ | x | √ | |
| 63 | +| bloomz-alpaca-560m | √ | x | √ | |
| 64 | +| c4ai-command-r-35B-v01 | x | x | x | |
| 65 | +| chatglm3-6B | x | x | x | |
| 66 | +| chinese-alpaca-2-1.3b | √ | √ | √ | |
| 67 | +| CodeShell-7B | √ | √ | √ | |
| 68 | +| deepseek-ai_deepseek-coder-1.3B-base | x | x | x | |
| 69 | +| deepseek-ai_DeepSeek-V2-Lite | x | x | x | |
| 70 | +| deepseek-coder-6.7B-instruct | x | x | x | |
| 71 | +| DeepSeek-V2-Lite-64x1.5B | x | x | x | |
| 72 | +| falcon-7b-instruct | √ | √ | √ | |
| 73 | +| flan-t5-large | √ | √ | √ | |
| 74 | +| gemma-2-9b-it | √ | √ | √ | |
| 75 | +| glm-4-9B | x | x | x | |
| 76 | +| gpt2 | √ | √ | √ | |
| 77 | +| Gpt2-163M | √ | √ | √ | |
| 78 | +| granite-3B-code-instruct | √ | √ | √ | |
| 79 | +| GritLM-7B | √ | √ | √ | |
| 80 | +| internlm2_5-7b-chat | √ | √ | √ | |
| 81 | +| koala-7B-HF | √ | √ | √ | |
| 82 | +| Llama-2-7b-chat-hf | √ | √ | √ | |
| 83 | +| Llama-3-Smaug-8B | √ | √ | √ | |
| 84 | +| Llama2-Chinese-7b-Chat | √ | √ | √ | |
| 85 | +| Llama3-8B | √ | √ | √ | |
| 86 | +| Llama3-8b-chinese | √ | √ | √ | |
| 87 | +| mamba-130m-hf | √ | √ | √ | |
| 88 | +| Mistral-7B-Instruct-v0.2 | √ | √ | √ | |
| 89 | +| Mixtral-8x7B-Instruct-v0.1 | x | √ | √ | |
| 90 | +| mpt-7B | √ | √ | √ | |
| 91 | +| OLMo-1B-hf | √ | √ | √ | |
| 92 | +| OpenELM-3B-Instruct | √ | √ | √ | |
| 93 | +| Orion-14b-base | √ | √ | √ | |
| 94 | +| phi1 | x | x | x | |
| 95 | +| phi2 | x | x | x | |
| 96 | +| Phi-3-mini-4k-instruct | √ | √ | √ | |
| 97 | +| plamo-13b | √ | √ | √ | |
| 98 | +| pythia-70M | x | x | x | |
| 99 | +| Qwen-7B | √ | √ | √ | |
| 100 | +| Qwen2-1.5B-Instruct | √ | x | √ | |
| 101 | +| Refact-1_6B-fim | √ | √ | √ | |
| 102 | +| SmolLM-135M | √ | √ | √ | |
| 103 | +| stablelm-zephyr | x | x | x | |
| 104 | +| stablelm-2-zephyr-1_6b | x | x | x | |
| 105 | +| starcoderbase-1b | √ | √ | √ | |
| 106 | +| starcoder2-3b | √ | √ | √ | |
| 107 | +| vigogne-7b-chat | √ | √ | √ | |
| 108 | +| xverse-7b-chat | √ | √ | √ | |
| 109 | +| Yi-6b-Chat | √ | √ | √ | |
| 110 | + |
71 | 111 |
|
72 | 112 |
|
73 | 113 | ## DataType Supports
|
|
0 commit comments