
Commit ffb06a3

SlyEcho and FNsi authored
OpenLLaMA 3B support (#1588)
This adds support to llama.cpp for loading the model. Still missing are the changes required in convert.py to convert the model correctly: it needs to start reading the JSON configuration of HF models instead of deriving the values by guessing.

Co-authored-by: FNsi <[email protected]>
1 parent 7552ac5 commit ffb06a3

File tree

1 file changed: 7 additions, 0 deletions


llama.cpp

Lines changed: 7 additions & 0 deletions
@@ -42,6 +42,7 @@
 // available llama models
 enum e_model {
     MODEL_UNKNOWN,
+    MODEL_3B,
     MODEL_7B,
     MODEL_13B,
     MODEL_30B,
@@ -58,6 +59,7 @@ static const size_t MB = 1024*1024;
 static const std::map<e_model, size_t> & MEM_REQ_SCRATCH0()
 {
     static std::map<e_model, size_t> k_sizes = {
+        { MODEL_3B,  128ull * MB },
         { MODEL_7B,  512ull * MB },
         { MODEL_13B, 512ull * MB },
         { MODEL_30B, 512ull * MB },
@@ -69,6 +71,7 @@ static const std::map<e_model, size_t> & MEM_REQ_SCRATCH0()
 static const std::map<e_model, size_t> & MEM_REQ_SCRATCH1()
 {
     static std::map<e_model, size_t> k_sizes = {
+        { MODEL_3B,  128ull * MB },
         { MODEL_7B,  512ull * MB },
         { MODEL_13B, 512ull * MB },
         { MODEL_30B, 512ull * MB },
@@ -81,6 +84,7 @@ static const std::map<e_model, size_t> & MEM_REQ_SCRATCH1()
 static const std::map<e_model, size_t> & MEM_REQ_KV_SELF()
 {
     static std::map<e_model, size_t> k_sizes = {
+        { MODEL_3B,   682ull * MB },
         { MODEL_7B,  1026ull * MB },
         { MODEL_13B, 1608ull * MB },
         { MODEL_30B, 3124ull * MB },
@@ -94,6 +98,7 @@ static const std::map<e_model, size_t> & MEM_REQ_KV_SELF()
 static const std::map<e_model, size_t> & MEM_REQ_EVAL()
 {
     static std::map<e_model, size_t> k_sizes = {
+        { MODEL_3B,   512ull * MB },
         { MODEL_7B,   768ull * MB },
         { MODEL_13B, 1024ull * MB },
         { MODEL_30B, 1280ull * MB },
@@ -899,6 +904,7 @@ static const char *llama_ftype_name(enum llama_ftype ftype) {
 
 static const char *llama_model_type_name(e_model type) {
     switch (type) {
+        case MODEL_3B: return "3B";
         case MODEL_7B: return "7B";
         case MODEL_13B: return "13B";
         case MODEL_30B: return "30B";
@@ -932,6 +938,7 @@ static void llama_model_load_internal(
 
     {
         switch (hparams.n_layer) {
+            case 26: model.type = e_model::MODEL_3B; break;
             case 32: model.type = e_model::MODEL_7B; break;
            case 40: model.type = e_model::MODEL_13B; break;
             case 60: model.type = e_model::MODEL_30B; break;
