Commit 8233c18

llama : arch
1 parent: 1da4cb0

File tree: 9 files changed (+113, -105 lines)


src/llama-arch.cpp

Lines changed: 41 additions & 0 deletions

@@ -1 +1,42 @@
 #include "llama-arch.h"
+
+#include "llama-impl.h"
+
+LLM_KV::LLM_KV(llm_arch arch) : arch(arch) {}
+
+std::string LLM_KV::operator()(llm_kv kv) const {
+    return ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
+}
+
+std::string LLM_TN_IMPL::str() const {
+    if (LLM_TENSOR_NAMES.at(arch).find(tensor) == LLM_TENSOR_NAMES.at(arch).end()) {
+        return "__missing__";
+    }
+
+    std::string name = ::format(LLM_TENSOR_NAMES.at(arch).at(tensor), bid, xid);
+
+    if (suffix != nullptr) {
+        name += ".";
+        name += suffix;
+    }
+
+    return name;
+}
+
+const char * llm_arch_name(llm_arch arch) {
+    auto it = LLM_ARCH_NAMES.find(arch);
+    if (it == LLM_ARCH_NAMES.end()) {
+        return "unknown";
+    }
+    return it->second;
+}
+
+llm_arch llm_arch_from_string(const std::string & name) {
+    for (const auto & kv : LLM_ARCH_NAMES) { // NOLINT
+        if (kv.second == name) {
+            return kv.first;
+        }
+    }
+
+    return LLM_ARCH_UNKNOWN;
+}
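
The two new free functions are intended as inverses for known architectures. A minimal round-trip sketch, assuming it is compiled into the same target as llama-arch.cpp; the "llama" arch name matches the upstream LLM_ARCH_NAMES table but is used here only as an illustration:

    // round_trip.cpp: sketch of the name <-> enum round trip (not part of this commit)
    #include "llama-arch.h"

    #include <cassert>
    #include <string>

    int main() {
        // String -> enum: scans LLM_ARCH_NAMES for a matching value.
        const llm_arch arch = llm_arch_from_string("llama");
        assert(arch != LLM_ARCH_UNKNOWN);

        // Enum -> string: map lookup with an "unknown" fallback.
        assert(std::string(llm_arch_name(arch)) == "llama");

        // Unrecognized names fall back to the sentinel enum value.
        assert(llm_arch_from_string("no-such-arch") == LLM_ARCH_UNKNOWN);
        return 0;
    }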

src/llama-arch.h

Lines changed: 5 additions & 84 deletions

@@ -1,7 +1,5 @@
 #pragma once
 
-#include "llama-impl.h"
-
 #include <map>
 
 //
@@ -375,13 +373,11 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
 };
 
 struct LLM_KV {
-    LLM_KV(llm_arch arch) : arch(arch) {}
+    LLM_KV(llm_arch arch);
 
     llm_arch arch;
 
-    std::string operator()(llm_kv kv) const {
-        return ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
-    }
+    std::string operator()(llm_kv kv) const;
 };
 
 enum llm_tensor {
@@ -1589,16 +1585,6 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
     { "gigachat", LLM_CHAT_TEMPLATE_GIGACHAT },
 };
 
-static llm_arch llm_arch_from_string(const std::string & name) {
-    for (const auto & kv : LLM_ARCH_NAMES) { // NOLINT
-        if (kv.second == name) {
-            return kv.first;
-        }
-    }
-
-    return LLM_ARCH_UNKNOWN;
-}
-
 // helper to handle gguf constants
 // usage:
 //
@@ -1615,20 +1601,7 @@ struct LLM_TN_IMPL {
     const int bid;
     const int xid;
 
-    std::string str() const {
-        if (LLM_TENSOR_NAMES.at(arch).find(tensor) == LLM_TENSOR_NAMES.at(arch).end()) {
-            return "__missing__";
-        }
-
-        std::string name = ::format(LLM_TENSOR_NAMES.at(arch).at(tensor), bid, xid);
-
-        if (suffix != nullptr) {
-            name += ".";
-            name += suffix;
-        }
-
-        return name;
-    }
+    std::string str() const;
 
     operator std::string() const {
         return str();
@@ -1657,58 +1630,6 @@ struct LLM_TN {
     }
 };
 
-//
-// load LLaMA models
-//
-
-static const char * llama_model_arch_name(llm_arch arch) {
-    auto it = LLM_ARCH_NAMES.find(arch);
-    if (it == LLM_ARCH_NAMES.end()) {
-        return "unknown";
-    }
-    return it->second;
-}
-
-static std::string llama_model_ftype_name(llama_ftype ftype) {
-    if (ftype & LLAMA_FTYPE_GUESSED) {
-        return llama_model_ftype_name((enum llama_ftype) (ftype & ~LLAMA_FTYPE_GUESSED)) + " (guessed)";
-    }
-
-    switch (ftype) {
-        case LLAMA_FTYPE_ALL_F32:        return "all F32";
-        case LLAMA_FTYPE_MOSTLY_F16:     return "F16";
-        case LLAMA_FTYPE_MOSTLY_BF16:    return "BF16";
-        case LLAMA_FTYPE_MOSTLY_Q4_0:    return "Q4_0";
-        case LLAMA_FTYPE_MOSTLY_Q4_1:    return "Q4_1";
-        case LLAMA_FTYPE_MOSTLY_Q5_0:    return "Q5_0";
-        case LLAMA_FTYPE_MOSTLY_Q5_1:    return "Q5_1";
-        case LLAMA_FTYPE_MOSTLY_Q8_0:    return "Q8_0";
-        case LLAMA_FTYPE_MOSTLY_Q2_K:    return "Q2_K - Medium";
-        case LLAMA_FTYPE_MOSTLY_Q2_K_S:  return "Q2_K - Small";
-        case LLAMA_FTYPE_MOSTLY_Q3_K_S:  return "Q3_K - Small";
-        case LLAMA_FTYPE_MOSTLY_Q3_K_M:  return "Q3_K - Medium";
-        case LLAMA_FTYPE_MOSTLY_Q3_K_L:  return "Q3_K - Large";
-        case LLAMA_FTYPE_MOSTLY_Q4_K_S:  return "Q4_K - Small";
-        case LLAMA_FTYPE_MOSTLY_Q4_K_M:  return "Q4_K - Medium";
-        case LLAMA_FTYPE_MOSTLY_Q5_K_S:  return "Q5_K - Small";
-        case LLAMA_FTYPE_MOSTLY_Q5_K_M:  return "Q5_K - Medium";
-        case LLAMA_FTYPE_MOSTLY_Q6_K:    return "Q6_K";
-        case LLAMA_FTYPE_MOSTLY_TQ1_0:   return "TQ1_0 - 1.69 bpw ternary";
-        case LLAMA_FTYPE_MOSTLY_TQ2_0:   return "TQ2_0 - 2.06 bpw ternary";
-        case LLAMA_FTYPE_MOSTLY_IQ2_XXS: return "IQ2_XXS - 2.0625 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ2_XS:  return "IQ2_XS - 2.3125 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ2_S:   return "IQ2_S - 2.5 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ2_M:   return "IQ2_M - 2.7 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ3_XS:  return "IQ3_XS - 3.3 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ3_XXS: return "IQ3_XXS - 3.0625 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ1_S:   return "IQ1_S - 1.5625 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ1_M:   return "IQ1_M - 1.75 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ4_NL:  return "IQ4_NL - 4.5 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ4_XS:  return "IQ4_XS - 4.25 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ3_S:   return "IQ3_S - 3.4375 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ3_M:   return "IQ3_S mix - 3.66 bpw";
-
-        default: return "unknown, may not work";
-    }
-}
+const char * llm_arch_name(llm_arch arch);
 
+llm_arch llm_arch_from_string(const std::string & name);
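
With the bodies moved to llama-arch.cpp, the header keeps only declarations (plus the thin inline operator std::string() forwarding to str()), which is what lets it drop the llama-impl.h include and the ::format dependency. Usage is unchanged; a hedged sketch of how these helpers are typically called, where the specific enum values (LLM_KV_CONTEXT_LENGTH, LLM_TENSOR_ATTN_Q) are illustrative and not verified against this commit's tables:

    #include "llama-arch.h"

    #include <string>

    // Build a GGUF metadata key and a tensor name for one architecture.
    std::string example_names(llm_arch arch) {
        const LLM_KV kv(arch);
        // e.g. "llama.context_length" for LLM_ARCH_LLAMA (illustrative key).
        std::string key = kv(LLM_KV_CONTEXT_LENGTH);

        LLM_TN tn(arch);
        // e.g. "blk.0.attn_q.weight"; tensors missing from the arch's
        // table come back as "__missing__" (see str() above).
        std::string name = tn(LLM_TENSOR_ATTN_Q, "weight", 0);

        return key + " / " + name;
    }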

src/llama-impl.h

Lines changed: 1 addition & 15 deletions

@@ -24,22 +24,8 @@ LLAMA_ATTRIBUTE_FORMAT(2, 3)
 void llama_log_internal        (ggml_log_level level, const char * format, ...);
 void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data);
 
-// TODO: move to source
 LLAMA_ATTRIBUTE_FORMAT(1, 2)
-static std::string format(const char * fmt, ...) {
-    va_list ap;
-    va_list ap2;
-    va_start(ap, fmt);
-    va_copy(ap2, ap);
-    int size = vsnprintf(NULL, 0, fmt, ap);
-    GGML_ASSERT(size >= 0 && size < INT_MAX); // NOLINT
-    std::vector<char> buf(size + 1);
-    int size2 = vsnprintf(buf.data(), size + 1, fmt, ap2);
-    GGML_ASSERT(size2 == size);
-    va_end(ap2);
-    va_end(ap);
-    return std::string(buf.data(), size);
-}
+std::string format(const char * fmt, ...);
 
 #define LLAMA_LOG(...)      llama_log_internal(GGML_LOG_LEVEL_NONE , __VA_ARGS__)
 #define LLAMA_LOG_INFO(...) llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
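
format() keeps its printf-style contract (checked at compile time by LLAMA_ATTRIBUTE_FORMAT(1, 2)); only the definition moves into llama.cpp, resolving the "TODO: move to source" note. For reference, a standalone sketch of the same two-pass vsnprintf idiom, independent of the llama headers (format_sketch is a name invented here; the GGML_ASSERTs are replaced with a plain size check):

    #include <cstdarg>
    #include <cstdio>
    #include <string>
    #include <vector>

    static std::string format_sketch(const char * fmt, ...) {
        va_list ap;
        va_start(ap, fmt);
        va_list ap2;
        va_copy(ap2, ap);                        // ap is consumed by the measuring pass
        const int size = vsnprintf(nullptr, 0, fmt, ap);  // pass 1: measure
        std::string out;
        if (size >= 0) {
            std::vector<char> buf(size + 1);     // +1 for the terminating NUL
            vsnprintf(buf.data(), buf.size(), fmt, ap2);  // pass 2: write
            out.assign(buf.data(), size);
        }
        va_end(ap2);
        va_end(ap);
        return out;
    }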

src/llama-mmap.cpp

Lines changed: 2 additions & 0 deletions

@@ -1 +1,3 @@
 #include "llama-mmap.h"
+
+

src/llama-mmap.h

Lines changed: 0 additions & 2 deletions

@@ -4,8 +4,6 @@
 
 #include "ggml.h"
 
-#include <cstdio>
-
 #ifdef __has_include
     #if __has_include(<unistd.h>)
         #include <unistd.h>

src/llama-model.cpp

Lines changed: 43 additions & 0 deletions

@@ -1 +1,44 @@
 #include "llama-model.h"
+
+std::string llama_model_ftype_name(llama_ftype ftype) {
+    if (ftype & LLAMA_FTYPE_GUESSED) {
+        return llama_model_ftype_name((enum llama_ftype) (ftype & ~LLAMA_FTYPE_GUESSED)) + " (guessed)";
+    }
+
+    switch (ftype) {
+        case LLAMA_FTYPE_ALL_F32:        return "all F32";
+        case LLAMA_FTYPE_MOSTLY_F16:     return "F16";
+        case LLAMA_FTYPE_MOSTLY_BF16:    return "BF16";
+        case LLAMA_FTYPE_MOSTLY_Q4_0:    return "Q4_0";
+        case LLAMA_FTYPE_MOSTLY_Q4_1:    return "Q4_1";
+        case LLAMA_FTYPE_MOSTLY_Q5_0:    return "Q5_0";
+        case LLAMA_FTYPE_MOSTLY_Q5_1:    return "Q5_1";
+        case LLAMA_FTYPE_MOSTLY_Q8_0:    return "Q8_0";
+        case LLAMA_FTYPE_MOSTLY_Q2_K:    return "Q2_K - Medium";
+        case LLAMA_FTYPE_MOSTLY_Q2_K_S:  return "Q2_K - Small";
+        case LLAMA_FTYPE_MOSTLY_Q3_K_S:  return "Q3_K - Small";
+        case LLAMA_FTYPE_MOSTLY_Q3_K_M:  return "Q3_K - Medium";
+        case LLAMA_FTYPE_MOSTLY_Q3_K_L:  return "Q3_K - Large";
+        case LLAMA_FTYPE_MOSTLY_Q4_K_S:  return "Q4_K - Small";
+        case LLAMA_FTYPE_MOSTLY_Q4_K_M:  return "Q4_K - Medium";
+        case LLAMA_FTYPE_MOSTLY_Q5_K_S:  return "Q5_K - Small";
+        case LLAMA_FTYPE_MOSTLY_Q5_K_M:  return "Q5_K - Medium";
+        case LLAMA_FTYPE_MOSTLY_Q6_K:    return "Q6_K";
+        case LLAMA_FTYPE_MOSTLY_TQ1_0:   return "TQ1_0 - 1.69 bpw ternary";
+        case LLAMA_FTYPE_MOSTLY_TQ2_0:   return "TQ2_0 - 2.06 bpw ternary";
+        case LLAMA_FTYPE_MOSTLY_IQ2_XXS: return "IQ2_XXS - 2.0625 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ2_XS:  return "IQ2_XS - 2.3125 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ2_S:   return "IQ2_S - 2.5 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ2_M:   return "IQ2_M - 2.7 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_XS:  return "IQ3_XS - 3.3 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_XXS: return "IQ3_XXS - 3.0625 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ1_S:   return "IQ1_S - 1.5625 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ1_M:   return "IQ1_M - 1.75 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ4_NL:  return "IQ4_NL - 4.5 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ4_XS:  return "IQ4_XS - 4.25 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_S:   return "IQ3_S - 3.4375 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_M:   return "IQ3_S mix - 3.66 bpw";
+
+        default: return "unknown, may not work";
+    }
+}
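
Note the recursion at the top of the moved function: LLAMA_FTYPE_GUESSED is a flag bit OR-ed into the base llama_ftype, so the function strips it, names the base type, and appends " (guessed)". A hedged usage sketch, assuming llama-model.h is on the include path and using the llama_ftype constants from the public llama.h:

    #include "llama.h"
    #include "llama-model.h"

    #include <cstdio>

    int main() {
        // Plain ftype -> human-readable name: prints "Q4_K - Medium".
        std::printf("%s\n", llama_model_ftype_name(LLAMA_FTYPE_MOSTLY_Q4_K_M).c_str());

        // With the guessed bit set: prints "Q4_K - Medium (guessed)".
        const auto guessed = (enum llama_ftype) (LLAMA_FTYPE_MOSTLY_Q4_K_M | LLAMA_FTYPE_GUESSED);
        std::printf("%s\n", llama_model_ftype_name(guessed).c_str());
        return 0;
    }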

src/llama-model.h

Lines changed: 2 additions & 0 deletions

@@ -648,3 +648,5 @@ static ggml_backend_buffer_type_t select_buft(const llama_model::buft_list_t & b
     throw std::runtime_error(format("no suitable buffer type found"));
 }
 
+
+std::string llama_model_ftype_name(llama_ftype ftype);

src/llama-vocab.h

Lines changed: 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 #pragma once
 
-#include "llama-impl.h"
+#include "llama.h"
 
 #include <string>
 #include <vector>

src/llama.cpp

Lines changed: 18 additions & 3 deletions

@@ -59,6 +59,21 @@
 // helpers
 //
 
+std::string format(const char * fmt, ...) {
+    va_list ap;
+    va_list ap2;
+    va_start(ap, fmt);
+    va_copy(ap2, ap);
+    int size = vsnprintf(NULL, 0, fmt, ap);
+    GGML_ASSERT(size >= 0 && size < INT_MAX); // NOLINT
+    std::vector<char> buf(size + 1);
+    int size2 = vsnprintf(buf.data(), size + 1, fmt, ap2);
+    GGML_ASSERT(size2 == size);
+    va_end(ap2);
+    va_end(ap);
+    return std::string(buf.data(), size);
+}
+
 // trim whitespace from the beginning and end of a string
 static std::string trim(const std::string & str) {
     size_t start = 0;
@@ -16432,9 +16447,9 @@ int32_t llama_model_meta_val_str_by_index(const struct llama_model * model, int3
 
 int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size) {
     return snprintf(buf, buf_size, "%s %s %s",
-            llama_model_arch_name(model->arch),
-            llama_model_type_name(model->type),
-            llama_model_ftype_name(model->ftype).c_str());
+            llm_arch_name(model->arch),                    // TODO: llama_model_arch_name(model)
+            llama_model_type_name(model->type),            // TODO: llama_model_type_name(model)
+            llama_model_ftype_name(model->ftype).c_str()); // TODO: llama_model_ftype_name(model)
 }
 
 uint64_t llama_model_size(const struct llama_model * model) {
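
llama_model_desc itself is public API; this hunk only swaps in the relocated helpers (the TODO comments point at planned model-handle-based replacements). A hedged caller-side sketch, where "model.gguf" is a placeholder path and llama_load_model_from_file / llama_free_model are assumed to be the loader entry points current at this commit:

    #include "llama.h"

    #include <cstdio>

    int main() {
        llama_model_params params = llama_model_default_params();
        llama_model * model = llama_load_model_from_file("model.gguf", params);
        if (model == NULL) {
            return 1;
        }

        char desc[128];
        llama_model_desc(model, desc, sizeof(desc));
        std::printf("%s\n", desc); // e.g. "llama 7B Q4_K - Medium"

        llama_free_model(model);
        return 0;
    }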
