
Commit ed0c11c

multimodal support enabled by default
1 parent 6c277ea commit ed0c11c

File tree

1 file changed: +3 -41 lines changed

examples/server/server.cpp

Lines changed: 3 additions & 41 deletions
@@ -3,12 +3,8 @@
 #include "build-info.h"
 #include "grammar-parser.h"
 
-//#define SERVER_MULTIMODAL_SUPPORT
-
-#ifdef SERVER_MULTIMODAL_SUPPORT
 #include "../llava/clip.h"
 #include "stb_image.h"
-#endif
 
 #ifndef NDEBUG
 // crash the server in debug mode, otherwise send an http 500 error
@@ -63,7 +59,6 @@ static bool server_verbose = false;
 #define LOG_WARNING(MSG, ...) server_log("WARNING", __func__, __LINE__, MSG, __VA_ARGS__)
 #define LOG_INFO(MSG, ...) server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__)
 
-#ifdef SERVER_MULTIMODAL_SUPPORT
 static const std::string base64_chars =
     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
     "abcdefghijklmnopqrstuvwxyz"
@@ -112,7 +107,6 @@ std::vector<uint8_t> base64_decode(std::string const& encoded_string) {
 
     return ret;
 }
-#endif
 
 // parallel
 enum slot_state
@@ -267,7 +261,6 @@ static json probs_vector_to_json(const llama_context *ctx, const std::vector<com
     return out;
 }
 
-#ifdef SERVER_MULTIMODAL_SUPPORT
 struct slot_image {
     clip_image_u8 img_data;
     bool request_encode_image = false;
@@ -276,7 +269,6 @@ struct slot_image {
     int id;
     std::string prefix_prompt = ""; // before of this image
 };
-#endif
 
 struct llama_client_slot
 {
@@ -322,9 +314,8 @@ struct llama_client_slot
     grammar_parser::parse_state parsed_grammar;
     llama_grammar *grammar = nullptr;
 
-#ifdef SERVER_MULTIMODAL_SUPPORT
+    // multimodal
     std::vector<slot_image> images;
-#endif
 
     void reset() {
         num_prompt_tokens = 0;
@@ -347,15 +338,12 @@ struct llama_client_slot
             ctx_sampling.grammar = NULL;
         }
 
-#ifdef SERVER_MULTIMODAL_SUPPORT
         for(slot_image img : images) {
             free(img.image_embedding);
             delete[] img.img_data.data;
             img.prefix_prompt = "";
         }
         images.clear();
-#endif
-
         // llama_set_rng_seed(ctx, params.seed); in batched the seed matter???????
     }
 
@@ -452,11 +440,9 @@ struct llama_server_context
     std::string user_name = ""; // this should be the anti prompt
     std::string assistant_name = ""; // this is for generate the prompt
 
-#ifdef SERVER_MULTIMODAL_SUPPORT
     bool multimodal = false;
     clip_ctx *clp_ctx = nullptr;
     int n_embd;
-#endif
 
     llama_model *model = nullptr;
     llama_context *ctx = nullptr;
@@ -490,7 +476,6 @@ struct llama_server_context
     bool loadModel(const gpt_params &params_)
     {
         params = params_;
-#ifdef SERVER_MULTIMODAL_SUPPORT
         if(!params.mmproj.empty()) {
             multimodal = true;
             LOG_TEE("Multi Modal Mode Enabled");
@@ -504,15 +489,13 @@ struct llama_server_context
                 params.n_ctx = 2048;
             }
         }
-#endif
         std::tie(model, ctx) = llama_init_from_gpt_params(params);
         if (model == nullptr)
         {
             LOG_ERROR("unable to load model", {{"model", params.model}});
             return false;
         }
 
-#ifdef SERVER_MULTIMODAL_SUPPORT
         if(multimodal) {
             int n_img_embd = clip_n_mmproj_embd(clp_ctx);
             n_embd = llama_n_embd(model);
@@ -523,7 +506,6 @@ struct llama_server_context
                 return false;
             }
         }
-#endif
         n_ctx = llama_n_ctx(ctx);
         n_vocab = llama_n_vocab(model);
         candidates.reserve(n_vocab);
@@ -829,7 +811,6 @@ struct llama_server_context
         return slot.has_next_token; // continue
     }
 
-#ifdef SERVER_MULTIMODAL_SUPPORT
     bool processImages(llama_client_slot &slot) {
         for(slot_image &img : slot.images) {
             if(!img.request_encode_image) {
@@ -914,7 +895,6 @@ struct llama_server_context
         }
         return true;
     }
-#endif
 
     bool updateSlots() {
         // update the system prompt wait until all slots are idle state
@@ -1088,7 +1068,6 @@ struct llama_server_context
                     {"to_eval", tokens_to_str(ctx, slot.cache_tokens.cbegin() + slot.n_past, slot.cache_tokens.cend())},
                 });
 
-#ifdef SERVER_MULTIMODAL_SUPPORT
                 bool ingest_images = processImages(slot); // has images?
 
                 // process the prefix of first image
@@ -1105,15 +1084,7 @@ struct llama_server_context
                     LOG_TEE("failed processing images\n");
                     return false;
                 }
-#else
-                for (; slot.n_past < prompt_tokens.size(); ++slot.n_past) {
-                    batch.token [batch.n_tokens] = prompt_tokens[slot.n_past];
-                    batch.pos   [batch.n_tokens] = slot.n_past + num_tokens_system;
-                    batch.seq_id[batch.n_tokens] = slot.id;
-                    batch.logits[batch.n_tokens] = false;
-                    batch.n_tokens += 1;
-                }
-#endif
+
                 // extract the logits only for the last token
                 if (batch.n_tokens > 0) {
                     batch.logits[batch.n_tokens - 1] = true;
@@ -1277,9 +1248,7 @@ static void server_print_usage(const char *argv0, const gpt_params &params,
     printf(" -cb, --cont-batching enable continuous batching (a.k.a dynamic batching) (default: disabled)\n");
     printf(" -spf FNAME, --system-prompt-file FNAME\n");
     printf("     Set a file to load a system prompt (initial prompt of all slots), this is useful for chat applications.\n");
-#ifdef SERVER_MULTIMODAL_SUPPORT
     printf(" --mmproj MMPROJ_FILE path to a multimodal projector file for LLaVA.\n");
-#endif
     printf("\n");
 }
 
@@ -1570,7 +1539,6 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
             );
             llama.processSystemPromptData(json::parse(systm_content));
         }
-#ifdef SERVER_MULTIMODAL_SUPPORT
         else if(arg == "--mmproj") {
             if (++i >= argc)
             {
@@ -1579,7 +1547,6 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
             }
             params.mmproj = argv[i];
         }
-#endif
         else
         {
             fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
@@ -1697,11 +1664,7 @@ static json format_partial_response(
         {"content", content },
         {"stop", false},
         { "slot_id", slot->id },
-#ifdef SERVER_MULTIMODAL_SUPPORT
         {"multimodal", llama.multimodal }
-#else
-        {"multimodal", false }
-#endif
     };
 
     if (slot->sparams.n_probs > 0)
@@ -1810,8 +1773,8 @@ static void parse_options_completion(const json &body, llama_client_slot* slot,
             }
         }
     }
+
     LOG_VERBOSE("completion parameters parsed", format_generation_settings(llama, slot));
-#ifdef SERVER_MULTIMODAL_SUPPORT
     if(!llama.multimodal) {
         return;
     }
@@ -1882,7 +1845,6 @@ static void parse_options_completion(const json &body, llama_client_slot* slot,
             slot->params.cache_prompt = false; // multimodal doesn't support cache prompt
         }
     }
-#endif
 }
 
 static void parse_options_infill(const json &body, llama_server_context &llama, llama_client_slot *slot)

0 commit comments
