
Commit 064089b

Author: mike dupont
Message: update
Parent: 8ee192c

File tree

5 files changed (+20 -14 lines)


CMakeLists.txt

Lines changed: 6 additions & 3 deletions

@@ -22,6 +22,9 @@ set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics
 set(CUDACXX /usr/local/cuda-12.2/bin/nvcc)
 #GGML_USE_CUBLAS
 
+#set(CMAKE_EXE_LINKER_FLAGS -pg)
+#set(CMAKE_SHARED_LINKER_FLAGS -pg)
+
 set(CMAKE_BUILD_TYPE Debug CACHE STRING "Build type" FORCE)
 
 if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
@@ -447,8 +450,8 @@ if (LLAMA_ALL_WARNINGS)
         # todo : msvc
     endif()
 
-    set(c_flags ${c_flags} -save-temps --verbose ${warning_flags})
-    set(cxx_flags ${cxx_flags} -save-temps --verbose ${warning_flags})
+    set(c_flags ${c_flags} -save-temps --verbose ${warning_flags})
+    set(cxx_flags ${cxx_flags} -save-temps --verbose ${warning_flags})
 
     add_compile_options("$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
                         "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
                         "$<$<COMPILE_LANGUAGE:CXX>:${host_cxx_flags}>")
@@ -515,7 +518,7 @@ if (NOT MSVC)
     add_link_options("-Wl,-Map=${TARGET}.map")
 
     if (LLAMA_GPROF)
-        add_compile_options(-pg)
+        add_compile_options(-pg)
     endif()
 endif()
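The hunks above thread gprof support through the build: the commented-out linker flags and the existing LLAMA_GPROF branch both hinge on GCC's -pg switch. As a minimal sketch of what -pg does once enabled (a standalone example, not part of this commit; the file name hot.c is hypothetical):

/* build:  gcc -pg hot.c -o hot     (compile AND link with -pg)
 * run:    ./hot                    (writes gmon.out on clean exit)
 * report: gprof ./hot gmon.out     (flat profile and call graph)
 */
#include <stdio.h>

static double burn(long n) {
    double acc = 0.0;
    for (long i = 0; i < n; i++) {
        acc += (double)i * 1e-9;   /* enough work for the profiler to see */
    }
    return acc;
}

int main(void) {
    printf("%f\n", burn(100000000L));
    return 0;
}

Note that -pg must be passed at both compile and link time, which is presumably why the linker-flag lines are staged here alongside the existing add_compile_options(-pg).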

README.md

Lines changed: 2 additions & 2 deletions

@@ -592,7 +592,7 @@ From the unzipped folder, open a terminal/cmd window here and place a pre-conver
 
 ### Memory/Disk Requirements
 
-As the models are currently fully loaded into memory, you will need adequate disk space to save them and sufficient RAM to load them. At the moment, memory and disk requirements are the same.
+As the models are currently fully loaded into memory, you will need adequate disk space to save them and sufficient RAM to load them. At the moment, memory and disk requirements are the same.
 
 | Model | Original size | Quantized size (4-bit) |
 |------:|--------------:|-----------------------:|
@@ -696,7 +696,7 @@ PROMPT_TEMPLATE=./prompts/chat-with-bob.txt PROMPT_CACHE_FILE=bob.prompt.bin \
 
 The `grammars/` folder contains a handful of sample grammars. To write your own, check out the [GBNF Guide](./grammars/README.md).
 
-For authoring more complex JSON grammars, you can also check out https://grammar.intrinsiclabs.ai/, a browser app that lets you write TypeScript interfaces which it compiles to GBNF grammars that you can save for local use. Note that the app is built and maintained by members of the community, please file any issues or FRs on [its repo](http://github.com/intrinsiclabsai/gbnfgen) and not this one.
+For authoring more complex JSON grammars, you can also check out https://grammar.intrinsiclabs.ai/, a browser app that lets you write TypeScript interfaces which it compiles to GBNF grammars that you can save for local use. Note that the app is built and maintained by members of the community, please file any issues or FRs on [its repo](http://github.com/intrinsiclabsai/gbnfgen) and not this one.
 
 ### Instruction mode with Alpaca

examples/main/main.cpp

Lines changed: 2 additions & 2 deletions

@@ -850,8 +850,8 @@ int main(int argc, char ** argv) {
     llama_print_timings(ctx);
     write_logfile(ctx, params, model, input_tokens, output_ss.str(), output_tokens);
 
-    // dump core
-    int *ptr = 0; *ptr = 1;
+    // dont dump core
+    //int *ptr = 0; *ptr = 1;
 
     if (ctx_guidance) { llama_free(ctx_guidance); }
     llama_free(ctx);
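For context, the removed lines were a deliberate crash: writing through a null pointer to force a core dump right after the timing and log output, so process state could be inspected post-mortem. If that hook is ever wanted again, abort() produces a core file without invoking undefined behavior; a minimal sketch (the LLAMA_DUMP_CORE guard is hypothetical, not part of this commit):

#include <stdio.h>
#include <stdlib.h>

int main(void) {
    fprintf(stderr, "work finished\n");
#ifdef LLAMA_DUMP_CORE              /* hypothetical opt-in switch */
    /* abort() raises SIGABRT; with `ulimit -c unlimited` the kernel
     * writes a core file - same effect as the old null-pointer write,
     * but without undefined behavior. */
    abort();
#endif
    return 0;
}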

ggml-quants.c

Lines changed: 2 additions & 1 deletion

@@ -5,7 +5,7 @@
 #include <string.h>
 #include <assert.h>
 #include <float.h>
-
+#include <stdio.h>
 #ifdef __ARM_NEON
 
 // if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
@@ -2424,6 +2424,7 @@ static inline __m128i get_scale_shuffle(int i) {
 #endif
 
 void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, const void * restrict vx, const void * restrict vy) {
+    //fprintf(stderr, "%s: n:%d s:%f vx:%p vy:%p\n", __func__, n, *s, vx, vy);
     const int qk = QK8_0;
     const int nb = n / qk;
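The fprintf added at the top of ggml_vec_dot_q4_0_q8_0 is left commented out, presumably because this function sits on the hot path. A common way to keep such traces around without paying for them is a compile-time macro; a minimal sketch of that pattern (the GGML_TRACE name is an assumption, not ggml API):

#include <stdio.h>

#ifdef GGML_TRACE
#define TRACE(...) fprintf(stderr, __VA_ARGS__)
#else
#define TRACE(...) ((void)0)       /* compiles to nothing otherwise */
#endif

static void vec_dot_stub(int n, const void * vx, const void * vy) {
    TRACE("%s: n:%d vx:%p vy:%p\n", __func__, n, vx, vy);
    (void)n; (void)vx; (void)vy;   /* the real dot product would go here */
}

int main(void) {
    vec_dot_stub(32, NULL, NULL);  /* prints only when built with -DGGML_TRACE */
    return 0;
}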

ggml.c

Lines changed: 8 additions & 6 deletions

@@ -6365,7 +6365,7 @@ static void ggml_compute_forward_dup_f16(
             GGML_ASSERT(false); // TODO: implement
         }
     } else {
-        //printf("%s: this is not optimal - fix me\n", __func__);
+        printf("%s: this is not optimal - fix me\n", __func__);
 
         if (dst->type == GGML_TYPE_F32) {
             size_t id = 0;
@@ -6612,7 +6612,7 @@ static void ggml_compute_forward_dup_f32(
             GGML_ASSERT(false); // TODO: implement
         }
     } else {
-        //printf("%s: this is not optimal - fix me\n", __func__);
+        printf("%s: this is not optimal - fix me\n", __func__);
 
         if (dst->type == GGML_TYPE_F32) {
             size_t id = 0;
@@ -9390,6 +9390,7 @@ static void ggml_compute_forward_mul_mat(
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
     int64_t t0 = ggml_perf_time_us();
     UNUSED(t0);
@@ -9427,7 +9428,8 @@ static void ggml_compute_forward_mul_mat(
 
     // nb01 >= nb00 - src0 is not transposed
     // compute by src0 rows
-
+    fprintf(stderr, "%s: params_type:%d src0:%p ->data %p src1:%p ->data %p\n", __func__, params->type, (void*)src0, src0->data, (void*)src1, src1->data);
+
 #if defined(GGML_USE_CLBLAST)
     if (ggml_cl_can_mul_mat(src0, src1, dst)) {
         if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
@@ -9484,7 +9486,7 @@ static void ggml_compute_forward_mul_mat(
             }
         }
 
-        //printf("CBLAS = %f ms, %d x %d x %d x %d\n", (ggml_perf_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3);
+        printf("CBLAS = %f ms, %d x %d x %d x %d\n", (ggml_perf_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3);
 
         return;
     }
@@ -9518,7 +9520,7 @@ static void ggml_compute_forward_mul_mat(
     const int64_t nr0 = ne01; // src0 rows
     const int64_t nr1 = ne11*ne12*ne13; // src1 rows
 
-    //printf("nr0 = %lld, nr1 = %lld\n", nr0, nr1);
+    printf("nr0 = %lld, nr1 = %lld\n", nr0, nr1);
 
     // distribute the thread work across the inner or outer loop based on which one is larger
@@ -9537,7 +9539,7 @@ static void ggml_compute_forward_mul_mat(
     const int64_t ir110 = dr1*ith1;
     const int64_t ir111 = MIN(ir110 + dr1, nr1);
 
-    //printf("ir010 = %6lld, ir011 = %6lld, ir110 = %6lld, ir111 = %6lld\n", ir010, ir011, ir110, ir111);
+    printf("ir010 = %6lld, ir011 = %6lld, ir110 = %6lld, ir111 = %6lld\n", ir010, ir011, ir110, ir111);
 
     // threads with no work simply yield (not sure if it helps)
     if (ir010 >= ir011 || ir110 >= ir111) {
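The traces re-enabled in ggml_compute_forward_mul_mat expose the thread work split: nr0 = ne01 src0 rows and nr1 = ne11*ne12*ne13 src1 rows are chunked so that thread ith1 owns the half-open range [ir110, ir111), with the last chunk clamped by MIN. A standalone sketch of that range arithmetic (the ceiling-division chunk size dr1 is an assumption; the nr/dr/ir names come from the diff):

#include <stdio.h>
#include <stdint.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void) {
    const int64_t nr1  = 10;   /* e.g. src1 rows: ne11*ne12*ne13 */
    const int64_t nth1 = 4;    /* threads working on this dimension */

    /* assumed chunking: ceil(nr1/nth1) rows per thread */
    const int64_t dr1 = (nr1 + nth1 - 1)/nth1;

    for (int64_t ith1 = 0; ith1 < nth1; ith1++) {
        const int64_t ir110 = dr1*ith1;               /* first row owned */
        const int64_t ir111 = MIN(ir110 + dr1, nr1);  /* one past the last */
        if (ir110 >= ir111) {
            printf("thread %lld: no work, yields\n", (long long)ith1);
        } else {
            printf("thread %lld: rows [%lld, %lld)\n",
                   (long long)ith1, (long long)ir110, (long long)ir111);
        }
    }
    return 0;
}

With nr1 = 10 and four threads this prints the ranges [0,3), [3,6), [6,9), [9,10) - the same clamped last chunk the ir111 trace would show.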
