Commit ec2b03e

Author: mike dupont
Message: now printing tensors
Parent: af698c6

File tree: 6 files changed (+141, -59 lines)

binding.py (2 additions, 2 deletions)

@@ -14,9 +14,9 @@
 cxxClientRoot = "/home/mdupont/experiments/llama.cpp/"
 
 fileList = [
-    # "ggml.cpp",
+    "ggml.cpp",
     # "llama.cpp",
-    "examples/server/server.cpp",
+    # "examples/server/server.cpp",
 ]
 
 typeList = [

examples/main/main.cpp (2 additions, 1 deletion)

@@ -30,7 +30,8 @@
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
-
+#include "ggml-internal.hpp"
+#include "llama-internal.hpp"
 #include "print.hpp"
 
 static llama_context ** g_ctx;

examples/server/server.cpp (1 addition, 0 deletions)

@@ -24,6 +24,7 @@
 #include <thread>
 #include <mutex>
 #include <chrono>
+#include "llama-internal.hpp"
 #include "print.hpp"
 
 #ifndef SERVER_VERBOSE

ggml.cpp (19 additions, 3 deletions)

@@ -4,6 +4,7 @@
 #include "ggml-impl.h"
 #include "ggml-quants.h"
 
+
 #if defined(_MSC_VER) || defined(__MINGW32__)
 #include <malloc.h> // using malloc.h with MSC/MINGW
 #elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
@@ -46,6 +47,12 @@ void type_traits_init();
 void GGUF_TYPE_SIZE_init();
 void GGUF_TYPE_NAME_init();
 
+#include "llama.h"
+struct ggml_allocr;
+//#include "ggml-internal.hpp"
+#include "llama-internal.hpp"
+#include "print.hpp"
+
 #if defined(_WIN32)
 
 #include <windows.h>
@@ -9412,7 +9419,10 @@ static void ggml_compute_forward_mul_mat(
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
-
+    print_fields(*params);
+    print_fields(*src0);
+    print_fields(*src1);
+    print_fields(*dst);
     int64_t t0 = ggml_perf_time_us();
     UNUSED(t0);
 
@@ -9456,17 +9466,20 @@ static void ggml_compute_forward_mul_mat(
         if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
             ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
         }
+        print_fields(*dst);
         return;
     }
 #endif
 
 #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
     if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
         if (params->ith != 0) {
+            print_fields(*dst);
             return;
         }
 
         if (params->type == GGML_TASK_INIT) {
+
             return;
         }
 
@@ -9508,7 +9521,7 @@ static void ggml_compute_forward_mul_mat(
         }
 
         //printf("CBLAS = %f ms, %d x %d x %d x %d\n", (ggml_perf_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3);
-
+        print_fields(*dst);
         return;
     }
 #endif
@@ -9527,11 +9540,12 @@ static void ggml_compute_forward_mul_mat(
                 }
             }
         }
-
+        print_fields(*dst);
         return;
     }
 
     if (params->type == GGML_TASK_FINALIZE) {
+        print_fields(*dst);
         return;
     }
 
@@ -9565,6 +9579,7 @@ static void ggml_compute_forward_mul_mat(
     // threads with no work simply yield (not sure if it helps)
     if (ir010 >= ir011 || ir110 >= ir111) {
         sched_yield();
+
         return;
     }
 
@@ -9617,6 +9632,7 @@ static void ggml_compute_forward_mul_mat(
             }
         }
     }
+    print_fields(*dst);
 }
 
 // ggml_compute_forward_out_prod
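
Note: print.hpp is included throughout this commit but is not among the files shown in this diff, so the actual print_fields implementation is not visible here. As a rough sketch only, overloads compatible with the call sites above could look like the following, assuming they simply dump the public fields declared in ggml.h:

// Hypothetical sketch; the real print.hpp may differ.
#include <cstdio>
#include "ggml.h"

static void print_fields(const ggml_compute_params & p) {
    // task type, thread index/count, and scratch buffer size
    fprintf(stderr, "params: type=%d ith=%d nth=%d wsize=%zu\n",
            (int) p.type, p.ith, p.nth, p.wsize);
}

static void print_fields(const ggml_tensor & t) {
    // shape (ne), strides in bytes (nb), op, and data pointer
    fprintf(stderr,
            "tensor '%s': type=%d op=%d ne=[%lld,%lld,%lld,%lld] "
            "nb=[%zu,%zu,%zu,%zu] data=%p\n",
            t.name, (int) t.type, (int) t.op,
            (long long) t.ne[0], (long long) t.ne[1],
            (long long) t.ne[2], (long long) t.ne[3],
            t.nb[0], t.nb[1], t.nb[2], t.nb[3], t.data);
}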

llama-internal.hpp (6 additions, 0 deletions)

@@ -1,5 +1,10 @@
 #include <set>
 #include <queue>
+#include <map>
+#include <random>
+#include <functional>
+#include <unordered_map>
+#include <memory>
 enum llm_arch {
     LLM_ARCH_LLAMA,
     LLM_ARCH_FALCON,
@@ -451,6 +456,7 @@ struct llama_model {
     }
 };
 
+struct ggml_allocr;
 struct llama_context {
     llama_context(const llama_model & model) : model(model), t_start_us(model.t_start_us), t_load_us(model.t_load_us) {}
     ~llama_context();
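
Note on the added struct ggml_allocr;: a forward declaration lets this header refer to the allocator without pulling in its definition, which stays private to ggml-alloc. A minimal sketch of why the incomplete type suffices, assuming llama_context (as in upstream llama.cpp) only stores a pointer to it:

// Sketch: an incomplete type works fine behind a pointer.
struct ggml_allocr; // forward declaration, as added above

struct example_holder {
    ggml_allocr * alloc = nullptr; // OK: pointer to incomplete type
    // ggml_allocr by_value;       // error: incomplete type has unknown size
};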
