@@ -62,9 +62,6 @@ struct pca_model {
     struct ggml_tensor * dev_square;
     struct ggml_tensor * dev_eigenvector;
 
-    // tensors to store output data on host
-    struct ggml_tensor * host_eigenvector;
-
     pca_model(struct ggml_tensor * t_input) {
 #ifdef GGML_USE_CUDA
         fprintf(stderr, "%s: using CUDA backend\n", __func__);
@@ -129,17 +126,16 @@ struct pca_model {
         }
 
         // init host context
-        struct ggml_init_params host_params = {
-            /*.mem_size   =*/ (n_embd * sizeof(float) + ggml_tensor_overhead()) * 2u,
-            /*.mem_buffer =*/ NULL,
-            /*.no_alloc   =*/ false,
-        };
-        ctx_host = ggml_init(host_params);
-        host_eigenvector = ggml_new_tensor_1d(ctx_host, GGML_TYPE_F32, n_embd);
+        // struct ggml_init_params host_params = {
+        //     /*.mem_size   =*/ (n_embd * sizeof(float) + ggml_tensor_overhead()) * 2u,
+        //     /*.mem_buffer =*/ NULL,
+        //     /*.no_alloc   =*/ false,
+        // };
+        // ctx_host = ggml_init(host_params);
+        // host_eigenvector = ggml_new_tensor_1d(ctx_host, GGML_TYPE_F32, n_embd);
     }
 
     ~pca_model() {
-        ggml_free(ctx_host);
         ggml_free(ctx);
         ggml_backend_buffer_free(buffer);
         ggml_backend_free(backend);
@@ -299,6 +295,14 @@ static void power_iteration(
             ggml_backend_tensor_set(model.dev_square, tmp_buf.data(), 0, tmp_buf.size());
         }
 
+        {
+            // copy the last eigenvector and store it as input for the next iteration
+            GGML_ASSERT(last_eigenvector != NULL);
+            std::vector<uint8_t> tmp_buf(ggml_nbytes(last_eigenvector));
+            ggml_backend_tensor_get(last_eigenvector, tmp_buf.data(), 0, tmp_buf.size());
+            ggml_backend_tensor_set(model.dev_eigenvector, tmp_buf.data(), 0, tmp_buf.size());
+        }
+
         printf("%s: layer %d/%d, iteration: %d / total: %d (batch = %d) ...\n",
             __func__, params.i_layer+1, params.n_layers, iter, n_iters, params.n_batch);
     }
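
For reference, the block added to power_iteration() above stages the current eigenvector through host memory: ggml_backend_tensor_get() reads the device tensor's bytes into a temporary host buffer, and ggml_backend_tensor_set() writes them into model.dev_eigenvector, regardless of which backend (CPU, CUDA, Metal) owns the two tensors. The same fact is what lets the commit drop the host_eigenvector staging tensor: results can be read straight into an ordinary host buffer. Below is a minimal standalone sketch of this staging copy against the CPU backend; the helper name copy_tensor_via_host and the demo main() are illustrative assumptions, not part of this patch.

    // sketch: copy one backend tensor into another by staging through host memory,
    // the same ggml_backend_tensor_get()/ggml_backend_tensor_set() pattern used above
    #include "ggml.h"
    #include "ggml-alloc.h"
    #include "ggml-backend.h"

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    static void copy_tensor_via_host(struct ggml_tensor * src, struct ggml_tensor * dst) {
        GGML_ASSERT(ggml_nbytes(src) == ggml_nbytes(dst));
        std::vector<uint8_t> tmp_buf(ggml_nbytes(src));                  // host staging buffer
        ggml_backend_tensor_get(src, tmp_buf.data(), 0, tmp_buf.size()); // device -> host
        ggml_backend_tensor_set(dst, tmp_buf.data(), 0, tmp_buf.size()); // host -> device
    }

    int main() {
        const int n_embd = 8;

        // no_alloc context: tensor data lives in a backend buffer, as in pca_model
        struct ggml_init_params params = {
            /*.mem_size   =*/ 2u * ggml_tensor_overhead(),
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ true,
        };
        struct ggml_context * ctx = ggml_init(params);

        struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
        struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);

        ggml_backend_t backend = ggml_backend_cpu_init();
        ggml_backend_buffer_t buffer = ggml_backend_alloc_ctx_tensors(ctx, backend);

        // fill a from the host, stage-copy a -> b, then read b back to verify
        std::vector<float> ones(n_embd, 1.0f);
        ggml_backend_tensor_set(a, ones.data(), 0, ones.size() * sizeof(float));
        copy_tensor_via_host(a, b);

        std::vector<float> out(n_embd);
        ggml_backend_tensor_get(b, out.data(), 0, out.size() * sizeof(float));
        printf("b[0] = %f\n", out[0]); // expect 1.000000

        ggml_free(ctx);
        ggml_backend_buffer_free(buffer);
        ggml_backend_free(backend);
        return 0;
    }

ggml-backend also exposes ggml_backend_tensor_copy(src, dst) for direct tensor-to-tensor copies; the explicit host round-trip shown here simply mirrors what the patch itself does.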