@@ -1062,7 +1062,7 @@ void get_example_targets(int example_id, struct ggml_tensor * tokens_input, stru
1062
1062
ggml_set_f32 (targets, -1 .0f );
1063
1063
ggml_set_i32_1d (tokens_input, 0 , 0 );
1064
1064
for (int i=1 ; i<n_tokens+1 ; ++i) {
1065
- float x = example_id + i * 3 .14159f * 2 .0f * 1 .0f / n_tokens;
1065
+ float x = example_id + i * 3 .14159f * 2 .0f * 1 .0f * 0 . 5f / n_tokens;
1066
1066
float y = sinf (x);// *cosf(x*1.1f+1.0f);
1067
1067
float z = (y+1 .0f )*0 .5f ; // scale to [0..1]
1068
1068
z += (frand ()-0 .5f )*(randomness/n_vocab);
@@ -1113,12 +1113,12 @@ int main(int argc, char ** argv) {
1113
1113
1114
1114
struct llama_model model;
1115
1115
model.hparams .n_vocab = 8 ;
1116
- model.hparams .n_ctx = 32 ;
1116
+ model.hparams .n_ctx = 8 ;
1117
1117
model.hparams .n_embd = 32 ;
1118
1118
model.hparams .n_mult = 2 ;
1119
1119
model.hparams .n_head = 8 ;
1120
- model.hparams .n_layer = 8 ;
1121
- model.hparams .n_rot = model.hparams .n_embd / model.hparams .n_head ;
1120
+ model.hparams .n_layer = 1 ;
1121
+ model.hparams .n_rot = MIN ( 16 , model.hparams .n_embd / model.hparams .n_head ) ;
1122
1122
1123
1123
// model.hparams.n_embd = 32;
1124
1124
// model.hparams.n_mult = 2;
@@ -1177,7 +1177,7 @@ int main(int argc, char ** argv) {
1177
1177
size_t compute_size = 1024ll *1024ll *1024ll ;
1178
1178
uint8_t * compute_addr = new uint8_t [compute_size];
1179
1179
1180
- int n_examples = 128 ;
1180
+ int n_examples = 256 ;
1181
1181
int n_tokens = model.hparams .n_ctx ;
1182
1182
int n_vocab = model.hparams .n_vocab ;
1183
1183
@@ -1285,7 +1285,7 @@ int main(int argc, char ** argv) {
1285
1285
1286
1286
{
1287
1287
int n_gen = 128 ;
1288
- int sample_ctx = n_tokens/ 2 -n_tokens/16 ;
1288
+ int sample_ctx = n_tokens-n_tokens/8 ;
1289
1289
1290
1290
printf (" Generating %d tokens.\n " , n_gen);
1291
1291
0 commit comments