@@ -883,7 +883,7 @@ bool clip_text_encode(const clip_ctx * ctx, int n_threads, const std::vector<cli
883
883
884
884
// layernorm1
885
885
{
886
- cur = ggml_norm (ctx0, cur);
886
+ cur = ggml_norm(ctx0, cur, 1e-5f);
887
887
888
888
cur = ggml_add (ctx0, ggml_mul (ctx0, ggml_repeat (ctx0, model.layers [il].ln_1_w , cur), cur),
889
889
ggml_repeat (ctx0, model.layers [il].ln_1_b , cur));
@@ -933,7 +933,7 @@ bool clip_text_encode(const clip_ctx * ctx, int n_threads, const std::vector<cli
933
933
934
934
// layernorm2
935
935
{
936
- cur = ggml_norm (ctx0, cur);
936
+ cur = ggml_norm(ctx0, cur, 1e-5f);
937
937
938
938
cur = ggml_add (ctx0, ggml_mul (ctx0, ggml_repeat (ctx0, model.layers [il].ln_2_w , cur), cur),
939
939
ggml_repeat (ctx0, model.layers [il].ln_2_b , cur));
@@ -959,7 +959,7 @@ bool clip_text_encode(const clip_ctx * ctx, int n_threads, const std::vector<cli
959
959
960
960
// final layer_norm
961
961
{
962
- embeddings = ggml_norm (ctx0, embeddings);
962
+ embeddings = ggml_norm(ctx0, embeddings, 1e-5f);
963
963
964
964
embeddings = ggml_add (ctx0, ggml_mul (ctx0, ggml_repeat (ctx0, model.post_ln_w , embeddings), embeddings),
965
965
ggml_repeat (ctx0, model.post_ln_b , embeddings));
@@ -1136,7 +1136,7 @@ bool clip_image_batch_encode(const clip_ctx * ctx, int n_threads, const std::vec
1136
1136
1137
1137
// pre-layernorm
1138
1138
{
1139
- embeddings = ggml_norm (ctx0, embeddings);
1139
+ embeddings = ggml_norm(ctx0, embeddings, 1e-5f);
1140
1140
1141
1141
embeddings = ggml_add (ctx0, ggml_mul (ctx0, ggml_repeat (ctx0, model.pre_ln_w , embeddings), embeddings),
1142
1142
ggml_repeat (ctx0, model.pre_ln_b , embeddings));
@@ -1152,7 +1152,7 @@ bool clip_image_batch_encode(const clip_ctx * ctx, int n_threads, const std::vec
1152
1152
1153
1153
// layernorm1
1154
1154
{
1155
- cur = ggml_norm (ctx0, cur);
1155
+ cur = ggml_norm(ctx0, cur, 1e-5f);
1156
1156
1157
1157
cur = ggml_add (ctx0, ggml_mul (ctx0, ggml_repeat (ctx0, model.layers [il].ln_1_w , cur), cur),
1158
1158
ggml_repeat (ctx0, model.layers [il].ln_1_b , cur));
@@ -1202,7 +1202,7 @@ bool clip_image_batch_encode(const clip_ctx * ctx, int n_threads, const std::vec
1202
1202
1203
1203
// layernorm2
1204
1204
{
1205
- cur = ggml_norm (ctx0, cur);
1205
+ cur = ggml_norm(ctx0, cur, 1e-5f);
1206
1206
1207
1207
cur = ggml_add (ctx0, ggml_mul (ctx0, ggml_repeat (ctx0, model.layers [il].ln_2_w , cur), cur),
1208
1208
ggml_repeat (ctx0, model.layers [il].ln_2_b , cur));
@@ -1235,7 +1235,7 @@ bool clip_image_batch_encode(const clip_ctx * ctx, int n_threads, const std::vec
1235
1235
1236
1236
// post-layernorm
1237
1237
{
1238
- embeddings = ggml_norm (ctx0, embeddings);
1238
+ embeddings = ggml_norm(ctx0, embeddings, 1e-4f); // NOTE(review): 1e-4f differs from the 1e-5f used by every other norm in this change — confirm intentional
1239
1239
1240
1240
embeddings = ggml_add (ctx0, ggml_mul (ctx0, ggml_repeat (ctx0, model.post_ln_w , embeddings), embeddings),
1241
1241
ggml_repeat (ctx0, model.post_ln_b , embeddings));
@@ -1260,6 +1260,7 @@ bool clip_image_batch_encode(const clip_ctx * ctx, int n_threads, const std::vec
1260
1260
// run the computation
1261
1261
ggml_build_forward_expand (&gf, output);
1262
1262
ggml_cplan cplan = ggml_graph_plan (&gf, n_threads);
1263
+ cplan.work_size *= batch_size;
1263
1264
if (cplan.work_size != 0 ) {
1264
1265
cplan.work_data = (uint8_t *)malloc (cplan.work_size );
1265
1266
}
@@ -1395,16 +1396,18 @@ bool softmax_with_sorting(float * arr, int length, float * sorted_scores, int *
1395
1396
}
1396
1397
1397
1398
// Calculate softmax probabilities
1399
+ /*
1398
1400
float max_val = arr[0];
1399
1401
for (int i = 1; i < length; i++) {
1400
1402
if (arr[i] > max_val) {
1401
1403
max_val = arr[i];
1402
1404
}
1403
1405
}
1406
+ */
1404
1407
1405
1408
float sum = 0.0 ;
1406
1409
for (int i = 0 ; i < length; i++) {
1407
- arr[i] = exp(arr[i] - max_val);
1410
+ arr[i] = exp(arr[i]); // NOTE(review): dropping the max-val subtraction makes this softmax overflow-prone for large logits — confirm intended
1408
1411
sum += arr[i];
1409
1412
}
1410
1413
0 commit comments