@@ -8708,6 +8708,73 @@ void llama_sample_typical(struct llama_context * ctx, llama_token_data_array * c
8708
8708
}
8709
8709
}
8710
8710
8711
+ void llama_sample_entropy (struct llama_context * ctx, llama_token_data_array * candidates_p, float min_temp, float max_temp, float exponent_val) {
8712
+ const int64_t t_start_sample_us = ggml_time_us ();
8713
+
8714
+ // no need to do anything if there is only one (or zero) candidates
8715
+ if (candidates_p->size <= 1 ) {
8716
+ return ;
8717
+ }
8718
+
8719
+ // Calculate maximum possible entropy
8720
+ float max_entropy = -logf (1 .0f / candidates_p->size );
8721
+
8722
+ llama_sample_softmax (nullptr , candidates_p);
8723
+
8724
+ // Calculate entropy of the softmax probabilities
8725
+ float entropy = 0 .0f ;
8726
+ for (size_t i = 0 ; i < candidates_p->size ; ++i) {
8727
+ float prob = candidates_p->data [i].p ;
8728
+ if (prob > 0 .0f ) { // Ensure no log(0)
8729
+ entropy -= prob * logf (prob);
8730
+ }
8731
+ }
8732
+
8733
+ // Normalize the entropy (max_entropy cannot be 0 here because we checked candidates_p->size != 1 above)
8734
+ float normalized_entropy = entropy / max_entropy;
8735
+
8736
+ // Map the normalized entropy to the desired temperature range using the power function
8737
+ float dyn_temp = min_temp + (max_temp - min_temp) * powf (normalized_entropy, exponent_val);
8738
+
8739
+ #ifdef DEBUG
8740
+ LLAMA_LOG_INFO (" Your text maxtemp value is: %f\n " , max_temp);
8741
+ LLAMA_LOG_INFO (" Entropy: %f\n " , entropy);
8742
+ LLAMA_LOG_INFO (" Max Possible Entropy: %f\n " , max_entropy);
8743
+ LLAMA_LOG_INFO (" Normalized Entropy: %f\n " , normalized_entropy);
8744
+ LLAMA_LOG_INFO (" Exponent: %f\n " , exponent_val);
8745
+ LLAMA_LOG_INFO (" Dynamic Temperature (dyn_temp): %f\n " , dyn_temp);
8746
+ #endif
8747
+
8748
+ // Apply the dynamically calculated temperature scaling
8749
+ for (size_t i = 0 ; i < candidates_p->size ; ++i) {
8750
+ candidates_p->data [i].logit /= dyn_temp;
8751
+ }
8752
+
8753
+ // Re-compute softmax probabilities after scaling logits with dynamic temperature
8754
+ double max_l_double = candidates_p->data [0 ].logit ;
8755
+ double cum_sum_double = 0.0 ;
8756
+ for (size_t i = 0 ; i < candidates_p->size ; ++i) {
8757
+ double p = exp (candidates_p->data [i].logit - max_l_double);
8758
+ candidates_p->data [i].p = p; // Store the scaled probability
8759
+ cum_sum_double += p;
8760
+ }
8761
+ for (size_t i = 0 ; i < candidates_p->size ; ++i) {
8762
+ candidates_p->data [i].p /= cum_sum_double; // Re-normalize the probabilities
8763
+ }
8764
+
8765
+ #ifdef DEBUG
8766
+ // Print the updated top 25 probabilities after temperature scaling
8767
+ LLAMA_LOG_INFO (" \n Updated Top 25 Probabilities After Dynamic Temperature Scaling (in percentages):\n " );
8768
+ for (size_t i = 0 ; i < 25 && i < candidates_p->size ; ++i) {
8769
+ LLAMA_LOG_INFO (" Token %zu: %f%%\n " , i + 1 , candidates_p->data [i].p * 100 .0f );
8770
+ }
8771
+ #endif
8772
+
8773
+ if (ctx) {
8774
+ ctx->t_sample_us += ggml_time_us () - t_start_sample_us;
8775
+ }
8776
+ }
8777
+
8711
8778
void llama_sample_temp (struct llama_context * ctx, llama_token_data_array * candidates_p, float temp) {
8712
8779
const int64_t t_start_sample_us = ggml_time_us ();
8713
8780
0 commit comments