Unit test for quantization functions #953
Merged
@@ -0,0 +1,154 @@
// Unit tests for quantization specific functions - quantize, dequantize and dot product

#include "ggml.h"

#undef NDEBUG
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <string>
#include <vector>

const float MAX_QUANTIZATION_REFERENCE_ERROR = 0.0001;
const float MAX_QUANTIZATION_TOTAL_ERROR = 0.002;
const float MAX_DOT_PRODUCT_ERROR = 0.02;

const char* RESULT_STR[] = {"ok", "FAILED"};

// Generate synthetic data
void generate_data(float offset, size_t n, float * dst) {
    for (size_t i = 0; i < n; i++) {
        dst[i] = 0.1 + 2*cosf(i + offset);
    }
}

// Calculate RMSE between two float arrays
float array_rmse(const float * a1, const float * a2, size_t n) {
    double sum = 0;
    for (size_t i = 0; i < n; i++) {
        double diff = a1[i] - a2[i];
        sum += diff * diff;
    }
    return sqrtf(sum) / n;
}

// Total quantization error on test data
float total_quantization_error(quantize_fns_t & qfns, size_t test_size, const float * test_data) {
    std::vector<uint8_t> tmp_q(test_size);
    std::vector<float> tmp_out(test_size);

    qfns.quantize_row_q(test_data, tmp_q.data(), test_size);
    qfns.dequantize_row_q(tmp_q.data(), tmp_out.data(), test_size);
    return array_rmse(test_data, tmp_out.data(), test_size);
}

// Quantization error relative to the reference implementation
float reference_quantization_error(quantize_fns_t & qfns, size_t test_size, const float * test_data) {
    std::vector<uint8_t> tmp_q(test_size);
    std::vector<float> tmp_out(test_size);
    std::vector<float> tmp_out_ref(test_size);

    qfns.quantize_row_q(test_data, tmp_q.data(), test_size);
    qfns.dequantize_row_q(tmp_q.data(), tmp_out.data(), test_size);

    qfns.quantize_row_q_reference(test_data, tmp_q.data(), test_size);
    qfns.dequantize_row_q(tmp_q.data(), tmp_out_ref.data(), test_size);

    return array_rmse(tmp_out.data(), tmp_out_ref.data(), test_size);
}

float dot_product(const float * a1, const float * a2, size_t test_size) {
    double sum = 0;
    for (size_t i = 0; i < test_size; i++) {
        sum += a1[i] * a2[i];
    }
    return sum;
}

// Total dot product error
float dot_product_error(quantize_fns_t & qfns, size_t test_size, const float * test_data1, const float * test_data2) {
    std::vector<uint8_t> tmp_q1(test_size);
    std::vector<uint8_t> tmp_q2(test_size*2);

    qfns.quantize_row_q(test_data1, tmp_q1.data(), test_size);
    qfns.quantize_row_q_dot(test_data2, tmp_q2.data(), test_size);

    float result = INFINITY;
    qfns.vec_dot_q(test_size, &result, tmp_q1.data(), tmp_q2.data());

    const float dot_ref = dot_product(test_data1, test_data2, test_size);

    return fabsf(result - dot_ref) / test_size;
}

int main(int argc, char * argv[]) {
    bool verbose = false;
    const size_t test_size = 32 * 128;

    std::string arg;
    for (int i = 1; i < argc; i++) {
        arg = argv[i];

        if (arg == "-v") {
            verbose = true;
        } else {
            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
            return 1;
        }
    }

    std::vector<float> test_data(test_size);
    std::vector<float> test_data2(test_size);

    generate_data(0.0, test_data.size(), test_data.data());
    generate_data(1.0, test_data2.size(), test_data2.data());

    // Initialize GGML, ensures float conversion tables are initialized
    struct ggml_init_params ggml_params = {
        /* .mem_size   = */ 1*1024,
        /* .mem_buffer = */ NULL,
        /* .no_alloc   = */ true,
    };
    struct ggml_context * ctx = ggml_init(ggml_params);

    int num_failed = 0;
    bool failed = false;

    for (int i = 0; i < GGML_TYPE_COUNT; i++) {
        ggml_type type = (ggml_type) i;
        quantize_fns_t qfns = ggml_internal_get_quantize_fn(i);

        if (qfns.quantize_row_q) {
            const float total_error = total_quantization_error(qfns, test_size, test_data.data());
            failed = !(total_error < MAX_QUANTIZATION_TOTAL_ERROR);
            num_failed += failed;
            if (failed || verbose) {
                printf("%5s absolute quantization error: %s (%f)\n", ggml_type_name(type), RESULT_STR[failed], total_error);
            }

            const float reference_error = reference_quantization_error(qfns, test_size, test_data.data());
            failed = !(reference_error < MAX_QUANTIZATION_REFERENCE_ERROR);
            num_failed += failed;
            if (failed || verbose) {
                printf("%5s reference implementation error: %s (%f)\n", ggml_type_name(type), RESULT_STR[failed], reference_error);
            }

            const float vec_dot_error = dot_product_error(qfns, test_size, test_data.data(), test_data2.data());
            failed = !(vec_dot_error < MAX_DOT_PRODUCT_ERROR);
            num_failed += failed;
            if (failed || verbose) {
                printf("%5s dot product error: %s (%f)\n", ggml_type_name(type), RESULT_STR[failed], vec_dot_error);
            }
        }
    }

    if (num_failed || verbose) {
        printf("%d tests failed\n", num_failed);
    }

    ggml_free(ctx);

    return num_failed > 0;
}
I think this (or the maximum errors) needs improvement. I tried varying this slightly, and with -0.2 + 2*cosf(i + offset), the q4_0 dot product fails. We should try to create data that matches the distribution in the actual model, maybe using std::normal_distribution. @prusnak made some histograms of the models: #397 (comment). Since Q4_0 and Q4_1 effectively differ in how they handle a bias in the data (the 0.1 in your case), we might want to test separately with and without bias.
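A minimal sketch of the kind of generator this comment suggests; the function name, the fixed seed, and the mean/stddev values are illustrative assumptions, not taken from the PR or from the linked histograms:

// Hypothetical replacement for generate_data: Gaussian samples with a
// configurable bias. A fixed seed keeps the test data deterministic across runs.
#include <random>

void generate_data_normal(float bias, size_t n, float * dst) {
    std::mt19937 rng(12345);
    std::normal_distribution<float> dist(0.0f, 1.0f);
    for (size_t i = 0; i < n; i++) {
        dst[i] = bias + dist(rng);
    }
}

Calling this twice, e.g. once with bias 0.0f and once with 0.1f, would exercise the unbiased and biased cases separately.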
I can try to match the distribution better, but I somewhat disagree with the reasoning here: it doesn't matter whether the data matches the model, as long as the test fails when an implementation is broken.
If anything, it might be good to add some "unusual" patterns like all zeroes, all negative/positive, etc., as sketched below.
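A sketch of such edge-case inputs; none of these helpers exist in the PR, and the constant values are arbitrary:

// Hypothetical edge-case generators for stressing the quantizers.
#include <vector>

std::vector<float> all_zeroes(size_t n)   { return std::vector<float>(n, 0.0f);  } // degenerate case for the scale computation
std::vector<float> all_negative(size_t n) { return std::vector<float>(n, -1.5f); } // one-sided range
std::vector<float> all_positive(size_t n) { return std::vector<float>(n, 1.5f);  } // one-sided range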
Yes, maybe it's better to have deterministic test data. So it's just a matter of the thresholds being set too tight?
Edit: I can't seem to reproduce the problem right now, so I guess the maximum errors are okay as they are.