Skip to content

Commit 3290a9d

Browse files
committed
Update code to fix issues occurring in MSVC when the number of elements to be processed is not aligned to a multiple of 16
1 parent 9737b2e commit 3290a9d

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

ggml/src/ggml-aarch64.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2436,7 +2436,8 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
24362436
__m256i requiredOrder = _mm256_set_epi32(3 ,2 ,1 ,0, 7 ,6, 5, 4);
24372437

24382438
// Take group of four block_q8_0x4 structures at each pass of the loop and perform dot product operation
2439-
for (; y < nr / 4; y += 4) {
2439+
int anr = nr - nr %16; // Used to align nr with boundary of 16
2440+
for (; y < anr / 4; y += 4) {
24402441
const block_q8_0x4 * a_ptrs[4];
24412442

24422443
a_ptrs[0] = a_ptr_start + (y * nb);

0 commit comments

Comments
 (0)