Skip to content

Commit 6ed3047

Browse files
Fix array length mismatches
1 parent c5fd9c5 commit 6ed3047

File tree

1 file changed

+11
-21
lines changed

1 file changed

+11
-21
lines changed

ggml/src/ggml-vulkan/vulkan-shaders/types.comp

Lines changed: 11 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -383,10 +383,8 @@ shared uvec2 iq2xxs_grid[256];
 void init_iq_shmem()
 {
     // copy the table into shared memory and sync
-    if (gl_LocalInvocationIndex.x < 32) {
-        for (uint i = gl_LocalInvocationIndex.x; i < 512; i += 32) {
-            iq2xxs_grid[i] = iq2xxs_grid_const[i];
-        }
+    for (uint i = gl_LocalInvocationIndex.x; i < iq2xxs_grid.length(); i += gl_WorkGroupSize.x) {
+        iq2xxs_grid[i] = iq2xxs_grid_const[i];
     }
     barrier();
 }
@@ -552,10 +550,8 @@ shared uvec2 iq2xs_grid[512];
 void init_iq_shmem()
 {
     // copy the table into shared memory and sync
-    if (gl_LocalInvocationIndex.x < 32) {
-        for (uint i = gl_LocalInvocationIndex.x; i < 512; i += 32) {
-            iq2xs_grid[i] = iq2xs_grid_const[i];
-        }
+    for (uint i = gl_LocalInvocationIndex.x; i < iq2xs_grid.length(); i += gl_WorkGroupSize.x) {
+        iq2xs_grid[i] = iq2xs_grid_const[i];
     }
     barrier();
 }
@@ -843,10 +839,8 @@ shared uvec2 iq2s_grid[1024];
 void init_iq_shmem()
 {
     // copy the table into shared memory and sync
-    if (gl_LocalInvocationIndex.x < 32) {
-        for (uint i = gl_LocalInvocationIndex.x; i < 1024; i += 32) {
-            iq2s_grid[i] = iq2s_grid_const[i];
-        }
+    for (uint i = gl_LocalInvocationIndex.x; i < iq2s_grid.length(); i += gl_WorkGroupSize.x) {
+        iq2s_grid[i] = iq2s_grid_const[i];
     }
     barrier();
 }
@@ -908,15 +902,13 @@ const uint32_t iq3xxs_grid_const[256] = {
     0x3e1c1c1c, 0x3e1c3404, 0x3e24140c, 0x3e24240c, 0x3e2c0404, 0x3e2c0414, 0x3e2c1424, 0x3e341c04,
 };
 
-shared uint32_t iq3xxs_grid[512];
+shared uint32_t iq3xxs_grid[256];
 
 void init_iq_shmem()
 {
     // copy the table into shared memory and sync
-    if (gl_LocalInvocationIndex.x < 32) {
-        for (uint i = gl_LocalInvocationIndex.x; i < 512; i += 32) {
-            iq3xxs_grid[i] = iq3xxs_grid_const[i];
-        }
+    for (uint i = gl_LocalInvocationIndex.x; i < iq3xxs_grid.length(); i += gl_WorkGroupSize.x) {
+        iq3xxs_grid[i] = iq3xxs_grid_const[i];
     }
     barrier();
 }
@@ -1022,10 +1014,8 @@ shared uint32_t iq3s_grid[512];
 void init_iq_shmem()
 {
     // copy the table into shared memory and sync
-    if (gl_LocalInvocationIndex.x < 32) {
-        for (uint i = gl_LocalInvocationIndex.x; i < 512; i += 32) {
-            iq3s_grid[i] = iq3s_grid_const[i];
-        }
+    for (uint i = gl_LocalInvocationIndex.x; i < iq3s_grid.length(); i += gl_WorkGroupSize.x) {
+        iq3s_grid[i] = iq3s_grid_const[i];
     }
     barrier();
 }

0 commit comments

Comments
 (0)