@@ -383,10 +383,8 @@ shared uvec2 iq2xxs_grid[256];
383
383
void init_iq_shmem()
384
384
{
385
385
// copy the table into shared memory and sync
386
- if (gl_LocalInvocationIndex.x < 32) {
387
- for (uint i = gl_LocalInvocationIndex.x; i < 512; i += 32) {
388
- iq2xxs_grid[i] = iq2xxs_grid_const[i];
389
- }
386
+ for (uint i = gl_LocalInvocationIndex.x; i < iq2xxs_grid.length(); i += gl_WorkGroupSize.x) {
387
+ iq2xxs_grid[i] = iq2xxs_grid_const[i];
390
388
}
391
389
barrier();
392
390
}
@@ -552,10 +550,8 @@ shared uvec2 iq2xs_grid[512];
552
550
void init_iq_shmem()
553
551
{
554
552
// copy the table into shared memory and sync
555
- if (gl_LocalInvocationIndex.x < 32) {
556
- for (uint i = gl_LocalInvocationIndex.x; i < 512; i += 32) {
557
- iq2xs_grid[i] = iq2xs_grid_const[i];
558
- }
553
+ for (uint i = gl_LocalInvocationIndex.x; i < iq2xs_grid.length(); i += gl_WorkGroupSize.x) {
554
+ iq2xs_grid[i] = iq2xs_grid_const[i];
559
555
}
560
556
barrier();
561
557
}
@@ -843,10 +839,8 @@ shared uvec2 iq2s_grid[1024];
843
839
void init_iq_shmem()
844
840
{
845
841
// copy the table into shared memory and sync
846
- if (gl_LocalInvocationIndex.x < 32) {
847
- for (uint i = gl_LocalInvocationIndex.x; i < 1024; i += 32) {
848
- iq2s_grid[i] = iq2s_grid_const[i];
849
- }
842
+ for (uint i = gl_LocalInvocationIndex.x; i < iq2s_grid.length(); i += gl_WorkGroupSize.x) {
843
+ iq2s_grid[i] = iq2s_grid_const[i];
850
844
}
851
845
barrier();
852
846
}
@@ -908,15 +902,13 @@ const uint32_t iq3xxs_grid_const[256] = {
908
902
0x3e1c1c1c, 0x3e1c3404, 0x3e24140c, 0x3e24240c, 0x3e2c0404, 0x3e2c0414, 0x3e2c1424, 0x3e341c04,
909
903
};
910
904
911
- shared uint32_t iq3xxs_grid[512 ];
905
+ shared uint32_t iq3xxs_grid[256 ];
912
906
913
907
void init_iq_shmem()
914
908
{
915
909
// copy the table into shared memory and sync
916
- if (gl_LocalInvocationIndex.x < 32) {
917
- for (uint i = gl_LocalInvocationIndex.x; i < 512; i += 32) {
918
- iq3xxs_grid[i] = iq3xxs_grid_const[i];
919
- }
910
+ for (uint i = gl_LocalInvocationIndex.x; i < iq3xxs_grid.length(); i += gl_WorkGroupSize.x) {
911
+ iq3xxs_grid[i] = iq3xxs_grid_const[i];
920
912
}
921
913
barrier();
922
914
}
@@ -1022,10 +1014,8 @@ shared uint32_t iq3s_grid[512];
1022
1014
void init_iq_shmem()
1023
1015
{
1024
1016
// copy the table into shared memory and sync
1025
- if (gl_LocalInvocationIndex.x < 32) {
1026
- for (uint i = gl_LocalInvocationIndex.x; i < 512; i += 32) {
1027
- iq3s_grid[i] = iq3s_grid_const[i];
1028
- }
1017
+ for (uint i = gl_LocalInvocationIndex.x; i < iq3s_grid.length(); i += gl_WorkGroupSize.x) {
1018
+ iq3s_grid[i] = iq3s_grid_const[i];
1029
1019
}
1030
1020
barrier();
1031
1021
}
0 commit comments