@@ -299,7 +299,6 @@ struct vk_device_struct {
299
299
vk_pipeline pipeline_cpy_f32_f32, pipeline_cpy_f32_f16, pipeline_cpy_f16_f16;
300
300
vk_pipeline pipeline_contig_cpy_f32_f32, pipeline_contig_cpy_f32_f16, pipeline_contig_cpy_f16_f16;
301
301
vk_pipeline pipeline_cpy_f32_quant[GGML_TYPE_COUNT];
302
- vk_pipeline pipeline_cpy_f32_quant_rte[GGML_TYPE_COUNT];
303
302
vk_pipeline pipeline_cpy_quant_f32[GGML_TYPE_COUNT];
304
303
vk_pipeline pipeline_norm_f32;
305
304
vk_pipeline pipeline_group_norm_f32;
@@ -2283,12 +2282,12 @@ static void ggml_vk_load_shaders(vk_device& device) {
2283
2282
ggml_vk_create_pipeline (device, device->pipeline_contig_cpy_f32_f16 , " contig_cpy_f32_f16" , contig_cpy_f32_f16_len, contig_cpy_f32_f16_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {512 , 1 , 1 }, {}, 1 );
2284
2283
ggml_vk_create_pipeline (device, device->pipeline_contig_cpy_f16_f16 , " contig_cpy_f16_f16" , contig_cpy_f16_f16_len, contig_cpy_f16_f16_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {512 , 1 , 1 }, {}, 1 );
2285
2284
if (device->float_controls_rte_fp16 ) {
2286
- ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant_rte [GGML_TYPE_Q4_0], " cpy_f32_q4_0" , cpy_f32_q4_0_len, cpy_f32_q4_0_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_0), 1 , 1 }, {}, 1 );
2287
- ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant_rte [GGML_TYPE_Q4_1], " cpy_f32_q4_1" , cpy_f32_q4_1_rte_len, cpy_f32_q4_1_rte_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_1), 1 , 1 }, {}, 1 );
2288
- ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant_rte [GGML_TYPE_Q5_0], " cpy_f32_q5_0" , cpy_f32_q5_0_rte_len, cpy_f32_q5_0_rte_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q5_0), 1 , 1 }, {}, 1 );
2289
- ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant_rte [GGML_TYPE_Q5_1], " cpy_f32_q5_1" , cpy_f32_q5_1_rte_len, cpy_f32_q5_1_rte_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q5_1), 1 , 1 }, {}, 1 );
2290
- ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant_rte [GGML_TYPE_Q8_0], " cpy_f32_q8_0" , cpy_f32_q8_0_rte_len, cpy_f32_q8_0_rte_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q8_0), 1 , 1 }, {}, 1 );
2291
- ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant_rte [GGML_TYPE_IQ4_NL], " cpy_f32_iq4_nl" , cpy_f32_iq4_nl_rte_len, cpy_f32_iq4_nl_rte_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_IQ4_NL), 1 , 1 }, {}, 1 );
2285
+ // Inside the float_controls_rte_fp16 branch every quant type loads its *_rte shader; Q4_0 must too, otherwise it gets the same non-RTE shader as the else branch.
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q4_0], " cpy_f32_q4_0" , cpy_f32_q4_0_rte_len, cpy_f32_q4_0_rte_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_0), 1 , 1 }, {}, 1 );
2286
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q4_1], " cpy_f32_q4_1" , cpy_f32_q4_1_rte_len, cpy_f32_q4_1_rte_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_1), 1 , 1 }, {}, 1 );
2287
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q5_0], " cpy_f32_q5_0" , cpy_f32_q5_0_rte_len, cpy_f32_q5_0_rte_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q5_0), 1 , 1 }, {}, 1 );
2288
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q5_1], " cpy_f32_q5_1" , cpy_f32_q5_1_rte_len, cpy_f32_q5_1_rte_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q5_1), 1 , 1 }, {}, 1 );
2289
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q8_0], " cpy_f32_q8_0" , cpy_f32_q8_0_rte_len, cpy_f32_q8_0_rte_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q8_0), 1 , 1 }, {}, 1 );
2290
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_IQ4_NL], " cpy_f32_iq4_nl" , cpy_f32_iq4_nl_rte_len, cpy_f32_iq4_nl_rte_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_IQ4_NL), 1 , 1 }, {}, 1 );
2292
2291
} else {
2293
2292
ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q4_0], " cpy_f32_q4_0" , cpy_f32_q4_0_len, cpy_f32_q4_0_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_0), 1 , 1 }, {}, 1 );
2294
2293
ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q4_1], " cpy_f32_q4_1" , cpy_f32_q4_1_len, cpy_f32_q4_1_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_1), 1 , 1 }, {}, 1 );
@@ -4150,7 +4149,7 @@ static vk_pipeline ggml_vk_get_cpy_pipeline(ggml_backend_vk_context * ctx, const
4150
4149
case GGML_TYPE_Q5_1:
4151
4150
case GGML_TYPE_Q8_0:
4152
4151
case GGML_TYPE_IQ4_NL:
4153
- return ctx->device ->pipeline_cpy_f32_quant_rte [to];
4152
+ return ctx->device ->pipeline_cpy_f32_quant [to];
4154
4153
default :
4155
4154
break ;
4156
4155
}
0 commit comments