@@ -2285,13 +2285,21 @@ static void ggml_vk_load_shaders(vk_device& device) {
2285
2285
ggml_vk_create_pipeline (device, device->pipeline_contig_cpy_f32_f32 , " contig_cpy_f32_f32" , contig_cpy_f32_f32_len, contig_cpy_f32_f32_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {512 , 1 , 1 }, {}, 1 );
2286
2286
ggml_vk_create_pipeline (device, device->pipeline_contig_cpy_f32_f16 , " contig_cpy_f32_f16" , contig_cpy_f32_f16_len, contig_cpy_f32_f16_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {512 , 1 , 1 }, {}, 1 );
2287
2287
ggml_vk_create_pipeline (device, device->pipeline_contig_cpy_f16_f16 , " contig_cpy_f16_f16" , contig_cpy_f16_f16_len, contig_cpy_f16_f16_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {512 , 1 , 1 }, {}, 1 );
2288
-
2289
- ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q4_0], " cpy_f32_q4_0" , cpy_f32_q4_0_len, cpy_f32_q4_0_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_0), 1 , 1 }, {}, 1 );
2290
- ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q4_1], " cpy_f32_q4_1" , cpy_f32_q4_1_len, cpy_f32_q4_1_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_1), 1 , 1 }, {}, 1 );
2291
- ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q5_0], " cpy_f32_q5_0" , cpy_f32_q5_0_len, cpy_f32_q5_0_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q5_0), 1 , 1 }, {}, 1 );
2292
- ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q5_1], " cpy_f32_q5_1" , cpy_f32_q5_1_len, cpy_f32_q5_1_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q5_1), 1 , 1 }, {}, 1 );
2293
- ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q8_0], " cpy_f32_q8_0" , cpy_f32_q8_0_len, cpy_f32_q8_0_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q8_0), 1 , 1 }, {}, 1 );
2294
- ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_IQ4_NL], " cpy_f32_iq4_nl" , cpy_f32_iq4_nl_len, cpy_f32_iq4_nl_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_IQ4_NL), 1 , 1 }, {}, 1 );
2288
+ if (device->float_controls_rte_fp16 ) {
2289
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q4_0], " cpy_f32_q4_0" , cpy_f32_q4_0_rte_len, cpy_f32_q4_0_rte_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_0), 1 , 1 }, {}, 1 );
2290
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q4_1], " cpy_f32_q4_1" , cpy_f32_q4_1_rte_len, cpy_f32_q4_1_rte_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_1), 1 , 1 }, {}, 1 );
2291
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q5_0], " cpy_f32_q5_0" , cpy_f32_q5_0_rte_len, cpy_f32_q5_0_rte_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q5_0), 1 , 1 }, {}, 1 );
2292
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q5_1], " cpy_f32_q5_1" , cpy_f32_q5_1_rte_len, cpy_f32_q5_1_rte_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q5_1), 1 , 1 }, {}, 1 );
2293
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q8_0], " cpy_f32_q8_0" , cpy_f32_q8_0_rte_len, cpy_f32_q8_0_rte_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q8_0), 1 , 1 }, {}, 1 );
2294
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_IQ4_NL], " cpy_f32_iq4_nl" , cpy_f32_iq4_nl_rte_len, cpy_f32_iq4_nl_rte_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_IQ4_NL), 1 , 1 }, {}, 1 );
2295
+ } else {
2296
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q4_0], " cpy_f32_q4_0" , cpy_f32_q4_0_len, cpy_f32_q4_0_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_0), 1 , 1 }, {}, 1 );
2297
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q4_1], " cpy_f32_q4_1" , cpy_f32_q4_1_len, cpy_f32_q4_1_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_1), 1 , 1 }, {}, 1 );
2298
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q5_0], " cpy_f32_q5_0" , cpy_f32_q5_0_len, cpy_f32_q5_0_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q5_0), 1 , 1 }, {}, 1 );
2299
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q5_1], " cpy_f32_q5_1" , cpy_f32_q5_1_len, cpy_f32_q5_1_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q5_1), 1 , 1 }, {}, 1 );
2300
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q8_0], " cpy_f32_q8_0" , cpy_f32_q8_0_len, cpy_f32_q8_0_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q8_0), 1 , 1 }, {}, 1 );
2301
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_IQ4_NL], " cpy_f32_iq4_nl" , cpy_f32_iq4_nl_len, cpy_f32_iq4_nl_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_IQ4_NL), 1 , 1 }, {}, 1 );
2302
+ }
2295
2303
2296
2304
ggml_vk_create_pipeline (device, device->pipeline_cpy_quant_f32 [GGML_TYPE_Q4_0], " cpy_q4_0_f32" , cpy_q4_0_f32_len, cpy_q4_0_f32_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_0), 1 , 1 }, {}, 1 );
2297
2305
ggml_vk_create_pipeline (device, device->pipeline_cpy_quant_f32 [GGML_TYPE_Q4_1], " cpy_q4_1_f32" , cpy_q4_1_f32_len, cpy_q4_1_f32_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_1), 1 , 1 }, {}, 1 );
0 commit comments