@@ -1673,31 +1673,31 @@ static void ggml_vk_load_shaders(vk_device& device) {
1673
1673
CREATE_MM2 (pipeline_matmul_f16_f32, matmul_f16_f32, wg_denoms, warptile, vk_mat_mat_push_constants, 3 , );
1674
1674
1675
1675
if (device->coopmat_acc_f16_support ) {
1676
- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_0].f16acc , matmul_q4_0_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1677
- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_1].f16acc , matmul_q4_1_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1678
- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_0].f16acc , matmul_q5_0_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1679
- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_1].f16acc , matmul_q5_1_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1680
- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q8_0].f16acc , matmul_q8_0_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1681
-
1682
- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q2_K].f16acc , matmul_q2_k_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1683
- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q3_K].f16acc , matmul_q3_k_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1684
- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_K].f16acc , matmul_q4_k_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1685
- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_K].f16acc , matmul_q5_k_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1686
- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q6_K].f16acc , matmul_q6_k_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1687
- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL].f16acc , matmul_iq4_nl_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1676
+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_0].f16acc , matmul_q4_0_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1677
+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_1].f16acc , matmul_q4_1_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1678
+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_0].f16acc , matmul_q5_0_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1679
+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_1].f16acc , matmul_q5_1_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1680
+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q8_0].f16acc , matmul_q8_0_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1681
+
1682
+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q2_K].f16acc , matmul_q2_k_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1683
+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q3_K].f16acc , matmul_q3_k_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1684
+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_K].f16acc , matmul_q4_k_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1685
+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_K].f16acc , matmul_q5_k_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1686
+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q6_K].f16acc , matmul_q6_k_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1687
+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL].f16acc , matmul_iq4_nl_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1688
1688
} else {
1689
- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_0].f16acc , matmul_q4_0_f32, , wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1690
- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_1].f16acc , matmul_q4_1_f32, , wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1691
- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_0].f16acc , matmul_q5_0_f32, , wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1692
- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_1].f16acc , matmul_q5_1_f32, , wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1693
- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q8_0].f16acc , matmul_q8_0_f32, , wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1689
+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_0].f16acc , matmul_q4_0_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1690
+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_1].f16acc , matmul_q4_1_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1691
+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_0].f16acc , matmul_q5_0_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1692
+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_1].f16acc , matmul_q5_1_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1693
+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q8_0].f16acc , matmul_q8_0_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1694
1694
1695
- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q2_K].f16acc , matmul_q2_k_f32, , wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1696
- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q3_K].f16acc , matmul_q3_k_f32, , wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1697
- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_K].f16acc , matmul_q4_k_f32, , wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1698
- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_K].f16acc , matmul_q5_k_f32, , wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1699
- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q6_K].f16acc , matmul_q6_k_f32, , wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1700
- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL].f16acc , matmul_iq4_nl_f32, , wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1695
+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q2_K].f16acc , matmul_q2_k_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1696
+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q3_K].f16acc , matmul_q3_k_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1697
+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_K].f16acc , matmul_q4_k_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1698
+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_K].f16acc , matmul_q5_k_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1699
+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q6_K].f16acc , matmul_q6_k_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1700
+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL].f16acc , matmul_iq4_nl_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1701
1701
}
1702
1702
1703
1703
// If there's not enough shared memory for row_ids and the result tile, don't create these pipelines.
@@ -1707,31 +1707,31 @@ static void ggml_vk_load_shaders(vk_device& device) {
1707
1707
CREATE_MM2 (pipeline_matmul_id_f16_f32, matmul_id_f16_f32, wg_denoms, warptile, vk_mat_mat_push_constants, 4 , _id);
1708
1708
1709
1709
if (device->coopmat_acc_f16_support ) {
1710
- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_0].f16acc , matmul_id_q4_0_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1711
- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_1].f16acc , matmul_id_q4_1_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1712
- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_0].f16acc , matmul_id_q5_0_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1713
- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_1].f16acc , matmul_id_q5_1_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1714
- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q8_0].f16acc , matmul_id_q8_0_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1715
-
1716
- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q2_K].f16acc , matmul_id_q2_k_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1717
- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q3_K].f16acc , matmul_id_q3_k_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1718
- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_K].f16acc , matmul_id_q4_k_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1719
- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_K].f16acc , matmul_id_q5_k_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1720
- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q6_K].f16acc , matmul_id_q6_k_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1721
- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL].f16acc , matmul_id_iq4_nl_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1710
+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_0].f16acc , matmul_id_q4_0_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1711
+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_1].f16acc , matmul_id_q4_1_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1712
+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_0].f16acc , matmul_id_q5_0_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1713
+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_1].f16acc , matmul_id_q5_1_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1714
+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q8_0].f16acc , matmul_id_q8_0_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1715
+
1716
+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q2_K].f16acc , matmul_id_q2_k_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1717
+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q3_K].f16acc , matmul_id_q3_k_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1718
+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_K].f16acc , matmul_id_q4_k_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1719
+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_K].f16acc , matmul_id_q5_k_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1720
+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q6_K].f16acc , matmul_id_q6_k_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1721
+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL].f16acc , matmul_id_iq4_nl_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1722
1722
} else {
1723
- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_0].f16acc , matmul_id_q4_0_f32, , wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1724
- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_1].f16acc , matmul_id_q4_1_f32, , wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1725
- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_0].f16acc , matmul_id_q5_0_f32, , wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1726
- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_1].f16acc , matmul_id_q5_1_f32, , wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1727
- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q8_0].f16acc , matmul_id_q8_0_f32, , wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1728
-
1729
- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q2_K].f16acc , matmul_id_q2_k_f32, , wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1730
- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q3_K].f16acc , matmul_id_q3_k_f32, , wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1731
- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_K].f16acc , matmul_id_q4_k_f32, , wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1732
- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_K].f16acc , matmul_id_q5_k_f32, , wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1733
- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q6_K].f16acc , matmul_id_q6_k_f32, , wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1734
- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL].f16acc , matmul_id_iq4_nl_f32, , wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1723
+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_0].f16acc , matmul_id_q4_0_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1724
+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_1].f16acc , matmul_id_q4_1_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1725
+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_0].f16acc , matmul_id_q5_0_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1726
+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_1].f16acc , matmul_id_q5_1_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1727
+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q8_0].f16acc , matmul_id_q8_0_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1728
+
1729
+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q2_K].f16acc , matmul_id_q2_k_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1730
+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q3_K].f16acc , matmul_id_q3_k_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1731
+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_K].f16acc , matmul_id_q4_k_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1732
+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_K].f16acc , matmul_id_q5_k_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1733
+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q6_K].f16acc , matmul_id_q6_k_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1734
+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL].f16acc , matmul_id_iq4_nl_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1735
1735
}
1736
1736
}
1737
1737
#undef CREATE_MM2
0 commit comments