@@ -1695,14 +1695,13 @@ static bool falcon_eval_internal(
1695
1695
1696
1696
struct ggml_tensor * K = ggml_permute (
1697
1697
ctx0,
1698
- ggml_reshape_3d (
1698
+ ggml_view_3d (
1699
1699
ctx0,
1700
- ggml_view_1d (ctx0, kv_self.k , (n_past + N) * n_head_kv * head_dim,
1701
- il * n_ctx *
1702
- ggml_element_size (kv_self.k ) *
1703
- n_head_kv *
1704
- head_dim),
1705
- head_dim, n_head_kv, n_past + N),
1700
+ kv_self.k ,
1701
+ head_dim, n_head_kv, n_past + N,
1702
+ head_dim * sizeof_wtype,
1703
+ head_dim * n_head_kv * sizeof_wtype,
1704
+ il * n_ctx * ggml_element_size (kv_self.k ) * n_head_kv * head_dim),
1706
1705
0 , 2 , 1 , 3 );
1707
1706
1708
1707
// K * Q
@@ -1741,14 +1740,13 @@ static bool falcon_eval_internal(
1741
1740
// V_trans = Vmem.view(n_embd/n_head, n_head, n_past + N).permute(1, 2, 0, 3).contiguous()
1742
1741
struct ggml_tensor * V = ggml_permute (
1743
1742
ctx0,
1744
- ggml_reshape_3d (
1743
+ ggml_view_3d (
1745
1744
ctx0,
1746
- ggml_view_1d (ctx0, kv_self.v , (n_past + N) * n_head_kv * head_dim,
1747
- il * n_ctx *
1748
- ggml_element_size (model.kv_self .v ) *
1749
- n_head_kv *
1750
- head_dim),
1751
- head_dim, n_head_kv, n_past + N),
1745
+ kv_self.v ,
1746
+ head_dim, n_head_kv, n_past + N,
1747
+ head_dim * sizeof_wtype,
1748
+ head_dim * n_head_kv * sizeof_wtype,
1749
+ il * n_ctx * ggml_element_size (kv_self.v ) * n_head_kv * head_dim),
1752
1750
0 , 2 , 1 , 3 );
1753
1751
1754
1752
if (0 )
0 commit comments