Skip to content

Commit c51daef

Browse files
committed
llama : advanced batch splits
This includes equal-sequence-length batch splits, which are useful to simplify recurrent model operators.

* llama : always make recurrent state slots contiguous
* ggml : simplify mamba operators
1 parent a38b884 commit c51daef

File tree

3 files changed

+1060
-647
lines changed

3 files changed

+1060
-647
lines changed

ggml/include/ggml.h

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1760,10 +1760,8 @@ extern "C" {
17601760

17611761
GGML_API struct ggml_tensor * ggml_ssm_conv(
17621762
struct ggml_context * ctx,
1763-
struct ggml_tensor * s,
1764-
struct ggml_tensor * x,
1765-
struct ggml_tensor * c,
1766-
struct ggml_tensor * sq);
1763+
struct ggml_tensor * sx,
1764+
struct ggml_tensor * c);
17671765

17681766
GGML_API struct ggml_tensor * ggml_ssm_scan(
17691767
struct ggml_context * ctx,
@@ -1772,8 +1770,7 @@ extern "C" {
17721770
struct ggml_tensor * dt,
17731771
struct ggml_tensor * A,
17741772
struct ggml_tensor * B,
1775-
struct ggml_tensor * C,
1776-
struct ggml_tensor * sq);
1773+
struct ggml_tensor * C);
17771774

17781775
// partition into non-overlapping windows with padding if needed
17791776
// example:

0 commit comments

Comments
 (0)