Commit 231cff5

sync : ggml

1 parent: 3246fe8

21 files changed: +1420, -176 lines

ggml/include/ggml-backend.h
Lines changed: 1 addition & 0 deletions

@@ -63,6 +63,7 @@ extern "C" {
     GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
     GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);

+    // "offset" refers to the offset of the tensor data for setting/getting data
     GGML_API GGML_CALL void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
     GGML_API GGML_CALL void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
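For orientation, the new "offset" comment applies to the synchronous accessors; a minimal usage sketch (assumes a tensor t already allocated in a backend buffer; names are illustrative):

    // copy 4 floats into t starting at element 2 (byte offset 2*sizeof(float))
    float src[4] = {1.0f, 2.0f, 3.0f, 4.0f};
    ggml_backend_tensor_set(t, src, 2*sizeof(float), sizeof(src));

    // read the same region back
    float out[4];
    ggml_backend_tensor_get(t, out, 2*sizeof(float), sizeof(out));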

ggml/include/ggml.h
Lines changed: 85 additions & 38 deletions

@@ -220,7 +220,7 @@
 #include <stdio.h>

 #define GGML_FILE_MAGIC   0x67676d6c // "ggml"
-#define GGML_FILE_VERSION 1
+#define GGML_FILE_VERSION 2

 #define GGML_QNT_VERSION        2    // bump this on quantization format changes
 #define GGML_QNT_VERSION_FACTOR 1000 // do not change this

@@ -453,6 +453,8 @@ extern "C" {
         GGML_OP_SQR,
         GGML_OP_SQRT,
         GGML_OP_LOG,
+        GGML_OP_SIN,
+        GGML_OP_COS,
         GGML_OP_SUM,
         GGML_OP_SUM_ROWS,
         GGML_OP_MEAN,

@@ -490,9 +492,11 @@ extern "C" {
         GGML_OP_CLAMP,
         GGML_OP_CONV_TRANSPOSE_1D,
         GGML_OP_IM2COL,
+        GGML_OP_IM2COL_BACK,
         GGML_OP_CONV_TRANSPOSE_2D,
         GGML_OP_POOL_1D,
         GGML_OP_POOL_2D,
+        GGML_OP_POOL_2D_BACK,
         GGML_OP_UPSCALE, // nearest interpolate
         GGML_OP_PAD,
         GGML_OP_ARANGE,

@@ -969,6 +973,22 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);

+    GGML_API struct ggml_tensor * ggml_sin(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_sin_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_cos(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_cos_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     // return scalar
     GGML_API struct ggml_tensor * ggml_sum(
             struct ggml_context * ctx,
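The new trig ops follow the usual unary-op pattern. A minimal sketch of using them in a compute graph (illustrative only; error handling omitted):

    #include "ggml.h"

    int main(void) {
        struct ggml_init_params params = {
            /*.mem_size   =*/ 16*1024*1024,
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(params);

        struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
        struct ggml_tensor * y = ggml_sin(ctx, x);         // y[i] = sin(x[i])
        struct ggml_tensor * z = ggml_cos_inplace(ctx, y); // reuses y's buffer

        struct ggml_cgraph * gf = ggml_new_graph(ctx);
        ggml_build_forward_expand(gf, z);
        // fill x (e.g. via ggml_set_f32) before computing
        ggml_graph_compute_with_ctx(ctx, gf, /*n_threads=*/1);

        ggml_free(ctx);
        return 0;
    }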
@@ -1566,34 +1586,49 @@ extern "C" {
             float                 min,
             float                 max);

+    // im2col
+    // converts data into a format that effectively results in a convolution when combined with matrix multiplication
     GGML_API struct ggml_tensor * ggml_im2col(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            int                   s0,
-            int                   s1,
-            int                   p0,
-            int                   p1,
-            int                   d0,
-            int                   d1,
-            bool                  is_2D,
-            enum ggml_type        dst_type);
+            struct ggml_tensor  * a,  // convolution kernel
+            struct ggml_tensor  * b,  // data
+            int                   s0, // stride dimension 0
+            int                   s1, // stride dimension 1
+            int                   p0, // padding dimension 0
+            int                   p1, // padding dimension 1
+            int                   d0, // dilation dimension 0
+            int                   d1, // dilation dimension 1
+            bool                  is_2D,
+            enum ggml_type        dst_type);
+
+    GGML_API struct ggml_tensor * ggml_im2col_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,  // convolution kernel
+            struct ggml_tensor  * b,  // gradient of im2col output
+            int64_t             * ne, // shape of im2col input
+            int                   s0, // stride dimension 0
+            int                   s1, // stride dimension 1
+            int                   p0, // padding dimension 0
+            int                   p1, // padding dimension 1
+            int                   d0, // dilation dimension 0
+            int                   d1, // dilation dimension 1
+            bool                  is_2D);

     GGML_API struct ggml_tensor * ggml_conv_depthwise_2d(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            int                   s0,
-            int                   s1,
-            int                   p0,
-            int                   p1,
-            int                   d0,
-            int                   d1);
+            struct ggml_tensor  * a,  // convolution kernel
+            struct ggml_tensor  * b,  // data
+            int                   s0, // stride dimension 0
+            int                   s1, // stride dimension 1
+            int                   p0, // padding dimension 0
+            int                   p1, // padding dimension 1
+            int                   d0, // dilation dimension 0
+            int                   d1); // dilation dimension 1

     GGML_API struct ggml_tensor * ggml_conv_1d(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
+            struct ggml_tensor  * a,  // convolution kernel
+            struct ggml_tensor  * b,  // data
             int                   s0, // stride
             int                   p0, // padding
             int                   d0); // dilation
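To make the new im2col comment concrete: unrolling input patches turns the convolution into a single matrix multiplication. A hedged sketch of calling ggml_im2col directly (parameter values are illustrative; GGML_TYPE_F16 matches what the conv helpers typically request):

    struct ggml_tensor * col = ggml_im2col(ctx, kernel, data,
            /*s0=*/1, /*s1=*/1, /*p0=*/0, /*p1=*/0, /*d0=*/1, /*d1=*/1,
            /*is_2D=*/true, /*dst_type=*/GGML_TYPE_F16);
    // each row of "col" now holds one unrolled input patch, so multiplying it
    // by the flattened kernel (e.g. via ggml_mul_mat) yields the convolution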
@@ -1602,29 +1637,29 @@ extern "C" {
     // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
     GGML_API struct ggml_tensor* ggml_conv_1d_ph(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            int                   s,
-            int                   d);
+            struct ggml_tensor  * a, // convolution kernel
+            struct ggml_tensor  * b, // data
+            int                   s, // stride
+            int                   d); // dilation

     GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            int                   s0,
-            int                   p0,
-            int                   d0);
+            struct ggml_tensor  * a, // convolution kernel
+            struct ggml_tensor  * b, // data
+            int                   s0, // stride
+            int                   p0, // padding
+            int                   d0); // dilation

     GGML_API struct ggml_tensor * ggml_conv_2d(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            int                   s0,
-            int                   s1,
-            int                   p0,
-            int                   p1,
-            int                   d0,
-            int                   d1);
+            struct ggml_tensor  * a,  // convolution kernel
+            struct ggml_tensor  * b,  // data
+            int                   s0, // stride dimension 0
+            int                   s1, // stride dimension 1
+            int                   p0, // padding dimension 0
+            int                   p1, // padding dimension 1
+            int                   d0, // dilation dimension 0
+            int                   d1); // dilation dimension 1


     // kernel size is a->ne[0] x a->ne[1]
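With the parameters now documented, a typical 2D convolution call looks like this (a sketch; in ggml's usual layout "kernel" is [KW, KH, C_in, C_out] and "data" is [W, H, C_in, N]):

    // 3x3 kernel, stride 1, padding 1 ("same"-style), no dilation
    struct ggml_tensor * out = ggml_conv_2d(ctx, kernel, data,
            /*s0=*/1, /*s1=*/1, /*p0=*/1, /*p1=*/1, /*d0=*/1, /*d1=*/1);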
@@ -1686,6 +1721,18 @@ extern "C" {
             float                 p0,
             float                 p1);

+    GGML_API struct ggml_tensor * ggml_pool_2d_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * af, // "a"/input used in forward pass
+            enum ggml_op_pool     op,
+            int                   k0,
+            int                   k1,
+            int                   s0,
+            int                   s1,
+            float                 p0,
+            float                 p1);
+
     // nearest interpolate
     // multiplies ne0 and ne1 by scale factor
     // used in stable-diffusion
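The new backward op mirrors the existing forward ggml_pool_2d, which takes the same kernel/stride/padding arguments. For orientation, a forward call (values illustrative):

    // 2x2 average pooling, stride 2, no padding
    struct ggml_tensor * pooled = ggml_pool_2d(ctx, x, GGML_OP_POOL_AVG,
            /*k0=*/2, /*k1=*/2, /*s0=*/2, /*s1=*/2, /*p0=*/0.0f, /*p1=*/0.0f);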

ggml/src/ggml-cuda.cu
Lines changed: 20 additions & 1 deletion

@@ -9,8 +9,10 @@
 #include "ggml-cuda/binbcast.cuh"
 #include "ggml-cuda/clamp.cuh"
 #include "ggml-cuda/concat.cuh"
+#include "ggml-cuda/conv-transpose-1d.cuh"
 #include "ggml-cuda/convert.cuh"
 #include "ggml-cuda/cpy.cuh"
+#include "ggml-cuda/cross-entropy-loss.cuh"
 #include "ggml-cuda/diagmask.cuh"
 #include "ggml-cuda/dmmv.cuh"
 #include "ggml-cuda/fattn.cuh"

@@ -29,7 +31,6 @@
 #include "ggml-cuda/tsembd.cuh"
 #include "ggml-cuda/unary.cuh"
 #include "ggml-cuda/upscale.cuh"
-#include "ggml-cuda/conv-transpose-1d.cuh"

 #include <algorithm>
 #include <array>

@@ -2181,6 +2182,9 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
         case GGML_OP_ADD:
             ggml_cuda_op_add(ctx, dst);
             break;
+        case GGML_OP_SUB:
+            ggml_cuda_op_sub(ctx, dst);
+            break;
         case GGML_OP_ACC:
             ggml_cuda_op_acc(ctx, dst);
             break;

@@ -2267,6 +2271,12 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
         case GGML_OP_SQRT:
             ggml_cuda_op_sqrt(ctx, dst);
             break;
+        case GGML_OP_SIN:
+            ggml_cuda_op_sin(ctx, dst);
+            break;
+        case GGML_OP_COS:
+            ggml_cuda_op_cos(ctx, dst);
+            break;
         case GGML_OP_CLAMP:
             ggml_cuda_op_clamp(ctx, dst);
             break;

@@ -2303,6 +2313,9 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
         case GGML_OP_FLASH_ATTN_EXT:
             ggml_cuda_flash_attn_ext(ctx, dst);
             break;
+        case GGML_OP_CROSS_ENTROPY_LOSS:
+            ggml_cuda_cross_entropy_loss(ctx, dst);
+            break;
         default:
             return false;
     }

@@ -2610,6 +2623,7 @@ GGML_CALL static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t
                 assert(node->buffer->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device));
                 for (int j = 0; j < GGML_MAX_SRC; j++) {
                     if (node->src[j] != nullptr) {
+                        assert(node->src[j]->buffer);
                         assert(node->src[j]->buffer->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device) || ggml_backend_buffer_is_cuda_split(node->src[j]->buffer));
                     }
                 }

@@ -2853,12 +2867,15 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons
         case GGML_OP_TRANSPOSE:
         case GGML_OP_NORM:
         case GGML_OP_ADD:
+        case GGML_OP_SUB:
         case GGML_OP_MUL:
         case GGML_OP_DIV:
         case GGML_OP_RMS_NORM:
         case GGML_OP_SCALE:
         case GGML_OP_SQR:
         case GGML_OP_SQRT:
+        case GGML_OP_SIN:
+        case GGML_OP_COS:
         case GGML_OP_CLAMP:
         case GGML_OP_CONT:
         case GGML_OP_DIAG_MASK_INF:

@@ -2890,6 +2907,8 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons
             }
             return ggml_cuda_info().devices[cuda_ctx->device].cc >= CC_VOLTA &&
                 op->src[1]->type == GGML_TYPE_F16 && op->src[2]->type == GGML_TYPE_F16;
+        case GGML_OP_CROSS_ENTROPY_LOSS:
+            return true;
 #endif // defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
         default:
             return false;

ggml/src/ggml-cuda/binbcast.cu
Lines changed: 8 additions & 0 deletions

@@ -9,6 +9,10 @@ static __device__ __forceinline__ float op_add(const float a, const float b) {
     return a + b;
 }

+static __device__ __forceinline__ float op_sub(const float a, const float b) {
+    return a - b;
+}
+
 static __device__ __forceinline__ float op_mul(const float a, const float b) {
     return a * b;
 }

@@ -271,6 +275,10 @@ void ggml_cuda_op_add(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     ggml_cuda_op_bin_bcast<bin_bcast_cuda<op_add>>(dst->src[0], dst->src[1], dst, dst->src[0]->data, dst->src[1]->data, dst->data, ctx.stream());
 }

+void ggml_cuda_op_sub(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
+    ggml_cuda_op_bin_bcast<bin_bcast_cuda<op_sub>>(dst->src[0], dst->src[1], dst, dst->src[0]->data, dst->src[1]->data, dst->data, ctx.stream());
+}
+
 void ggml_cuda_op_mul(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     ggml_cuda_op_bin_bcast<bin_bcast_cuda<op_mul>>(dst->src[0], dst->src[1], dst, dst->src[0]->data, dst->src[1]->data, dst->data, ctx.stream());
 }
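Note that op_sub is only the scalar functor; the bin_bcast_cuda template turns it into a broadcasting kernel. A stripped-down CUDA sketch of what gets instantiated for the contiguous, same-shape case (the real template also handles broadcasting and non-contiguous strides):

    // illustrative element-wise subtraction kernel, one thread per element
    static __global__ void k_sub_f32(const float * a, const float * b, float * dst, const int n) {
        const int i = blockDim.x*blockIdx.x + threadIdx.x;
        if (i >= n) {
            return;
        }
        dst[i] = a[i] - b[i];
    }

    // launch sketch: k_sub_f32<<<(n + 255)/256, 256, 0, stream>>>(a, b, dst, n);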

ggml/src/ggml-cuda/binbcast.cuh
Lines changed: 1 addition & 0 deletions

@@ -2,5 +2,6 @@

 void ggml_cuda_op_repeat(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
 void ggml_cuda_op_add(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
+void ggml_cuda_op_sub(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
 void ggml_cuda_op_mul(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
 void ggml_cuda_op_div(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
