Skip to content

Commit 16b3d96

Browse files
author
Aidan
committed
Add freq factors
1 parent 9b3d833 commit 16b3d96

File tree

1 file changed

+57
-37
lines changed

1 file changed

+57
-37
lines changed

ggml-sycl.cpp

Lines changed: 57 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -9232,12 +9232,11 @@ static void rope(
92329232
dst[i + 1] = x0*sin_theta + x1*cos_theta;
92339233
}
92349234

9235-
template<typename T, bool has_pos>
9235+
template<typename T, bool has_pos, bool has_freq_facs>
92369236
static void rope_neox(
92379237
const T * x, T * dst, int ncols, int n_dims, const int32_t * pos, float freq_scale, int p_delta_rows,
9238-
float ext_factor, float attn_factor, rope_corr_dims corr_dims, float theta_scale, float inv_ndims
9239-
,
9240-
const sycl::nd_item<3> &item_ct1) {
9238+
float ext_factor, float attn_factor, rope_corr_dims corr_dims, float theta_scale, float inv_ndims,
9239+
const float * freq_factors, const sycl::nd_item<3> &item_ct1) {
92419240
const int col = 2 * (item_ct1.get_local_range(1) * item_ct1.get_group(1) +
92429241
item_ct1.get_local_id(1));
92439242

@@ -9265,8 +9264,10 @@ static void rope_neox(
92659264
float cur_rot = inv_ndims * ic - ib;
92669265

92679266
const int p = has_pos ? pos[i2] : 0;
9267+
const float freq_factor = has_freq_facs ? freq_factors[ic/2] : 1.0f;
9268+
92689269
const float theta_base =
9269-
p * freq_scale * dpct::pow(theta_scale, col / 2.0f);
9270+
p * freq_scale * dpct::pow(theta_scale, col / 2.0f)/freq_factor;
92709271

92719272
float cos_theta, sin_theta;
92729273
rope_yarn(theta_base, freq_scale, corr_dims, cur_rot, ext_factor, attn_factor, &cos_theta, &sin_theta);
@@ -12881,7 +12882,7 @@ static void rope_neox_sycl(const T *x, T *dst, int ncols, int n_dims, int nrows,
1288112882
const int32_t *pos, float freq_scale,
1288212883
int p_delta_rows, float freq_base, float ext_factor,
1288312884
float attn_factor, rope_corr_dims corr_dims,
12884-
dpct::queue_ptr stream) {
12885+
const float * freq_factors, dpct::queue_ptr stream) {
1288512886
GGML_ASSERT(ncols % 2 == 0);
1288612887
const sycl::range<3> block_dims(1, SYCL_ROPE_BLOCK_SIZE, 1);
1288712888
const int num_blocks_x = (ncols + 2*SYCL_ROPE_BLOCK_SIZE - 1) / (2*SYCL_ROPE_BLOCK_SIZE);
@@ -12891,38 +12892,48 @@ static void rope_neox_sycl(const T *x, T *dst, int ncols, int n_dims, int nrows,
1289112892
const float inv_ndims = -1.0f / n_dims;
1289212893

1289312894
if (pos == nullptr) {
12894-
/*
12895-
DPCT1049:42: The work-group size passed to the SYCL kernel may exceed
12896-
the limit. To get the device limit, query
12897-
info::device::max_work_group_size. Adjust the work-group size if needed.
12898-
*/
1289912895
dpct::has_capability_or_fail(stream->get_device(),
1290012896
{sycl::aspect::fp16});
12901-
12902-
stream->parallel_for(
12903-
sycl::nd_range<3>(block_nums * block_dims, block_dims),
12904-
[=](sycl::nd_item<3> item_ct1) {
12905-
rope_neox<T, false>(x, dst, ncols, n_dims, pos, freq_scale,
12906-
p_delta_rows, ext_factor, attn_factor,
12907-
corr_dims, theta_scale, inv_ndims,
12908-
item_ct1);
12909-
});
12897+
if (freq_factors == nullptr) {
12898+
stream->parallel_for(
12899+
sycl::nd_range<3>(block_nums * block_dims, block_dims),
12900+
[=](sycl::nd_item<3> item_ct1) {
12901+
rope_neox<T, false, false>(x, dst, ncols, n_dims, pos, freq_scale,
12902+
p_delta_rows, ext_factor, attn_factor,
12903+
corr_dims, theta_scale, inv_ndims, freq_factors,
12904+
item_ct1);
12905+
});
12906+
} else {
12907+
stream->parallel_for(
12908+
sycl::nd_range<3>(block_nums * block_dims, block_dims),
12909+
[=](sycl::nd_item<3> item_ct1) {
12910+
rope_neox<T, false, true>(x, dst, ncols, n_dims, pos, freq_scale,
12911+
p_delta_rows, ext_factor, attn_factor,
12912+
corr_dims, theta_scale, inv_ndims, freq_factors,
12913+
item_ct1);
12914+
});
12915+
}
1291012916
} else {
12911-
/*
12912-
DPCT1049:43: The work-group size passed to the SYCL kernel may exceed
12913-
the limit. To get the device limit, query
12914-
info::device::max_work_group_size. Adjust the work-group size if needed.
12915-
*/
1291612917
dpct::has_capability_or_fail(stream->get_device(),
1291712918
{sycl::aspect::fp16});
1291812919

12919-
stream->parallel_for(
12920-
sycl::nd_range<3>(block_nums * block_dims, block_dims),
12921-
[=](sycl::nd_item<3> item_ct1) {
12922-
rope_neox<T, true>(x, dst, ncols, n_dims, pos, freq_scale,
12923-
p_delta_rows, ext_factor, attn_factor,
12924-
corr_dims, theta_scale, inv_ndims, item_ct1);
12925-
});
12920+
if (freq_factors == nullptr) {
12921+
stream->parallel_for(
12922+
sycl::nd_range<3>(block_nums * block_dims, block_dims),
12923+
[=](sycl::nd_item<3> item_ct1) {
12924+
rope_neox<T, true, false>(x, dst, ncols, n_dims, pos, freq_scale,
12925+
p_delta_rows, ext_factor, attn_factor,
12926+
corr_dims, theta_scale, inv_ndims, freq_factors, item_ct1);
12927+
});
12928+
} else {
12929+
stream->parallel_for(
12930+
sycl::nd_range<3>(block_nums * block_dims, block_dims),
12931+
[=](sycl::nd_item<3> item_ct1) {
12932+
rope_neox<T, true, true>(x, dst, ncols, n_dims, pos, freq_scale,
12933+
p_delta_rows, ext_factor, attn_factor,
12934+
corr_dims, theta_scale, inv_ndims, freq_factors, item_ct1);
12935+
});
12936+
}
1292612937
}
1292712938
}
1292812939

@@ -14454,9 +14465,7 @@ inline void ggml_sycl_op_rope(const ggml_tensor *src0, const ggml_tensor *src1,
1445414465
ggml_tensor *dst, const float *src0_dd,
1445514466
const float *src1_dd, float *dst_dd,
1445614467
const dpct::queue_ptr &main_stream) {
14457-
#pragma message("TODO: implement phi3 frequency factors support")
14458-
#pragma message(" https://github.com/ggerganov/llama.cpp/pull/7225")
14459-
GGML_ASSERT(dst->src[2] == nullptr && "phi3 frequency factors not implemented yet");
14468+
const ggml_tensor * src2 = dst->src[2];
1446014469

1446114470
GGML_ASSERT(src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16);
1446214471
GGML_ASSERT( dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
@@ -14482,6 +14491,7 @@ inline void ggml_sycl_op_rope(const ggml_tensor *src0, const ggml_tensor *src1,
1448214491
memcpy(&beta_fast, (int32_t *) dst->op_params + 9, sizeof(float));
1448314492
memcpy(&beta_slow, (int32_t *) dst->op_params + 10, sizeof(float));
1448414493

14494+
const float * freq_factors = nullptr;
1448514495
const int32_t * pos = nullptr;
1448614496
if ((mode & 1) == 0) {
1448714497
GGML_ASSERT(src1->type == GGML_TYPE_I32);
@@ -14492,6 +14502,16 @@ inline void ggml_sycl_op_rope(const ggml_tensor *src0, const ggml_tensor *src1,
1449214502
const bool is_neox = mode & 2;
1449314503
const bool is_glm = mode & 4;
1449414504

14505+
if (is_neox) {
14506+
pos = (const int32_t *) src1_dd;
14507+
14508+
if (src2 != nullptr) {
14509+
freq_factors = (const float *) src2->data;
14510+
}
14511+
} else {
14512+
GGML_ASSERT(src2 == nullptr && "TODO: freq_factors not implemented for !is_neox");
14513+
}
14514+
1449514515
rope_corr_dims corr_dims;
1449614516
ggml_rope_yarn_corr_dims(n_dims, n_orig_ctx, freq_base, beta_fast, beta_slow, corr_dims.v);
1449714517

@@ -14503,13 +14523,13 @@ inline void ggml_sycl_op_rope(const ggml_tensor *src0, const ggml_tensor *src1,
1450314523
if (src0->type == GGML_TYPE_F32) {
1450414524
rope_neox_sycl(
1450514525
(const float *)src0_dd, (float *)dst_dd, ne00, n_dims, nrows, pos, freq_scale, ne01, freq_base, ext_factor,
14506-
attn_factor, corr_dims, main_stream
14526+
attn_factor, corr_dims, freq_factors, main_stream
1450714527
);
1450814528
} else if (src0->type == GGML_TYPE_F16) {
1450914529
rope_neox_sycl((const sycl::half *)src0_dd, (sycl::half *)dst_dd,
1451014530
ne00, n_dims, nrows, pos, freq_scale, ne01,
1451114531
freq_base, ext_factor, attn_factor, corr_dims,
14512-
main_stream);
14532+
freq_factors, main_stream);
1451314533
} else {
1451414534
GGML_ASSERT(false);
1451514535
}

0 commit comments

Comments
 (0)