Skip to content

Commit 75ce9f9

Browse files
committed
Revert "fix memcpy() crash, add missed cmd in guide, fix softmax (ggml-org#6622)"
This reverts commit de17e3f.
1 parent b8109bc commit 75ce9f9

File tree

3 files changed: +6 -16 lines changed

3 files changed: +6 -16 lines changed

examples/sycl/build.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,4 +20,4 @@ cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
   20    20   #cmake --build . --config Release --target llama-bench
   21    21
   22    22   #build all binary
   23       - cmake --build . --config Release -j -v
         23 + cmake --build . --config Release -v

examples/sycl/run-llama2.sh

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,6 @@ if [ $# -gt 0 ]; then
   12    12   GGML_SYCL_SINGLE_GPU=1
   13    13   else
   14    14   GGML_SYCL_DEVICE=0
   15       - GGML_SYCL_SINGLE_GPU=0
   16    15   fi
   17    16
   18    17   #export GGML_SYCL_DEBUG=1

ggml-sycl.cpp

Lines changed: 5 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -3154,6 +3154,7 @@ typedef float (*vec_dot_q_mul_mat_sycl_t)(
 3154  3154   #define SYCL_SCALE_BLOCK_SIZE 256
 3155  3155   #define SYCL_CLAMP_BLOCK_SIZE 256
 3156  3156   #define SYCL_ROPE_BLOCK_SIZE 256
       3157 + #define SYCL_SOFT_MAX_BLOCK_SIZE 1024
 3157  3158   #define SYCL_ALIBI_BLOCK_SIZE 32
 3158  3159   #define SYCL_DIAG_MASK_INF_BLOCK_SIZE 32
 3159  3160   #define SYCL_QUANTIZE_BLOCK_SIZE 256
@@ -13079,13 +13080,11 @@ static void soft_max_f32_sycl(const float * x, const float * mask, const float *
13079 13080   const int nrows_y, const float scale, const float max_bias,
13080 13081   dpct::queue_ptr stream) {
13081 13082   int nth = WARP_SIZE;
13082       - int max_block_size = g_work_group_size;
13083       - while (nth < ncols_x && nth < max_block_size) nth *= 2;
13084       - if (nth>max_block_size) nth = max_block_size;
13085       -
      13083 + while (nth < ncols_x && nth < SYCL_SOFT_MAX_BLOCK_SIZE) nth *= 2;
13086 13084   const sycl::range<3> block_dims(1, 1, nth);
13087 13085   const sycl::range<3> block_nums(1, 1, nrows_x);
13088 13086   const size_t n_local_scratch = (GGML_PAD(ncols_x, WARP_SIZE) + WARP_SIZE);
      13087 + static_assert(SYCL_SOFT_MAX_BLOCK_SIZE == 1024, "These values need to be adjusted.");
13089 13088
13090 13089   const uint32_t n_head_kv = nrows_x/nrows_y;
13091 13090   const uint32_t n_head_log2 = 1u << (uint32_t) floorf(log2f((float) n_head_kv));
@@ -13095,12 +13094,6 @@ static void soft_max_f32_sycl(const float * x, const float * mask, const float *
13095 13094
13096 13095   const size_t local_mem_size = stream->get_device().get_info<sycl::info::device::local_mem_size>();
13097 13096   if (n_local_scratch*sizeof(float) < local_mem_size) {
13098       - if (ncols_x > max_block_size) {
13099       - soft_max_f32_submitter<true, 0, 0>(x, mask, pos, dst, ncols_x, nrows_y, scale,
13100       - max_bias, m0, m1, n_head_log2, block_nums,
13101       - block_dims, n_local_scratch, stream);
13102       - return;
13103       - }
13104 13097   switch (ncols_x) {
13105 13098   case 32:
13106 13099   soft_max_f32_submitter<true, 32, 32>(x, mask, pos, dst, ncols_x, nrows_y, scale,
@@ -16825,13 +16818,11 @@ static void ggml_backend_sycl_buffer_set_tensor(ggml_backend_buffer_t buffer,
16825 16818   const dpct::queue_ptr stream = g_syclStreams[ctx->device][0];
16826 16819   SYCL_CHECK(
16827 16820   CHECK_TRY_ERROR(dpct::dev_mgr::instance().get_device(ctx->device).queues_wait_and_throw()));
16828       - char* host_buf = (char*)malloc(size);
16829       - memcpy(host_buf, data, size);
      16821 +
16830 16822   SYCL_CHECK(
16831 16823   CHECK_TRY_ERROR((*stream)
16832       - .memcpy((char *)tensor->data + offset, host_buf, size)
      16824 + .memcpy((char *)tensor->data + offset, data, size)
16833 16825   .wait()));
16834       - free(host_buf);
16835 16826   }
16836 16827   catch (sycl::exception const &exc) {
16837 16828   std::cerr << exc.what() << "Exception caught at file:" << __FILE__

0 commit comments

Comments
 (0)