Skip to content

Commit 42cd49c

Browse files
author
mike dupont
committed
rename
1 parent 05ac2ac commit 42cd49c

File tree

9 files changed

+384
-171
lines changed

9 files changed

+384
-171
lines changed

Makefile

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ endif
116116
# keep standard at C11 and C++11
117117
MK_CPPFLAGS = -I. -Icommon
118118
MK_CFLAGS = -std=c11 -fPIC
119-
MK_CXXFLAGS = -std=c++11 -fPIC
119+
MK_CXXFLAGS = -std=c++17 -fPIC -fpermissive
120120

121121
# -Ofast tends to produce faster code, but may not be available for some compilers.
122122
ifdef LLAMA_FAST
@@ -538,16 +538,16 @@ $(info )
538538
#
539539

540540
ggml.o: ggml.c ggml.h ggml-cuda.h
541-
$(CC) $(CFLAGS) -c $< -o $@
541+
$(CXX) $(CXXFLAGS) -c $< -o $@
542542

543543
ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h
544-
$(CC) $(CFLAGS) -c $< -o $@
544+
$(CXX) $(CXXFLAGS) -c $< -o $@
545545

546546
ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
547-
$(CC) $(CFLAGS) -c $< -o $@
547+
$(CXX) $(CXXFLAGS) -c $< -o $@
548548

549549
ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h
550-
$(CC) $(CFLAGS) -c $< -o $@
550+
$(CXX) $(CXXFLAGS) -c $< -o $@
551551

552552
OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o
553553

File renamed without changes.
File renamed without changes.

ggml-cuda.cu

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7623,12 +7623,12 @@ static void ggml_cuda_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1
76237623
#endif
76247624

76257625
// debug helpers
7626-
printf("JSON { \"data\":{ \"src0\": { \"%s\" :{ \"ne\" : [ %8d, %8d, %8d, %8d ], \"nb\" : [ %8d, %8d, %8d, %8d ], \"contiguous\":\"%d\", \"transposed\":\"%d\", \"type\": \"%s\", \"name\" : \"%s\"}}, \"src1\": { \"%s\" :{ \"ne\" : [ %8d, %8d, %8d, %8d ], \"nb\" : [ %8d, %8d, %8d, %8d ], \"contiguous\":\"%d\", \"transposed\":\"%d\", \"type\": \"%s\", \"name\" : \"%s\"}}, \"dst\" : { \"%s\" :{ \"ne\" : [ %8d, %8d, %8d, %8d ], \"nb\" : [ %8d, %8d, %8d, %8d ], \"contiguous\":\"%d\", \"transposed\":\"%d\", \"type\": \"%s\", \"name\" : \"%s\"}}}}\n",
7627-
src0->name, src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3], src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3],
7628-
ggml_is_contiguous(src0), ggml_is_transposed(src0), ggml_type_name(src0->type), src0->name,
7629-
src1->name, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3], src1->nb[0], src1->nb[1], src1->nb[2], src1->nb[3], ggml_is_contiguous(src1), ggml_is_transposed(src1), ggml_type_name(src1->type), src1->name,
7630-
dst->name, dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], dst->nb[0], dst->nb[1], dst->nb[2], dst->nb[3], ggml_is_contiguous(dst), ggml_is_transposed(dst), ggml_type_name(dst->type), dst->name
7631-
);
7626+
// printf("JSON: { \"data\":{ \"src0\": { \"%s\" :{ \"ne\" : [ %8d, %8d, %8d, %8d ], \"nb\" : [ %8d, %8d, %8d, %8d ], \"contiguous\":\"%d\", \"transposed\":\"%d\", \"type\": \"%s\", \"name\" : \"%s\"}}, \"src1\": { \"%s\" :{ \"ne\" : [ %8d, %8d, %8d, %8d ], \"nb\" : [ %8d, %8d, %8d, %8d ], \"contiguous\":\"%d\", \"transposed\":\"%d\", \"type\": \"%s\", \"name\" : \"%s\"}}, \"dst\" : { \"%s\" :{ \"ne\" : [ %8d, %8d, %8d, %8d ], \"nb\" : [ %8d, %8d, %8d, %8d ], \"contiguous\":\"%d\", \"transposed\":\"%d\", \"type\": \"%s\", \"name\" : \"%s\"}}}}\n",
7627+
// src0->name, src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3], src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3],
7628+
// ggml_is_contiguous(src0), ggml_is_transposed(src0), ggml_type_name(src0->type), src0->name,
7629+
// src1->name, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3], src1->nb[0], src1->nb[1], src1->nb[2], src1->nb[3], ggml_is_contiguous(src1), ggml_is_transposed(src1), ggml_type_name(src1->type), src1->name,
7630+
// dst->name, dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], dst->nb[0], dst->nb[1], dst->nb[2], dst->nb[3], ggml_is_contiguous(dst), ggml_is_transposed(dst), ggml_type_name(dst->type), dst->name
7631+
// );
76327632

76337633
if (!split && all_on_device && !use_tensor_cores && src0->type == GGML_TYPE_F16 && ggml_is_permuted(src0) && ggml_is_permuted(src1) && src1->ne[1] == 1) {
76347634
// KQ single-batch

ggml-impl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ extern "C" {
2222
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201100L)
2323
#define static_assert(cond, msg) _Static_assert(cond, msg)
2424
#else
25-
#define static_assert(cond, msg) struct global_scope_noop_trick
25+
//#define static_assert(cond, msg) struct global_scope_noop_trick
2626
#endif
2727
#endif
2828

ggml-mpi.c renamed to ggml-mpi.cpp

File renamed without changes.
File renamed without changes.

ggml-quants.h

Lines changed: 51 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -167,58 +167,58 @@ static_assert(sizeof(block_q8_K) == sizeof(float) + QK_K + QK_K/16*sizeof(int16_
167167

168168

169169
// Quantization
170-
void quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict y, int k);
171-
void quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * restrict y, int k);
172-
void quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * restrict y, int k);
173-
void quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * restrict y, int k);
174-
void quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * restrict y, int k);
175-
void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict y, int k);
176-
177-
void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict y, int k);
178-
void quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict y, int k);
179-
void quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict y, int k);
180-
void quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict y, int k);
181-
void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict y, int k);
182-
void quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict y, int k);
183-
184-
void quantize_row_q4_0(const float * restrict x, void * restrict y, int k);
185-
void quantize_row_q4_1(const float * restrict x, void * restrict y, int k);
186-
void quantize_row_q5_0(const float * restrict x, void * restrict y, int k);
187-
void quantize_row_q5_1(const float * restrict x, void * restrict y, int k);
188-
void quantize_row_q8_0(const float * restrict x, void * restrict y, int k);
189-
void quantize_row_q8_1(const float * restrict x, void * restrict y, int k);
190-
191-
void quantize_row_q2_K(const float * restrict x, void * restrict y, int k);
192-
void quantize_row_q3_K(const float * restrict x, void * restrict y, int k);
193-
void quantize_row_q4_K(const float * restrict x, void * restrict y, int k);
194-
void quantize_row_q5_K(const float * restrict x, void * restrict y, int k);
195-
void quantize_row_q6_K(const float * restrict x, void * restrict y, int k);
196-
void quantize_row_q8_K(const float * restrict x, void * restrict y, int k);
170+
void quantize_row_q4_0_reference(const float * __restrict__ x, block_q4_0 * __restrict__ y, int k);
171+
void quantize_row_q4_1_reference(const float * __restrict__ x, block_q4_1 * __restrict__ y, int k);
172+
void quantize_row_q5_0_reference(const float * __restrict__ x, block_q5_0 * __restrict__ y, int k);
173+
void quantize_row_q5_1_reference(const float * __restrict__ x, block_q5_1 * __restrict__ y, int k);
174+
void quantize_row_q8_0_reference(const float * __restrict__ x, block_q8_0 * __restrict__ y, int k);
175+
void quantize_row_q8_1_reference(const float * __restrict__ x, block_q8_1 * __restrict__ y, int k);
176+
177+
void quantize_row_q2_K_reference(const float * __restrict__ x, block_q2_K * __restrict__ y, int k);
178+
void quantize_row_q3_K_reference(const float * __restrict__ x, block_q3_K * __restrict__ y, int k);
179+
void quantize_row_q4_K_reference(const float * __restrict__ x, block_q4_K * __restrict__ y, int k);
180+
void quantize_row_q5_K_reference(const float * __restrict__ x, block_q5_K * __restrict__ y, int k);
181+
void quantize_row_q6_K_reference(const float * __restrict__ x, block_q6_K * __restrict__ y, int k);
182+
void quantize_row_q8_K_reference(const float * __restrict__ x, block_q8_K * __restrict__ y, int k);
183+
184+
void quantize_row_q4_0(const float * __restrict__ x, void * __restrict__ y, int k);
185+
void quantize_row_q4_1(const float * __restrict__ x, void * __restrict__ y, int k);
186+
void quantize_row_q5_0(const float * __restrict__ x, void * __restrict__ y, int k);
187+
void quantize_row_q5_1(const float * __restrict__ x, void * __restrict__ y, int k);
188+
void quantize_row_q8_0(const float * __restrict__ x, void * __restrict__ y, int k);
189+
void quantize_row_q8_1(const float * __restrict__ x, void * __restrict__ y, int k);
190+
191+
void quantize_row_q2_K(const float * __restrict__ x, void * __restrict__ y, int k);
192+
void quantize_row_q3_K(const float * __restrict__ x, void * __restrict__ y, int k);
193+
void quantize_row_q4_K(const float * __restrict__ x, void * __restrict__ y, int k);
194+
void quantize_row_q5_K(const float * __restrict__ x, void * __restrict__ y, int k);
195+
void quantize_row_q6_K(const float * __restrict__ x, void * __restrict__ y, int k);
196+
void quantize_row_q8_K(const float * __restrict__ x, void * __restrict__ y, int k);
197197

198198
// Dequantization
199-
void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int k);
200-
void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k);
201-
void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int k);
202-
void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int k);
203-
void dequantize_row_q8_0(const block_q8_0 * restrict x, float * restrict y, int k);
204-
//void dequantize_row_q8_1(const block_q8_1 * restrict x, float * restrict y, int k);
205-
206-
void dequantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int k);
207-
void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k);
208-
void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int k);
209-
void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int k);
210-
void dequantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int k);
211-
void dequantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int k);
199+
void dequantize_row_q4_0(const block_q4_0 * __restrict__ x, float * __restrict__ y, int k);
200+
void dequantize_row_q4_1(const block_q4_1 * __restrict__ x, float * __restrict__ y, int k);
201+
void dequantize_row_q5_0(const block_q5_0 * __restrict__ x, float * __restrict__ y, int k);
202+
void dequantize_row_q5_1(const block_q5_1 * __restrict__ x, float * __restrict__ y, int k);
203+
void dequantize_row_q8_0(const block_q8_0 * __restrict__ x, float * __restrict__ y, int k);
204+
//void dequantize_row_q8_1(const block_q8_1 * __restrict__ x, float * __restrict__ y, int k);
205+
206+
void dequantize_row_q2_K(const block_q2_K * __restrict__ x, float * __restrict__ y, int k);
207+
void dequantize_row_q3_K(const block_q3_K * __restrict__ x, float * __restrict__ y, int k);
208+
void dequantize_row_q4_K(const block_q4_K * __restrict__ x, float * __restrict__ y, int k);
209+
void dequantize_row_q5_K(const block_q5_K * __restrict__ x, float * __restrict__ y, int k);
210+
void dequantize_row_q6_K(const block_q6_K * __restrict__ x, float * __restrict__ y, int k);
211+
void dequantize_row_q8_K(const block_q8_K * __restrict__ x, float * __restrict__ y, int k);
212212

213213
// Dot product
214-
void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
215-
void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
216-
void ggml_vec_dot_q5_0_q8_0(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
217-
void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
218-
void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
219-
220-
void ggml_vec_dot_q2_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
221-
void ggml_vec_dot_q3_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
222-
void ggml_vec_dot_q4_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
223-
void ggml_vec_dot_q5_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
224-
void ggml_vec_dot_q6_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
214+
void ggml_vec_dot_q4_0_q8_0(int n, float * __restrict__ s, const void * __restrict__ vx, const void * __restrict__ vy);
215+
void ggml_vec_dot_q4_1_q8_1(int n, float * __restrict__ s, const void * __restrict__ vx, const void * __restrict__ vy);
216+
void ggml_vec_dot_q5_0_q8_0(int n, float * __restrict__ s, const void * __restrict__ vx, const void * __restrict__ vy);
217+
void ggml_vec_dot_q5_1_q8_1(int n, float * __restrict__ s, const void * __restrict__ vx, const void * __restrict__ vy);
218+
void ggml_vec_dot_q8_0_q8_0(int n, float * __restrict__ s, const void * __restrict__ vx, const void * __restrict__ vy);
219+
220+
void ggml_vec_dot_q2_K_q8_K(int n, float * __restrict__ s, const void * __restrict__ vx, const void * __restrict__ vy);
221+
void ggml_vec_dot_q3_K_q8_K(int n, float * __restrict__ s, const void * __restrict__ vx, const void * __restrict__ vy);
222+
void ggml_vec_dot_q4_K_q8_K(int n, float * __restrict__ s, const void * __restrict__ vx, const void * __restrict__ vy);
223+
void ggml_vec_dot_q5_K_q8_K(int n, float * __restrict__ s, const void * __restrict__ vx, const void * __restrict__ vy);
224+
void ggml_vec_dot_q6_K_q8_K(int n, float * __restrict__ s, const void * __restrict__ vx, const void * __restrict__ vy);

0 commit comments

Comments
 (0)