Skip to content

Commit b8a9828

Browse files
committed
add amx kernel for gemm
add intel amx isa detection; add vnni kernel for gemv cases; add vnni and amx kernel support for block_q8_0; code cleanup; fix packing B issue; enable openmp; fine tune amx kernel; switch to aten parallel pattern; add error message for nested parallelism; code cleanup; add f16 support in ggml-amx; add amx kernels for QK_K quant formats: Q4_K, Q5_K, Q6_K and IQ4_XS
1 parent 081fe43 commit b8a9828

File tree

6 files changed

+2636
-5
lines changed

6 files changed

+2636
-5
lines changed

Makefile

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -92,11 +92,6 @@ GGML_METAL := 1
9292
DEPRECATE_WARNING := 1
9393
endif
9494

95-
ifdef LLAMA_OPENMP
96-
GGML_OPENMP := 1
97-
DEPRECATE_WARNING := 1
98-
endif
99-
10095
ifdef LLAMA_RPC
10196
GGML_RPC := 1
10297
DEPRECATE_WARNING := 1
@@ -350,6 +345,12 @@ ifdef LLAMA_SANITIZE_UNDEFINED
350345
MK_LDFLAGS += -fsanitize=undefined -g
351346
endif
352347

348+
ifdef LLAMA_OPENMP
349+
MK_CPPFLAGS += -fopenmp
350+
MK_CFLAGS += -fopenmp
351+
MK_CXXFLAGS += -fopenmp
352+
endif
353+
353354
ifdef LLAMA_SERVER_VERBOSE
354355
MK_CPPFLAGS += -DSERVER_VERBOSE=$(LLAMA_SERVER_VERBOSE)
355356
endif
@@ -567,6 +568,11 @@ ifndef GGML_NO_LLAMAFILE
567568
OBJ_GGML += ggml/src/llamafile/sgemm.o
568569
endif
569570

571+
ifndef GGML_NO_AMX
572+
MK_CPPFLAGS += -DGGML_USE_AMX
573+
OBJ_GGML += ggml/src/ggml-amx/mmq.o
574+
endif
575+
570576
ifdef GGML_RPC
571577
MK_CPPFLAGS += -DGGML_USE_RPC
572578
OBJ_GGML += ggml/src/ggml-rpc.o
@@ -1026,6 +1032,14 @@ ggml/src/llamafile/sgemm.o: \
10261032
$(CXX) $(CXXFLAGS) -c $< -o $@
10271033
endif # GGML_NO_LLAMAFILE
10281034

1035+
ifndef GGML_NO_AMX
1036+
ggml/src/ggml-amx/mmq.o: \
1037+
ggml/src/ggml-amx/mmq.cpp \
1038+
ggml/src/ggml-amx/mmq.h \
1039+
ggml/include/ggml.h
1040+
$(CXX) $(CXXFLAGS) -c $< -o $@
1041+
endif
1042+
10291043
ifdef GGML_RPC
10301044
ggml/src/ggml-rpc.o: \
10311045
ggml/src/ggml-rpc.cpp \
@@ -1144,6 +1158,7 @@ clean:
11441158
rm -vrf ggml/src/ggml-metal-embed.metal
11451159
rm -vrf ggml/src/ggml-cuda/*.o
11461160
rm -vrf ggml/src/ggml-cuda/template-instances/*.o
1161+
rm -vrf ggml/src/ggml-amx/*.o
11471162
rm -rvf $(BUILD_TARGETS)
11481163
rm -rvf $(TEST_TARGETS)
11491164
rm -f vulkan-shaders-gen ggml/src/ggml-vulkan-shaders.hpp ggml/src/ggml-vulkan-shaders.cpp

ggml/include/ggml.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2380,6 +2380,7 @@ extern "C" {
23802380
GGML_API int ggml_cpu_has_avx512_vbmi(void);
23812381
GGML_API int ggml_cpu_has_avx512_vnni(void);
23822382
GGML_API int ggml_cpu_has_avx512_bf16(void);
2383+
GGML_API int ggml_cpu_has_amx_int8 (void);
23832384
GGML_API int ggml_cpu_has_fma (void);
23842385
GGML_API int ggml_cpu_has_neon (void);
23852386
GGML_API int ggml_cpu_has_sve (void);

0 commit comments

Comments
 (0)