Skip to content

Commit 3ff0c0e

Browse files
committed
add amx kernel for gemm
add Intel AMX ISA detection; add VNNI kernel for gemv cases; add VNNI and AMX kernel support for block_q8_0; code cleanup; fix packing B issue; enable OpenMP; fine-tune AMX kernel; switch to ATen parallel pattern; add error message for nested parallelism; code cleanup; add f16 support in ggml-amx; add AMX kernels for QK_K quant formats: Q4_K, Q5_K, Q6_K and IQ4_XS
1 parent 15fa07a commit 3ff0c0e

File tree

6 files changed

+2636
-5
lines changed

6 files changed

+2636
-5
lines changed

Makefile

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,6 @@ GGML_METAL := 1
9090
DEPRECATE_WARNING := 1
9191
endif
9292

93-
ifdef LLAMA_OPENMP
94-
GGML_OPENMP := 1
95-
DEPRECATE_WARNING := 1
96-
endif
97-
9893
ifdef LLAMA_RPC
9994
GGML_RPC := 1
10095
DEPRECATE_WARNING := 1
@@ -348,6 +343,12 @@ ifdef LLAMA_SANITIZE_UNDEFINED
348343
MK_LDFLAGS += -fsanitize=undefined -g
349344
endif
350345

346+
ifdef LLAMA_OPENMP
347+
MK_CPPFLAGS += -fopenmp
348+
MK_CFLAGS += -fopenmp
349+
MK_CXXFLAGS += -fopenmp
350+
endif
351+
351352
ifdef LLAMA_SERVER_VERBOSE
352353
MK_CPPFLAGS += -DSERVER_VERBOSE=$(LLAMA_SERVER_VERBOSE)
353354
endif
@@ -576,6 +577,11 @@ ifndef GGML_NO_LLAMAFILE
576577
OBJ_GGML += ggml/src/llamafile/sgemm.o
577578
endif
578579

580+
ifndef GGML_NO_AMX
581+
MK_CPPFLAGS += -DGGML_USE_AMX
582+
OBJ_GGML += ggml/src/ggml-amx/mmq.o
583+
endif
584+
579585
ifdef GGML_RPC
580586
MK_CPPFLAGS += -DGGML_USE_RPC
581587
OBJ_GGML += ggml/src/ggml-rpc.o
@@ -1065,6 +1071,14 @@ ggml/src/llamafile/sgemm.o: \
10651071
$(CXX) $(CXXFLAGS) -c $< -o $@
10661072
endif # GGML_NO_LLAMAFILE
10671073

1074+
ifndef GGML_NO_AMX
1075+
ggml/src/ggml-amx/mmq.o: \
1076+
ggml/src/ggml-amx/mmq.cpp \
1077+
ggml/src/ggml-amx/mmq.h \
1078+
ggml/include/ggml.h
1079+
$(CXX) $(CXXFLAGS) -c $< -o $@
1080+
endif
1081+
10681082
ifdef GGML_RPC
10691083
ggml/src/ggml-rpc.o: \
10701084
ggml/src/ggml-rpc.cpp \
@@ -1210,6 +1224,7 @@ clean:
12101224
rm -vrf ggml/src/ggml-metal-embed.metal
12111225
rm -vrf ggml/src/ggml-cuda/*.o
12121226
rm -vrf ggml/src/ggml-cuda/template-instances/*.o
1227+
rm -vrf ggml/src/ggml-amx/*.o
12131228
rm -rvf $(BUILD_TARGETS)
12141229
rm -rvf $(TEST_TARGETS)
12151230
rm -f vulkan-shaders-gen ggml/src/ggml-vulkan-shaders.hpp ggml/src/ggml-vulkan-shaders.cpp

ggml/include/ggml.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2387,6 +2387,7 @@ extern "C" {
23872387
GGML_API int ggml_cpu_has_avx512_vbmi(void);
23882388
GGML_API int ggml_cpu_has_avx512_vnni(void);
23892389
GGML_API int ggml_cpu_has_avx512_bf16(void);
2390+
GGML_API int ggml_cpu_has_amx_int8 (void);
23902391
GGML_API int ggml_cpu_has_fma (void);
23912392
GGML_API int ggml_cpu_has_neon (void);
23922393
GGML_API int ggml_cpu_has_sve (void);

0 commit comments

Comments
 (0)