Skip to content

Commit 65c5820

Browse files
ggml : add loongarch lsx and lasx support (#6454)
* add loongarch lsx and lasx optimize code
* Add loongarch compilation support to makefile
* revert stb_image.h
* opt bytes_from_nibbles_32 and sum_i16_pairs_float
* fix undeclared
* format code
* update
* update 2

---------

Co-authored-by: Jinyang He <[email protected]>
1 parent 1cc0155 commit 65c5820

File tree

5 files changed

+2389
-81
lines changed

5 files changed

+2389
-81
lines changed

CMakeLists.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,8 @@ set(LLAMA_SCHED_MAX_COPIES "4" CACHE STRING "llama: max input copies for pipeli
134134
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
135135
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
136136
option(LLAMA_BUILD_SERVER "llama: build server example" ON)
137+
# LoongArch SIMD toggles, both ON by default. In the loongarch64 branch of the
# architecture detection later in this file, LLAMA_LASX appends -mlasx and
# LLAMA_LSX appends -mlsx to ARCH_FLAGS. In the portion of the file visible
# here they are only read in that branch.
option(LLAMA_LASX "llama: enable lasx" ON)
138+
option(LLAMA_LSX "llama: enable lsx" ON)
137139

138140
# add perf arguments
139141
option(LLAMA_PERF "llama: enable perf" OFF)
@@ -1108,6 +1110,17 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
11081110
list(APPEND ARCH_FLAGS -mcpu=native -mtune=native)
11091111
#TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
11101112
endif()
1113+
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
1114+
message(STATUS "loongarch64 detected")
1115+
1116+
list(APPEND ARCH_FLAGS -march=loongarch64)
1117+
if (LLAMA_LASX)
1118+
list(APPEND ARCH_FLAGS -mlasx)
1119+
endif()
1120+
if (LLAMA_LSX)
1121+
list(APPEND ARCH_FLAGS -mlsx)
1122+
endif()
1123+
11111124
else()
11121125
message(STATUS "Unknown architecture")
11131126
endif()

Makefile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,11 @@ ifneq ($(filter ppc64le%,$(UNAME_M)),)
379379
CUDA_POWER_ARCH = 1
380380
endif
381381

382+
# LoongArch: when UNAME_M starts with "loongarch64", add -mlasx to both the C
# and the C++ compiler flags.
# NOTE(review): unlike the CMake build (LLAMA_LASX / LLAMA_LSX options), there is
# no switch here to disable LASX or to request LSX only — confirm this is
# intended for LoongArch CPUs that lack LASX.
ifneq ($(filter loongarch64%,$(UNAME_M)),)
383+
MK_CFLAGS += -mlasx
384+
MK_CXXFLAGS += -mlasx
385+
endif
386+
382387
else
383388
MK_CFLAGS += -march=rv64gcv -mabi=lp64d
384389
MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d

ggml-impl.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,34 @@ static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
455455
#include <riscv_vector.h>
456456
#endif
457457

458+
/*
 * LoongArch SIMD intrinsic headers. On a 64-bit LoongArch target, include
 * only the headers for the extensions the compiler has enabled:
 * <lasxintrin.h> when __loongarch_asx is defined, <lsxintrin.h> when
 * __loongarch_sx is defined.
 */
#if defined(__loongarch64)
459+
#if defined(__loongarch_asx)
460+
#include <lasxintrin.h>
461+
#endif
462+
#if defined(__loongarch_sx)
463+
#include <lsxintrin.h>
464+
#endif
465+
#endif
466+
467+
#if defined(__loongarch_sx) || defined(__loongarch_asx)

/*
 * Overlay of a 32-bit integer and a float. Used by the helpers below to hand
 * the bit pattern of a float to intrinsics that take an integer argument.
 */
typedef union {
    int32_t i;
    float f;
} ft_union;

/* float type data load instructions */

/*
 * Build a 128-bit LSX float vector from a single float.
 *
 * The float's bit pattern is passed to __lsx_vreplgr2vr_w and the integer
 * vector it returns is cast to __m128.
 *
 * Guarded by __loongarch_sx as well as __loongarch_asx so that LSX-only builds
 * (LASX disabled) also get this helper. `static inline`, like the other
 * helpers in this header, so that translation units which include the header
 * without calling it do not trigger unused-function warnings.
 */
static inline __m128 __lsx_vreplfr2vr_s(float val) {
    ft_union fi_tmpval = {.f = val};
    return (__m128)__lsx_vreplgr2vr_w(fi_tmpval.i);
}

#endif

#if defined(__loongarch_asx)

/*
 * Build a 256-bit LASX float vector from a single float.
 *
 * Same approach as __lsx_vreplfr2vr_s: the float's bit pattern is passed to
 * __lasx_xvreplgr2vr_w and the result is cast to __m256.
 */
static inline __m256 __lasx_xvreplfr2vr_s(float val) {
    ft_union fi_tmpval = {.f = val};
    return (__m256)__lasx_xvreplgr2vr_w(fi_tmpval.i);
}

#endif
485+
458486
#ifdef __F16C__
459487

460488
#ifdef _MSC_VER

0 commit comments

Comments
 (0)