Skip to content

Commit 698c945

Browse files
committed
Merge branch 'master' into stablelm-support
2 parents c959376 + e9c1cec commit 698c945

24 files changed

+1410
-750
lines changed

.github/workflows/build.yml

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,7 @@ jobs:
288288
OPENBLAS_VERSION: 0.3.23
289289
OPENCL_VERSION: 2023.04.17
290290
CLBLAST_VERSION: 1.6.0
291+
SDE_VERSION: 9.21.1-2023-04-24
291292

292293
strategy:
293294
matrix:
@@ -383,11 +384,23 @@ jobs:
383384
384385
- name: Test
385386
id: cmake_test
386-
if: ${{ matrix.build != 'clblast' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} # Test AVX-512 only when possible
387+
if: ${{ matrix.build != 'clblast' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} # not all machines have native AVX-512
387388
run: |
388389
cd build
389390
ctest -C Release --verbose --timeout 900
390391
392+
- name: Test (Intel SDE)
393+
id: cmake_test_sde
394+
if: ${{ matrix.build == 'avx512' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation
395+
run: |
396+
curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/777395/sde-external-${env:SDE_VERSION}-win.tar.xz"
397+
# Windows' bundled tar cannot unpack the SDE .tar.xz, so extract it in two passes with 7z (.xz, then .tar)
398+
7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz
399+
7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
400+
$sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
401+
cd build
402+
& $sde -future -- ctest -C Release --verbose --timeout 900
403+
391404
- name: Determine tag name
392405
id: tag
393406
shell: bash

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ models-mnt
4646
/infill
4747
/libllama.so
4848
/llama-bench
49-
/llava
49+
/llava-cli
5050
/main
5151
/metal
5252
/perplexity

CMakeLists.txt

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ endif()
1010

1111
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
1212

13-
if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
13+
if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
1414
set(LLAMA_STANDALONE ON)
1515

1616
# configure project version
@@ -44,7 +44,7 @@ endif()
4444

4545
# general
4646
option(LLAMA_STATIC "llama: static link libraries" OFF)
47-
option(LLAMA_NATIVE "llama: enable -march=native flag" OFF)
47+
option(LLAMA_NATIVE "llama: enable -march=native flag" ON)
4848
option(LLAMA_LTO "llama: enable link time optimization" OFF)
4949

5050
# debug
@@ -510,6 +510,10 @@ if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATC
510510
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "^(x86_64|i686|amd64|x64)$" )
511511
message(STATUS "x86 detected")
512512
if (MSVC)
513+
# instruction set detection for MSVC only
514+
if (LLAMA_NATIVE)
515+
include(cmake/FindSIMD.cmake)
516+
endif ()
513517
if (LLAMA_AVX512)
514518
add_compile_options($<$<COMPILE_LANGUAGE:C>:/arch:AVX512>)
515519
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX512>)

Makefile

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Define the default target now so that it is always the first target
22
BUILD_TARGETS = \
33
main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
4-
simple batched batched-bench save-load-state server gguf llama-bench llava baby-llama beam-search \
4+
simple batched batched-bench save-load-state server gguf llama-bench libllava.a llava-cli baby-llama beam-search \
55
speculative infill benchmark-matmult parallel finetune export-lora tests/test-c.o
66

77
# Binaries only useful for tests
@@ -617,7 +617,10 @@ convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggm
617617
llama-bench: examples/llama-bench/llama-bench.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
618618
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
619619

620-
llava: examples/llava/llava.cpp examples/llava/llava-utils.h examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
620+
# libllava.a: depends on the llava and clip sources/headers, stb_image.h, base64.hpp and the core objects.
# NOTE(review): the recipe passes -c with $<, so only the first prerequisite (examples/llava/llava.cpp)
# is compiled and the output named libllava.a is a single object file — confirm this is intended.
libllava.a: examples/llava/llava.cpp examples/llava/llava.h examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h common/base64.hpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
621+
$(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ $(LDFLAGS) -Wno-cast-qual
622+
623+
# llava-cli: builds the llava-cli binary from every prerequisite that is not a header
# ($(filter-out %.h,$^) drops the .h files, which are listed only to trigger rebuilds).
llava-cli: examples/llava/llava-cli.cpp examples/llava/clip.h examples/llava/clip.cpp examples/llava/llava.h examples/llava/llava.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
621624
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
622625

623626
baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)

cmake/FindSIMD.cmake

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
include(CheckCSourceRuns)
2+
3+
# AVX probe program: a minimal C source that uses the 256-bit _mm256_set1_ps intrinsic.
# check_sse("AVX" ...) looks this string up as ${type}_CODE and hands it to check_c_source_runs().
set(AVX_CODE "
4+
#include <immintrin.h>
5+
int main()
6+
{
7+
__m256 a;
8+
a = _mm256_set1_ps(0);
9+
return 0;
10+
}
11+
")
12+
13+
# AVX-512 probe program: builds a 512-bit integer vector with _mm512_set_epi8 and compares it
# byte-wise with _mm512_cmp_epi8_mask into a __mmask64.
# check_sse("AVX512" ...) looks this string up as ${type}_CODE and hands it to check_c_source_runs().
set(AVX512_CODE "
14+
#include <immintrin.h>
15+
int main()
16+
{
17+
__m512i a = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
18+
0, 0, 0, 0, 0, 0, 0, 0,
19+
0, 0, 0, 0, 0, 0, 0, 0,
20+
0, 0, 0, 0, 0, 0, 0, 0,
21+
0, 0, 0, 0, 0, 0, 0, 0,
22+
0, 0, 0, 0, 0, 0, 0, 0,
23+
0, 0, 0, 0, 0, 0, 0, 0,
24+
0, 0, 0, 0, 0, 0, 0, 0);
25+
__m512i b = a;
26+
__mmask64 equality_mask = _mm512_cmp_epi8_mask(a, b, _MM_CMPINT_EQ);
27+
return 0;
28+
}
29+
")
30+
31+
# AVX2 probe program: exercises _mm256_abs_epi16 and _mm256_extract_epi64 (the latter is
# included on purpose because the project's AVX2 code depends on it, per the inline note).
# check_sse("AVX2" ...) looks this string up as ${type}_CODE and hands it to check_c_source_runs().
set(AVX2_CODE "
32+
#include <immintrin.h>
33+
int main()
34+
{
35+
__m256i a = {0};
36+
a = _mm256_abs_epi16(a);
37+
__m256i x;
38+
_mm256_extract_epi64(x, 0); // we rely on this in our AVX2 code
39+
return 0;
40+
}
41+
")
42+
43+
# FMA probe program: performs one fused multiply-add on 256-bit float vectors via _mm256_fmadd_ps.
# check_sse("FMA" ...) looks this string up as ${type}_CODE and hands it to check_c_source_runs().
set(FMA_CODE "
44+
#include <immintrin.h>
45+
int main()
46+
{
47+
__m256 acc = _mm256_setzero_ps();
48+
const __m256 d = _mm256_setzero_ps();
49+
const __m256 p = _mm256_setzero_ps();
50+
acc = _mm256_fmadd_ps( d, p, acc );
51+
return 0;
52+
}
53+
")
54+
55+
# check_sse(<type> <flags>)
#
# Probes whether the build host can compile AND run the ${<type>_CODE} test
# program. Each entry of the ;-separated <flags> list is tried in order and
# probing stops at the first entry that works.
#
#   type  - instruction-set name (e.g. AVX); selects the <type>_CODE probe
#           source and prefixes the result variables
#   flags - candidate compiler flag sets, tried left to right
#
# Results are written to the cache and marked as advanced:
#   <type>_FOUND - TRUE if one of the flag sets worked, FALSE otherwise
#   <type>_FLAGS - the flag set that worked, or "" when none did
#   HAS_<type>_<n> - raw check_c_source_runs() result for the n-th candidate
#
# This is a function rather than a macro so that the loop counter, the loop
# variable and the temporary CMAKE_REQUIRED_FLAGS override stay local to the
# call instead of leaking into (or having to be restored in) the caller.
function(check_sse type flags)
    set(flag_index 1)
    foreach (flag ${flags})
        if (NOT ${type}_FOUND)
            # Local to this function scope; the caller's value is untouched.
            set(CMAKE_REQUIRED_FLAGS "${flag}")
            check_c_source_runs("${${type}_CODE}" HAS_${type}_${flag_index})
            if (HAS_${type}_${flag_index})
                set(${type}_FOUND TRUE CACHE BOOL "${type} support")
                set(${type}_FLAGS "${flag}" CACHE STRING "${type} flags")
            endif()
            # Each candidate gets its own HAS_<type>_<n> cache entry.
            math(EXPR flag_index "${flag_index}+1")
        endif()
    endforeach()

    # No candidate succeeded: record the negative result explicitly.
    if (NOT ${type}_FOUND)
        set(${type}_FOUND FALSE CACHE BOOL "${type} support")
        set(${type}_FLAGS "" CACHE STRING "${type} flags")
    endif()

    mark_as_advanced(${type}_FOUND ${type}_FLAGS)
endfunction()
78+
79+
# Probe the build host for each x86 SIMD extension and set the matching
# LLAMA_* variable to ON or OFF according to the result.
# flags are for MSVC only!
# Each candidate list starts with " " so the probe is first tried without any
# extra architecture flag, then with the /arch: switch.
#
# The *_FOUND results are tested by name (not via ${...}) so that an empty or
# unexpected value cannot turn the if() into a malformed expression.
check_sse("AVX" " ;/arch:AVX")
if (AVX_FOUND)
    set(LLAMA_AVX ON)
else()
    set(LLAMA_AVX OFF)
endif()

# LLAMA_AVX2 is enabled only when both the AVX2 and the FMA probes succeed.
check_sse("AVX2" " ;/arch:AVX2")
check_sse("FMA" " ;/arch:AVX2")
if (AVX2_FOUND AND FMA_FOUND)
    set(LLAMA_AVX2 ON)
else()
    set(LLAMA_AVX2 OFF)
endif()

check_sse("AVX512" " ;/arch:AVX512")
if (AVX512_FOUND)
    set(LLAMA_AVX512 ON)
else()
    set(LLAMA_AVX512 OFF)
endif()

common/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ endif()
4141
set(TARGET common)
4242

4343
add_library(${TARGET} STATIC
44+
base64.hpp
4445
common.h
4546
common.cpp
4647
sampling.h

0 commit comments

Comments
 (0)