
Commit 4218641

Separate CuBLAS/hipBLAS (ggml-org#438)
1 parent 63fcbbb commit 4218641
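
This commit splits the previously combined CuBLAS/hipBLAS build into two independent backends: `koboldcpp_cublas` stays CUDA-only, while a new `koboldcpp_hipblas` target covers AMD GPUs via ROCm, with matching changes to the CMake targets, Makefile rules, ignore list, and the Python loader/GUI.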

File tree

4 files changed: +67 −33 lines


.gitignore

Lines changed: 4 additions & 0 deletions
@@ -84,3 +84,7 @@ tests/test-tokenizer-0
 /koboldcpp_cublas.dll
 /cublas64_11.dll
 /cublasLt64_11.dll
+/rocblas/
+rocblas.dll
+hipblas.dll
+koboldcpp_hipblas.so

CMakeLists.txt

Lines changed: 28 additions & 10 deletions
@@ -124,7 +124,12 @@ if (LLAMA_CUBLAS)
 endif()
 
 if (LLAMA_HIPBLAS)
-    list(APPEND CMAKE_PREFIX_PATH /opt/rocm)
+    if (MSVC)
+        list(APPEND CMAKE_PREFIX_PATH "C:/Program Files/AMD/ROCm/5.5")
+    else()
+        list(APPEND CMAKE_PREFIX_PATH /opt/rocm)
+    endif()
+
 
     if (NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang")
         message(WARNING "Only LLVM is supported for HIP, hint: CC=/opt/rocm/llvm/bin/clang")
@@ -387,16 +392,29 @@ target_compile_features(gpttype_adapter PUBLIC cxx_std_11) # don't bump
 target_link_libraries(gpttype_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
 set_target_properties(gpttype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
 
+if (LLAMA_CUBLAS)
+    set(TARGET koboldcpp_cublas)
+    add_library(${TARGET} SHARED expose.cpp expose.h)
+    target_include_directories(${TARGET} PUBLIC . ./otherarch ./otherarch/tools ./examples ./common)
+    target_compile_features(${TARGET} PUBLIC cxx_std_11) # don't bump
+    set_target_properties(${TARGET} PROPERTIES PREFIX "")
+    set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_cublas")
+    set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+    target_link_libraries(${TARGET} PUBLIC Threads::Threads ggml ggml_v1 ggml_v2 common2 gpttype_adapter ${LLAMA_EXTRA_LIBS})
+    target_compile_features(${TARGET} PRIVATE cxx_std_11)
+endif()
 
-set(TARGET koboldcpp_cublas)
-add_library(${TARGET} SHARED expose.cpp expose.h)
-target_include_directories(${TARGET} PUBLIC . ./otherarch ./otherarch/tools ./examples ./common)
-target_compile_features(${TARGET} PUBLIC cxx_std_11) # don't bump
-set_target_properties(${TARGET} PROPERTIES PREFIX "")
-set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_cublas")
-set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
-target_link_libraries(${TARGET} PUBLIC Threads::Threads ggml ggml_v1 ggml_v2 common2 gpttype_adapter ${LLAMA_EXTRA_LIBS})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
+if (LLAMA_HIPBLAS)
+    set(TARGET koboldcpp_hipblas)
+    add_library(${TARGET} SHARED expose.cpp expose.h)
+    target_include_directories(${TARGET} PUBLIC . ./otherarch ./otherarch/tools ./examples ./common)
+    target_compile_features(${TARGET} PUBLIC cxx_std_11) # don't bump
+    set_target_properties(${TARGET} PROPERTIES PREFIX "")
+    set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_hipblas")
+    set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+    target_link_libraries(${TARGET} PUBLIC Threads::Threads ggml ggml_v1 ggml_v2 common2 gpttype_adapter ${LLAMA_EXTRA_LIBS})
+    target_compile_features(${TARGET} PRIVATE cxx_std_11)
+endif()
 
 
 if (MAKE_MISC_FILES)
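
Each GPU backend library is now emitted only when its option is enabled: `LLAMA_CUBLAS` builds `koboldcpp_cublas` and `LLAMA_HIPBLAS` builds the new `koboldcpp_hipblas`, instead of the cuBLAS target being defined unconditionally. On MSVC the ROCm prefix is additionally searched at `C:/Program Files/AMD/ROCm/5.5` rather than the Unix default `/opt/rocm`.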

Makefile

Lines changed: 11 additions & 12 deletions
@@ -1,4 +1,4 @@
-default: koboldcpp_default koboldcpp_failsafe koboldcpp_openblas koboldcpp_noavx2 koboldcpp_clblast koboldcpp_cublas
+default: koboldcpp_default koboldcpp_failsafe koboldcpp_openblas koboldcpp_noavx2 koboldcpp_clblast koboldcpp_cublas koboldcpp_hipblas
 tools: quantize_gpt2 quantize_gptj quantize_llama quantize_neox quantize_mpt
 dev: koboldcpp_openblas
 dev2: koboldcpp_clblast
@@ -39,8 +39,8 @@ endif
 #
 
 # keep standard at C11 and C++11
-CFLAGS = -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -Ofast -DNDEBUG -std=c11 -fPIC -DGGML_USE_K_QUANTS -DLOG_DISABLE_LOGS
-CXXFLAGS = -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -Ofast -DNDEBUG -std=c++11 -fPIC -DGGML_USE_K_QUANTS -DLOG_DISABLE_LOGS
+CFLAGS = -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -Ofast -DNDEBUG -std=c11 -fPIC -DGGML_USE_K_QUANTS -DLOG_DISABLE_LOGS -D_GNU_SOURCE
+CXXFLAGS = -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -Ofast -DNDEBUG -std=c++11 -fPIC -DGGML_USE_K_QUANTS -DLOG_DISABLE_LOGS -D_GNU_SOURCE
 LDFLAGS =
 
 # these are used on windows, to build some libraries with extra old device compatibility
@@ -211,18 +211,15 @@ endif # LLAMA_CUDA_FORCE_DMMV
 ggml-cuda.o: HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) \
 	-DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) \
 	-DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y) \
-	-DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER) \
-	-DCC_TURING=1000000000
+	-DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
 ggml_v2-cuda.o: HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) \
 	-DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) \
 	-DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y) \
-	-DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER) \
-	-DCC_TURING=1000000000
+	-DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
 ggml_v2-cuda-legacy.o: HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) \
 	-DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) \
 	-DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y) \
-	-DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER) \
-	-DCC_TURING=1000000000 # DGGML_CUDA_DMMV_F16 does not currently work with AMD.
+	-DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
 ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
 	$(CXX) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
 ggml_v2-cuda.o: otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h
@@ -417,7 +414,7 @@ gpttype_adapter_cublas.o: $(GPTTYPE_ADAPTER)
 	$(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
 
 clean:
-	rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state gguf gguf.exe main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_cublas.dll koboldcpp_default.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_cublas.so
+	rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state gguf gguf.exe main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_default.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_cublas.so koboldcpp_hipblas.so
 
 main: examples/main/main.cpp build-info.h ggml.o k_quants.o ggml-alloc.o llama.o common.o console.o grammar-parser.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
@@ -439,8 +436,10 @@ koboldcpp_noavx2: ggml_noavx2.o ggml_v2_noavx2.o ggml_v1_failsafe.o expose.o com
 	$(NOAVX2_BUILD)
 koboldcpp_clblast: ggml_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o common.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o k_quants.o ggml-alloc.o $(OBJS)
 	$(CLBLAST_BUILD)
-koboldcpp_cublas: ggml_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o k_quants.o ggml-alloc.o $(CUBLAS_OBJS) $(HIP_OBJS) $(OBJS)
-	$(CUBLAS_BUILD) $(HIPBLAS_BUILD)
+koboldcpp_cublas: ggml_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o k_quants.o ggml-alloc.o $(CUBLAS_OBJS) $(OBJS)
+	$(CUBLAS_BUILD)
+koboldcpp_hipblas: ggml_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o k_quants.o ggml-alloc.o $(HIP_OBJS) $(OBJS)
+	$(HIPBLAS_BUILD)
 
 quantize_llama: examples/quantize/quantize.cpp ggml.o llama.o k_quants.o ggml-alloc.o
 	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
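
The `koboldcpp_cublas` rule no longer pulls in `$(HIP_OBJS)` or runs `$(HIPBLAS_BUILD)`; CUDA and ROCm are now separate make targets, so a ROCm user would build `koboldcpp_hipblas` directly (presumably together with the Makefile's existing `LLAMA_HIPBLAS=1` switch that populates `HIP_OBJS` and `HIPBLAS_BUILD`; that machinery sits outside this diff). Dropping `-DCC_TURING=1000000000` removes the oversized compute-capability threshold that had effectively disabled the Turing-specific CUDA kernels in the HIP build.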

koboldcpp.py

Lines changed: 24 additions & 11 deletions
@@ -103,6 +103,7 @@ def pick_existant_file(ntoption,nonntoption):
 lib_noavx2 = pick_existant_file("koboldcpp_noavx2.dll","koboldcpp_noavx2.so")
 lib_clblast = pick_existant_file("koboldcpp_clblast.dll","koboldcpp_clblast.so")
 lib_cublas = pick_existant_file("koboldcpp_cublas.dll","koboldcpp_cublas.so")
+lib_hipblas = pick_existant_file("koboldcpp_hipblas.dll","koboldcpp_hipblas.so")
 
 
 def init_library():
@@ -113,6 +114,7 @@ def init_library():
     use_openblas = False # if true, uses OpenBLAS for acceleration. libopenblas.dll must exist in the same dir.
     use_clblast = False #uses CLBlast instead
     use_cublas = False #uses cublas instead
+    use_hipblas = False #uses hipblas instead
     use_noavx2 = False #uses no avx2 instructions
     use_failsafe = False #uses no intrinsics, failsafe mode
     if args.noavx2:
@@ -131,11 +133,16 @@ def init_library():
             print("Attempting to use CLBlast library for faster prompt ingestion. A compatible clblast will be required.")
             use_clblast = True
     elif (args.usecublas is not None):
-        if not file_exists(lib_cublas):
+        if not file_exists(lib_cublas) and not file_exists(lib_hipblas):
             print("Warning: CuBLAS library file not found. Non-BLAS library will be used.")
         else:
-            print("Attempting to use CuBLAS library for faster prompt ingestion. A compatible CuBLAS will be required.")
-            use_cublas = True
+            if file_exists(lib_cublas):
+                print("Attempting to use CuBLAS library for faster prompt ingestion. A compatible CuBLAS will be required.")
+                use_cublas = True
+            elif file_exists(lib_hipblas):
+                print("Attempting to use hipBLAS library for faster prompt ingestion. A compatible AMD GPU will be required.")
+                use_hipblas = True
+
     else:
         if not file_exists(lib_openblas) or (os.name=='nt' and not file_exists("libopenblas.dll")):
             print("Warning: OpenBLAS library file not found. Non-BLAS library will be used.")
@@ -157,6 +164,8 @@ def init_library():
         libname = lib_clblast
     elif use_cublas:
         libname = lib_cublas
+    elif use_hipblas:
+        libname = lib_hipblas
     elif use_openblas:
         libname = lib_openblas
     else:
@@ -766,10 +775,11 @@ def show_new_gui():
     (lib_openblas, "Use OpenBLAS"),
     (lib_clblast, "Use CLBlast"),
     (lib_cublas, "Use CuBLAS"),
+    (lib_hipblas, "Use hipBLAS (ROCm)"),
     (lib_default, "Use No BLAS"),
     (lib_noavx2, "NoAVX2 Mode (Old CPU)"),
     (lib_failsafe, "Failsafe Mode (Old CPU)")]
-    openblas_option, clblast_option, cublas_option, default_option, noavx2_option, failsafe_option = (opt if file_exists(lib) or (os.name == 'nt' and file_exists(opt + ".dll")) else None for lib, opt in lib_option_pairs)
+    openblas_option, clblast_option, cublas_option, hipblas_option, default_option, noavx2_option, failsafe_option = (opt if file_exists(lib) or (os.name == 'nt' and file_exists(opt + ".dll")) else None for lib, opt in lib_option_pairs)
     # slider data
     blasbatchsize_values = ["-1", "32", "64", "128", "256", "512", "1024", "2048"]
     blasbatchsize_text = ["Don't Batch BLAS","32","64","128","256","512","1024","2048"]
@@ -922,15 +932,15 @@ def setup_backend_tooltip(parent):
 
     def changerunmode(a,b,c):
         index = runopts_var.get()
-        if index == "Use CLBlast" or index == "Use CuBLAS":
+        if index == "Use CLBlast" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
             gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
             quick_gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
             if index == "Use CLBlast":
                 gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
                 quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
                 if gpu_choice_var.get()=="All":
                     gpu_choice_var.set("1")
-            elif index == "Use CuBLAS":
+            elif index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
                 CUDA_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
                 CUDA_quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
         else:
@@ -941,7 +951,7 @@ def changerunmode(a,b,c):
             quick_gpu_selector_box.grid_forget()
             CUDA_quick_gpu_selector_box.grid_forget()
 
-        if index == "Use CuBLAS":
+        if index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
             lowvram_box.grid(row=4, column=0, padx=8, pady=1, stick="nw")
             quick_lowvram_box.grid(row=4, column=0, padx=8, pady=1, stick="nw")
             mmq_box.grid(row=4, column=1, padx=8, pady=1, stick="nw")
@@ -952,7 +962,7 @@ def changerunmode(a,b,c):
             mmq_box.grid_forget()
             quick_mmq_box.grid_forget()
 
-        if index == "Use CLBlast" or index == "Use CuBLAS":
+        if index == "Use CLBlast" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
             gpu_layers_label.grid(row=5, column=0, padx = 8, pady=1, stick="nw")
             gpu_layers_entry.grid(row=5, column=1, padx=8, pady=1, stick="nw")
             quick_gpu_layers_label.grid(row=5, column=0, padx = 8, pady=1, stick="nw")
@@ -1147,7 +1157,7 @@ def export_vars():
         gpuchoiceidx = int(gpu_choice_var.get())-1
         if runopts_var.get() == "Use CLBlast":
            args.useclblast = [[0,0], [1,0], [0,1], [1,1]][gpuchoiceidx]
-        if runopts_var.get() == "Use CuBLAS":
+        if runopts_var.get() == "Use CuBLAS" or runopts_var.get() == "Use hipBLAS (ROCm)":
            if gpu_choice_var.get()=="All":
                args.usecublas = ["lowvram"] if lowvram_var.get() == 1 else ["normal"]
            else:
@@ -1204,8 +1214,11 @@ def import_vars(dict):
             runopts_var.set(clblast_option)
             gpu_choice_var.set(str(["0 0", "1 0", "0 1", "1 1"].index(str(dict["useclblast"][0]) + " " + str(dict["useclblast"][1])) + 1))
         elif "usecublas" in dict and dict["usecublas"]:
-            if cublas_option is not None:
-                runopts_var.set(cublas_option)
+            if cublas_option is not None or hipblas_option is not None:
+                if cublas_option:
+                    runopts_var.set(cublas_option)
+                elif hipblas_option:
+                    runopts_var.set(hipblas_option)
                 lowvram_var.set(1 if "lowvram" in dict["usecublas"] else 0)
                 mmq_var.set(1 if "mmq" in dict["usecublas"] else 0)
                 gpu_choice_var.set("All")
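
Taken together, the loader changes reduce to a simple preference order when `--usecublas` is requested: load the CuBLAS library if it exists, otherwise fall back to hipBLAS, otherwise warn and use the non-BLAS build. A condensed, runnable sketch of that logic (`pick_gpu_blas_library` is a hypothetical helper name, not the verbatim koboldcpp.py code):

import os

# Candidate library names as used in the commit: DLL on Windows, .so elsewhere.
lib_cublas = "koboldcpp_cublas.dll" if os.name == "nt" else "koboldcpp_cublas.so"
lib_hipblas = "koboldcpp_hipblas.dll" if os.name == "nt" else "koboldcpp_hipblas.so"

def pick_gpu_blas_library():
    """Prefer CuBLAS when its library is present, else hipBLAS, else None."""
    if os.path.exists(lib_cublas):
        return lib_cublas       # NVIDIA build found
    if os.path.exists(lib_hipblas):
        return lib_hipblas      # AMD (ROCm) build found
    return None                 # caller falls back to the non-BLAS library

lib = pick_gpu_blas_library()
if lib is None:
    print("Warning: CuBLAS library file not found. Non-BLAS library will be used.")
else:
    print("would load: " + lib)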
