Skip to content

Vulkan build on MacOS #5311

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Feb 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .devops/nix/package.nix
Original file line number Diff line number Diff line change
Expand Up @@ -255,11 +255,11 @@ effectiveStdenv.mkDerivation (
# Configurations we don't want even the CI to evaluate. Results in the
# "unsupported platform" messages. This is mostly a no-op, because
# cudaPackages would've refused to evaluate anyway.
badPlatforms = optionals (useCuda || useOpenCL || useVulkan) lib.platforms.darwin;
badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;

# Configurations that are known to result in build failures. Can be
# overridden by importing Nixpkgs with `allowBroken = true`.
broken = (useMetalKit && !effectiveStdenv.isDarwin) || (useVulkan && effectiveStdenv.isDarwin);
broken = (useMetalKit && !effectiveStdenv.isDarwin);

description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
homepage = "https://github.com/ggerganov/llama.cpp/";
Expand Down
2 changes: 1 addition & 1 deletion flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -150,14 +150,14 @@
packages =
{
default = config.legacyPackages.llamaPackages.llama-cpp;
vulkan = config.packages.default.override { useVulkan = true; };
}
// lib.optionalAttrs pkgs.stdenv.isLinux {
opencl = config.packages.default.override { useOpenCL = true; };
cuda = config.legacyPackages.llamaPackagesCuda.llama-cpp;

mpi-cpu = config.packages.default.override { useMpi = true; };
mpi-cuda = config.packages.default.override { useMpi = true; };
vulkan = config.packages.default.override { useVulkan = true; };
}
// lib.optionalAttrs (system == "x86_64-linux") {
rocm = config.legacyPackages.llamaPackagesRocm.llama-cpp;
Expand Down
103 changes: 78 additions & 25 deletions ggml-vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1091,7 +1091,10 @@ static void ggml_vk_print_gpu_info(size_t idx) {
}
}

static void ggml_vk_instance_init() {
static bool ggml_vk_instance_validation_ext_available(const std::vector<vk::ExtensionProperties>& instance_extensions);
static bool ggml_vk_instance_portability_enumeration_ext_available(const std::vector<vk::ExtensionProperties>& instance_extensions);

void ggml_vk_instance_init() {
if (vk_instance_initialized) {
return;
}
Expand All @@ -1100,28 +1103,42 @@ static void ggml_vk_instance_init() {
#endif

vk::ApplicationInfo app_info{ "ggml-vulkan", 1, nullptr, 0, VK_API_VERSION };
const std::vector<const char*> layers = {
#ifdef GGML_VULKAN_VALIDATE
"VK_LAYER_KHRONOS_validation",
#endif
};
const std::vector<const char*> extensions = {
#ifdef GGML_VULKAN_VALIDATE
"VK_EXT_validation_features",
#endif
};
vk::InstanceCreateInfo instance_create_info(vk::InstanceCreateFlags(), &app_info, layers, extensions);
#ifdef GGML_VULKAN_VALIDATE
const std::vector<vk::ValidationFeatureEnableEXT> features_enable = { vk::ValidationFeatureEnableEXT::eBestPractices };
vk::ValidationFeaturesEXT validation_features = {
features_enable,
{},
};
validation_features.setPNext(nullptr);
instance_create_info.setPNext(&validation_features);

std::cerr << "ggml_vulkan: Validation layers enabled" << std::endl;
#endif
const std::vector<vk::ExtensionProperties> instance_extensions = vk::enumerateInstanceExtensionProperties();
const bool validation_ext = ggml_vk_instance_validation_ext_available(instance_extensions);
const bool portability_enumeration_ext = ggml_vk_instance_portability_enumeration_ext_available(instance_extensions);

std::vector<const char*> layers;

if (validation_ext) {
layers.push_back("VK_LAYER_KHRONOS_validation");
}
std::vector<const char*> extensions;
if (validation_ext) {
extensions.push_back("VK_EXT_validation_features");
}
if (portability_enumeration_ext) {
extensions.push_back("VK_KHR_portability_enumeration");
}
vk::InstanceCreateInfo instance_create_info(vk::InstanceCreateFlags{}, &app_info, layers, extensions);
if (portability_enumeration_ext) {
instance_create_info.flags |= vk::InstanceCreateFlagBits::eEnumeratePortabilityKHR;
}

std::vector<vk::ValidationFeatureEnableEXT> features_enable;
vk::ValidationFeaturesEXT validation_features;

if (validation_ext) {
features_enable = { vk::ValidationFeatureEnableEXT::eBestPractices };
validation_features = {
features_enable,
{},
};
validation_features.setPNext(nullptr);
instance_create_info.setPNext(&validation_features);

std::cerr << "ggml_vulkan: Validation layers enabled" << std::endl;
}
Comment on lines +1111 to +1141
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was hoping that these constants would have one home:

"VK_LAYER_KHRONOS_validation"
"VK_EXT_validation_features"
"VK_KHR_portability_enumeration"

Could you accomplish that by making a function that returned or mutated the two variables features_enable and validation_features?

That way, this generic code could stay unaware of the sort of features and validation that got added, and the system-specific code below would be able to add (as needed) the right features and validation.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could, but let's not overextend the scope and duration of this PR. If you wanna do a refactor PR yourself or in collaboration with me, or gather suggestions in an issue, that's fine.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I went to try to do that refactoring with a spare hour this morning, and ran into a rebase error (on 8b38eb5) that I could not see how to resolve in a straightforward manner. Could you or @dokterbob rebase this branch on master appropriately?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't have any issues merging upstream into the branch, not sure what your problem was.

To deal with the linker error that Vulkan currently has with cmake, you can use this workaround patch:

diff --git a/examples/gguf/CMakeLists.txt b/examples/gguf/CMakeLists.txt
index 6481f087..85950145 100644
--- a/examples/gguf/CMakeLists.txt
+++ b/examples/gguf/CMakeLists.txt
@@ -1,5 +1,5 @@
 set(TARGET gguf)
 add_executable(${TARGET} gguf.cpp)
 install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE ggml ${CMAKE_THREAD_LIBS_INIT})
+target_link_libraries(${TARGET} PRIVATE common ggml ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_11)

vk_instance.instance = vk::createInstance(instance_create_info);

memset(vk_instance.initialized, 0, sizeof(bool) * GGML_VK_MAX_DEVICES);
Expand Down Expand Up @@ -1168,12 +1185,12 @@ static void ggml_vk_init(ggml_backend_vk_context * ctx, size_t idx) {
vk_instance.devices[idx] = std::make_shared<vk_device>();
ctx->device = vk_instance.devices[idx];
ctx->device.lock()->physical_device = devices[dev_num];
std::vector<vk::ExtensionProperties> ext_props = ctx->device.lock()->physical_device.enumerateDeviceExtensionProperties();
const std::vector<vk::ExtensionProperties> ext_props = ctx->device.lock()->physical_device.enumerateDeviceExtensionProperties();

bool maintenance4_support = false;

// Check if maintenance4 is supported
for (auto properties : ext_props) {
for (const auto& properties : ext_props) {
if (strcmp("VK_KHR_maintenance4", properties.extensionName) == 0) {
maintenance4_support = true;
}
Expand Down Expand Up @@ -1204,7 +1221,7 @@ static void ggml_vk_init(ggml_backend_vk_context * ctx, size_t idx) {
bool fp16_storage = false;
bool fp16_compute = false;

for (auto properties : ext_props) {
for (const auto& properties : ext_props) {
if (strcmp("VK_KHR_16bit_storage", properties.extensionName) == 0) {
fp16_storage = true;
} else if (strcmp("VK_KHR_shader_float16_int8", properties.extensionName) == 0) {
Expand Down Expand Up @@ -5301,6 +5318,42 @@ GGML_CALL int ggml_backend_vk_reg_devices() {
return vk_instance.device_indices.size();
}

// Extension availability
static bool ggml_vk_instance_validation_ext_available(const std::vector<vk::ExtensionProperties>& instance_extensions) {
#ifdef GGML_VULKAN_VALIDATE
bool portability_enumeration_ext = false;
// Check for portability enumeration extension for MoltenVK support
for (const auto& properties : instance_extensions) {
if (strcmp("VK_KHR_portability_enumeration", properties.extensionName) == 0) {
return true;
}
}
if (!portability_enumeration_ext) {
std::cerr << "ggml_vulkan: WARNING: Instance extension VK_KHR_portability_enumeration not found." << std::endl;
}
#endif
return false;

UNUSED(instance_extensions);
}
static bool ggml_vk_instance_portability_enumeration_ext_available(const std::vector<vk::ExtensionProperties>& instance_extensions) {
#ifdef __APPLE__
bool portability_enumeration_ext = false;
// Check for portability enumeration extension for MoltenVK support
for (const auto& properties : instance_extensions) {
if (strcmp("VK_KHR_portability_enumeration", properties.extensionName) == 0) {
return true;
}
}
if (!portability_enumeration_ext) {
std::cerr << "ggml_vulkan: WARNING: Instance extension VK_KHR_portability_enumeration not found." << std::endl;
}
#endif
return false;

UNUSED(instance_extensions);
}

// checks

#ifdef GGML_VULKAN_CHECK_RESULTS
Expand Down
2 changes: 2 additions & 0 deletions ggml.c
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,8 @@ inline static void * ggml_calloc(size_t num, size_t size) {
#include <Accelerate/Accelerate.h>
#if defined(GGML_USE_CLBLAST) // allow usage of CLBlast alongside Accelerate functions
#include "ggml-opencl.h"
#elif defined(GGML_USE_VULKAN)
#include "ggml-vulkan.h"
#endif
#elif defined(GGML_USE_OPENBLAS)
#if defined(GGML_BLAS_USE_MKL)
Expand Down