Friday, January 31, 2025

llama.cpp vulkan

This might be a way to do it. Does anyone have a Vulkan-capable card to test against?
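If you're not sure whether the loader even sees your card, something like this should tell you. A rough sketch, assuming the vulkan-tools package; I haven't run it on the hardware in question.

# install the loader's diagnostic tool and list detected devices
$ pkg_add vulkan-tools
$ vulkaninfo --summary

If a GPU shows up there, the port below has a chance of working.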

Index: Makefile
===================================================================
RCS file: /cvs/ports/misc/llama.cpp/Makefile,v
retrieving revision 1.1
diff -u -p -u -r1.1 Makefile
--- Makefile 30 Jan 2025 22:55:11 -0000 1.1
+++ Makefile 1 Feb 2025 04:20:02 -0000
@@ -10,6 +10,7 @@ SHARED_LIBS += ggml-cpu 0.0
SHARED_LIBS += ggml 0.0
SHARED_LIBS += llama 0.0
SHARED_LIBS += llava_shared 0.0
+SHARED_LIBS += ggml-vulkan 0.0

CATEGORIES = misc

@@ -18,11 +19,15 @@ HOMEPAGE = https://github.com/ggerganov
# MIT
PERMIT_PACKAGE = Yes

-WANTLIB += m pthread ${COMPILER_LIBCXX}
+WANTLIB += m pthread vulkan ${COMPILER_LIBCXX}

MODULES = devel/cmake

+LIB_DEPENDS = graphics/vulkan-loader
+BUILD_DEPENDS = graphics/shaderc
+
CONFIGURE_ARGS = -DGGML_CCACHE=Off \
- -DGGML_NATIVE=Off
+ -DGGML_NATIVE=Off \
+ -DGGML_VULKAN=On

.include <bsd.port.mk>
Index: pkg/PLIST
===================================================================
RCS file: /cvs/ports/misc/llama.cpp/pkg/PLIST,v
retrieving revision 1.1
diff -u -p -u -r1.1 PLIST
--- pkg/PLIST 30 Jan 2025 22:55:11 -0000 1.1
+++ pkg/PLIST 1 Feb 2025 04:20:02 -0000
@@ -58,6 +58,7 @@ bin/convert_hf_to_gguf.py
@bin bin/test-tokenizer-0
@bin bin/test-tokenizer-1-bpe
@bin bin/test-tokenizer-1-spm
+@bin bin/vulkan-shaders-gen
include/ggml-alloc.h
include/ggml-backend.h
include/ggml-blas.h
@@ -74,7 +75,6 @@ include/ggml.h
include/gguf.h
include/llama-cpp.h
include/llama.h
-lib/cmake/
lib/cmake/ggml/
lib/cmake/ggml/ggml-config.cmake
lib/cmake/ggml/ggml-version.cmake
@@ -83,6 +83,7 @@ lib/cmake/llama/llama-config.cmake
lib/cmake/llama/llama-version.cmake
@lib lib/libggml-base.so.${LIBggml-base_VERSION}
@lib lib/libggml-cpu.so.${LIBggml-cpu_VERSION}
+@lib lib/libggml-vulkan.so.${LIBggml-vulkan_VERSION}
@lib lib/libggml.so.${LIBggml_VERSION}
@lib lib/libllama.so.${LIBllama_VERSION}
@lib lib/libllava_shared.so.${LIBllava_shared_VERSION}
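For anyone with a suitable card, a rough smoke test, assuming the diff applies cleanly and you already have a model file around. The model path and layer count are placeholders, not something I've verified on this backend.

# build and install the patched port
$ cd /usr/ports/misc/llama.cpp
$ make package && make install

# offload layers to the GPU; look for the Vulkan device in the startup log
$ llama-cli -m model.gguf -ngl 99 -p "hello"

Reports of whether the shaders actually compile and run would be appreciated.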
