ggml-org · zhouwg · Feb 14, 2025 · Feb 15, 2025 · Feb 16, 2025 · Feb 17, 2025
diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt
@@ -188,6 +188,7 @@ option(GGML_OPENCL                          "ggml: use OpenCL"
 option(GGML_OPENCL_PROFILING                "ggml: use OpenCL profiling (increases overhead)" OFF)
 option(GGML_OPENCL_EMBED_KERNELS            "ggml: embed kernels"                             ON)
 option(GGML_OPENCL_USE_ADRENO_KERNELS       "ggml: use optimized kernels for Adreno"          ON)
+option(GGML_QNN                             "ggml: use QNN"                                   OFF)
 
 # toolchain for vulkan-shaders-gen
 set   (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen")
@@ -251,6 +252,7 @@ set(GGML_PUBLIC_HEADERS
     include/ggml-rpc.h
     include/ggml-sycl.h
     include/ggml-vulkan.h
+    include/ggml-qnn.h
     include/gguf.h)
 
 set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")

diff --git a/ggml/include/ggml-qnn.h b/ggml/include/ggml-qnn.h
@@ -0,0 +1,68 @@
+ /*
+ * Copyright (c) 2023-2024 The ggml authors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#pragma once
+
+#include "ggml.h"
+#include "ggml-backend.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define GGML_QNN_MAX_DEVICES    3
+#define GGML_QNN_BACKEND_NAME   "qnn"
+
+enum QNNBackend {
+    QNN_BACKEND_CPU,
+    QNN_BACKEND_GPU,
+    QNN_BACKEND_NPU,
+    QNN_BACKEND_GGML, //"fake" QNN backend for compare performance between QNN backend and cpu backend
+};
+
+GGML_BACKEND_API ggml_backend_t ggml_backend_qnn_init(size_t dev_num, const char * qnn_lib_path);
+
+GGML_BACKEND_API bool           ggml_backend_is_qnn(ggml_backend_t backend);
+
+GGML_BACKEND_API void           ggml_backend_qnn_set_n_threads(ggml_backend_t backend, int thread_counts);
+
+GGML_BACKEND_API int            ggml_backend_qnn_get_device_count(void);
+
+GGML_BACKEND_API ggml_backend_reg_t ggml_backend_qnn_reg(void);
+
+inline const char * ggml_backend_qnn_get_devname(size_t dev_num) {
+    switch (dev_num) {
+        case QNN_BACKEND_CPU:
+            return "QNN-CPU";
+        case QNN_BACKEND_GPU:
+            return "QNN-GPU";
+        case QNN_BACKEND_NPU:
+            return "QNN-NPU";
+        case QNN_BACKEND_GGML:
+            return "ggml"; //"fake" QNN backend, used for compare performance between QNN backend and original GGML
+        default:
+            return "unknown";
+    }
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
@@ -329,6 +329,7 @@ ggml_add_backend(RPC)
 ggml_add_backend(SYCL)
 ggml_add_backend(Vulkan)
 ggml_add_backend(OpenCL)
+ggml_add_backend(QNN)
 
 foreach (target ggml-base ggml)
     target_include_directories(${target} PUBLIC    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)

diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
@@ -66,6 +66,10 @@
 #include "ggml-kompute.h"
 #endif
 
+#ifdef GGML_USE_QNN
+#include "ggml-qnn.h"
+#endif
+
 // disable C++17 deprecation warning for std::codecvt_utf8
 #if defined(__clang__)
 #    pragma clang diagnostic push
@@ -180,6 +184,9 @@ struct ggml_backend_registry {
 #ifdef GGML_USE_KOMPUTE
         register_backend(ggml_backend_kompute_reg());
 #endif
+#ifdef GGML_USE_QNN
+        register_backend(ggml_backend_qnn_reg());
+#endif
 #ifdef GGML_USE_CPU
         register_backend(ggml_backend_cpu_reg());
 #endif
@@ -573,6 +580,7 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
     ggml_backend_load_best("vulkan", silent, dir_path);
     ggml_backend_load_best("opencl", silent, dir_path);
     ggml_backend_load_best("musa", silent, dir_path);
+    ggml_backend_load_best("qnn", silent, dir_path);
     ggml_backend_load_best("cpu", silent, dir_path);
     // check the environment variable GGML_BACKEND_PATH to load an out-of-tree backend
     const char * backend_path = std::getenv("GGML_BACKEND_PATH");

diff --git a/ggml/src/ggml-qnn/CMakeLists.txt b/ggml/src/ggml-qnn/CMakeLists.txt
@@ -0,0 +1,34 @@
+message(STATUS "Using QNN backend")
+
+if(CMAKE_SYSTEM_NAME STREQUAL "Android")
+    find_library(LOG_LIB log)
+    set(QNN_LINK_LIBRARIES ${LOG_LIB})
+    set(QNN_DEFAULT_LIB_SEARCH_PATH "/data/local/tmp/" CACHE STRING "customized library search path for QNN backend")
+
+if(NOT DEFINED GGML_QNN_SDK_PATH)
+    # try read from environment variable
+    if(DEFINED ENV{QNN_SDK_PATH})
+        set(GGML_QNN_SDK_PATH $ENV{QNN_SDK_PATH})
+    else()
+        message(FATAL_ERROR "GGML_QNN_SDK_PATH not defined")
+    endif()
+endif()
+
+message("QNN_SDK_PATH: ${GGML_QNN_SDK_PATH}")
+
+set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
+
+file(GLOB QNN_SOURCES "${CMAKE_CURRENT_LIST_DIR}/*.cpp")
+ggml_add_backend_library(ggml-qnn
+    ${QNN_SOURCES}
+)
+
+target_include_directories(ggml-qnn PRIVATE ${GGML_QNN_SDK_PATH}/include/QNN ${CMAKE_CURRENT_LIST_DIR})
+target_link_libraries(ggml-qnn PRIVATE ${QNN_LINK_LIBRARIES})
+
+string(REGEX REPLACE "/$" "" GGML_QNN_DEFAULT_LIB_SEARCH_PATH "${QNN_DEFAULT_LIB_SEARCH_PATH}")
+target_compile_definitions(ggml-qnn PRIVATE GGML_QNN_DEFAULT_LIB_SEARCH_PATH="${QNN_DEFAULT_LIB_SEARCH_PATH}/")
+
+else()
+    message(FATAL_ERROR "QNN now only available on Android")
+endif()