From 00c92f22997b9986c5a584c52ca8153a52a4dab2 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Thu, 19 Dec 2024 04:54:23 +0000 Subject: [PATCH] Add: Apple Neural Engine optimizations Co-authored-by: Kirill Solodskikh <23156352+ArnoldMSU@users.noreply.github.com> Co-authored-by: Azim Kurbanov <33081108+b1n0@users.noreply.github.com> Co-authored-by: Ruslan Aydarkhanov <7030684+Aydarkhan@users.noreply.github.com> Co-authored-by: Andrey Ageev <36598717+AndreyAgeev@users.noreply.github.com> --- swift/Encoders.swift | 52 ++++++++++++++++++++++++++++++-------------- swift/README.md | 30 +++++++++++++++++++++++-- 2 files changed, 64 insertions(+), 18 deletions(-) diff --git a/swift/Encoders.swift b/swift/Encoders.swift index ebfcd73..64b69f1 100644 --- a/swift/Encoders.swift +++ b/swift/Encoders.swift @@ -94,20 +94,26 @@ func readConfig(fromPath path: String) throws -> [String: Any] { } /// Compiles and loads a machine learning model from a URL. -/// - Parameter modelURL: The URL where the model package is located. +/// - Parameters: +/// - modelURL: The URL where the model package is located. +/// - computeUnits: The hardware devices to use for model computation. /// - Returns: An instance of `MLModel`. -func readModel(fromURL modelURL: URL) throws -> MLModel { +func readModel(fromURL modelURL: URL, computeUnits: MLComputeUnits = .all) throws -> MLModel { let compiledModelURL = try MLModel.compileModel(at: modelURL) - return try MLModel(contentsOf: compiledModelURL) + let config = MLModelConfiguration() + config.computeUnits = computeUnits + return try MLModel(contentsOf: compiledModelURL, configuration: config) } /// Loads a machine learning model from a local file path. -/// - Parameter path: The file path where the model file is located. +/// - Parameters: +/// - path: The file path where the model file is located. +/// - computeUnits: The hardware devices to use for model computation. 
/// - Returns: An instance of `MLModel`. -func readModel(fromPath path: String) throws -> MLModel { +func readModel(fromPath path: String, computeUnits: MLComputeUnits = .all) throws -> MLModel { let absPath = path.hasPrefix("/") ? path : FileManager.default.currentDirectoryPath + "/" + path let modelURL = URL(fileURLWithPath: absPath, isDirectory: true) - return try readModel(fromURL: modelURL) + return try readModel(fromURL: modelURL, computeUnits: computeUnits) } /// Encodes text input into embeddings using a machine learning model. @@ -120,10 +126,16 @@ public class TextEncoder { /// - modelPath: The path to the directory containing the machine learning model. /// - configPath: Optional. The path to the configuration file. Defaults to config.json in the model directory. /// - tokenizerPath: Optional. The path to the tokenizer file. Defaults to tokenizer.json in the model directory. - public init(modelPath: String, configPath: String? = nil, tokenizerPath: String? = nil) throws { + /// - computeUnits: The hardware devices to use for model computation. Use `.cpuAndNeuralEngine` for best performance. + public init( + modelPath: String, + configPath: String? = nil, + tokenizerPath: String? = nil, + computeUnits: MLComputeUnits = .all + ) throws { let finalConfigPath = configPath ?? modelPath + "/config.json" let finalTokenizerPath = tokenizerPath ?? modelPath + "/tokenizer.json" - self.model = try readModel(fromPath: modelPath) + self.model = try readModel(fromPath: modelPath, computeUnits: computeUnits) self.processor = try TextProcessor( configPath: finalConfigPath, tokenizerPath: finalTokenizerPath, @@ -135,16 +147,20 @@ public class TextEncoder { /// - Parameters: /// - modelName: The identifier for the model repository. /// - hubApi: The API object to interact with the model hub. Defaults to a shared instance. - public init(modelName: String, hubApi: HubApi = .shared) async throws { + /// - computeUnits: The hardware devices to use for model computation. 
Use `.cpuAndNeuralEngine` for best performance. + public init(modelName: String, hubApi: HubApi = .shared, computeUnits: MLComputeUnits = .all) async throws { let repo = Hub.Repo(id: modelName) + let encoderMask = + computeUnits == .cpuAndNeuralEngine ? "text_encoder_neural.mlpackage" : "text_encoder.mlpackage" let modelURL = try await hubApi.snapshot( from: repo, - matching: ["text_encoder.mlpackage/*", "config.json", "tokenizer.json"] + matching: ["\(encoderMask)/*", "config.json", "tokenizer.json"] ) let configPath = modelURL.appendingPathComponent("config.json").path let tokenizerPath = modelURL.appendingPathComponent("tokenizer.json").path self.model = try readModel( - fromURL: modelURL.appendingPathComponent("text_encoder.mlpackage", isDirectory: true) + fromURL: modelURL.appendingPathComponent(encoderMask, isDirectory: true), + computeUnits: computeUnits ) self.processor = try TextProcessor(configPath: configPath, tokenizerPath: tokenizerPath, model: self.model) } @@ -174,9 +190,9 @@ public class ImageEncoder { /// - Parameters: /// - modelPath: The path to the directory containing the machine learning model. /// - configPath: Optional. The path to the configuration file. Defaults to config.json in the model directory. - public init(modelPath: String, configPath: String? = nil) throws { + public init(modelPath: String, configPath: String? = nil, computeUnits: MLComputeUnits = .all) throws { let finalConfigPath = configPath ?? modelPath + "/config.json" - self.model = try readModel(fromPath: modelPath) + self.model = try readModel(fromPath: modelPath, computeUnits: computeUnits) self.processor = try ImageProcessor(configPath: finalConfigPath) } @@ -184,12 +200,16 @@ public class ImageEncoder { /// - Parameters: /// - modelName: The identifier for the model repository. /// - hubApi: The API object to interact with the model hub. Defaults to a shared instance. 
- public init(modelName: String, hubApi: HubApi = .shared) async throws { + /// - computeUnits: The hardware devices to use for model computation. Use `.cpuAndNeuralEngine` for best performance. + public init(modelName: String, hubApi: HubApi = .shared, computeUnits: MLComputeUnits = .all) async throws { let repo = Hub.Repo(id: modelName) - let modelURL = try await hubApi.snapshot(from: repo, matching: ["image_encoder.mlpackage/*", "config.json"]) + let encoderMask = + computeUnits == .cpuAndNeuralEngine ? "image_encoder_neural.mlpackage" : "image_encoder.mlpackage" + let modelURL = try await hubApi.snapshot(from: repo, matching: ["\(encoderMask)/*", "config.json"]) let configPath = modelURL.appendingPathComponent("config.json").path self.model = try readModel( - fromURL: modelURL.appendingPathComponent("image_encoder.mlpackage", isDirectory: true) + fromURL: modelURL.appendingPathComponent(encoderMask, isDirectory: true), + computeUnits: computeUnits ) self.processor = try ImageProcessor(configPath: configPath) } diff --git a/swift/README.md b/swift/README.md index 8fa0eb8..e6b7a3d 100644 --- a/swift/README.md +++ b/swift/README.md @@ -19,7 +19,10 @@ import UForm ### Text Embeddings ```swift -let textModel = try await TextEncoder(modelName: "unum-cloud/uform3-image-text-english-small") +let textModel = try await TextEncoder( + modelName: "unum-cloud/uform3-image-text-english-small", + computeUnits: .cpuAndNeuralEngine +) let text = "A group of friends enjoy a barbecue on a sandy beach, with one person grilling over a large black grill, while the other sits nearby, laughing and enjoying the camaraderie." 
let textEmbedding: Embedding = try textModel.encode(text) let textVector: [Float32] = textEmbedding.asFloats() @@ -28,7 +31,10 @@ let textVector: [Float32] = textEmbedding.asFloats() ### Image Embeddings ```swift -let imageModel = try await ImageEncoder(modelName: "unum-cloud/uform3-image-text-english-small") +let imageModel = try await ImageEncoder( + modelName: "unum-cloud/uform3-image-text-english-small", + computeUnits: .cpuAndNeuralEngine +) let imageURL = "https://github.com/ashvardanian/ashvardanian/blob/master/demos/bbq-on-beach.jpg?raw=true" guard let url = URL(string: imageURL), let imageSource = CGImageSourceCreateWithURL(url as CFURL, nil), @@ -40,6 +46,26 @@ var imageEmbedding: Embedding = try imageModel.encode(cgImage) var imageVector: [Float32] = embedding.asFloats() ``` +### Choosing Target Device + +Apple chips provide several functional units capable of high-throughput matrix multiplication and AI inference. +Those `computeUnits` include the CPU, GPU, and Neural Engine. +For maximum compatibility, the `.all` option is used by default. +Sadly, Apple's scheduler is not always optimal, and it might be beneficial to specify the target device explicitly, especially if the models are pre-compiled for the Apple Neural Engine, as it may yield significant performance gains. + +| Model | GPU Text E. | ANE Text E. | GPU Image E. | ANE Image E. | +| :------------------ | ----------: | ----------: | -----------: | -----------: | +| `english-small` | 2.53 ms | 0.53 ms | 6.57 ms | 1.23 ms | +| `english-base` | 2.54 ms | 0.61 ms | 18.90 ms | 3.79 ms | +| `english-large` | 2.30 ms | 0.61 ms | 79.68 ms | 20.94 ms | +| `multilingual-base` | 2.34 ms | 0.50 ms | 18.98 ms | 3.77 ms | + +> On Apple M4 iPad, running iPadOS 18.2. +> Batch size is 1, and the model is pre-loaded into memory. +> The original encoders use `f32` single-precision numbers for maximum compatibility, and mostly rely on the __GPU__ for computation. 
+> The quantized encoders use a mixture of `i8`, `f16`, and `f32` numbers for maximum performance, and mostly rely on the Apple Neural Engine (__ANE__) for computation. +> The median latency is reported. + ### Computing Distances There are several ways to compute distances between embeddings, once you have them.