Skip to content

Commit

Permalink
Merge pull request #97 from unum-cloud/main-dev
Browse files Browse the repository at this point in the history
Apple Neural Engine optimizations
  • Loading branch information
ashvardanian authored Dec 20, 2024
2 parents 2c15cec + 00c92f2 commit 2dbcc42
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 18 deletions.
52 changes: 36 additions & 16 deletions swift/Encoders.swift
Original file line number Diff line number Diff line change
Expand Up @@ -94,20 +94,26 @@ func readConfig(fromPath path: String) throws -> [String: Any] {
}

/// Compiles and loads a machine learning model from a URL.
/// - Parameter modelURL: The URL where the model package is located.
/// - Parameters:
/// - modelURL: The URL where the model package is located.
/// - computeUnits: The hardware devices to use for model computation. Defaults to `.all`.
/// - Returns: An instance of `MLModel` loaded from the compiled model.
/// - Throws: Any error thrown by `MLModel.compileModel(at:)` or by the `MLModel` initializer.
func readModel(fromURL modelURL: URL) throws -> MLModel {
func readModel(fromURL modelURL: URL, computeUnits: MLComputeUnits = .all) throws -> MLModel {
// Compile the package first; the model is then loaded from the compiled URL.
let compiledModelURL = try MLModel.compileModel(at: modelURL)
return try MLModel(contentsOf: compiledModelURL)
// Hand the requested compute units to the model through its configuration.
let config = MLModelConfiguration()
config.computeUnits = computeUnits
return try MLModel(contentsOf: compiledModelURL, configuration: config)
}

/// Loads a machine learning model from a local file path.
/// - Parameter path: The file path where the model file is located.
/// - Parameters:
/// - path: The file path where the model file is located. A path that does not start with `/`
///   is resolved against the current working directory.
/// - computeUnits: The hardware devices to use for model computation. Defaults to `.all`.
/// - Returns: An instance of `MLModel`.
/// - Throws: Any error thrown by the URL-based `readModel` overload.
func readModel(fromPath path: String) throws -> MLModel {
func readModel(fromPath path: String, computeUnits: MLComputeUnits = .all) throws -> MLModel {
// Turn a relative path into an absolute one by prefixing the current directory.
let absPath = path.hasPrefix("/") ? path : FileManager.default.currentDirectoryPath + "/" + path
// The model location is treated as a directory when building the file URL.
let modelURL = URL(fileURLWithPath: absPath, isDirectory: true)
return try readModel(fromURL: modelURL)
return try readModel(fromURL: modelURL, computeUnits: computeUnits)
}

/// Encodes text input into embeddings using a machine learning model.
Expand All @@ -120,10 +126,16 @@ public class TextEncoder {
/// - modelPath: The path to the directory containing the machine learning model.
/// - configPath: Optional. The path to the configuration file. Defaults to config.json in the model directory.
/// - tokenizerPath: Optional. The path to the tokenizer file. Defaults to tokenizer.json in the model directory.
public init(modelPath: String, configPath: String? = nil, tokenizerPath: String? = nil) throws {
/// - computeUnits: The hardware devices to use for model computation. Use `.cpuAndNeuralEngine` for best performance.
public init(
modelPath: String,
configPath: String? = nil,
tokenizerPath: String? = nil,
computeUnits: MLComputeUnits = .all
) throws {
let finalConfigPath = configPath ?? modelPath + "/config.json"
let finalTokenizerPath = tokenizerPath ?? modelPath + "/tokenizer.json"
self.model = try readModel(fromPath: modelPath)
self.model = try readModel(fromPath: modelPath, computeUnits: computeUnits)
self.processor = try TextProcessor(
configPath: finalConfigPath,
tokenizerPath: finalTokenizerPath,
Expand All @@ -135,16 +147,20 @@ public class TextEncoder {
/// - Parameters:
/// - modelName: The identifier for the model repository.
/// - hubApi: The API object to interact with the model hub. Defaults to a shared instance.
public init(modelName: String, hubApi: HubApi = .shared) async throws {
/// - computeUnits: The hardware devices to use for model computation. Use `.cpuAndNeuralEngine` for best performance.
///   Passing `.cpuAndNeuralEngine` fetches `text_encoder_neural.mlpackage`; any other value,
///   including the default `.all`, fetches `text_encoder.mlpackage`.
public init(modelName: String, hubApi: HubApi = .shared, computeUnits: MLComputeUnits = .all) async throws {
let repo = Hub.Repo(id: modelName)
// Pick the package name once so the snapshot filter and the load path stay in sync.
let encoderMask =
computeUnits == .cpuAndNeuralEngine ? "text_encoder_neural.mlpackage" : "text_encoder.mlpackage"
let modelURL = try await hubApi.snapshot(
from: repo,
matching: ["text_encoder.mlpackage/*", "config.json", "tokenizer.json"]
matching: ["\(encoderMask)/*", "config.json", "tokenizer.json"]
)
// The configuration and tokenizer files are read from the root of the snapshot directory.
let configPath = modelURL.appendingPathComponent("config.json").path
let tokenizerPath = modelURL.appendingPathComponent("tokenizer.json").path
self.model = try readModel(
fromURL: modelURL.appendingPathComponent("text_encoder.mlpackage", isDirectory: true)
fromURL: modelURL.appendingPathComponent(encoderMask, isDirectory: true),
computeUnits: computeUnits
)
self.processor = try TextProcessor(configPath: configPath, tokenizerPath: tokenizerPath, model: self.model)
}
Expand Down Expand Up @@ -174,22 +190,26 @@ public class ImageEncoder {
/// - Parameters:
/// - modelPath: The path to the directory containing the machine learning model.
/// - configPath: Optional. The path to the configuration file. Defaults to config.json in the model directory.
/// - computeUnits: The hardware devices to use for model computation. Defaults to `.all`.
public init(modelPath: String, configPath: String? = nil, computeUnits: MLComputeUnits = .all) throws {
public init(modelPath: String, configPath: String? = nil) throws {
// Fall back to `config.json` inside the model directory when no explicit path is given.
let finalConfigPath = configPath ?? modelPath + "/config.json"
self.model = try readModel(fromPath: modelPath)
self.model = try readModel(fromPath: modelPath, computeUnits: computeUnits)
self.processor = try ImageProcessor(configPath: finalConfigPath)
}

/// Initializes an `ImageEncoder` using a model name and an API for fetching models.
/// - Parameters:
/// - modelName: The identifier for the model repository.
/// - hubApi: The API object to interact with the model hub. Defaults to a shared instance.
public init(modelName: String, hubApi: HubApi = .shared) async throws {
/// - computeUnits: The hardware devices to use for model computation. Use `.cpuAndNeuralEngine` for best performance.
///   Passing `.cpuAndNeuralEngine` fetches `image_encoder_neural.mlpackage`; any other value,
///   including the default `.all`, fetches `image_encoder.mlpackage`.
public init(modelName: String, hubApi: HubApi = .shared, computeUnits: MLComputeUnits = .all) async throws {
let repo = Hub.Repo(id: modelName)
let modelURL = try await hubApi.snapshot(from: repo, matching: ["image_encoder.mlpackage/*", "config.json"])
// Pick the package name once so the snapshot filter and the load path stay in sync.
let encoderMask =
computeUnits == .cpuAndNeuralEngine ? "image_encoder_neural.mlpackage" : "image_encoder.mlpackage"
let modelURL = try await hubApi.snapshot(from: repo, matching: ["\(encoderMask)/*", "config.json"])
// The configuration file is read from the root of the snapshot directory.
let configPath = modelURL.appendingPathComponent("config.json").path
self.model = try readModel(
fromURL: modelURL.appendingPathComponent("image_encoder.mlpackage", isDirectory: true)
fromURL: modelURL.appendingPathComponent(encoderMask, isDirectory: true),
computeUnits: computeUnits
)
self.processor = try ImageProcessor(configPath: configPath)
}
Expand Down
30 changes: 28 additions & 2 deletions swift/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ import UForm
### Text Embeddings

```swift
let textModel = try await TextEncoder(modelName: "unum-cloud/uform3-image-text-english-small")
let textModel = try await TextEncoder(
modelName: "unum-cloud/uform3-image-text-english-small",
computeUnits: .cpuAndNeuralEngine
)
let text = "A group of friends enjoy a barbecue on a sandy beach, with one person grilling over a large black grill, while the other sits nearby, laughing and enjoying the camaraderie."
let textEmbedding: Embedding = try textModel.encode(text)
let textVector: [Float32] = textEmbedding.asFloats()
Expand All @@ -28,7 +31,10 @@ let textVector: [Float32] = textEmbedding.asFloats()
### Image Embeddings

```swift
let imageModel = try await ImageEncoder(modelName: "unum-cloud/uform3-image-text-english-small")
let imageModel = try await ImageEncoder(
modelName: "unum-cloud/uform3-image-text-english-small",
computeUnits: .cpuAndNeuralEngine
)
let imageURL = "https://github.com/ashvardanian/ashvardanian/blob/master/demos/bbq-on-beach.jpg?raw=true"
guard let url = URL(string: imageURL),
let imageSource = CGImageSourceCreateWithURL(url as CFURL, nil),
Expand All @@ -40,6 +46,26 @@ var imageEmbedding: Embedding = try imageModel.encode(cgImage)
var imageVector: [Float32] = imageEmbedding.asFloats()
```

### Choosing Target Device

Apple chips provide several functional units capable of high-throughput matrix multiplication and AI inference.
Those `computeUnits` include the CPU, GPU, and Neural Engine.
For maximum compatibility, the `.all` option is used by default.
Sadly, Apple's scheduler does not always pick the optimal device, so specifying the target device explicitly may yield significant performance gains, especially if the models are pre-compiled for the Apple Neural Engine.

| Model | GPU Text E. | ANE Text E. | GPU Image E. | ANE Image E. |
| :------------------ | ----------: | ----------: | -----------: | -----------: |
| `english-small` | 2.53 ms | 0.53 ms | 6.57 ms | 1.23 ms |
| `english-base` | 2.54 ms | 0.61 ms | 18.90 ms | 3.79 ms |
| `english-large` | 2.30 ms | 0.61 ms | 79.68 ms | 20.94 ms |
| `multilingual-base` | 2.34 ms | 0.50 ms | 18.98 ms | 3.77 ms |

> On Apple M4 iPad, running iOS 18.2.
> Batch size is 1, and the model is pre-loaded into memory.
> The original encoders use `f32` single-precision numbers for maximum compatibility, and mostly rely on __GPU__ for computation.
> The quantized encoders use a mixture of `i8`, `f16`, and `f32` numbers for maximum performance, and mostly rely on the Apple Neural Engine (__ANE__) for computation.
> The median latency is reported.
### Computing Distances

There are several ways to compute distances between embeddings, once you have them.
Expand Down

0 comments on commit 2dbcc42

Please sign in to comment.