Skip to content

Commit

Permalink
Improve model adapter reliability and stability (#257)
Browse files Browse the repository at this point in the history
* Standardize pod labels and filter out unrelated pods

* Enqueue the model adapter object from pod changes

* Remove the base model deletion bug

ModelAdapter will take the response
  • Loading branch information
Jeffwan authored Sep 30, 2024
1 parent 0df904d commit b8bbe6d
Show file tree
Hide file tree
Showing 18 changed files with 221 additions and 61 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ help: ## Display this help.

.PHONY: manifests
manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.
$(CONTROLLER_GEN) rbac:roleName=manager-role crd:maxDescLen=0,generateEmbeddedObjectMeta=true webhook paths="./..." output:crd:artifacts:config=config/crd/bases
$(CONTROLLER_GEN) rbac:roleName=controller-manager-role crd:maxDescLen=0,generateEmbeddedObjectMeta=true webhook paths="./..." output:crd:artifacts:config=config/crd/bases

.PHONY: generate
generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
Expand Down
4 changes: 2 additions & 2 deletions config/gateway/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ kind: Kustomization
images:
- name: plugins
newName: aibrix/plugins
newTag: v0.1.0-rc.2
newTag: nightly
- name: users
newName: aibrix/users
newTag: v0.1.0-rc.2
newTag: nightly
2 changes: 1 addition & 1 deletion config/manager/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ kind: Kustomization
images:
- name: controller
newName: aibrix/controller-manager
newTag: v0.1.0-rc.2
newTag: nightly
14 changes: 7 additions & 7 deletions docs/development/app/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,20 @@ metadata:
name: llama2-70b
namespace: aibrix-system
labels:
modeladapter.aibricks.ai/enabled: "true"
model.aibrix.ai: "llama2-70b"
model.aibrix.ai/name: "llama2-70b"
model.aibrix.ai/port: "8000"
adapter.model.aibrix.ai/enabled: "true"
spec:
replicas: 3
selector:
matchLabels:
modeladapter.aibricks.ai/enabled: "true"
model.aibrix.ai: "llama2-70b"
adapter.model.aibrix.ai/enabled: "true"
model.aibrix.ai/name: "llama2-70b"
template:
metadata:
labels:
modeladapter.aibricks.ai/enabled: "true"
model.aibrix.ai: "llama2-70b"
adapter.model.aibrix.ai/enabled: "true"
model.aibrix.ai/name: "llama2-70b"
spec:
containers:
- name: llmengine
Expand Down Expand Up @@ -46,7 +46,7 @@ metadata:
namespace: aibrix-system
spec:
selector:
model.aibrix.ai: "llama2-70b"
model.aibrix.ai/name: "llama2-70b"
ports:
- protocol: TCP
port: 8000
Expand Down
4 changes: 2 additions & 2 deletions docs/source/features/lora-dynamic-loading.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,13 @@ Here's one model adapter example.
name: llama-2-7b-sql-lora-test
namespace: aibrix-system
labels:
model.aibrix.ai: "llama-2-7b-sql-lora-test"
model.aibrix.ai/name: "llama-2-7b-sql-lora-test"
model.aibrix.ai/port: "8000"
spec:
baseModel: llama2-70b
podSelector:
matchLabels:
model.aibrix.ai: llama2-70b
model.aibrix.ai/name: llama2-70b
artifactURL: huggingface://yard1/llama-2-7b-sql-lora-test
schedulerName: default
Expand Down
11 changes: 6 additions & 5 deletions docs/source/getting_started/quickstart.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,16 @@ Save yaml as `deployment.yaml` and run `kubectl apply -f deployment.yaml`.
kind: Deployment
metadata:
labels:
model.aibrix.ai: llama-2-7b-hf
model.aibrix.ai/name: llama-2-7b-hf
model.aibrix.ai/port: "8000"
adapter.model.aibrix.ai/enabled: "true"
name: llama-2-7b-hf
namespace: aibrix-system
spec:
replicas: 1
selector:
matchLabels:
model.aibrix.ai: llama-2-7b-hf
model.aibrix.ai/name: llama-2-7b-hf
strategy:
rollingUpdate:
maxSurge: 25%
Expand All @@ -44,7 +45,7 @@ Save yaml as `deployment.yaml` and run `kubectl apply -f deployment.yaml`.
template:
metadata:
labels:
model.aibrix.ai: llama-2-7b-hf
model.aibrix.ai/name: llama-2-7b-hf
spec:
containers:
- command:
Expand Down Expand Up @@ -112,7 +113,7 @@ Save yaml as `service.yaml` and run `kubectl apply -f service.yaml`.
kind: Service
metadata:
labels:
model.aibrix.ai: llama-2-7b-hf
model.aibrix.ai/name: llama-2-7b-hf
prometheus-discovery: "true"
annotations:
prometheus.io/scrape: "true"
Expand All @@ -130,7 +131,7 @@ Save yaml as `service.yaml` and run `kubectl apply -f service.yaml`.
protocol: TCP
targetPort: 8080
selector:
model.aibrix.ai: llama-2-7b-hf
model.aibrix.ai/name: llama-2-7b-hf
type: ClusterIP
Register a user to authenticate the gateway
Expand Down
4 changes: 2 additions & 2 deletions docs/tutorial/lora/model_adapter.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@ metadata:
name: lora-1
namespace: aibrix-system
labels:
model.aibrix.ai: "lora-1"
model.aibrix.ai/name: "lora-1"
model.aibrix.ai/port: "8000"
spec:
baseModel: llama2-70b
podSelector:
matchLabels:
model.aibrix.ai: llama2-70b
model.aibrix.ai/name: llama2-70b
artifactURL: huggingface://yard1/llama-2-7b-sql-lora-test
schedulerName: default
# ---
Expand Down
1 change: 1 addition & 0 deletions docs/tutorial/runtime/runtime-hf-download.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ metadata:
labels:
models.aibricks.ai: deepseek-coder-6.7b-instruct
models.aibricks.com/model-name: deepseek-coder-6.7b-instruct
adapter.model.aibrix.ai/enabled: "true"
name: aibricks-model-deepseek-coder-6.7b-instruct
namespace: default
spec:
Expand Down
1 change: 1 addition & 0 deletions docs/tutorial/runtime/runtime-s3-download.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ metadata:
labels:
models.aibricks.ai: deepseek-coder-6.7b-instruct
models.aibricks.com/model-name: deepseek-coder-6.7b-instruct
adapter.model.aibrix.ai/enabled: "true"
name: aibricks-model-deepseek-coder-6.7b-instruct
namespace: default
spec:
Expand Down
1 change: 1 addition & 0 deletions docs/tutorial/runtime/runtime-tos-download.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ metadata:
labels:
models.aibricks.ai: deepseek-coder-6.7b-instruct
models.aibricks.com/model-name: deepseek-coder-6.7b-instruct
adapter.model.aibrix.ai/enabled: "true"
name: aibricks-model-deepseek-coder-6.7b-instruct
namespace: default
spec:
Expand Down
2 changes: 1 addition & 1 deletion pkg/cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ var (
)

const (
modelIdentifier = "model.aibrix.ai"
modelIdentifier = "model.aibrix.ai/name"
)

func GetCache() (*Cache, error) {
Expand Down
19 changes: 10 additions & 9 deletions pkg/controller/modeladapter/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,17 @@ metadata:
name: deepseek-33b-instruct
namespace: default
labels:
model.aibrix.ai: deepseek-33b-instruct
model.aibrix.ai/name: deepseek-33b-instruct
adapter.model.aibrix.ai/enabled: "true"
spec:
replicas: 1
selector:
matchLabels:
model.aibrix.ai: deepseek-33b-instruct
model.aibrix.ai/name: deepseek-33b-instruct
template:
metadata:
labels:
model.aibrix.ai: deepseek-33b-instruct
model.aibrix.ai/name: deepseek-33b-instruct
spec:
containers:
- name: deepseek-33b-instruct
Expand Down Expand Up @@ -59,7 +60,7 @@ spec:
baseModel: llama2-70b
podSelector:
matchLabels:
model.aibrix.ai: llama2-70b
model.aibrix.ai/name: llama2-70b
schedulerName: default-model-adapter-scheduler
status:
phase: Configuring
Expand All @@ -71,8 +72,8 @@ kind: Service
metadata:
creationTimestamp: "2024-07-14T21:42:57Z"
labels:
model.aibrix.ai/base-model: llama2-70b
model.aibrix.ai/model-adapter: text2sql-lora-1
model.aibrix.ai/name: llama2-70b
adapter.model.aibrix.ai/name: text2sql-lora-1
name: text2sql-lora-1
namespace: default
ownerReferences:
Expand All @@ -99,7 +100,7 @@ spec:
targetPort: 8000
publishNotReadyAddresses: true
selector:
model.aibrix.ai: llama2-70b
model.aibrix.ai/name: llama2-70b
sessionAffinity: None
type: ClusterIP
status:
Expand Down Expand Up @@ -152,8 +153,8 @@ metadata:
endpoints.kubernetes.io/last-change-trigger-time: "2024-07-14T21:42:57Z"
creationTimestamp: "2024-07-14T21:42:57Z"
labels:
model.aibrix.ai/base-model: llama2-70b
model.aibrix.ai/model-adapter: text2sql-lora-1
model.aibrix.ai/name: llama2-70b
adapter.model.aibrix.ai/name: text2sql-lora-1
service.kubernetes.io/headless: ""
name: text2sql-lora-1
namespace: default
Expand Down
Loading

0 comments on commit b8bbe6d

Please sign in to comment.