From 4c4dfa8d0cef6230446195f16af553f8dde576d8 Mon Sep 17 00:00:00 2001 From: Fan Ye Date: Mon, 9 Sep 2024 06:43:09 -0700 Subject: [PATCH] Add Dockerfile for LIT Vertex AI demo This Dockerfile can be used to build a container image for a LIT Vertex AI demo. The image includes all of the necessary dependencies for running a LIT demo, including the LIT Python package, the Vertex AI SDK, and the gunicorn web server. The Dockerfile also includes a gunicorn configuration for starting the LIT demo. The demo is configured through environment variables, including the name of the demo, the port on which the demo should listen, and the datasets to load for the demo. The resulting image can then be deployed to a Vertex AI endpoint. PiperOrigin-RevId: 672527408 --- .dockerignore | 1 + lit_nlp/examples/vertexai/.dockerignore | 1 + lit_nlp/examples/vertexai/Dockerfile | 77 ++++++++++++++++++++ lit_nlp/examples/vertexai/demo.py | 42 +++++++++-- lit_nlp/examples/vertexai/gunicorn_config.py | 25 +++++++ lit_nlp/examples/vertexai/requirements.txt | 20 +++++ pyproject.toml | 13 +++- requirements_examples.txt | 1 - 8 files changed, 171 insertions(+), 9 deletions(-) create mode 100644 lit_nlp/examples/vertexai/.dockerignore create mode 100644 lit_nlp/examples/vertexai/Dockerfile create mode 100644 lit_nlp/examples/vertexai/gunicorn_config.py create mode 100644 lit_nlp/examples/vertexai/requirements.txt diff --git a/.dockerignore b/.dockerignore index 553582db..d32e30c0 100644 --- a/.dockerignore +++ b/.dockerignore @@ -9,3 +9,4 @@ __pycache__ **/*.pyo **/*.pyd **/__pycache__ +lit_nlp/examples/vertexai/models_test.py diff --git a/lit_nlp/examples/vertexai/.dockerignore b/lit_nlp/examples/vertexai/.dockerignore new file mode 100644 index 00000000..f8d39fc6 --- /dev/null +++ b/lit_nlp/examples/vertexai/.dockerignore @@ -0,0 +1 @@ +models_test.py \ No newline at end of file diff --git a/lit_nlp/examples/vertexai/Dockerfile 
b/lit_nlp/examples/vertexai/Dockerfile new file mode 100644 index 00000000..04244d1d --- /dev/null +++ b/lit_nlp/examples/vertexai/Dockerfile @@ -0,0 +1,77 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Use the official lightweight Python image. +# https://hub.docker.com/_/python + +# TODO(faneycourage): Add a readme file for the demo. + +# ---- LIT Base Container ---- + +FROM python:3.11-slim AS lit-nlp-base + +# Update Ubuntu packages and install basic utils +RUN apt-get update +RUN apt-get install -y wget curl gnupg2 gcc g++ git + +# Copy local code to the container image. +ENV APP_HOME /app +WORKDIR $APP_HOME + +COPY ./lit_nlp/examples/vertexai/gunicorn_config.py ./ + + + +# ---- LIT Container for Hosted Demos ---- + +FROM lit-nlp-base AS lit-nlp-prod + +RUN python -m pip install 'lit-nlp[vertexai]' + +WORKDIR $APP_HOME +ENTRYPOINT ["gunicorn", "--config=gunicorn_config.py"] + + + +# ---- LIT Container for Developing and Testing Hosted Demos ---- + +FROM lit-nlp-base AS lit-nlp-dev + +# Install yarn +RUN curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add - +RUN echo "deb https://dl.yarnpkg.com/debian/ stable main" | \ + tee /etc/apt/sources.list.d/yarn.list +RUN apt update && apt -y install yarn + +# Set up python environment with production dependencies +# This step is slow as it installs many packages. 
+COPY requirements_core.txt ./ +RUN python -m pip install -r requirements_core.txt + +COPY lit_nlp/examples/vertexai/requirements.txt lit_nlp/examples/vertexai/requirements.txt +RUN python -m pip install -r lit_nlp/examples/vertexai/requirements.txt + +# Copy the rest of the lit_nlp package +COPY . ./ + +# Build front-end with yarn +WORKDIR $APP_HOME/lit_nlp/client +ENV NODE_OPTIONS "--openssl-legacy-provider" +RUN yarn && yarn build && rm -rf node_modules/* + +# Run LIT server +# Note that the config file supports configuring the LIT demo that is launched +# via the DEMO_NAME and DEMO_PORT environment variables. +WORKDIR $APP_HOME +ENTRYPOINT ["gunicorn", "--config=gunicorn_config.py"] \ No newline at end of file diff --git a/lit_nlp/examples/vertexai/demo.py b/lit_nlp/examples/vertexai/demo.py index 806bb312..70c5cd46 100644 --- a/lit_nlp/examples/vertexai/demo.py +++ b/lit_nlp/examples/vertexai/demo.py @@ -39,9 +39,11 @@ --alsologtostderr Then navigate to localhost:5432 to access the demo UI. + """ from collections.abc import Sequence +import os import sys from typing import Optional from absl import app @@ -54,16 +56,16 @@ from lit_nlp.examples.prompt_debugging import datasets as prompt_debugging_datasets from lit_nlp.examples.vertexai import models as vertexai_models -FLAGS = flags.FLAGS +_FLAGS = flags.FLAGS # Define GCP project information and vertex AI API key. 
-LOCATION = flags.DEFINE_string( +_LOCATION = flags.DEFINE_string( 'project_location', None, 'Please enter your GCP project location', required=True, ) -PROJECT_ID = flags.DEFINE_string( +_PROJECT_ID = flags.DEFINE_string( 'project_id', None, 'Please enter your project id', @@ -111,8 +113,36 @@ def get_wsgi_app() -> Optional[dev_server.LitServerType]: """Return WSGI app for container-hosted demos.""" - FLAGS.set_default('server_type', 'external') - FLAGS.set_default('demo_mode', True) + _FLAGS.set_default('server_type', 'external') + _FLAGS.set_default('demo_mode', True) + + location = os.getenv('PROJECT_LOCATION', None) + _FLAGS['project_location'].value = location + + project_id = os.getenv('PROJECT_ID', None) + _FLAGS['project_id'].value = project_id + + gemini_models = os.getenv('GEMINI_MODELS', None) + if gemini_models: + gemini_model_list = gemini_models.split(',') + _FLAGS['gemini_models'].value = gemini_model_list + + generative_model_endpoints = os.getenv('GENERATIVE_MODEL_ENDPOINTS', None) + if generative_model_endpoints: + generative_model_endpoints_list = generative_model_endpoints.split(',') + _FLAGS['generative_model_endpoints'].value = ( + generative_model_endpoints_list + ) + + datasets = os.getenv('DATASETS', None) + if datasets: + datasets_list = datasets.split(',') + _FLAGS['datasets'].value = datasets_list + + max_examples = os.getenv('MAX_EXAMPLES', None) + if max_examples: + _FLAGS['max_examples'].value = int(max_examples) + # Parse flags without calling app.run(main), to avoid conflict with # gunicorn command line flags. 
unused = flags.FLAGS(sys.argv, known_only=True) @@ -127,7 +157,7 @@ def main(argv: Sequence[str]) -> Optional[dev_server.LitServerType]: if len(argv) > 1: raise app.UsageError('Too many command-line arguments.') - vertexai.init(project=PROJECT_ID.value, location=LOCATION.value) + vertexai.init(project=_PROJECT_ID.value, location=_LOCATION.value) models = {} if _GEMINI_MODELS.value: diff --git a/lit_nlp/examples/vertexai/gunicorn_config.py b/lit_nlp/examples/vertexai/gunicorn_config.py new file mode 100644 index 00000000..92d87851 --- /dev/null +++ b/lit_nlp/examples/vertexai/gunicorn_config.py @@ -0,0 +1,25 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""gunicorn configuration for cloud-hosted demos.""" + +import os + +_DEMO_PORT = os.getenv('DEMO_PORT', '5432') + +bind = f'0.0.0.0:{_DEMO_PORT}' +timeout = 3600 +threads = 8 +worker_class = 'gthread' +wsgi_app = 'lit_nlp.examples.vertexai.demo:get_wsgi_app()' diff --git a/lit_nlp/examples/vertexai/requirements.txt b/lit_nlp/examples/vertexai/requirements.txt new file mode 100644 index 00000000..e43911f7 --- /dev/null +++ b/lit_nlp/examples/vertexai/requirements.txt @@ -0,0 +1,20 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +-r ../../../requirements_core.txt + +google-cloud-aiplatform>=1.60.0 +gunicorn>=20.1.0 +vertexai>=1.49.0 diff --git a/pyproject.toml b/pyproject.toml index 02376ff8..df70d54b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,19 +77,28 @@ keywords = [ ] [project.optional-dependencies] +examples-common = [ + "gunicorn>=20.1.0", +] # LINT.IfChange examples = [ - "gunicorn>=20.1.0", + "lit-nlp[examples-common]", "sentencepiece==0.1.99", "tensorflow>=2.10.0,<2.16.0", "tensorflow-datasets>=4.9.0", "tensorflow-text>=2.10.0,<2.16.0", "torch>=2.0.0", "transformers>=4.27.1", - "vertexai>=1.49.0", ] # LINT.ThenChange(./requirements_examples.txt) # LINT.IfChange +vertexai = [ + "lit-nlp[examples-common]", + "google-cloud-aiplatform>=1.60.0", + "vertexai>=1.49.0", +] +# LINT.ThenChange(./lit_nlp/examples/vertexai/requirements.txt) +# LINT.IfChange test = [ "lime==0.2.0.1", "pytest>=7.4.0,<8.0.0", diff --git a/requirements_examples.txt b/requirements_examples.txt index 6dd36176..3015bfcb 100644 --- a/requirements_examples.txt +++ b/requirements_examples.txt @@ -20,5 +20,4 @@ tensorflow-datasets>=4.9.0 tensorflow-text>=2.10.0,<2.16.0 torch>=2.0.0 transformers>=4.27.1 -vertexai>=1.49.0 # LINT.ThenChange(./pyproject.toml)