feat(batch-jobs): Add step to pass API server env vars to spark jobs #2222
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Turing CI

on:
  # Pushes to main and pushes of release / pre-release version tags start CI,
  # except when the change only touches the non-relevant paths listed below.
  push:
    branches:
      - main
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'
    paths-ignore:
      - 'docs/**'
      - 'engines/pyfunc-ensembler-job/**'
      - 'engines/pyfunc-ensembler-service/**'
      - 'scripts/fluentd-bigquery/**'
      - 'sdk/**'
      - '.github/workflows/pyfunc-ensembler-job.yaml'
      - '.github/workflows/pyfunc-ensembler-service.yaml'
      - '.github/workflows/sdk.yaml'
      - '.github/workflows/cluster-init.yaml'

  # Pull requests opened against main also start CI. Note that this ignore
  # list is shorter than the push one: it has no 'sdk/**' and no
  # cluster-init workflow entry.
  pull_request:
    branches:
      - main
    paths-ignore:
      - 'docs/**'
      - 'engines/pyfunc-ensembler-job/**'
      - 'engines/pyfunc-ensembler-service/**'
      - 'scripts/fluentd-bigquery/**'
      - '.github/workflows/pyfunc-ensembler-job.yaml'
      - '.github/workflows/pyfunc-ensembler-service.yaml'
      - '.github/workflows/sdk.yaml'

  # Manual trigger, so the e2e CI workflow can be run for any arbitrary git ref.
  workflow_dispatch:
# Workflow-level settings; the jobs below read them through the `env` context.
env:
  # Passed as `retention-days` to every artifact upload.
  ARTIFACT_RETENTION_DAYS: 7

  # Go toolchain and golangci-lint versions used by the build and test jobs.
  GO_VERSION: '1.22'
  GO_LINT_VERSION: 'v1.56.2'

  # Inputs forwarded to the setup-test-cluster action by the test-e2e job.
  CLUSTER_NAME: 'turing-e2e'
  ISTIO_VERSION: '1.9.9'
  KNATIVE_VERSION: '1.7.4'
  KNATIVE_ISTIO_VERSION: '1.7.1'
  LOCAL_REGISTRY: 'registry.localhost:5000'

jobs:
# Builds the turing-api Docker image, saves it as a tarball and uploads the
# tarball as a workflow artifact.
# Output `api-version`: the version string parsed from the `make build-image` log.
build-api:
  runs-on: ubuntu-latest
  outputs:
    api-version: ${{ steps.build-image.outputs.api-version }}
  steps:
    - name: Check out code
      uses: actions/checkout@v4
      with:
        fetch-depth: 0
    # This is needed due to the issue raised here: https://github.com/golang/go/issues/61455
    - name: Install Go ${{ env.GO_VERSION }}
      uses: actions/setup-go@v5
      with:
        go-version: ${{ env.GO_VERSION }}
        cache: false
        cache-dependency-path: api/go.sum
    - name: Build Docker image
      id: build-image
      working-directory: api
      run: |
        # pipefail: a failing `make` must fail the step even though it is piped into tee.
        set -o pipefail
        make build-image | tee output.log
        # Publish the step output through the $GITHUB_OUTPUT file; the old
        # `::set-output` workflow command is deprecated.
        echo "api-version=$(sed -n 's%turing-api version: \(.*\)%\1%p' output.log)" >> "$GITHUB_OUTPUT"
    - name: Save Docker image
      run: |
        docker image save \
          --output turing-api.${{ steps.build-image.outputs.api-version }}.tar \
          turing-api:${{ steps.build-image.outputs.api-version }}
    - name: Publish Artifact
      uses: actions/upload-artifact@v4
      with:
        # Artifact name and file name are identical; the test-e2e job rebuilds
        # this name from the `api-version` output.
        name: turing-api.${{ steps.build-image.outputs.api-version }}.tar
        path: turing-api.${{ steps.build-image.outputs.api-version }}.tar
        retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
# Builds the turing-router Docker image, saves it as a tarball and uploads the
# tarball as a workflow artifact.
# Output `router-version`: the version string parsed from the `make build-image` log.
build-router:
  runs-on: ubuntu-latest
  outputs:
    router-version: ${{ steps.build-image.outputs.router-version }}
  steps:
    - name: Check out code
      uses: actions/checkout@v4
      with:
        fetch-depth: 0
    # This is needed due to the issue raised here: https://github.com/golang/go/issues/61455
    - name: Install Go ${{ env.GO_VERSION }}
      uses: actions/setup-go@v5
      with:
        go-version: ${{ env.GO_VERSION }}
        cache: false
        # Point at the router module's go.sum (same path the
        # test-engines-router job uses) rather than the API module's.
        cache-dependency-path: engines/router/go.sum
    - name: Build Docker image
      id: build-image
      working-directory: engines/router
      run: |
        # pipefail: a failing `make` must fail the step even though it is piped into tee.
        set -o pipefail
        make build-image | tee output.log
        # Publish the step output through the $GITHUB_OUTPUT file; the old
        # `::set-output` workflow command is deprecated.
        echo "router-version=$(sed -n 's%turing-router version: \(.*\)%\1%p' output.log)" >> "$GITHUB_OUTPUT"
    - name: Save Docker image
      run: |
        docker image save \
          --output turing-router.${{ steps.build-image.outputs.router-version }}.tar \
          turing-router:${{ steps.build-image.outputs.router-version }}
    - name: Publish Artifact
      uses: actions/upload-artifact@v4
      with:
        # Artifact name and file name are identical; the test-e2e job rebuilds
        # this name from the `router-version` output.
        name: turing-router.${{ steps.build-image.outputs.router-version }}.tar
        path: turing-router.${{ steps.build-image.outputs.router-version }}.tar
        retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
# Builds the example ("hardcoded") experiment engine plugin image and uploads
# it as a tarball artifact. The version is the fixed job-level VERSION value,
# which is also exposed as the job output.
build-test-experiment-engine-plugin:
  runs-on: ubuntu-latest
  env:
    VERSION: latest
  outputs:
    test-experiment-engine-plugin-version: ${{ env.VERSION }}
  steps:
    - uses: actions/checkout@v4
      name: Check out code

    # This is needed due to the issue raised here: https://github.com/golang/go/issues/61455
    - uses: actions/setup-go@v5
      name: Install Go ${{ env.GO_VERSION }}
      with:
        cache: false
        go-version: ${{ env.GO_VERSION }}

    - name: Build Docker image
      run: |
        set -o pipefail
        make build-image
      working-directory: engines/experiment/examples/plugins/hardcoded

    # Same `docker image save` invocation, written as one folded command line.
    - name: Save Docker image
      run: >-
        docker image save
        --output test-experiment-engine-plugin.${{ env.VERSION }}.tar
        plugin-example-engine-plugin:${{ env.VERSION }}

    - uses: actions/upload-artifact@v4
      name: Publish Artifact
      with:
        retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
        name: test-experiment-engine-plugin.${{ env.VERSION }}.tar
        path: test-experiment-engine-plugin.${{ env.VERSION }}.tar
# Installs, lints and builds the UI, then uploads the build output as the
# `turing-ui-dist` artifact. Every `run` step executes inside `ui/`.
build-ui:
  runs-on: ubuntu-latest
  defaults:
    run:
      working-directory: ui
  steps:
    - name: Check out code
      uses: actions/checkout@v4
    - name: Install system dependencies
      # Refresh the package index first so the install does not use stale
      # package lists, and pass -y so apt-get never stops at a confirmation
      # prompt in the non-interactive runner.
      run: |
        sudo apt-get update
        sudo apt-get install -y --no-install-recommends gcc make libpng-dev
    - name: Set up Node 20.x
      uses: actions/setup-node@v4
      with:
        node-version: "20.x"
        cache: yarn
        cache-dependency-path: ui/yarn.lock
    - name: Install
      run: NODE_OPTIONS=--openssl-legacy-provider yarn install --network-concurrency 1
    - name: Lint code
      run: yarn lint
    - name: Build UI
      env:
        NODE_OPTIONS: "--max_old_space_size=4096"
      run: yarn build
    - name: Publish Artifact
      uses: actions/upload-artifact@v4
      with:
        name: turing-ui-dist
        # `path` is relative to the workspace root, not to the `ui` default
        # working directory, hence the explicit `ui/` prefix.
        path: ui/build/
        retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
# Delegates to the repository-local build-cluster-init action and re-exports
# that step's `cluster-init-version` output as a job output.
build-cluster-init:
  runs-on: ubuntu-latest
  outputs:
    cluster-init-version: ${{ steps.build-cluster-init.outputs.cluster-init-version }}
  steps:
    - uses: actions/checkout@v4
      name: Check out code
      with:
        fetch-depth: 0

    - uses: ./.github/actions/build-cluster-init
      id: build-cluster-init
      name: Run action build-cluster-init
      with:
        artifact_retention_days: ${{ env.ARTIFACT_RETENTION_DAYS }}
# Runs the API test suite against a Postgres service container, then lints
# the API module. Starts only after build-api has finished.
test-api:
  runs-on: ubuntu-latest
  needs:
    - build-api
  env:
    GOPATH: ${{ github.workspace }}/api/.go
  services:
    # Database for the tests; name, user and password here mirror the
    # DATABASE_* variables given to the "Run test" step.
    postgres:
      image: postgres:13-alpine
      ports:
        - '5432:5432'
      env:
        POSTGRES_DB: turing
        POSTGRES_USER: turing-admin
        POSTGRES_PASSWORD: secret
      # Health-check flags, written on one line instead of a folded scalar.
      options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5
  steps:
    - uses: actions/checkout@v4
      name: Check out code

    - uses: actions/setup-go@v5
      name: Set up Go
      with:
        go-version: ${{ env.GO_VERSION }}
        cache-dependency-path: api/go.sum

    - name: Setup
      run: make setup
      working-directory: api

    - name: Run test
      run: make test
      working-directory: api
      env:
        DATABASE_HOST: localhost
        DATABASE_NAME: turing
        DATABASE_USER: turing-admin
        DATABASE_PASSWORD: secret

    - uses: golangci/golangci-lint-action@v3
      name: Lint code
      with:
        version: ${{ env.GO_LINT_VERSION }}
        working-directory: api
        args: --timeout 3m --verbose
# Runs unit tests, benchmarks and lint for the router engine. Every `run`
# step executes inside `engines/router`. Starts only after build-router.
test-engines-router:
  runs-on: ubuntu-latest
  defaults:
    run:
      working-directory: engines/router
  env:
    GOPATH: ${{ github.workspace }}/engines/router/.go
  needs:
    - build-router
  steps:
    - name: Check out code
      uses: actions/checkout@v4
    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        go-version: ${{ env.GO_VERSION }}
        cache-dependency-path: engines/router/go.sum
    - name: Setup
      run: make setup
    - name: Run test
      run: make test
    - name: Run Benchmark
      run: make benchmark
    - name: Lint code
      uses: golangci/golangci-lint-action@v3
      with:
        # `skip-go-installation` is intentionally absent: it is not an input of
        # golangci-lint-action v3, which relies on the "Set up Go" step above.
        version: ${{ env.GO_LINT_VERSION }}
        # `defaults.run` does not apply to `uses` steps, so the directory is
        # passed to the action explicitly.
        working-directory: engines/router
        args: --verbose
# Runs unit tests and lint for the experiment engine module.
test-engines-experiment:
  runs-on: ubuntu-latest
  env:
    GOPATH: ${{ github.workspace }}/engines/experiment/.go
  steps:
    - name: Check out code
      uses: actions/checkout@v4
    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        go-version: ${{ env.GO_VERSION }}
        cache-dependency-path: engines/experiment/go.sum
    - name: Run test
      working-directory: engines/experiment
      run: make test
    - name: Lint code
      # Same major version as the lint steps in test-api and
      # test-engines-router. v3 uses the Go toolchain installed by the
      # "Set up Go" step, so the v2-only `skip-go-installation` input is dropped.
      uses: golangci/golangci-lint-action@v3
      with:
        version: ${{ env.GO_LINT_VERSION }}
        working-directory: engines/experiment
        args: --verbose
# End-to-end tests against a test cluster created by the setup-test-cluster
# action. The matrix has two legs:
#   * in-cluster credentials, driven by the Go API e2e suite (useSDK: false)
#   * remote cluster credentials, driven by the Python SDK e2e suite (useSDK: true)
# Image and archive versions come from the outputs of the build jobs in `needs`.
test-e2e:
  runs-on: ubuntu-latest
  env:
    CLUSTER_INIT_VERSION: ${{ needs.build-cluster-init.outputs.cluster-init-version }}
    TURING_API_VERSION: ${{ needs.build-api.outputs.api-version }}
    TURING_ROUTER_VERSION: ${{ needs.build-router.outputs.router-version }}
    TEST_EXPERIMENT_ENGINE_PLUGIN_VERSION: ${{ needs.build-test-experiment-engine-plugin.outputs.test-experiment-engine-plugin-version }}
  needs:
    - build-api
    - build-router
    - build-test-experiment-engine-plugin
    - build-cluster-init
  strategy:
    # Let each leg run to completion even if the other one fails.
    fail-fast: false
    matrix:
      name:
        - "In-cluster credentials; API e2e"
        - "Remote cluster credentials; SDK e2e"
      # Each `include` entry attaches its settings to the leg with the same name.
      include:
        - name: "In-cluster credentials; API e2e"
          useInClusterConfig: true
          valuesFile: "turing.values.in-cluster.yaml"
          useSDK: false
        - name: "Remote cluster credentials; SDK e2e"
          useInClusterConfig: false
          valuesFile: "turing.values.remote.yaml"
          useSDK: true
  steps:
    - name: Check out code
      uses: actions/checkout@v4
    - name: Set Up Test Cluster
      uses: ./.github/actions/setup-test-cluster
      with:
        go-version: ${{ env.GO_VERSION }}
        # Archive names must match the artifact names published by the build jobs.
        turing_api_tar_archive_name: turing-api.${{ env.TURING_API_VERSION }}.tar
        turing_router_tar_archive_name: turing-router.${{ env.TURING_ROUTER_VERSION }}.tar
        experiment_engine_plugin_archive_name: test-experiment-engine-plugin.${{ env.TEST_EXPERIMENT_ENGINE_PLUGIN_VERSION }}.tar
        cluster_init_tar_archive_name: cluster-init.${{ env.CLUSTER_INIT_VERSION }}.tar
        use_in_cluster_config: ${{ matrix.useInClusterConfig }}
        values_file: ${{ matrix.valuesFile }}
        cluster_name: ${{ env.CLUSTER_NAME }}
        istio_version: ${{ env.ISTIO_VERSION }}
        knative_version: ${{ env.KNATIVE_VERSION }}
        knative_istio_version: ${{ env.KNATIVE_ISTIO_VERSION }}
        local_registry: ${{ env.LOCAL_REGISTRY }}
        cluster_init_version: ${{ env.CLUSTER_INIT_VERSION }}
    - name: Set up Go
      if: ${{ !matrix.useSDK }}
      uses: actions/setup-go@v5
      with:
        go-version: ${{ env.GO_VERSION }}
        cache-dependency-path: api/go.sum
    - name: Setup Python
      if: ${{ matrix.useSDK }}
      uses: actions/setup-python@v5
      with:
        # Quoted so the version is a string and never parsed as a YAML float.
        python-version: "3.9"
        cache-dependency-path: sdk/requirements.txt
    - name: Install Python dependencies
      if: ${{ matrix.useSDK }}
      working-directory: sdk
      run: make setup
    - name: Run End-to-End Test Suite with Turing API
      if: ${{ !matrix.useSDK }}
      id: run-e2e-test
      working-directory: api
      env:
        GOPATH: ${{ github.workspace }}/api/.go
      run: |
        make setup-e2e
        make test-e2e
    - name: Run End-to-End Test Suite with Turing SDK
      if: ${{ matrix.useSDK }}
      id: run-e2e-test-sdk
      working-directory: sdk
      env:
        GOPATH: ${{ github.workspace }}/api/.go
        TEST_ID: ${{ github.run_id }}
        MOCKSERVER_HTTP_ENDPOINT: http://mockserver
        MOCKSERVER_UPI_CONTROL_ENDPOINT: mockserver-upi-control:80
        MOCKSERVER_UPI_A_ENDPOINT: mockserver-upi-a:80
        API_BASE_PATH: http://turing-gateway.127.0.0.1.nip.io/api/turing/
        MODEL_CLUSTER_NAME: "dev"
        PROJECT_ID: "1"
        PROJECT_NAME: default
        KUBECONFIG_USE_LOCAL: true
        TEST_ECHO_IMAGE: eexit/mirror-http-server:1.1.3
        KSERVICE_DOMAIN: 127.0.0.1.nip.io
      run: make e2e-sdk
    # Dumps cluster state when either e2e step failed; `always()` keeps the
    # step eligible to run after a failed step.
    - name: "Debug Deployment Failure"
      if: (steps.run-e2e-test.outcome == 'failure' || steps.run-e2e-test-sdk.outcome == 'failure') && always()
      run: |
        echo "::group::describe deployment/turing-mlp"
        kubectl describe deployment/turing-mlp
        echo "::endgroup::"
        echo "::group::describe deployment/turing-merlin"
        kubectl describe deployment/turing-merlin
        echo "::endgroup::"
        echo "::group::describe deployment/turing"
        kubectl describe deployment/turing
        echo "::endgroup::"
        echo "::group::secret/turing-api-config"
        kubectl get secret/turing-api-config -o jsonpath='{.data.config\.yaml}' | base64 --decode
        echo "::endgroup::"
        echo "::group::logs deployment/turing-mlp"
        kubectl logs deployment/turing-mlp
        echo "::endgroup::"
        echo "::group::logs deployment/turing-merlin"
        kubectl logs deployment/turing-merlin
        echo "::endgroup::"
        echo "::group::logs deployment/turing"
        kubectl logs deployment/turing
        echo "::endgroup::"
        echo "::group::kubernetes events"
        kubectl get events
        echo "::endgroup::"
        echo "::group::kubernetes deployment"
        kubectl get deploy
        echo "::endgroup::"
        echo "::group::knative serving deployment"
        kubectl get ksvc
        echo "::endgroup::"
        echo "::group::kubernetes pod describe"
        kubectl describe pod
        echo "::endgroup::"
        echo "::group::kubernetes config map"
        kubectl get cm -o yaml
        echo "::endgroup::"
        echo "::group::kubernetes pod logs"
        # `read` alone must be the loop condition so the loop stops at end of
        # input; the echo belongs in the body. With the echo inside the
        # condition, the condition always succeeded and the loop carried on
        # with an empty pod name after the last pod.
        kubectl get pod | awk '{print $1}' | grep -v "NAME" | while read -r p; do
          echo "POD LOGS $p"
          kubectl logs "$p" --all-containers
        done
        echo "::endgroup::"
# Runs the repository-local release-rules action and exposes its
# `release-type` step output as a job output; the publish job reads it.
release-rules:
  runs-on: ubuntu-latest
  outputs:
    release-type: ${{ steps.release-rules.outputs.release-type }}
  steps:
    - uses: actions/checkout@v4

    - uses: ./.github/actions/release-rules
      id: release-rules
# Publishes artifacts through the reusable turing-publish workflow once all
# build and test jobs listed in `needs` have completed.
#
# Release and pre-release artifacts are published automatically.
# Dev releases can be published manually by approving the 'deployment' in
# the 'manual' environment; a dev build comes either from the 'main' branch
# or from a manual `workflow_dispatch` run of this workflow.
publish:
  uses: ./.github/workflows/turing-publish.yaml
  needs:
    - build-router
    - build-api
    - build-ui
    - release-rules
    - test-e2e
    - test-api
    - test-engines-router
    - test-engines-experiment
  # Run when the release type is release / pre-release, or the event is not a
  # pull request, or the pull request head lives in this repository.
  if: >-
    contains('release,pre-release', needs.release-rules.outputs.release-type) ||
    github.event_name != 'pull_request' ||
    github.event.pull_request.head.repo.full_name == github.repository
  with:
    api_version: ${{ needs.build-api.outputs.api-version }}
    router_version: ${{ needs.build-router.outputs.router-version }}
    # 'manual' for dev releases, empty string for every other release type.
    environment: ${{ needs.release-rules.outputs.release-type == 'dev' && 'manual' || '' }}
  secrets:
    ghcr_token: ${{ secrets.GITHUB_TOKEN }}