From 08cb92a37b5319e1275dc455f7fc37de2e9f2921 Mon Sep 17 00:00:00 2001 From: Kartik Kalamadi Date: Mon, 25 Apr 2022 17:57:11 -0700 Subject: [PATCH] Point to AWS website for v1.5 (#3236) --- content/en/docs/distributions/aws/_index.md | 4 + .../aws/component-guides/_index.md | 5 - .../aws/component-guides/files/rds.yaml | 127 ------------- .../aws/component-guides/notebook-server.md | 28 --- .../aws/component-guides/pipeline.md | 169 ------------------ .../aws/customizing-aws/_index.md | 5 - .../distributions/aws/customizing-aws/rds.md | 62 ------- .../docs/distributions/aws/deploy/_index.md | 5 - .../aws/deploy/eks-compatibility.md | 44 ----- .../docs/distributions/aws/deploy/features.md | 25 --- .../aws/deploy/install-kubeflow.md | 104 ----------- .../aws/deploy/uninstall-kubeflow.md | 36 ---- .../distributions/aws/troubleshooting-aws.md | 93 ---------- 13 files changed, 4 insertions(+), 703 deletions(-) delete mode 100644 content/en/docs/distributions/aws/component-guides/_index.md delete mode 100644 content/en/docs/distributions/aws/component-guides/files/rds.yaml delete mode 100644 content/en/docs/distributions/aws/component-guides/notebook-server.md delete mode 100644 content/en/docs/distributions/aws/component-guides/pipeline.md delete mode 100644 content/en/docs/distributions/aws/customizing-aws/_index.md delete mode 100644 content/en/docs/distributions/aws/customizing-aws/rds.md delete mode 100644 content/en/docs/distributions/aws/deploy/_index.md delete mode 100644 content/en/docs/distributions/aws/deploy/eks-compatibility.md delete mode 100644 content/en/docs/distributions/aws/deploy/features.md delete mode 100644 content/en/docs/distributions/aws/deploy/install-kubeflow.md delete mode 100644 content/en/docs/distributions/aws/deploy/uninstall-kubeflow.md delete mode 100644 content/en/docs/distributions/aws/troubleshooting-aws.md diff --git a/content/en/docs/distributions/aws/_index.md b/content/en/docs/distributions/aws/_index.md index 
be0c2ebb61..6284f2c84d 100644 --- a/content/en/docs/distributions/aws/_index.md +++ b/content/en/docs/distributions/aws/_index.md @@ -3,3 +3,7 @@ title = "Kubeflow on AWS" description = "Running Kubeflow on Amazon EKS and Amazon Web Services" weight = 20 +++ + +[Kubeflow on AWS](https://awslabs.github.io/kubeflow-manifests/) is an open source distribution of Kubeflow that allows customers to build machine learning systems with ready-made AWS service integrations. Use Kubeflow on AWS to streamline data science tasks and build highly reliable, secure, and scalable machine learning systems with reduced operational overheads. + +For more information, see the [Kubeflow on AWS documentation](https://awslabs.github.io/kubeflow-manifests/docs/). diff --git a/content/en/docs/distributions/aws/component-guides/_index.md b/content/en/docs/distributions/aws/component-guides/_index.md deleted file mode 100644 index b23209ba7d..0000000000 --- a/content/en/docs/distributions/aws/component-guides/_index.md +++ /dev/null @@ -1,5 +0,0 @@ -+++ -title = "Components" -description = "Explore component guides for Kubeflow on AWS" -weight = 20 -+++ diff --git a/content/en/docs/distributions/aws/component-guides/files/rds.yaml b/content/en/docs/distributions/aws/component-guides/files/rds.yaml deleted file mode 100644 index e28e9bbe21..0000000000 --- a/content/en/docs/distributions/aws/component-guides/files/rds.yaml +++ /dev/null @@ -1,127 +0,0 @@ -AWSTemplateFormatVersion: '2010-09-09' -Description: 'AWS CloudFormation Sample Template showing how to create an RDS DBInstance - in an existing Virtual Private Cloud (VPC). - **WARNING** This template creates an Amazon Relational Database Service database - instance. You will be billed for the AWS resources used if you create a stack from - this template.' 
-Parameters: - VpcId: - Type: AWS::EC2::VPC::Id - Description: VpcId of your existing Virtual Private Cloud (VPC) - ConstraintDescription: must be the VPC Id of an existing Virtual Private Cloud. - Subnets: - Type: 'List' - Description: The list of SubnetIds, for at least two Availability Zones in the - region in your Virtual Private Cloud (VPC) - ConstraintDescription: Select at least two SubnetIds that are Private - SecurityGroupId: - Type: 'List' - Description: SecurityGroup Id of your EKS Worker Node - ConstraintDescription: must be SecurityGroupId of an existing Instance - DBName: - Default: kubeflow - Description: Database name for Kubeflow - Type: String - MinLength: '1' - MaxLength: '64' - AllowedPattern: "[a-zA-Z][a-zA-Z0-9]*" - ConstraintDescription: must begin with a letter and contain only alphanumeric characters. - DBUsername: - Default: admin - NoEcho: 'true' - Description: The database admin account username - Type: String - MinLength: '1' - MaxLength: '16' - AllowedPattern: "[a-zA-Z][a-zA-Z0-9]*" - ConstraintDescription: must begin with a letter and contain only alphanumeric characters. - DBPassword: - Default: Kubefl0w - NoEcho: 'true' - Description: The database admin account password - Type: String - MinLength: '8' - MaxLength: '41' - AllowedPattern: ".*" - ConstraintDescription: must contain only alphanumeric characters. - DBClass: - Default: db.m5.large - Description: Database instance class - Type: String - AllowedValues: - - db.m5.large - - db.m5.xlarge - - db.m5.2xlarge - - db.m5.4xlarge - - db.m5.12xlarge - ConstraintDescription: must select a valid database instance type. - DBAllocatedStorage: - Default: '20' - Description: The size of the database (Gb) - Type: Number - MinValue: '20' - MaxValue: '65536' - ConstraintDescription: must be between 20 and 65536Gb. - MultiAZ: - Description: Multi-AZ master database - Type: String - Default: 'false' - AllowedValues: ['true', 'false'] - ConstraintDescription: must be true or false. 
-Resources: - MyDBSubnetGroup: - Type: AWS::RDS::DBSubnetGroup - Properties: - DBSubnetGroupDescription: Subnets available for the RDS DB Instance - SubnetIds: - Ref: Subnets - myVPCSecurityGroup: - Type: AWS::EC2::SecurityGroup - Properties: - GroupDescription: Security group for RDS DB Instance. - VpcId: - Ref: VpcId - MyDB: - Type: AWS::RDS::DBInstance - Properties: - DBName: - Ref: DBName - AllocatedStorage: - Ref: DBAllocatedStorage - DBInstanceClass: - Ref: DBClass - Engine: MySQL - EngineVersion: '8.0.17' - MultiAZ: - Ref: MultiAZ - MasterUsername: - Ref: DBUsername - MasterUserPassword: - Ref: DBPassword - DBSubnetGroupName: - Ref: MyDBSubnetGroup - VPCSecurityGroups: - Ref: SecurityGroupId - DeletionPolicy: Snapshot -Outputs: - RDSEndpoint: - Description: RDS Endpoint - Value: - Fn::GetAtt: - - MyDB - - Endpoint.Address - JDBCConnectionString: - Description: JDBC connection string for database - Value: - Fn::Join: - - '' - - - jdbc:mysql:// - - Fn::GetAtt: - - MyDB - - Endpoint.Address - - ":" - - Fn::GetAtt: - - MyDB - - Endpoint.Port - - "/" - - Ref: DBName diff --git a/content/en/docs/distributions/aws/component-guides/notebook-server.md b/content/en/docs/distributions/aws/component-guides/notebook-server.md deleted file mode 100644 index c3f60ba1b2..0000000000 --- a/content/en/docs/distributions/aws/component-guides/notebook-server.md +++ /dev/null @@ -1,28 +0,0 @@ -+++ -title = "AWS-Optimized Kubeflow Notebooks" -description = "Work in AWS-optimized Notebooks based on AWS Deep Learning Containers" -weight = 10 -+++ - -Installing Kubeflow on AWS includes AWS-optimized container images as default options for a Kubeflow Jupyter Notebook server. For more information on gettings started with Kubeflow Notebooks, see the [Quickstart Guide](https://www.kubeflow.org/docs/components/notebooks/quickstart-guide/). 
- -## AWS-optimized container images - -The following container images are available from the [Amazon Elastic Container Registry (Amazon ECR)](https://gallery.ecr.aws/c9e4w0g3/). - -``` -public.ecr.aws/c9e4w0g3/notebook-servers/jupyter-tensorflow:2.6.0-gpu-py38-cu112 -public.ecr.aws/c9e4w0g3/notebook-servers/jupyter-tensorflow:2.6.0-cpu-py38 -public.ecr.aws/c9e4w0g3/notebook-servers/jupyter-pytorch:1.9.0-gpu-py38-cu111 -public.ecr.aws/c9e4w0g3/notebook-servers/jupyter-pytorch:1.9.0-cpu-py38 -``` - -These images are based on [AWS Deep Learning Containers](https://docs.aws.amazon.com/deep-learning-containers/latest/devguide/what-is-dlc.html). AWS Deep Learning Containers provide optimized environments with popular machine learning frameworks such as TensorFlow and PyTorch, and are available in the Amazon ECR. For more information on AWS Deep Learning Container options, see [Available Deep Learning Containers Images](https://github.com/aws/deep-learning-containers/blob/master/available_images.md). - -Along with specific machine learning frameworks, these container images have additional pre-installed packages: -- `kfp` -- `kfserving` -- `h5py` -- `pandas` -- `awscli` -- `boto3` \ No newline at end of file diff --git a/content/en/docs/distributions/aws/component-guides/pipeline.md b/content/en/docs/distributions/aws/component-guides/pipeline.md deleted file mode 100644 index 85c303aa53..0000000000 --- a/content/en/docs/distributions/aws/component-guides/pipeline.md +++ /dev/null @@ -1,169 +0,0 @@ -+++ -title = "Kubeflow Pipelines on AWS" -description = "Get started with Kubeflow Pipelines on Amazon EKS" -weight = 20 -+++ - -For an overview of connecting to Kubeflow Pipelines using the SDK client, see [the Pipelines SDK guide](https://www.kubeflow.org/docs/components/pipelines/sdk/connect-api/). 
- -## Authenticate Kubeflow Pipelines using SDK inside cluster - -Refer to the following guide to connect to Kubeflow Pipelines from [inside your cluster](https://www.kubeflow.org/docs/components/pipelines/sdk/connect-api/#connect-to-kubeflow-pipelines-from-the-same-cluster). - -## Authenticate Kubeflow Pipelines using SDK outside cluster - -Refer to the following guide to connect to Kubeflow Pipelines from [outside your cluster](https://www.kubeflow.org/docs/components/pipelines/sdk/connect-api/#connect-to-kubeflow-pipelines-from-outside-your-cluster). - -Refer to the following steps to use `kfp` to pass a cookie from your browser after you log into Kubeflow. The following example uses a Chrome browser. - -KFP SDK Browser Cookie - -KFP SDK Browser Cookie Detail - -Once you get a cookie, authenticate `kfp` by passing the cookie from your browser. Use the session based on the appropriate manifest for your deployment, as done in the following examples. - -### **Dex** - -If you want to use port forwarding to access Kubeflow, run the following command and use `http://localhost:8080/pipeline` as the host. - -```bash -kubectl port-forward svc/istio-ingressgateway -n istio-system 8080:80 -``` - -Pass the cookie from your browser: -```bash -# This is the "Domain" in your cookies. Eg: "localhost:8080" or ".elb.amazonaws.com" -kubeflow_gateway_endpoint="" - -authservice_session_cookie="" - -namespace="" - -client = kfp.Client(host=f"http://{kubeflow_gateway_endpoint}/pipeline", cookies=f"authservice_session={authservice_session_cookie}") -client.list_experiments(namespace=namespace) -``` - -If you want to set up application load balancing (ALB) with Dex, see [Exposing Kubeflow over Load Balancer](https://github.com/awslabs/kubeflow-manifests/tree/v1.3-branch/distributions/aws/examples/vanilla#exposing-kubeflow-over-load-balancer) and use the ALB address as the Kubeflow Endpoint. 
- -To do programmatic authentication with Dex, refer to the following comments under [issue #140](https://github.com/kubeflow/kfctl/issues/140) in the `kfctl` repository: [#140 (comment)](https://github.com/kubeflow/kfctl/issues/140#issuecomment-578837304) and [#140 (comment)](https://github.com/kubeflow/kfctl/issues/140#issuecomment-719894529). - -### **Cognito** - -```bash -# This is the "Domain" in your cookies. eg: kubeflow. -kubeflow_gateway_endpoint="" - -alb_session_cookie0="" -alb_session_cookie1="" - -namespace="" - -client = kfp.Client(host=f"https://{kubeflow_gateway_endpoint}/pipeline", cookies=f"AWSELBAuthSessionCookie-0={alb_session_cookie0};AWSELBAuthSessionCookie-1={alb_session_cookie1}") -client.list_experiments(namespace=namespace) -``` - -## S3 Access from Kubeflow Pipelines - -It is recommended to use AWS credentials to manage S3 access for Kubeflow Pipelines. [IAM Role for Service Accounts](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html) requires applications to use the latest AWS SDK to support the `assume-web-identity-role`. This requirement is in development, and progress can be tracked in the [open GitHub issue](https://github.com/kubeflow/pipelines/issues/3405). - -A Kubernetes Secret is required by Kubeflow Pipelines and applications to access S3. Be sure that the Kubernetes Secret has S3 read and write access. - -``` -apiVersion: v1 -kind: Secret -metadata: - name: aws-secret - namespace: kubeflow -type: Opaque -data: - AWS_ACCESS_KEY_ID: - AWS_SECRET_ACCESS_KEY: -``` - -- YOUR_BASE64_ACCESS_KEY: Base64 string of `AWS_ACCESS_KEY_ID` -- YOUR_BASE64_SECRET_ACCESS: Base64 string of `AWS_SECRET_ACCESS_KEY` - -> Note: To get a Base64 string, run `echo -n $AWS_ACCESS_KEY_ID | base64` - -### Configure containers to use AWS credentials - -In order for `ml-pipeline-ui` to read these artifacts: - -1. Create a Kubernetes secret `aws-secret` in the `kubeflow` namespace. - -2. 
Update deployment `ml-pipeline-ui` to use AWS credential environment variables by running `kubectl edit deployment ml-pipeline-ui -n kubeflow`. - - ``` - apiVersion: extensions/v1beta1 - kind: Deployment - metadata: - name: ml-pipeline-ui - namespace: kubeflow - ... - spec: - template: - spec: - containers: - - env: - - name: AWS_ACCESS_KEY_ID - valueFrom: - secretKeyRef: - key: AWS_ACCESS_KEY_ID - name: aws-secret - - name: AWS_SECRET_ACCESS_KEY - valueFrom: - secretKeyRef: - key: AWS_SECRET_ACCESS_KEY - name: aws-secret - .... - image: gcr.io/ml-pipeline/frontend:0.2.0 - name: ml-pipeline-ui - ``` - -### Example Pipeline - -If you write any files to S3 in your application, use `use_aws_secret` to attach an AWS secret to access S3. - -```python -from kfp.aws import use_aws_secret - -def s3_op(): - import boto3 - s3 = boto3.client("s3", region_name="") - s3.create_bucket( - Bucket="", CreateBucketConfiguration={"LocationConstraint": ""} - ) - -s3_op = create_component_from_func( - s3_op, base_image="python", packages_to_install=["boto3"] -) - -@dsl.pipeline( - name="S3 KFP Component", - description="Tests S3 Access from KFP", -) -def s3_pipeline(): - s3_op().set_display_name("S3 KFP Component").apply( - use_aws_secret("aws-secret", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY") - ) - -kfp_client = kfp.Client() -namespace = "kubeflow-user-example-com" -run_id = kfp_client.create_run_from_pipeline_func( - s3_pipeline, namespace=namespace, arguments={} -).run_id -``` - -## Support S3 as a source for Kubeflow Pipelines output viewers - -Support for S3 Artifact Store is in active development. You can track the [open issue](https://github.com/awslabs/kubeflow-manifests/issues/117) to stay up-to-date on progress. - -## Support TensorBoard in Kubeflow Pipelines - -Support for TensorBoard in Kubeflow Pipelines is in active development. You can track the [open issue](https://github.com/awslabs/kubeflow-manifests/issues/118) to stay up-to-date on progress. 
- - diff --git a/content/en/docs/distributions/aws/customizing-aws/_index.md b/content/en/docs/distributions/aws/customizing-aws/_index.md deleted file mode 100644 index 97429e2ef6..0000000000 --- a/content/en/docs/distributions/aws/customizing-aws/_index.md +++ /dev/null @@ -1,5 +0,0 @@ -+++ -title = "Getting started with Amazon RDS" -description = "Customization guides for Kubeflow on Amazon EKS" -weight = 30 -+++ diff --git a/content/en/docs/distributions/aws/customizing-aws/rds.md b/content/en/docs/distributions/aws/customizing-aws/rds.md deleted file mode 100644 index 9a8c9acfe6..0000000000 --- a/content/en/docs/distributions/aws/customizing-aws/rds.md +++ /dev/null @@ -1,62 +0,0 @@ -+++ -title = "Using Amazon RDS with MySQL" -description = "Using Amazon RDS with MySQL for Kubeflow on AWS" -weight = 30 -+++ - -This guide describes how to deploy a MySQL database using Amazon RDS. - -## Amazon Relational Database Service (Amazon RDS) - -[Amazon RDS](https://aws.amazon.com/rds/) is a managed service that makes it easy to set up, operate, and scale a relational database in the AWS Cloud. It provides cost-efficient, resizable capacity for an industry-standard relational database and manages common database administration tasks. It has support for several engines such as Amazon Aurora, MySQL, MariaDB, PostgreSQL, Oracle Database, and SQL Server. - -### Deploy Amazon RDS MySQL - -To deploy a MySQL database using Amazon RDS, you first need to retrieve some configuration parameters. - -If you created your EKS cluster using `eksctl`, use the following commands to find your `VpcId`, `SubnetId`, and `SecurityGroupId`. For clusters created in other ways, be sure to find these values before deploying your database. - -```bash -export AWS_CLUSTER_NAME= - -# Retrieve your VpcId. 
-aws ec2 describe-vpcs \ - --output json \ - --filters Name=tag:alpha.eksctl.io/cluster-name,Values=$AWS_CLUSTER_NAME \ - | jq -r '.Vpcs[].VpcId' - -# Retrieve the list of SubnetIds for your cluster's Private subnets. Select at least 2. -aws ec2 describe-subnets \ - --output json \ - --filters Name=tag:alpha.eksctl.io/cluster-name,Values=$AWS_CLUSTER_NAME Name=tag:aws:cloudformation:logical-id,Values=SubnetPrivate* \ - | jq -r '.Subnets[].SubnetId' - -# Retrieve the SecurityGroupId for your nodes. -# Note: This assumes that your nodes share the same SecurityGroup -INSTANCE_IDS=$(aws ec2 describe-instances --query 'Reservations[*].Instances[*].InstanceId' --filters "Name=tag-key,Values=eks:cluster-name" "Name=tag-value,Values=$AWS_CLUSTER_NAME" --output text) -for i in "${INSTANCE_IDS[@]}" -do - echo "SecurityGroup for EC2 instance $i ..." -aws ec2 describe-instances --output json --instance-ids $i | jq -r '.Reservations[].Instances[].SecurityGroups[].GroupId' -done -``` - -With this information in hand, you can now use either the Amazon RDS console or the attached [CloudFormation template](/docs/distributions/aws/customizing-aws/files/rds.yaml) to deploy your database. - -{{% alert title="Warning" color="warning" %}} -The CloudFormation template deploys Amazon RDS for MySQL that is intended for a Dev/Test environment. -We highly recommend deploying a Multi-AZ database for Production use. Please review the Amazon RDS [documentation](https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Welcome.html) to learn more. -{{% /alert %}} - -[{{
}}](https://console.aws.amazon.com/cloudformation/home?#/stacks/new?stackName=kubeflow-db&templateURL=https://cloudformation-kubeflow.s3-us-west-2.amazonaws.com/rds.yaml) - -Select your desired **Region** in the AWS CloudFormation management console then click **Next**. -We recommend that you change the **DBPassword**. If you do not, the password will default to `Kubefl0w`. Select your VpcId, Subnets, and SecurityGroupId, then click **Next**. - -For the remaining options, choose the default settings by clicking **Next**. Then click **Create Stack**. - -Once the CloudFormation stack creation is complete, click on **Outputs** to get the RDS endpoint. - -![dashboard](/docs/images/aws/cloudformation-rds-output.png) - -If you did not use CloudFormation, you can retrieve the RDS endpoint through the RDS console on the **Connectivity & Security** tab in the **Endpoint & Port** section. \ No newline at end of file diff --git a/content/en/docs/distributions/aws/deploy/_index.md b/content/en/docs/distributions/aws/deploy/_index.md deleted file mode 100644 index 7c58ddf55b..0000000000 --- a/content/en/docs/distributions/aws/deploy/_index.md +++ /dev/null @@ -1,5 +0,0 @@ -+++ -title = "Deployment" -description = "Deploy Kubeflow on AWS" -weight = 10 -+++ diff --git a/content/en/docs/distributions/aws/deploy/eks-compatibility.md b/content/en/docs/distributions/aws/deploy/eks-compatibility.md deleted file mode 100644 index 2443bdb23c..0000000000 --- a/content/en/docs/distributions/aws/deploy/eks-compatibility.md +++ /dev/null @@ -1,44 +0,0 @@ -+++ -title = "Amazon EKS and Kubeflow Compatibility" -description = "Check compatibility between Amazon EKS and Kubeflow versions" -weight = 25 -+++ - -## Compatibility - -Starting with Kubeflow version 1.2, Amazon EKS maintains end-to-end testing between EKS Kubernetes versions and Kubeflow versions. The following table relates compatibility between Kubernetes versions on Amazon EKS and Kubeflow v1.3.1. - -
- - - - - - - - - - - - - - - - - - - - - -
EKS VersionsKubeflow v1.3.1
1.21Compatible
1.20Compatible
1.19Compatible
-
- -- **Incompatible**: the combination is not known to work together -- **Compatible**: all Kubeflow features have been tested and verified for the EKS Kubernetes version - -### Kubeflow v1.4 and v1.5 Support - -Support for Kubeflow-v1.4 and Kubeflow-v1.5 are in active development. You can track the following issues to stay up-to-date on progress: - - [v1.4 tracking issue](https://github.com/awslabs/kubeflow-manifests/issues/27) - - [v1.5 tracking issue](https://github.com/awslabs/kubeflow-manifests/issues/91) - diff --git a/content/en/docs/distributions/aws/deploy/features.md b/content/en/docs/distributions/aws/deploy/features.md deleted file mode 100644 index 3cdcff8e50..0000000000 --- a/content/en/docs/distributions/aws/deploy/features.md +++ /dev/null @@ -1,25 +0,0 @@ -+++ -title = "AWS Features for Kubeflow" -description = "Get to know the benefits of using Kubeflow with AWS service intergrations" -weight = 10 -+++ - -Running Kubeflow on AWS gives you the following feature benefits and configuration options: - -> Note: Beginning with v1.3, development for Kubeflow on AWS can be found in the [AWS Labs repository](https://github.com/awslabs/kubeflow-manifests). Previous versions can be found in the [Kubeflow manifests repository](https://github.com/kubeflow/manifests). - -## Manage AWS compute environments -* Provision and manage your **[Amazon Elastic Kubernetes Service (EKS)](https://aws.amazon.com/eks/)** clusters with **[eksctl](https://github.com/weaveworks/eksctl)** and easily configure multiple compute and GPU node configurations. -* Use AWS-optimized container images, based on **[AWS Deep Learning Containers](https://docs.aws.amazon.com/deep-learning-containers/latest/devguide/what-is-dlc.html)**, with Kubeflow Notebooks. - -## Load balancing, certificates, and identity management -* Manage external traffic with **[AWS Application Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/introduction.html)**. 
-* Get started with TLS authentication using **[AWS Certificate Manager](https://aws.amazon.com/certificate-manager/)** and **[AWS Cognito](https://aws.amazon.com/cognito/)**. - -## Integrate with AWS database and storage solutions -* Integrate Kubeflow with **[Amazon Relational Database Service (RDS)](https://aws.amazon.com/rds/)** for a highly scalable pipelines and metadata store. -* Deploy Kubeflow with integrations for **[Amazon S3](https://aws.amazon.com/s3/)** for an easy-to-use pipeline artifacts store. -* Use Kubeflow with **[Amazon Elastic File System (EFS)](https://aws.amazon.com/efs/)** for a simple, scalabale, and serverless storage solution. -* Leverage the **[Amazon FSx CSI driver](https://github.com/kubernetes-sigs/aws-fsx-csi-driver)** to manage Lustre file systems which are optimized for compute-intensive workloads, such as high-performance computing and machine learning. **[Amazon FSx for Lustre](https://aws.amazon.com/fsx/lustre/)** can scale to hundreds of GBps of throughput and millions of IOPS. - -To get started with Kubeflow on AWS, see [Install Kubeflow](https://www.kubeflow.org/docs/distributions/aws/deploy/install-kubeflow/). \ No newline at end of file diff --git a/content/en/docs/distributions/aws/deploy/install-kubeflow.md b/content/en/docs/distributions/aws/deploy/install-kubeflow.md deleted file mode 100644 index 56af40bc35..0000000000 --- a/content/en/docs/distributions/aws/deploy/install-kubeflow.md +++ /dev/null @@ -1,104 +0,0 @@ -+++ -title = "Install Kubeflow" -description = "Get started and explore options for deploying Kubeflow on Amazon EKS" -weight = 20 -+++ - -There are a number of deployment options for installing Kubeflow with AWS service integrations. - -The following installation guides assume that you have an existing Kubernetes cluster. 
To get started with creating an Amazon Elastic Kubernetes Service (EKS) cluster, see [Getting started with Amazon EKS - `eksctl`](https://docs.aws.amazon.com/eks/latest/userguide/getting-started-eksctl.html). To verify compatibility between EKS Kubernetes and Kubeflow versions during setup, see [Amazon EKS and Kubeflow Compatibility](https://www.kubeflow.org/docs/distributions/aws/deploy/eks-compatibility/). - -> Note: It is necessary to use a Kubernetes cluster with compatible tool versions and compute power. For more information, see the specific prerequisites for the [deployment option](https://github.com/awslabs/kubeflow-manifests/tree/v1.3-branch/distributions/aws/examples) of your choosing. - -If you experience any issues with installation, see [Troubleshooting Kubeflow on AWS](/docs/distributions/aws/troubleshooting-aws). - -## Deployment options - -Read on to explore more options for AWS-integrated deployment options. - -### Components configured for Cognito, RDS and S3 - -There is a single guide for deploying Kubeflow on AWS with [RDS, S3, and Cognito](https://github.com/awslabs/kubeflow-manifests/tree/v1.3-branch/distributions/aws/examples/cognito-rds-s3). - -### Vanilla version with Dex for auth and EBS volumes as PV - -The default deployment will leverage [Dex](https://dexidp.io/), an OpenID Connect provider. See the [vanilla installation](https://github.com/awslabs/kubeflow-manifests/tree/v1.3-branch/distributions/aws/examples/vanilla) example for more information. - -### Components configured for RDS and S3 - -Kubeflow components on AWS can be deployed with integrations for just [Amazon S3](https://aws.amazon.com/s3/) and [Amazon RDS](https://aws.amazon.com/rds/). Refer to the [Kustomize Manifests for RDS and S3](https://github.com/awslabs/kubeflow-manifests/tree/v1.3-branch/distributions/aws/examples/rds-s3) guide for deployment configuration instructions. 
- -### Components configured for Cognito - -Optionally, you may deploy Kubeflow with an integration only with [AWS Cognito](https://aws.amazon.com/cognito/) for your authentication needs. Refer to the [Deploying Kubeflow with AWS Cognito as idP](https://github.com/awslabs/kubeflow-manifests/tree/v1.3-branch/distributions/aws/examples/cognito) guide. - -## Additional component integrations - -Along with Kubernetes support for Amazon EBS, Kubeflow on AWS has integrations for using [Amazon EFS](https://aws.amazon.com/efs/) or [Amazon FSx for Lustre](https://aws.amazon.com/fsx/lustre/) for persistent storage. - -### Using EFS with Kubeflow - -Amazon EFS supports `ReadWriteMany` access mode, which means the volume can be mounted as read-write by many nodes. This is useful for creating a shared filesystem that can be mounted into multiple pods, as you may have with Jupyter. For example, one group can share datasets or models across an entire team. - -Refer to the [Amazon EFS example](https://github.com/awslabs/kubeflow-manifests/tree/v1.3-branch/distributions/aws/examples/storage/efs) for more information. - -### Using FSx for Lustre with Kubeflow - -Amazon FSx for Lustre provides a high-performance file system optimized for fast processing for machine learning and high performance computing (HPC) workloads. Lustre also supports `ReadWriteMany`. One difference between Amazon EFS and Lustre is that Lustre can be used to cache training data with direct connectivity to Amazon S3 as the backing store. With this configuration, you don't need to transfer data to the file system before using the volume. - -Refer to the [Amazon FSx for Lustre example](https://github.com/awslabs/kubeflow-manifests/tree/v1.3-branch/distributions/aws/examples/storage/fsx-for-lustre) for more details. - -## Usage Tracking - -AWS uses customer feedback and usage information to improve the quality of the services and software we offer to customers. 
We have added usage data collection to the AWS Kubeflow distribution in order to better understand customer usage and guide future improvements. Usage tracking for Kubeflow is activated by default, but is entirely voluntary and can be deactivated at any time. - -Usage tracking for Kubeflow on AWS collects the instance ID used by one of the worker nodes in a customer’s cluster. This data is sent back to AWS once per day. Usage tracking only collects the EC2 instance ID where Kubeflow is running and does not collect or export any other data to AWS. If you wish to deactivate this tracking, instructions are below. - -### How to activate usage tracking - -Usage tracking is activated by default. If you deactivated usage tracking for your Kubeflow deployment and would like to activate it after the fact, you can do so at any time with the following command: - -```bash -kustomize build distributions/aws/aws-telemetry | kubectl apply -f - -``` - -### How to deactivate usage tracking - -**Before deploying Kubeflow:** - -You can deactivate usage tracking by skipping the telemetry component installation in one of two ways: - -1. For single line installation, comment out the `aws-telemetry` line in the `kustomization.yaml` file. e.g. in [cognito-rds-s3 kustomization.yaml](https://github.com/awslabs/kubeflow-manifests/blob/v1.3-branch/distributions/aws/examples/cognito-rds-s3/kustomization.yaml#L58-L59) file: - ``` - # ./../aws-telemetry - ``` -1. For individual component installation, **do not** install the `aws-telemetry` component: - ``` - # AWS Telemetry - This is an optional component. 
See usage tracking documentation for more information - kustomize build distributions/aws/aws-telemetry | kubectl apply -f - - ``` -**After deploying Kubeflow:** - -To deactivate usage tracking on an existing deployment, delete the `aws-kubeflow-telemetry` cronjob with the following command: - -``` -kubectl delete cronjob -n kubeflow aws-kubeflow-telemetry -``` - -### Information collected by usage tracking - -* **Instance ID** - We collect the instance ID used by one of the worker nodes in the customer’s EKS cluster. This collection occurs once per day. - -### Learn more - -The telemetry data we collect is in accordance with AWS data privacy policies. For more information, see the following: - -* [AWS Service Terms](https://aws.amazon.com/service-terms/) -* [Data Privacy](https://aws.amazon.com/compliance/data-privacy-faq/) - -## Post-installation - -Kubeflow provides multi-tenancy support and users are not able to create notebooks in either the `kubeflow` or `default` namespaces. For more information, see [Multi-Tenancy](https://www.kubeflow.org/docs/components/multi-tenancy/). - -Automatic profile creation is not enabled by default. To create profiles as an administrator, see [Manual profile creation](https://www.kubeflow.org/docs/components/multi-tenancy/getting-started/#manual-profile-creation). - diff --git a/content/en/docs/distributions/aws/deploy/uninstall-kubeflow.md b/content/en/docs/distributions/aws/deploy/uninstall-kubeflow.md deleted file mode 100644 index 2542965bb4..0000000000 --- a/content/en/docs/distributions/aws/deploy/uninstall-kubeflow.md +++ /dev/null @@ -1,36 +0,0 @@ -+++ -title = "Uninstall Kubeflow" -description = "Delete Kubeflow deployments and Amazon EKS clusters" -weight = 30 -+++ - -## Uninstall Kubeflow on AWS - -First, delete all existing Kubeflow profiles. 
- -```bash -kubectl get profile -kubectl delete profile --all -``` - -Then, delete the Kubeflow deployment with the following command: - -```bash -kustomize build example | kubectl delete -f -``` - -Cleanup steps for specific deployment options can be found in their respective [installation directories](https://github.com/awslabs/kubeflow-manifests/tree/v1.3-branch/distributions/aws/examples). - -> Note: This will not delete your Amazon EKS cluster. - -## (Optional) Delete Amazon EKS Cluster - -If you created a dedicated Amazon EKS cluster for Kubeflow using `eksctl`, you can delete it with the following command: - -```bash -eksctl delete cluster --region $CLUSTER_REGION --name $CLUSTER_NAME -``` - -> Note: It is possible that parts of the CloudFormation deletion will fail depending upon modifications made post-creation. In that case, manually delete the eks-xxx role in IAM, then the ALB, the EKS target groups, and the subnets of that particular cluster. Then, retry the command to delete the nodegroups and the cluster. - -For more detailed information on deletion options, see [Deleting an Amazon EKS cluster](https://docs.aws.amazon.com/eks/latest/userguide/delete-cluster.html). \ No newline at end of file diff --git a/content/en/docs/distributions/aws/troubleshooting-aws.md b/content/en/docs/distributions/aws/troubleshooting-aws.md deleted file mode 100644 index 16e8c61a38..0000000000 --- a/content/en/docs/distributions/aws/troubleshooting-aws.md +++ /dev/null @@ -1,93 +0,0 @@ -+++ -title = "Troubleshooting" -description = "Diagnose and fix issues you may encounter in your Kubeflow deployment" -weight = 30 -+++ - -For general errors related to Kubernetes and Amazon EKS, please refer to the [Amazon EKS User Guide](https://docs.aws.amazon.com/eks/latest/userguide/troubleshooting.html) troubleshooting section. For issues with cluster creation or modification with `eksctl`, see the [`eksctl` troubleshooting](https://eksctl.io/usage/troubleshooting/) page. 
- -### Validate prerequisites - -You may experience issues due to version incompatibility. Before diving into more specific issues, check to make sure that you have the correct [prerequisites](https://github.com/awslabs/kubeflow-manifests/tree/v1.3-branch/distributions/aws/examples/vanilla#prerequisites) installed. - -### ALB fails to provision - -If you see that your istio-ingress `ADDRESS` is empty after more than a few minutes, it is possible that something is misconfigured in your ALB ingress controller. -```shell -kubectl get ingress -n istio-system -NAME HOSTS ADDRESS PORTS AGE -istio-ingress * 80 3min -``` - -Check the AWS ALB Ingress Controller logs for errors. -```shell -kubectl -n kubeflow logs $(kubectl get pods -n kubeflow --selector=app=aws-alb-ingress-controller --output=jsonpath={.items..metadata.name}) -``` - -``` -E1024 09:02:59.934318 1 :0] kubebuilder/controller "msg"="Reconciler error" "error"="failed to build LoadBalancer configuration due to retrieval of subnets failed to resolve 2 qualified subnets. Subnets must contain the kubernetes.io/cluster/\u003ccluster name\u003e tag with a value of shared or owned and the kubernetes.io/role/elb tag signifying it should be used for ALBs Additionally, there must be at least 2 subnets with unique availability zones as required by ALBs. Either tag subnets to meet this requirement or use the subnets annotation on the ingress resource to explicitly call out what subnets to use for ALB creation. The subnets that did resolve were []" "controller"="alb-ingress-controller" "request"={"Namespace":"istio-system","Name":"istio-ingress"} -``` - -Please check `kubectl get configmaps aws-alb-ingress-controller-config -n kubeflow -o yaml` and make any needed changes. - -If this does not resolve the error, it is possible that your subnets are not tagged so that Kubernetes knows which subnets to use for external load balancers. 
To fix this, ensure that your cluster's public subnets are tagged with the **Key**: ```kubernetes.io/role/elb``` and **Value**: ```1```. See the Prerequisites section for application load balancing in the [Amazon EKS User Guide](https://docs.aws.amazon.com/eks/latest/userguide/alb-ingress.html) for further details. - -### FSx issues - -Verify that the FSx drivers are installed by running the following command: -```bash -kubectl get csidriver -A -``` - -Check that `PersistentVolumes`, `PersistentVolumeClaims`, and `StorageClasses` are all deployed as expected: -```bash -kubectl get pv,pvc,sc -A -``` - -Use the `kubectl logs` command to get more information on Pods that use these resources. - -For more information, see the [Amazon FSx for Lustre CSI Driver](https://github.com/kubernetes-sigs/aws-fsx-csi-driver) GitHub repository. Troubleshooting information for specific FSx filesystems can be found in the [Amazon FSx documentation](https://docs.aws.amazon.com/fsx/index.html). - -### RDS issues - -If you run into CloudFormation deployment errors, see the [CloudFormation troubleshooting guide](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/troubleshooting.html). - -If you have connectivity issues with Amazon RDS, launch a `mysql-client` container and try connecting to your RDS endpoint. This will let you know if you have network connectivity with the database and also if the database was created and is configured properly. - -``` -# Remember to change your RDS endpoint, DB username and DB Password -$ kubectl run -it --rm --image=mysql:5.7 --restart=Never mysql-client -- mysql -h <YOUR RDS ENDPOINT> -u admin -pKubefl0w -If you don't see a command prompt, try pressing enter.
- -mysql> show databases; -+--------------------+ -| Database | -+--------------------+ -| information_schema | -| kubeflow | -| mlpipeline | -| mysql | -| performance_schema | -+--------------------+ -5 rows in set (0.00 sec) - -mysql> use mlpipeline; show tables; -Reading table information for completion of table and column names -You can turn off this feature to get a quicker startup with -A - -Database changed -+----------------------+ -| Tables_in_mlpipeline | -+----------------------+ -| db_statuses | -| default_experiments | -| experiments | -| jobs | -| pipeline_versions | -| pipelines | -| resource_references | -| run_details | -| run_metrics | -+----------------------+ -9 rows in set (0.00 sec) -```