diff --git a/.github/actions/cleanup-runner/action.yml b/.github/actions/cleanup-runner/action.yml new file mode 100644 index 00000000..1727fe39 --- /dev/null +++ b/.github/actions/cleanup-runner/action.yml @@ -0,0 +1,33 @@ +name: Cleanup running runner +description: Cleans a spot instance that has the specified label + +inputs: + service_account_key: + description: GCP Service account key + required: true + project_id: + description: GCP Project ID + required: true + zone: + description: GCP Zone + required: true + instance_label: + description: Label to identify the instance + required: true + +runs: + using: composite + steps: + - id: auth + shell: bash + run: echo '${{ inputs.service_account_key }}' | gcloud --project '${{ inputs.project_id }}' --quiet auth activate-service-account --key-file - >/dev/null 2>&1 + - id: get-runner-id-and-kill-runner + shell: bash + run: | + runner_id=$(gcloud compute instances list --filter=labels=${{ inputs.instance_label }} | awk '{print $1}' | tail -n +2) + if [ -z "$runner_id" ]; then + echo "Instance with label ${{ inputs.instance_label }} not found" + exit 0 + fi + echo "runner_id=$runner_id" >> "$GITHUB_OUTPUT" + gcloud compute instances delete $runner_id --zone=${{ inputs.zone }} --quiet diff --git a/.github/workflows/build-test-image.yaml b/.github/workflows/build-test-image.yaml index df300465..70688750 100644 --- a/.github/workflows/build-test-image.yaml +++ b/.github/workflows/build-test-image.yaml @@ -24,6 +24,38 @@ env: AWS_NETWORK_NAME: aws-network-${{ github.event_name == 'pull_request' && github.event.pull_request.head.ref || github.ref_name }} jobs: + create-runners: + strategy: + matrix: + machines: + - name: replication-runner-private-gcp + machine_type: e2-standard-4 + runner_label: replication-${{ github.run_id }}-${{ github.run_number }} + arm: false + image: projects/app-plane-dev-f7a2434f/global/images/gh-runner-debian + - name: cluster-runner-private-gcp + machine_type: e2-standard-4 + runner_label: 
cluster-${{ github.run_id }}-${{ github.run_number }} + arm: false + image: projects/app-plane-dev-f7a2434f/global/images/gh-runner-debian + runs-on: ubuntu-latest + steps: + - name: Create runners + id: create-runner + uses: FalkorDB/gce-github-runner@install_docker + with: + token: ${{ secrets.GH_SA_TOKEN }} + project_id: ${{ vars.GCP_PROJECT_ID }} + service_account_key: ${{ secrets.GCP_SA_KEY }} + machine_zone: ${{ vars.GCP_ZONE }} + network: n-hcjx5tis6bc + subnet: s-hcjx5tis6bc-pod + disk_size: 100 + machine_type: ${{ matrix.machines.machine_type }} + runner_label: ${{ matrix.machines.runner_label }} + arm: ${{ matrix.machines.arm }} + image: ${{ matrix.machines.image }} + build-and-push: runs-on: ubuntu-latest strategy: @@ -174,8 +206,8 @@ jobs: name: ${{ env.AWS_NETWORK_NAME }} test: - needs: create-custom-networks - runs-on: ubuntu-latest + needs: [create-custom-networks, create-runners] + runs-on: ${{ matrix.instances.runner_label || 'ubuntu-latest' }} strategy: fail-fast: false matrix: @@ -464,7 +496,29 @@ jobs: serviceId: ${{ vars.OMNISTRATE_INTERNAL_SERVICE_ID }} environmentId: ${{ vars.OMNISTRATE_INTERNAL_DEV_ENVIRONMENT}} extraParams: "--resource-key 'multi-Zone' --replica-id 'node-mz-0' --instance-name 'test-mz-add-remove-replica' --instance-description 'test-replication-add-remove' --instance-type 'e2-medium' --storage-size '30' --rdb-config 'medium' --aof-config 'always'" - + ###################### GCP private ###################### + - name: PRO/ClusterMultiZone - PRIVATE/GCP/us-central1 - Failover & Persistence + if: "true" + testFile: test_cluster.py + runner_label: cluster-${{ github.run_id }}-${{ github.run_number }} + tierName: pro-${{ contains(github.ref, 'refs/tags/v') && 'main' || github.event_name == 'pull_request' && github.event.pull_request.head.ref || github.ref_name }} + cloudProvider: gcp + cloudRegion: us-central1 + subscriptionId: sub-GJPV3NoNC0 + serviceId: ${{ vars.OMNISTRATE_INTERNAL_SERVICE_ID }} + environmentId: ${{ 
vars.OMNISTRATE_INTERNAL_DEV_ENVIRONMENT}} + extraParams: "--resource-key 'cluster-Multi-Zone' --replica-id 'cluster-mz-4' --network-type INTERNAL --instance-name 'test-cluster-mz-failover-private' --instance-description 'test-cluster-mz-failover-private' --instance-type 'e2-medium' --storage-size '30' --rdb-config 'medium' --aof-config 'always' --host-count '6' --cluster-replicas '1' --ensure-mz-distribution" + - name: PRO/MultiZone - PRIVATE/GCP/us-central1 - Failover & Persistence + if: "true" + testFile: test_replication.py + runner_label: replication-${{ github.run_id }}-${{ github.run_number }} + tierName: pro-${{ contains(github.ref, 'refs/tags/v') && 'main' || github.event_name == 'pull_request' && github.event.pull_request.head.ref || github.ref_name }} + cloudProvider: gcp + cloudRegion: us-central1 + subscriptionId: sub-GJPV3NoNC0 + serviceId: ${{ vars.OMNISTRATE_INTERNAL_SERVICE_ID }} + environmentId: ${{ vars.OMNISTRATE_INTERNAL_DEV_ENVIRONMENT}} + extraParams: "--resource-key 'multi-Zone' --instance-name 'test-mz-failover-private' --network-type INTERNAL --instance-description 'test-mz-failover-private' --instance-type 'e2-medium' --storage-size '30' --rdb-config 'medium' --aof-config 'always'" ###################### AWS ###################### - name: Free - AWS/us-east-2 - Failover & Persistence if: "true" @@ -587,7 +641,7 @@ jobs: ref: ${{ github.event.pull_request.head.sha || github.sha }} - name: Setup python - if: matrix.instances.if == 'true' || matrix.instances.if == true + if: ${{ (matrix.instances.if == 'true' || matrix.instances.if == true) && !matrix.instances.runner_label }} uses: actions/setup-python@v2 with: python-version: "3.x" @@ -613,7 +667,9 @@ jobs: - name: Install dependencies if: matrix.instances.if == 'true' || matrix.instances.if == true - run: poetry install + run: | + export PATH="$HOME/.local/bin:$PATH" + poetry install - name: ${{ matrix.instances.name }} if: matrix.instances.if == 'true' || matrix.instances.if == true @@ 
-626,6 +682,7 @@ jobs: SUBSCRIPTION_ID: ${{ matrix.instances.subscriptionId }} REF_NAME: ${{ matrix.instances.tierName }} run: | + export PATH="$HOME/.local/bin:$PATH" poetry run python -u ./omnistrate_tests/${{ matrix.instances.testFile }} ${{ secrets.OMNISTRATE_USERNAME }} ${{ secrets.OMNISTRATE_PASSWORD }} ${{ env.CLOUD_PROVIDER }} ${{ env.CLOUD_REGION }} --service-id ${{ env.SERVICE_ID }} --environment-id ${{ env.ENVIRONMENT_ID }} ${{ env.extraParams }} # Runs only if the branch is 'main' or 'v*' @@ -654,3 +711,21 @@ jobs: username: ${{ secrets.OMNISTRATE_USERNAME }} password: ${{ secrets.OMNISTRATE_PASSWORD }} custom_network_name: ${{ env.AWS_NETWORK_NAME }} + + cleanup-runner: + needs: test + if: ${{ always() }} + runs-on: ubuntu-latest + strategy: + matrix: + platform: + - machine_label: cluster-${{ github.run_id }}-${{ github.run_number }} + - machine_label: replication-${{ github.run_id }}-${{ github.run_number }} + steps: + - uses: actions/checkout@v4 + - uses: ./.github/actions/cleanup-runner + with: + service_account_key: ${{ secrets.GCP_SA_KEY }} + project_id: ${{ vars.GCP_PROJECT_ID }} + zone: ${{ vars.GCP_ZONE }} + instance_label: ${{ matrix.platform.machine_label }} \ No newline at end of file diff --git a/omnistrate_tests/classes/falkordb_cluster.py b/omnistrate_tests/classes/falkordb_cluster.py index eefc6ebb..2efcdc05 100644 --- a/omnistrate_tests/classes/falkordb_cluster.py +++ b/omnistrate_tests/classes/falkordb_cluster.py @@ -58,10 +58,11 @@ def idx(self) -> int | None: return ( int(self.hostname.split(".")[0].split("-")[-1]) - if "-" in self.hostname and "." in self.hostname + if "-" in self.hostname and "." in self.hostname and "internal" not in self.hostname + else int(self.hostname.split(".")[0].split("-")[-2]) + if "-" in self.hostname and "." 
in self.hostname and "internal" in self.hostname else None ) - @property def is_master(self) -> bool: return self.mode == "master" diff --git a/omnistrate_tests/classes/omnistrate_fleet_instance.py b/omnistrate_tests/classes/omnistrate_fleet_instance.py index a15d14d4..7a2ee08f 100644 --- a/omnistrate_tests/classes/omnistrate_fleet_instance.py +++ b/omnistrate_tests/classes/omnistrate_fleet_instance.py @@ -112,6 +112,7 @@ def create( description: str, falkordb_user: str, falkordb_password: str, + network_type: str = "PUBLIC", product_tier_version: str | None = None, custom_network_id: str | None = None, **kwargs, @@ -123,6 +124,7 @@ def create( data = { "cloud_provider": deployment_cloud_provider, "region": deployment_region, + "network_type": network_type, "requestParams": { "name": name, "description": description, @@ -493,7 +495,7 @@ def get_connection_endpoints(self): return endpoints - def get_cluster_endpoint(self): + def get_cluster_endpoint(self, network_type="PUBLIC"): resources = self.get_network_topology() resources_keys = resources.keys() @@ -504,7 +506,7 @@ def get_cluster_endpoint(self): and len(resources[key]["clusterEndpoint"]) > 0 and "streamer." 
not in resources[key]["clusterEndpoint"] and "clusterPorts" in resources[key] - and resources[key]["networkingType"] != "INTERNAL" + and resources[key]["networkingType"] == network_type ): return { "endpoint": resources[key]["clusterEndpoint"], @@ -512,13 +514,13 @@ def get_cluster_endpoint(self): } def create_connection( - self, ssl: bool = False, force_reconnect: bool = False, retries=5 + self, ssl: bool = False, force_reconnect: bool = False, retries=5, network_type="PUBLIC" ): if self._connection is not None and not force_reconnect: return self._connection - endpoint = self.get_cluster_endpoint() + endpoint = self.get_cluster_endpoint(network_type=network_type) # Connect to the master node while retries > 0: diff --git a/omnistrate_tests/test_cluster.py b/omnistrate_tests/test_cluster.py index 7e87c65b..81454e81 100644 --- a/omnistrate_tests/test_cluster.py +++ b/omnistrate_tests/test_cluster.py @@ -56,6 +56,8 @@ parser.add_argument("--ensure-mz-distribution", action="store_true") parser.add_argument("--custom-network", required=False) +parser.add_argument("--network-type", required=False, default="PUBLIC") + parser.add_argument( "--deployment-create-timeout-seconds", required=False, default=2600, type=int @@ -133,6 +135,7 @@ def test_cluster(): instance.create( wait_for_ready=True, deployment_cloud_provider=args.cloud_provider, + network_type=args.network_type, deployment_region=args.region, name=args.instance_name, description=args.instance_description, @@ -146,11 +149,12 @@ def test_cluster(): hostCount=args.host_count, clusterReplicas=args.cluster_replicas, custom_network_id=network.network_id if network else None, + ) try: ip = resolve_hostname(instance=instance) - logging.info(f"Instance endpoint {instance.get_cluster_endpoint()['endpoint']} resolved to {ip}") + logging.info(f"Instance endpoint {instance.get_cluster_endpoint(network_type=args.network_type)['endpoint']} resolved to {ip}") except TimeoutError as e: logging.error(f"DNS resolution failed: 
{e}") raise Exception("Instance endpoint not ready: DNS resolution failed") from e @@ -273,6 +277,7 @@ def test_failover(instance: OmnistrateFleetInstance): # Get instance host and port db = instance.create_connection( ssl=args.tls, + network_type=args.network_type, ) graph = db.select_graph("test") @@ -302,6 +307,7 @@ def test_stop_start(instance: OmnistrateFleetInstance): # Get instance host and port db = instance.create_connection( ssl=args.tls, + network_type=args.network_type, ) graph = db.select_graph("test") @@ -335,7 +341,7 @@ def test_zero_downtime( ): """This function should test the ability to read and write while a failover happens""" try: - db = instance.create_connection(ssl=ssl, force_reconnect=True) + db = instance.create_connection(ssl=ssl, force_reconnect=True, network_type=args.network_type) graph = db.select_graph("test") @@ -367,7 +373,7 @@ def resolve_hostname(instance: OmnistrateFleetInstance,timeout=300, interval=1): if interval <= 0 or timeout <= 0: raise ValueError("Interval and timeout must be positive") - cluster_endpoint = instance.get_cluster_endpoint() + cluster_endpoint = instance.get_cluster_endpoint(network_type=args.network_type) if not cluster_endpoint or 'endpoint' not in cluster_endpoint: raise KeyError("Missing endpoint information in cluster configuration") diff --git a/omnistrate_tests/test_cluster_replicas.py b/omnistrate_tests/test_cluster_replicas.py index 4c132830..ad48612b 100644 --- a/omnistrate_tests/test_cluster_replicas.py +++ b/omnistrate_tests/test_cluster_replicas.py @@ -54,7 +54,7 @@ parser.add_argument("--shards", required=False, default="3") parser.add_argument("--persist-instance-on-fail",action="store_true") parser.add_argument("--ensure-mz-distribution", action="store_true") - +parser.add_argument("--network-type", required=False, default="PUBLIC") parser.set_defaults(tls=False) args = parser.parse_args() @@ -118,6 +118,7 @@ def test_cluster_replicas(): instance.create( wait_for_ready=True, 
deployment_cloud_provider=args.cloud_provider, + network_type=args.network_type, deployment_region=args.region, name=args.instance_name, description=args.instance_description, diff --git a/omnistrate_tests/test_cluster_shards.py b/omnistrate_tests/test_cluster_shards.py index 8032aa01..5a7ca67e 100644 --- a/omnistrate_tests/test_cluster_shards.py +++ b/omnistrate_tests/test_cluster_shards.py @@ -51,6 +51,7 @@ parser.add_argument("--ensure-mz-distribution", action="store_true") parser.add_argument("--persist-instance-on-fail",action="store_true") +parser.add_argument("--network-type", required=False, default="PUBLIC") parser.set_defaults(tls=False) args = parser.parse_args() @@ -112,6 +113,7 @@ def test_cluster_shards(): instance.create( wait_for_ready=True, deployment_cloud_provider=args.cloud_provider, + network_type=args.network_type, deployment_region=args.region, name=args.instance_name, description=args.instance_description, diff --git a/omnistrate_tests/test_replication.py b/omnistrate_tests/test_replication.py index 82691d09..312a09f8 100644 --- a/omnistrate_tests/test_replication.py +++ b/omnistrate_tests/test_replication.py @@ -64,6 +64,7 @@ parser.add_argument("--aof-config", required=False, default="always") parser.add_argument("--persist-instance-on-fail",action="store_true") parser.add_argument("--custom-network", required=False) +parser.add_argument("--network-type", required=False, default="PUBLIC") parser.set_defaults(tls=False) args = parser.parse_args() @@ -126,6 +127,7 @@ def test_replication(): instance.create( wait_for_ready=True, deployment_cloud_provider=args.cloud_provider, + network_type=args.network_type, deployment_region=args.region, name=args.instance_name, description=args.instance_description, @@ -141,7 +143,7 @@ def test_replication(): try: ip = resolve_hostname(instance=instance) - logging.info(f"Instance endpoint {instance.get_cluster_endpoint()['endpoint']} resolved to {ip}") + logging.info(f"Instance endpoint 
{instance.get_cluster_endpoint(network_type=args.network_type)['endpoint']} resolved to {ip}") except TimeoutError as e: logging.error(f"DNS resolution failed: {e}") raise Exception("Instance endpoint not ready: DNS resolution failed") from e @@ -451,7 +453,7 @@ def test_zero_downtime( ): """This function should test the ability to read and write while replication happens""" try: - db = instance.create_connection(ssl=ssl, force_reconnect=True) + db = instance.create_connection(ssl=ssl, force_reconnect=True, network_type=args.network_type) graph = db.select_graph("test") @@ -483,7 +485,7 @@ def resolve_hostname(instance: OmnistrateFleetInstance,timeout=300, interval=1): if interval <= 0 or timeout <= 0: raise ValueError("Interval and timeout must be positive") - cluster_endpoint = instance.get_cluster_endpoint() + cluster_endpoint = instance.get_cluster_endpoint(network_type=args.network_type) if not cluster_endpoint or 'endpoint' not in cluster_endpoint: raise KeyError("Missing endpoint information in cluster configuration") diff --git a/omnistrate_tests/test_replication_replicas.py b/omnistrate_tests/test_replication_replicas.py index fb465ebf..12644c26 100644 --- a/omnistrate_tests/test_replication_replicas.py +++ b/omnistrate_tests/test_replication_replicas.py @@ -51,7 +51,7 @@ parser.add_argument("--aof-config", required=False, default="always") parser.add_argument("--replica-count", required=False, default="2") parser.add_argument("--persist-instance-on-fail", action="store_true") - +parser.add_argument("--network-type", required=False, default="PUBLIC") parser.set_defaults(tls=False) args = parser.parse_args() @@ -111,6 +111,7 @@ def test_add_remove_replica(): instance.create( wait_for_ready=True, deployment_cloud_provider=args.cloud_provider, + network_type=args.network_type, deployment_region=args.region, name=args.instance_name, description=args.instance_description, diff --git a/omnistrate_tests/test_standalone.py b/omnistrate_tests/test_standalone.py 
index 649d6099..63d7b275 100644 --- a/omnistrate_tests/test_standalone.py +++ b/omnistrate_tests/test_standalone.py @@ -49,6 +49,7 @@ parser.add_argument("--aof-config", required=False, default="always") parser.add_argument("--persist-instance-on-fail",action="store_true") parser.add_argument("--custom-network", required=False) +parser.add_argument("--network-type", required=False, default="PUBLIC") parser.set_defaults(tls=False) args = parser.parse_args() @@ -112,6 +113,7 @@ def test_standalone(): instance.create( wait_for_ready=True, deployment_cloud_provider=args.cloud_provider, + network_type=args.network_type, deployment_region=args.region, name=args.instance_name, description=args.instance_description, diff --git a/omnistrate_tests/test_update_memory.py b/omnistrate_tests/test_update_memory.py index db1d400b..52c2915e 100644 --- a/omnistrate_tests/test_update_memory.py +++ b/omnistrate_tests/test_update_memory.py @@ -64,6 +64,8 @@ "--deployment-failover-timeout-seconds", required=False, default=2600, type=int ) +parser.add_argument("--network-type", required=False, default="PUBLIC") + parser.set_defaults(tls=False) args = parser.parse_args() @@ -121,6 +123,7 @@ def test_update_memory(): instance.create( wait_for_ready=True, deployment_cloud_provider=args.cloud_provider, + network_type=args.network_type, deployment_region=args.region, name=args.instance_name, description=args.instance_description, diff --git a/omnistrate_tests/test_upgrade_version.py b/omnistrate_tests/test_upgrade_version.py index f46b3b4d..2c290647 100644 --- a/omnistrate_tests/test_upgrade_version.py +++ b/omnistrate_tests/test_upgrade_version.py @@ -55,7 +55,7 @@ parser.add_argument("--host-count", required=False, default="6") parser.add_argument("--cluster-replicas", required=False, default="1") parser.add_argument("--persist-instance-on-fail", action="store_true") - +parser.add_argument("--network-type", required=False, default="PUBLIC") parser.set_defaults(tls=False) args = 
parser.parse_args() @@ -143,6 +143,7 @@ def test_upgrade_version(): instance.create( wait_for_ready=True, deployment_cloud_provider=args.cloud_provider, + network_type=args.network_type, deployment_region=args.region, name=args.instance_name, description=args.instance_description, @@ -156,6 +157,7 @@ def test_upgrade_version(): hostCount=args.host_count, clusterReplicas=args.cluster_replicas, product_tier_version=last_tier.version, + ) try: