From 73fa61c126037fe29429b09f0e11262e59104999 Mon Sep 17 00:00:00 2001 From: Samuel Almeida Date: Thu, 2 Mar 2023 17:39:18 -0300 Subject: [PATCH 1/3] feat: Filter prod deployment events to persist in table --- bq-workers/gitlab-parser/main.py | 45 ++++++++++++++++++--------- bq-workers/gitlab-parser/main_test.py | 42 +++++++++++++++++++++++++ queries/deployments.sql | 5 ++- 3 files changed, 76 insertions(+), 16 deletions(-) diff --git a/bq-workers/gitlab-parser/main.py b/bq-workers/gitlab-parser/main.py index 9a1cae00..c2dcb67a 100644 --- a/bq-workers/gitlab-parser/main.py +++ b/bq-workers/gitlab-parser/main.py @@ -18,6 +18,7 @@ import json import shared +import logging from flask import Flask, request @@ -44,6 +45,8 @@ def index(): if "attributes" not in msg: raise Exception("Missing pubsub attributes") + metadata = get_metadata(msg) + try: attr = msg["attributes"] @@ -55,15 +58,20 @@ def index(): if "X-Gitlab-Event" in headers: event = process_gitlab_event(headers, msg) - shared.insert_row_into_bigquery(event) + if metadata["object_kind"] == "deployment": + if check_if_is_prod_environment(metadata): + shared.insert_row_into_bigquery(event) + else: + shared.insert_row_into_bigquery(event) + except Exception as e: entry = { - "severity": "WARNING", - "msg": "Data not saved to BigQuery", - "errors": str(e), - "json_payload": envelope - } + "severity": "WARNING", + "msg": "Data not saved to BigQuery", + "errors": str(e), + "json_payload": envelope + } print(json.dumps(entry)) return "", 204 @@ -82,7 +90,7 @@ def process_gitlab_event(headers, msg): "pipeline", "job", "deployment", "build"} - metadata = json.loads(base64.b64decode(msg["data"]).decode("utf-8").strip()) + metadata = get_metadata(msg) event_type = metadata["object_kind"] @@ -99,15 +107,15 @@ def process_gitlab_event(headers, msg): event_object = metadata["object_attributes"] e_id = event_object["id"] time_created = ( - event_object.get("updated_at") or - event_object.get("finished_at") or - 
event_object.get("created_at")) + event_object.get("updated_at") or + event_object.get("finished_at") or + event_object.get("created_at")) if event_type in ("job"): e_id = metadata["build_id"] time_created = ( - event_object.get("finished_at") or - event_object.get("started_at")) + event_object.get("finished_at") or + event_object.get("started_at")) if event_type in ("deployment"): e_id = metadata["deployment_id"] @@ -116,9 +124,9 @@ def process_gitlab_event(headers, msg): if event_type in ("build"): e_id = metadata["build_id"] time_created = ( - metadata.get("build_finished_at") or - metadata.get("build_started_at") or - metadata.get("build_created_at")) + metadata.get("build_finished_at") or + metadata.get("build_started_at") or + metadata.get("build_created_at")) # Some timestamps come in a format like "2021-04-28 21:50:00 +0200" # BigQuery does not accept this as a valid format @@ -146,6 +154,13 @@ def process_gitlab_event(headers, msg): return gitlab_event +def check_if_is_prod_environment(metadata): + return "prod" in metadata["environment"].lower() + +def get_metadata(msg): + return json.loads(base64.b64decode(msg["data"]).decode("utf-8").strip()) + + if __name__ == "__main__": PORT = int(os.getenv("PORT")) if os.getenv("PORT") else 8080 diff --git a/bq-workers/gitlab-parser/main_test.py b/bq-workers/gitlab-parser/main_test.py index 6b8fa658..d8526991 100644 --- a/bq-workers/gitlab-parser/main_test.py +++ b/bq-workers/gitlab-parser/main_test.py @@ -101,6 +101,7 @@ def test_timestamp_timezone_event_processed(client): "short_sha": "279484c0", "status_changed_at": "2021-04-28 21:50:00 +0200", "deployment_id": 15, + "environment": "Production", }).encode("utf-8") pubsub_msg = { @@ -132,3 +133,44 @@ def test_timestamp_timezone_event_processed(client): shared.insert_row_into_bigquery.assert_called_with(event) assert r.status_code == 204 + +def test_ignore_non_production_environment_event(client): + headers = {"X-Gitlab-Event": "deployment", "X-Gitlab-Token": 
"foo"} + data = json.dumps({"object_kind": "deployment", + "short_sha": "279484c0", + "status_changed_at": "2021-04-28 21:50:00 +0200", + "deployment_id": 15, + "environment": "development", + }).encode("utf-8") + + pubsub_msg = { + "message": { + "data": base64.b64encode(data).decode("utf-8"), + "attributes": {"headers": json.dumps(headers)}, + "message_id": "foobar", + "publishTime": 1, + }, + } + + event = { + "event_type": "deployment", + "id": 15, + "metadata": data.decode(), + "time_created": "2021-04-28 21:50:00", + "signature": shared.create_unique_id(pubsub_msg["message"]), + "msg_id": "foobar", + "source": "gitlab", + } + + shared.insert_row_into_bigquery = mock.MagicMock() + + r = client.post( + "/", + data=json.dumps(pubsub_msg), + headers={"Content-Type": "application/json"}, + ) + + shared.insert_row_into_bigquery.assert_not_called() + assert r.status_code == 204 + + diff --git a/queries/deployments.sql b/queries/deployments.sql index 3fabdb04..6838d32c 100644 --- a/queries/deployments.sql +++ b/queries/deployments.sql @@ -19,7 +19,9 @@ WITH deploys_cloudbuild_github_gitlab AS (# Cloud Build, Github, Gitlab pipeline CASE WHEN source LIKE "github%" THEN ARRAY( SELECT JSON_EXTRACT_SCALAR(string_element, '$') FROM UNNEST(JSON_EXTRACT_ARRAY(metadata, '$.deployment.additional_sha')) AS string_element) - ELSE ARRAY[] end as additional_commits + ELSE ARRAY[] end as additional_commits, + JSON_EXTRACT_SCALAR(metadata, '$.project.id') as project_id, + JSON_EXTRACT_SCALAR(metadata, '$.environment') as environment FROM four_keys.events_raw WHERE ( # Cloud Build Deployments @@ -33,6 +35,7 @@ WITH deploys_cloudbuild_github_gitlab AS (# Cloud Build, Github, Gitlab pipeline # ArgoCD Deployments OR (source = "argocd" AND JSON_EXTRACT_SCALAR(metadata, '$.status') = "SUCCESS") ) + AND JSON_EXTRACT_SCALAR(metadata, '$.environment') like "%prod%" ), deploys_tekton AS (# Tekton Pipelines SELECT From 9890b293bf29fc2ad050cbf1f196d1da11af9f73 Mon Sep 17 00:00:00 2001 From: 
Samuel Almeida Date: Fri, 10 Mar 2023 11:29:04 -0300 Subject: [PATCH 2/3] refactor: Add environment deployment variable in Dockerfile --- bq-workers/gitlab-parser/Dockerfile | 5 +++++ bq-workers/gitlab-parser/main.py | 21 +++++++++++---------- bq-workers/gitlab-parser/main_test.py | 8 ++++++-- queries/deployments.sql | 1 - 4 files changed, 22 insertions(+), 13 deletions(-) diff --git a/bq-workers/gitlab-parser/Dockerfile b/bq-workers/gitlab-parser/Dockerfile index 47453ea1..a4ef7f37 100644 --- a/bq-workers/gitlab-parser/Dockerfile +++ b/bq-workers/gitlab-parser/Dockerfile @@ -17,6 +17,11 @@ # https://hub.docker.com/_/python FROM python:3.7 +# Allow configuring deployment environments separated by comma (,) to filter events +# Example: prod,prd,any +# TODO: Fix to Terraform env +ENV DEPLOYMENT_ENVIRONMENTS=prod + # Allow statements and log messages to immediately appear in the Cloud Run logs ENV PYTHONUNBUFFERED True diff --git a/bq-workers/gitlab-parser/main.py b/bq-workers/gitlab-parser/main.py index c2dcb67a..6cd0cdec 100644 --- a/bq-workers/gitlab-parser/main.py +++ b/bq-workers/gitlab-parser/main.py @@ -16,15 +16,12 @@ from datetime import datetime import os import json - import shared -import logging from flask import Flask, request app = Flask(__name__) - @app.route("/", methods=["POST"]) def index(): """ @@ -58,13 +55,9 @@ def index(): if "X-Gitlab-Event" in headers: event = process_gitlab_event(headers, msg) - if metadata["object_kind"] == "deployment": - if check_if_is_prod_environment(metadata): - shared.insert_row_into_bigquery(event) - else: + if check_if_can_save_the_event(metadata): shared.insert_row_into_bigquery(event) - except Exception as e: entry = { "severity": "WARNING", @@ -153,9 +146,17 @@ def process_gitlab_event(headers, msg): return gitlab_event +def check_if_can_save_the_event(metadata): + deploy_envs = os.getenv("DEPLOYMENT_ENVIRONMENTS") + + if deploy_envs is None or metadata["object_kind"] != "deployment": + return True -def 
check_if_is_prod_environment(metadata): - return "prod" in metadata["environment"].lower() + for env in deploy_envs.split(","): + if env.lower() in metadata["environment"].lower(): + return True + + return False def get_metadata(msg): return json.loads(base64.b64decode(msg["data"]).decode("utf-8").strip()) diff --git a/bq-workers/gitlab-parser/main_test.py b/bq-workers/gitlab-parser/main_test.py index d8526991..366f9e9c 100644 --- a/bq-workers/gitlab-parser/main_test.py +++ b/bq-workers/gitlab-parser/main_test.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import base64 import json @@ -134,13 +135,14 @@ def test_timestamp_timezone_event_processed(client): shared.insert_row_into_bigquery.assert_called_with(event) assert r.status_code == 204 -def test_ignore_non_production_environment_event(client): +def test_ignore_unexpected_environment_event(client): + set_accept_deployment_environment("prod,dev") headers = {"X-Gitlab-Event": "deployment", "X-Gitlab-Token": "foo"} data = json.dumps({"object_kind": "deployment", "short_sha": "279484c0", "status_changed_at": "2021-04-28 21:50:00 +0200", "deployment_id": 15, - "environment": "development", + "environment": "qa", }).encode("utf-8") pubsub_msg = { @@ -173,4 +175,6 @@ def test_ignore_non_production_environment_event(client): shared.insert_row_into_bigquery.assert_not_called() assert r.status_code == 204 +def set_accept_deployment_environment(value): + os.environ["DEPLOYMENT_ENVIRONMENTS"] = value diff --git a/queries/deployments.sql b/queries/deployments.sql index 6838d32c..5e6d1aa9 100644 --- a/queries/deployments.sql +++ b/queries/deployments.sql @@ -35,7 +35,6 @@ WITH deploys_cloudbuild_github_gitlab AS (# Cloud Build, Github, Gitlab pipeline # ArgoCD Deployments OR (source = "argocd" AND JSON_EXTRACT_SCALAR(metadata, '$.status') = "SUCCESS") ) - AND JSON_EXTRACT_SCALAR(metadata, '$.environment') like "%prod%" ), deploys_tekton AS 
(# Tekton Pipelines SELECT From 944577e582805948f85ba0ad967e92b7b4a2ff38 Mon Sep 17 00:00:00 2001 From: Samuel Almeida Date: Thu, 23 Mar 2023 17:46:01 -0300 Subject: [PATCH 3/3] refactor: Change method name --- bq-workers/gitlab-parser/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bq-workers/gitlab-parser/main.py b/bq-workers/gitlab-parser/main.py index 6cd0cdec..45cdea6c 100644 --- a/bq-workers/gitlab-parser/main.py +++ b/bq-workers/gitlab-parser/main.py @@ -55,7 +55,7 @@ def index(): if "X-Gitlab-Event" in headers: event = process_gitlab_event(headers, msg) - if check_if_can_save_the_event(metadata): + if can_save_the_event(metadata): shared.insert_row_into_bigquery(event) except Exception as e: @@ -146,7 +146,7 @@ def process_gitlab_event(headers, msg): return gitlab_event -def check_if_can_save_the_event(metadata): +def can_save_the_event(metadata): deploy_envs = os.getenv("DEPLOYMENT_ENVIRONMENTS") if deploy_envs is None or metadata["object_kind"] != "deployment":