Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Filter prod deployment events to persist in table #1

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 30 additions & 15 deletions bq-workers/gitlab-parser/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import json

import shared
import logging
samuelalmeida-dev marked this conversation as resolved.
Show resolved Hide resolved

from flask import Flask, request

Expand All @@ -44,6 +45,8 @@ def index():
if "attributes" not in msg:
raise Exception("Missing pubsub attributes")

metadata = get_metadata(msg)

try:
attr = msg["attributes"]

Expand All @@ -55,15 +58,20 @@ def index():
if "X-Gitlab-Event" in headers:
event = process_gitlab_event(headers, msg)

shared.insert_row_into_bigquery(event)
if metadata["object_kind"] == "deployment":
samuelalmeida-dev marked this conversation as resolved.
Show resolved Hide resolved
if check_if_is_prod_environment(metadata):
shared.insert_row_into_bigquery(event)
else:
shared.insert_row_into_bigquery(event)


except Exception as e:
entry = {
"severity": "WARNING",
"msg": "Data not saved to BigQuery",
"errors": str(e),
"json_payload": envelope
}
"severity": "WARNING",
"msg": "Data not saved to BigQuery",
"errors": str(e),
"json_payload": envelope
}
print(json.dumps(entry))
samuelalmeida-dev marked this conversation as resolved.
Show resolved Hide resolved

return "", 204
Expand All @@ -82,7 +90,7 @@ def process_gitlab_event(headers, msg):
"pipeline", "job", "deployment",
"build"}

metadata = json.loads(base64.b64decode(msg["data"]).decode("utf-8").strip())
metadata = get_metadata(msg)

event_type = metadata["object_kind"]

Expand All @@ -99,15 +107,15 @@ def process_gitlab_event(headers, msg):
event_object = metadata["object_attributes"]
e_id = event_object["id"]
time_created = (
event_object.get("updated_at") or
event_object.get("finished_at") or
event_object.get("created_at"))
event_object.get("updated_at") or
event_object.get("finished_at") or
event_object.get("created_at"))

if event_type in ("job"):
e_id = metadata["build_id"]
time_created = (
event_object.get("finished_at") or
event_object.get("started_at"))
event_object.get("finished_at") or
event_object.get("started_at"))

if event_type in ("deployment"):
e_id = metadata["deployment_id"]
Expand All @@ -116,9 +124,9 @@ def process_gitlab_event(headers, msg):
if event_type in ("build"):
e_id = metadata["build_id"]
time_created = (
metadata.get("build_finished_at") or
metadata.get("build_started_at") or
metadata.get("build_created_at"))
metadata.get("build_finished_at") or
metadata.get("build_started_at") or
metadata.get("build_created_at"))

# Some timestamps come in a format like "2021-04-28 21:50:00 +0200"
# BigQuery does not accept this as a valid format
Expand Down Expand Up @@ -146,6 +154,13 @@ def process_gitlab_event(headers, msg):
return gitlab_event


def check_if_is_prod_environment(metadata):
    """Return True when the event's environment name contains "prod".

    Args:
        metadata: Decoded event payload (a dict). Its "environment" entry
            is expected to be the environment name as a string.

    Returns:
        True if the lowercased environment name contains "prod",
        otherwise False. A missing or non-string environment is treated
        as non-production instead of raising.
    """
    environment = metadata.get("environment")
    if not isinstance(environment, str):
        # No usable environment name: cannot be classified as production.
        return False
    # NOTE: this is a substring match, so names such as "preprod" or
    # "non-prod" are also reported as production.
    return "prod" in environment.lower()
samuelalmeida-dev marked this conversation as resolved.
Show resolved Hide resolved

def get_metadata(msg):
    """Decode the JSON payload carried in ``msg["data"]``.

    The value is base64-decoded, interpreted as UTF-8, stripped of
    surrounding whitespace and parsed as JSON.

    Args:
        msg: Mapping with a "data" entry holding base64-encoded JSON.

    Returns:
        The parsed JSON value.
    """
    raw_bytes = base64.b64decode(msg["data"])
    json_text = raw_bytes.decode("utf-8").strip()
    return json.loads(json_text)


if __name__ == "__main__":
PORT = int(os.getenv("PORT")) if os.getenv("PORT") else 8080

Expand Down
42 changes: 42 additions & 0 deletions bq-workers/gitlab-parser/main_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ def test_timestamp_timezone_event_processed(client):
"short_sha": "279484c0",
"status_changed_at": "2021-04-28 21:50:00 +0200",
"deployment_id": 15,
"environment": "Production",
}).encode("utf-8")

pubsub_msg = {
Expand Down Expand Up @@ -132,3 +133,44 @@ def test_timestamp_timezone_event_processed(client):

shared.insert_row_into_bigquery.assert_called_with(event)
assert r.status_code == 204

def test_ignore_non_production_environment_event(client):
    """A deployment event for a non-prod environment is not inserted."""
    headers = {"X-Gitlab-Event": "deployment", "X-Gitlab-Token": "foo"}
    data = json.dumps({
        "object_kind": "deployment",
        "short_sha": "279484c0",
        "status_changed_at": "2021-04-28 21:50:00 +0200",
        "deployment_id": 15,
        "environment": "development",
    }).encode("utf-8")

    pubsub_msg = {
        "message": {
            "data": base64.b64encode(data).decode("utf-8"),
            "attributes": {"headers": json.dumps(headers)},
            "message_id": "foobar",
            "publishTime": 1,
        },
    }

    # Replace the BigQuery writer so the test can observe whether the
    # handler tried to persist the event.
    shared.insert_row_into_bigquery = mock.MagicMock()

    r = client.post(
        "/",
        data=json.dumps(pubsub_msg),
        headers={"Content-Type": "application/json"},
    )

    # The request is still acknowledged, but nothing is written.
    shared.insert_row_into_bigquery.assert_not_called()
    assert r.status_code == 204


5 changes: 4 additions & 1 deletion queries/deployments.sql
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ WITH deploys_cloudbuild_github_gitlab AS (# Cloud Build, Github, Gitlab pipeline
CASE WHEN source LIKE "github%" THEN ARRAY(
SELECT JSON_EXTRACT_SCALAR(string_element, '$')
FROM UNNEST(JSON_EXTRACT_ARRAY(metadata, '$.deployment.additional_sha')) AS string_element)
ELSE ARRAY<string>[] end as additional_commits
ELSE ARRAY<string>[] end as additional_commits,
JSON_EXTRACT_SCALAR(metadata, '$.project.id') as project_id,
JSON_EXTRACT_SCALAR(metadata, '$.environment') as environment
FROM four_keys.events_raw
WHERE (
# Cloud Build Deployments
Expand All @@ -33,6 +35,7 @@ WITH deploys_cloudbuild_github_gitlab AS (# Cloud Build, Github, Gitlab pipeline
# ArgoCD Deployments
OR (source = "argocd" AND JSON_EXTRACT_SCALAR(metadata, '$.status') = "SUCCESS")
)
AND JSON_EXTRACT_SCALAR(metadata, '$.environment') like "%prod%"
fbtravi marked this conversation as resolved.
Show resolved Hide resolved
),
deploys_tekton AS (# Tekton Pipelines
SELECT
Expand Down