Skip to content

Commit

Permalink
feat: add script to call sm2a promotion pipeline (#228)
Browse files Browse the repository at this point in the history
* feat: add script to call sm2a promotion pipeline

* fix: add files for testing workflow

* fix: add empty file to indicate file addition and not name change to GH

* fix: temporarily remove job condition to test call

* fix: update to add debugging step

* fix: add output step

* fix: add needs condition to publish step

* fix: comment out datetimerange check

* fix: print out api url

* fix: add environment to publish to prod job

* fix: update payload for dag run

* fix: update to use requests package

* fix: print response.text

* fix: amend requests.post call

* fix: use http client instead of requests

* fix: add new dataset-config for testing

* fix: update to pass in entire dataset config

* fix: remove unused dag_input
  • Loading branch information
botanical authored Jan 29, 2025
1 parent 7b2bad1 commit 9d2f553
Show file tree
Hide file tree
Showing 4 changed files with 198 additions and 2 deletions.
25 changes: 23 additions & 2 deletions .github/workflows/pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ jobs:
collection_ids=""
pip install -r ./scripts/requirements.txt
for file in ${PUBLISHED_COLLECTION_FILES}; do
collection_id=$(python3 ./scripts/generate-mdx.py "$file")
collection_id=$(python3 ./scripts/generate_mdx.py "$file")
collection_id=$(echo "$collection_id" | sed 's/^["\s]*//;s/["\s]*$//')
echo "Processed collection ID: $collection_id"
collection_ids="$collection_ids$collection_id,"
Expand Down Expand Up @@ -404,11 +404,32 @@ jobs:
echo "Updated Comment Body: $UPDATED_BODY"
publish-to-prod-on-pr-merge:
  # TEMPORARILY COMMENTED OUT TO TEST API REQUEST
  if: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' && github.event.pull_request.merged == true }}
  runs-on: ubuntu-latest
  environment: staging
  needs: [publish-new-datasets, create-mdx-files-and-open-pr]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Use output from publish-new-datasets
      run: |
        echo "The output from the previous step is: ${{ needs.publish-new-datasets.outputs.publishedCollections }}"

    # A step may contain only ONE `run:` key — the original had both a NO-OP
    # echo and the real promotion loop under the same step, which is a
    # duplicate-key YAML error. They are merged into a single run block here.
    - name: Publish to production on PR merge
      env:
        PUBLISHED_COLLECTION_FILES: ${{ needs.publish-new-datasets.outputs.publishedCollections }}
        SM2A_ADMIN_USERNAME: ${{ secrets.SM2A_ADMIN_USERNAME }}
        SM2A_ADMIN_PASSWORD: ${{ secrets.SM2A_ADMIN_PASSWORD }}
        SM2A_API_URL: ${{ vars.SM2A_API_URL }}
        PROMOTION_DAG: ${{ vars.PROMOTION_DAG_NAME }}
      run: |
        echo $PUBLISHED_COLLECTION_FILES
        pip install -r ./scripts/requirements.txt
        for file in ${PUBLISHED_COLLECTION_FILES}; do
          python3 ./scripts/promote_to_production.py "$file"
          echo "Processed file: $file"
        done
89 changes: 89 additions & 0 deletions ingestion-data/staging/dataset-config/test-sm2a.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
{
"collection": "ida-ndwi-difference-TEST-SM2A",
"title": "NDWI Difference for Pre and Post-Hurricane Ida from PlanetScope TEST-SM2A",
"description": "Normalized Difference Water Index Difference of before and after Hurricane Ida in Southern Louisiana.",
"license": "CC0-1.0",
"is_periodic": true,
"time_density": "day",
"spatial_extent": {
"xmin": -90.85,
"ymin": 29.03,
"xmax": -90.06,
"ymax": 29.65
},
"temporal_extent": {
"startdate": "2021-08-23T00:00:00Z",
"enddate": "2021-09-09T23:59:59Z"
},
"sample_files": [
"s3://veda-data-store-staging/planet-indices-v2/NDWI_Difference_2021-08-23_2021-09-09.tif"
],
"discovery_items": [
{
"discovery": "s3",
"cogify": false,
"upload": false,
"dry_run": false,
"prefix": "planet-indices-v2/",
"bucket": "veda-data-store-staging",
"filename_regex": "(.*)NDWI_Difference_(.*).tif$"
}
],
"data_type": "cog",
"providers": [
{
"name": "NASA VEDA",
"roles": [
"host"
],
"url": "https://www.earthdata.nasa.gov/dashboard/"
}
],
"stac_version": "1.0.0",
"stac_extensions": [
"https://stac-extensions.github.io/render/v1.0.0/schema.json",
"https://stac-extensions.github.io/item-assets/v1.0.0/schema.json"
],
"item_assets": {
"cog_default": {
"type": "image/tiff; application=geotiff; profile=cloud-optimized",
"roles": [
"data",
"layer"
],
"title": "Default COG Layer",
"description": "Cloud optimized default layer to display on map"
}
},
"renders": {
"dashboard": {
"resampling": "nearest",
"bidx": [
1
],
"colormap_name": "rdbu",
"assets": [
"cog_default"
],
"rescale": [
[
-1,
1
]
],
"title": "VEDA Dashboard Render Parameters"
}
},
"assets": {
"thumbnail": {
"href": "https://thumbnails.openveda.cloud/louisiana-marsh.jpg",
"type": "image/jpeg",
"roles": [
"thumbnail"
],
"title": "Thumbnail",
"description": "Photo by [Bridget Besaw](https://www.nature.org/en-us/get-involved/how-to-help/places-we-protect/the-nature-conservancy-in-louisiana-gulf-coast-prairies-and-marshes/) (Wetland landscape across southern Louisiana.)"
}
},
"transfer": true
}
File renamed without changes.
86 changes: 86 additions & 0 deletions scripts/promote_to_production.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
from typing import Dict, Any

import http.client
import json
import sys
import os
import uuid
from base64 import b64encode


class MissingFieldError(Exception):
    """Raised when a discovery item config is missing a required field."""

    pass


def validate_discovery_item_config(item: Dict[str, Any]) -> Dict[str, Any]:
    """Validate that a discovery item config contains all required fields.

    Args:
        item: A single entry from a dataset config's ``discovery_items`` list.

    Returns:
        The same ``item`` dict, unchanged, when every required field is present.

    Raises:
        MissingFieldError: If any required field is absent.
    """
    # Bug fix: the original error strings lacked the f-prefix, so the literal
    # text "{item}" was raised instead of the offending item's contents.
    # The four copy-pasted checks are collapsed into one loop (same order).
    required_fields = ("bucket", "discovery", "filename_regex", "prefix")
    for field in required_fields:
        if field not in item:
            raise MissingFieldError(
                f"Missing required field '{field}' in discovery item: {item}"
            )
    return item


def promote_to_production(payload):
    """Trigger the SM2A promotion DAG via the Airflow REST API.

    Args:
        payload: Dict merged into the dagRuns request body; expected to carry
            a ``"conf"`` key with the full dataset configuration.

    Returns:
        Dict with ``"statusCode"`` (HTTP status) and ``"body"`` (decoded
        response text) from the dagRuns endpoint.

    Raises:
        ValueError: If SM2A_API_URL, SM2A_ADMIN_USERNAME, or
            SM2A_ADMIN_PASSWORD is not set in the environment.
    """
    base_api_url = os.getenv("SM2A_API_URL")
    promotion_dag = os.getenv("PROMOTION_DAG_NAME", "veda_promotion_pipeline")
    username = os.getenv("SM2A_ADMIN_USERNAME")
    password = os.getenv("SM2A_ADMIN_PASSWORD")

    # Validate the raw credentials, not the token: b64encode("None:None") is
    # truthy, so the original check on api_token could never fail.
    if not base_api_url or not username or not password:
        raise ValueError(
            "SM2A_API_URL or SM2A_ADMIN_USERNAME or SM2A_ADMIN_PASSWORD is not"
            + " set in the environment variables."
        )

    # SECURITY: the original printed the password and the Basic-auth token,
    # leaking credentials into CI logs. Never log either.
    api_token = b64encode(f"{username}:{password}".encode()).decode()

    headers = {
        "Content-Type": "application/json",
        "Authorization": "Basic " + api_token,
    }

    body = {
        **payload,
        # Unique run id so repeated triggers never collide in Airflow.
        "dag_run_id": f"{promotion_dag}-{uuid.uuid4()}",
        "note": "Run from GitHub Actions veda-data",
    }

    http_conn = http.client.HTTPSConnection(base_api_url)
    try:
        # http.client's request() returns None; the response object comes
        # from getresponse() (the original assigned the None to `response`).
        http_conn.request(
            "POST",
            f"/api/v1/dags/{promotion_dag}/dagRuns",
            json.dumps(body),
            headers,
        )
        response = http_conn.getresponse()
        response_data = response.read()
        # Fixed stray "$" from the original f-string ("${response_data}").
        print(f"Response: {response_data}")
        return {"statusCode": response.status, "body": response_data.decode()}
    finally:
        # Close the connection even when the request raises.
        http_conn.close()


if __name__ == "__main__":
    # Usage: python3 promote_to_production.py <dataset-config.json>
    if len(sys.argv) < 2:
        raise SystemExit("Usage: promote_to_production.py <dataset-config.json>")

    config_path = sys.argv[1]
    try:
        with open(config_path, "r") as file:
            # Renamed from `input`, which shadowed the builtin.
            dataset_config = json.load(file)
    except json.JSONDecodeError as err:
        # Chain the original decode error for easier debugging in CI logs.
        raise ValueError(f"Invalid JSON content in file {config_path}") from err

    # Fail fast if any discovery item is missing a required field; a missing
    # or null "discovery_items" key is treated as "nothing to validate"
    # (the original crashed with TypeError iterating None).
    for item in dataset_config.get("discovery_items") or []:
        validate_discovery_item_config(item)

    dag_payload = {"conf": dataset_config}
    promote_to_production(dag_payload)

0 comments on commit 9d2f553

Please sign in to comment.