Skip to content

Commit

Permalink
feat: add script to call sm2a promotion pipeline (#228)
Browse files Browse the repository at this point in the history
* feat: add script to call sm2a promotion pipeline

* fix: add files for testing workflow

* fix: add empty file to indicate file addition and not name change to GH

* fix: temporarily remove job condition to test call

* fix: update to add debugging step

* fix: add output step

* fix: add needs condition to publish step

* fix: comment out datetimerange check

* fix: print out api url

* fix: add environment to publish to prod job

* fix: update payload for dag run

* fix: update to use requests package

* fix: print response.text

* fix: amend requests.post call

* fix: use http client instead of requests

* fix: add new dataset-config for testing

* fix: update to pass in entire dataset config

* fix: remove unused dag_input
  • Loading branch information
botanical authored Jan 29, 2025
1 parent 7b2bad1 commit 9d2f553
Show file tree
Hide file tree
Showing 4 changed files with 198 additions and 2 deletions.
25 changes: 23 additions & 2 deletions .github/workflows/pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ jobs:
collection_ids=""
pip install -r ./scripts/requirements.txt
for file in ${PUBLISHED_COLLECTION_FILES}; do
collection_id=$(python3 ./scripts/generate-mdx.py "$file")
collection_id=$(python3 ./scripts/generate_mdx.py "$file")
collection_id=$(echo "$collection_id" | sed 's/^["\s]*//;s/["\s]*$//')
echo "Processed collection ID: $collection_id"
collection_ids="$collection_ids$collection_id,"
Expand Down Expand Up @@ -404,11 +404,32 @@ jobs:
echo "Updated Comment Body: $UPDATED_BODY"
publish-to-prod-on-pr-merge:
  # TEMPORARILY COMMENTED OUT TO TEST API REQUEST
  if: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' && github.event.pull_request.merged == true }}
  runs-on: ubuntu-latest
  environment: staging
  needs: [publish-new-datasets, create-mdx-files-and-open-pr]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Use output from publish-new-datasets
      run: |
        echo "The output from the previous step is: ${{ needs.publish-new-datasets.outputs.publishedCollections }}"

    # A step may contain only ONE `run:` key — the original had both a NO-OP
    # echo and the real promotion loop under the same step, which is a
    # duplicate-key YAML error. They are merged into a single run block here.
    - name: Publish to production on PR merge
      env:
        PUBLISHED_COLLECTION_FILES: ${{ needs.publish-new-datasets.outputs.publishedCollections }}
        SM2A_ADMIN_USERNAME: ${{ secrets.SM2A_ADMIN_USERNAME }}
        SM2A_ADMIN_PASSWORD: ${{ secrets.SM2A_ADMIN_PASSWORD }}
        SM2A_API_URL: ${{ vars.SM2A_API_URL }}
        PROMOTION_DAG: ${{ vars.PROMOTION_DAG_NAME }}
      run: |
        echo $PUBLISHED_COLLECTION_FILES
        pip install -r ./scripts/requirements.txt
        for file in ${PUBLISHED_COLLECTION_FILES}; do
          python3 ./scripts/promote_to_production.py "$file"
          echo "Processed file: $file"
        done
89 changes: 89 additions & 0 deletions ingestion-data/staging/dataset-config/test-sm2a.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
{
"collection": "ida-ndwi-difference-TEST-SM2A",
"title": "NDWI Difference for Pre and Post-Hurricane Ida from PlanetScope TEST-SM2A",
"description": "Normalized Difference Water Index Difference of before and after Hurricane Ida in Southern Louisiana.",
"license": "CC0-1.0",
"is_periodic": true,
"time_density": "day",
"spatial_extent": {
"xmin": -90.85,
"ymin": 29.03,
"xmax": -90.06,
"ymax": 29.65
},
"temporal_extent": {
"startdate": "2021-08-23T00:00:00Z",
"enddate": "2021-09-09T23:59:59Z"
},
"sample_files": [
"s3://veda-data-store-staging/planet-indices-v2/NDWI_Difference_2021-08-23_2021-09-09.tif"
],
"discovery_items": [
{
"discovery": "s3",
"cogify": false,
"upload": false,
"dry_run": false,
"prefix": "planet-indices-v2/",
"bucket": "veda-data-store-staging",
"filename_regex": "(.*)NDWI_Difference_(.*).tif$"
}
],
"data_type": "cog",
"providers": [
{
"name": "NASA VEDA",
"roles": [
"host"
],
"url": "https://www.earthdata.nasa.gov/dashboard/"
}
],
"stac_version": "1.0.0",
"stac_extensions": [
"https://stac-extensions.github.io/render/v1.0.0/schema.json",
"https://stac-extensions.github.io/item-assets/v1.0.0/schema.json"
],
"item_assets": {
"cog_default": {
"type": "image/tiff; application=geotiff; profile=cloud-optimized",
"roles": [
"data",
"layer"
],
"title": "Default COG Layer",
"description": "Cloud optimized default layer to display on map"
}
},
"renders": {
"dashboard": {
"resampling": "nearest",
"bidx": [
1
],
"colormap_name": "rdbu",
"assets": [
"cog_default"
],
"rescale": [
[
-1,
1
]
],
"title": "VEDA Dashboard Render Parameters"
}
},
"assets": {
"thumbnail": {
"href": "https://thumbnails.openveda.cloud/louisiana-marsh.jpg",
"type": "image/jpeg",
"roles": [
"thumbnail"
],
"title": "Thumbnail",
"description": "Photo by [Bridget Besaw](https://www.nature.org/en-us/get-involved/how-to-help/places-we-protect/the-nature-conservancy-in-louisiana-gulf-coast-prairies-and-marshes/) (Wetland landscape across southern Louisiana.)"
}
},
"transfer": true
}
File renamed without changes.
86 changes: 86 additions & 0 deletions scripts/promote_to_production.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
from typing import Dict, Any

import http.client
import json
import sys
import os
import uuid
from base64 import b64encode


class MissingFieldError(Exception):
    """Raised when a discovery item config is missing a required field."""

    pass


def validate_discovery_item_config(item: Dict[str, Any]) -> Dict[str, Any]:
    """Validate that a discovery item config contains all required fields.

    Args:
        item: A single entry from a dataset config's ``discovery_items`` list.

    Returns:
        The same ``item`` dict, unchanged, when every required field is present.

    Raises:
        MissingFieldError: If any required field is absent.
    """
    # Bug fix: the original error strings lacked the f-prefix, so the literal
    # text "{item}" was raised instead of the offending item's contents.
    # The four copy-pasted checks are collapsed into one loop (same order).
    required_fields = ("bucket", "discovery", "filename_regex", "prefix")
    for field in required_fields:
        if field not in item:
            raise MissingFieldError(
                f"Missing required field '{field}' in discovery item: {item}"
            )
    return item


def promote_to_production(payload):
    """Trigger the SM2A promotion DAG via the Airflow REST API.

    Args:
        payload: Dict merged into the dagRuns request body; expected to carry
            a ``"conf"`` key with the full dataset configuration.

    Returns:
        Dict with ``"statusCode"`` (HTTP status) and ``"body"`` (decoded
        response text) from the dagRuns endpoint.

    Raises:
        ValueError: If SM2A_API_URL, SM2A_ADMIN_USERNAME, or
            SM2A_ADMIN_PASSWORD is not set in the environment.
    """
    base_api_url = os.getenv("SM2A_API_URL")
    promotion_dag = os.getenv("PROMOTION_DAG_NAME", "veda_promotion_pipeline")
    username = os.getenv("SM2A_ADMIN_USERNAME")
    password = os.getenv("SM2A_ADMIN_PASSWORD")

    # Validate the raw credentials, not the token: b64encode("None:None") is
    # truthy, so the original check on api_token could never fail.
    if not base_api_url or not username or not password:
        raise ValueError(
            "SM2A_API_URL or SM2A_ADMIN_USERNAME or SM2A_ADMIN_PASSWORD is not"
            + " set in the environment variables."
        )

    # SECURITY: the original printed the password and the Basic-auth token,
    # leaking credentials into CI logs. Never log either.
    api_token = b64encode(f"{username}:{password}".encode()).decode()

    headers = {
        "Content-Type": "application/json",
        "Authorization": "Basic " + api_token,
    }

    body = {
        **payload,
        # Unique run id so repeated triggers never collide in Airflow.
        "dag_run_id": f"{promotion_dag}-{uuid.uuid4()}",
        "note": "Run from GitHub Actions veda-data",
    }

    http_conn = http.client.HTTPSConnection(base_api_url)
    try:
        # http.client's request() returns None; the response object comes
        # from getresponse() (the original assigned the None to `response`).
        http_conn.request(
            "POST",
            f"/api/v1/dags/{promotion_dag}/dagRuns",
            json.dumps(body),
            headers,
        )
        response = http_conn.getresponse()
        response_data = response.read()
        # Fixed stray "$" from the original f-string ("${response_data}").
        print(f"Response: {response_data}")
        return {"statusCode": response.status, "body": response_data.decode()}
    finally:
        # Close the connection even when the request raises.
        http_conn.close()


if __name__ == "__main__":
    # Usage: python3 promote_to_production.py <dataset-config.json>
    if len(sys.argv) < 2:
        raise SystemExit("Usage: promote_to_production.py <dataset-config.json>")

    config_path = sys.argv[1]
    try:
        with open(config_path, "r") as file:
            # Renamed from `input`, which shadowed the builtin.
            dataset_config = json.load(file)
    except json.JSONDecodeError as err:
        # Chain the original decode error for easier debugging in CI logs.
        raise ValueError(f"Invalid JSON content in file {config_path}") from err

    # Fail fast if any discovery item is missing a required field; a missing
    # or null "discovery_items" key is treated as "nothing to validate"
    # (the original crashed with TypeError iterating None).
    for item in dataset_config.get("discovery_items") or []:
        validate_discovery_item_config(item)

    dag_payload = {"conf": dataset_config}
    promote_to_production(dag_payload)

0 comments on commit 9d2f553

Please sign in to comment.