From 9a0b592824e83ffe533e659176681746e735dbf6 Mon Sep 17 00:00:00 2001
From: LynnMHouston
Date: Wed, 22 Jan 2025 12:08:06 -0500
Subject: [PATCH] Update ALN file merge.yml

---
 .github/workflows/ALN file merge.yml | 187 +++++++--------------------
 1 file changed, 50 insertions(+), 137 deletions(-)

diff --git a/.github/workflows/ALN file merge.yml b/.github/workflows/ALN file merge.yml
index 54b043497..243e137f2 100644
--- a/.github/workflows/ALN file merge.yml
+++ b/.github/workflows/ALN file merge.yml
@@ -1,140 +1,53 @@
 ---
 name: ALN File Merge
 
-on:
-  workflow_dispatch: # Enables manual trigger
-
-jobs:
-  process_and_update:
-    runs-on: ubuntu-latest
-    steps:
-      # Checkout the repository
-      - name: Checkout Repository
-        uses: actions/checkout@v3
-
-      # Set up Python
-      - name: Set Up Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: '3.x'
-
-      # Install dependencies
-      - name: Install Dependencies
-        run: pip install pandas openpyxl
-
-      - name: Install Python Dependencies
-        run: pip install -r backend/schemas/requirements.txt
-
-      - name: Install Jsonnet and Python module
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y jsonnet
-          pip install git+https://github.com/google/jsonnet.git
-
-      - name: Install Jsonnetfmt
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y jsonnet
-
-      # Merge, clean, and standardize CSV files
-      - name: Merge, Clean, and Standardize CSV Files
-        run: |
-          python -c "
-          import os
-          import pandas as pd
-          import glob
-
-          folder = './backend/schemas/source/data/ALNs_raw_downloads'
-          date_suffix = pd.Timestamp.now().strftime('%Y%m%d')
-          output_file = f'./backend/schemas/source/data/cfda-lookup-{date_suffix}.csv'
-
-          print(f'Looking for CSV files in: {folder}')
-          csv_files = glob.glob(f'{folder}/*.csv')
-          print(f'CSV files found: {csv_files}')
-
-          if not csv_files:
-              print('No data found in the input files.')
-              exit(1)
-
-          all_data = []
-          for f in csv_files:
-              try:
-                  df = pd.read_csv(f, encoding='utf-8')
-              except UnicodeDecodeError:
-                  print(f'Warning: Could not read {f} with UTF-8. Trying ISO-8859-1.')
-                  df = pd.read_csv(f, encoding='ISO-8859-1')
-              all_data.append(df)
-
-          combined_data = pd.concat(all_data, ignore_index=True)
-          all_columns = combined_data.columns.unique()
-          standardized_data = combined_data.reindex(columns=all_columns, fill_value=None)
-
-          column_mapping = {
-              'Title': 'Program Title',
-              'Assistance Listings Number': 'Program Number',
-              'Date Published': 'Date Published',
-              'Department/Ind. Agency': 'Department/Ind. Agency',
-              'Funded': 'Funded',
-              'Last Date Modified': 'Last Date Modified',
-              'POC Information': 'POC Information',
-              'Related Federal Assistance': 'Related Federal Assistance',
-              'Sub-Tier': 'Sub-Tier',
-              'Types of Assistance': 'Types of Assistance'
-          }
-
-          standardized_data = standardized_data.rename(columns=column_mapping)
-          print(f'Saving merged and standardized CSV to: {output_file}')
-          standardized_data.to_csv(output_file, index=False, encoding='utf-8')
-          print('CSV processing completed successfully.')
-          "
-
-      # Update FederalProgramNames.json
-      - name: Update FederalProgramNames.json
-        run: |
-          python -c "
-          import pandas as pd
-          import json
-          import glob
-          import os
-
-          folder = './backend/schemas/source/data'
-          latest_file = max(glob.glob(f'{folder}/cfda-lookup-*.csv'), key=os.path.getmtime)
-          output_file = './backend/schemas/source/base/FederalProgramNames.json'
-
-          print(f'Loading CSV file: {latest_file}')
-          df = pd.read_csv(latest_file)
-
-          print('Processing Program Names and Numbers')
-          program_names = df['Program Title'].dropna().str.strip().str.upper().tolist()
-          program_numbers = df['Program Number'].dropna().str.strip().tolist()
-
-          unique_prefixes = {num.split('.')[0]: None for num in program_numbers if '.' in num}
-          unique_cfda = {num: None for num in program_numbers}
-
-          output_data = {
-              'program_names': program_names,
-              'all_alns': list(unique_cfda.keys()),
-              'aln_prefixes': list(unique_prefixes.keys()),
-          }
-
-          print(f'Writing JSON file to: {output_file}')
-          with open(output_file, 'w') as json_file:
-              json.dump(output_data, json_file, indent=2, sort_keys=True)
-          print('FederalProgramNames.json updated successfully')
-          "
-
-      # Commit and push merged CSV and updated JSON
-      - name: Commit and Push Changes
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          git config user.name "github-actions[bot]"
-          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
-          git add ./backend/schemas/source/data/cfda-lookup-*.csv
-          git add ./backend/schemas/source/base/FederalProgramNames.json
-          git diff --cached --quiet || git commit -m "Update merged CSV and FederalProgramNames.json"
-          git push
-
-      # Run make all
-      - name: Run Make All
-        run: make -C backend/schemas all
+  run: |
+    python -c "
+    import os
+    import pandas as pd
+    from datetime import datetime
+    import glob
+
+    folder = './backend/schemas/source/data/ALNs_raw_downloads'
+    date_suffix = datetime.now().strftime('%Y%m%d')
+    output_file = f'./backend/schemas/source/data/cfda-lookup-{date_suffix}.csv'
+
+    print(f'Looking for CSV files in: {folder}')
+    csv_files = glob.glob(f'{folder}/*.csv')
+    print(f'CSV files found: {csv_files}')
+
+    if not csv_files:
+        print('No data found in the input files.')
+        exit(1)
+
+    all_data = []
+    for f in csv_files:
+        try:
+            df = pd.read_csv(f, encoding='utf-8')
+        except UnicodeDecodeError:
+            print(f'Warning: Could not read {f} with UTF-8. Trying ISO-8859-1.')
+            df = pd.read_csv(f, encoding='ISO-8859-1')
+        all_data.append(df)
+
+    combined_data = pd.concat(all_data, ignore_index=True)
+    all_columns = combined_data.columns.unique()
+    standardized_data = combined_data.reindex(columns=all_columns, fill_value=None)
+
+    column_mapping = {
+        'Title': 'Program Title',
+        'Assistance Listings Number': 'Program Number',
+        'Date Published': 'Date Published',
+        'Department/Ind. Agency': 'Department/Ind. Agency',
+        'Funded': 'Funded',
+        'Last Date Modified': 'Last Date Modified',
+        'POC Information': 'POC Information',
+        'Related Federal Assistance': 'Related Federal Assistance',
+        'Sub-Tier': 'Sub-Tier',
+        'Types of Assistance': 'Types of Assistance'
+    }
+
+    standardized_data = standardized_data.rename(columns=column_mapping)
+    print(f'Saving merged and standardized CSV to: {output_file}')
+    standardized_data.to_csv(output_file, index=False, encoding='utf-8')
+    print('CSV processing completed successfully.')
+    "
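
Note (appended after the diff, not part of the patch): the core technique in
the rewritten step is a CSV merge with a per-file encoding fallback and a
rename to a standard column schema. A minimal standalone sketch for exercising
that logic locally before the workflow runs is below; the helper name
read_csv_with_fallback, the merge_alns wrapper, and the abbreviated column map
are illustrative assumptions, not code from the workflow itself.

import glob
import sys
from datetime import datetime

import pandas as pd

# Paths mirror the patch; adjust to your checkout as needed.
RAW_DIR = './backend/schemas/source/data/ALNs_raw_downloads'
OUT_DIR = './backend/schemas/source/data'

# Abbreviated for the sketch; the workflow maps ten columns.
COLUMN_MAPPING = {
    'Title': 'Program Title',
    'Assistance Listings Number': 'Program Number',
}


def read_csv_with_fallback(path):
    # Mirror the workflow: try UTF-8 first, then fall back to ISO-8859-1.
    try:
        return pd.read_csv(path, encoding='utf-8')
    except UnicodeDecodeError:
        return pd.read_csv(path, encoding='ISO-8859-1')


def merge_alns(raw_dir=RAW_DIR, out_dir=OUT_DIR):
    csv_files = glob.glob(f'{raw_dir}/*.csv')
    if not csv_files:
        sys.exit('No CSV files found in the input folder.')
    # Concatenate all raw downloads, then rename to the standard schema.
    combined = pd.concat(
        (read_csv_with_fallback(f) for f in csv_files), ignore_index=True
    )
    combined = combined.rename(columns=COLUMN_MAPPING)
    out_file = f'{out_dir}/cfda-lookup-{datetime.now():%Y%m%d}.csv'
    combined.to_csv(out_file, index=False, encoding='utf-8')
    return out_file


if __name__ == '__main__':
    print(f'Wrote {merge_alns()}')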