forked from GSA-TTS/FAC
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Create manual merge of ALN files after download.yml
- Loading branch information
1 parent
bcd11d0
commit aa12d8f
Showing
1 changed file
with
61 additions
and
0 deletions.
There are no files selected for viewing
61 changes: 61 additions & 0 deletions
61
.github/workflows/manual merge of ALN files after download.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
--- | ||
# Workflow that merges the CSV files in the ALN raw-downloads folder into one
# dated cfda-lookup CSV by running an inline Python script.
name: Merge and Standardize CSV Files | ||
|
||
# Runs only when started manually (workflow_dispatch); no other trigger is declared.
on: | ||
workflow_dispatch: | ||
|
||
jobs: | ||
# Single job; all steps run sequentially on one ubuntu-latest runner.
merge_csvs: | ||
runs-on: ubuntu-latest | ||
steps: | ||
# The script reads its input CSVs from the checked-out working tree.
- name: Checkout Repository | ||
uses: actions/checkout@v3 | ||
|
||
- name: Set Up Python | ||
uses: actions/setup-python@v4 | ||
with: | ||
python-version: '3.x' | ||
|
||
# pandas is the only third-party package the inline script imports.
- name: Install Dependencies | ||
run: pip install pandas | ||
|
||
# The script is passed to python -c inside a double-quoted shell string,
# so the Python code itself must not contain double quotes.
# NOTE(review): no step visible here commits or uploads the generated CSV;
# confirm how the output file is meant to leave the runner.
- name: Merge and Standardize CSV Files | ||
run: | | ||
python -c " | ||
import glob
import os
import sys
from datetime import datetime

import pandas as pd

# This script is passed to python -c inside a double-quoted shell string, so it
# deliberately uses single quotes only and avoids shell-special characters
# (double quotes, backticks, dollar signs, backslashes).

SOURCE_FOLDER = './backend/schemas/source/data/ALN raw downloads for CFDA lookup files'

# Only these two headers are actually renamed. The other expected columns
# (Date Published, Department/Ind. Agency, Funded, Last Date Modified,
# POC Information, Related Federal Assistance, Sub-Tier, Types of Assistance)
# keep their names and therefore need no entry.
COLUMN_RENAMES = {
    'Title': 'Program Title',
    'Assistance Listings Number': 'Program Number',
}


def merge_csv_files(csv_paths):
    '''Read the given CSV files and return them merged into one DataFrame.

    Every column is read as text so that values such as a listing number of
    10.100 are not parsed as floats and written back as 10.1. Headers are
    renamed per file before concatenation, so a file that already uses the
    target header lands in the same column instead of creating a duplicate.
    Columns missing from a file are left empty for the rows of that file.

    Args:
        csv_paths: non-empty sequence of paths to CSV files.

    Returns:
        A DataFrame holding the rows of all files, in the order given, with
        the union of their columns in order of first appearance.

    Raises:
        ValueError: if csv_paths is empty (raised by pandas.concat).
    '''
    frames = [
        pd.read_csv(path, dtype=str).rename(columns=COLUMN_RENAMES)
        for path in csv_paths
    ]
    # concat already aligns on the union of columns, so no reindex is needed.
    return pd.concat(frames, ignore_index=True)


def main(folder=SOURCE_FOLDER):
    '''Merge all CSVs in folder into a dated cfda-lookup file beside it.

    Args:
        folder: directory that holds the raw CSV downloads.

    Returns:
        0 on success, 1 when the folder contains no CSV files.
    '''
    # Sorted so the row order of the output does not depend on directory order.
    csv_files = sorted(glob.glob(f'{folder}/*.csv'))
    if not csv_files:
        print('No data found in the input files.')
        return 1

    # The date stamp uses the local clock of the machine running the script.
    date_suffix = datetime.now().strftime('%Y%m%d')
    output_file = f'{os.path.dirname(folder)}/cfda-lookup-{date_suffix}.csv'

    merged = merge_csv_files(csv_files)
    merged.to_csv(output_file, index=False, encoding='utf-8')
    print(f'Data from all CSV files in {folder} has been merged, standardized, and saved to {output_file} with UTF-8 encoding.')
    return 0


# python -c executes the code with __name__ set to __main__, so this still runs
# in the workflow while keeping the functions importable without side effects.
if __name__ == '__main__':
    sys.exit(main())
" |