def run_scrape(start_time, end_time, idx=0, base_path="meta_temp/sentinel_11_03",
               sentinel_csv_path="/Users/gisaac/Downloads/sent_data.csv"):
    """Scrape, combine and upload flow data for each metadata file in ``base_path``.

    For every entry of ``base_path`` (starting at position ``idx``) a
    ``HydroScraper`` is built, its data is combined (with SNOTEL data when the
    metadata contains a ``"snotel"`` key), Sentinel data is merged in, and the
    resulting frame is uploaded to GCS and written to the ``final_flow_data``
    BigQuery table.

    Args:
        start_time: Start of the scrape window; passed through to ``HydroScraper``.
        end_time: End of the scrape window; passed through to ``HydroScraper``.
        idx: Position in the *sorted* directory listing to start from, so an
            interrupted run can be resumed.
        base_path: Directory holding one metadata file per site.
        sentinel_csv_path: CSV file with the Sentinel data that is merged into
            every site's frame.

    A ``KeyError`` raised while processing a site is reported and the loop
    moves on to the next file; any other exception propagates to the caller.
    """
    # os.listdir() returns entries in arbitrary order; sorting makes ``idx``
    # point at the same file on every run, which is what resuming relies on.
    dir_list = sorted(os.listdir(base_path))

    # Parsed on first use only: the file does not change between sites, and an
    # empty/exhausted listing should not require it to exist.
    sentinel_df = None

    for i, file_name in enumerate(dir_list[idx:], start=idx):
        print(f"Now getting data for {i}")
        print(file_name)
        try:
            h = HydroScraper(
                start_time,
                end_time,
                os.path.join(base_path, file_name),
                asos_bq_table="asos_weather",
            )
            h.combine_data()
            if "snotel" in h.meta_data:
                h.combine_snotel_with_df()
            else:
                # No SNOTEL station for this site: the joined frame is final.
                h.final_df = h.joined_df

            if sentinel_df is None:
                sentinel_df = pd.read_csv(sentinel_csv_path)
            # NOTE(review): combine_sentinel is not visible here; hand it a copy
            # so a possible in-place mutation cannot leak into the next site.
            h.combine_sentinel(sentinel_df.copy(), h.meta_data["sentinel_2_tile_id"])

            site_number = h.meta_data["site_number"]
            h.bq_connect.upload_file_to_gcs(h.final_df, site_number)
            # The id column is added after the GCS upload, as before, so the
            # uploaded file keeps its original set of columns.
            h.final_df["usgs_id"] = site_number
            print(h.final_df.columns)
            h.write_final_df_to_bq("final_flow_data")
        except KeyError as e:
            # Missing columns / metadata keys are expected for some sites;
            # report and continue with the next file.
            print(e)
            print(f"Failed {file_name}")
on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"cfs\"] = pd.to_numeric(df['cfs'], errors='coerce')\n", + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:73: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"height\"] = pd.to_numeric(df['height'], errors='coerce')\n", + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:75: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"precip_usgs\"] = pd.to_numeric(df['precip_usgs'], errors='coerce')\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " station valid tmpf dwpf relh feel sknt sped alti \\\n", + "0 TVK 2024-01-01 00:15 30.2 24.8 80.07 21.28 9.0 10.35 30.23 \n", + "1 TVK 2024-01-01 00:35 30.2 24.8 80.07 20.71 10.0 11.50 30.24 \n", + "2 TVK 2024-01-01 00:55 30.2 24.8 80.07 20.71 10.0 11.50 30.24 \n", + "3 TVK 2024-01-01 01:15 30.2 23.0 74.26 19.71 12.0 13.80 30.25 \n", + "4 TVK 2024-01-01 01:35 30.2 23.0 74.26 21.28 9.0 10.35 30.25 \n", + ".. ... ... ... ... ... ... ... ... ... 
\n", + "211 TVK 2024-01-03 22:35 37.4 24.8 60.04 32.03 6.0 6.90 30.15 \n", + "212 TVK 2024-01-03 22:55 35.6 24.8 64.46 29.87 6.0 6.90 30.15 \n", + "213 TVK 2024-01-03 23:15 33.8 24.8 69.25 28.53 5.0 5.75 30.16 \n", + "214 TVK 2024-01-03 23:35 33.8 24.8 69.25 26.33 8.0 9.20 30.17 \n", + "215 TVK 2024-01-03 23:55 32.0 24.8 74.44 26.40 5.0 5.75 30.18 \n", + "\n", + " mslp drct ice_accretion_1hr p01m vsby gust skyc1 peak_wind_gust \\\n", + "0 M 310.00 M 0.0 10.0 M OVC M \n", + "1 M 320.00 M 0.0 10.0 M OVC M \n", + "2 M 320.00 M 0.0 10.0 M OVC M \n", + "3 M 320.00 M 0.0 10.0 M OVC M \n", + "4 M 330.00 M 0.0 10.0 M OVC M \n", + ".. ... ... ... ... ... ... ... ... \n", + "211 M 330.00 M 0.0 10.0 M CLR M \n", + "212 M 340.00 M 0.0 10.0 M CLR M \n", + "213 M 340.00 M 0.0 10.0 M CLR M \n", + "214 M 350.00 M 0.0 10.0 M CLR M \n", + "215 M 360.00 M 0.0 10.0 M CLR M \n", + "\n", + " snowdepth \n", + "0 M \n", + "1 M \n", + "2 M \n", + "3 M \n", + "4 M \n", + ".. ... \n", + "211 M \n", + "212 M \n", + "213 M \n", + "214 M \n", + "215 M \n", + "\n", + "[216 rows x 18 columns]\n", + "after\n", + " Unnamed: 0 hour_updated p01m valid tmpf dwpf \\\n", + "0 0 2024-01-01 01:00:00 0.0 2024-01-01 00:15 30.2 24.8 \n", + "1 1 2024-01-01 02:00:00 0.0 2024-01-01 01:15 30.2 23.0 \n", + "2 2 2024-01-01 03:00:00 0.0 2024-01-01 02:15 29.0 23.0 \n", + "3 3 2024-01-01 04:00:00 0.0 2024-01-01 03:15 28.4 22.4 \n", + "4 4 2024-01-01 05:00:00 0.0 2024-01-01 04:15 28.4 23.0 \n", + ".. ... ... ... ... ... ... 
\n", + "67 67 2024-01-03 20:00:00 0.0 2024-01-03 19:15 39.2 26.6 \n", + "68 68 2024-01-03 21:00:00 0.0 2024-01-03 20:15 41.0 24.8 \n", + "69 69 2024-01-03 22:00:00 0.0 2024-01-03 21:15 41.0 24.8 \n", + "70 70 2024-01-03 23:00:00 0.0 2024-01-03 22:15 37.4 24.8 \n", + "71 71 2024-01-04 00:00:00 0.0 2024-01-03 23:15 33.2 24.8 \n", + "\n", + " ice_accretion_1hr mslp drct sped alti relh sknt feel vsby \\\n", + "0 NaN NaN 310.0 10.35 30.23 80.07 9.0 21.28 10.0 \n", + "1 NaN NaN 320.0 13.80 30.25 74.26 12.0 19.71 10.0 \n", + "2 NaN NaN 330.0 12.65 30.26 74.26 11.0 20.19 10.0 \n", + "3 NaN NaN 330.0 13.80 30.28 79.92 12.0 17.42 10.0 \n", + "4 NaN NaN 330.0 8.05 30.29 79.92 7.0 20.39 10.0 \n", + ".. ... ... ... ... ... ... ... ... ... \n", + "67 NaN NaN 320.0 3.45 30.11 64.70 3.0 34.86 10.0 \n", + "68 NaN NaN 320.0 10.35 30.10 52.17 9.0 34.70 10.0 \n", + "69 NaN NaN 320.0 10.35 30.11 52.17 9.0 34.70 10.0 \n", + "70 NaN NaN 320.0 10.35 30.14 55.95 9.0 32.47 10.0 \n", + "71 NaN NaN 340.0 5.75 30.16 69.25 5.0 28.53 10.0 \n", + "\n", + " gust skyc1 peak_wind_gust snowdepth \n", + "0 NaN OVC NaN NaN \n", + "1 NaN OVC NaN NaN \n", + "2 NaN OVC NaN NaN \n", + "3 14.0 BKN NaN NaN \n", + "4 NaN OVC NaN NaN \n", + ".. ... ... ... ... \n", + "67 11.0 CLR NaN NaN \n", + "68 14.0 FEW NaN NaN \n", + "69 14.0 CLR NaN NaN \n", + "70 NaN CLR NaN NaN \n", + "71 NaN CLR NaN NaN \n", + "\n", + "[72 rows x 19 columns]\n", + "after\n", + "Scraping completed\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/gisaac/anaconda3/lib/python3.11/site-packages/google/auth/_default.py:76: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. 
\n", + " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n", + "/Users/gisaac/anaconda3/lib/python3.11/site-packages/google/auth/_default.py:76: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. \n", + " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['hour_updated', 'p01m', 'valid', 'tmpf', 'dwpf', 'ice_accretion_1hr',\n", + " 'mslp', 'drct', 'sped', 'alti', 'relh', 'sknt', 'feel', 'vsby', 'gust',\n", + " 'skyc1', 'peak_wind_gust', 'snowdepth', 'station_id', 'index',\n", + " 'datetime', 'precip_usgs', 'cfs', 'height', 'sensing_time', 'base_url',\n", + " 'usgs_id'],\n", + " dtype='object')\n", + "LoadJob\n", + "Now getting data for 8\n", + "07263880.json\n", + "Getting request from USGS\n", + "http://waterservices.usgs.gov/nwis/iv/?format=rdb,1.0&sites=07263880&startDT=2024-01-01&endDT=2024-01-02¶meterCd=00060,00065,00045&siteStatus=all\n", + "Request finished\n", + "'cfs'\n", + "Failed 07263880.json\n", + "Now getting data for 9\n", + "09296800.json\n", + "Getting request from USGS\n", + "http://waterservices.usgs.gov/nwis/iv/?format=rdb,1.0&sites=09296800&startDT=2024-01-01&endDT=2024-01-02¶meterCd=00060,00065,00045&siteStatus=all\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:70: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"datetime\"] = 
df[\"datetime\"].map(lambda x: old_timezone.localize(datetime.strptime(x, \"%Y-%m-%d %H:%M\")).astimezone(new_timezone))\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Request finished\n", + "Getting request from ASOS\n", + "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=YELL&data=tmpf&data=dwpf&data=relh&data=feel&data=sknt&data=sped&data=alti&data=mslp&data=drct&data=ice_accretion_1hr&data=p01m&data=vsby&data=gust&data=skyc1&data=peak_wind_gust&data=snowdepth&year1=2024&month1=1&day1=1&year2=2024&month2=1&day2=4&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&direct=no&report_type=3&report_type=4\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:70: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"datetime\"] = df[\"datetime\"].map(lambda x: old_timezone.localize(datetime.strptime(x, \"%Y-%m-%d %H:%M\")).astimezone(new_timezone))\n", + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:71: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"cfs\"] = pd.to_numeric(df['cfs'], errors='coerce')\n", + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:73: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value 
instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"height\"] = pd.to_numeric(df['height'], errors='coerce')\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Empty DataFrame\n", + "Columns: [station, valid, tmpf, dwpf, relh, feel, sknt, sped, alti, mslp, drct, ice_accretion_1hr, p01m, vsby, gust, skyc1, peak_wind_gust, snowdepth]\n", + "Index: []\n", + "after\n", + "Empty DataFrame\n", + "Columns: [Unnamed: 0, hour_updated, p01m, valid, tmpf, dwpf, ice_accretion_1hr, mslp, drct, sped, alti, relh, sknt, feel, vsby, gust, skyc1, peak_wind_gust, snowdepth]\n", + "Index: []\n", + "after\n", + "Initial ASOS data empty, trying again\n", + "Getting request from ASOS\n", + "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=CHEP&data=tmpf&data=dwpf&data=relh&data=feel&data=sknt&data=sped&data=alti&data=mslp&data=drct&data=ice_accretion_1hr&data=p01m&data=vsby&data=gust&data=skyc1&data=peak_wind_gust&data=snowdepth&year1=2024&month1=1&day1=1&year2=2024&month2=1&day2=4&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&direct=no&report_type=3&report_type=4\n", + "Empty DataFrame\n", + "Columns: [station, valid, tmpf, dwpf, relh, feel, sknt, sped, alti, mslp, drct, ice_accretion_1hr, p01m, vsby, gust, skyc1, peak_wind_gust, snowdepth]\n", + "Index: []\n", + "after\n", + "Empty DataFrame\n", + "Columns: [Unnamed: 0, hour_updated, p01m, valid, tmpf, dwpf, ice_accretion_1hr, mslp, drct, sped, alti, relh, sknt, feel, vsby, gust, skyc1, peak_wind_gust, snowdepth]\n", + "Index: []\n", + "after\n", + "Initial ASOS data empty, trying again\n", + "Getting request from ASOS\n", + 
"https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=74V&data=tmpf&data=dwpf&data=relh&data=feel&data=sknt&data=sped&data=alti&data=mslp&data=drct&data=ice_accretion_1hr&data=p01m&data=vsby&data=gust&data=skyc1&data=peak_wind_gust&data=snowdepth&year1=2024&month1=1&day1=1&year2=2024&month2=1&day2=4&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&direct=no&report_type=3&report_type=4\n", + " station valid tmpf dwpf relh feel sknt sped alti \\\n", + "0 74V 2024-01-01 00:15 32.0 14.0 46.92 32.00 0.0 0.00 30.16 \n", + "1 74V 2024-01-01 00:35 26.6 12.2 54.03 26.60 0.0 0.00 30.16 \n", + "2 74V 2024-01-01 00:55 23.0 10.4 57.96 23.00 0.0 0.00 30.17 \n", + "3 74V 2024-01-01 01:15 23.0 10.4 57.96 23.00 0.0 0.00 30.17 \n", + "4 74V 2024-01-01 01:35 21.2 10.4 62.53 16.34 3.0 3.45 30.18 \n", + ".. ... ... ... ... ... ... ... ... ... \n", + "211 74V 2024-01-03 22:35 30.2 15.8 54.59 30.20 0.0 0.00 29.98 \n", + "212 74V 2024-01-03 22:55 32.0 15.8 50.75 32.00 0.0 0.00 29.99 \n", + "213 74V 2024-01-03 23:15 30.2 15.8 54.59 30.20 0.0 0.00 29.99 \n", + "214 74V 2024-01-03 23:35 30.2 14.0 50.47 30.20 0.0 0.00 29.98 \n", + "215 74V 2024-01-03 23:55 30.2 14.0 50.47 30.20 0.0 0.00 29.98 \n", + "\n", + " mslp drct ice_accretion_1hr p01m vsby gust skyc1 peak_wind_gust \\\n", + "0 M 0.00 M 0.0 10.0 M CLR M \n", + "1 M 0.00 M 0.0 10.0 M CLR M \n", + "2 M 0.00 M 0.0 10.0 M CLR M \n", + "3 M 0.00 M 0.0 10.0 M CLR M \n", + "4 M 250.00 M 0.0 10.0 M CLR M \n", + ".. ... ... ... ... ... ... ... ... \n", + "211 M 0.00 M 0.0 10.0 M SCT M \n", + "212 M 0.00 M 0.0 10.0 M BKN M \n", + "213 M 0.00 M 0.0 10.0 M OVC M \n", + "214 M 0.00 M 0.0 10.0 M BKN M \n", + "215 M 0.00 M 0.0 10.0 M FEW M \n", + "\n", + " snowdepth \n", + "0 M \n", + "1 M \n", + "2 M \n", + "3 M \n", + "4 M \n", + ".. ... 
\n", + "211 M \n", + "212 M \n", + "213 M \n", + "214 M \n", + "215 M \n", + "\n", + "[216 rows x 18 columns]\n", + "after\n", + " Unnamed: 0 hour_updated p01m valid tmpf dwpf \\\n", + "0 0 2024-01-01 01:00:00 0.0 2024-01-01 00:15 27.2 12.2 \n", + "1 1 2024-01-01 02:00:00 0.0 2024-01-01 01:15 21.2 10.4 \n", + "2 2 2024-01-01 03:00:00 0.0 2024-01-01 02:15 18.8 10.4 \n", + "3 3 2024-01-01 04:00:00 0.0 2024-01-01 03:15 17.6 10.4 \n", + "4 4 2024-01-01 05:00:00 0.0 2024-01-01 04:15 16.4 10.4 \n", + ".. ... ... ... ... ... ... \n", + "67 67 2024-01-03 20:00:00 0.0 2024-01-03 19:15 28.4 14.0 \n", + "68 68 2024-01-03 21:00:00 0.0 2024-01-03 20:15 30.2 14.0 \n", + "69 69 2024-01-03 22:00:00 0.0 2024-01-03 21:15 30.2 15.8 \n", + "70 70 2024-01-03 23:00:00 0.0 2024-01-03 22:15 30.8 15.8 \n", + "71 71 2024-01-04 00:00:00 0.0 2024-01-03 23:15 30.2 14.6 \n", + "\n", + " ice_accretion_1hr mslp drct sped alti relh sknt feel vsby \\\n", + "0 NaN NaN 0.0 0.00 30.16 46.92 0.0 32.00 10.0 \n", + "1 NaN NaN 0.0 0.00 30.17 57.96 0.0 23.00 10.0 \n", + "2 NaN NaN 260.0 4.60 30.19 67.51 4.0 12.71 10.0 \n", + "3 NaN NaN 0.0 0.00 30.20 72.93 0.0 17.60 10.0 \n", + "4 NaN NaN 240.0 4.60 30.19 72.93 4.0 10.60 10.0 \n", + ".. ... ... ... ... ... ... ... ... ... \n", + "67 NaN NaN 80.0 5.75 30.06 58.49 5.0 19.98 10.0 \n", + "68 NaN NaN 0.0 0.00 30.02 50.47 0.0 30.20 10.0 \n", + "69 NaN NaN 120.0 3.45 29.99 54.59 3.0 26.63 10.0 \n", + "70 NaN NaN 0.0 0.00 29.99 54.59 0.0 30.20 10.0 \n", + "71 NaN NaN 0.0 0.00 29.99 54.59 0.0 30.20 10.0 \n", + "\n", + " gust skyc1 peak_wind_gust snowdepth \n", + "0 NaN CLR NaN NaN \n", + "1 NaN CLR NaN NaN \n", + "2 NaN CLR NaN NaN \n", + "3 NaN CLR NaN NaN \n", + "4 NaN CLR NaN NaN \n", + ".. ... ... ... ... 
\n", + "67 NaN CLR NaN NaN \n", + "68 NaN CLR NaN NaN \n", + "69 NaN CLR NaN NaN \n", + "70 NaN CLR NaN NaN \n", + "71 NaN OVC NaN NaN \n", + "\n", + "[72 rows x 19 columns]\n", + "after\n", + "Scraping completed\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/gisaac/anaconda3/lib/python3.11/site-packages/google/auth/_default.py:76: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. \n", + " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n", + "/Users/gisaac/anaconda3/lib/python3.11/site-packages/google/auth/_default.py:76: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. 
\n", + " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The base URL for SNOTEL is below: \n", + "https://powderlines.kellysoftware.org/api/station/643:UT:SNTL?start_date=2024-01-01&end_date=2024-01-02\n", + "Index(['hour_updated', 'p01m', 'valid', 'tmpf', 'dwpf', 'ice_accretion_1hr',\n", + " 'mslp', 'drct', 'sped', 'alti', 'relh', 'sknt', 'feel', 'vsby', 'gust',\n", + " 'skyc1', 'peak_wind_gust', 'snowdepth', 'station_id', 'index',\n", + " 'datetime', 'cfs', 'height', 'Date', 'snotel_snow_depth', 'swe',\n", + " 'change_swe', 'change_snow_depth', 'observed_temp_snotel',\n", + " 'sensing_time', 'base_url', 'usgs_id'],\n", + " dtype='object')\n", + "LoadJob\n", + "Now getting data for 10\n", + "01417500.json\n", + "Getting request from USGS\n", + "http://waterservices.usgs.gov/nwis/iv/?format=rdb,1.0&sites=01417500&startDT=2024-01-01&endDT=2024-01-02¶meterCd=00060,00065,00045&siteStatus=all\n", + "Request finished\n", + "Getting request from ASOS\n", + "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=MSV&data=tmpf&data=dwpf&data=relh&data=feel&data=sknt&data=sped&data=alti&data=mslp&data=drct&data=ice_accretion_1hr&data=p01m&data=vsby&data=gust&data=skyc1&data=peak_wind_gust&data=snowdepth&year1=2024&month1=1&day1=1&year2=2024&month2=1&day2=4&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&direct=no&report_type=3&report_type=4\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:70: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"datetime\"] = df[\"datetime\"].map(lambda x: 
old_timezone.localize(datetime.strptime(x, \"%Y-%m-%d %H:%M\")).astimezone(new_timezone))\n", + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:71: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"cfs\"] = pd.to_numeric(df['cfs'], errors='coerce')\n", + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:73: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"height\"] = pd.to_numeric(df['height'], errors='coerce')\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Empty DataFrame\n", + "Columns: [station, valid, tmpf, dwpf, relh, feel, sknt, sped, alti, mslp, drct, ice_accretion_1hr, p01m, vsby, gust, skyc1, peak_wind_gust, snowdepth]\n", + "Index: []\n", + "after\n", + "Empty DataFrame\n", + "Columns: [Unnamed: 0, hour_updated, p01m, valid, tmpf, dwpf, ice_accretion_1hr, mslp, drct, sped, alti, relh, sknt, feel, vsby, gust, skyc1, peak_wind_gust, snowdepth]\n", + "Index: []\n", + "after\n", + "Initial ASOS data empty, trying again\n", + "Getting request from ASOS\n", + 
"https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=KMSV&data=tmpf&data=dwpf&data=relh&data=feel&data=sknt&data=sped&data=alti&data=mslp&data=drct&data=ice_accretion_1hr&data=p01m&data=vsby&data=gust&data=skyc1&data=peak_wind_gust&data=snowdepth&year1=2024&month1=1&day1=1&year2=2024&month2=1&day2=4&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&direct=no&report_type=3&report_type=4\n", + "Empty DataFrame\n", + "Columns: [station, valid, tmpf, dwpf, relh, feel, sknt, sped, alti, mslp, drct, ice_accretion_1hr, p01m, vsby, gust, skyc1, peak_wind_gust, snowdepth]\n", + "Index: []\n", + "after\n", + "Empty DataFrame\n", + "Columns: [Unnamed: 0, hour_updated, p01m, valid, tmpf, dwpf, ice_accretion_1hr, mslp, drct, sped, alti, relh, sknt, feel, vsby, gust, skyc1, peak_wind_gust, snowdepth]\n", + "Index: []\n", + "after\n", + "Initial ASOS data empty, trying again\n", + "Getting request from ASOS\n", + "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=YBEL&data=tmpf&data=dwpf&data=relh&data=feel&data=sknt&data=sped&data=alti&data=mslp&data=drct&data=ice_accretion_1hr&data=p01m&data=vsby&data=gust&data=skyc1&data=peak_wind_gust&data=snowdepth&year1=2024&month1=1&day1=1&year2=2024&month2=1&day2=4&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&direct=no&report_type=3&report_type=4\n", + "Empty DataFrame\n", + "Columns: [station, valid, tmpf, dwpf, relh, feel, sknt, sped, alti, mslp, drct, ice_accretion_1hr, p01m, vsby, gust, skyc1, peak_wind_gust, snowdepth]\n", + "Index: []\n", + "after\n", + "Empty DataFrame\n", + "Columns: [Unnamed: 0, hour_updated, p01m, valid, tmpf, dwpf, ice_accretion_1hr, mslp, drct, sped, alti, relh, sknt, feel, vsby, gust, skyc1, peak_wind_gust, snowdepth]\n", + "Index: []\n", + "after\n", + "Initial ASOS data empty, trying again\n", + "Getting request from ASOS\n", + 
"https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=BGMthr&data=tmpf&data=dwpf&data=relh&data=feel&data=sknt&data=sped&data=alti&data=mslp&data=drct&data=ice_accretion_1hr&data=p01m&data=vsby&data=gust&data=skyc1&data=peak_wind_gust&data=snowdepth&year1=2024&month1=1&day1=1&year2=2024&month2=1&day2=4&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&direct=no&report_type=3&report_type=4\n", + "Empty DataFrame\n", + "Columns: [station, valid, tmpf, dwpf, relh, feel, sknt, sped, alti, mslp, drct, ice_accretion_1hr, p01m, vsby, gust, skyc1, peak_wind_gust, snowdepth]\n", + "Index: []\n", + "after\n", + "Empty DataFrame\n", + "Columns: [Unnamed: 0, hour_updated, p01m, valid, tmpf, dwpf, ice_accretion_1hr, mslp, drct, sped, alti, relh, sknt, feel, vsby, gust, skyc1, peak_wind_gust, snowdepth]\n", + "Index: []\n", + "after\n", + "Initial ASOS data empty, trying again\n", + "Getting request from ASOS\n", + "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=KBGM&data=tmpf&data=dwpf&data=relh&data=feel&data=sknt&data=sped&data=alti&data=mslp&data=drct&data=ice_accretion_1hr&data=p01m&data=vsby&data=gust&data=skyc1&data=peak_wind_gust&data=snowdepth&year1=2024&month1=1&day1=1&year2=2024&month2=1&day2=4&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&direct=no&report_type=3&report_type=4\n", + " station valid tmpf dwpf relh feel sknt sped alti \\\n", + "0 BGM 2024-01-01 00:04 33.0 27.0 78.34 33.00 0.0 0.00 29.92 \n", + "1 BGM 2024-01-01 00:53 32.0 28.0 84.98 28.69 3.0 3.45 29.92 \n", + "2 BGM 2024-01-01 01:53 32.0 28.0 84.98 32.00 0.0 0.00 29.91 \n", + "3 BGM 2024-01-01 02:53 32.0 28.0 84.98 32.00 0.0 0.00 29.92 \n", + "4 BGM 2024-01-01 03:09 32.0 28.0 84.98 32.00 0.0 0.00 29.92 \n", + ".. ... ... ... ... ... ... ... ... ... 
\n", + "100 BGM 2024-01-03 19:53 32.0 24.0 72.00 25.53 6.0 6.90 29.90 \n", + "101 BGM 2024-01-03 20:53 32.0 24.0 72.00 24.11 8.0 9.20 29.89 \n", + "102 BGM 2024-01-03 21:53 32.0 24.0 72.00 26.40 5.0 5.75 29.91 \n", + "103 BGM 2024-01-03 22:53 32.0 24.0 72.00 28.69 3.0 3.45 29.92 \n", + "104 BGM 2024-01-03 23:53 32.0 24.0 72.00 32.00 0.0 0.00 29.92 \n", + "\n", + " mslp drct ice_accretion_1hr p01m vsby gust skyc1 peak_wind_gust \\\n", + "0 M 0.00 M 0.00 10.0 M BKN M \n", + "1 1014.70 70.00 M 0.00 10.0 M SCT M \n", + "2 1014.50 0.00 M 0.00 10.0 M SCT M \n", + "3 1014.70 0.00 M T 2.0 M FEW M \n", + "4 M 0.00 M T 4.0 M BKN M \n", + ".. ... ... ... ... ... ... ... ... \n", + "100 1014.40 230.00 M 0.00 10.0 M OVC M \n", + "101 1014.00 260.00 M 0.00 10.0 M OVC M \n", + "102 1014.40 270.00 M 0.00 10.0 M OVC M \n", + "103 1015.00 200.00 M 0.00 10.0 M OVC M \n", + "104 1014.80 0.00 M 0.00 10.0 M OVC M \n", + "\n", + " snowdepth \n", + "0 M \n", + "1 M \n", + "2 M \n", + "3 M \n", + "4 M \n", + ".. ... \n", + "100 M \n", + "101 M \n", + "102 M \n", + "103 M \n", + "104 M \n", + "\n", + "[105 rows x 18 columns]\n", + "after\n", + " Unnamed: 0 hour_updated p01m valid tmpf dwpf \\\n", + "0 0 2024-01-01 01:00:00 0.00 2024-01-01 00:04 32.50 27.50 \n", + "1 1 2024-01-01 02:00:00 0.00 2024-01-01 01:53 32.00 28.00 \n", + "2 2 2024-01-01 03:00:00 0.00 2024-01-01 02:53 32.00 28.00 \n", + "3 3 2024-01-01 04:00:00 0.25 2024-01-01 03:09 31.50 28.00 \n", + "4 4 2024-01-01 05:00:00 0.75 2024-01-01 04:28 30.84 28.48 \n", + ".. ... ... ... ... ... ... 
\n", + "67 67 2024-01-03 20:00:00 0.00 2024-01-03 19:53 32.00 24.00 \n", + "68 68 2024-01-03 21:00:00 0.00 2024-01-03 20:53 32.00 24.00 \n", + "69 69 2024-01-03 22:00:00 0.00 2024-01-03 21:53 32.00 24.00 \n", + "70 70 2024-01-03 23:00:00 0.00 2024-01-03 22:53 32.00 24.00 \n", + "71 71 2024-01-04 00:00:00 0.00 2024-01-03 23:53 32.00 24.00 \n", + "\n", + " ice_accretion_1hr mslp drct sped alti relh sknt feel vsby \\\n", + "0 NaN 1014.7 0.0 0.00 29.92 78.34 0.0 33.00 10.0 \n", + "1 NaN 1014.5 0.0 0.00 29.91 84.98 0.0 32.00 10.0 \n", + "2 NaN 1014.7 0.0 0.00 29.92 84.98 0.0 32.00 2.0 \n", + "3 NaN 1014.7 0.0 0.00 29.92 84.98 0.0 32.00 4.0 \n", + "4 NaN 1014.6 0.0 0.00 29.92 88.48 0.0 31.00 4.0 \n", + ".. ... ... ... ... ... ... ... ... ... \n", + "67 NaN 1014.4 230.0 6.90 29.90 72.00 6.0 25.53 10.0 \n", + "68 NaN 1014.0 260.0 9.20 29.89 72.00 8.0 24.11 10.0 \n", + "69 NaN 1014.4 270.0 5.75 29.91 72.00 5.0 26.40 10.0 \n", + "70 NaN 1015.0 200.0 3.45 29.92 72.00 3.0 28.69 10.0 \n", + "71 NaN 1014.8 0.0 0.00 29.92 72.00 0.0 32.00 10.0 \n", + "\n", + " gust skyc1 peak_wind_gust snowdepth \n", + "0 NaN BKN NaN NaN \n", + "1 NaN SCT NaN NaN \n", + "2 NaN FEW NaN NaN \n", + "3 NaN BKN NaN NaN \n", + "4 NaN SCT NaN NaN \n", + ".. ... ... ... ... \n", + "67 NaN OVC NaN NaN \n", + "68 NaN OVC NaN NaN \n", + "69 NaN OVC NaN NaN \n", + "70 NaN OVC NaN NaN \n", + "71 NaN OVC NaN NaN \n", + "\n", + "[72 rows x 19 columns]\n", + "after\n", + "Scraping completed\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/gisaac/anaconda3/lib/python3.11/site-packages/google/auth/_default.py:76: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. 
\n", + " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n", + "/Users/gisaac/anaconda3/lib/python3.11/site-packages/google/auth/_default.py:76: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. \n", + " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LoadJob\n", + "ASOS data written to BigQuery\n", + "Index(['hour_updated', 'p01m', 'valid', 'tmpf', 'dwpf', 'ice_accretion_1hr',\n", + " 'mslp', 'drct', 'sped', 'alti', 'relh', 'sknt', 'feel', 'vsby', 'gust',\n", + " 'skyc1', 'peak_wind_gust', 'snowdepth', 'station_id', 'index',\n", + " 'datetime', 'cfs', 'height', 'sensing_time', 'base_url', 'usgs_id'],\n", + " dtype='object')\n", + "LoadJob\n", + "Now getting data for 11\n", + "03277500.json\n", + "Getting request from USGS\n", + "http://waterservices.usgs.gov/nwis/iv/?format=rdb,1.0&sites=03277500&startDT=2024-01-01&endDT=2024-01-02¶meterCd=00060,00065,00045&siteStatus=all\n", + "Request finished\n", + "Getting request from ASOS\n", + "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=NS182&data=tmpf&data=dwpf&data=relh&data=feel&data=sknt&data=sped&data=alti&data=mslp&data=drct&data=ice_accretion_1hr&data=p01m&data=vsby&data=gust&data=skyc1&data=peak_wind_gust&data=snowdepth&year1=2024&month1=1&day1=1&year2=2024&month2=1&day2=4&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&direct=no&report_type=3&report_type=4\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:70: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using 
.loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"datetime\"] = df[\"datetime\"].map(lambda x: old_timezone.localize(datetime.strptime(x, \"%Y-%m-%d %H:%M\")).astimezone(new_timezone))\n", + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:71: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"cfs\"] = pd.to_numeric(df['cfs'], errors='coerce')\n", + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:73: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"height\"] = pd.to_numeric(df['height'], errors='coerce')\n", + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:75: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"precip_usgs\"] = pd.to_numeric(df['precip_usgs'], errors='coerce')\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Empty DataFrame\n", + "Columns: [station, valid, tmpf, dwpf, relh, feel, sknt, sped, alti, mslp, drct, ice_accretion_1hr, p01m, vsby, gust, skyc1, 
peak_wind_gust, snowdepth]\n", + "Index: []\n", + "after\n", + "Empty DataFrame\n", + "Columns: [Unnamed: 0, hour_updated, p01m, valid, tmpf, dwpf, ice_accretion_1hr, mslp, drct, sped, alti, relh, sknt, feel, vsby, gust, skyc1, peak_wind_gust, snowdepth]\n", + "Index: []\n", + "after\n", + "Initial ASOS data empty, trying again\n", + "Getting request from ASOS\n", + "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=KPEA&data=tmpf&data=dwpf&data=relh&data=feel&data=sknt&data=sped&data=alti&data=mslp&data=drct&data=ice_accretion_1hr&data=p01m&data=vsby&data=gust&data=skyc1&data=peak_wind_gust&data=snowdepth&year1=2024&month1=1&day1=1&year2=2024&month2=1&day2=4&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&direct=no&report_type=3&report_type=4\n", + " station valid tmpf dwpf relh feel sknt sped alti \\\n", + "0 PEA 2024-01-01 00:15 30.0 23.7 77.10 19.46 12.0 13.80 30.23 \n", + "1 PEA 2024-01-01 00:35 30.0 24.0 78.07 21.65 8.0 9.20 30.24 \n", + "2 PEA 2024-01-01 00:55 29.7 24.0 79.03 19.56 11.0 12.65 30.25 \n", + "3 PEA 2024-01-01 01:15 29.7 24.0 79.03 21.28 8.0 9.20 30.26 \n", + "4 PEA 2024-01-01 01:35 29.5 24.0 79.68 21.73 7.0 8.05 30.26 \n", + ".. ... ... ... ... ... ... ... ... ... \n", + "211 PEA 2024-01-03 22:35 33.0 26.6 77.06 23.27 12.0 13.80 30.17 \n", + "212 PEA 2024-01-03 22:55 32.0 26.4 79.56 22.97 10.0 11.50 30.18 \n", + "213 PEA 2024-01-03 23:15 32.0 26.2 78.90 24.11 8.0 9.20 30.19 \n", + "214 PEA 2024-01-03 23:35 31.6 26.0 79.52 24.29 7.0 8.05 30.19 \n", + "215 PEA 2024-01-03 23:55 31.6 26.0 79.52 25.05 6.0 6.90 30.20 \n", + "\n", + " mslp drct ice_accretion_1hr p01m vsby gust skyc1 peak_wind_gust \\\n", + "0 M 330.0 M 0.0 10.0 M OVC M \n", + "1 M 330.0 M 0.0 10.0 M OVC M \n", + "2 M 320.0 M 0.0 10.0 M OVC M \n", + "3 M 330.0 M 0.0 10.0 M OVC M \n", + "4 M 320.0 M 0.0 10.0 M OVC M \n", + ".. ... ... ... ... ... ... ... ... 
\n", + "211 M 310.0 M 0.0 10.0 M OVC M \n", + "212 M 320.0 M 0.0 10.0 M OVC M \n", + "213 M 320.0 M 0.0 10.0 M OVC M \n", + "214 M 330.0 M 0.0 10.0 M OVC M \n", + "215 M 330.0 M 0.0 10.0 M OVC M \n", + "\n", + " snowdepth \n", + "0 M \n", + "1 M \n", + "2 M \n", + "3 M \n", + "4 M \n", + ".. ... \n", + "211 M \n", + "212 M \n", + "213 M \n", + "214 M \n", + "215 M \n", + "\n", + "[216 rows x 18 columns]\n", + "after\n", + " Unnamed: 0 hour_updated p01m valid tmpf \\\n", + "0 0 2024-01-01 01:00:00 0.0 2024-01-01 00:15 29.900000 \n", + "1 1 2024-01-01 02:00:00 0.0 2024-01-01 01:15 29.500000 \n", + "2 2 2024-01-01 03:00:00 0.0 2024-01-01 02:15 28.800000 \n", + "3 3 2024-01-01 04:00:00 0.0 2024-01-01 03:15 28.133333 \n", + "4 4 2024-01-01 05:00:00 0.0 2024-01-01 04:15 27.666667 \n", + ".. ... ... ... ... ... \n", + "67 67 2024-01-03 20:00:00 0.0 2024-01-03 19:15 37.633333 \n", + "68 68 2024-01-03 21:00:00 0.0 2024-01-03 20:15 37.666667 \n", + "69 69 2024-01-03 22:00:00 0.0 2024-01-03 21:15 36.433333 \n", + "70 70 2024-01-03 23:00:00 0.0 2024-01-03 22:15 33.000000 \n", + "71 71 2024-01-04 00:00:00 0.0 2024-01-03 23:15 31.733333 \n", + "\n", + " dwpf ice_accretion_1hr mslp drct sped alti relh sknt \\\n", + "0 23.900000 NaN NaN 330.0 13.80 30.23 77.10 12.0 \n", + "1 23.900000 NaN NaN 330.0 9.20 30.26 79.03 8.0 \n", + "2 23.466667 NaN NaN 320.0 11.50 30.27 80.31 10.0 \n", + "3 22.866667 NaN NaN 320.0 11.50 30.28 80.58 10.0 \n", + "4 22.400000 NaN NaN 320.0 9.20 30.29 78.89 8.0 \n", + ".. ... ... ... ... ... ... ... ... 
\n", + "67 27.900000 NaN NaN 330.0 6.90 30.12 67.70 6.0 \n", + "68 27.466667 NaN NaN 330.0 6.90 30.11 66.64 6.0 \n", + "69 28.033333 NaN NaN 320.0 11.50 30.13 65.35 10.0 \n", + "70 26.900000 NaN NaN 320.0 10.35 30.16 77.47 9.0 \n", + "71 26.066667 NaN NaN 320.0 9.20 30.19 78.90 8.0 \n", + "\n", + " feel vsby gust skyc1 peak_wind_gust snowdepth \n", + "0 19.46 10.0 NaN OVC NaN NaN \n", + "1 21.28 10.0 NaN OVC NaN NaN \n", + "2 19.21 10.0 NaN OVC NaN NaN \n", + "3 18.20 10.0 NaN OVC NaN NaN \n", + "4 19.19 10.0 NaN OVC NaN NaN \n", + ".. ... ... ... ... ... ... \n", + "67 32.03 10.0 NaN SCT NaN NaN \n", + "68 32.52 10.0 NaN SCT NaN NaN \n", + "69 30.87 10.0 NaN OVC NaN NaN \n", + "70 26.00 10.0 NaN BKN NaN NaN \n", + "71 24.11 10.0 NaN OVC NaN NaN \n", + "\n", + "[72 rows x 19 columns]\n", + "after\n", + "Scraping completed\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/gisaac/anaconda3/lib/python3.11/site-packages/google/auth/_default.py:76: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. \n", + " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n", + "/Users/gisaac/anaconda3/lib/python3.11/site-packages/google/auth/_default.py:76: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. 
\n", + " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LoadJob\n", + "ASOS data written to BigQuery\n", + "Index(['hour_updated', 'p01m', 'valid', 'tmpf', 'dwpf', 'ice_accretion_1hr',\n", + " 'mslp', 'drct', 'sped', 'alti', 'relh', 'sknt', 'feel', 'vsby', 'gust',\n", + " 'skyc1', 'peak_wind_gust', 'snowdepth', 'station_id', 'index',\n", + " 'datetime', 'precip_usgs', 'cfs', 'height', 'sensing_time', 'base_url',\n", + " 'usgs_id'],\n", + " dtype='object')\n", + "LoadJob\n", + "Now getting data for 12\n", + "01168151.json\n", + "Getting request from USGS\n", + "http://waterservices.usgs.gov/nwis/iv/?format=rdb,1.0&sites=01168151&startDT=2024-01-01&endDT=2024-01-02¶meterCd=00060,00065,00045&siteStatus=all\n", + "Request finished\n", + "'tz_cd'\n", + "Failed 01168151.json\n", + "Now getting data for 13\n", + "05078470.json\n", + "Getting request from USGS\n", + "http://waterservices.usgs.gov/nwis/iv/?format=rdb,1.0&sites=05078470&startDT=2024-01-01&endDT=2024-01-02¶meterCd=00060,00065,00045&siteStatus=all\n", + "Request finished\n", + "'tz_cd'\n", + "Failed 05078470.json\n", + "Now getting data for 14\n", + "04114498.json\n", + "Getting request from USGS\n", + "http://waterservices.usgs.gov/nwis/iv/?format=rdb,1.0&sites=04114498&startDT=2024-01-01&endDT=2024-01-02¶meterCd=00060,00065,00045&siteStatus=all\n", + "Request finished\n", + "Getting request from ASOS\n", + "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=LAN&data=tmpf&data=dwpf&data=relh&data=feel&data=sknt&data=sped&data=alti&data=mslp&data=drct&data=ice_accretion_1hr&data=p01m&data=vsby&data=gust&data=skyc1&data=peak_wind_gust&data=snowdepth&year1=2024&month1=1&day1=1&year2=2024&month2=1&day2=4&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&direct=no&report_type=3&report_type=4\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:70: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"datetime\"] = df[\"datetime\"].map(lambda x: old_timezone.localize(datetime.strptime(x, \"%Y-%m-%d %H:%M\")).astimezone(new_timezone))\n", + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:71: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"cfs\"] = pd.to_numeric(df['cfs'], errors='coerce')\n", + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:73: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"height\"] = pd.to_numeric(df['height'], errors='coerce')\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " station valid tmpf dwpf relh feel sknt sped alti \\\n", + "0 LAN 2024-01-01 00:13 34.0 31.0 88.63 27.22 7.0 8.05 29.95 \n", + "1 LAN 2024-01-01 00:53 33.0 31.0 92.25 26.00 7.0 8.05 29.97 \n", + "2 LAN 2024-01-01 01:25 33.0 31.0 92.25 27.58 5.0 5.75 29.97 \n", + "3 LAN 2024-01-01 01:51 33.8 30.2 86.49 28.53 5.0 5.75 29.98 \n", + "4 LAN 2024-01-01 01:53 33.0 31.0 92.25 27.58 5.0 5.75 29.98 \n", + ".. ... ... ... ... ... ... ... ... ... 
\n", + "91 LAN 2024-01-03 21:00 33.0 29.0 85.04 26.00 7.0 8.05 29.95 \n", + "92 LAN 2024-01-03 21:53 32.0 29.0 88.53 25.53 6.0 6.90 29.97 \n", + "93 LAN 2024-01-03 22:53 32.0 28.0 84.98 26.40 5.0 5.75 29.99 \n", + "94 LAN 2024-01-03 23:35 32.0 28.0 84.98 25.53 6.0 6.90 30.00 \n", + "95 LAN 2024-01-03 23:53 32.0 28.0 84.98 24.78 7.0 8.05 30.01 \n", + "\n", + " mslp drct ice_accretion_1hr p01m vsby gust skyc1 peak_wind_gust \\\n", + "0 M 360.0 M T 5.0 M BKN M \n", + "1 1015.60 10.0 M T 6.0 M BKN M \n", + "2 M 360.0 M T 2.0 M BKN M \n", + "3 M 350.0 M 0.25 3.0 M OVC M \n", + "4 1016.00 350.0 M 0.25 3.0 M OVC M \n", + ".. ... ... ... ... ... ... ... ... \n", + "91 M 290.0 M 0.00 10.0 M FEW M \n", + "92 1015.80 280.0 M 0.00 10.0 M FEW M \n", + "93 1016.40 270.0 M 0.00 10.0 M BKN M \n", + "94 M 270.0 M 0.00 10.0 M BKN M \n", + "95 1017.10 280.0 M 0.00 9.0 M FEW M \n", + "\n", + " snowdepth \n", + "0 M \n", + "1 M \n", + "2 M \n", + "3 M \n", + "4 M \n", + ".. ... \n", + "91 M \n", + "92 M \n", + "93 M \n", + "94 M \n", + "95 M \n", + "\n", + "[96 rows x 18 columns]\n", + "after\n", + " Unnamed: 0 hour_updated p01m valid tmpf \\\n", + "0 0 2024-01-01 01:00:00 0.00 2024-01-01 00:13 33.500000 \n", + "1 1 2024-01-01 02:00:00 0.50 2024-01-01 01:25 33.266667 \n", + "2 2 2024-01-01 03:00:00 0.00 2024-01-01 02:40 33.000000 \n", + "3 3 2024-01-01 04:00:00 0.00 2024-01-01 03:53 32.000000 \n", + "4 4 2024-01-01 05:00:00 0.00 2024-01-01 04:04 31.500000 \n", + ".. ... ... ... ... ... 
\n", + "67 67 2024-01-03 20:00:00 0.25 2024-01-03 19:01 32.000000 \n", + "68 68 2024-01-03 21:00:00 0.00 2024-01-03 20:53 33.000000 \n", + "69 69 2024-01-03 22:00:00 0.00 2024-01-03 21:53 32.000000 \n", + "70 70 2024-01-03 23:00:00 0.00 2024-01-03 22:53 32.000000 \n", + "71 71 2024-01-04 00:00:00 0.00 2024-01-03 23:35 32.000000 \n", + "\n", + " dwpf ice_accretion_1hr mslp drct sped alti relh sknt \\\n", + "0 31.000000 NaN 1015.6 360.0 8.05 29.95 88.63 7.0 \n", + "1 30.733333 NaN 1016.0 360.0 5.75 29.97 92.25 5.0 \n", + "2 31.000000 NaN 1016.5 350.0 9.20 29.99 92.25 8.0 \n", + "3 30.000000 NaN 1017.1 10.0 13.80 30.01 92.22 12.0 \n", + "4 29.500000 NaN 1017.8 360.0 10.35 30.01 92.22 9.0 \n", + ".. ... ... ... ... ... ... ... ... \n", + "67 29.500000 NaN 1014.6 280.0 5.75 29.95 92.22 5.0 \n", + "68 29.000000 NaN 1015.1 290.0 6.90 29.95 85.04 6.0 \n", + "69 29.000000 NaN 1015.8 280.0 6.90 29.97 88.53 6.0 \n", + "70 28.000000 NaN 1016.4 270.0 5.75 29.99 84.98 5.0 \n", + "71 28.000000 NaN 1017.1 270.0 6.90 30.00 84.98 6.0 \n", + "\n", + " feel vsby gust skyc1 peak_wind_gust snowdepth \n", + "0 27.22 5.0 NaN BKN NaN NaN \n", + "1 27.58 2.0 NaN BKN NaN NaN \n", + "2 25.34 10.0 NaN OVC NaN NaN \n", + "3 22.00 5.0 17.0 OVC NaN NaN \n", + "4 23.51 5.0 18.0 OVC NaN NaN \n", + ".. ... ... ... ... ... ... \n", + "67 26.40 3.0 NaN BKN NaN NaN \n", + "68 26.73 7.0 NaN BKN NaN NaN \n", + "69 25.53 10.0 NaN FEW NaN NaN \n", + "70 26.40 10.0 NaN BKN NaN NaN \n", + "71 25.53 10.0 NaN BKN NaN NaN \n", + "\n", + "[72 rows x 19 columns]\n", + "after\n", + "Scraping completed\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/gisaac/anaconda3/lib/python3.11/site-packages/google/auth/_default.py:76: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. 
See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. \n", + " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n", + "/Users/gisaac/anaconda3/lib/python3.11/site-packages/google/auth/_default.py:76: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. \n", + " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LoadJob\n", + "ASOS data written to BigQuery\n", + "Index(['hour_updated', 'p01m', 'valid', 'tmpf', 'dwpf', 'ice_accretion_1hr',\n", + " 'mslp', 'drct', 'sped', 'alti', 'relh', 'sknt', 'feel', 'vsby', 'gust',\n", + " 'skyc1', 'peak_wind_gust', 'snowdepth', 'station_id', 'index',\n", + " 'datetime', 'cfs', 'height', 'sensing_time', 'base_url', 'usgs_id'],\n", + " dtype='object')\n", + "LoadJob\n", + "Now getting data for 15\n", + "02399600.json\n", + "Getting request from USGS\n", + "http://waterservices.usgs.gov/nwis/iv/?format=rdb,1.0&sites=02399600&startDT=2024-01-01&endDT=2024-01-02¶meterCd=00060,00065,00045&siteStatus=all\n", + "Request finished\n", + "'cfs'\n", + "Failed 02399600.json\n", + "Now getting data for 16\n", + "04147500.json\n", + "Getting request from USGS\n", + "http://waterservices.usgs.gov/nwis/iv/?format=rdb,1.0&sites=04147500&startDT=2024-01-01&endDT=2024-01-02¶meterCd=00060,00065,00045&siteStatus=all\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:70: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in 
the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"datetime\"] = df[\"datetime\"].map(lambda x: old_timezone.localize(datetime.strptime(x, \"%Y-%m-%d %H:%M\")).astimezone(new_timezone))\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Request finished\n", + "Getting request from ASOS\n", + "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=D95&data=tmpf&data=dwpf&data=relh&data=feel&data=sknt&data=sped&data=alti&data=mslp&data=drct&data=ice_accretion_1hr&data=p01m&data=vsby&data=gust&data=skyc1&data=peak_wind_gust&data=snowdepth&year1=2024&month1=1&day1=1&year2=2024&month2=1&day2=4&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&direct=no&report_type=3&report_type=4\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:70: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"datetime\"] = df[\"datetime\"].map(lambda x: old_timezone.localize(datetime.strptime(x, \"%Y-%m-%d %H:%M\")).astimezone(new_timezone))\n", + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:71: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"cfs\"] = pd.to_numeric(df['cfs'], errors='coerce')\n", + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:73: 
SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"height\"] = pd.to_numeric(df['height'], errors='coerce')\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " station valid tmpf dwpf relh feel sknt sped alti \\\n", + "0 D95 2024-01-01 00:15 33.4 33.4 100.00 25.84 8.0 9.20 29.93 \n", + "1 D95 2024-01-01 00:35 33.4 33.4 100.00 25.25 9.0 10.35 29.93 \n", + "2 D95 2024-01-01 00:55 33.3 33.3 100.00 27.10 6.0 6.90 29.94 \n", + "3 D95 2024-01-01 01:15 33.4 33.4 100.00 24.72 10.0 11.50 29.94 \n", + "4 D95 2024-01-01 01:35 33.0 33.0 100.00 25.34 8.0 9.20 29.95 \n", + ".. ... ... ... ... ... ... ... ... ... \n", + "211 D95 2024-01-03 22:35 32.0 31.0 96.04 26.40 5.0 5.75 29.95 \n", + "212 D95 2024-01-03 22:55 32.0 31.0 96.04 26.40 5.0 5.75 29.96 \n", + "213 D95 2024-01-03 23:15 32.0 31.0 96.04 25.53 6.0 6.90 29.96 \n", + "214 D95 2024-01-03 23:35 32.0 31.0 96.04 26.40 5.0 5.75 29.96 \n", + "215 D95 2024-01-03 23:55 32.0 31.6 98.40 27.42 4.0 4.60 29.97 \n", + "\n", + " mslp drct ice_accretion_1hr p01m vsby gust skyc1 peak_wind_gust \\\n", + "0 M 20.0 M 0.0 5.0 M OVC M \n", + "1 M 20.0 M 0.0 5.0 M OVC M \n", + "2 M 10.0 M 0.0 7.0 M OVC M \n", + "3 M 20.0 M 0.0 7.0 M OVC M \n", + "4 M 20.0 M 0.0 7.0 M OVC M \n", + ".. ... ... ... ... ... ... ... ... \n", + "211 M 280.0 M 0.0 10.0 M OVC M \n", + "212 M 290.0 M 0.0 10.0 M BKN M \n", + "213 M 270.0 M 0.0 10.0 M SCT M \n", + "214 M 290.0 M 0.0 10.0 M SCT M \n", + "215 M 270.0 M 0.0 5.0 M SCT M \n", + "\n", + " snowdepth \n", + "0 M \n", + "1 M \n", + "2 M \n", + "3 M \n", + "4 M \n", + ".. ... 
\n", + "211 M \n", + "212 M \n", + "213 M \n", + "214 M \n", + "215 M \n", + "\n", + "[216 rows x 18 columns]\n", + "after\n", + " Unnamed: 0 hour_updated p01m valid tmpf \\\n", + "0 0 2024-01-01 01:00:00 0.0 2024-01-01 00:15 33.366667 \n", + "1 1 2024-01-01 02:00:00 0.0 2024-01-01 01:15 33.133333 \n", + "2 2 2024-01-01 03:00:00 0.0 2024-01-01 02:15 33.400000 \n", + "3 3 2024-01-01 04:00:00 0.0 2024-01-01 03:15 33.400000 \n", + "4 4 2024-01-01 05:00:00 0.0 2024-01-01 04:15 33.666667 \n", + ".. ... ... ... ... ... \n", + "67 67 2024-01-03 20:00:00 0.0 2024-01-03 19:15 32.800000 \n", + "68 68 2024-01-03 21:00:00 0.0 2024-01-03 20:15 32.900000 \n", + "69 69 2024-01-03 22:00:00 0.0 2024-01-03 21:15 32.233333 \n", + "70 70 2024-01-03 23:00:00 0.0 2024-01-03 22:15 32.000000 \n", + "71 71 2024-01-04 00:00:00 0.0 2024-01-03 23:15 32.000000 \n", + "\n", + " dwpf ice_accretion_1hr mslp drct sped alti relh sknt \\\n", + "0 33.366667 NaN NaN 20.0 9.20 29.93 100.00 8.0 \n", + "1 33.133333 NaN NaN 20.0 11.50 29.94 100.00 10.0 \n", + "2 33.400000 NaN NaN 10.0 8.05 29.96 100.00 7.0 \n", + "3 33.400000 NaN NaN 10.0 9.20 29.97 100.00 8.0 \n", + "4 33.666667 NaN NaN 10.0 8.05 29.99 100.00 7.0 \n", + ".. ... ... ... ... ... ... ... ... \n", + "67 31.000000 NaN NaN 280.0 6.90 29.92 98.40 6.0 \n", + "68 29.900000 NaN NaN 280.0 5.75 29.91 88.58 5.0 \n", + "69 30.200000 NaN NaN 260.0 6.90 29.93 88.57 6.0 \n", + "70 31.000000 NaN NaN 280.0 5.75 29.95 96.04 5.0 \n", + "71 31.200000 NaN NaN 270.0 6.90 29.96 96.04 6.0 \n", + "\n", + " feel vsby gust skyc1 peak_wind_gust snowdepth \n", + "0 25.84 5.0 NaN OVC NaN NaN \n", + "1 24.72 7.0 NaN OVC NaN NaN \n", + "2 26.00 3.0 NaN OVC NaN NaN \n", + "3 25.84 5.0 NaN OVC NaN NaN \n", + "4 26.97 7.0 17.0 OVC NaN NaN \n", + ".. ... ... ... ... ... ... 
\n", + "67 26.01 10.0 NaN SCT NaN NaN \n", + "68 27.58 10.0 NaN OVC NaN NaN \n", + "69 26.37 10.0 NaN SCT NaN NaN \n", + "70 26.40 10.0 NaN SCT NaN NaN \n", + "71 25.53 10.0 NaN SCT NaN NaN \n", + "\n", + "[72 rows x 19 columns]\n", + "after\n", + "Scraping completed\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/gisaac/anaconda3/lib/python3.11/site-packages/google/auth/_default.py:76: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. \n", + " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n", + "/Users/gisaac/anaconda3/lib/python3.11/site-packages/google/auth/_default.py:76: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. 
\n", + " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LoadJob\n", + "ASOS data written to BigQuery\n", + "Index(['hour_updated', 'p01m', 'valid', 'tmpf', 'dwpf', 'ice_accretion_1hr',\n", + " 'mslp', 'drct', 'sped', 'alti', 'relh', 'sknt', 'feel', 'vsby', 'gust',\n", + " 'skyc1', 'peak_wind_gust', 'snowdepth', 'station_id', 'index',\n", + " 'datetime', 'cfs', 'height', 'sensing_time', 'base_url', 'usgs_id'],\n", + " dtype='object')\n", + "LoadJob\n", + "Now getting data for 17\n", + "06727500.json\n", + "Getting request from USGS\n", + "http://waterservices.usgs.gov/nwis/iv/?format=rdb,1.0&sites=06727500&startDT=2024-01-01&endDT=2024-01-02¶meterCd=00060,00065,00045&siteStatus=all\n", + "Request finished\n", + "'tz_cd'\n", + "Failed 06727500.json\n", + "Now getting data for 18\n", + "03271207.json\n", + "Getting request from USGS\n", + "http://waterservices.usgs.gov/nwis/iv/?format=rdb,1.0&sites=03271207&startDT=2024-01-01&endDT=2024-01-02¶meterCd=00060,00065,00045&siteStatus=all\n", + "Request finished\n", + "'cfs'\n", + "Failed 03271207.json\n", + "Now getting data for 19\n", + "01619500.json\n", + "Getting request from USGS\n", + "http://waterservices.usgs.gov/nwis/iv/?format=rdb,1.0&sites=01619500&startDT=2024-01-01&endDT=2024-01-02¶meterCd=00060,00065,00045&siteStatus=all\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:70: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"datetime\"] = df[\"datetime\"].map(lambda x: old_timezone.localize(datetime.strptime(x, \"%Y-%m-%d %H:%M\")).astimezone(new_timezone))\n" + 
] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Request finished\n", + "Getting request from ASOS\n", + "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=MANN&data=tmpf&data=dwpf&data=relh&data=feel&data=sknt&data=sped&data=alti&data=mslp&data=drct&data=ice_accretion_1hr&data=p01m&data=vsby&data=gust&data=skyc1&data=peak_wind_gust&data=snowdepth&year1=2024&month1=1&day1=1&year2=2024&month2=1&day2=4&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&direct=no&report_type=3&report_type=4\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:70: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"datetime\"] = df[\"datetime\"].map(lambda x: old_timezone.localize(datetime.strptime(x, \"%Y-%m-%d %H:%M\")).astimezone(new_timezone))\n", + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:71: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"cfs\"] = pd.to_numeric(df['cfs'], errors='coerce')\n", + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:73: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: 
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"height\"] = pd.to_numeric(df['height'], errors='coerce')\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Empty DataFrame\n", + "Columns: [station, valid, tmpf, dwpf, relh, feel, sknt, sped, alti, mslp, drct, ice_accretion_1hr, p01m, vsby, gust, skyc1, peak_wind_gust, snowdepth]\n", + "Index: []\n", + "after\n", + "Empty DataFrame\n", + "Columns: [Unnamed: 0, hour_updated, p01m, valid, tmpf, dwpf, ice_accretion_1hr, mslp, drct, sped, alti, relh, sknt, feel, vsby, gust, skyc1, peak_wind_gust, snowdepth]\n", + "Index: []\n", + "after\n", + "Initial ASOS data empty, trying again\n", + "Getting request from ASOS\n", + "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=VMRT&data=tmpf&data=dwpf&data=relh&data=feel&data=sknt&data=sped&data=alti&data=mslp&data=drct&data=ice_accretion_1hr&data=p01m&data=vsby&data=gust&data=skyc1&data=peak_wind_gust&data=snowdepth&year1=2024&month1=1&day1=1&year2=2024&month2=1&day2=4&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&direct=no&report_type=3&report_type=4\n", + "Empty DataFrame\n", + "Columns: [station, valid, tmpf, dwpf, relh, feel, sknt, sped, alti, mslp, drct, ice_accretion_1hr, p01m, vsby, gust, skyc1, peak_wind_gust, snowdepth]\n", + "Index: []\n", + "after\n", + "Empty DataFrame\n", + "Columns: [Unnamed: 0, hour_updated, p01m, valid, tmpf, dwpf, ice_accretion_1hr, mslp, drct, sped, alti, relh, sknt, feel, vsby, gust, skyc1, peak_wind_gust, snowdepth]\n", + "Index: []\n", + "after\n", + "Initial ASOS data empty, trying again\n", + "Getting request from ASOS\n", + 
"https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=KMRB&data=tmpf&data=dwpf&data=relh&data=feel&data=sknt&data=sped&data=alti&data=mslp&data=drct&data=ice_accretion_1hr&data=p01m&data=vsby&data=gust&data=skyc1&data=peak_wind_gust&data=snowdepth&year1=2024&month1=1&day1=1&year2=2024&month2=1&day2=4&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&direct=no&report_type=3&report_type=4\n", + " station valid tmpf dwpf relh feel sknt sped alti \\\n", + "0 MRB 2024-01-01 00:53 40.0 34.0 78.94 40.00 0.0 0.00 29.98 \n", + "1 MRB 2024-01-01 01:53 39.0 34.0 82.08 35.59 4.0 4.60 29.98 \n", + "2 MRB 2024-01-01 02:53 38.0 35.0 88.83 34.43 4.0 4.60 29.97 \n", + "3 MRB 2024-01-01 03:53 37.0 35.0 92.39 33.26 4.0 4.60 29.97 \n", + "4 MRB 2024-01-01 04:53 36.0 35.0 96.11 36.00 0.0 0.00 29.96 \n", + ".. ... ... ... ... ... ... ... ... ... \n", + "71 MRB 2024-01-03 19:53 45.0 24.0 43.28 41.83 5.0 5.75 29.98 \n", + "72 MRB 2024-01-03 20:53 43.0 23.0 44.80 40.27 4.0 4.60 29.98 \n", + "73 MRB 2024-01-03 21:53 39.0 24.0 54.54 39.00 0.0 0.00 29.97 \n", + "74 MRB 2024-01-03 22:53 35.0 24.0 63.85 35.00 0.0 0.00 29.97 \n", + "75 MRB 2024-01-03 23:53 33.0 24.0 69.16 33.00 0.0 0.00 29.98 \n", + "\n", + " mslp drct ice_accretion_1hr p01m vsby gust skyc1 peak_wind_gust \\\n", + "0 1015.60 0.00 M T 10.0 M OVC M \n", + "1 1015.60 60.00 M 0.00 10.0 M OVC M \n", + "2 1015.30 40.00 M T 10.0 M SCT M \n", + "3 1015.30 320.00 M T 10.0 M OVC M \n", + "4 1014.80 0.00 M 0.00 10.0 M SCT M \n", + ".. ... ... ... ... ... ... ... ... \n", + "71 1015.40 260.00 M 0.00 10.0 M CLR M \n", + "72 1015.60 210.00 M 0.00 10.0 M CLR M \n", + "73 1015.50 0.00 M 0.00 10.0 M CLR M \n", + "74 1015.40 0.00 M 0.00 10.0 M CLR M \n", + "75 1015.70 0.00 M 0.00 10.0 M CLR M \n", + "\n", + " snowdepth \n", + "0 M \n", + "1 M \n", + "2 M \n", + "3 M \n", + "4 M \n", + ".. ... 
\n", + "71 M \n", + "72 M \n", + "73 M \n", + "74 M \n", + "75 M \n", + "\n", + "[76 rows x 18 columns]\n", + "after\n", + " Unnamed: 0 hour_updated p01m valid tmpf dwpf \\\n", + "0 0 2024-01-01 01:00:00 0.0 2024-01-01 00:53 40.0 34.0 \n", + "1 1 2024-01-01 02:00:00 0.0 2024-01-01 01:53 39.0 34.0 \n", + "2 2 2024-01-01 03:00:00 0.0 2024-01-01 02:53 38.0 35.0 \n", + "3 3 2024-01-01 04:00:00 0.0 2024-01-01 03:53 37.0 35.0 \n", + "4 4 2024-01-01 05:00:00 0.0 2024-01-01 04:53 36.0 35.0 \n", + ".. ... ... ... ... ... ... \n", + "67 67 2024-01-03 20:00:00 0.0 2024-01-03 19:53 45.0 24.0 \n", + "68 68 2024-01-03 21:00:00 0.0 2024-01-03 20:53 43.0 23.0 \n", + "69 69 2024-01-03 22:00:00 0.0 2024-01-03 21:53 39.0 24.0 \n", + "70 70 2024-01-03 23:00:00 0.0 2024-01-03 22:53 35.0 24.0 \n", + "71 71 2024-01-04 00:00:00 0.0 2024-01-03 23:53 33.0 24.0 \n", + "\n", + " ice_accretion_1hr mslp drct sped alti relh sknt feel vsby \\\n", + "0 NaN 1015.6 0.0 0.00 29.98 78.94 0.0 40.00 10.0 \n", + "1 NaN 1015.6 60.0 4.60 29.98 82.08 4.0 35.59 10.0 \n", + "2 NaN 1015.3 40.0 4.60 29.97 88.83 4.0 34.43 10.0 \n", + "3 NaN 1015.3 320.0 4.60 29.97 92.39 4.0 33.26 10.0 \n", + "4 NaN 1014.8 0.0 0.00 29.96 96.11 0.0 36.00 10.0 \n", + ".. ... ... ... ... ... ... ... ... ... \n", + "67 NaN 1015.4 260.0 5.75 29.98 43.28 5.0 41.83 10.0 \n", + "68 NaN 1015.6 210.0 4.60 29.98 44.80 4.0 40.27 10.0 \n", + "69 NaN 1015.5 0.0 0.00 29.97 54.54 0.0 39.00 10.0 \n", + "70 NaN 1015.4 0.0 0.00 29.97 63.85 0.0 35.00 10.0 \n", + "71 NaN 1015.7 0.0 0.00 29.98 69.16 0.0 33.00 10.0 \n", + "\n", + " gust skyc1 peak_wind_gust snowdepth \n", + "0 NaN OVC NaN NaN \n", + "1 NaN OVC NaN NaN \n", + "2 NaN SCT NaN NaN \n", + "3 NaN OVC NaN NaN \n", + "4 NaN SCT NaN NaN \n", + ".. ... ... ... ... 
\n", + "67 NaN CLR NaN NaN \n", + "68 NaN CLR NaN NaN \n", + "69 NaN CLR NaN NaN \n", + "70 NaN CLR NaN NaN \n", + "71 NaN CLR NaN NaN \n", + "\n", + "[72 rows x 19 columns]\n", + "after\n", + "Scraping completed\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/gisaac/anaconda3/lib/python3.11/site-packages/google/auth/_default.py:76: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. \n", + " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n", + "/Users/gisaac/anaconda3/lib/python3.11/site-packages/google/auth/_default.py:76: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. 
\n", + " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LoadJob\n", + "ASOS data written to BigQuery\n", + "Index(['hour_updated', 'p01m', 'valid', 'tmpf', 'dwpf', 'ice_accretion_1hr',\n", + " 'mslp', 'drct', 'sped', 'alti', 'relh', 'sknt', 'feel', 'vsby', 'gust',\n", + " 'skyc1', 'peak_wind_gust', 'snowdepth', 'station_id', 'index',\n", + " 'datetime', 'cfs', 'height', 'sensing_time', 'base_url', 'usgs_id'],\n", + " dtype='object')\n", + "LoadJob\n", + "Now getting data for 20\n", + "02146470.json\n", + "Getting request from USGS\n", + "http://waterservices.usgs.gov/nwis/iv/?format=rdb,1.0&sites=02146470&startDT=2024-01-01&endDT=2024-01-02¶meterCd=00060,00065,00045&siteStatus=all\n", + "Request finished\n", + "Getting request from ASOS\n", + "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=KCLT&data=tmpf&data=dwpf&data=relh&data=feel&data=sknt&data=sped&data=alti&data=mslp&data=drct&data=ice_accretion_1hr&data=p01m&data=vsby&data=gust&data=skyc1&data=peak_wind_gust&data=snowdepth&year1=2024&month1=1&day1=1&year2=2024&month2=1&day2=4&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&direct=no&report_type=3&report_type=4\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:70: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"datetime\"] = df[\"datetime\"].map(lambda x: old_timezone.localize(datetime.strptime(x, \"%Y-%m-%d %H:%M\")).astimezone(new_timezone))\n", + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:71: SettingWithCopyWarning: \n", + "A value is 
trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"cfs\"] = pd.to_numeric(df['cfs'], errors='coerce')\n", + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:73: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"height\"] = pd.to_numeric(df['height'], errors='coerce')\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " station valid tmpf dwpf relh feel sknt sped alti \\\n", + "0 CLT 2024-01-01 00:52 46.0 31.0 55.58 41.84 7.0 8.05 30.06 \n", + "1 CLT 2024-01-01 01:52 47.0 31.0 53.52 42.59 8.0 9.20 30.05 \n", + "2 CLT 2024-01-01 02:52 47.0 31.0 53.52 41.78 10.0 11.50 30.04 \n", + "3 CLT 2024-01-01 03:52 46.0 31.0 55.58 40.16 11.0 12.65 30.04 \n", + "4 CLT 2024-01-01 04:52 46.0 31.0 55.58 40.52 10.0 11.50 30.04 \n", + ".. ... ... ... ... ... ... ... ... ... \n", + "67 CLT 2024-01-03 19:52 47.0 29.0 49.34 42.59 8.0 9.20 30.03 \n", + "68 CLT 2024-01-03 20:52 46.0 28.0 49.18 42.39 6.0 6.90 30.01 \n", + "69 CLT 2024-01-03 21:52 46.0 28.0 49.18 43.77 4.0 4.60 30.02 \n", + "70 CLT 2024-01-03 22:52 44.0 29.0 55.29 40.65 5.0 5.75 30.03 \n", + "71 CLT 2024-01-03 23:52 44.0 29.0 55.29 42.41 3.0 3.45 30.02 \n", + "\n", + " mslp drct ice_accretion_1hr p01m vsby gust skyc1 peak_wind_gust \\\n", + "0 1019.6 210.00 M 0.0 10.0 M FEW M \n", + "1 1019.2 210.00 M 0.0 10.0 M CLR M \n", + "2 1019.0 220.00 M 0.0 10.0 M FEW M \n", + "3 1019.0 230.00 M 0.0 10.0 M SCT M \n", + "4 1018.9 230.00 M 0.0 10.0 M BKN M \n", + ".. ... ... ... 
... ... ... ... ... \n", + "67 1018.6 190.00 M 0.0 10.0 M BKN M \n", + "68 1017.9 190.00 M 0.0 10.0 M BKN M \n", + "69 1018.5 200.00 M 0.0 10.0 M BKN M \n", + "70 1018.8 220.00 M 0.0 10.0 M CLR M \n", + "71 1018.5 180.00 M 0.0 10.0 M BKN M \n", + "\n", + " snowdepth \n", + "0 M \n", + "1 M \n", + "2 M \n", + "3 M \n", + "4 M \n", + ".. ... \n", + "67 M \n", + "68 M \n", + "69 M \n", + "70 M \n", + "71 M \n", + "\n", + "[72 rows x 18 columns]\n", + "after\n", + " Unnamed: 0 hour_updated p01m valid tmpf dwpf \\\n", + "0 0 2024-01-01 01:00:00 0.0 2024-01-01 00:52 46.0 31.0 \n", + "1 1 2024-01-01 02:00:00 0.0 2024-01-01 01:52 47.0 31.0 \n", + "2 2 2024-01-01 03:00:00 0.0 2024-01-01 02:52 47.0 31.0 \n", + "3 3 2024-01-01 04:00:00 0.0 2024-01-01 03:52 46.0 31.0 \n", + "4 4 2024-01-01 05:00:00 0.0 2024-01-01 04:52 46.0 31.0 \n", + ".. ... ... ... ... ... ... \n", + "67 67 2024-01-03 20:00:00 0.0 2024-01-03 19:52 47.0 29.0 \n", + "68 68 2024-01-03 21:00:00 0.0 2024-01-03 20:52 46.0 28.0 \n", + "69 69 2024-01-03 22:00:00 0.0 2024-01-03 21:52 46.0 28.0 \n", + "70 70 2024-01-03 23:00:00 0.0 2024-01-03 22:52 44.0 29.0 \n", + "71 71 2024-01-04 00:00:00 0.0 2024-01-03 23:52 44.0 29.0 \n", + "\n", + " ice_accretion_1hr mslp drct sped alti relh sknt feel vsby \\\n", + "0 NaN 1019.6 210.0 8.05 30.06 55.58 7.0 41.84 10.0 \n", + "1 NaN 1019.2 210.0 9.20 30.05 53.52 8.0 42.59 10.0 \n", + "2 NaN 1019.0 220.0 11.50 30.04 53.52 10.0 41.78 10.0 \n", + "3 NaN 1019.0 230.0 12.65 30.04 55.58 11.0 40.16 10.0 \n", + "4 NaN 1018.9 230.0 11.50 30.04 55.58 10.0 40.52 10.0 \n", + ".. ... ... ... ... ... ... ... ... ... 
\n", + "67 NaN 1018.6 190.0 9.20 30.03 49.34 8.0 42.59 10.0 \n", + "68 NaN 1017.9 190.0 6.90 30.01 49.18 6.0 42.39 10.0 \n", + "69 NaN 1018.5 200.0 4.60 30.02 49.18 4.0 43.77 10.0 \n", + "70 NaN 1018.8 220.0 5.75 30.03 55.29 5.0 40.65 10.0 \n", + "71 NaN 1018.5 180.0 3.45 30.02 55.29 3.0 42.41 10.0 \n", + "\n", + " gust skyc1 peak_wind_gust snowdepth \n", + "0 NaN FEW NaN NaN \n", + "1 NaN CLR NaN NaN \n", + "2 NaN FEW NaN NaN \n", + "3 NaN SCT NaN NaN \n", + "4 NaN BKN NaN NaN \n", + ".. ... ... ... ... \n", + "67 NaN BKN NaN NaN \n", + "68 NaN BKN NaN NaN \n", + "69 NaN BKN NaN NaN \n", + "70 NaN CLR NaN NaN \n", + "71 NaN BKN NaN NaN \n", + "\n", + "[72 rows x 19 columns]\n", + "after\n", + "Scraping completed\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/gisaac/anaconda3/lib/python3.11/site-packages/google/auth/_default.py:76: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. \n", + " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n", + "/Users/gisaac/anaconda3/lib/python3.11/site-packages/google/auth/_default.py:76: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. 
\n", + " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LoadJob\n", + "ASOS data written to BigQuery\n", + "Index(['hour_updated', 'p01m', 'valid', 'tmpf', 'dwpf', 'ice_accretion_1hr',\n", + " 'mslp', 'drct', 'sped', 'alti', 'relh', 'sknt', 'feel', 'vsby', 'gust',\n", + " 'skyc1', 'peak_wind_gust', 'snowdepth', 'station_id', 'index',\n", + " 'datetime', 'cfs', 'height', 'sensing_time', 'base_url', 'usgs_id'],\n", + " dtype='object')\n", + "LoadJob\n", + "Now getting data for 21\n", + "11418500.json\n", + "Getting request from USGS\n", + "http://waterservices.usgs.gov/nwis/iv/?format=rdb,1.0&sites=11418500&startDT=2024-01-01&endDT=2024-01-02¶meterCd=00060,00065,00045&siteStatus=all\n", + "Request finished\n", + "Getting request from ASOS\n", + "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=CRDR&data=tmpf&data=dwpf&data=relh&data=feel&data=sknt&data=sped&data=alti&data=mslp&data=drct&data=ice_accretion_1hr&data=p01m&data=vsby&data=gust&data=skyc1&data=peak_wind_gust&data=snowdepth&year1=2024&month1=1&day1=1&year2=2024&month2=1&day2=4&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&direct=no&report_type=3&report_type=4\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:70: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"datetime\"] = df[\"datetime\"].map(lambda x: old_timezone.localize(datetime.strptime(x, \"%Y-%m-%d %H:%M\")).astimezone(new_timezone))\n", + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:71: SettingWithCopyWarning: \n", + "A value is 
trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"cfs\"] = pd.to_numeric(df['cfs'], errors='coerce')\n", + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:73: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"height\"] = pd.to_numeric(df['height'], errors='coerce')\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Empty DataFrame\n", + "Columns: [station, valid, tmpf, dwpf, relh, feel, sknt, sped, alti, mslp, drct, ice_accretion_1hr, p01m, vsby, gust, skyc1, peak_wind_gust, snowdepth]\n", + "Index: []\n", + "after\n", + "Empty DataFrame\n", + "Columns: [Unnamed: 0, hour_updated, p01m, valid, tmpf, dwpf, ice_accretion_1hr, mslp, drct, sped, alti, relh, sknt, feel, vsby, gust, skyc1, peak_wind_gust, snowdepth]\n", + "Index: []\n", + "after\n", + "Initial ASOS data empty, trying again\n", + "Getting request from ASOS\n", + "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=BAB&data=tmpf&data=dwpf&data=relh&data=feel&data=sknt&data=sped&data=alti&data=mslp&data=drct&data=ice_accretion_1hr&data=p01m&data=vsby&data=gust&data=skyc1&data=peak_wind_gust&data=snowdepth&year1=2024&month1=1&day1=1&year2=2024&month2=1&day2=4&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&direct=no&report_type=3&report_type=4\n", + " station valid tmpf dwpf relh feel sknt sped alti \\\n", + "0 BAB 2024-01-01 00:10 55.4 50.0 81.99 55.4 0.0 0.00 30.11 \n", + "1 BAB 2024-01-01 00:40 53.6 50.0 87.56 53.6 
0.0 0.00 30.11 \n", + "2 BAB 2024-01-01 00:55 53.0 49.5 87.86 53.0 4.0 4.60 30.11 \n", + "3 BAB 2024-01-01 01:55 50.5 48.6 93.16 50.5 5.0 5.75 30.11 \n", + "4 BAB 2024-01-01 02:55 51.0 49.0 92.83 51.0 4.0 4.60 30.11 \n", + ".. ... ... ... ... ... ... ... ... ... \n", + "134 BAB 2024-01-03 19:55 52.0 45.0 76.94 52.0 3.0 3.45 30.15 \n", + "135 BAB 2024-01-03 20:55 53.0 44.6 73.04 53.0 0.0 0.00 30.14 \n", + "136 BAB 2024-01-03 21:55 53.2 42.3 66.40 53.2 3.0 3.45 30.14 \n", + "137 BAB 2024-01-03 22:56 55.4 43.0 62.94 55.4 4.0 4.60 30.14 \n", + "138 BAB 2024-01-03 23:55 55.0 39.2 55.13 55.0 3.0 3.45 30.16 \n", + "\n", + " mslp drct ice_accretion_1hr p01m vsby gust skyc1 peak_wind_gust \\\n", + "0 1019.9 0.0 M 0.00 10.00 M BKN M \n", + "1 1019.9 0.0 M 0.00 10.00 M FEW M \n", + "2 1019.9 330.0 M 0.00 10.00 M OVC M \n", + "3 1019.9 340.0 M 0.00 10.00 M OVC M \n", + "4 1019.9 360.0 M 0.00 10.00 M BKN M \n", + ".. ... ... ... ... ... ... ... ... \n", + "134 1021.3 190.0 M 0.00 10.00 M OVC M \n", + "135 1021.0 0.0 M 0.00 10.00 M FEW M \n", + "136 1021.0 10.0 M 0.00 10.00 M SCT M \n", + "137 1021.0 330.0 M 0.00 10.00 M CLR M \n", + "138 1021.6 310.0 M 0.00 10.00 M CLR M \n", + "\n", + " snowdepth \n", + "0 M \n", + "1 M \n", + "2 M \n", + "3 M \n", + "4 M \n", + ".. ... \n", + "134 M \n", + "135 M \n", + "136 M \n", + "137 M \n", + "138 M \n", + "\n", + "[139 rows x 18 columns]\n", + "after\n", + " Unnamed: 0 hour_updated p01m valid tmpf dwpf \\\n", + "0 0 2024-01-01 01:00:00 0.0 2024-01-01 00:10 54.0 49.833333 \n", + "1 1 2024-01-01 02:00:00 0.0 2024-01-01 01:55 50.5 48.600000 \n", + "2 2 2024-01-01 03:00:00 0.0 2024-01-01 02:55 51.0 49.000000 \n", + "3 3 2024-01-01 04:00:00 0.0 2024-01-01 03:55 50.0 48.400000 \n", + "4 4 2024-01-01 05:00:00 0.0 2024-01-01 04:55 49.0 47.300000 \n", + ".. ... ... ... ... ... ... 
\n", + "67 67 2024-01-03 20:00:00 0.0 2024-01-03 19:25 51.9 45.700000 \n", + "68 68 2024-01-03 21:00:00 0.0 2024-01-03 20:55 53.0 44.600000 \n", + "69 69 2024-01-03 22:00:00 0.0 2024-01-03 21:55 53.2 42.300000 \n", + "70 70 2024-01-03 23:00:00 0.0 2024-01-03 22:56 55.4 43.000000 \n", + "71 71 2024-01-04 00:00:00 0.0 2024-01-03 23:55 55.0 39.200000 \n", + "\n", + " ice_accretion_1hr mslp drct sped alti relh sknt feel vsby \\\n", + "0 NaN 1019.9 0.0 0.00 30.11 81.99 0.0 55.40 10.0 \n", + "1 NaN 1019.9 340.0 5.75 30.11 93.16 5.0 50.50 10.0 \n", + "2 NaN 1019.9 360.0 4.60 30.11 92.83 4.0 51.00 10.0 \n", + "3 NaN 1019.9 10.0 5.75 30.11 94.20 5.0 50.00 10.0 \n", + "4 NaN 1019.9 360.0 5.75 30.11 93.82 5.0 46.58 10.0 \n", + ".. ... ... ... ... ... ... ... ... ... \n", + "67 NaN 1021.3 150.0 6.90 30.15 81.74 6.0 51.80 10.0 \n", + "68 NaN 1021.0 0.0 0.00 30.14 73.04 0.0 53.00 10.0 \n", + "69 NaN 1021.0 10.0 3.45 30.14 66.40 3.0 53.20 10.0 \n", + "70 NaN 1021.0 330.0 4.60 30.14 62.94 4.0 55.40 10.0 \n", + "71 NaN 1021.6 310.0 3.45 30.16 55.13 3.0 55.00 10.0 \n", + "\n", + " gust skyc1 peak_wind_gust snowdepth \n", + "0 NaN BKN NaN NaN \n", + "1 NaN OVC NaN NaN \n", + "2 NaN BKN NaN NaN \n", + "3 NaN SCT NaN NaN \n", + "4 NaN FEW NaN NaN \n", + ".. ... ... ... ... \n", + "67 NaN BKN NaN NaN \n", + "68 NaN FEW NaN NaN \n", + "69 NaN SCT NaN NaN \n", + "70 NaN CLR NaN NaN \n", + "71 NaN CLR NaN NaN \n", + "\n", + "[72 rows x 19 columns]\n", + "after\n", + "Scraping completed\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/gisaac/anaconda3/lib/python3.11/site-packages/google/auth/_default.py:76: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. 
\n", + " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n", + "/Users/gisaac/anaconda3/lib/python3.11/site-packages/google/auth/_default.py:76: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. \n", + " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LoadJob\n", + "ASOS data written to BigQuery\n", + "The base URL for SNOTEL is below: \n", + "https://powderlines.kellysoftware.org/api/station/428:CA:SNTL?start_date=2024-01-01&end_date=2024-01-02\n", + "Index(['hour_updated', 'p01m', 'valid', 'tmpf', 'dwpf', 'ice_accretion_1hr',\n", + " 'mslp', 'drct', 'sped', 'alti', 'relh', 'sknt', 'feel', 'vsby', 'gust',\n", + " 'skyc1', 'peak_wind_gust', 'snowdepth', 'station_id', 'index',\n", + " 'datetime', 'cfs', 'height', 'Date', 'snotel_snow_depth', 'swe',\n", + " 'change_swe', 'change_snow_depth', 'observed_temp_snotel',\n", + " 'sensing_time', 'base_url', 'usgs_id'],\n", + " dtype='object')\n", + "LoadJob\n", + "Now getting data for 22\n", + "02466030.json\n", + "Getting request from USGS\n", + "http://waterservices.usgs.gov/nwis/iv/?format=rdb,1.0&sites=02466030&startDT=2024-01-01&endDT=2024-01-02¶meterCd=00060,00065,00045&siteStatus=all\n", + "Request finished\n", + "Getting request from ASOS\n", + "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=AEUT&data=tmpf&data=dwpf&data=relh&data=feel&data=sknt&data=sped&data=alti&data=mslp&data=drct&data=ice_accretion_1hr&data=p01m&data=vsby&data=gust&data=skyc1&data=peak_wind_gust&data=snowdepth&year1=2024&month1=1&day1=1&year2=2024&month2=1&day2=4&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&direct=no&report_type=3&report_type=4\n" + ] + }, + { + "name": 
"stderr", + "output_type": "stream", + "text": [ + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:70: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"datetime\"] = df[\"datetime\"].map(lambda x: old_timezone.localize(datetime.strptime(x, \"%Y-%m-%d %H:%M\")).astimezone(new_timezone))\n", + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:71: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"cfs\"] = pd.to_numeric(df['cfs'], errors='coerce')\n", + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:73: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"height\"] = pd.to_numeric(df['height'], errors='coerce')\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Empty DataFrame\n", + "Columns: [station, valid, tmpf, dwpf, relh, feel, sknt, sped, alti, mslp, drct, ice_accretion_1hr, p01m, vsby, gust, skyc1, peak_wind_gust, snowdepth]\n", + "Index: []\n", + "after\n", + "Empty DataFrame\n", + "Columns: [Unnamed: 0, hour_updated, p01m, valid, tmpf, dwpf, ice_accretion_1hr, mslp, drct, sped, alti, relh, sknt, feel, vsby, gust, skyc1, peak_wind_gust, 
snowdepth]\n", + "Index: []\n", + "after\n", + "Initial ASOS data empty, trying again\n", + "Getting request from ASOS\n", + "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=TT400&data=tmpf&data=dwpf&data=relh&data=feel&data=sknt&data=sped&data=alti&data=mslp&data=drct&data=ice_accretion_1hr&data=p01m&data=vsby&data=gust&data=skyc1&data=peak_wind_gust&data=snowdepth&year1=2024&month1=1&day1=1&year2=2024&month2=1&day2=4&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&direct=no&report_type=3&report_type=4\n", + "Empty DataFrame\n", + "Columns: [station, valid, tmpf, dwpf, relh, feel, sknt, sped, alti, mslp, drct, ice_accretion_1hr, p01m, vsby, gust, skyc1, peak_wind_gust, snowdepth]\n", + "Index: []\n", + "after\n", + "Empty DataFrame\n", + "Columns: [Unnamed: 0, hour_updated, p01m, valid, tmpf, dwpf, ice_accretion_1hr, mslp, drct, sped, alti, relh, sknt, feel, vsby, gust, skyc1, peak_wind_gust, snowdepth]\n", + "Index: []\n", + "after\n", + "Initial ASOS data empty, trying again\n", + "Getting request from ASOS\n", + "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=0A0166&data=tmpf&data=dwpf&data=relh&data=feel&data=sknt&data=sped&data=alti&data=mslp&data=drct&data=ice_accretion_1hr&data=p01m&data=vsby&data=gust&data=skyc1&data=peak_wind_gust&data=snowdepth&year1=2024&month1=1&day1=1&year2=2024&month2=1&day2=4&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&direct=no&report_type=3&report_type=4\n", + "Empty DataFrame\n", + "Columns: [station, valid, tmpf, dwpf, relh, feel, sknt, sped, alti, mslp, drct, ice_accretion_1hr, p01m, vsby, gust, skyc1, peak_wind_gust, snowdepth]\n", + "Index: []\n", + "after\n", + "Empty DataFrame\n", + "Columns: [Unnamed: 0, hour_updated, p01m, valid, tmpf, dwpf, ice_accretion_1hr, mslp, drct, sped, alti, relh, sknt, feel, vsby, gust, skyc1, peak_wind_gust, snowdepth]\n", + "Index: []\n", + "after\n", + "Initial ASOS data empty, trying again\n", + "Getting request 
from ASOS\n", + "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=09F6EC&data=tmpf&data=dwpf&data=relh&data=feel&data=sknt&data=sped&data=alti&data=mslp&data=drct&data=ice_accretion_1hr&data=p01m&data=vsby&data=gust&data=skyc1&data=peak_wind_gust&data=snowdepth&year1=2024&month1=1&day1=1&year2=2024&month2=1&day2=4&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&direct=no&report_type=3&report_type=4\n", + "Empty DataFrame\n", + "Columns: [station, valid, tmpf, dwpf, relh, feel, sknt, sped, alti, mslp, drct, ice_accretion_1hr, p01m, vsby, gust, skyc1, peak_wind_gust, snowdepth]\n", + "Index: []\n", + "after\n", + "Empty DataFrame\n", + "Columns: [Unnamed: 0, hour_updated, p01m, valid, tmpf, dwpf, ice_accretion_1hr, mslp, drct, sped, alti, relh, sknt, feel, vsby, gust, skyc1, peak_wind_gust, snowdepth]\n", + "Index: []\n", + "after\n", + "Initial ASOS data empty, trying again\n", + "Getting request from ASOS\n", + "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=KDYA&data=tmpf&data=dwpf&data=relh&data=feel&data=sknt&data=sped&data=alti&data=mslp&data=drct&data=ice_accretion_1hr&data=p01m&data=vsby&data=gust&data=skyc1&data=peak_wind_gust&data=snowdepth&year1=2024&month1=1&day1=1&year2=2024&month2=1&day2=4&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&direct=no&report_type=3&report_type=4\n", + " station valid tmpf dwpf relh feel sknt sped alti \\\n", + "0 DYA 2024-01-01 00:15 46.4 35.6 65.83 45.15 3.0 3.45 30.13 \n", + "1 DYA 2024-01-01 00:35 45.0 35.4 68.87 43.55 3.0 3.45 30.13 \n", + "2 DYA 2024-01-01 00:55 45.0 34.7 66.98 42.60 4.0 4.60 30.13 \n", + "3 DYA 2024-01-01 01:15 43.5 34.7 70.92 41.84 3.0 3.45 30.13 \n", + "4 DYA 2024-01-01 01:35 44.8 34.0 65.63 43.32 3.0 3.45 30.13 \n", + ".. ... ... ... ... ... ... ... ... ... 
\n", + "211 DYA 2024-01-03 22:35 43.7 37.0 77.10 42.06 3.0 3.45 30.11 \n", + "212 DYA 2024-01-03 22:55 43.7 37.4 78.32 43.70 0.0 0.00 30.11 \n", + "213 DYA 2024-01-03 23:15 42.8 37.6 81.71 42.80 0.0 0.00 30.11 \n", + "214 DYA 2024-01-03 23:35 42.0 37.8 84.93 42.00 0.0 0.00 30.11 \n", + "215 DYA 2024-01-03 23:55 41.5 37.6 85.90 41.50 0.0 0.00 30.11 \n", + "\n", + " mslp drct ice_accretion_1hr p01m vsby gust skyc1 peak_wind_gust \\\n", + "0 M 160.0 M 0.0 10.0 M CLR M \n", + "1 M 150.0 M 0.0 10.0 M CLR M \n", + "2 M 150.0 M 0.0 10.0 M CLR M \n", + "3 M 160.0 M 0.0 10.0 M CLR M \n", + "4 M 150.0 M 0.0 10.0 M CLR M \n", + ".. ... ... ... ... ... ... ... ... \n", + "211 M 320.0 M 0.0 10.0 M SCT M \n", + "212 M 0.0 M 0.0 10.0 M OVC M \n", + "213 M 0.0 M 0.0 10.0 M BKN M \n", + "214 M 0.0 M 0.0 10.0 M OVC M \n", + "215 M 0.0 M 0.0 10.0 M BKN M \n", + "\n", + " snowdepth \n", + "0 M \n", + "1 M \n", + "2 M \n", + "3 M \n", + "4 M \n", + ".. ... \n", + "211 M \n", + "212 M \n", + "213 M \n", + "214 M \n", + "215 M \n", + "\n", + "[216 rows x 18 columns]\n", + "after\n", + " Unnamed: 0 hour_updated p01m valid tmpf \\\n", + "0 0 2024-01-01 01:00:00 0.0 2024-01-01 00:15 45.466667 \n", + "1 1 2024-01-01 02:00:00 0.0 2024-01-01 01:15 44.233333 \n", + "2 2 2024-01-01 03:00:00 0.0 2024-01-01 02:15 44.433333 \n", + "3 3 2024-01-01 04:00:00 0.0 2024-01-01 03:15 44.200000 \n", + "4 4 2024-01-01 05:00:00 0.0 2024-01-01 04:15 45.266667 \n", + ".. ... ... ... ... ... 
\n", + "67 67 2024-01-03 20:00:00 0.0 2024-01-03 19:15 43.133333 \n", + "68 68 2024-01-03 21:00:00 0.0 2024-01-03 20:15 43.966667 \n", + "69 69 2024-01-03 22:00:00 0.0 2024-01-03 21:15 44.600000 \n", + "70 70 2024-01-03 23:00:00 0.0 2024-01-03 22:15 43.866667 \n", + "71 71 2024-01-04 00:00:00 0.0 2024-01-03 23:15 42.100000 \n", + "\n", + " dwpf ice_accretion_1hr mslp drct sped alti relh sknt \\\n", + "0 35.233333 NaN NaN 160.0 3.45 30.13 65.83 3.0 \n", + "1 34.466667 NaN NaN 160.0 3.45 30.13 70.92 3.0 \n", + "2 33.933333 NaN NaN 160.0 4.60 30.13 61.74 4.0 \n", + "3 34.233333 NaN NaN 0.0 0.00 30.13 67.49 0.0 \n", + "4 34.333333 NaN NaN 170.0 3.45 30.13 60.84 3.0 \n", + ".. ... ... ... ... ... ... ... ... \n", + "67 37.166667 NaN NaN 320.0 4.60 30.11 81.33 4.0 \n", + "68 35.600000 NaN NaN 0.0 0.00 30.10 74.11 0.0 \n", + "69 35.866667 NaN NaN 0.0 0.00 30.09 71.04 0.0 \n", + "70 36.900000 NaN NaN 310.0 4.60 30.11 73.58 4.0 \n", + "71 37.666667 NaN NaN 0.0 0.00 30.11 81.71 0.0 \n", + "\n", + " feel vsby gust skyc1 peak_wind_gust snowdepth \n", + "0 45.15 10.0 NaN CLR NaN NaN \n", + "1 41.84 10.0 NaN CLR NaN NaN \n", + "2 44.00 10.0 NaN CLR NaN NaN \n", + "3 44.80 10.0 NaN CLR NaN NaN \n", + "4 45.61 10.0 NaN CLR NaN NaN \n", + ".. ... ... ... ... ... ... \n", + "67 39.10 10.0 NaN BKN NaN NaN \n", + "68 43.70 10.0 NaN SCT NaN NaN \n", + "69 44.60 10.0 NaN OVC NaN NaN \n", + "70 41.67 10.0 NaN SCT NaN NaN \n", + "71 42.80 10.0 NaN BKN NaN NaN \n", + "\n", + "[72 rows x 19 columns]\n", + "after\n", + "Scraping completed\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/gisaac/anaconda3/lib/python3.11/site-packages/google/auth/_default.py:76: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. 
See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. \n", + " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n", + "/Users/gisaac/anaconda3/lib/python3.11/site-packages/google/auth/_default.py:76: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. \n", + " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LoadJob\n", + "ASOS data written to BigQuery\n", + "Index(['hour_updated', 'p01m', 'valid', 'tmpf', 'dwpf', 'ice_accretion_1hr',\n", + " 'mslp', 'drct', 'sped', 'alti', 'relh', 'sknt', 'feel', 'vsby', 'gust',\n", + " 'skyc1', 'peak_wind_gust', 'snowdepth', 'station_id', 'index',\n", + " 'datetime', 'cfs', 'height', 'sensing_time', 'base_url', 'usgs_id'],\n", + " dtype='object')\n", + "LoadJob\n", + "Now getting data for 23\n", + "03111500.json\n", + "Getting request from USGS\n", + "http://waterservices.usgs.gov/nwis/iv/?format=rdb,1.0&sites=03111500&startDT=2024-01-01&endDT=2024-01-02¶meterCd=00060,00065,00045&siteStatus=all\n", + "Request finished\n", + "Getting request from ASOS\n", + "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=HLG&data=tmpf&data=dwpf&data=relh&data=feel&data=sknt&data=sped&data=alti&data=mslp&data=drct&data=ice_accretion_1hr&data=p01m&data=vsby&data=gust&data=skyc1&data=peak_wind_gust&data=snowdepth&year1=2024&month1=1&day1=1&year2=2024&month2=1&day2=4&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&direct=no&report_type=3&report_type=4\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:70: 
SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"datetime\"] = df[\"datetime\"].map(lambda x: old_timezone.localize(datetime.strptime(x, \"%Y-%m-%d %H:%M\")).astimezone(new_timezone))\n", + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:71: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"cfs\"] = pd.to_numeric(df['cfs'], errors='coerce')\n", + "/Users/gisaac/Documents/GitHub/river_flow_notebooks/scraping_functions.py:73: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[\"height\"] = pd.to_numeric(df['height'], errors='coerce')\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " station valid tmpf dwpf relh feel sknt sped alti \\\n", + "0 HLG 2024-01-01 00:53 37.0 34.0 88.78 32.33 5.00 5.75 29.92 \n", + "1 HLG 2024-01-01 01:30 37.0 34.0 88.78 32.33 5.00 5.75 29.92 \n", + "2 HLG 2024-01-01 01:46 38.0 34.0 85.35 32.76 6.00 6.90 29.92 \n", + "3 HLG 2024-01-01 01:53 38.0 34.0 85.35 32.76 6.00 6.90 29.92 \n", + "4 HLG 2024-01-01 02:21 38.0 34.0 85.35 32.76 6.00 6.90 29.92 \n", + ".. ... ... ... ... ... ... ... ... ... 
\n", + "110 HLG 2024-01-03 19:53 41.0 26.0 54.84 33.84 11.00 12.65 29.96 \n", + "111 HLG 2024-01-03 20:53 38.0 25.0 59.13 32.09 7.00 8.05 29.96 \n", + "112 HLG 2024-01-03 21:53 38.0 25.0 59.13 32.76 6.00 6.90 29.96 \n", + "113 HLG 2024-01-03 22:53 36.0 25.0 63.98 33.26 3.00 3.45 29.96 \n", + "114 HLG 2024-01-03 23:53 35.0 25.0 66.57 29.96 5.00 5.75 29.97 \n", + "\n", + " mslp drct ice_accretion_1hr p01m vsby gust skyc1 \\\n", + "0 1014.50 180.00 M T 10.0 M FEW \n", + "1 M 190.00 M 0.00 10.0 M BKN \n", + "2 M 200.00 M 0.00 10.0 M SCT \n", + "3 1014.30 200.00 M 0.00 10.0 M SCT \n", + "4 M 210.00 M T 10.0 M BKN \n", + ".. ... ... ... ... ... ... ... \n", + "110 1015.70 210.00 M 0.00 10.0 17.00 SCT \n", + "111 1016.00 200.00 M 0.00 10.0 M CLR \n", + "112 1016.00 240.00 M 0.00 10.0 M CLR \n", + "113 1016.00 230.00 M 0.00 10.0 M BKN \n", + "114 1016.30 210.00 M 0.00 10.0 M BKN \n", + "\n", + " peak_wind_gust snowdepth \n", + "0 M M \n", + "1 M M \n", + "2 M M \n", + "3 M M \n", + "4 M M \n", + ".. ... ... \n", + "110 M M \n", + "111 M M \n", + "112 M M \n", + "113 M M \n", + "114 M M \n", + "\n", + "[115 rows x 18 columns]\n", + "after\n", + " Unnamed: 0 hour_updated p01m valid tmpf dwpf \\\n", + "0 0 2024-01-01 01:00:00 0.00 2024-01-01 00:53 37.000000 34.0 \n", + "1 1 2024-01-01 02:00:00 0.00 2024-01-01 01:30 37.666667 34.0 \n", + "2 2 2024-01-01 03:00:00 0.00 2024-01-01 02:21 38.000000 34.0 \n", + "3 3 2024-01-01 04:00:00 0.00 2024-01-01 03:25 38.000000 34.0 \n", + "4 4 2024-01-01 05:00:00 0.25 2024-01-01 04:01 36.800000 33.2 \n", + ".. ... ... ... ... ... ... 
\n", + "67 67 2024-01-03 20:00:00 0.00 2024-01-03 19:53 41.000000 26.0 \n", + "68 68 2024-01-03 21:00:00 0.00 2024-01-03 20:53 38.000000 25.0 \n", + "69 69 2024-01-03 22:00:00 0.00 2024-01-03 21:53 38.000000 25.0 \n", + "70 70 2024-01-03 23:00:00 0.00 2024-01-03 22:53 36.000000 25.0 \n", + "71 71 2024-01-04 00:00:00 0.00 2024-01-03 23:53 35.000000 25.0 \n", + "\n", + " ice_accretion_1hr mslp drct sped alti relh sknt feel vsby \\\n", + "0 NaN 1014.5 180.0 5.75 29.92 88.78 5.0 32.33 10.0 \n", + "1 NaN 1014.3 190.0 5.75 29.92 88.78 5.0 32.33 10.0 \n", + "2 NaN 1014.5 210.0 6.90 29.92 85.35 6.0 32.76 10.0 \n", + "3 NaN 1014.9 220.0 4.60 29.93 85.35 4.0 34.43 10.0 \n", + "4 NaN 1015.1 230.0 10.35 29.94 88.83 9.0 30.97 7.0 \n", + ".. ... ... ... ... ... ... ... ... ... \n", + "67 NaN 1015.7 210.0 12.65 29.96 54.84 11.0 33.84 10.0 \n", + "68 NaN 1016.0 200.0 8.05 29.96 59.13 7.0 32.09 10.0 \n", + "69 NaN 1016.0 240.0 6.90 29.96 59.13 6.0 32.76 10.0 \n", + "70 NaN 1016.0 230.0 3.45 29.96 63.98 3.0 33.26 10.0 \n", + "71 NaN 1016.3 210.0 5.75 29.97 66.57 5.0 29.96 10.0 \n", + "\n", + " gust skyc1 peak_wind_gust snowdepth \n", + "0 NaN FEW NaN NaN \n", + "1 NaN BKN NaN NaN \n", + "2 NaN BKN NaN NaN \n", + "3 NaN FEW NaN NaN \n", + "4 15.0 BKN NaN NaN \n", + ".. ... ... ... ... \n", + "67 17.0 SCT NaN NaN \n", + "68 NaN CLR NaN NaN \n", + "69 NaN CLR NaN NaN \n", + "70 NaN BKN NaN NaN \n", + "71 NaN BKN NaN NaN \n", + "\n", + "[72 rows x 19 columns]\n", + "after\n", + "Scraping completed\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/gisaac/anaconda3/lib/python3.11/site-packages/google/auth/_default.py:76: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. 
\n", + " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n", + "/Users/gisaac/anaconda3/lib/python3.11/site-packages/google/auth/_default.py:76: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds. \n", + " warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LoadJob\n", + "ASOS data written to BigQuery\n", + "Index(['hour_updated', 'p01m', 'valid', 'tmpf', 'dwpf', 'ice_accretion_1hr',\n", + " 'mslp', 'drct', 'sped', 'alti', 'relh', 'sknt', 'feel', 'vsby', 'gust',\n", + " 'skyc1', 'peak_wind_gust', 'snowdepth', 'station_id', 'index',\n", + " 'datetime', 'cfs', 'height', 'sensing_time', 'base_url', 'usgs_id'],\n", + " dtype='object')\n" + ] + } + ], + "source": [ + "run_scrape(datetime(2024, 1, 1), datetime(2024, 1, 2), idx=6)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5515d4f", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/scraping_functions.py b/scraping_functions.py index 3433601..862d005 100644 --- a/scraping_functions.py +++ b/scraping_functions.py @@ -36,8 +36,15 @@ def __init__(self, start_time: datetime, end_time: datetime, meta_data_path: str # base_url = "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station={}&data=tmpf&data=dwpf&data=p01m&data=mslp&data=drct&data=ice_accretion_1hr&year1={}&month1={}&day1={}&year2={}&month2={}&day2={}&tz=Etc%2FUTC&format=onlycomma&latlon=no&missing=M&trace=T&direct=no&report_type=1&report_type=2" base_url = 
"https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station={}&data=tmpf&data=dwpf&data=relh&data=feel&data=sknt&data=sped&data=alti&data=mslp&data=drct&data=ice_accretion_1hr&data=p01m&data=vsby&data=gust&data=skyc1&data=peak_wind_gust&data=snowdepth&year1={}&month1={}&day1={}&year2={}&month2={}&day2={}&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&direct=no&report_type=3&report_type=4" asos_path = get_asos_data_from_url(self.meta_data["stations"][0]["station_id"], base_url, self.start_time, self.end_time + timedelta(days=2), self.meta_data, self.meta_data) + self.asos_df, self.precip, self.temp = process_asos_csv(asos_path) self.asos_df["station_id"] = self.meta_data["stations"][0]["station_id"] + idx = 1 + while self.asos_df.empty: + print("Initial ASOS data empty, trying again") + self.get_asos_data(idx, base_url) + idx += 1 + print("Scraping completed") self.bq_connect = BiqQueryConnector() res = False @@ -46,7 +53,7 @@ def __init__(self, start_time: datetime, end_time: datetime, meta_data_path: str if res: print("ASOS data written to BigQuery") self.r.set(self.meta_data["stations"][0]["station_id"] + "_" + str(self.start_time) + "_" + str(self.end_time), "True") - + @staticmethod def process_intermediate_csv(df: pd.DataFrame) -> (pd.DataFrame, int, int, int): """ @@ -161,7 +168,12 @@ def combine_snotel_with_df(self): self.snotel_df = get_snotel_data(self.start_time, self.end_time, self.meta_data["snotel"]["triplet"]) self.snotel_df["Date"] = pd.to_datetime(self.snotel_df["Date"], utc=True) self.final_df = self.joined_df.merge(self.snotel_df, left_on="hour_updated", right_on="Date", how="left") - self.final_df["filled_snow"] = self.final_df["Snow Depth (in)"].interpolate(method='nearest').ffill().bfill() + self.final_df["snotel_snow_depth"] = pd.to_numeric(self.final_df["Snow Depth (in)"], errors='coerce') + self.final_df["swe"] = pd.to_numeric(self.final_df["Snow Water Equivalent (in)"], errors='coerce') + 
self.final_df["change_swe"] = pd.to_numeric(self.final_df["Change In Snow Water Equivalent (in)"], errors='coerce') + self.final_df["change_snow_depth"] = pd.to_numeric(self.final_df["Change In Snow Depth (in)"], errors='coerce') + self.final_df["observed_temp_snotel"] = pd.to_numeric(self.final_df["Observed Air Temperature (degrees farenheit)"], errors='coerce') + self.final_df.drop(columns=['Snow Water Equivalent (in)', 'Change In Snow Water Equivalent (in)', 'Snow Depth (in)', 'Change In Snow Depth (in)', 'Observed Air Temperature (degrees farenheit)'], inplace=True) def combine_sentinel(self, sentinel_df, tile) -> None: """Function to combine the Sentinel data with the joined ASOS, USGS, and SNOTEL data. @@ -174,6 +186,10 @@ def combine_sentinel(self, sentinel_df, tile) -> None: def write_final_df_to_bq(self, table_name: str): self.bq_connect.write_to_bq(self.final_df, table_name) + def get_asos_data(self, station_idx, base_url): + asos_path = get_asos_data_from_url(self.meta_data["stations"][station_idx]["station_id"], base_url, self.start_time, self.end_time + timedelta(days=2), self.meta_data, self.meta_data) + self.asos_df, self.precip, self.temp = process_asos_csv(asos_path) + self.asos_df["station_id"] = self.meta_data["stations"][0]["station_id"] class BiqQueryConnector(object): def __init__(self) -> None: