notebook cleanup

lfunderburk · May 3, 2023 · 85afa59 · 85afa59
1 parent 4c2982d
commit 85afa59
Show file tree

Hide file tree

Showing 4 changed files with 39 additions and 18,872 deletions.
diff --git a/.gitignore b/.gitignore
@@ -130,4 +130,8 @@ dmypy.json
 .DS_Store
 
 *.metadata
-*.DS_Store
+*.DS_Store
+notebooks/data_extraction.ipynb
+notebooks/predict_model.ipynb
+notebooks/train_model.ipynb
+notebooks/clustering.ipynb
diff --git a/notebooks/hugging_face_exploration.ipynb b/notebooks/hugging_face_exploration.ipynb
@@ -2,94 +2,9 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "5795f461e1f24e9d88eece50c7f07a4e",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading (…)okenizer_config.json:   0%|          | 0.00/2.35k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "e3b88c1d1324400b864290fc06fc4688",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "0347733d4069420582bf069c6dee0919",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading (…)cial_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "4d23f37a8bf44bdcb9fe994195fe6602",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading (…)lve/main/config.json:   0%|          | 0.00/1.49k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "8f0d49d4d590427aa9aa74d225d4f319",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading pytorch_model.bin:   0%|          | 0.00/892M [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "SELECT name FROM table WHERE age = 25\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from typing import List\n",
     "from transformers import AutoTokenizer, AutoModelForSeq2SeqLM\n",
@@ -117,7 +32,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -127,18 +42,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/var/folders/2t/nqb9hcfs07n91h4v5p34slp00000gn/T/ipykernel_62938/4036504853.py:5: FutureWarning: The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
-      "  df.columns = df.columns.str.replace('.', '_')\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "predicted_data_path = '/Users/macpro/Documents/GitHub/fuel-electric-hybrid-vehicle-ml/data/predicted-data/vehicle_data_with_clusters.csv'\n",
     "\n",
@@ -155,7 +61,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -165,51 +71,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Index(['vehicle_id', 'vehicleclass_', 'make_', 'model.1_', 'model_year',\n",
-       "       'cylinders_', 'fuelconsumption_city(l/100km)',\n",
-       "       'fuelconsumption_hwy(l/100km)', 'fuelconsumption_comb(l/100km)',\n",
-       "       'co2emissions_(g/km)', 'number_of_gears', 'predicted_co2_rating',\n",
-       "       'enginesize_(l)', 'transmission_', 'fuel_type',\n",
-       "       'fuelconsumption_comb(mpg)', 'smog_rating', 'transmission_type',\n",
-       "       'mapped_fuel_type', 'type_of_wheel_drive', 'vehicle_type', 'motor_(kw)',\n",
-       "       'consumption_combinedle/100km', 'range1_(km)', 'recharge_time(h)',\n",
-       "       'fuel_type2', 'range2_(km)', 'hybrid_fuels',\n",
-       "       'consumption_city(kwh/100km)', 'fuelconsumption_hwy(kwh/100km)',\n",
-       "       'fuelconsumption_comb(kwh/100km)', 'fuelconsumption_city(le/100km)',\n",
-       "       'fuelconsumption_hwy(le/100km)', 'fuelconsumption_comb(le/100km)',\n",
-       "       'range_(km)', 'hybrid_in_fuel', 'hybrid_in_electric',\n",
-       "       'aggregate_levels'],\n",
-       "      dtype='object')"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "df.columns"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "SELECT hybrid_fuels FROM table WHERE vehicle_type = vehicle_class_ = vehicle_id AND make_ = vehicle_year AND fuelconsumption_comb(l/100km) = fuelconsumption_city(kwh/100km) = fuelconsumption_comb(kwh/100km) = fuel_type = hybrid_car\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "print(inference(question=\"Show me hybrid car models\", table=df.columns))"
    ]