Skip to content

Commit

Permalink
notebook cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
lfunderburk committed May 3, 2023
1 parent 4c2982d commit 85afa59
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 18,872 deletions.
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -130,4 +130,8 @@ dmypy.json
.DS_Store

*.metadata
*.DS_Store
*.DS_Store
notebooks/data_extraction.ipynb
notebooks/predict_model.ipynb
notebooks/train_model.ipynb
notebooks/clustering.ipynb
147 changes: 10 additions & 137 deletions notebooks/hugging_face_exploration.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,94 +2,9 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5795f461e1f24e9d88eece50c7f07a4e",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading (…)okenizer_config.json: 0%| | 0.00/2.35k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e3b88c1d1324400b864290fc06fc4688",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading (…)/main/tokenizer.json: 0%| | 0.00/2.42M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0347733d4069420582bf069c6dee0919",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading (…)cial_tokens_map.json: 0%| | 0.00/2.20k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4d23f37a8bf44bdcb9fe994195fe6602",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading (…)lve/main/config.json: 0%| | 0.00/1.49k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8f0d49d4d590427aa9aa74d225d4f319",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading pytorch_model.bin: 0%| | 0.00/892M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"SELECT name FROM table WHERE age = 25\n"
]
}
],
"outputs": [],
"source": [
"from typing import List\n",
"from transformers import AutoTokenizer, AutoModelForSeq2SeqLM\n",
Expand Down Expand Up @@ -117,7 +32,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -127,18 +42,9 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/2t/nqb9hcfs07n91h4v5p34slp00000gn/T/ipykernel_62938/4036504853.py:5: FutureWarning: The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
" df.columns = df.columns.str.replace('.', '_')\n"
]
}
],
"outputs": [],
"source": [
"predicted_data_path = '/Users/macpro/Documents/GitHub/fuel-electric-hybrid-vehicle-ml/data/predicted-data/vehicle_data_with_clusters.csv'\n",
"\n",
Expand All @@ -155,7 +61,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -165,51 +71,18 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['vehicle_id', 'vehicleclass_', 'make_', 'model.1_', 'model_year',\n",
" 'cylinders_', 'fuelconsumption_city(l/100km)',\n",
" 'fuelconsumption_hwy(l/100km)', 'fuelconsumption_comb(l/100km)',\n",
" 'co2emissions_(g/km)', 'number_of_gears', 'predicted_co2_rating',\n",
" 'enginesize_(l)', 'transmission_', 'fuel_type',\n",
" 'fuelconsumption_comb(mpg)', 'smog_rating', 'transmission_type',\n",
" 'mapped_fuel_type', 'type_of_wheel_drive', 'vehicle_type', 'motor_(kw)',\n",
" 'consumption_combinedle/100km', 'range1_(km)', 'recharge_time(h)',\n",
" 'fuel_type2', 'range2_(km)', 'hybrid_fuels',\n",
" 'consumption_city(kwh/100km)', 'fuelconsumption_hwy(kwh/100km)',\n",
" 'fuelconsumption_comb(kwh/100km)', 'fuelconsumption_city(le/100km)',\n",
" 'fuelconsumption_hwy(le/100km)', 'fuelconsumption_comb(le/100km)',\n",
" 'range_(km)', 'hybrid_in_fuel', 'hybrid_in_electric',\n",
" 'aggregate_levels'],\n",
" dtype='object')"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"SELECT hybrid_fuels FROM table WHERE vehicle_type = vehicle_class_ = vehicle_id AND make_ = vehicle_year AND fuelconsumption_comb(l/100km) = fuelconsumption_city(kwh/100km) = fuelconsumption_comb(kwh/100km) = fuel_type = hybrid_car\n"
]
}
],
"outputs": [],
"source": [
"print(inference(question=\"Show me hybrid car models\", table=df.columns))"
]
Expand Down
Loading

0 comments on commit 85afa59

Please sign in to comment.