Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
SanBast authored Sep 21, 2022
0 parents commit 18a09b2
Show file tree
Hide file tree
Showing 2 changed files with 2,035 additions and 0 deletions.
330 changes: 330 additions & 0 deletions 10s_windowing.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,330 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import matplotlib\n",
"import seaborn as sns\n",
"from pylab import rcParams\n",
"import os\n",
"import gzip\n",
"from tqdm import tqdm\n",
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Render figures inline, using the 'retina' figure format.\n",
"%matplotlib inline\n",
"%config InlineBackend.figure_format='retina'\n",
"\n",
"# Custom six-colour palette applied to seaborn below.\n",
"HAPPY_COLORS_PALETTE = ['#01BEFE', '#FFDD00', '#FF7D00', '#FF006D', '#ADFF02', '#8F00FF']\n",
"\n",
"# Apply the base seaborn theme first, then replace its 'muted' palette with the custom one.\n",
"sns.set(style='whitegrid', palette='muted', font_scale=1.2)\n",
"sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))\n",
"\n",
"# Default figure size as (width, height).\n",
"rcParams['figure.figsize'] = (20, 10)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"full_df = pd.read_csv('full_df.csv')\n",
"\n",
"# Keep every sample whose IndoorProb is not exactly 100.\n",
"# .copy() makes outdoor_df an independent frame, so column assignments made on it\n",
"# afterwards no longer raise SettingWithCopyWarning.\n",
"outdoor_df = full_df[full_df['IndoorProb'] != 100].copy()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Number of consecutive rows that share one window id.\n",
"WINDOW_SIZE = 10\n",
"\n",
"# Label rows with a 1-based window id, cut purely by row position in outdoor_df.\n",
"# assign() returns a new frame instead of writing into a filtered slice,\n",
"# which avoids pandas' SettingWithCopyWarning.\n",
"outdoor_df = outdoor_df.assign(series_id=np.arange(len(outdoor_df)) // WINDOW_SIZE + 1)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Convert the Timestamp column to datetimes, interpreting the stored numbers as seconds (unit='s').\n",
"# assign() builds a new frame, so nothing is written into a filtered slice\n",
"# and pandas does not emit SettingWithCopyWarning.\n",
"outdoor_df = outdoor_df.assign(Timestamp=pd.to_datetime(outdoor_df['Timestamp'], unit='s'))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"19040895"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of rows in the outdoor subset.\n",
"outdoor_df.shape[0]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Unnamed: 0</th>\n",
" <th>Patient</th>\n",
" <th>Cohort</th>\n",
" <th>Day</th>\n",
" <th>StepPerSec</th>\n",
" <th>Timestamp</th>\n",
" <th>IndoorProb</th>\n",
" <th>series_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>29017</th>\n",
" <td>29017</td>\n",
" <td>1000</td>\n",
" <td>HA</td>\n",
" <td>Day1</td>\n",
" <td>0.875</td>\n",
" <td>2020-08-13 07:03:37</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29018</th>\n",
" <td>29018</td>\n",
" <td>1000</td>\n",
" <td>HA</td>\n",
" <td>Day1</td>\n",
" <td>0.875</td>\n",
" <td>2020-08-13 07:03:38</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29019</th>\n",
" <td>29019</td>\n",
" <td>1000</td>\n",
" <td>HA</td>\n",
" <td>Day1</td>\n",
" <td>0.875</td>\n",
" <td>2020-08-13 07:03:39</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29020</th>\n",
" <td>29020</td>\n",
" <td>1000</td>\n",
" <td>HA</td>\n",
" <td>Day1</td>\n",
" <td>0.875</td>\n",
" <td>2020-08-13 07:03:40</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29021</th>\n",
" <td>29021</td>\n",
" <td>1000</td>\n",
" <td>HA</td>\n",
" <td>Day1</td>\n",
" <td>0.875</td>\n",
" <td>2020-08-13 07:03:41</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Unnamed: 0 Patient Cohort Day StepPerSec Timestamp \\\n",
"29017 29017 1000 HA Day1 0.875 2020-08-13 07:03:37 \n",
"29018 29018 1000 HA Day1 0.875 2020-08-13 07:03:38 \n",
"29019 29019 1000 HA Day1 0.875 2020-08-13 07:03:39 \n",
"29020 29020 1000 HA Day1 0.875 2020-08-13 07:03:40 \n",
"29021 29021 1000 HA Day1 0.875 2020-08-13 07:03:41 \n",
"\n",
" IndoorProb series_id \n",
"29017 0 1 \n",
"29018 0 1 \n",
"29019 0 1 \n",
"29020 0 1 \n",
"29021 0 1 "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"outdoor_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Summarise each (series_id, Patient) window, using only samples with StepPerSec < 1.\n",
"# A single groupby().agg() replaces the former per-group Python loop (about 14 minutes\n",
"# for ~1.8M groups) and keeps real dtypes; the old np.array(rows) round-trip turned\n",
"# every column into dtype=object.\n",
"# NOTE(review): 'first'/'last' skip nulls, whereas the loop used positional\n",
"# .unique()[0] / .iloc[0] / .iloc[-1]; results are identical as long as Cohort and\n",
"# Timestamp contain no missing values - confirm.\n",
"low_step_df = outdoor_df[outdoor_df['StepPerSec'] < 1]\n",
"\n",
"missing_df = (\n",
"    low_step_df\n",
"    .groupby(['series_id', 'Patient'])\n",
"    .agg(\n",
"        Cohort=('Cohort', 'first'),\n",
"        Duration=('StepPerSec', 'count'),\n",
"        start_timestamp=('Timestamp', 'first'),\n",
"        end_timestamp=('Timestamp', 'last'),\n",
"        mean=('StepPerSec', 'mean'),\n",
"        std=('StepPerSec', 'std'),\n",
"    )\n",
"    # Patient is a grouping key, so it comes back from the index; series_id is dropped below.\n",
"    .reset_index()\n",
"    .rename(columns={'Patient': 'Subject'})\n",
"    .loc[:, ['Subject', 'Cohort', 'Duration', 'start_timestamp', 'end_timestamp', 'mean', 'std']]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Persist the per-window summary as CSV; the frame's index is written as the first column.\n",
"missing_df.to_csv(path_or_buf='walking_missing.csv')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# An .xlsx worksheet holds at most 1,048,576 rows and pandas raises ValueError for a larger\n",
"# frame, so the summary (about 1.8M rows) is written in chunks, one sheet per chunk.\n",
"# Requires an Excel writer engine such as openpyxl in the kernel environment.\n",
"ROWS_PER_SHEET = 1_000_000\n",
"\n",
"with pd.ExcelWriter('file_for_missing_steps.xlsx') as writer:\n",
"    # max(..., 1) still produces one header-only sheet when the frame is empty.\n",
"    chunk_starts = range(0, max(len(missing_df), 1), ROWS_PER_SHEET)\n",
"    for sheet_no, start in enumerate(chunk_starts, start=1):\n",
"        # The first sheet keeps its original name; later sheets get a numeric suffix.\n",
"        sheet_name = '10s_steps' if sheet_no == 1 else f'10s_steps_{sheet_no}'\n",
"        missing_df.iloc[start:start + ROWS_PER_SHEET].to_excel(writer, sheet_name=sheet_name)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.12 ('base')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "9324f6f91069ef608944cf59327718832b88647e83e66beddcee769fe0e7a057"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit 18a09b2

Please sign in to comment.