Done ch-08
qpsy committed Jul 31, 2024
1 parent 1575899 commit 4ae2ae2
Showing 8 changed files with 2,029 additions and 5,542 deletions.
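
Every hunk below follows the same pattern: dataset loads that previously went through the rethinking helper package (RethinkingDataset) are replaced by direct pandas reads of the CSVs under ./data/, and the now-unused rethinking, daft, and autoreload imports are dropped. A minimal sketch of the before/after for one dataset, with the path and separator taken from the hunks below:

# before (removed): loader from the rethinking helper package
# d = RethinkingDataset.Howell1.get_dataset()

# after (added): plain pandas read; the Rethinking CSV files are semicolon-separated
import pandas as pd
d = pd.read_csv("./data/Howell1.csv", sep=";")
d.head()
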
32 changes: 5 additions & 27 deletions 04_geocentric_models-2.ipynb
@@ -42,28 +42,6 @@
"from collections import namedtuple"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"import rethinking"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"from rethinking.data import RethinkingDataset\n",
"from rethinking.data import dataframe_to_tensors\n",
"# from rethinking.mcmc import sample_posterior"
]
},
{
"cell_type": "code",
"execution_count": 7,
@@ -325,7 +303,7 @@
"metadata": {},
"outputs": [],
"source": [
"d = RethinkingDataset.Howell1.get_dataset()\n",
"d = pd.read_csv(\"./data/Howell1.csv\", sep=\";\")\n",
"d.head()"
]
},
@@ -808,7 +786,7 @@
"metadata": {},
"outputs": [],
"source": [
"d = RethinkingDataset.Howell1.get_dataset()\n",
"d = pd.read_csv(\"./data/Howell1.csv\", sep=\";\")\n",
"d2 = d[d.age > 18]"
]
},
@@ -1370,7 +1348,7 @@
"metadata": {},
"outputs": [],
"source": [
"d = RethinkingDataset.Howell1.get_dataset()\n",
"d = pd.read_csv(\"./data/Howell1.csv\", sep=\";\")\n",
"d2 = d[d.age > 18]"
]
},
@@ -1821,7 +1799,7 @@
"metadata": {},
"outputs": [],
"source": [
"d = RethinkingDataset.Howell1.get_dataset()\n",
"d = pd.read_csv(\"./data/Howell1.csv\", sep=\";\")\n",
"d2 = d[d.age > 18]\n",
"x_bar = d2.weight.mean()"
]
@@ -4256,7 +4234,7 @@
}
],
"source": [
"d = RethinkingDataset.Howell1.get_dataset()\n",
"d = pd.read_csv(\"./data/Howell1.csv\", sep=\";\")\n",
"d.describe()"
]
},
18 changes: 4 additions & 14 deletions 05_the_many_variables_and_the_spurious_waffles-2.ipynb
@@ -24,7 +24,6 @@
"import arviz as az\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import daft\n",
"from collections import namedtuple\n",
"# from causalgraphicalmodels import CausalGraphicalModel\n",
"\n",
@@ -36,14 +35,6 @@
"tfd = tfp.distributions\n",
"tfb = tfp.bijectors\n",
"\n",
"%load_ext autoreload\n",
"%autoreload 2\n",
"import rethinking\n",
"\n",
"from rethinking.data import RethinkingDataset\n",
"from rethinking.data import dataframe_to_tensors\n",
"# from rethinking.mcmc import sample_posterior\n",
"\n",
"plt.rcParams['figure.figsize'] = [6, 4]\n",
"\n",
"def a_key():\n",
@@ -75,8 +66,7 @@
"metadata": {},
"outputs": [],
"source": [
"d = RethinkingDataset.WaffleDivorce.get_dataset()\n",
"\n",
"d = pd.read_csv(\"./data/WaffleDivorce.csv\", sep=\";\")\n",
"# standardize variables\n",
"d[\"A\"] = d.MedianAgeMarriage.pipe(lambda x: (x - x.mean()) / x.std())\n",
"d[\"D\"] = d.Divorce.pipe(lambda x: (x - x.mean()) / x.std())"
@@ -2063,7 +2053,7 @@
}
],
"source": [
"d = RethinkingDataset.Milk.get_dataset()\n",
"d = pd.read_csv(\"./data/milk.csv\", sep=\";\")\n",
"d.head()"
]
},
@@ -3601,7 +3591,7 @@
}
],
"source": [
"d = RethinkingDataset.Howell1.get_dataset()\n",
"d = pd.read_csv(\"./data/Howell1.csv\", sep=\";\")\n",
"d.head()"
]
},
@@ -4136,7 +4126,7 @@
}
],
"source": [
"d = RethinkingDataset.Milk.get_dataset()\n",
"d = pd.read_csv(\"./data/milk.csv\", sep=\";\")\n",
"d.clade.unique()"
]
},
10 changes: 2 additions & 8 deletions 06_the_haunted_dag_and_the_causal_terror-2.ipynb
@@ -24,20 +24,16 @@
"import arviz as az\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import daft\n",
"from collections import namedtuple\n",
"from statsmodels.regression import linear_model\n",
"\n",
"import random\n",
"import jax\n",
"import jax.numpy as jnp\n",
"\n",
"from tensorflow_probability.substrates import jax as tfp\n",
"tfd = tfp.distributions\n",
"tfb = tfp.bijectors\n",
"\n",
"from rethinking.data import RethinkingDataset\n",
"\n",
"plt.rcParams['figure.figsize'] = [6, 4]"
]
},
@@ -1481,8 +1477,7 @@
"metadata": {},
"outputs": [],
"source": [
"d = RethinkingDataset.Milk.get_dataset()\n",
"\n",
"d = pd.read_csv(\"./data/milk.csv\", sep=\";\")\n",
"d[\"K\"] = d[\"kcal.per.g\"].pipe(lambda x: (x - x.mean()) / x.std())\n",
"d[\"F\"] = d[\"perc.fat\"].pipe(lambda x: (x - x.mean()) / x.std())\n",
"d[\"L\"] = d[\"perc.lactose\"].pipe(lambda x: (x - x.mean()) / x.std())"
@@ -2250,8 +2245,7 @@
}
],
"source": [
"d = RethinkingDataset.Milk.get_dataset()\n",
"\n",
"d = pd.read_csv(\"./data/milk.csv\", sep=\";\")\n",
"\n",
"def simcoll(r=0.9):\n",
"\n",
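
The standardization cells in these notebooks repeat the same pipe/lambda for every column; a shared helper is one way to write it (a sketch only, not part of this commit):

# standardize a pandas Series to zero mean and unit standard deviation
def standardize(s):
    return (s - s.mean()) / s.std()

d["K"] = standardize(d["kcal.per.g"])
d["F"] = standardize(d["perc.fat"])
d["L"] = standardize(d["perc.lactose"])
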
93 changes: 15 additions & 78 deletions 07_ulysses_compass-2.ipynb
@@ -20,7 +20,6 @@
"metadata": {},
"outputs": [],
"source": [
"from io import StringIO\n",
"import numpy as np\n",
"import arviz as az\n",
"import pandas as pd\n",
@@ -38,8 +37,6 @@
"tfd = tfp.distributions\n",
"tfb = tfp.bijectors\n",
"\n",
"from rethinking.data import RethinkingDataset\n",
"\n",
"plt.rcParams['figure.figsize'] = [6, 4]"
]
},
@@ -2488,7 +2485,7 @@
},
{
"cell_type": "code",
"execution_count": 120,
"execution_count": 2,
"metadata": {},
"outputs": [
{
@@ -2512,7 +2509,7 @@
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Unnamed: 0</th>\n",
" <th>rownames</th>\n",
" <th>speed</th>\n",
" <th>dist</th>\n",
" </tr>\n",
@@ -2553,99 +2550,39 @@
"</div>"
],
"text/plain": [
" Unnamed: 0 speed dist\n",
"0 1 4 2\n",
"1 2 4 10\n",
"2 3 7 4\n",
"3 4 7 22\n",
"4 5 8 16"
" rownames speed dist\n",
"0 1 4 2\n",
"1 2 4 10\n",
"2 3 7 4\n",
"3 4 7 22\n",
"4 5 8 16"
]
},
"execution_count": 120,
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# There is no CSV file for cars dataset in author's repo\n",
"# hence I have inlined the data. In R this dataset much be bundled in\n",
"# and this is why his code snippets do not need the csv file\n",
"\n",
"cars_data = \"\"\"\n",
"\"\",\"speed\",\"dist\"\n",
"\"1\",4,2\n",
"\"2\",4,10\n",
"\"3\",7,4\n",
"\"4\",7,22\n",
"\"5\",8,16\n",
"\"6\",9,10\n",
"\"7\",10,18\n",
"\"8\",10,26\n",
"\"9\",10,34\n",
"\"10\",11,17\n",
"\"11\",11,28\n",
"\"12\",12,14\n",
"\"13\",12,20\n",
"\"14\",12,24\n",
"\"15\",12,28\n",
"\"16\",13,26\n",
"\"17\",13,34\n",
"\"18\",13,34\n",
"\"19\",13,46\n",
"\"20\",14,26\n",
"\"21\",14,36\n",
"\"22\",14,60\n",
"\"23\",14,80\n",
"\"24\",15,20\n",
"\"25\",15,26\n",
"\"26\",15,54\n",
"\"27\",16,32\n",
"\"28\",16,40\n",
"\"29\",17,32\n",
"\"30\",17,40\n",
"\"31\",17,50\n",
"\"32\",18,42\n",
"\"33\",18,56\n",
"\"34\",18,76\n",
"\"35\",18,84\n",
"\"36\",19,36\n",
"\"37\",19,46\n",
"\"38\",19,68\n",
"\"39\",20,32\n",
"\"40\",20,48\n",
"\"41\",20,52\n",
"\"42\",20,56\n",
"\"43\",20,64\n",
"\"44\",22,66\n",
"\"45\",23,54\n",
"\"46\",24,70\n",
"\"47\",24,92\n",
"\"48\",24,93\n",
"\"49\",24,120\n",
"\"50\",25,85\n",
"\"\"\"\n",
"\n",
"buffer = StringIO(cars_data)\n",
"d = pd.read_csv(buffer, sep=\",\")\n",
"\n",
"d = pd.read_csv(\"./data/cars.csv\")\n",
"d.head()"
]
},
{
"cell_type": "code",
"execution_count": 121,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Unnamed: 0 float32\n",
"speed float32\n",
"dist float32\n",
"rownames float32\n",
"speed float32\n",
"dist float32\n",
"dtype: object"
]
},
"execution_count": 121,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
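
The rewritten cell reads ./data/cars.csv, a file not shown in these hunks. A minimal sketch of how it could be produced once from the cars_data string the old cell inlined; renaming the blank index column to rownames (to match the dtypes output above) is an assumption about how that file was actually created:

# one-time sketch: write the previously inlined data to the CSV the new cell expects
from io import StringIO
import pandas as pd

cars = pd.read_csv(StringIO(cars_data))                 # cars_data as defined in the removed cell
cars = cars.rename(columns={"Unnamed: 0": "rownames"})  # pandas names the blank header "Unnamed: 0"
cars.to_csv("./data/cars.csv", index=False)
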