{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "81d12e20",
   "metadata": {},
   "source": [
    "# Assignment 5"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d7e98fab",
   "metadata": {},
   "source": [
    "## Martha Brinkmann - 6504930105"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "44c694df",
   "metadata": {},
   "source": [
    "### 1.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "a0028196",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Food</th>\n",
       "      <th>Beer</th>\n",
       "      <th>Smoke</th>\n",
       "      <th>Games</th>\n",
       "      <th>Books</th>\n",
       "      <th>Hshld</th>\n",
       "      <th>Clths</th>\n",
       "      <th>Hlth</th>\n",
       "      <th>Chems</th>\n",
       "      <th>Txtls</th>\n",
       "      <th>...</th>\n",
       "      <th>Telcm</th>\n",
       "      <th>Servs</th>\n",
       "      <th>BusEq</th>\n",
       "      <th>Paper</th>\n",
       "      <th>Trans</th>\n",
       "      <th>Whlsl</th>\n",
       "      <th>Rtail</th>\n",
       "      <th>Meals</th>\n",
       "      <th>Fin</th>\n",
       "      <th>Other</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>192607</th>\n",
       "      <td>0.56</td>\n",
       "      <td>-5.19</td>\n",
       "      <td>1.29</td>\n",
       "      <td>2.93</td>\n",
       "      <td>10.97</td>\n",
       "      <td>-0.48</td>\n",
       "      <td>8.08</td>\n",
       "      <td>1.77</td>\n",
       "      <td>8.14</td>\n",
       "      <td>0.39</td>\n",
       "      <td>...</td>\n",
       "      <td>0.83</td>\n",
       "      <td>9.22</td>\n",
       "      <td>2.06</td>\n",
       "      <td>7.70</td>\n",
       "      <td>1.91</td>\n",
       "      <td>-23.79</td>\n",
       "      <td>0.07</td>\n",
       "      <td>1.87</td>\n",
       "      <td>-0.02</td>\n",
       "      <td>5.20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>192608</th>\n",
       "      <td>2.59</td>\n",
       "      <td>27.03</td>\n",
       "      <td>6.50</td>\n",
       "      <td>0.55</td>\n",
       "      <td>10.01</td>\n",
       "      <td>-3.58</td>\n",
       "      <td>-2.51</td>\n",
       "      <td>4.25</td>\n",
       "      <td>5.50</td>\n",
       "      <td>7.97</td>\n",
       "      <td>...</td>\n",
       "      <td>2.17</td>\n",
       "      <td>2.02</td>\n",
       "      <td>4.39</td>\n",
       "      <td>-2.38</td>\n",
       "      <td>4.85</td>\n",
       "      <td>5.39</td>\n",
       "      <td>-0.75</td>\n",
       "      <td>-0.13</td>\n",
       "      <td>4.47</td>\n",
       "      <td>6.76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>192609</th>\n",
       "      <td>1.16</td>\n",
       "      <td>4.02</td>\n",
       "      <td>1.26</td>\n",
       "      <td>6.58</td>\n",
       "      <td>-0.99</td>\n",
       "      <td>0.73</td>\n",
       "      <td>-0.51</td>\n",
       "      <td>0.69</td>\n",
       "      <td>5.33</td>\n",
       "      <td>2.30</td>\n",
       "      <td>...</td>\n",
       "      <td>2.41</td>\n",
       "      <td>2.25</td>\n",
       "      <td>0.19</td>\n",
       "      <td>-5.54</td>\n",
       "      <td>0.07</td>\n",
       "      <td>-7.87</td>\n",
       "      <td>0.25</td>\n",
       "      <td>-0.56</td>\n",
       "      <td>-1.61</td>\n",
       "      <td>-3.86</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>192610</th>\n",
       "      <td>-3.06</td>\n",
       "      <td>-3.31</td>\n",
       "      <td>1.06</td>\n",
       "      <td>-4.76</td>\n",
       "      <td>9.47</td>\n",
       "      <td>-4.68</td>\n",
       "      <td>0.12</td>\n",
       "      <td>-0.57</td>\n",
       "      <td>-4.76</td>\n",
       "      <td>1.00</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.11</td>\n",
       "      <td>-2.00</td>\n",
       "      <td>-1.09</td>\n",
       "      <td>-5.08</td>\n",
       "      <td>-2.61</td>\n",
       "      <td>-15.38</td>\n",
       "      <td>-2.20</td>\n",
       "      <td>-4.11</td>\n",
       "      <td>-5.51</td>\n",
       "      <td>-8.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>192611</th>\n",
       "      <td>6.35</td>\n",
       "      <td>7.29</td>\n",
       "      <td>4.55</td>\n",
       "      <td>1.66</td>\n",
       "      <td>-5.80</td>\n",
       "      <td>-0.54</td>\n",
       "      <td>1.87</td>\n",
       "      <td>5.42</td>\n",
       "      <td>5.20</td>\n",
       "      <td>3.10</td>\n",
       "      <td>...</td>\n",
       "      <td>1.63</td>\n",
       "      <td>3.77</td>\n",
       "      <td>3.64</td>\n",
       "      <td>3.84</td>\n",
       "      <td>1.61</td>\n",
       "      <td>4.67</td>\n",
       "      <td>6.52</td>\n",
       "      <td>4.33</td>\n",
       "      <td>2.34</td>\n",
       "      <td>4.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>202204</th>\n",
       "      <td>3.01</td>\n",
       "      <td>3.03</td>\n",
       "      <td>6.37</td>\n",
       "      <td>-25.22</td>\n",
       "      <td>-10.76</td>\n",
       "      <td>2.04</td>\n",
       "      <td>-7.00</td>\n",
       "      <td>-6.80</td>\n",
       "      <td>-2.28</td>\n",
       "      <td>6.63</td>\n",
       "      <td>...</td>\n",
       "      <td>-10.70</td>\n",
       "      <td>-12.59</td>\n",
       "      <td>-12.26</td>\n",
       "      <td>-0.74</td>\n",
       "      <td>-10.93</td>\n",
       "      <td>-2.14</td>\n",
       "      <td>-11.41</td>\n",
       "      <td>-5.47</td>\n",
       "      <td>-7.99</td>\n",
       "      <td>-7.65</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>202205</th>\n",
       "      <td>-1.68</td>\n",
       "      <td>-1.60</td>\n",
       "      <td>2.67</td>\n",
       "      <td>-2.93</td>\n",
       "      <td>-7.40</td>\n",
       "      <td>-5.12</td>\n",
       "      <td>-6.45</td>\n",
       "      <td>0.99</td>\n",
       "      <td>4.52</td>\n",
       "      <td>2.38</td>\n",
       "      <td>...</td>\n",
       "      <td>8.54</td>\n",
       "      <td>-3.35</td>\n",
       "      <td>-0.75</td>\n",
       "      <td>-0.66</td>\n",
       "      <td>-4.59</td>\n",
       "      <td>1.03</td>\n",
       "      <td>-5.64</td>\n",
       "      <td>-3.29</td>\n",
       "      <td>2.80</td>\n",
       "      <td>-1.19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>202206</th>\n",
       "      <td>-1.64</td>\n",
       "      <td>-0.02</td>\n",
       "      <td>-11.63</td>\n",
       "      <td>-11.33</td>\n",
       "      <td>-12.53</td>\n",
       "      <td>-2.56</td>\n",
       "      <td>-12.00</td>\n",
       "      <td>-2.05</td>\n",
       "      <td>-15.65</td>\n",
       "      <td>-11.17</td>\n",
       "      <td>...</td>\n",
       "      <td>-6.72</td>\n",
       "      <td>-6.79</td>\n",
       "      <td>-10.19</td>\n",
       "      <td>-8.51</td>\n",
       "      <td>-7.14</td>\n",
       "      <td>-6.43</td>\n",
       "      <td>-8.50</td>\n",
       "      <td>-9.02</td>\n",
       "      <td>-9.05</td>\n",
       "      <td>-11.78</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>202207</th>\n",
       "      <td>3.67</td>\n",
       "      <td>5.49</td>\n",
       "      <td>0.56</td>\n",
       "      <td>14.62</td>\n",
       "      <td>12.10</td>\n",
       "      <td>0.76</td>\n",
       "      <td>11.86</td>\n",
       "      <td>2.75</td>\n",
       "      <td>7.66</td>\n",
       "      <td>6.86</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.40</td>\n",
       "      <td>8.60</td>\n",
       "      <td>15.68</td>\n",
       "      <td>7.22</td>\n",
       "      <td>9.33</td>\n",
       "      <td>9.08</td>\n",
       "      <td>16.33</td>\n",
       "      <td>11.89</td>\n",
       "      <td>7.38</td>\n",
       "      <td>9.19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>202208</th>\n",
       "      <td>-1.61</td>\n",
       "      <td>-1.87</td>\n",
       "      <td>-0.12</td>\n",
       "      <td>-2.95</td>\n",
       "      <td>-4.97</td>\n",
       "      <td>-2.16</td>\n",
       "      <td>-6.01</td>\n",
       "      <td>-5.07</td>\n",
       "      <td>-1.39</td>\n",
       "      <td>-12.20</td>\n",
       "      <td>...</td>\n",
       "      <td>-3.00</td>\n",
       "      <td>-4.72</td>\n",
       "      <td>-5.89</td>\n",
       "      <td>-7.66</td>\n",
       "      <td>-1.46</td>\n",
       "      <td>-1.60</td>\n",
       "      <td>-3.46</td>\n",
       "      <td>-1.47</td>\n",
       "      <td>-2.24</td>\n",
       "      <td>-3.65</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1154 rows × 30 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        Food   Beer   Smoke  Games  Books  Hshld  Clths  Hlth   Chems  Txtls  \\\n",
       "192607   0.56  -5.19   1.29   2.93  10.97  -0.48   8.08   1.77   8.14   0.39   \n",
       "192608   2.59  27.03   6.50   0.55  10.01  -3.58  -2.51   4.25   5.50   7.97   \n",
       "192609   1.16   4.02   1.26   6.58  -0.99   0.73  -0.51   0.69   5.33   2.30   \n",
       "192610  -3.06  -3.31   1.06  -4.76   9.47  -4.68   0.12  -0.57  -4.76   1.00   \n",
       "192611   6.35   7.29   4.55   1.66  -5.80  -0.54   1.87   5.42   5.20   3.10   \n",
       "...       ...    ...    ...    ...    ...    ...    ...    ...    ...    ...   \n",
       "202204   3.01   3.03   6.37 -25.22 -10.76   2.04  -7.00  -6.80  -2.28   6.63   \n",
       "202205  -1.68  -1.60   2.67  -2.93  -7.40  -5.12  -6.45   0.99   4.52   2.38   \n",
       "202206  -1.64  -0.02 -11.63 -11.33 -12.53  -2.56 -12.00  -2.05 -15.65 -11.17   \n",
       "202207   3.67   5.49   0.56  14.62  12.10   0.76  11.86   2.75   7.66   6.86   \n",
       "202208  -1.61  -1.87  -0.12  -2.95  -4.97  -2.16  -6.01  -5.07  -1.39 -12.20   \n",
       "\n",
       "        ...  Telcm  Servs  BusEq  Paper  Trans  Whlsl  Rtail  Meals  Fin    \\\n",
       "192607  ...   0.83   9.22   2.06   7.70   1.91 -23.79   0.07   1.87  -0.02   \n",
       "192608  ...   2.17   2.02   4.39  -2.38   4.85   5.39  -0.75  -0.13   4.47   \n",
       "192609  ...   2.41   2.25   0.19  -5.54   0.07  -7.87   0.25  -0.56  -1.61   \n",
       "192610  ...  -0.11  -2.00  -1.09  -5.08  -2.61 -15.38  -2.20  -4.11  -5.51   \n",
       "192611  ...   1.63   3.77   3.64   3.84   1.61   4.67   6.52   4.33   2.34   \n",
       "...     ...    ...    ...    ...    ...    ...    ...    ...    ...    ...   \n",
       "202204  ... -10.70 -12.59 -12.26  -0.74 -10.93  -2.14 -11.41  -5.47  -7.99   \n",
       "202205  ...   8.54  -3.35  -0.75  -0.66  -4.59   1.03  -5.64  -3.29   2.80   \n",
       "202206  ...  -6.72  -6.79 -10.19  -8.51  -7.14  -6.43  -8.50  -9.02  -9.05   \n",
       "202207  ...  -0.40   8.60  15.68   7.22   9.33   9.08  16.33  11.89   7.38   \n",
       "202208  ...  -3.00  -4.72  -5.89  -7.66  -1.46  -1.60  -3.46  -1.47  -2.24   \n",
       "\n",
       "        Other  \n",
       "192607   5.20  \n",
       "192608   6.76  \n",
       "192609  -3.86  \n",
       "192610  -8.49  \n",
       "192611   4.00  \n",
       "...       ...  \n",
       "202204  -7.65  \n",
       "202205  -1.19  \n",
       "202206 -11.78  \n",
       "202207   9.19  \n",
       "202208  -3.65  \n",
       "\n",
       "[1154 rows x 30 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import qeds\n",
    "from sklearn import linear_model, metrics, model_selection\n",
    "\n",
    "%matplotlib inline\n",
    "\n",
    "# Activate the qeds plot theme and keep its plotly template / colour cycle.\n",
    "qeds.themes.mpl_style();\n",
    "plotly_template = qeds.themes.plotly_template()\n",
    "colors = qeds.themes.COLOR_CYCLE\n",
    "\n",
    "# Load the industry-portfolio CSV; its first column becomes the index.\n",
    "# The float conversion is assigned back to `df`: a bare `df.astype(float)`\n",
    "# only returns a converted copy and leaves `df` itself unchanged.\n",
    "df = pd.read_csv('30_Industry_Portfolios.csv', index_col=0).astype(float)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "7a24a76b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Start from empty dicts for the alpha and beta values (nothing is stored yet).\n",
    "alpha = dict()\n",
    "beta = dict()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f4d7e9d4",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "f9e38d5c",
   "metadata": {},
   "source": [
    "### 1.2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5a0dd9e8",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "f32bbcfc",
   "metadata": {},
   "source": [
    "### 1.3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "394b2f6f",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import KFold\n",
    "\n",
    "# 10-fold splitter that shuffles the rows before splitting. Fixing random_state\n",
    "# makes the folds, and every RMSE computed from them, reproducible on re-run.\n",
    "kf = KFold(n_splits=10, shuffle=True, random_state=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "81a04d4e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cross-validated RMSE of a plain linear regression, averaged over the folds of `kf`.\n",
    "# NOTE(review): `X` and `y` are not defined in any earlier cell of this notebook;\n",
    "# they must exist before this cell runs. The indexing `X[train]` / `y[train]`\n",
    "# assumes NumPy arrays (positional row selection) -- TODO confirm.\n",
    "err_train = 0\n",
    "err_test = 0\n",
    "n_folds = 0\n",
    "for train, test in kf.split(X):\n",
    "    lr = linear_model.LinearRegression()\n",
    "    reg = lr.fit(X[train], y[train])\n",
    "    y_pred_train = reg.predict(X[train])\n",
    "    y_pred_test = reg.predict(X[test])\n",
    "    e_train = y[train] - y_pred_train\n",
    "    e_test = y[test] - y_pred_test\n",
    "    # Accumulate the per-fold root-mean-squared error.\n",
    "    err_train += np.sqrt(np.mean(e_train * e_train))\n",
    "    err_test += np.sqrt(np.mean(e_test * e_test))\n",
    "    n_folds += 1\n",
    "\n",
    "# Divide by the number of folds actually iterated rather than a hard-coded 10,\n",
    "# so the average stays correct if n_splits is changed where `kf` is created.\n",
    "rmse_train_10cv_model1 = err_train / n_folds\n",
    "rmse_test_10cv_model1 = err_test / n_folds\n",
    "print('RMSE on 10-fold CV on the train data: {}'.format(rmse_train_10cv_model1))\n",
    "print('RMSE on 10-fold CV on the test data: {}'.format(rmse_test_10cv_model1))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
