CAPM¶

6304640094¶

Aus Atsavakovith¶

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import urllib.request
import zipfile

from pandas_datareader.famafrench import get_available_datasets
import pandas_datareader.data as web
import datetime as dt


%matplotlib inline
import qeds

qeds.themes.mpl_style();
plotly_template = qeds.themes.plotly_template()
colors = qeds.themes.COLOR_CYCLE

from sklearn import (linear_model, metrics, model_selection)

Fama-French Factors (monthly)

In [ ]:
import urllib.request
import zipfile
ff_url = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip"

urllib.request.urlretrieve(ff_url,'fama_french.zip')
zip_file = zipfile.ZipFile('fama_french.zip', 'r')

zip_file.extractall()
zip_file.close()

ff_factors = pd.read_csv('F-F_Research_Data_Factors.csv', skiprows = 3, nrows = 1114, index_col = 0)
ff_factors.index = pd.to_datetime(ff_factors.index, format= '%Y%m')
ff_factors.index = ff_factors.index + pd.offsets.MonthEnd()
ff_factors = ff_factors.apply(lambda x: x/ 100)
ff_factors.head()

CONCAT DATA

In [2]:
get_available_datasets()
Out[2]:
['F-F_Research_Data_Factors',
 'F-F_Research_Data_Factors_weekly',
 'F-F_Research_Data_Factors_daily',
 'F-F_Research_Data_5_Factors_2x3',
 'F-F_Research_Data_5_Factors_2x3_daily',
 'Portfolios_Formed_on_ME',
 'Portfolios_Formed_on_ME_Wout_Div',
 'Portfolios_Formed_on_ME_Daily',
 'Portfolios_Formed_on_BE-ME',
 'Portfolios_Formed_on_BE-ME_Wout_Div',
 'Portfolios_Formed_on_BE-ME_Daily',
 'Portfolios_Formed_on_OP',
 'Portfolios_Formed_on_OP_Wout_Div',
 'Portfolios_Formed_on_OP_Daily',
 'Portfolios_Formed_on_INV',
 'Portfolios_Formed_on_INV_Wout_Div',
 'Portfolios_Formed_on_INV_Daily',
 '6_Portfolios_2x3',
 '6_Portfolios_2x3_Wout_Div',
 '6_Portfolios_2x3_weekly',
 '6_Portfolios_2x3_daily',
 '25_Portfolios_5x5',
 '25_Portfolios_5x5_Wout_Div',
 '25_Portfolios_5x5_Daily',
 '100_Portfolios_10x10',
 '100_Portfolios_10x10_Wout_Div',
 '100_Portfolios_10x10_Daily',
 '6_Portfolios_ME_OP_2x3',
 '6_Portfolios_ME_OP_2x3_Wout_Div',
 '6_Portfolios_ME_OP_2x3_daily',
 '25_Portfolios_ME_OP_5x5',
 '25_Portfolios_ME_OP_5x5_Wout_Div',
 '25_Portfolios_ME_OP_5x5_daily',
 '100_Portfolios_ME_OP_10x10',
 '100_Portfolios_10x10_ME_OP_Wout_Div',
 '100_Portfolios_ME_OP_10x10_daily',
 '6_Portfolios_ME_INV_2x3',
 '6_Portfolios_ME_INV_2x3_Wout_Div',
 '6_Portfolios_ME_INV_2x3_daily',
 '25_Portfolios_ME_INV_5x5',
 '25_Portfolios_ME_INV_5x5_Wout_Div',
 '25_Portfolios_ME_INV_5x5_daily',
 '100_Portfolios_ME_INV_10x10',
 '100_Portfolios_10x10_ME_INV_Wout_Div',
 '100_Portfolios_ME_INV_10x10_daily',
 '25_Portfolios_BEME_OP_5x5',
 '25_Portfolios_BEME_OP_5x5_Wout_Div',
 '25_Portfolios_BEME_OP_5x5_daily',
 '25_Portfolios_BEME_INV_5x5',
 '25_Portfolios_BEME_INV_5x5_Wout_Div',
 '25_Portfolios_BEME_INV_5x5_daily',
 '25_Portfolios_OP_INV_5x5',
 '25_Portfolios_OP_INV_5x5_Wout_Div',
 '25_Portfolios_OP_INV_5x5_daily',
 '32_Portfolios_ME_BEME_OP_2x4x4',
 '32_Portfolios_ME_BEME_OP_2x4x4_Wout_Div',
 '32_Portfolios_ME_BEME_INV_2x4x4',
 '32_Portfolios_ME_BEME_INV_2x4x4_Wout_Div',
 '32_Portfolios_ME_OP_INV_2x4x4',
 '32_Portfolios_ME_OP_INV_2x4x4_Wout_Div',
 'Portfolios_Formed_on_E-P',
 'Portfolios_Formed_on_E-P_Wout_Div',
 'Portfolios_Formed_on_CF-P',
 'Portfolios_Formed_on_CF-P_Wout_Div',
 'Portfolios_Formed_on_D-P',
 'Portfolios_Formed_on_D-P_Wout_Div',
 '6_Portfolios_ME_EP_2x3',
 '6_Portfolios_ME_EP_2x3_Wout_Div',
 '6_Portfolios_ME_CFP_2x3',
 '6_Portfolios_ME_CFP_2x3_Wout_Div',
 '6_Portfolios_ME_DP_2x3',
 '6_Portfolios_ME_DP_2x3_Wout_Div',
 'F-F_Momentum_Factor',
 'F-F_Momentum_Factor_daily',
 '6_Portfolios_ME_Prior_12_2',
 '6_Portfolios_ME_Prior_12_2_Daily',
 '25_Portfolios_ME_Prior_12_2',
 '25_Portfolios_ME_Prior_12_2_Daily',
 '10_Portfolios_Prior_12_2',
 '10_Portfolios_Prior_12_2_Daily',
 'F-F_ST_Reversal_Factor',
 'F-F_ST_Reversal_Factor_daily',
 '6_Portfolios_ME_Prior_1_0',
 '6_Portfolios_ME_Prior_1_0_Daily',
 '25_Portfolios_ME_Prior_1_0',
 '25_Portfolios_ME_Prior_1_0_Daily',
 '10_Portfolios_Prior_1_0',
 '10_Portfolios_Prior_1_0_Daily',
 'F-F_LT_Reversal_Factor',
 'F-F_LT_Reversal_Factor_daily',
 '6_Portfolios_ME_Prior_60_13',
 '6_Portfolios_ME_Prior_60_13_Daily',
 '25_Portfolios_ME_Prior_60_13',
 '25_Portfolios_ME_Prior_60_13_Daily',
 '10_Portfolios_Prior_60_13',
 '10_Portfolios_Prior_60_13_Daily',
 'Portfolios_Formed_on_AC',
 '25_Portfolios_ME_AC_5x5',
 'Portfolios_Formed_on_BETA',
 '25_Portfolios_ME_BETA_5x5',
 'Portfolios_Formed_on_NI',
 '25_Portfolios_ME_NI_5x5',
 'Portfolios_Formed_on_VAR',
 '25_Portfolios_ME_VAR_5x5',
 'Portfolios_Formed_on_RESVAR',
 '25_Portfolios_ME_RESVAR_5x5',
 '5_Industry_Portfolios',
 '5_Industry_Portfolios_Wout_Div',
 '5_Industry_Portfolios_daily',
 '10_Industry_Portfolios',
 '10_Industry_Portfolios_Wout_Div',
 '10_Industry_Portfolios_daily',
 '12_Industry_Portfolios',
 '12_Industry_Portfolios_Wout_Div',
 '12_Industry_Portfolios_daily',
 '17_Industry_Portfolios',
 '17_Industry_Portfolios_Wout_Div',
 '17_Industry_Portfolios_daily',
 '30_Industry_Portfolios',
 '30_Industry_Portfolios_Wout_Div',
 '30_Industry_Portfolios_daily',
 '38_Industry_Portfolios',
 '38_Industry_Portfolios_Wout_Div',
 '38_Industry_Portfolios_daily',
 '48_Industry_Portfolios',
 '48_Industry_Portfolios_Wout_Div',
 '48_Industry_Portfolios_daily',
 '49_Industry_Portfolios',
 '49_Industry_Portfolios_Wout_Div',
 '49_Industry_Portfolios_daily',
 'ME_Breakpoints',
 'BE-ME_Breakpoints',
 'OP_Breakpoints',
 'INV_Breakpoints',
 'E-P_Breakpoints',
 'CF-P_Breakpoints',
 'D-P_Breakpoints',
 'Prior_2-12_Breakpoints',
 'Developed_3_Factors',
 'Developed_3_Factors_Daily',
 'Developed_ex_US_3_Factors',
 'Developed_ex_US_3_Factors_Daily',
 'Europe_3_Factors',
 'Europe_3_Factors_Daily',
 'Japan_3_Factors',
 'Japan_3_Factors_Daily',
 'Asia_Pacific_ex_Japan_3_Factors',
 'Asia_Pacific_ex_Japan_3_Factors_Daily',
 'North_America_3_Factors',
 'North_America_3_Factors_Daily',
 'Developed_5_Factors',
 'Developed_5_Factors_Daily',
 'Developed_ex_US_5_Factors',
 'Developed_ex_US_5_Factors_Daily',
 'Europe_5_Factors',
 'Europe_5_Factors_Daily',
 'Japan_5_Factors',
 'Japan_5_Factors_Daily',
 'Asia_Pacific_ex_Japan_5_Factors',
 'Asia_Pacific_ex_Japan_5_Factors_Daily',
 'North_America_5_Factors',
 'North_America_5_Factors_Daily',
 'Developed_Mom_Factor',
 'Developed_Mom_Factor_Daily',
 'Developed_ex_US_Mom_Factor',
 'Developed_ex_US_Mom_Factor_Daily',
 'Europe_Mom_Factor',
 'Europe_Mom_Factor_Daily',
 'Japan_Mom_Factor',
 'Japan_Mom_Factor_Daily',
 'Asia_Pacific_ex_Japan_MOM_Factor',
 'Asia_Pacific_ex_Japan_MOM_Factor_Daily',
 'North_America_Mom_Factor',
 'North_America_Mom_Factor_Daily',
 'Developed_6_Portfolios_ME_BE-ME',
 'Developed_6_Portfolios_ME_BE-ME_daily',
 'Developed_ex_US_6_Portfolios_ME_BE-ME',
 'Developed_ex_US_6_Portfolios_ME_BE-ME_daily',
 'Europe_6_Portfolios_ME_BE-ME',
 'Europe_6_Portfolios_ME_BE-ME_daily',
 'Japan_6_Portfolios_ME_BE-ME',
 'Japan_6_Portfolios_ME_BE-ME_daily',
 'Asia_Pacific_ex_Japan_6_Portfolios_ME_BE-ME',
 'Asia_Pacific_ex_Japan_6_Portfolios_ME_BE-ME_daily',
 'North_America_6_Portfolios_ME_BE-ME',
 'North_America_6_Portfolios_ME_BE-ME_daily',
 'Developed_25_Portfolios_ME_BE-ME',
 'Developed_25_Portfolios_ME_BE-ME_daily',
 'Developed_ex_US_25_Portfolios_ME_BE-ME',
 'Developed_ex_US_25_Portfolios_ME_BE-ME_daily',
 'Europe_25_Portfolios_ME_BE-ME',
 'Europe_25_Portfolios_ME_BE-ME_daily',
 'Japan_25_Portfolios_ME_BE-ME',
 'Japan_25_Portfolios_ME_BE-ME_daily',
 'Asia_Pacific_ex_Japan_25_Portfolios_ME_BE-ME',
 'Asia_Pacific_ex_Japan_25_Portfolios_ME_BE-ME_daily',
 'North_America_25_Portfolios_ME_BE-ME',
 'North_America_25_Portfolios_ME_BE-ME_daily',
 'Developed_6_Portfolios_ME_OP',
 'Developed_6_Portfolios_ME_OP_Daily',
 'Developed_ex_US_6_Portfolios_ME_OP',
 'Developed_ex_US_6_Portfolios_ME_OP_Daily',
 'Europe_6_Portfolios_ME_OP',
 'Europe_6_Portfolios_ME_OP_Daily',
 'Japan_6_Portfolios_ME_OP',
 'Japan_6_Portfolios_ME_OP_Daily',
 'Asia_Pacific_ex_Japan_6_Portfolios_ME_OP',
 'Asia_Pacific_ex_Japan_6_Portfolios_ME_OP_Daily',
 'North_America_6_Portfolios_ME_OP',
 'North_America_6_Portfolios_ME_OP_Daily',
 'Developed_25_Portfolios_ME_OP',
 'Developed_25_Portfolios_ME_OP_Daily',
 'Developed_ex_US_25_Portfolios_ME_OP',
 'Developed_ex_US_25_Portfolios_ME_OP_Daily',
 'Europe_25_Portfolios_ME_OP',
 'Europe_25_Portfolios_ME_OP_Daily',
 'Japan_25_Portfolios_ME_OP',
 'Japan_25_Portfolios_ME_OP_Daily',
 'Asia_Pacific_ex_Japan_25_Portfolios_ME_OP',
 'Asia_Pacific_ex_Japan_25_Portfolios_ME_OP_Daily',
 'North_America_25_Portfolios_ME_OP',
 'North_America_25_Portfolios_ME_OP_Daily',
 'Developed_6_Portfolios_ME_INV',
 'Developed_6_Portfolios_ME_INV_Daily',
 'Developed_ex_US_6_Portfolios_ME_INV',
 'Developed_ex_US_6_Portfolios_ME_INV_Daily',
 'Europe_6_Portfolios_ME_INV',
 'Europe_6_Portfolios_ME_INV_Daily',
 'Japan_6_Portfolios_ME_INV',
 'Japan_6_Portfolios_ME_INV_Daily',
 'Asia_Pacific_ex_Japan_6_Portfolios_ME_INV',
 'Asia_Pacific_ex_Japan_6_Portfolios_ME_INV_Daily',
 'North_America_6_Portfolios_ME_INV',
 'North_America_6_Portfolios_ME_INV_Daily',
 'Developed_25_Portfolios_ME_INV',
 'Developed_25_Portfolios_ME_INV_Daily',
 'Developed_ex_US_25_Portfolios_ME_INV',
 'Developed_ex_US_25_Portfolios_ME_INV_Daily',
 'Europe_25_Portfolios_ME_INV',
 'Europe_25_Portfolios_ME_INV_Daily',
 'Japan_25_Portfolios_ME_INV',
 'Japan_25_Portfolios_ME_INV_Daily',
 'Asia_Pacific_ex_Japan_25_Portfolios_ME_INV',
 'Asia_Pacific_ex_Japan_25_Portfolios_ME_INV_Daily',
 'North_America_25_Portfolios_ME_INV',
 'North_America_25_Portfolios_ME_INV_Daily',
 'Developed_6_Portfolios_ME_Prior_12_2',
 'Developed_6_Portfolios_ME_Prior_250_20_daily',
 'Developed_ex_US_6_Portfolios_ME_Prior_12_2',
 'Developed_ex_US_6_Portfolios_ME_Prior_250_20_daily',
 'Europe_6_Portfolios_ME_Prior_12_2',
 'Europe_6_Portfolios_ME_Prior_250_20_daily',
 'Japan_6_Portfolios_ME_Prior_12_2',
 'Japan_6_Portfolios_ME_Prior_250_20_daily',
 'Asia_Pacific_ex_Japan_6_Portfolios_ME_Prior_12_2',
 'Asia_Pacific_ex_Japan_6_Portfolios_ME_Prior_250_20_daily',
 'North_America_6_Portfolios_ME_Prior_12_2',
 'North_America_6_Portfolios_ME_Prior_250_20_daily',
 'Developed_25_Portfolios_ME_Prior_12_2',
 'Developed_25_Portfolios_ME_Prior_250_20_daily',
 'Developed_ex_US_25_Portfolios_ME_Prior_12_2',
 'Developed_ex_US_25_Portfolios_ME_Prior_250_20_daily',
 'Europe_25_Portfolios_ME_Prior_12_2',
 'Europe_25_Portfolios_ME_Prior_250_20_daily',
 'Japan_25_Portfolios_ME_Prior_12_2',
 'Japan_25_Portfolios_ME_Prior_250_20_daily',
 'Asia_Pacific_ex_Japan_25_Portfolios_ME_Prior_12_2',
 'Asia_Pacific_ex_Japan_25_Portfolios_ME_Prior_250_20_daily',
 'North_America_25_Portfolios_ME_Prior_12_2',
 'North_America_25_Portfolios_ME_Prior_250_20_daily',
 'Developed_32_Portfolios_ME_BE-ME_OP_2x4x4',
 'Developed_ex_US_32_Portfolios_ME_BE-ME_OP_2x4x4',
 'Europe_32_Portfolios_ME_BE-ME_OP_2x4x4',
 'Japan_32_Portfolios_ME_BE-ME_OP_2x4x4',
 'Asia_Pacific_ex_Japan_32_Portfolios_ME_BE-ME_OP_2x4x4',
 'North_America_32_Portfolios_ME_BE-ME_OP_2x4x4',
 'Developed_32_Portfolios_ME_BE-ME_INV(TA)_2x4x4',
 'Developed_ex_US_32_Portfolios_ME_BE-ME_INV(TA)_2x4x4',
 'Europe_32_Portfolios_ME_BE-ME_INV(TA)_2x4x4',
 'Japan_32_Portfolios_ME_BE-ME_INV(TA)_2x4x4',
 'Asia_Pacific_ex_Japan_32_Portfolios_ME_BE-ME_INV(TA)_2x4x4',
 'North_America_32_Portfolios_ME_BE-ME_INV(TA)_2x4x4',
 'Developed_32_Portfolios_ME_INV(TA)_OP_2x4x4',
 'Developed_ex_US_32_Portfolios_ME_INV(TA)_OP_2x4x4',
 'Europe_32_Portfolios_ME_INV(TA)_OP_2x4x4',
 'Japan_32_Portfolios_ME_INV(TA)_OP_2x4x4',
 'Asia_Pacific_ex_Japan_32_Portfolios_ME_INV(TA)_OP_2x4x4',
 'North_America_32_Portfolios_ME_INV(TA)_OP_2x4x4',
 'Emerging_5_Factors',
 'Emerging_MOM_Factor',
 'Emerging_Markets_6_Portfolios_ME_BE-ME',
 'Emerging_Markets_6_Portfolios_ME_OP',
 'Emerging_Markets_6_Portfolios_ME_INV',
 'Emerging_Markets_6_Portfolios_ME_Prior_12_2',
 'Emerging_Markets_4_Portfolios_BE-ME_OP',
 'Emerging_Markets_4_Portfolios_OP_INV',
 'Emerging_Markets_4_Portfolios_BE-ME_INV']

SELECT

F-F_Research_Data_Factors

30_Industry_Portfolios

In [3]:
start_date = dt.datetime(1926, 7, 1)
end_date = dt.datetime(2022, 8, 1)
In [4]:
ff_factors = web.DataReader('F-F_Research_Data_Factors', 'famafrench', start = start_date, end = end_date)
print(ff_factors['DESCR'])
F-F Research Data Factors
-------------------------

This file was created by CMPT_ME_BEME_RETS using the 202208 CRSP database. The 1-month TBill return is from Ibbotson and Associates, Inc. Copyright 2022 Kenneth R. French

  0 : (1154 rows x 4 cols)
  1 : Annual Factors: January-December (95 rows x 4 cols)
In [5]:
ff_monthly = ff_factors[0]
ff_monthly
Out[5]:
Mkt-RF SMB HML RF
Date
1926-07 2.96 -2.56 -2.43 0.22
1926-08 2.64 -1.17 3.82 0.25
1926-09 0.36 -1.40 0.13 0.23
1926-10 -3.24 -0.09 0.70 0.32
1926-11 2.53 -0.10 -0.51 0.31
... ... ... ... ...
2022-04 -9.46 -1.41 6.19 0.01
2022-05 -0.34 -1.85 8.41 0.03
2022-06 -8.43 2.09 -5.97 0.06
2022-07 9.57 2.81 -4.10 0.08
2022-08 -3.78 1.39 0.31 0.19

1154 rows × 4 columns

In [6]:
industry_port = web.DataReader('30_Industry_Portfolios', 'famafrench', start = start_date, end = end_date)
print(industry_port['DESCR'])
30 Industry Portfolios
----------------------

This file was created by CMPT_IND_RETS using the 202208 CRSP database. It contains value- and equal-weighted returns for 30 industry portfolios. The portfolios are constructed at the end of June. The annual returns are from January to December. Missing data are indicated by -99.99 or -999. Copyright 2022 Kenneth R. French

  0 : Average Value Weighted Returns -- Monthly (1154 rows x 30 cols)
  1 : Average Equal Weighted Returns -- Monthly (1154 rows x 30 cols)
  2 : Average Value Weighted Returns -- Annual (95 rows x 30 cols)
  3 : Average Equal Weighted Returns -- Annual (95 rows x 30 cols)
  4 : Number of Firms in Portfolios (1154 rows x 30 cols)
  5 : Average Firm Size (1154 rows x 30 cols)
  6 : Sum of BE / Sum of ME (97 rows x 30 cols)
  7 : Value-Weighted Average of BE/ME (97 rows x 30 cols)
In [7]:
ind_monthly = industry_port[0]
ind_monthly
Out[7]:
Food Beer Smoke Games Books Hshld Clths Hlth Chems Txtls ... Telcm Servs BusEq Paper Trans Whlsl Rtail Meals Fin Other
Date
1926-07 0.56 -5.19 1.29 2.93 10.97 -0.48 8.08 1.77 8.14 0.39 ... 0.83 9.22 2.06 7.70 1.91 -23.79 0.07 1.87 -0.02 5.20
1926-08 2.59 27.03 6.50 0.55 10.01 -3.58 -2.51 4.25 5.50 7.97 ... 2.17 2.02 4.39 -2.38 4.85 5.39 -0.75 -0.13 4.47 6.76
1926-09 1.16 4.02 1.26 6.58 -0.99 0.73 -0.51 0.69 5.33 2.30 ... 2.41 2.25 0.19 -5.54 0.07 -7.87 0.25 -0.56 -1.61 -3.86
1926-10 -3.06 -3.31 1.06 -4.76 9.47 -4.68 0.12 -0.57 -4.76 1.00 ... -0.11 -2.00 -1.09 -5.08 -2.61 -15.38 -2.20 -4.11 -5.51 -8.49
1926-11 6.35 7.29 4.55 1.66 -5.80 -0.54 1.87 5.42 5.20 3.10 ... 1.63 3.77 3.64 3.84 1.61 4.67 6.52 4.33 2.34 4.00
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2022-04 3.01 3.03 6.37 -25.22 -10.76 2.04 -7.00 -6.80 -2.28 6.63 ... -10.70 -12.59 -12.26 -0.74 -10.93 -2.14 -11.41 -5.47 -7.99 -7.65
2022-05 -1.68 -1.60 2.67 -2.93 -7.40 -5.12 -6.45 0.99 4.52 2.38 ... 8.54 -3.35 -0.75 -0.66 -4.59 1.03 -5.64 -3.29 2.80 -1.19
2022-06 -1.64 -0.02 -11.63 -11.33 -12.53 -2.56 -12.00 -2.05 -15.65 -11.17 ... -6.72 -6.79 -10.19 -8.51 -7.14 -6.43 -8.50 -9.02 -9.05 -11.78
2022-07 3.67 5.49 0.56 14.62 12.10 0.76 11.86 2.75 7.66 6.86 ... -0.40 8.60 15.68 7.22 9.33 9.08 16.33 11.89 7.38 9.19
2022-08 -1.61 -1.87 -0.12 -2.95 -4.97 -2.16 -6.01 -5.07 -1.39 -12.20 ... -3.00 -4.72 -5.89 -7.66 -1.46 -1.60 -3.46 -1.47 -2.24 -3.65

1154 rows × 30 columns

In [9]:
df = pd.concat([ff_monthly, ind_monthly], axis = 1, join = 'inner')
df
Out[9]:
Mkt-RF SMB HML RF Food Beer Smoke Games Books Hshld ... Telcm Servs BusEq Paper Trans Whlsl Rtail Meals Fin Other
Date
1926-07 2.96 -2.56 -2.43 0.22 0.56 -5.19 1.29 2.93 10.97 -0.48 ... 0.83 9.22 2.06 7.70 1.91 -23.79 0.07 1.87 -0.02 5.20
1926-08 2.64 -1.17 3.82 0.25 2.59 27.03 6.50 0.55 10.01 -3.58 ... 2.17 2.02 4.39 -2.38 4.85 5.39 -0.75 -0.13 4.47 6.76
1926-09 0.36 -1.40 0.13 0.23 1.16 4.02 1.26 6.58 -0.99 0.73 ... 2.41 2.25 0.19 -5.54 0.07 -7.87 0.25 -0.56 -1.61 -3.86
1926-10 -3.24 -0.09 0.70 0.32 -3.06 -3.31 1.06 -4.76 9.47 -4.68 ... -0.11 -2.00 -1.09 -5.08 -2.61 -15.38 -2.20 -4.11 -5.51 -8.49
1926-11 2.53 -0.10 -0.51 0.31 6.35 7.29 4.55 1.66 -5.80 -0.54 ... 1.63 3.77 3.64 3.84 1.61 4.67 6.52 4.33 2.34 4.00
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2022-04 -9.46 -1.41 6.19 0.01 3.01 3.03 6.37 -25.22 -10.76 2.04 ... -10.70 -12.59 -12.26 -0.74 -10.93 -2.14 -11.41 -5.47 -7.99 -7.65
2022-05 -0.34 -1.85 8.41 0.03 -1.68 -1.60 2.67 -2.93 -7.40 -5.12 ... 8.54 -3.35 -0.75 -0.66 -4.59 1.03 -5.64 -3.29 2.80 -1.19
2022-06 -8.43 2.09 -5.97 0.06 -1.64 -0.02 -11.63 -11.33 -12.53 -2.56 ... -6.72 -6.79 -10.19 -8.51 -7.14 -6.43 -8.50 -9.02 -9.05 -11.78
2022-07 9.57 2.81 -4.10 0.08 3.67 5.49 0.56 14.62 12.10 0.76 ... -0.40 8.60 15.68 7.22 9.33 9.08 16.33 11.89 7.38 9.19
2022-08 -3.78 1.39 0.31 0.19 -1.61 -1.87 -0.12 -2.95 -4.97 -2.16 ... -3.00 -4.72 -5.89 -7.66 -1.46 -1.60 -3.46 -1.47 -2.24 -3.65

1154 rows × 34 columns

1.1) Run the following Regression¶

Create blank dataframe

In [11]:
ind_name = ind_monthly.columns
In [13]:
beta = pd.DataFrame(data = np.nan, index = ind_name, columns = ['BETAs'])
beta
Out[13]:
BETAs
Food NaN
Beer NaN
Smoke NaN
Games NaN
Books NaN
Hshld NaN
Clths NaN
Hlth NaN
Chems NaN
Txtls NaN
Cnstr NaN
Steel NaN
FabPr NaN
ElcEq NaN
Autos NaN
Carry NaN
Mines NaN
Coal NaN
Oil NaN
Util NaN
Telcm NaN
Servs NaN
BusEq NaN
Paper NaN
Trans NaN
Whlsl NaN
Rtail NaN
Meals NaN
Fin NaN
Other NaN

y = Ri - Rf

x = E(Rm) - Rf

In [16]:
from sklearn import linear_model

for i in range(len(ind_name)):
    model = linear_model.LinearRegression()
    model = model.fit(df[['Mkt-RF']], pd.DataFrame(df[ind_name[i]]-df['RF'])) 
    beta.iloc[i,:][0] = model.coef_
beta
Out[16]:
BETAs
Food 0.729095
Beer 0.924026
Smoke 0.623424
Games 1.385464
Books 1.110920
Hshld 0.884306
Clths 0.831563
Hlth 0.833813
Chems 1.043679
Txtls 1.145560
Cnstr 1.181354
Steel 1.361442
FabPr 1.239619
ElcEq 1.288733
Autos 1.286918
Carry 1.188716
Mines 0.913507
Coal 1.278337
Oil 0.889536
Util 0.763865
Telcm 0.664651
Servs 0.823919
BusEq 1.080205
Paper 0.948937
Trans 1.138238
Whlsl 1.086695
Rtail 0.967677
Meals 0.946614
Fin 1.158976
Other 1.055502

1.2) Plot 30 beta's against premium¶

In [36]:
exp_ri = pd.DataFrame(data = np.nan, index = ind_name, columns = ['Average Excess Return'])
for i in range(len(ind_name)):
    exp_ri.iloc[i] = np.sum(df[ind_name[i]]-df['RF'])/df.shape[0]

exp_ri
Out[36]:
Average Excess Return
Food 0.699220
Beer 0.929593
Smoke 0.866482
Games 0.835659
Books 0.631750
Hshld 0.655269
Clths 0.664896
Hlth 0.807920
Chems 0.777340
Txtls 0.675537
Cnstr 0.694168
Steel 0.677374
FabPr 0.800849
ElcEq 0.896854
Autos 0.923614
Carry 0.850295
Mines 0.640659
Coal 0.828094
Oil 0.773960
Util 0.618354
Telcm 0.566352
Servs 0.953033
BusEq 0.903787
Paper 0.721256
Trans 0.655191
Whlsl 0.575069
Rtail 0.777964
Meals 0.800208
Fin 0.742305
Other 0.522088
In [37]:
plt.scatter(beta, exp_ri)
plt.xlabel('Beta')
plt.ylabel('Excess Return')
plt.title('Beta and Excess Return')
plt.ylim(0.1,2)
plt.xlim(0.1,2)
plt.show()

plt.savefig('Beta and Excess Return.png')
<Figure size 432x288 with 0 Axes>
In [82]:
ax = sns.regplot(x = beta, y = exp_ri) 
ax.set_title("Beta and Excess Return", fontsize=10)
ax.set_xlabel("Beta", fontsize = 10)
ax.set_ylabel("Excess Return", fontsize = 10)

plt.savefig("REG - Beta and Excess Return.png")

CONCLUSION

we cannot observe SML right away because they are lots of error terms from the linear line.

1.3) Cross-validation for CAPM ( 10-fold, RMSE, MAE )¶

create blank dataframe

In [71]:
metric_CAPM = pd.DataFrame(data = np.nan, index = ind_name, columns = ['RMSE','MAE'])
metric_CAPM
Out[71]:
RMSE MAE
Food NaN NaN
Beer NaN NaN
Smoke NaN NaN
Games NaN NaN
Books NaN NaN
Hshld NaN NaN
Clths NaN NaN
Hlth NaN NaN
Chems NaN NaN
Txtls NaN NaN
Cnstr NaN NaN
Steel NaN NaN
FabPr NaN NaN
ElcEq NaN NaN
Autos NaN NaN
Carry NaN NaN
Mines NaN NaN
Coal NaN NaN
Oil NaN NaN
Util NaN NaN
Telcm NaN NaN
Servs NaN NaN
BusEq NaN NaN
Paper NaN NaN
Trans NaN NaN
Whlsl NaN NaN
Rtail NaN NaN
Meals NaN NaN
Fin NaN NaN
Other NaN NaN
In [72]:
from sklearn.model_selection import KFold
kf = KFold(n_splits=10,shuffle=False)

for i in range(len(ind_name)):
    err_rmse_test = 0
    err_mae_test = 0
    mkt = df['Mkt-RF'].to_numpy()
    ri_rf = (df[ind_name[i]]-df['RF']).to_numpy()
    for train,test in kf.split(mkt):
        lr=linear_model.LinearRegression()
        reg=lr.fit(mkt[train].reshape(-1, 1),ri_rf[train])
        ri_rf_pred_test =reg.predict(mkt[test].reshape(-1, 1))
        e_test = ri_rf[test]-ri_rf_pred_test
        err_rmse_test += np.sqrt(np.mean(e_test*e_test))
        err_mae_test += np.mean(np.abs(e_test))
    rmse_10cv_test = err_rmse_test/10
    mae_10cv_test = err_mae_test/10
    metric_CAPM.iloc[i][0] = rmse_10cv_test
    metric_CAPM.iloc[i][1] = mae_10cv_test
metric_CAPM
Out[72]:
RMSE MAE
Food 2.538109 1.898086
Beer 4.636514 3.384528
Smoke 4.526541 3.521301
Games 4.874197 3.623334
Books 3.861875 2.956903
Hshld 3.213406 2.409892
Clths 4.235036 3.093844
Hlth 3.229728 2.437323
Chems 2.905301 2.200431
Txtls 4.653665 3.403551
Cnstr 2.829698 2.153414
Steel 4.417177 3.314934
FabPr 2.817130 2.186627
ElcEq 3.274492 2.547459
Autos 4.572182 3.280924
Carry 4.224256 3.154163
Mines 5.255009 4.125605
Coal 8.114324 5.838560
Oil 4.139266 3.138144
Util 3.519019 2.705937
Telcm 2.901631 2.226987
Servs 5.632045 3.950464
BusEq 3.421839 2.596730
Paper 2.839679 2.148962
Trans 3.515633 2.602099
Whlsl 3.747144 2.795816
Rtail 2.954227 2.257817
Meals 3.997552 3.018501
Fin 2.691408 2.016012
Other 3.483393 2.651443
In [73]:
mean_rmse_CAPM = np.mean(metric['RMSE'])
mean_mae_CAPM = np.mean(metric['MAE'])
print(f"RMSE: {mean_rmse_CAPM}")
print(f"MAE: {mean_mae_CAPM}")
RMSE: 3.900715824121209
MAE: 2.9213263884838936

1.4) Fama-Factors¶

Blank dataframe

In [42]:
beta_ff = pd.DataFrame(data = np.nan, index = ind_name, columns = ['MKT', 'SMB', 'HML'])
beta_ff
Out[42]:
MKT SMB HML
Food NaN NaN NaN
Beer NaN NaN NaN
Smoke NaN NaN NaN
Games NaN NaN NaN
Books NaN NaN NaN
Hshld NaN NaN NaN
Clths NaN NaN NaN
Hlth NaN NaN NaN
Chems NaN NaN NaN
Txtls NaN NaN NaN
Cnstr NaN NaN NaN
Steel NaN NaN NaN
FabPr NaN NaN NaN
ElcEq NaN NaN NaN
Autos NaN NaN NaN
Carry NaN NaN NaN
Mines NaN NaN NaN
Coal NaN NaN NaN
Oil NaN NaN NaN
Util NaN NaN NaN
Telcm NaN NaN NaN
Servs NaN NaN NaN
BusEq NaN NaN NaN
Paper NaN NaN NaN
Trans NaN NaN NaN
Whlsl NaN NaN NaN
Rtail NaN NaN NaN
Meals NaN NaN NaN
Fin NaN NaN NaN
Other NaN NaN NaN
In [45]:
#the same as industry but 3 columns
from sklearn import linear_model

for i in range(len(ind_name)):
    model = linear_model.LinearRegression()
    model = model.fit(df[['Mkt-RF','SMB','HML']], pd.DataFrame(df[ind_name[i]]-df['RF']))
   
    beta_ff.iloc[i] = model.coef_
beta_ff
Out[45]:
MKT SMB HML
Food 0.747453 -0.140384 0.052259
Beer 0.866732 0.196442 0.133388
Smoke 0.649355 -0.216873 0.096605
Games 1.287535 0.408923 0.138250
Books 1.012498 0.372491 0.186158
Hshld 0.906274 -0.088170 -0.035378
Clths 0.761506 0.422109 -0.060045
Hlth 0.878894 -0.085659 -0.189481
Chems 1.068119 -0.151514 0.026175
Txtls 0.982731 0.563268 0.372973
Cnstr 1.118341 0.247742 0.107824
Steel 1.259659 0.232049 0.380399
FabPr 1.173377 0.261550 0.111985
ElcEq 1.295485 -0.035460 -0.000616
Autos 1.247708 0.055086 0.188627
Carry 1.097555 0.218124 0.328079
Mines 0.842647 0.258731 0.145621
Coal 1.084163 0.485378 0.673339
Oil 0.878785 -0.179207 0.290080
Util 0.755451 -0.166903 0.259715
Telcm 0.694991 -0.131654 -0.036739
Servs 0.832129 0.368626 -0.505840
BusEq 1.121167 0.153322 -0.455735
Paper 0.952825 -0.057684 0.045355
Trans 1.038001 0.163473 0.454423
Whlsl 0.972969 0.550152 0.068217
Rtail 0.979982 0.041639 -0.131479
Meals 0.896939 0.289515 -0.030568
Fin 1.121536 -0.057539 0.315218
Other 0.999547 0.283097 0.018335

1.5) Plot of those 3 betas¶

In [62]:
import matplotlib.pyplot as plt

plt.scatter(beta_ff['MKT'], exp_ri)
plt.xlabel('MKT_FF')
plt.ylabel('Excess Return')
plt.title('MKT_FF and Excess Return')
plt.ylim(0.1,2)
plt.xlim(0.1,2)
plt.show()

plt.savefig('MKT_FF and Excess Return.png')
<Figure size 432x288 with 0 Axes>
In [68]:
ax = sns.regplot(x = beta_ff[['MKT']], y = exp_ri) 
ax.set_title("MKT_FF and Excess Return", fontsize=10)
ax.set_xlabel("MKT_FF", fontsize = 10)
ax.set_ylabel("Excess Return", fontsize = 10)

plt.savefig("REG - MKT_FF and Excess Return.png")
In [49]:
import matplotlib.pyplot as plt

plt.scatter(beta_ff['SMB'], exp_ri)
plt.xlabel('SMB_FF')
plt.ylabel('Excess Return')
plt.title('SMB_FF and Excess Return')
plt.ylim(0.1,2)
plt.xlim(0.1,2)
plt.show()

plt.savefig('SMB_FF and Excess Return.png')
<Figure size 432x288 with 0 Axes>
In [69]:
ax = sns.regplot(x = beta_ff[['SMB']], y = exp_ri) 
ax.set_title("SMB_FF and Excess Return", fontsize=10)
ax.set_xlabel("SMB_FF", fontsize = 10)
ax.set_ylabel("Excess Return", fontsize = 10)

plt.savefig("REG - SMB_FF and Excess Return.png")
In [58]:
import matplotlib.pyplot as plt

plt.scatter(beta_ff['HML'], exp_ri)
plt.xlabel('HML_FF')
plt.ylabel('Excess Return')
plt.title('HML_FF and Excess Return')
plt.ylim(0.1,2)
plt.xlim(-1,1)
plt.show()

plt.savefig('HML_FF and Excess Return.png')
<Figure size 432x288 with 0 Axes>
In [70]:
ax = sns.regplot(x = beta_ff[['HML']], y = exp_ri) 
ax.set_title("HML_FF and Excess Return", fontsize=10)
ax.set_xlabel("HML_FF", fontsize = 10)
ax.set_ylabel("Excess Return", fontsize = 10)

plt.savefig("REG - HML_FF and Excess Return.png")

CONCLUSION

Since I plot a regplot that obviously show the linear line, but as can be seen, only MKT_FF show the positive relationship with the excess return while another 2 roughly show the negative relationship but there are many outliers.

Easily compared, when we use the CAPM to analyze the relationship, SML reveals that the linear is more obvious rather than CAPM. Therefore the Three factors model is more suitable than CAPM.

1.6) Cross-validation for Three factors model ( 10-fold, RMSE, MAE )¶

In [74]:
metric_ff = pd.DataFrame(data = np.nan, index = ind_name, columns = ['RMSE','MAE'])
metric_ff
Out[74]:
RMSE MAE
Food NaN NaN
Beer NaN NaN
Smoke NaN NaN
Games NaN NaN
Books NaN NaN
Hshld NaN NaN
Clths NaN NaN
Hlth NaN NaN
Chems NaN NaN
Txtls NaN NaN
Cnstr NaN NaN
Steel NaN NaN
FabPr NaN NaN
ElcEq NaN NaN
Autos NaN NaN
Carry NaN NaN
Mines NaN NaN
Coal NaN NaN
Oil NaN NaN
Util NaN NaN
Telcm NaN NaN
Servs NaN NaN
BusEq NaN NaN
Paper NaN NaN
Trans NaN NaN
Whlsl NaN NaN
Rtail NaN NaN
Meals NaN NaN
Fin NaN NaN
Other NaN NaN
In [80]:
from sklearn.model_selection import KFold
kf = KFold(n_splits=10,shuffle=False)

for i in range(len(ind_name)):
    err_rmse_test = 0
    err_mae_test = 0
    FFs = df[['Mkt-RF','SMB','HML']].to_numpy()
    ri_rf = (df[ind_name[i]]-df['RF']).to_numpy()
    for train,test in kf.split(mkt):
        lr=linear_model.LinearRegression()
        reg=lr.fit(FFs[train],ri_rf[train])
        ri_rf_pred_test =reg.predict(FFs[test])
        e_test = ri_rf[test]-ri_rf_pred_test
        err_rmse_test += np.sqrt(np.mean(e_test*e_test))
        err_mae_test += np.mean(np.abs(e_test))
    rmse_10cv_test = err_rmse_test/10
    mae_10cv_test = err_mae_test/10
    metric_ff.iloc[i][0] = rmse_10cv_test
    metric_ff.iloc[i][1] = mae_10cv_test
metric_ff
Out[80]:
RMSE MAE
Food 2.583992 1.950460
Beer 4.727382 3.441337
Smoke 4.534637 3.511960
Games 4.822235 3.520520
Books 3.725951 2.855786
Hshld 3.235331 2.434792
Clths 4.197140 2.975686
Hlth 3.197743 2.386017
Chems 2.943988 2.243567
Txtls 4.255104 3.106636
Cnstr 2.807713 2.112981
Steel 4.192168 3.179183
FabPr 2.690331 2.090893
ElcEq 3.334103 2.592332
Autos 4.564839 3.317047
Carry 4.102379 3.079451
Mines 5.209853 4.070646
Coal 7.683888 5.632237
Oil 4.087016 3.117979
Util 3.389365 2.590805
Telcm 2.908926 2.232419
Servs 5.143993 3.536210
BusEq 3.147124 2.411187
Paper 2.924536 2.204255
Trans 3.201444 2.472775
Whlsl 3.581311 2.605731
Rtail 2.952258 2.250535
Meals 4.032527 3.011346
Fin 2.514929 1.901121
Other 3.480490 2.631941
In [81]:
mean_rmse_ff = np.mean(metric['RMSE'])
mean_mae_ff = np.mean(metric['MAE'])
print(f"RMSE: {mean_rmse_ff}")
print(f"MAE: {mean_mae_ff}")
RMSE: 3.900715824121209
MAE: 2.9213263884838936