import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import urllib.request
import zipfile
from pandas_datareader.famafrench import get_available_datasets
import pandas_datareader.data as web
import datetime as dt
%matplotlib inline
import qeds
qeds.themes.mpl_style();
plotly_template = qeds.themes.plotly_template()
colors = qeds.themes.COLOR_CYCLE
from sklearn import (linear_model, metrics, model_selection)
Fama-French Factors (monthly)
import urllib.request
import zipfile
ff_url = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip"
urllib.request.urlretrieve(ff_url,'fama_french.zip')
zip_file = zipfile.ZipFile('fama_french.zip', 'r')
zip_file.extractall()
zip_file.close()
ff_factors = pd.read_csv('F-F_Research_Data_Factors.csv', skiprows = 3, nrows = 1114, index_col = 0)
ff_factors.index = pd.to_datetime(ff_factors.index, format= '%Y%m')
ff_factors.index = ff_factors.index + pd.offsets.MonthEnd()
ff_factors = ff_factors.apply(lambda x: x/ 100)
ff_factors.head()
CONCAT DATA
get_available_datasets()
['F-F_Research_Data_Factors', 'F-F_Research_Data_Factors_weekly', 'F-F_Research_Data_Factors_daily', 'F-F_Research_Data_5_Factors_2x3', 'F-F_Research_Data_5_Factors_2x3_daily', 'Portfolios_Formed_on_ME', 'Portfolios_Formed_on_ME_Wout_Div', 'Portfolios_Formed_on_ME_Daily', 'Portfolios_Formed_on_BE-ME', 'Portfolios_Formed_on_BE-ME_Wout_Div', 'Portfolios_Formed_on_BE-ME_Daily', 'Portfolios_Formed_on_OP', 'Portfolios_Formed_on_OP_Wout_Div', 'Portfolios_Formed_on_OP_Daily', 'Portfolios_Formed_on_INV', 'Portfolios_Formed_on_INV_Wout_Div', 'Portfolios_Formed_on_INV_Daily', '6_Portfolios_2x3', '6_Portfolios_2x3_Wout_Div', '6_Portfolios_2x3_weekly', '6_Portfolios_2x3_daily', '25_Portfolios_5x5', '25_Portfolios_5x5_Wout_Div', '25_Portfolios_5x5_Daily', '100_Portfolios_10x10', '100_Portfolios_10x10_Wout_Div', '100_Portfolios_10x10_Daily', '6_Portfolios_ME_OP_2x3', '6_Portfolios_ME_OP_2x3_Wout_Div', '6_Portfolios_ME_OP_2x3_daily', '25_Portfolios_ME_OP_5x5', '25_Portfolios_ME_OP_5x5_Wout_Div', '25_Portfolios_ME_OP_5x5_daily', '100_Portfolios_ME_OP_10x10', '100_Portfolios_10x10_ME_OP_Wout_Div', '100_Portfolios_ME_OP_10x10_daily', '6_Portfolios_ME_INV_2x3', '6_Portfolios_ME_INV_2x3_Wout_Div', '6_Portfolios_ME_INV_2x3_daily', '25_Portfolios_ME_INV_5x5', '25_Portfolios_ME_INV_5x5_Wout_Div', '25_Portfolios_ME_INV_5x5_daily', '100_Portfolios_ME_INV_10x10', '100_Portfolios_10x10_ME_INV_Wout_Div', '100_Portfolios_ME_INV_10x10_daily', '25_Portfolios_BEME_OP_5x5', '25_Portfolios_BEME_OP_5x5_Wout_Div', '25_Portfolios_BEME_OP_5x5_daily', '25_Portfolios_BEME_INV_5x5', '25_Portfolios_BEME_INV_5x5_Wout_Div', '25_Portfolios_BEME_INV_5x5_daily', '25_Portfolios_OP_INV_5x5', '25_Portfolios_OP_INV_5x5_Wout_Div', '25_Portfolios_OP_INV_5x5_daily', '32_Portfolios_ME_BEME_OP_2x4x4', '32_Portfolios_ME_BEME_OP_2x4x4_Wout_Div', '32_Portfolios_ME_BEME_INV_2x4x4', '32_Portfolios_ME_BEME_INV_2x4x4_Wout_Div', '32_Portfolios_ME_OP_INV_2x4x4', '32_Portfolios_ME_OP_INV_2x4x4_Wout_Div', 'Portfolios_Formed_on_E-P', 'Portfolios_Formed_on_E-P_Wout_Div', 'Portfolios_Formed_on_CF-P', 'Portfolios_Formed_on_CF-P_Wout_Div', 'Portfolios_Formed_on_D-P', 'Portfolios_Formed_on_D-P_Wout_Div', '6_Portfolios_ME_EP_2x3', '6_Portfolios_ME_EP_2x3_Wout_Div', '6_Portfolios_ME_CFP_2x3', '6_Portfolios_ME_CFP_2x3_Wout_Div', '6_Portfolios_ME_DP_2x3', '6_Portfolios_ME_DP_2x3_Wout_Div', 'F-F_Momentum_Factor', 'F-F_Momentum_Factor_daily', '6_Portfolios_ME_Prior_12_2', '6_Portfolios_ME_Prior_12_2_Daily', '25_Portfolios_ME_Prior_12_2', '25_Portfolios_ME_Prior_12_2_Daily', '10_Portfolios_Prior_12_2', '10_Portfolios_Prior_12_2_Daily', 'F-F_ST_Reversal_Factor', 'F-F_ST_Reversal_Factor_daily', '6_Portfolios_ME_Prior_1_0', '6_Portfolios_ME_Prior_1_0_Daily', '25_Portfolios_ME_Prior_1_0', '25_Portfolios_ME_Prior_1_0_Daily', '10_Portfolios_Prior_1_0', '10_Portfolios_Prior_1_0_Daily', 'F-F_LT_Reversal_Factor', 'F-F_LT_Reversal_Factor_daily', '6_Portfolios_ME_Prior_60_13', '6_Portfolios_ME_Prior_60_13_Daily', '25_Portfolios_ME_Prior_60_13', '25_Portfolios_ME_Prior_60_13_Daily', '10_Portfolios_Prior_60_13', '10_Portfolios_Prior_60_13_Daily', 'Portfolios_Formed_on_AC', '25_Portfolios_ME_AC_5x5', 'Portfolios_Formed_on_BETA', '25_Portfolios_ME_BETA_5x5', 'Portfolios_Formed_on_NI', '25_Portfolios_ME_NI_5x5', 'Portfolios_Formed_on_VAR', '25_Portfolios_ME_VAR_5x5', 'Portfolios_Formed_on_RESVAR', '25_Portfolios_ME_RESVAR_5x5', '5_Industry_Portfolios', '5_Industry_Portfolios_Wout_Div', '5_Industry_Portfolios_daily', '10_Industry_Portfolios', '10_Industry_Portfolios_Wout_Div', '10_Industry_Portfolios_daily', '12_Industry_Portfolios', '12_Industry_Portfolios_Wout_Div', '12_Industry_Portfolios_daily', '17_Industry_Portfolios', '17_Industry_Portfolios_Wout_Div', '17_Industry_Portfolios_daily', '30_Industry_Portfolios', '30_Industry_Portfolios_Wout_Div', '30_Industry_Portfolios_daily', '38_Industry_Portfolios', '38_Industry_Portfolios_Wout_Div', '38_Industry_Portfolios_daily', '48_Industry_Portfolios', '48_Industry_Portfolios_Wout_Div', '48_Industry_Portfolios_daily', '49_Industry_Portfolios', '49_Industry_Portfolios_Wout_Div', '49_Industry_Portfolios_daily', 'ME_Breakpoints', 'BE-ME_Breakpoints', 'OP_Breakpoints', 'INV_Breakpoints', 'E-P_Breakpoints', 'CF-P_Breakpoints', 'D-P_Breakpoints', 'Prior_2-12_Breakpoints', 'Developed_3_Factors', 'Developed_3_Factors_Daily', 'Developed_ex_US_3_Factors', 'Developed_ex_US_3_Factors_Daily', 'Europe_3_Factors', 'Europe_3_Factors_Daily', 'Japan_3_Factors', 'Japan_3_Factors_Daily', 'Asia_Pacific_ex_Japan_3_Factors', 'Asia_Pacific_ex_Japan_3_Factors_Daily', 'North_America_3_Factors', 'North_America_3_Factors_Daily', 'Developed_5_Factors', 'Developed_5_Factors_Daily', 'Developed_ex_US_5_Factors', 'Developed_ex_US_5_Factors_Daily', 'Europe_5_Factors', 'Europe_5_Factors_Daily', 'Japan_5_Factors', 'Japan_5_Factors_Daily', 'Asia_Pacific_ex_Japan_5_Factors', 'Asia_Pacific_ex_Japan_5_Factors_Daily', 'North_America_5_Factors', 'North_America_5_Factors_Daily', 'Developed_Mom_Factor', 'Developed_Mom_Factor_Daily', 'Developed_ex_US_Mom_Factor', 'Developed_ex_US_Mom_Factor_Daily', 'Europe_Mom_Factor', 'Europe_Mom_Factor_Daily', 'Japan_Mom_Factor', 'Japan_Mom_Factor_Daily', 'Asia_Pacific_ex_Japan_MOM_Factor', 'Asia_Pacific_ex_Japan_MOM_Factor_Daily', 'North_America_Mom_Factor', 'North_America_Mom_Factor_Daily', 'Developed_6_Portfolios_ME_BE-ME', 'Developed_6_Portfolios_ME_BE-ME_daily', 'Developed_ex_US_6_Portfolios_ME_BE-ME', 'Developed_ex_US_6_Portfolios_ME_BE-ME_daily', 'Europe_6_Portfolios_ME_BE-ME', 'Europe_6_Portfolios_ME_BE-ME_daily', 'Japan_6_Portfolios_ME_BE-ME', 'Japan_6_Portfolios_ME_BE-ME_daily', 'Asia_Pacific_ex_Japan_6_Portfolios_ME_BE-ME', 'Asia_Pacific_ex_Japan_6_Portfolios_ME_BE-ME_daily', 'North_America_6_Portfolios_ME_BE-ME', 'North_America_6_Portfolios_ME_BE-ME_daily', 'Developed_25_Portfolios_ME_BE-ME', 'Developed_25_Portfolios_ME_BE-ME_daily', 'Developed_ex_US_25_Portfolios_ME_BE-ME', 'Developed_ex_US_25_Portfolios_ME_BE-ME_daily', 'Europe_25_Portfolios_ME_BE-ME', 'Europe_25_Portfolios_ME_BE-ME_daily', 'Japan_25_Portfolios_ME_BE-ME', 'Japan_25_Portfolios_ME_BE-ME_daily', 'Asia_Pacific_ex_Japan_25_Portfolios_ME_BE-ME', 'Asia_Pacific_ex_Japan_25_Portfolios_ME_BE-ME_daily', 'North_America_25_Portfolios_ME_BE-ME', 'North_America_25_Portfolios_ME_BE-ME_daily', 'Developed_6_Portfolios_ME_OP', 'Developed_6_Portfolios_ME_OP_Daily', 'Developed_ex_US_6_Portfolios_ME_OP', 'Developed_ex_US_6_Portfolios_ME_OP_Daily', 'Europe_6_Portfolios_ME_OP', 'Europe_6_Portfolios_ME_OP_Daily', 'Japan_6_Portfolios_ME_OP', 'Japan_6_Portfolios_ME_OP_Daily', 'Asia_Pacific_ex_Japan_6_Portfolios_ME_OP', 'Asia_Pacific_ex_Japan_6_Portfolios_ME_OP_Daily', 'North_America_6_Portfolios_ME_OP', 'North_America_6_Portfolios_ME_OP_Daily', 'Developed_25_Portfolios_ME_OP', 'Developed_25_Portfolios_ME_OP_Daily', 'Developed_ex_US_25_Portfolios_ME_OP', 'Developed_ex_US_25_Portfolios_ME_OP_Daily', 'Europe_25_Portfolios_ME_OP', 'Europe_25_Portfolios_ME_OP_Daily', 'Japan_25_Portfolios_ME_OP', 'Japan_25_Portfolios_ME_OP_Daily', 'Asia_Pacific_ex_Japan_25_Portfolios_ME_OP', 'Asia_Pacific_ex_Japan_25_Portfolios_ME_OP_Daily', 'North_America_25_Portfolios_ME_OP', 'North_America_25_Portfolios_ME_OP_Daily', 'Developed_6_Portfolios_ME_INV', 'Developed_6_Portfolios_ME_INV_Daily', 'Developed_ex_US_6_Portfolios_ME_INV', 'Developed_ex_US_6_Portfolios_ME_INV_Daily', 'Europe_6_Portfolios_ME_INV', 'Europe_6_Portfolios_ME_INV_Daily', 'Japan_6_Portfolios_ME_INV', 'Japan_6_Portfolios_ME_INV_Daily', 'Asia_Pacific_ex_Japan_6_Portfolios_ME_INV', 'Asia_Pacific_ex_Japan_6_Portfolios_ME_INV_Daily', 'North_America_6_Portfolios_ME_INV', 'North_America_6_Portfolios_ME_INV_Daily', 'Developed_25_Portfolios_ME_INV', 'Developed_25_Portfolios_ME_INV_Daily', 'Developed_ex_US_25_Portfolios_ME_INV', 'Developed_ex_US_25_Portfolios_ME_INV_Daily', 'Europe_25_Portfolios_ME_INV', 'Europe_25_Portfolios_ME_INV_Daily', 'Japan_25_Portfolios_ME_INV', 'Japan_25_Portfolios_ME_INV_Daily', 'Asia_Pacific_ex_Japan_25_Portfolios_ME_INV', 'Asia_Pacific_ex_Japan_25_Portfolios_ME_INV_Daily', 'North_America_25_Portfolios_ME_INV', 'North_America_25_Portfolios_ME_INV_Daily', 'Developed_6_Portfolios_ME_Prior_12_2', 'Developed_6_Portfolios_ME_Prior_250_20_daily', 'Developed_ex_US_6_Portfolios_ME_Prior_12_2', 'Developed_ex_US_6_Portfolios_ME_Prior_250_20_daily', 'Europe_6_Portfolios_ME_Prior_12_2', 'Europe_6_Portfolios_ME_Prior_250_20_daily', 'Japan_6_Portfolios_ME_Prior_12_2', 'Japan_6_Portfolios_ME_Prior_250_20_daily', 'Asia_Pacific_ex_Japan_6_Portfolios_ME_Prior_12_2', 'Asia_Pacific_ex_Japan_6_Portfolios_ME_Prior_250_20_daily', 'North_America_6_Portfolios_ME_Prior_12_2', 'North_America_6_Portfolios_ME_Prior_250_20_daily', 'Developed_25_Portfolios_ME_Prior_12_2', 'Developed_25_Portfolios_ME_Prior_250_20_daily', 'Developed_ex_US_25_Portfolios_ME_Prior_12_2', 'Developed_ex_US_25_Portfolios_ME_Prior_250_20_daily', 'Europe_25_Portfolios_ME_Prior_12_2', 'Europe_25_Portfolios_ME_Prior_250_20_daily', 'Japan_25_Portfolios_ME_Prior_12_2', 'Japan_25_Portfolios_ME_Prior_250_20_daily', 'Asia_Pacific_ex_Japan_25_Portfolios_ME_Prior_12_2', 'Asia_Pacific_ex_Japan_25_Portfolios_ME_Prior_250_20_daily', 'North_America_25_Portfolios_ME_Prior_12_2', 'North_America_25_Portfolios_ME_Prior_250_20_daily', 'Developed_32_Portfolios_ME_BE-ME_OP_2x4x4', 'Developed_ex_US_32_Portfolios_ME_BE-ME_OP_2x4x4', 'Europe_32_Portfolios_ME_BE-ME_OP_2x4x4', 'Japan_32_Portfolios_ME_BE-ME_OP_2x4x4', 'Asia_Pacific_ex_Japan_32_Portfolios_ME_BE-ME_OP_2x4x4', 'North_America_32_Portfolios_ME_BE-ME_OP_2x4x4', 'Developed_32_Portfolios_ME_BE-ME_INV(TA)_2x4x4', 'Developed_ex_US_32_Portfolios_ME_BE-ME_INV(TA)_2x4x4', 'Europe_32_Portfolios_ME_BE-ME_INV(TA)_2x4x4', 'Japan_32_Portfolios_ME_BE-ME_INV(TA)_2x4x4', 'Asia_Pacific_ex_Japan_32_Portfolios_ME_BE-ME_INV(TA)_2x4x4', 'North_America_32_Portfolios_ME_BE-ME_INV(TA)_2x4x4', 'Developed_32_Portfolios_ME_INV(TA)_OP_2x4x4', 'Developed_ex_US_32_Portfolios_ME_INV(TA)_OP_2x4x4', 'Europe_32_Portfolios_ME_INV(TA)_OP_2x4x4', 'Japan_32_Portfolios_ME_INV(TA)_OP_2x4x4', 'Asia_Pacific_ex_Japan_32_Portfolios_ME_INV(TA)_OP_2x4x4', 'North_America_32_Portfolios_ME_INV(TA)_OP_2x4x4', 'Emerging_5_Factors', 'Emerging_MOM_Factor', 'Emerging_Markets_6_Portfolios_ME_BE-ME', 'Emerging_Markets_6_Portfolios_ME_OP', 'Emerging_Markets_6_Portfolios_ME_INV', 'Emerging_Markets_6_Portfolios_ME_Prior_12_2', 'Emerging_Markets_4_Portfolios_BE-ME_OP', 'Emerging_Markets_4_Portfolios_OP_INV', 'Emerging_Markets_4_Portfolios_BE-ME_INV']
SELECT
F-F_Research_Data_Factors
30_Industry_Portfolios
start_date = dt.datetime(1926, 7, 1)
end_date = dt.datetime(2022, 8, 1)
ff_factors = web.DataReader('F-F_Research_Data_Factors', 'famafrench', start = start_date, end = end_date)
print(ff_factors['DESCR'])
F-F Research Data Factors ------------------------- This file was created by CMPT_ME_BEME_RETS using the 202208 CRSP database. The 1-month TBill return is from Ibbotson and Associates, Inc. Copyright 2022 Kenneth R. French 0 : (1154 rows x 4 cols) 1 : Annual Factors: January-December (95 rows x 4 cols)
ff_monthly = ff_factors[0]
ff_monthly
| Mkt-RF | SMB | HML | RF | |
|---|---|---|---|---|
| Date | ||||
| 1926-07 | 2.96 | -2.56 | -2.43 | 0.22 |
| 1926-08 | 2.64 | -1.17 | 3.82 | 0.25 |
| 1926-09 | 0.36 | -1.40 | 0.13 | 0.23 |
| 1926-10 | -3.24 | -0.09 | 0.70 | 0.32 |
| 1926-11 | 2.53 | -0.10 | -0.51 | 0.31 |
| ... | ... | ... | ... | ... |
| 2022-04 | -9.46 | -1.41 | 6.19 | 0.01 |
| 2022-05 | -0.34 | -1.85 | 8.41 | 0.03 |
| 2022-06 | -8.43 | 2.09 | -5.97 | 0.06 |
| 2022-07 | 9.57 | 2.81 | -4.10 | 0.08 |
| 2022-08 | -3.78 | 1.39 | 0.31 | 0.19 |
1154 rows × 4 columns
industry_port = web.DataReader('30_Industry_Portfolios', 'famafrench', start = start_date, end = end_date)
print(industry_port['DESCR'])
30 Industry Portfolios ---------------------- This file was created by CMPT_IND_RETS using the 202208 CRSP database. It contains value- and equal-weighted returns for 30 industry portfolios. The portfolios are constructed at the end of June. The annual returns are from January to December. Missing data are indicated by -99.99 or -999. Copyright 2022 Kenneth R. French 0 : Average Value Weighted Returns -- Monthly (1154 rows x 30 cols) 1 : Average Equal Weighted Returns -- Monthly (1154 rows x 30 cols) 2 : Average Value Weighted Returns -- Annual (95 rows x 30 cols) 3 : Average Equal Weighted Returns -- Annual (95 rows x 30 cols) 4 : Number of Firms in Portfolios (1154 rows x 30 cols) 5 : Average Firm Size (1154 rows x 30 cols) 6 : Sum of BE / Sum of ME (97 rows x 30 cols) 7 : Value-Weighted Average of BE/ME (97 rows x 30 cols)
ind_monthly = industry_port[0]
ind_monthly
| Food | Beer | Smoke | Games | Books | Hshld | Clths | Hlth | Chems | Txtls | ... | Telcm | Servs | BusEq | Paper | Trans | Whlsl | Rtail | Meals | Fin | Other | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||||||||||||||
| 1926-07 | 0.56 | -5.19 | 1.29 | 2.93 | 10.97 | -0.48 | 8.08 | 1.77 | 8.14 | 0.39 | ... | 0.83 | 9.22 | 2.06 | 7.70 | 1.91 | -23.79 | 0.07 | 1.87 | -0.02 | 5.20 |
| 1926-08 | 2.59 | 27.03 | 6.50 | 0.55 | 10.01 | -3.58 | -2.51 | 4.25 | 5.50 | 7.97 | ... | 2.17 | 2.02 | 4.39 | -2.38 | 4.85 | 5.39 | -0.75 | -0.13 | 4.47 | 6.76 |
| 1926-09 | 1.16 | 4.02 | 1.26 | 6.58 | -0.99 | 0.73 | -0.51 | 0.69 | 5.33 | 2.30 | ... | 2.41 | 2.25 | 0.19 | -5.54 | 0.07 | -7.87 | 0.25 | -0.56 | -1.61 | -3.86 |
| 1926-10 | -3.06 | -3.31 | 1.06 | -4.76 | 9.47 | -4.68 | 0.12 | -0.57 | -4.76 | 1.00 | ... | -0.11 | -2.00 | -1.09 | -5.08 | -2.61 | -15.38 | -2.20 | -4.11 | -5.51 | -8.49 |
| 1926-11 | 6.35 | 7.29 | 4.55 | 1.66 | -5.80 | -0.54 | 1.87 | 5.42 | 5.20 | 3.10 | ... | 1.63 | 3.77 | 3.64 | 3.84 | 1.61 | 4.67 | 6.52 | 4.33 | 2.34 | 4.00 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2022-04 | 3.01 | 3.03 | 6.37 | -25.22 | -10.76 | 2.04 | -7.00 | -6.80 | -2.28 | 6.63 | ... | -10.70 | -12.59 | -12.26 | -0.74 | -10.93 | -2.14 | -11.41 | -5.47 | -7.99 | -7.65 |
| 2022-05 | -1.68 | -1.60 | 2.67 | -2.93 | -7.40 | -5.12 | -6.45 | 0.99 | 4.52 | 2.38 | ... | 8.54 | -3.35 | -0.75 | -0.66 | -4.59 | 1.03 | -5.64 | -3.29 | 2.80 | -1.19 |
| 2022-06 | -1.64 | -0.02 | -11.63 | -11.33 | -12.53 | -2.56 | -12.00 | -2.05 | -15.65 | -11.17 | ... | -6.72 | -6.79 | -10.19 | -8.51 | -7.14 | -6.43 | -8.50 | -9.02 | -9.05 | -11.78 |
| 2022-07 | 3.67 | 5.49 | 0.56 | 14.62 | 12.10 | 0.76 | 11.86 | 2.75 | 7.66 | 6.86 | ... | -0.40 | 8.60 | 15.68 | 7.22 | 9.33 | 9.08 | 16.33 | 11.89 | 7.38 | 9.19 |
| 2022-08 | -1.61 | -1.87 | -0.12 | -2.95 | -4.97 | -2.16 | -6.01 | -5.07 | -1.39 | -12.20 | ... | -3.00 | -4.72 | -5.89 | -7.66 | -1.46 | -1.60 | -3.46 | -1.47 | -2.24 | -3.65 |
1154 rows × 30 columns
df = pd.concat([ff_monthly, ind_monthly], axis = 1, join = 'inner')
df
| Mkt-RF | SMB | HML | RF | Food | Beer | Smoke | Games | Books | Hshld | ... | Telcm | Servs | BusEq | Paper | Trans | Whlsl | Rtail | Meals | Fin | Other | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||||||||||||||
| 1926-07 | 2.96 | -2.56 | -2.43 | 0.22 | 0.56 | -5.19 | 1.29 | 2.93 | 10.97 | -0.48 | ... | 0.83 | 9.22 | 2.06 | 7.70 | 1.91 | -23.79 | 0.07 | 1.87 | -0.02 | 5.20 |
| 1926-08 | 2.64 | -1.17 | 3.82 | 0.25 | 2.59 | 27.03 | 6.50 | 0.55 | 10.01 | -3.58 | ... | 2.17 | 2.02 | 4.39 | -2.38 | 4.85 | 5.39 | -0.75 | -0.13 | 4.47 | 6.76 |
| 1926-09 | 0.36 | -1.40 | 0.13 | 0.23 | 1.16 | 4.02 | 1.26 | 6.58 | -0.99 | 0.73 | ... | 2.41 | 2.25 | 0.19 | -5.54 | 0.07 | -7.87 | 0.25 | -0.56 | -1.61 | -3.86 |
| 1926-10 | -3.24 | -0.09 | 0.70 | 0.32 | -3.06 | -3.31 | 1.06 | -4.76 | 9.47 | -4.68 | ... | -0.11 | -2.00 | -1.09 | -5.08 | -2.61 | -15.38 | -2.20 | -4.11 | -5.51 | -8.49 |
| 1926-11 | 2.53 | -0.10 | -0.51 | 0.31 | 6.35 | 7.29 | 4.55 | 1.66 | -5.80 | -0.54 | ... | 1.63 | 3.77 | 3.64 | 3.84 | 1.61 | 4.67 | 6.52 | 4.33 | 2.34 | 4.00 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2022-04 | -9.46 | -1.41 | 6.19 | 0.01 | 3.01 | 3.03 | 6.37 | -25.22 | -10.76 | 2.04 | ... | -10.70 | -12.59 | -12.26 | -0.74 | -10.93 | -2.14 | -11.41 | -5.47 | -7.99 | -7.65 |
| 2022-05 | -0.34 | -1.85 | 8.41 | 0.03 | -1.68 | -1.60 | 2.67 | -2.93 | -7.40 | -5.12 | ... | 8.54 | -3.35 | -0.75 | -0.66 | -4.59 | 1.03 | -5.64 | -3.29 | 2.80 | -1.19 |
| 2022-06 | -8.43 | 2.09 | -5.97 | 0.06 | -1.64 | -0.02 | -11.63 | -11.33 | -12.53 | -2.56 | ... | -6.72 | -6.79 | -10.19 | -8.51 | -7.14 | -6.43 | -8.50 | -9.02 | -9.05 | -11.78 |
| 2022-07 | 9.57 | 2.81 | -4.10 | 0.08 | 3.67 | 5.49 | 0.56 | 14.62 | 12.10 | 0.76 | ... | -0.40 | 8.60 | 15.68 | 7.22 | 9.33 | 9.08 | 16.33 | 11.89 | 7.38 | 9.19 |
| 2022-08 | -3.78 | 1.39 | 0.31 | 0.19 | -1.61 | -1.87 | -0.12 | -2.95 | -4.97 | -2.16 | ... | -3.00 | -4.72 | -5.89 | -7.66 | -1.46 | -1.60 | -3.46 | -1.47 | -2.24 | -3.65 |
1154 rows × 34 columns
Create blank dataframe
ind_name = ind_monthly.columns
beta = pd.DataFrame(data = np.nan, index = ind_name, columns = ['BETAs'])
beta
| BETAs | |
|---|---|
| Food | NaN |
| Beer | NaN |
| Smoke | NaN |
| Games | NaN |
| Books | NaN |
| Hshld | NaN |
| Clths | NaN |
| Hlth | NaN |
| Chems | NaN |
| Txtls | NaN |
| Cnstr | NaN |
| Steel | NaN |
| FabPr | NaN |
| ElcEq | NaN |
| Autos | NaN |
| Carry | NaN |
| Mines | NaN |
| Coal | NaN |
| Oil | NaN |
| Util | NaN |
| Telcm | NaN |
| Servs | NaN |
| BusEq | NaN |
| Paper | NaN |
| Trans | NaN |
| Whlsl | NaN |
| Rtail | NaN |
| Meals | NaN |
| Fin | NaN |
| Other | NaN |
y = Ri - Rf
x = E(Rm) - Rf
from sklearn import linear_model
for i in range(len(ind_name)):
model = linear_model.LinearRegression()
model = model.fit(df[['Mkt-RF']], pd.DataFrame(df[ind_name[i]]-df['RF']))
beta.iloc[i,:][0] = model.coef_
beta
| BETAs | |
|---|---|
| Food | 0.729095 |
| Beer | 0.924026 |
| Smoke | 0.623424 |
| Games | 1.385464 |
| Books | 1.110920 |
| Hshld | 0.884306 |
| Clths | 0.831563 |
| Hlth | 0.833813 |
| Chems | 1.043679 |
| Txtls | 1.145560 |
| Cnstr | 1.181354 |
| Steel | 1.361442 |
| FabPr | 1.239619 |
| ElcEq | 1.288733 |
| Autos | 1.286918 |
| Carry | 1.188716 |
| Mines | 0.913507 |
| Coal | 1.278337 |
| Oil | 0.889536 |
| Util | 0.763865 |
| Telcm | 0.664651 |
| Servs | 0.823919 |
| BusEq | 1.080205 |
| Paper | 0.948937 |
| Trans | 1.138238 |
| Whlsl | 1.086695 |
| Rtail | 0.967677 |
| Meals | 0.946614 |
| Fin | 1.158976 |
| Other | 1.055502 |
exp_ri = pd.DataFrame(data = np.nan, index = ind_name, columns = ['Average Excess Return'])
for i in range(len(ind_name)):
exp_ri.iloc[i] = np.sum(df[ind_name[i]]-df['RF'])/df.shape[0]
exp_ri
| Average Excess Return | |
|---|---|
| Food | 0.699220 |
| Beer | 0.929593 |
| Smoke | 0.866482 |
| Games | 0.835659 |
| Books | 0.631750 |
| Hshld | 0.655269 |
| Clths | 0.664896 |
| Hlth | 0.807920 |
| Chems | 0.777340 |
| Txtls | 0.675537 |
| Cnstr | 0.694168 |
| Steel | 0.677374 |
| FabPr | 0.800849 |
| ElcEq | 0.896854 |
| Autos | 0.923614 |
| Carry | 0.850295 |
| Mines | 0.640659 |
| Coal | 0.828094 |
| Oil | 0.773960 |
| Util | 0.618354 |
| Telcm | 0.566352 |
| Servs | 0.953033 |
| BusEq | 0.903787 |
| Paper | 0.721256 |
| Trans | 0.655191 |
| Whlsl | 0.575069 |
| Rtail | 0.777964 |
| Meals | 0.800208 |
| Fin | 0.742305 |
| Other | 0.522088 |
plt.scatter(beta, exp_ri)
plt.xlabel('Beta')
plt.ylabel('Excess Return')
plt.title('Beta and Excess Return')
plt.ylim(0.1,2)
plt.xlim(0.1,2)
plt.show()
plt.savefig('Beta and Excess Return.png')
<Figure size 432x288 with 0 Axes>
ax = sns.regplot(x = beta, y = exp_ri)
ax.set_title("Beta and Excess Return", fontsize=10)
ax.set_xlabel("Beta", fontsize = 10)
ax.set_ylabel("Excess Return", fontsize = 10)
plt.savefig("REG - Beta and Excess Return.png")
CONCLUSION
we cannot observe SML right away because they are lots of error terms from the linear line.
create blank dataframe
metric_CAPM = pd.DataFrame(data = np.nan, index = ind_name, columns = ['RMSE','MAE'])
metric_CAPM
| RMSE | MAE | |
|---|---|---|
| Food | NaN | NaN |
| Beer | NaN | NaN |
| Smoke | NaN | NaN |
| Games | NaN | NaN |
| Books | NaN | NaN |
| Hshld | NaN | NaN |
| Clths | NaN | NaN |
| Hlth | NaN | NaN |
| Chems | NaN | NaN |
| Txtls | NaN | NaN |
| Cnstr | NaN | NaN |
| Steel | NaN | NaN |
| FabPr | NaN | NaN |
| ElcEq | NaN | NaN |
| Autos | NaN | NaN |
| Carry | NaN | NaN |
| Mines | NaN | NaN |
| Coal | NaN | NaN |
| Oil | NaN | NaN |
| Util | NaN | NaN |
| Telcm | NaN | NaN |
| Servs | NaN | NaN |
| BusEq | NaN | NaN |
| Paper | NaN | NaN |
| Trans | NaN | NaN |
| Whlsl | NaN | NaN |
| Rtail | NaN | NaN |
| Meals | NaN | NaN |
| Fin | NaN | NaN |
| Other | NaN | NaN |
from sklearn.model_selection import KFold
kf = KFold(n_splits=10,shuffle=False)
for i in range(len(ind_name)):
err_rmse_test = 0
err_mae_test = 0
mkt = df['Mkt-RF'].to_numpy()
ri_rf = (df[ind_name[i]]-df['RF']).to_numpy()
for train,test in kf.split(mkt):
lr=linear_model.LinearRegression()
reg=lr.fit(mkt[train].reshape(-1, 1),ri_rf[train])
ri_rf_pred_test =reg.predict(mkt[test].reshape(-1, 1))
e_test = ri_rf[test]-ri_rf_pred_test
err_rmse_test += np.sqrt(np.mean(e_test*e_test))
err_mae_test += np.mean(np.abs(e_test))
rmse_10cv_test = err_rmse_test/10
mae_10cv_test = err_mae_test/10
metric_CAPM.iloc[i][0] = rmse_10cv_test
metric_CAPM.iloc[i][1] = mae_10cv_test
metric_CAPM
| RMSE | MAE | |
|---|---|---|
| Food | 2.538109 | 1.898086 |
| Beer | 4.636514 | 3.384528 |
| Smoke | 4.526541 | 3.521301 |
| Games | 4.874197 | 3.623334 |
| Books | 3.861875 | 2.956903 |
| Hshld | 3.213406 | 2.409892 |
| Clths | 4.235036 | 3.093844 |
| Hlth | 3.229728 | 2.437323 |
| Chems | 2.905301 | 2.200431 |
| Txtls | 4.653665 | 3.403551 |
| Cnstr | 2.829698 | 2.153414 |
| Steel | 4.417177 | 3.314934 |
| FabPr | 2.817130 | 2.186627 |
| ElcEq | 3.274492 | 2.547459 |
| Autos | 4.572182 | 3.280924 |
| Carry | 4.224256 | 3.154163 |
| Mines | 5.255009 | 4.125605 |
| Coal | 8.114324 | 5.838560 |
| Oil | 4.139266 | 3.138144 |
| Util | 3.519019 | 2.705937 |
| Telcm | 2.901631 | 2.226987 |
| Servs | 5.632045 | 3.950464 |
| BusEq | 3.421839 | 2.596730 |
| Paper | 2.839679 | 2.148962 |
| Trans | 3.515633 | 2.602099 |
| Whlsl | 3.747144 | 2.795816 |
| Rtail | 2.954227 | 2.257817 |
| Meals | 3.997552 | 3.018501 |
| Fin | 2.691408 | 2.016012 |
| Other | 3.483393 | 2.651443 |
mean_rmse_CAPM = np.mean(metric['RMSE'])
mean_mae_CAPM = np.mean(metric['MAE'])
print(f"RMSE: {mean_rmse_CAPM}")
print(f"MAE: {mean_mae_CAPM}")
RMSE: 3.900715824121209 MAE: 2.9213263884838936
Blank dataframe
beta_ff = pd.DataFrame(data = np.nan, index = ind_name, columns = ['MKT', 'SMB', 'HML'])
beta_ff
| MKT | SMB | HML | |
|---|---|---|---|
| Food | NaN | NaN | NaN |
| Beer | NaN | NaN | NaN |
| Smoke | NaN | NaN | NaN |
| Games | NaN | NaN | NaN |
| Books | NaN | NaN | NaN |
| Hshld | NaN | NaN | NaN |
| Clths | NaN | NaN | NaN |
| Hlth | NaN | NaN | NaN |
| Chems | NaN | NaN | NaN |
| Txtls | NaN | NaN | NaN |
| Cnstr | NaN | NaN | NaN |
| Steel | NaN | NaN | NaN |
| FabPr | NaN | NaN | NaN |
| ElcEq | NaN | NaN | NaN |
| Autos | NaN | NaN | NaN |
| Carry | NaN | NaN | NaN |
| Mines | NaN | NaN | NaN |
| Coal | NaN | NaN | NaN |
| Oil | NaN | NaN | NaN |
| Util | NaN | NaN | NaN |
| Telcm | NaN | NaN | NaN |
| Servs | NaN | NaN | NaN |
| BusEq | NaN | NaN | NaN |
| Paper | NaN | NaN | NaN |
| Trans | NaN | NaN | NaN |
| Whlsl | NaN | NaN | NaN |
| Rtail | NaN | NaN | NaN |
| Meals | NaN | NaN | NaN |
| Fin | NaN | NaN | NaN |
| Other | NaN | NaN | NaN |
#the same as industry but 3 columns
from sklearn import linear_model
for i in range(len(ind_name)):
model = linear_model.LinearRegression()
model = model.fit(df[['Mkt-RF','SMB','HML']], pd.DataFrame(df[ind_name[i]]-df['RF']))
beta_ff.iloc[i] = model.coef_
beta_ff
| MKT | SMB | HML | |
|---|---|---|---|
| Food | 0.747453 | -0.140384 | 0.052259 |
| Beer | 0.866732 | 0.196442 | 0.133388 |
| Smoke | 0.649355 | -0.216873 | 0.096605 |
| Games | 1.287535 | 0.408923 | 0.138250 |
| Books | 1.012498 | 0.372491 | 0.186158 |
| Hshld | 0.906274 | -0.088170 | -0.035378 |
| Clths | 0.761506 | 0.422109 | -0.060045 |
| Hlth | 0.878894 | -0.085659 | -0.189481 |
| Chems | 1.068119 | -0.151514 | 0.026175 |
| Txtls | 0.982731 | 0.563268 | 0.372973 |
| Cnstr | 1.118341 | 0.247742 | 0.107824 |
| Steel | 1.259659 | 0.232049 | 0.380399 |
| FabPr | 1.173377 | 0.261550 | 0.111985 |
| ElcEq | 1.295485 | -0.035460 | -0.000616 |
| Autos | 1.247708 | 0.055086 | 0.188627 |
| Carry | 1.097555 | 0.218124 | 0.328079 |
| Mines | 0.842647 | 0.258731 | 0.145621 |
| Coal | 1.084163 | 0.485378 | 0.673339 |
| Oil | 0.878785 | -0.179207 | 0.290080 |
| Util | 0.755451 | -0.166903 | 0.259715 |
| Telcm | 0.694991 | -0.131654 | -0.036739 |
| Servs | 0.832129 | 0.368626 | -0.505840 |
| BusEq | 1.121167 | 0.153322 | -0.455735 |
| Paper | 0.952825 | -0.057684 | 0.045355 |
| Trans | 1.038001 | 0.163473 | 0.454423 |
| Whlsl | 0.972969 | 0.550152 | 0.068217 |
| Rtail | 0.979982 | 0.041639 | -0.131479 |
| Meals | 0.896939 | 0.289515 | -0.030568 |
| Fin | 1.121536 | -0.057539 | 0.315218 |
| Other | 0.999547 | 0.283097 | 0.018335 |
import matplotlib.pyplot as plt
plt.scatter(beta_ff['MKT'], exp_ri)
plt.xlabel('MKT_FF')
plt.ylabel('Excess Return')
plt.title('MKT_FF and Excess Return')
plt.ylim(0.1,2)
plt.xlim(0.1,2)
plt.show()
plt.savefig('MKT_FF and Excess Return.png')
<Figure size 432x288 with 0 Axes>
ax = sns.regplot(x = beta_ff[['MKT']], y = exp_ri)
ax.set_title("MKT_FF and Excess Return", fontsize=10)
ax.set_xlabel("MKT_FF", fontsize = 10)
ax.set_ylabel("Excess Return", fontsize = 10)
plt.savefig("REG - MKT_FF and Excess Return.png")
import matplotlib.pyplot as plt
plt.scatter(beta_ff['SMB'], exp_ri)
plt.xlabel('SMB_FF')
plt.ylabel('Excess Return')
plt.title('SMB_FF and Excess Return')
plt.ylim(0.1,2)
plt.xlim(0.1,2)
plt.show()
plt.savefig('SMB_FF and Excess Return.png')
<Figure size 432x288 with 0 Axes>
ax = sns.regplot(x = beta_ff[['SMB']], y = exp_ri)
ax.set_title("SMB_FF and Excess Return", fontsize=10)
ax.set_xlabel("SMB_FF", fontsize = 10)
ax.set_ylabel("Excess Return", fontsize = 10)
plt.savefig("REG - SMB_FF and Excess Return.png")
import matplotlib.pyplot as plt
plt.scatter(beta_ff['HML'], exp_ri)
plt.xlabel('HML_FF')
plt.ylabel('Excess Return')
plt.title('HML_FF and Excess Return')
plt.ylim(0.1,2)
plt.xlim(-1,1)
plt.show()
plt.savefig('HML_FF and Excess Return.png')
<Figure size 432x288 with 0 Axes>
ax = sns.regplot(x = beta_ff[['HML']], y = exp_ri)
ax.set_title("HML_FF and Excess Return", fontsize=10)
ax.set_xlabel("HML_FF", fontsize = 10)
ax.set_ylabel("Excess Return", fontsize = 10)
plt.savefig("REG - HML_FF and Excess Return.png")
CONCLUSION
Since I plot a regplot that obviously show the linear line, but as can be seen, only MKT_FF show the positive relationship with the excess return while another 2 roughly show the negative relationship but there are many outliers.
Easily compared, when we use the CAPM to analyze the relationship, SML reveals that the linear is more obvious rather than CAPM. Therefore the Three factors model is more suitable than CAPM.
metric_ff = pd.DataFrame(data = np.nan, index = ind_name, columns = ['RMSE','MAE'])
metric_ff
| RMSE | MAE | |
|---|---|---|
| Food | NaN | NaN |
| Beer | NaN | NaN |
| Smoke | NaN | NaN |
| Games | NaN | NaN |
| Books | NaN | NaN |
| Hshld | NaN | NaN |
| Clths | NaN | NaN |
| Hlth | NaN | NaN |
| Chems | NaN | NaN |
| Txtls | NaN | NaN |
| Cnstr | NaN | NaN |
| Steel | NaN | NaN |
| FabPr | NaN | NaN |
| ElcEq | NaN | NaN |
| Autos | NaN | NaN |
| Carry | NaN | NaN |
| Mines | NaN | NaN |
| Coal | NaN | NaN |
| Oil | NaN | NaN |
| Util | NaN | NaN |
| Telcm | NaN | NaN |
| Servs | NaN | NaN |
| BusEq | NaN | NaN |
| Paper | NaN | NaN |
| Trans | NaN | NaN |
| Whlsl | NaN | NaN |
| Rtail | NaN | NaN |
| Meals | NaN | NaN |
| Fin | NaN | NaN |
| Other | NaN | NaN |
from sklearn.model_selection import KFold
kf = KFold(n_splits=10,shuffle=False)
for i in range(len(ind_name)):
err_rmse_test = 0
err_mae_test = 0
FFs = df[['Mkt-RF','SMB','HML']].to_numpy()
ri_rf = (df[ind_name[i]]-df['RF']).to_numpy()
for train,test in kf.split(mkt):
lr=linear_model.LinearRegression()
reg=lr.fit(FFs[train],ri_rf[train])
ri_rf_pred_test =reg.predict(FFs[test])
e_test = ri_rf[test]-ri_rf_pred_test
err_rmse_test += np.sqrt(np.mean(e_test*e_test))
err_mae_test += np.mean(np.abs(e_test))
rmse_10cv_test = err_rmse_test/10
mae_10cv_test = err_mae_test/10
metric_ff.iloc[i][0] = rmse_10cv_test
metric_ff.iloc[i][1] = mae_10cv_test
metric_ff
| RMSE | MAE | |
|---|---|---|
| Food | 2.583992 | 1.950460 |
| Beer | 4.727382 | 3.441337 |
| Smoke | 4.534637 | 3.511960 |
| Games | 4.822235 | 3.520520 |
| Books | 3.725951 | 2.855786 |
| Hshld | 3.235331 | 2.434792 |
| Clths | 4.197140 | 2.975686 |
| Hlth | 3.197743 | 2.386017 |
| Chems | 2.943988 | 2.243567 |
| Txtls | 4.255104 | 3.106636 |
| Cnstr | 2.807713 | 2.112981 |
| Steel | 4.192168 | 3.179183 |
| FabPr | 2.690331 | 2.090893 |
| ElcEq | 3.334103 | 2.592332 |
| Autos | 4.564839 | 3.317047 |
| Carry | 4.102379 | 3.079451 |
| Mines | 5.209853 | 4.070646 |
| Coal | 7.683888 | 5.632237 |
| Oil | 4.087016 | 3.117979 |
| Util | 3.389365 | 2.590805 |
| Telcm | 2.908926 | 2.232419 |
| Servs | 5.143993 | 3.536210 |
| BusEq | 3.147124 | 2.411187 |
| Paper | 2.924536 | 2.204255 |
| Trans | 3.201444 | 2.472775 |
| Whlsl | 3.581311 | 2.605731 |
| Rtail | 2.952258 | 2.250535 |
| Meals | 4.032527 | 3.011346 |
| Fin | 2.514929 | 1.901121 |
| Other | 3.480490 | 2.631941 |
mean_rmse_ff = np.mean(metric['RMSE'])
mean_mae_ff = np.mean(metric['MAE'])
print(f"RMSE: {mean_rmse_ff}")
print(f"MAE: {mean_mae_ff}")
RMSE: 3.900715824121209 MAE: 2.9213263884838936