# Installing pykalman library
# NOTE: the leading "!" is an IPython/Jupyter shell escape, so this line only
# runs inside a notebook; it is not valid syntax in a plain Python script.
!pip install pykalman

Collecting pykalman
  Downloading pykalman-0.9.7-py2.py3-none-any.whl (251 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 251.6/251.6 kB 1.7 MB/s eta 0:00:00
Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from pykalman) (1.25.2)
Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from pykalman) (1.11.4)
Installing collected packages: pykalman
Successfully installed pykalman-0.9.7

# Importing numpy for working with arrays
import numpy as np

# Importing pandas for data manipulation
import pandas as pd

# Importing poly1d to evaluate fitted polynomials.
# poly1d is a NumPy class; SciPy's top-level re-export of NumPy names is
# deprecated, so it is imported from NumPy directly.
from numpy import poly1d

# Importing matplotlib for data visualization
import matplotlib.pyplot as plt

# Importing pykalman to implement kalman filter
from pykalman import KalmanFilter

# Importing yfinance to fetch data from yahoo finance
import yfinance as yf

# To ignore warnings in jupyter notebook
import warnings
warnings.filterwarnings('ignore')

# Fetching price data for MSFT from yahoo finance 2021-01-01 till 2022-01-01.
# auto_adjust=False is passed explicitly so that the 'Adj Close' column is
# requested regardless of the library's default for that option.
msft_price = yf.download('MSFT', start='2021-01-01', end='2022-01-01', auto_adjust=False)

# We are only interested to compute moving average for adjusted close price
msft_price = msft_price['Adj Close']

# NOTE(review): some yfinance releases return (field, ticker) MultiIndex
# columns even for a single ticker, in which case the selection above is a
# one-column DataFrame. Reduce it to a Series named 'Adj Close' so the later
# cells always receive a Series.
if isinstance(msft_price, pd.DataFrame):
    msft_price = msft_price.iloc[:, 0].rename('Adj Close')

[*********************100%%**********************]  1 of 1 completed

# Showing price data (a bare expression at the end of a notebook cell is
# rendered as that cell's output)
msft_price

Date
2021-01-04    211.224319
2021-01-05    211.428085
2021-01-06    205.945892
2021-01-07    211.806519
2021-01-08    213.097000
                 ...    
2021-12-27    335.064056
2021-12-28    333.889893
2021-12-29    334.574829
2021-12-30    332.001495
2021-12-31    329.066223
Name: Adj Close, Length: 252, dtype: float64

# Build a one-dimensional Kalman filter: the transition and observation
# matrices are both [1], the initial state has mean 0 and variance 1, the
# observation noise variance is 1 and the transition noise variance is 0.01.
kf = KalmanFilter(
    transition_matrices=[1],
    observation_matrices=[1],
    initial_state_mean=0,
    initial_state_covariance=1,
    observation_covariance=1,
    transition_covariance=0.01,
)

# Run the filter over the observed prices; only the filtered state means are
# kept (the state covariances are discarded).
state_means, _ = kf.filter(msft_price.values)

# Re-attach the dates: a Series named 'states' whose index (named 'index')
# holds the dates of the price observations.
state_means = pd.Series(
    state_means.flatten(),
    index=msft_price.index.rename('index'),
    name='states',
)

# Simple moving averages of the price over lookback windows of 30, 60 and 90
# rows (one row per trading day in the downloaded series).
mean30, mean60, mean90 = (
    msft_price.rolling(window).mean() for window in (30, 60, 90)
)

# Overlay the Kalman estimate, the observed price and the three moving
# averages on one figure; each curve carries its own legend label.
figure = plt.figure(figsize=(16, 8))
for series, series_label in (
    (state_means, 'Kalman Estimate'),
    (msft_price, 'MSFT observed price'),
    (mean30, '30-day Moving Average'),
    (mean60, '60-day Moving Average'),
    (mean90, '90-day Moving Average'),
):
    plt.plot(series, label=series_label)
plt.title('Kalman filter estimate of average')
plt.legend()
plt.xlabel('Day')
plt.ylabel('Price');

# Same comparison as the full-period plot, restricted to the last 200
# observations of every series.
figure = plt.figure(figsize=(16, 8))
for series, series_label in (
    (state_means, 'Kalman Estimate'),
    (msft_price, 'MSFT observed price'),
    (mean30, '30-day Moving Average'),
    (mean60, '60-day Moving Average'),
    (mean90, '90-day Moving Average'),
):
    plt.plot(series[-200:], label=series_label)
plt.title('Kalman filter estimate of average')
plt.legend()
plt.xlabel('Day')
plt.ylabel('Price');

def _download_adj_close(ticker, start='2021-01-01', end='2022-01-01'):
    """Download *ticker* from yahoo finance and return its adjusted close.

    The result is always a Series named 'Adj Close' indexed by date.
    auto_adjust=False is passed explicitly so the 'Adj Close' column is
    requested regardless of the library's default for that option.
    """
    prices = yf.download(ticker, start=start, end=end, auto_adjust=False)['Adj Close']
    # NOTE(review): some yfinance releases return (field, ticker) MultiIndex
    # columns even for one ticker; the selection is then a one-column frame.
    if isinstance(prices, pd.DataFrame):
        prices = prices.iloc[:, 0]
    return prices.rename('Adj Close')


# Fetching price data for MSFT from yahoo finance 2021-01-01 till 2022-01-01
# (kept as a one-column DataFrame named 'Adj Close')
msft_price = _download_adj_close('MSFT').to_frame()

# Fetching price data for S&P 500 from yahoo finance 2021-01-01 till 2022-01-01
snp_price = _download_adj_close('^GSPC').to_frame()

# Joining MSFT and S&P 500 on the date index to create one single dataframe
# with columns 'Adj Close' (MSFT) and 'sp500'
joined_data = pd.concat([msft_price, snp_price['Adj Close'].rename('sp500')], axis=1)

# Dropping the dates on which either series has a missing value
joined_data = joined_data.dropna()

# Showing head of the dataset
joined_data.head()

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

# Scatter of MSFT against the S&P 500, coloured by time
figure = plt.figure(figsize=(16, 8))
cm = plt.get_cmap('jet')

# One colour value per observation, in row (chronological) order
n_obs = len(joined_data)
colors = np.linspace(0.1, 1, n_obs)
sc = plt.scatter(joined_data['sp500'], joined_data['Adj Close'], s=30, c=colors, cmap=cm, edgecolor='k', alpha=0.7)

# Date-labelled colorbar. The ticks are placed at the colour values of the
# sampled rows so that every date label sits on the colour of its own
# observation; relabelling the automatically chosen ticks would pair dates
# with unrelated colours. max(1, ...) avoids a zero slice step on short data.
cb = plt.colorbar(sc)
tick_step = max(1, n_obs // 9)
cb.set_ticks(colors[::tick_step])
cb.set_ticklabels([str(p.date()) for p in joined_data.index[::tick_step]])

# The x data is the ^GSPC index level, so it is labelled as the index
plt.xlabel('S&P 500')
plt.ylabel('MSFT');

# Transition noise covariance of the two-dimensional (slope, intercept) state
delta = 1e-3
trans_cov = (delta / (1 - delta)) * np.eye(2)

# One 1x2 observation matrix [sp500_t, 1] per time step -> shape (n, 1, 2)
sp500_values = joined_data['sp500']
obs_mat = np.column_stack([sp500_values, np.ones(len(sp500_values))])[:, np.newaxis, :]

# Identity transition with time-varying observation matrices: the observed
# MSFT price is modelled as slope * sp500 + intercept plus noise.
kf = KalmanFilter(
    n_dim_obs=1,
    n_dim_state=2,
    initial_state_mean=[0, 0],
    initial_state_covariance=np.ones((2, 2)),
    transition_matrices=np.eye(2),
    observation_matrices=obs_mat,
    observation_covariance=2,
    transition_covariance=trans_cov,
)

# Using the observations y to get running estimates and errors for the state parameters
state_means, state_covs = kf.filter(joined_data['Adj Close'])

# Two stacked panels sharing the date axis: column 0 of the state means is
# plotted as the slope (top), column 1 as the intercept (bottom).
_, axarr = plt.subplots(nrows=2, sharex=True, figsize=(16, 8))
dates = joined_data['sp500'].index
for panel, coefficient, coefficient_label in zip(axarr, state_means.T, ('slope', 'intercept')):
    panel.plot(dates, coefficient, label=coefficient_label)
    panel.legend()
plt.tight_layout();

figure = plt.figure(figsize=(16, 8))

# Plotting data points using colormap (one colour value per row, in order)
cm = plt.get_cmap('jet')
n_obs = len(joined_data)
colors = np.linspace(0.1, 1, n_obs)
sc = plt.scatter(joined_data['sp500'], joined_data['Adj Close'], s=30, c=colors, cmap=cm, edgecolor='k', alpha=0.7)

# Date-labelled colorbar. The ticks are placed at the colour values of the
# sampled rows so that every date label sits on the colour of its own
# observation; relabelling the automatically chosen ticks would pair dates
# with unrelated colours. max(1, ...) avoids a zero slice step on short data.
cb = plt.colorbar(sc)
tick_step = max(1, n_obs // 9)
cb.set_ticks(colors[::tick_step])
cb.set_ticklabels([str(p.date()) for p in joined_data.index[::tick_step]])

# Plotting every fifth line: each sampled state is a (slope, intercept) pair,
# drawn across the x-range and coloured on the same scale as the points
step = 5
xi = np.linspace(joined_data['sp500'].min() - 5, joined_data['sp500'].max() + 5, 2)
sampled_states = state_means[::step]
colors_l = np.linspace(0.1, 1, len(sampled_states))
for line_color, (slope, intercept) in zip(colors_l, sampled_states):
    plt.plot(xi, slope * xi + intercept, alpha=.2, lw=1, c=cm(line_color))

# Plotting the OLS regression line (np.poly1d is used directly, so this does
# not depend on the deprecated scipy re-export of poly1d)
ols_line = np.poly1d(np.polyfit(joined_data['sp500'], joined_data['Adj Close'], 1))
plt.plot(xi, ols_line(xi), '0.4')

# Labelling axes (the x data is the ^GSPC index level)
plt.xlabel('S&P 500')
plt.ylabel('MSFT');

figure = plt.figure(figsize=(16, 8))
# Colormap is obtained here so this cell does not depend on an earlier cell
cm = plt.get_cmap('jet')

# Getting returns from pricing data (the first row is dropped because
# pct_change has no previous value to compare it with)
x_r = joined_data['sp500'].pct_change().iloc[1:]
y_r = joined_data['Adj Close'].pct_change().iloc[1:]

# Run Kalman filter on returns data
delta_r = 1e-2
trans_cov_r = delta_r / (1 - delta_r) * np.eye(2) # How much random walk wiggles
# One 1x2 observation matrix [x_r_t, 1] per time step -> shape (n, 1, 2)
obs_mat_r = np.column_stack([x_r, np.ones(len(x_r))])[:, np.newaxis, :]
kf_r = KalmanFilter(n_dim_obs=1, n_dim_state=2, # y_r is 1-dimensional, (alpha, beta) is 2-dimensional
                  initial_state_mean=[0,0],
                  initial_state_covariance=np.ones((2, 2)),
                  transition_matrices=np.eye(2),
                  observation_matrices=obs_mat_r,
                  observation_covariance=.01,
                  transition_covariance=trans_cov_r)
state_means_r, _ = kf_r.filter(y_r.values)

# Plotting data points using colormap (one colour value per row, in order)
n_returns = len(x_r)
colors_r = np.linspace(0.1, 1, n_returns)
sc = plt.scatter(x_r, y_r, s=30, c=colors_r, cmap=cm, edgecolor='k', alpha=0.7)

# Date-labelled colorbar. The ticks are placed at the colour values of the
# sampled rows so that every date label sits on the colour of its own
# observation; relabelling the automatically chosen ticks would pair dates
# with unrelated colours. max(1, ...) avoids a zero slice step on short data.
cb = plt.colorbar(sc)
tick_step = max(1, n_returns // 9)
cb.set_ticks(colors_r[::tick_step])
cb.set_ticklabels([str(p.date()) for p in x_r.index[::tick_step]])

# Plotting every fifth line: each sampled state is a (slope, intercept) pair
step = 5
xi = np.linspace(x_r.min() - 4, x_r.max() + 4, 2)
sampled_states_r = state_means_r[::step]
colors_l = np.linspace(0.1, 1, len(sampled_states_r))
for line_color, (slope, intercept) in zip(colors_l, sampled_states_r):
    plt.plot(xi, slope * xi + intercept, alpha=.2, lw=1, c=cm(line_color))

# Plotting the OLS regression line (np.poly1d is used directly, so this does
# not depend on the deprecated scipy re-export of poly1d)
ols_line_r = np.poly1d(np.polyfit(x_r, y_r, 1))
plt.plot(xi, ols_line_r(xi), '0.4')

# Adjusting axes for visibility
plt.axis([-0.03, 0.03, -0.11, 0.11])

# Labelling axes (the x data is the return of the ^GSPC index)
plt.xlabel('S&P 500 returns')
plt.ylabel('MSFT returns');

# Convert notebook to html
# NOTE: the leading "!" is an IPython/Jupyter shell escape (not plain Python).
# The path is hard-coded; it presumably refers to a mounted Google Drive
# folder, so confirm it exists before running.
!jupyter nbconvert --to html "/content/drive/MyDrive/MIT - Data Sciences/Colab Notebooks/Week_Eight_-_Networking_and_Graphical_Models/Case_Studies/Financial_Market_Price_Estimation/Financial_Market_Price_Estimation.ipynb"

State Estimator	Model	Assumed Distribution
Kalman Filter	Linear	Gaussian
Extended Kalman Filter	Locally Linear	Gaussian
Unscented Kalman Filter	Nonlinear	Gaussian
Particle Filter	Nonlinear	Non Gaussian

Financial Market Price Estimation with Kalman Filters

Introduction to Kalman Filters

Case Study - Context

Kalman Filter as Moving Average

Dataset

Installing pykalman library

Importing the necessary libraries

Loading the price data

Linear Regression

Additional Reading

	Adj Close	sp500
Date
2021-01-04	211.224319	3700.649902
2021-01-05	211.428085	3726.860107
2021-01-06	205.945892	3748.139893
2021-01-07	211.806519	3803.790039
2021-01-08	213.097000	3824.679932