We present here two methods for calibrating the Vasicek model (link) to historical data:
- Least Squares
- Maximum Likelihood Estimation
The Python code is available below.
Presentation
Save 10% on All Quant Next Courses with the Coupon Code: QuantNextBlog10
For students and graduates: We offer a 50% discount on all courses, please contact us if you are interested: contact@quant-next.com
Python Code
Import Libraries
import matplotlib.pyplot as plt
plt.style.use('ggplot')
import math
import numpy as np
import pandas as pd
from scipy.stats import norm
%matplotlib inline
from sklearn.linear_model import LinearRegression
Simulation Vasicek Process with Euler-Maruyama Discretisation
def vasicek(r0, a, b, sigma, T, num_steps, num_paths):
dt = T / num_steps
rates = np.zeros((num_steps + 1, num_paths))
rates[0] = r0
for t in range(1, num_steps + 1):
dW = np.random.normal(0, 1, num_paths)
rates[t] = rates[t - 1] + a * (b - rates[t - 1]) * dt + sigma * np.sqrt(dt) * dW
return rates
# Model parameters
r0 = 0.02 # Initial short rate
a = 0.5 # Mean reversion speed
b = 0.03 # Long-term mean
sigma = 0.01 # Volatility
T = 10 # Time horizon
num_steps = 10000 # Number of steps
num_paths = 20 # Number of paths
# Simulate Vasicek model
simulated_rates = vasicek(r0, a, b, sigma, T, num_steps, num_paths)
# Time axis
time_axis = np.linspace(0, T, num_steps + 1)
#average value
average_rates = [r0 * np.exp(-a * t) + b * (1 - np.exp(-a * t)) for t in time_axis]
# standard deviation
std_dev = [(sigma**2 / (2 * a) * (1 - np.exp(-2 * a * t)))**.5 for t in time_axis]
# Calculate upper and lower bounds (±2 sigma)
upper_bound = [average_rates[i] + 2 * std_dev[i] for i in range(len(time_axis))]
lower_bound = [average_rates[i] - 2 * std_dev[i] for i in range(len(time_axis))]
# Plotting multiple paths with time on x-axis
plt.figure(figsize=(10, 6))
plt.title('Vasicek Model - Simulated Interest Rate Paths')
plt.xlabel('Time (years)')
plt.ylabel('Interest Rate')
for i in range(num_paths):
plt.plot(time_axis, simulated_rates[:, i])
plt.plot(time_axis, average_rates, color='black',linestyle='--', label ='Average', linewidth = 3)
plt.plot(time_axis, upper_bound, color='grey', linestyle='--', label='Upper Bound (2Σ)', linewidth = 3)
plt.plot(time_axis, lower_bound, color='grey', linestyle='--', label='Lower Bound (2Σ)', linewidth = 3)
plt.legend()
plt.show()
Real Parameters
# Model parameters we want to estimate
a = .5 # Mean reversion speed
b = 0.03 # Long-term mean
sigma = 0.01 # Volatility
Simulation one path
r0 = 0.03 # Initial short rate
T = 10 # Time horizon
num_steps = 10000 # Number of steps
num_paths = 1 # Number of paths
# Simulate Vasicek model
simulated_rates = vasicek(r0, a, b, sigma, T, num_steps, num_paths)
# Time axis
time_axis = np.linspace(0, T, num_steps + 1)
# Plotting multiple paths with time on x-axis
plt.figure(figsize=(10, 6))
plt.title('Simulation Interest Rates with Vasicek Model')
plt.xlabel('Time (years)')
plt.ylabel('Interest Rate')
for i in range(num_paths):
plt.plot(time_axis, simulated_rates[:, i], color='red')
plt.show()
Least Squares
def Vasicek_LS(r, dt):
#Linear Regression
r0 = r[:-1,]
r1 = r[1:, 0]
reg = LinearRegression().fit(r0, r1)
#estimation a and b
a_LS = (1 - reg.coef_) / dt
b_LS = reg.intercept_ / dt / a_LS
#estimation sigma
epsilon = r[1:, 0] - r[:-1,0] * reg.coef_
sigma_LS = np.std(epsilon) / dt**.5
return a_LS[0], b_LS[0], sigma_LS
LS_Estimate = Vasicek_LS(simulated_rates, T / num_steps)
print("a_est: " + str(np.round(LS_Estimate[0],3)))
print("b_est: " + str(np.round(LS_Estimate[1],3)))
print("sigma_est: " + str(np.round(LS_Estimate[2],3)))
Maximum Likelihood Estimation
def Vasicek_MLE(r, dt):
r = r[:, 0]
n = len(r)
#estimation a and b
S0 = 0
S1 = 0
S00 = 0
S01 = 0
for i in range(n-1):
S0 = S0 + r[i]
S1 = S1 + r[i + 1]
S00 = S00 + r[i] * r[i]
S01 = S01 + r[i] * r[i + 1]
S0 = S0 / (n-1)
S1 = S1 / (n-1)
S00 = S00 / (n-1)
S01 = S01 / (n-1)
b_MLE = (S1 * S00 - S0 * S01) / (S0 * S1 - S0**2 - S01 + S00)
a_MLE = 1 / dt * np.log((S0 - b_MLE) / (S1 - b_MLE))
#estimation sigma
beta = 1 / a * (1 - np.exp(-a * dt))
temp = 0
for i in range(n-1):
mi = b * a * beta + r[i] * (1 - a * beta)
temp = temp + (r[i+1] - mi)**2
sigma_MLE = (1 / ((n - 1) * beta * (1 - .5 * a * beta)) * temp)**.5
return a_MLE, b_MLE, sigma_MLE
MLE_Estimate = Vasicek_MLE(simulated_rates, T / num_steps)
print("a_est: " + str(np.round(MLE_Estimate[0],3)))
print("b_est: " + str(np.round(MLE_Estimate[1],3)))
print("sigma_est: " + str(np.round(MLE_Estimate[2],3)))
Accuracy Estimates
# Model parameters
r0 = 0.02 # Initial short rate
a = 0.5 # Mean reversion speed
b = 0.03 # Long-term mean
sigma = 0.01 # Volatility
T = 10 # Time horizon
num_steps = 10000 # Number of steps
num_paths = 100 # Number of paths
a_est = []
b_est = []
sigma_est = []
for i in range(num_paths):
simulated_rates = vasicek(r0, a, b, sigma, T, num_steps, 1)
LS_Estimate = Vasicek_LS(simulated_rates, T / num_steps)
a_est.append(LS_Estimate[0])
b_est.append(LS_Estimate[1])
sigma_est.append(LS_Estimate[2])
plt.figure(figsize=(10, 6))
plt.title('Distribution a_estimate')
plt.hist(a_est,bins = 10);
plt.figure(figsize=(10, 6))
plt.title('Distribution b_estimate')
plt.hist(b_est,bins = 10);
plt.figure(figsize=(10, 6))
plt.title('Distribution sigma_stimate')
plt.hist(sigma_est, bins = 10);