Calibration of the Vasicek Model to Historical Data with Python Code

January 29, 2024

We present here two methods for calibrating the Vasicek model (link) to historical data:

  1. Least Squares
  2. Maximum Likelihood Estimation

The Python code is available below.

Import Libraries

import matplotlib.pyplot as plt
plt.style.use('ggplot')
import math
import numpy as np
import pandas as pd
from scipy.stats import norm
%matplotlib inline
from sklearn.linear_model import LinearRegression

Simulation Vasicek Process with Euler-Maruyama Discretisation

def vasicek(r0, a, b, sigma, T, num_steps, num_paths):
    dt = T / num_steps
    rates = np.zeros((num_steps + 1, num_paths))
    rates[0] = r0
    
    for t in range(1, num_steps + 1):
        dW = np.random.normal(0, 1, num_paths)
        rates[t] = rates[t - 1] + a * (b - rates[t - 1]) * dt + sigma * np.sqrt(dt) * dW
    
    return rates
# Model parameters
r0 = 0.02  # Initial short rate
a = 0.5    # Mean reversion speed
b = 0.03  # Long-term mean
sigma = 0.01   # Volatility
T = 10      # Time horizon
num_steps = 10000  # Number of steps
num_paths = 20   # Number of paths

# Simulate Vasicek model
simulated_rates = vasicek(r0, a, b, sigma, T, num_steps, num_paths)

# Time axis
time_axis = np.linspace(0, T, num_steps + 1)

#average value
average_rates = [r0 * np.exp(-a * t) + b * (1 - np.exp(-a * t)) for t in time_axis]

# standard deviation
std_dev = [(sigma**2 / (2 * a) * (1 - np.exp(-2 * a * t)))**.5 for t in time_axis]

# Calculate upper and lower bounds (±2 sigma)
upper_bound = [average_rates[i] + 2 * std_dev[i] for i in range(len(time_axis))]
lower_bound = [average_rates[i] - 2 * std_dev[i] for i in range(len(time_axis))]

# Plotting multiple paths with time on x-axis
plt.figure(figsize=(10, 6))
plt.title('Vasicek Model - Simulated Interest Rate Paths')
plt.xlabel('Time (years)')
plt.ylabel('Interest Rate')
for i in range(num_paths):
    plt.plot(time_axis, simulated_rates[:, i])
    
plt.plot(time_axis, average_rates, color='black',linestyle='--', label ='Average', linewidth = 3)
plt.plot(time_axis, upper_bound, color='grey', linestyle='--', label='Upper Bound (2Σ)', linewidth = 3)
plt.plot(time_axis, lower_bound, color='grey', linestyle='--', label='Lower Bound (2Σ)', linewidth = 3)
plt.legend()
plt.show()

Real Parameters

# Model parameters we want to estimate
a = .5   # Mean reversion speed
b = 0.03  # Long-term mean
sigma = 0.01   # Volatility

Simulation one path

r0 = 0.03  # Initial short rate
T = 10     # Time horizon
num_steps = 10000  # Number of steps
num_paths = 1   # Number of paths

# Simulate Vasicek model
simulated_rates = vasicek(r0, a, b, sigma, T, num_steps, num_paths)

# Time axis
time_axis = np.linspace(0, T, num_steps + 1)

# Plotting multiple paths with time on x-axis
plt.figure(figsize=(10, 6))
plt.title('Simulation Interest Rates with Vasicek Model')
plt.xlabel('Time (years)')
plt.ylabel('Interest Rate')
for i in range(num_paths):
    plt.plot(time_axis, simulated_rates[:, i], color='red')

plt.show()

Least Squares

def Vasicek_LS(r, dt):
    
    #Linear Regression
    r0 = r[:-1,]
    r1 = r[1:, 0]
    reg = LinearRegression().fit(r0, r1)
    
    #estimation a and b
    a_LS = (1 - reg.coef_) / dt
    b_LS = reg.intercept_ / dt / a_LS
    
    #estimation sigma
    epsilon = r[1:, 0] - r[:-1,0] * reg.coef_
    sigma_LS = np.std(epsilon) / dt**.5

    return a_LS[0], b_LS[0], sigma_LS
LS_Estimate = Vasicek_LS(simulated_rates, T / num_steps)
print("a_est: " + str(np.round(LS_Estimate[0],3)))
print("b_est: " + str(np.round(LS_Estimate[1],3)))
print("sigma_est: " + str(np.round(LS_Estimate[2],3)))

Maximum Likelihood Estimation

def Vasicek_MLE(r, dt):
    r = r[:, 0]
    n = len(r)
    #estimation a and b
    S0 = 0
    S1 = 0
    S00 = 0
    S01 = 0
    for i in range(n-1):
        S0 = S0 + r[i]
        S1 = S1 + r[i + 1]
        S00 = S00 + r[i] * r[i]
        S01 = S01 + r[i] * r[i + 1]
    S0 = S0 / (n-1)
    S1 = S1 / (n-1)
    S00 = S00 / (n-1)
    S01 = S01 / (n-1)
    b_MLE = (S1 * S00 - S0 * S01) / (S0 * S1 - S0**2 - S01 + S00)
    a_MLE = 1 / dt * np.log((S0 - b_MLE) / (S1 - b_MLE))
    
    #estimation sigma
    beta = 1 / a * (1 - np.exp(-a * dt))
    temp = 0
    for i in range(n-1):
        mi = b * a * beta + r[i] * (1 - a * beta)
        temp = temp + (r[i+1] - mi)**2
    sigma_MLE = (1 / ((n - 1) * beta * (1 - .5 * a * beta)) * temp)**.5
    return a_MLE, b_MLE, sigma_MLE
MLE_Estimate = Vasicek_MLE(simulated_rates, T / num_steps)
print("a_est: " + str(np.round(MLE_Estimate[0],3)))
print("b_est: " + str(np.round(MLE_Estimate[1],3)))
print("sigma_est: " + str(np.round(MLE_Estimate[2],3)))

Accuracy Estimates

# Model parameters
r0 = 0.02  # Initial short rate
a = 0.5    # Mean reversion speed
b = 0.03  # Long-term mean
sigma = 0.01   # Volatility
T = 10      # Time horizon
num_steps = 10000  # Number of steps
num_paths = 100   # Number of paths

a_est = []
b_est = []
sigma_est = []
for i in range(num_paths):
    simulated_rates = vasicek(r0, a, b, sigma, T, num_steps, 1)
    LS_Estimate = Vasicek_LS(simulated_rates, T / num_steps)
    a_est.append(LS_Estimate[0])
    b_est.append(LS_Estimate[1])
    sigma_est.append(LS_Estimate[2])
plt.figure(figsize=(10, 6))
plt.title('Distribution a_estimate')
plt.hist(a_est,bins = 10);
plt.figure(figsize=(10, 6))
plt.title('Distribution b_estimate')
plt.hist(b_est,bins = 10);
plt.figure(figsize=(10, 6))
plt.title('Distribution sigma_stimate')
plt.hist(sigma_est, bins = 10);
To go further...