Calibration of the Vasicek Model to Historical Data with Python Code

January 29, 2024

We present here two methods for calibrating the Vasicek model (link) to historical data:

  1. Least Squares
  2. Maximum Likelihood Estimation

The Python code is available below.

We summarize below quantitative finance training courses proposed by Quant Next. Courses are 100% digital, they are composed of many videos, quizzes, applications and tutorials in Python.

Complete training program:

Options, Pricing, and Risk Management Part I: introduction to derivatives, arbitrage free pricing, Black-Scholes model, option Greeks and risk management.

Options, Pricing, and Risk Management Part II: numerical methods for option pricing (Monte Carlo simulations, finite difference methods), replication and risk management of exotic options.

Options, Pricing, and Risk Management Part III:  modelling of the volatility surface,  parametric models with a focus on the SVI model, and stochastic volatility models with a focus on the Heston and the SABR models.

A la carte:

Monte Carlo Simulations for Option Pricing: introduction to Monte Carlo simulations, applications to price options, methods to accelerate computation speed (quasi-Monte Carlo, variance reduction, code optimisation).

Finite Difference Methods for Option Pricing: numerical solving of the Black-Scholes equation, focus on the three main methods: explicit, implicit and Crank-Nicolson.

Replication and Risk Management of Exotic Options: dynamic and static replication methods of exotic options with several concrete examples.

Volatility Surface Parameterization: the SVI Model: introduction on the modelling of the volatility surface implied by option prices, focus on the parametric methods, and particularly on the Stochastic Volatility Inspired (SVI) model and some of its extensions.

The SABR Model: deep dive on on the SABR (Stochastic Alpha Beta Rho) model, one popular stochastic volatility model developed to model the dynamic of the forward price and to price options.

The Heston Model for Option Pricing: deep dive on the Heston model, one of the most popular stochastic volatility model for the pricing of options.

Import Libraries

import matplotlib.pyplot as plt
plt.style.use('ggplot')
import math
import numpy as np
import pandas as pd
from scipy.stats import norm
%matplotlib inline
from sklearn.linear_model import LinearRegression

Simulation Vasicek Process with Euler-Maruyama Discretisation

def vasicek(r0, a, b, sigma, T, num_steps, num_paths):
    dt = T / num_steps
    rates = np.zeros((num_steps + 1, num_paths))
    rates[0] = r0
    
    for t in range(1, num_steps + 1):
        dW = np.random.normal(0, 1, num_paths)
        rates[t] = rates[t - 1] + a * (b - rates[t - 1]) * dt + sigma * np.sqrt(dt) * dW
    
    return rates
# Model parameters
r0 = 0.02  # Initial short rate
a = 0.5    # Mean reversion speed
b = 0.03  # Long-term mean
sigma = 0.01   # Volatility
T = 10      # Time horizon
num_steps = 10000  # Number of steps
num_paths = 20   # Number of paths

# Simulate Vasicek model
simulated_rates = vasicek(r0, a, b, sigma, T, num_steps, num_paths)

# Time axis
time_axis = np.linspace(0, T, num_steps + 1)

#average value
average_rates = [r0 * np.exp(-a * t) + b * (1 - np.exp(-a * t)) for t in time_axis]

# standard deviation
std_dev = [(sigma**2 / (2 * a) * (1 - np.exp(-2 * a * t)))**.5 for t in time_axis]

# Calculate upper and lower bounds (±2 sigma)
upper_bound = [average_rates[i] + 2 * std_dev[i] for i in range(len(time_axis))]
lower_bound = [average_rates[i] - 2 * std_dev[i] for i in range(len(time_axis))]

# Plotting multiple paths with time on x-axis
plt.figure(figsize=(10, 6))
plt.title('Vasicek Model - Simulated Interest Rate Paths')
plt.xlabel('Time (years)')
plt.ylabel('Interest Rate')
for i in range(num_paths):
    plt.plot(time_axis, simulated_rates[:, i])
    
plt.plot(time_axis, average_rates, color='black',linestyle='--', label ='Average', linewidth = 3)
plt.plot(time_axis, upper_bound, color='grey', linestyle='--', label='Upper Bound (2Σ)', linewidth = 3)
plt.plot(time_axis, lower_bound, color='grey', linestyle='--', label='Lower Bound (2Σ)', linewidth = 3)
plt.legend()
plt.show()

Real Parameters

# Model parameters we want to estimate
a = .5   # Mean reversion speed
b = 0.03  # Long-term mean
sigma = 0.01   # Volatility

Simulation one path

r0 = 0.03  # Initial short rate
T = 10     # Time horizon
num_steps = 10000  # Number of steps
num_paths = 1   # Number of paths

# Simulate Vasicek model
simulated_rates = vasicek(r0, a, b, sigma, T, num_steps, num_paths)

# Time axis
time_axis = np.linspace(0, T, num_steps + 1)

# Plotting multiple paths with time on x-axis
plt.figure(figsize=(10, 6))
plt.title('Simulation Interest Rates with Vasicek Model')
plt.xlabel('Time (years)')
plt.ylabel('Interest Rate')
for i in range(num_paths):
    plt.plot(time_axis, simulated_rates[:, i], color='red')

plt.show()

Least Squares

def Vasicek_LS(r, dt):
    
    #Linear Regression
    r0 = r[:-1,]
    r1 = r[1:, 0]
    reg = LinearRegression().fit(r0, r1)
    
    #estimation a and b
    a_LS = (1 - reg.coef_) / dt
    b_LS = reg.intercept_ / dt / a_LS
    
    #estimation sigma
    epsilon = r[1:, 0] - r[:-1,0] * reg.coef_
    sigma_LS = np.std(epsilon) / dt**.5

    return a_LS[0], b_LS[0], sigma_LS
LS_Estimate = Vasicek_LS(simulated_rates, T / num_steps)
print("a_est: " + str(np.round(LS_Estimate[0],3)))
print("b_est: " + str(np.round(LS_Estimate[1],3)))
print("sigma_est: " + str(np.round(LS_Estimate[2],3)))

Maximum Likelihood Estimation

def Vasicek_MLE(r, dt):
    r = r[:, 0]
    n = len(r)
    #estimation a and b
    S0 = 0
    S1 = 0
    S00 = 0
    S01 = 0
    for i in range(n-1):
        S0 = S0 + r[i]
        S1 = S1 + r[i + 1]
        S00 = S00 + r[i] * r[i]
        S01 = S01 + r[i] * r[i + 1]
    S0 = S0 / (n-1)
    S1 = S1 / (n-1)
    S00 = S00 / (n-1)
    S01 = S01 / (n-1)
    b_MLE = (S1 * S00 - S0 * S01) / (S0 * S1 - S0**2 - S01 + S00)
    a_MLE = 1 / dt * np.log((S0 - b_MLE) / (S1 - b_MLE))
    
    #estimation sigma
    beta = 1 / a * (1 - np.exp(-a * dt))
    temp = 0
    for i in range(n-1):
        mi = b * a * beta + r[i] * (1 - a * beta)
        temp = temp + (r[i+1] - mi)**2
    sigma_MLE = (1 / ((n - 1) * beta * (1 - .5 * a * beta)) * temp)**.5
    return a_MLE, b_MLE, sigma_MLE
MLE_Estimate = Vasicek_MLE(simulated_rates, T / num_steps)
print("a_est: " + str(np.round(MLE_Estimate[0],3)))
print("b_est: " + str(np.round(MLE_Estimate[1],3)))
print("sigma_est: " + str(np.round(MLE_Estimate[2],3)))

Accuracy Estimates

# Model parameters
r0 = 0.02  # Initial short rate
a = 0.5    # Mean reversion speed
b = 0.03  # Long-term mean
sigma = 0.01   # Volatility
T = 10      # Time horizon
num_steps = 10000  # Number of steps
num_paths = 100   # Number of paths

a_est = []
b_est = []
sigma_est = []
for i in range(num_paths):
    simulated_rates = vasicek(r0, a, b, sigma, T, num_steps, 1)
    LS_Estimate = Vasicek_LS(simulated_rates, T / num_steps)
    a_est.append(LS_Estimate[0])
    b_est.append(LS_Estimate[1])
    sigma_est.append(LS_Estimate[2])
plt.figure(figsize=(10, 6))
plt.title('Distribution a_estimate')
plt.hist(a_est,bins = 10);
plt.figure(figsize=(10, 6))
plt.title('Distribution b_estimate')
plt.hist(b_est,bins = 10);
plt.figure(figsize=(10, 6))
plt.title('Distribution sigma_stimate')
plt.hist(sigma_est, bins = 10);
To go further...