本章会用到 金融数据分析库 DX
下载地址:https://github.com/yhilpisch/dx
找到setup.py,手动安装:python setup.py install
我们要用到的很多模型这个库中都有
参考:Python金融大数据分析——第10章 推断统计学
生成标准正态分布随机数的函数
# 生成标准正态分布随机数的函数
import numpy as np
def sn_random_numbers(shape, antithetic=True, moment_matching=True, fixed_seed=False):
"""
Returns an array of shape shape with (pseudo) random numbers
that are standard normally distributed.
:param shape: tuple(0,n,m)
generation of array with shape(0,n,m)
:param antithetic: Boolean
generation of antithetic variates
:param moment_matching: Boolean
matching of first and second moments
:param fixed_seed: Boolean
flag to fix and seed
:return:
ran:(0,n,m) array of (pseudo) random numbers
"""
if fixed_seed:
np.random.seed(1000)
if antithetic:
ran = np.random.standard_normal((shape[0], shape[1], shape[2] / 2))
ran = np.concatenate((ran, -ran), axis=2)
else:
ran = np.random.standard_normal(shape)
if moment_matching:
ran = ran - np.mean(ran)
ran = ran / np.std(ran)
if shape[0] == 1:
return ran[0]
else:
return ran
snrn = sn_random_numbers((2, 2, 2), antithetic=False, moment_matching=False, fixed_seed=True)
snrn
# array([[[-0.8044583 , 0.32093155],
# [-0.02548288, 0.64432383]],
# [[-0.30079667, 0.38947455],
# [-0.1074373 , -0.47998308]]])
snrn_mm = sn_random_numbers((2, 3, 2), antithetic=False, moment_matching=True, fixed_seed=True)
snrn_mm
# array([[[-1.47414161, 0.67072537],
# [ 0.01049828, 1.28707482],
# [-0.51421897, 0.80136066]],
# [[-0.14569767, -0.85572818],
# [ 1.19313679, -0.82653845],
# [ 1.3308292 , -1.47730025]]])
snrn_mm.mean()
# 3.700743415417188e-17
snrn_mm.std()
# 1.0
面向对象建模允许属性和方法的继承。这是我们在构建模拟类时想要利用的:从一个泛型模拟类开始,它包含所有其他模拟类共享的属性和方法。
首先要注意的是, 我们实例化任何模拟类的一个对象都 “ 只” 提供3个属性:
name:用作模型模拟对象名称的字符科才象
mar_env : maket_environment 类的一个实例
corr:表示对象是否相关的一个标志(布尔型)
这再次说明了市场环境的作用:在一步中提供模拟和估值所需的所有数据和对象。泛型类的方法如下:
generate_time_grid:
这个方法生成模拟所用的相关日期的 “时间网格”。这个任务对于每个模拟类都相同。
get_instument_values:
每个模拟类都必须返回包含模拟金融工具价值的 ndarray 对象(例如模拟的股票价格, 商品价格, 波动率)。
泛型金融模型模拟类
# 泛型金融模型模拟类
import numpy as np
import pandas as pd
class simulation_class(object):
"""
Providing base methods for simulation classes.
"""
def __init__(self, name, mar_env, corr):
"""
:param name: string
name of the object
:param mar_env: instance of market_environment
market environment data for simulation
:param corr: Boolean
True of correlated with other model object
"""
try:
self.name = name
self.pricing_data = mar_env.pricing_date
self.initial_value = mar_env.get_constant('initial_value')
self.volatility = mar_env.get_constant('volatility')
self.final_date = mar_env.get_constant('final_date')
self.currency = mar_env.get_constant('currency')
self.frequency = mar_env.get_constant('frequency')
self.paths = mar_env.get_constant('paths')
self.discount_curve = mar_env.get_curve('discount_curve')
try:
# if time_grid in mar_env take this
# (for portfolio valuation)
self.time_grid = mar_env.get_list('time_grid')
except:
self.time_grid = None
try:
# if there are special dates, then add these
self.special_dates = mar_env.get_list('special_dates')
except:
self.special_dates = []
self.instrument_values = None
self.correlated = corr
if corr:
# only needed in a portfolio context when
# risk factors are correlated
self.cholesky_matrix = mar_env.get_list('cholesky_matrix')
self.rn_set = mar_env.get_list('rn_set')[self.name]
self.random_numbers = mar_env.get_list('random_numbers')
except:
print("Error parsing market environment.")
def generate_time_grid(self):
start = self.pricing_data
end = self.final_date
# pandas date_range function
# freq = e.g. 'B' for Business Day,
# 'W' for Weekly, 'M'for Monthly
time_grid = pd.date_range(start=start, end=end, freq=self.frequency).to_pydatetime()
time_grid = list(time_grid)
if start not in time_grid:
time_grid.insert(0, start) # insert start date if not in list
if end not in time_grid:
time_grid.append(end) # insert end date if not in list
if len(self.special_dates) > 0:
# add all special dates
time_grid.extend(self.special_dates)
# delete duplicates
time_grid = list(set(time_grid))
# sort list
time_grid.sort()
self.time_grid = np.array(time_grid)
def get_instrument_values(self, fixed_seed=True):
if self.instrument_values is None:
# only initiate simulation if there are no instrument values
self.generate_paths(fixed_seed=fixed_seed, day_count=365.)
elif fixed_seed is False:
# also initiate resimulation when fixed_seed is False
self.generate_paths(fixed_seed=fixed_seed, day_count=365.)
return self.instrument_values
所有模拟类的市场环境元素
元素 | 类型 | 强制 | 描述 |
---|---|---|---|
initial_value | 常量 | 是 | pricing_date(定价日)时的过程初始值 |
volatility | 常量 | 是 | 过程的波动性系数 |
final_date | 常量 | 是 | 模拟范围 |
cuπency | 常量 | 是 | 金融实体的货币 |
fequency | 常量 | 是 | 日期频率,和pandas freq参数相同 |
paths | 常量 | 是 | 模拟路径数量 |
discount_curve | 曲线 | 是 | constant_short_rate 实例 |
time_grid | 列表 | 否 | 相关日期的时间网格(在投资组合背景下) |
random_numbers | 列表 | 否 | 随机数数组(用于相关对象) |
cholesky_matrix | 列表 | 否 | Cholesky 矩阵(用于相关对象) |
rn_set | 列表 | 否 | 包含指向相关随机数值指针的字典对象 |
公式 几何布朗运动的随机微分方程
公式 模拟几何布朗运动的微分方程
import numpy as np
class geometric_brownian_motion(simulation_class):
"""
Class to generate simulated paths based on
the Black_Scholes-Merton geometric Brownian motion model.
"""
def __init__(self, name, mar_env, corr=False):
super(geometric_brownian_motion, self).__init__(name, mar_env, corr)
def update(self, initial_value=None, volatility=None, final_date=None):
if initial_value is not None:
self.initial_value = initial_value
if volatility is not None:
self.volatility = volatility
if final_date is not None:
self.final_date = final_date
self.instrument_values = None
def generate_paths(self, fixed_seed=False, day_count=365.):
if self.time_grid is None:
self.generate_time_grid()
M = len(self.time_grid)
I = self.paths
paths = np.zeros((M, I))
paths[0] = self.initial_value
if not self.correlated:
rand = sn_random_numbers((1, M, I), fixed_seed=fixed_seed)
else:
rand = self.random_numbers
short_rate = self.discount_curve.short_rate
for t in range(1, len(self.time_grid)):
if not self.correlated:
ran = rand[t]
else:
ran = np.dot(self.cholesky_matrix, rand[:, t, :])
rand = ran[self.rn_set]
dt = (self.time_grid[t] - self.time_grid[t - 1]).days / day_count
paths[t] = paths[t - 1] * np.exp((short_rate - 0.5 * self.volatility ** 2) * dt
+ self.volatility * np.sqrt(dt) * ran)
self.instrument_values = paths
from dx import *
me_gbm = market_environment('me_gbm', dt.datetime(2018, 1, 1))
me_gbm.add_constant('initial_value', 36.)
me_gbm.add_constant('volatility', 0.2)
me_gbm.add_constant('final_date', dt.datetime(2018, 12, 31))
me_gbm.add_constant('currency', 'EUR')
me_gbm.add_constant('frequency', 'M')
me_gbm.add_constant('paths', 10000)
csr = constant_short_rate('csr', 0.05)
me_gbm.add_curve('discount_curve', csr)
gbm = geometric_brownian_motion('gbm', me_gbm)
gbm.generate_time_grid()
gbm.time_grid
# array([datetime.datetime(2018, 1, 1, 0, 0),
# datetime.datetime(2018, 1, 31, 0, 0),
# datetime.datetime(2018, 2, 28, 0, 0),
# datetime.datetime(2018, 3, 31, 0, 0),
# datetime.datetime(2018, 4, 30, 0, 0),
# datetime.datetime(2018, 5, 31, 0, 0),
# datetime.datetime(2018, 6, 30, 0, 0),
# datetime.datetime(2018, 7, 31, 0, 0),
# datetime.datetime(2018, 8, 31, 0, 0),
# datetime.datetime(2018, 9, 30, 0, 0),
# datetime.datetime(2018, 10, 31, 0, 0),
# datetime.datetime(2018, 11, 30, 0, 0),
# datetime.datetime(2018, 12, 31, 0, 0)], dtype=object)
%time paths_1 = gbm.get_instrument_values()
# Wall time: 8.99 ms
paths_1
# array([[36. , 36. , 36. , ..., 36. ,
# 36. , 36. ],
# [37.37221481, 38.08890977, 34.37156575, ..., 36.22258915,
# 35.05503522, 39.63544014],
# [39.45866146, 42.18817025, 32.38579992, ..., 34.80319951,
# 33.60600939, 37.62733874],
# ...,
# [40.15717404, 33.16701733, 23.32556112, ..., 37.5619937 ,
# 29.89282508, 30.2202427 ],
# [42.0974104 , 36.59006321, 21.70771374, ..., 35.70950512,
# 30.64670854, 30.45901309],
# [43.33170027, 37.42993532, 23.8840177 , ..., 35.92624556,
# 27.87720187, 28.77424561]])
gbm.update(volatility=0.5)
%time paths_2 = gbm.get_instrument_values()
# Wall time: 8 ms
# GBM 模拟类中的模拟路程
import matplotlib.pyplot as plt
plt.figure(figsize=(8, 6))
p1 = plt.plot(gbm.time_grid, paths_1[:, :10], 'b')
p2 = plt.plot(gbm.time_grid, paths_2[:, :10], 'r-.')
plt.grid(True)
l1 = plt.legend([p1[0], p2[0]], ['low volatility', 'high volatility'], loc=2)
plt.gca().add_artist(l1)
plt.xticks(rotation=30)
公式 Meron跳跃扩散模型的随分方程
下面公式介绍一种用于跳跃扩散的欧拉离散化公式,其中 znt z t n 呈标准正态分布,yt呈密度为λ的泊松分布。
公式 Meron跳跃扩散模型的欧拉离散化
#
# DX Analytics
# Base Classes and Model Classes for Simulation
# jump_diffusion.py
#
class jump_diffusion(simulation_class):
''' Class to generate simulated paths based on
the Merton (1976) jump diffusion model.
Attributes
==========
name : string
name of the object
mar_env : instance of market_environment
market environment data for simulation
corr : boolean
True if correlated with other model object
Methods
=======
update :
updates parameters
generate_paths :
returns Monte Carlo paths given the market environment
'''
def __init__(self, name, mar_env, corr=False):
super(jump_diffusion, self).__init__(name, mar_env, corr)
try:
self.lamb = mar_env.get_constant('lambda')
self.mu = mar_env.get_constant('mu')
self.delt = mar_env.get_constant('delta')
except:
print('Error parsing market environment.')
def update(self, pricing_date=None, initial_value=None,
volatility=None, lamb=None, mu=None, delta=None,
final_date=None):
if pricing_date is not None:
self.pricing_date = pricing_date
self.time_grid = None
self.generate_time_grid()
if initial_value is not None:
self.initial_value = initial_value
if volatility is not None:
self.volatility = volatility
if lamb is not None:
self.lamb = lamb
if mu is not None:
self.mu = mu
if delta is not None:
self.delt = delta
if final_date is not None:
self.final_date = final_date
self.instrument_values = None
def generate_paths(self, fixed_seed=False, day_count=365.):
if self.time_grid is None:
self.generate_time_grid()
# method from generic model simulation class
# number of dates for time grid
M = len(self.time_grid)
# number of paths
I = self.paths
# array initialization for path simulation
paths = np.zeros((M, I))
# initialize first date with initial_value
paths[0] = self.initial_value
if self.correlated is False:
# if not correlated generate random numbers
sn1 = sn_random_numbers((1, M, I),
fixed_seed=fixed_seed)
else:
# if correlated use random number object as provided
# in market environment
sn1 = self.random_numbers
# Standard normally distributed seudo-random numbers
# for the jump component
sn2 = sn_random_numbers((1, M, I),
fixed_seed=fixed_seed)
forward_rates = self.discount_curve.get_forward_rates(
self.time_grid, self.paths, dtobjects=True)[1]
rj = self.lamb * (np.exp(self.mu + 0.5 * self.delt ** 2) - 1)
for t in range(1, len(self.time_grid)):
# select the right time slice from the relevant
# random number set
if self.correlated is False:
ran = sn1[t]
else:
# only with correlation in portfolio context
ran = np.dot(self.cholesky_matrix, sn1[:, t, :])
ran = ran[self.rn_set]
dt = (self.time_grid[t] - self.time_grid[t - 1]).days / day_count
# difference between two dates as year fraction
poi = np.random.poisson(self.lamb * dt, I)
# Poisson distributed pseudo-random numbers for jump component
rt = (forward_rates[t - 1] + forward_rates[t]) / 2
paths[t] = paths[t - 1] * (
np.exp((rt - rj - 0.5 * self.volatility ** 2) * dt +
self.volatility * np.sqrt(dt) * ran) +
(np.exp(self.mu + self.delt * sn2[t]) - 1) * poi)
self.instrument_values = paths
jump_diffusion 类的特殊市场环境元素
元素 | 类型 | 强制 | 描述 |
---|---|---|---|
lambda | 常量 | 是 | 跳跃密度(概率、按年) |
mu | 常量 | 是 | 预期跳跃规律 |
delta | 常量 | 是 | 跳跃规律的标准差 |
me_jd = market_environment('me_jd', dt.datetime(2018, 1, 1))
me_jd.add_constant('lambda', 0.3)
me_jd.add_constant('mu', -0.75)
me_jd.add_constant('delta', 0.1)
me_jd.add_environment(me_gbm)
jd = jump_diffusion('jd', me_jd)
%time paths_3 = jd.get_instrument_values()
# Wall time: 21 ms
jd.update(lamb=0.9)
%time paths_4 = jd.get_instrument_values()
# Wall time: 19 ms
plt.figure(figsize=(8, 6))
p1 = plt.plot(gbm.time_grid, paths_3[:, :10], 'b')
p2 = plt.plot(gbm.time_grid, paths_4[:, :10], 'r-.')
plt.grid(True)
l1 = plt.legend([p1[0], p2[0]], ['low volatility', 'high volatility'], loc=2)
plt.gca().add_artist(l1)
plt.xticks(rotation=30)
公式 平方根扩散的随机微分方程
众所周知, xt x t 的值呈卡方分布。但是,许多金融模型可以使用正态分布进行离散化和近似计算(即所谓的欧拉离散化格式)。虽然欧拉格式对几何布朗运动很准确 ,但是对于大部分其他随机过程则会产生偏差。即使有精确的格式,因为数值化或者计算的原因,使欧拉格式可能最合适。定义 s≡t−Δt s ≡ t − Δ t 和 x+≡max(x,0) x + ≡ m a x ( x , 0 ) ,下面公式提出了一种欧拉格式。这种特殊格式在文献中通常称作完全截断。
公式 平方根扩散的欧拉离散化
#
# DX Analytics
# Base Classes and Model Classes for Simulation
# square_root_diffusion.py
#
class square_root_diffusion(simulation_class):
''' Class to generate simulated paths based on
the Cox-Ingersoll-Ross (1985) square-root diffusion.
Attributes
==========
name : string
name of the object
mar_env : instance of market_environment
market environment data for simulation
corr : boolean
True if correlated with other model object
Methods
=======
update :
updates parameters
generate_paths :
returns Monte Carlo paths given the market environment
'''
def __init__(self, name, mar_env, corr=False):
super(square_root_diffusion, self).__init__(name, mar_env, corr)
try:
self.kappa = mar_env.get_constant('kappa')
self.theta = mar_env.get_constant('theta')
except:
print('Error parsing market environment.')
def update(self, pricing_date=None, initial_value=None, volatility=None,
kappa=None, theta=None, final_date=None):
if pricing_date is not None:
self.pricing_date = pricing_date
self.time_grid = None
self.generate_time_grid()
if initial_value is not None:
self.initial_value = initial_value
if volatility is not None:
self.volatility = volatility
if kappa is not None:
self.kappa = kappa
if theta is not None:
self.theta = theta
if final_date is not None:
self.final_date = final_date
self.instrument_values = None
def generate_paths(self, fixed_seed=True, day_count=365.):
if self.time_grid is None:
self.generate_time_grid()
M = len(self.time_grid)
I = self.paths
paths = np.zeros((M, I))
paths_ = np.zeros_like(paths)
paths[0] = self.initial_value
paths_[0] = self.initial_value
if self.correlated is False:
rand = sn_random_numbers((1, M, I),
fixed_seed=fixed_seed)
else:
rand = self.random_numbers
for t in range(1, len(self.time_grid)):
dt = (self.time_grid[t] - self.time_grid[t - 1]).days / day_count
if self.correlated is False:
ran = rand[t]
else:
ran = np.dot(self.cholesky_matrix, rand[:, t, :])
ran = ran[self.rn_set]
# full truncation Euler discretization
paths_[t] = (paths_[t - 1] + self.kappa *
(self.theta - np.maximum(0, paths_[t - 1])) * dt +
np.sqrt(np.maximum(0, paths_[t - 1])) *
self.volatility * np.sqrt(dt) * ran)
paths[t] = np.maximum(0, paths_[t])
self.instrument_values = paths
square_root_diffusion 类市场环境的特定元素
元素 | 类型 | 强制 | 描述 |
---|---|---|---|
kappa | 常量 | 是 | 均值回归因子 |
theta | 常量 | 是 | 过程长期均值 |
me_srd = market_environment('me_srd', dt.datetime(2018, 1, 1))
me_srd.add_constant('initial_value', 0.25)
me_srd.add_constant('volatility', 0.05)
me_srd.add_constant('final_date', dt.datetime(2018,12,31))
me_srd.add_constant('currency', 'EUR')
me_srd.add_constant('frequency', 'W')
me_srd.add_constant('paths', 10000)
me_srd.add_constant('kappa',4.0)
me_srd.add_constant('theta',0.2)
me_srd.add_curve('discount_curve',constant_short_rate('r',0.0))
srd=square_root_diffusion('srd',me_srd)
srd_paths=srd.get_instrument_values()[:,:10]
plt.figure(figsize=(8, 6))
p1 = plt.plot(srd.time_grid, srd.get_instrument_values()[:, :10])
plt.axhline(me_srd.get_constant('theta'), color='r', ls='--', lw=2.0)
plt.grid(True)
plt.xticks(rotation=30)
来自平方根扩融模拟类的模拟路径(虚线=长期均值 theta)
^_^