本科毕业几个月才有时间整理一下自己的成果,希望能帮助更多的人了解数学模型。
通过本文模型可以简单地、较为精准地预测未来1-3期的数据(实测),本文应用的数据来自国家统计网,代码可以借鉴,但请勿雷同。
1.什么是灰色模型
关于灰色模型的叙述,网上可以找到很多资源,本文给出链接,读者可自行查看:灰色预测模型GM(1,1) 与例题分析_DASEason的博客-CSDN博客_灰色预测模型
2.什么是马尔可夫模型
马尔可夫模型同理,本文给出链接,读者可自行查看:
马尔科夫模型 Markov Model_-柚子皮-的博客-CSDN博客_马尔科夫模型
代码如下:
import pandas as pd
import numpy as np
# 级比检验
def level_ratio_test(x0):
    """Check whether a series passes the GM(1,1) level-ratio test.

    The level ratio of consecutive observations is ``x0[k] / x0[k + 1]``.
    Every ratio is compared against the interval
    ``[exp(-2 / (n + 2)), exp(2 / (n + 2))]`` with ``n = len(x0)``, and a
    message saying whether the series is admissible is printed.

    Args:
        x0: Sequence of raw observations (list or 1-D array), length >= 2.

    Returns:
        ``(class_ratio, tared_la)``: the list of ``n - 1`` level ratios and
        the ``[lower, upper]`` admissible bounds.

    Raises:
        ValueError: If fewer than two observations are supplied.
    """
    n = len(x0)
    if n < 2:
        raise ValueError("level_ratio_test needs at least two observations")

    # Formula 2.1: ratio of every observation to its successor.
    class_ratio = [current / following for current, following in zip(x0[:-1], x0[1:])]
    print(f"级比:{class_ratio}")

    # NOTE(review): the criterion is commonly stated with n + 1 in the
    # denominator; this code uses n + 2 -- confirm which bound is intended.
    tared_la = [np.exp(-2 / (n + 2)), np.exp(2 / (n + 2))]

    min_la, max_la = min(class_ratio), max(class_ratio)
    if min_la < tared_la[0] or max_la > tared_la[-1]:
        print("级比超过灰色模型的范围")
    else:
        print("级比满足要求,可用GM(1,1)模型")
    return class_ratio, tared_la
# GM(1,1)模型
def predict(x0):
    """Fit a GM(1,1) grey model to ``x0`` and print fitted and forecast values.

    Steps: accumulate the series (1-AGO), build the mean sequence ``z`` of
    adjacent accumulated values, estimate the development coefficient ``a``
    and grey input ``b`` by least squares, then evaluate the time-response
    function ``(x1[0] - b / a) * exp(-a * k) + b / a`` and difference it to
    get back to the original scale.

    The three printed forecasts are the next three steps after the end of the
    series. Their labels ("2022", "2023", "2024") are fixed strings that match
    the 2009-2021 series used by the accompanying script.

    Args:
        x0: 1-D sequence of observations (list or array), length >= 3.

    Returns:
        ``(x0_solve, x1_solve, U)``: fitted values on the original scale
        (length ``n``, first entry equal to ``x0[0]``), fitted accumulated
        values (length ``n + 1``; the last entry is one step ahead), and the
        parameter vector ``U = [a, b]``.

    Raises:
        ValueError: If fewer than three observations are supplied.
    """
    n = len(x0)
    if n < 3:
        raise ValueError("predict needs at least three observations to fit GM(1,1)")

    # 1-AGO: accumulated generating sequence.
    x1 = np.cumsum(x0)
    # Mean of each pair of adjacent accumulated values.
    z = 0.5 * (x1[:-1] + x1[1:])
    print(f"z1的值{z}")

    # Rows of B are [-z, 1]; Y is the series without its first observation.
    B = np.vstack((-z, np.ones(n - 1)))
    Y = np.asarray(x0[1:])
    # Normal-equation least squares, U[0] = a and U[1] = b.  Kept in this form
    # so the printed parameters reproduce the published figures digit for digit.
    U = np.dot(np.linalg.inv(np.dot(B, B.T)), np.dot(B, Y))
    a, b = U[0], U[1]
    print("a的值为", a)
    print("b的值为", b)
    print(f"序列Z1的值:{z}")

    def cumulative_fit(k):
        # Time-response function for the accumulated series at step k
        # (k = 0 corresponds to the first observation).
        return (x1[0] - b / a) * np.exp(-a * k) + b / a

    x1_solve = np.zeros(n + 1)
    x1_solve[0] = x1[0]
    for k in range(1, n + 1):
        x1_solve[k] = cumulative_fit(k)

    # Difference the accumulated fit to return to the original scale.
    x0_solve = np.zeros(n)
    x0_solve[0] = x1[0]
    x0_solve[1:] = x1_solve[1:n] - x1_solve[:n - 1]
    print(f"预测值:{x0_solve}")

    # Out-of-sample forecasts: steps n, n + 1 and n + 2 (13, 14, 15 for the
    # 13-point series of the article), derived from n instead of hard-coded.
    pre_2022 = cumulative_fit(n) - cumulative_fit(n - 1)
    pre_2023 = cumulative_fit(n + 1) - cumulative_fit(n)
    pre_2024 = cumulative_fit(n + 2) - cumulative_fit(n + 1)
    print(f"2022年预测值:{pre_2022}")
    print(f"2023年预测值:{pre_2023}")
    print(f"2024年预测值:{pre_2024}")
    return x0_solve, x1_solve, U
# 灰色精度检验
def accuracy(x0, x0_solve):
    """Print and return the residuals and relative errors of a fitted series.

    Args:
        x0: Observed values (list or 1-D array); no entry may be zero because
            the relative error divides by it.
        x0_solve: Fitted values, same length as ``x0``.

    Returns:
        ``(epsilon, segema)``: residuals ``x0 - x0_solve`` and relative errors
        ``epsilon / x0``, both as NumPy arrays.
    """
    # Coerce both inputs so plain Python lists work as well as arrays.
    observed = np.asarray(x0, dtype=float)
    fitted = np.asarray(x0_solve, dtype=float)

    epsilon = observed - fitted
    segema = epsilon / observed
    print(f"误差:{segema}")
    Q = np.mean(np.abs(segema))
    print(f"平均相对误差:{Q}")
    return epsilon, segema
if __name__ == '__main__':
    # Yearly series used throughout the article, one value per year 2009-2021.
    years = list(range(2009, 2022))
    observations = [371273, 383600, 429994, 486455, 513626, 535863, 551522,
                    563938, 578045, 604368, 639666, 728627, 772761]
    data = pd.DataFrame(data={"year": years, "eqLarry": observations})

    # Work on a NumPy copy of the value column.
    x0 = np.array(data["eqLarry"])

    # Admissibility check, model fit, then error report.
    class_ratio, tared_la = level_ratio_test(x0)
    x0_solve, x1_solve, u = predict(x0)
    epsilon, segema = accuracy(x0, x0_solve)
可以得到如下结果
级比:[0.9678649635036496, 0.8921054712391335, 0.8839337657131697, 0.9470996405945182, 0.958502453052366, 0.9716076602565265, 0.9779833953377853, 0.9755953256234376, 0.9564454107431234, 0.9448180769338999, 0.8779059793282434, 0.942887904539696]
级比满足要求,可用GM(1,1)模型
z1的值[ 563073. 969870. 1428094.5 1928135. 2452879.5 2996572. 3554302.
4125293.5 4716500. 5338517. 6022663.5 6773357.5]
a的值为 -0.053828999022545086
b的值为 382376.1481650299
序列Z1的值:[ 563073. 969870. 1428094.5 1928135. 2452879.5 2996572. 3554302.
4125293.5 4716500. 5338517. 6022663.5 6773357.5]
预测值:[371273. 413387.712306 436249.75954518 460376.17238692
485836.87546924 512705.66055129 541060.40038119 570983.27439155
602561.00687687 635885.11834322 671052.19075849 708164.14747219
747328.54861581]
2022年预测值:788658.9028402548
2023年预测值:832274.9962934209
2024年预测值:878303.2397917584
误差:[ 0. -0.07765306 -0.01454848 0.05360995 0.05410381 0.04321504
0.0189686 -0.01249299 -0.04241193 -0.05214889 -0.04906653 0.02808413
0.03291115]
平均相对误差:0.03686265795297468
import pandas as pd
import numpy as np
import math
# 状态区间
def states_learn(y0):
    """Split relative errors into three equal-width state intervals.

    The range ``[min(y0), max(y0)]`` is cut into three intervals of width
    ``states_avg``:

    * M1: ``min <= v <= min + states_avg``
    * M2: ``min + states_avg < v <= min + 2 * states_avg``
    * M3: ``min + 2 * states_avg < v <= max``

    Each returned list has ``len(y0)`` entries; entry ``i`` holds ``y0[i]``
    if that value falls in the state and ``np.nan`` otherwise.  The last
    observation is deliberately left unassigned in all three lists:
    ``transfer_mat`` uses the number of non-NaN entries per list as the
    denominator of that state's transition frequencies, and the last
    observation has no successor.

    Args:
        y0: Non-empty 1-D sequence of relative errors.

    Returns:
        ``(states_M1, states_M2, states_M3, min_sta, max_sta, states_avg)``.

    Raises:
        ValueError: If ``y0`` is empty.
    """
    n = len(y0)
    if n == 0:
        raise ValueError("states_learn needs at least one value")

    min_sta, max_sta = min(y0), max(y0)
    states_avg = (max_sta - min_sta) / 3
    lower_cut = min_sta + states_avg
    upper_cut = min_sta + 2 * states_avg

    states_M1 = [np.nan] * n
    states_M2 = [np.nan] * n
    states_M3 = [np.nan] * n
    # The intervals are disjoint, so a value lands in at most one list.
    for i in range(n - 1):
        value = y0[i]
        if min_sta <= value <= lower_cut:
            states_M1[i] = value
        elif lower_cut < value <= upper_cut:
            states_M2[i] = value
        elif upper_cut < value <= max_sta:
            states_M3[i] = value

    print(f"处于状态M1{states_M1}")
    print(f"处于状态M2{states_M2}")
    print(f"处于状态M3{states_M3}")
    return states_M1, states_M2, states_M3, min_sta, max_sta, states_avg
# 概率矩阵3*3
def _state_index(value, lower, width, upper):
    """Return 0, 1 or 2 for the state interval containing ``value``, else None."""
    if lower <= value <= lower + width:
        return 0
    if lower + width < value <= lower + 2 * width:
        return 1
    if lower + 2 * width < value <= upper:
        return 2
    return None


def _representative_error(row, best, lower, width, upper):
    """Pick the relative error used to correct a value whose state has counts ``row``.

    ``best`` is the largest count in ``row``.  When two target states share
    it, the value between them is used; otherwise the midpoint of the single
    most frequent target state is used.
    """
    to_first, to_second, to_third = (count == best for count in row)
    if to_first and to_second:
        return lower + width
    if to_first and to_third:
        return lower + 1.5 * width
    if to_second and to_third:
        return lower + 2 * width
    if to_first:
        return 1 / 2 * (lower + lower + width)
    if to_second:
        return 1 / 2 * (lower + width + lower + 2 * width)
    return 1 / 2 * (lower + 2 * width + upper)


def transfer_mat(y0, y1):
    """Build the 3x3 Markov transition matrix and correct the grey fit.

    Reads the module-level results of ``states_learn`` (``states_M1``,
    ``states_M2``, ``states_M3``, ``min_sta``, ``max_sta``, ``states_avg``)
    and calls the module-level ``matrixPow``; these must exist before the
    call.

    Transition frequencies are counted over consecutive pairs of ``y0`` and
    divided by the number of members recorded for the origin state (a state
    without members gets an all-zero row).  Each grey value ``y1[i]`` is then
    corrected as ``y1[i] / (1 - e)``, where ``e`` is the representative
    relative error of the most frequent target state(s) of the state that
    ``y0[i]`` belongs to.  ``y0`` is not modified.

    Args:
        y0: 1-D sequence of relative errors (the one given to ``states_learn``).
        y1: 1-D sequence of grey-model values, same length as ``y0``.

    Returns:
        ``(mat_tran, y1_solve)``: the one-step transition matrix
        (``np.matrix``) and the corrected values (``np.ndarray``).
    """
    # Kept from the original: mat_M2 is published as a module-level global.
    global mat_M2
    n = len(y0)
    state_members = (states_M1, states_M2, states_M3)
    # Members per origin state = non-NaN entries written by states_learn.
    M1, M2, M3 = (n - members.count(np.nan) for members in state_members)

    # counts[s][t] = number of observed transitions from state s to state t.
    counts = [[0, 0, 0] for _ in range(3)]
    for i in range(1, n):
        origin = _state_index(y0[i - 1], min_sta, states_avg, max_sta)
        target = _state_index(y0[i], min_sta, states_avg, max_sta)
        if origin is not None and target is not None:
            counts[origin][target] += 1
    (m11, m12, m13), (m21, m22, m23), (m31, m32, m33) = counts

    mat_M1 = [m11 / M1, m12 / M1, m13 / M1] if M1 != 0 else [0, 0, 0]
    mat_M2 = [m21 / M2, m22 / M2, m23 / M2] if M2 != 0 else [0, 0, 0]
    mat_M3 = [m31 / M3, m32 / M3, m33 / M3] if M3 != 0 else [0, 0, 0]
    # np.asmatrix instead of np.mat: the np.mat alias was removed in NumPy 2.0.
    mat_tran = np.asmatrix([mat_M1, mat_M2, mat_M3])
    print("转移频数")
    print(m11, m12, m13)
    print(m21, m22, m23)
    print(m31, m32, m33)
    print(f"一步转移概率矩阵{mat_tran}")
    mat_tran_2 = matrixPow(mat_tran, 2)
    print(f"二步转移概率矩阵{mat_tran_2}")
    mat_tran_3 = matrixPow(mat_tran, 3)
    print(f"三步转移概率矩阵{mat_tran_3}")

    # Largest transition count out of each state; integer counts are compared
    # directly, avoiding the float round-trip max(probability) * members.
    next_coin1, next_coin2, next_coin3 = (max(row) for row in counts)
    best_counts = (next_coin1, next_coin2, next_coin3)

    y1_solve = np.zeros(n)
    for i in range(n):
        for members, row, best in zip(state_members, counts, best_counts):
            # members[i] equals y0[i] only when point i belongs to that state.
            if y0[i] == members[i]:
                error = _representative_error(row, best, min_sta, states_avg, max_sta)
                y1_solve[i] = y1[i] / (1 - error)
                break

    # The first value is taken over unchanged.
    y1_solve[0] = y1[0]
    # NOTE(review): the last value is always corrected with the midpoint of
    # state 2, whatever state it falls in -- confirm this is intended.
    y1_solve[n - 1] = y1[n - 1] / (1 - 1 / 2 * (min_sta + states_avg + min_sta + 2 * states_avg))
    print(f"灰色马尔可夫模型训练集预测值{y1_solve}")

    # Forecasts: the constants are the 2022-2024 values printed by the GM(1,1)
    # script, each corrected with the midpoint of state 3.
    state3_midpoint = 1 / 2 * (min_sta + 2 * states_avg + max_sta)
    pre_2022 = 788658.9028402548 / (1 - state3_midpoint)
    pre_2023 = 832274.9962934209 / (1 - state3_midpoint)
    pre_2024 = 878303.2397917584 / (1 - state3_midpoint)
    print(f"2022年预测值:{pre_2022}")
    print(f"2023年预测值:{pre_2023}")
    print(f"2024年预测值:{pre_2024}")
    return mat_tran, y1_solve
# 计算n步概率转移矩阵
def matrixPow(Matrix, n):
    """Return ``Matrix`` raised to the positive integer power ``n``.

    Args:
        Matrix: Square matrix as a nested list, ``np.ndarray`` or
            ``np.matrix``; a list is converted to ``np.ndarray`` first.
        n: Number of steps, an integer >= 1.

    Returns:
        The product of ``n`` copies of ``Matrix`` (``Matrix`` itself for
        ``n == 1``).

    Raises:
        ValueError: If ``n`` is not a positive integer.  The recursive
            original never terminated for such values.
    """
    if isinstance(Matrix, list):
        Matrix = np.array(Matrix)
    if n < 1 or n != int(n):
        raise ValueError("matrixPow expects a positive integer power")

    # Left-multiply repeatedly, matching the recursive order M @ (M @ ...).
    result = Matrix
    for _ in range(int(n) - 1):
        result = np.matmul(Matrix, result)
    return result
def accuracy(y1_solve, x0=None):
    """Print and return the mean absolute relative error of corrected values.

    Args:
        y1_solve: Corrected predictions (list or 1-D array).
        x0: Observed values to compare against.  Defaults to the ten-value
            series used by the accompanying script.

    Returns:
        The mean of ``abs((x0 - y1_solve) / x0)``.

    Raises:
        ValueError: If ``x0`` and ``y1_solve`` differ in shape.
    """
    if x0 is None:
        x0 = [371273, 383600, 429994, 486455, 513626,
              535863, 551522, 563938, 578045, 604368]
    observed = np.asarray(x0, dtype=float)
    predicted = np.asarray(y1_solve, dtype=float)
    if observed.shape != predicted.shape:
        raise ValueError(
            f"expected {observed.shape[0]} predictions, got shape {predicted.shape}"
        )

    epsilon = observed - predicted
    schema = epsilon / observed
    Q = np.mean(np.abs(schema))
    print(f"三区间平均相对误差{Q}")
    print(f"误差{schema}")
    return Q
if __name__ == '__main__':
    # "pre": grey-model values for the ten training points.
    grey_values = [360213.9437, 399987.204, 440527.0594, 475235.0446, 504950.1046,
                   530390.4979, 552171.1576, 570818.5555, 586783.4277, 600451.6696]
    # "eli": the matching relative errors of those grey-model values.
    relative_errors = [0.029786858, -0.04271951, -0.024495829, 0.023064735, 0.016891465,
                       0.010212502, -0.001177029, -0.012200908, -0.01511721, 0.006480043]
    data = pd.DataFrame(data={"pre": grey_values, "eli": relative_errors})

    # NumPy copies of both columns.
    time_list = np.array(data["eli"])
    Prey_grey = np.array(data["pre"])
    y0 = time_list
    y1 = Prey_grey

    # transfer_mat reads these six names from module scope.
    states_M1, states_M2, states_M3, min_sta, max_sta, states_avg = states_learn(y0)
    transfer_ma, y1_solve = transfer_mat(y0, y1)
    Q = accuracy(y1_solve)
通过修改概率矩阵的大小就可以轻松地得到状态区间划分不同的马尔可夫模型,这里留给读者。作者编写时已代入多步转移矩阵:二步、三步转移概率矩阵由 matrixPow 计算(即上文的 mat_tran_2、mat_tran_3),实质上是实现了的;上文的 next_coin1、next_coin2、next_coin3 则分别是状态1、2、3所在行的最大转移频数。以普通程序员的水准可以轻松读懂。
另附上数据误差检测网站,大家用了都说好,还是免费的
SPSSAU_相关|回归分析_因子|方差分析_SPSS下载-在线SPSS分析软件
在编写马尔科夫代码的过程中应当注意区间(开和闭),不然数据误差在数据量较小时会非常大。