# python按时间坐标预测销量 (Forecasting sales along a time axis with Python)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import datetime

import warnings
warnings.filterwarnings("ignore")

import statsmodels.api as sm
from statsmodels.graphics.api import qqplot
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller as ADF

## Configure the plotting font so that Chinese text is not garbled
mpl.rcParams.update({
    'font.sans-serif': [u'simHei'],
    'axes.unicode_minus': False,
})


# Load the sales workbook and print a summary of its columns.
data = pd.read_excel("天猫销售数据.xlsx")
data.info()

# NOTE(review): an earlier revision parsed Date from "%Y/%m/%d" strings here;
# presumably read_excel already yields datetimes for that column -- confirm.

# Index the rows by Date and keep only the Orders column. Note that `df` ends
# up holding a Series, not a DataFrame; the rest of the script models it.
indexed = data.set_index("Date")
df = pd.Series(indexed.Orders, index=indexed.index)

# Line chart of the raw Orders series against Date.
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(df)
ax.set_title("Orders of Date")
ax.set_ylabel("Orders")
ax.set_xlabel("Date")
plt.show()

## Difference the series and check each differenced series for stationarity
def diff(timeseries):
    """Run ADF tests on the first and second differences of a series.

    Parameters
    ----------
    timeseries : object with a pandas-style ``diff``/``fillna`` API
        The series to difference (the script passes the Orders Series).

    Returns
    -------
    list
        ``[adf_first_difference, adf_second_difference]``, each element being
        whatever ``ADF`` returns for the corresponding differenced series.
    """
    # The NaN produced by differencing is replaced with 0 so that both
    # differenced series keep the original length.
    # NOTE(review): because of that padding, the second element of the
    # "second difference" equals the first-difference value rather than a
    # true second difference -- confirm this is acceptable for the test.
    time_diff1=timeseries.diff(1).fillna(0) # 1st-order difference
    time_diff2=time_diff1.diff(1).fillna(0) # 2nd-order difference

    # The ADF test on the undifferenced series used to be computed here and
    # thrown away; it was never returned, so it has been dropped.
    return [ADF(time_diff1), ADF(time_diff2)]

# Run the stationarity check on the Orders series. In a plain script the
# return value of this bare call is discarded; the commented values below are
# presumably a result recorded from an interactive session (first tuple: 1st
# difference, second tuple: 2nd difference).
# Per the statsmodels adfuller documentation each tuple is ordered as
# (test statistic, p-value, lags used, observations used, critical values,
# icbest).
diff(df)
#[(-3.683025128820224,
#  0.004358356299291195,
#  10,
#  138,
#  {'1%': -3.47864788917503,
#   '5%': -2.882721765644168,
#   '10%': -2.578065326612056},
#  1423.5325819802563),
# (-9.388939765399352,
#  6.641377737915045e-16,
#  9,
#  139,
#  {'1%': -3.4782936965183815,
#   '5%': -2.882567574015525,
#   '10%': -2.5779830117488745},
#  1434.1670882621088)]
# 
 
def autocorr(time_series,lags):
    """Show the ACF (top) and PACF (bottom) of ``time_series`` in one figure.

    ``lags`` is forwarded unchanged to both statsmodels plotting helpers.
    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    canvas = plt.figure(figsize=(12, 8))
    panels = (
        (211, sm.graphics.tsa.plot_acf),
        (212, sm.graphics.tsa.plot_pacf),
    )
    # Create each axis immediately before drawing on it, top panel first.
    for position, draw in panels:
        axis = canvas.add_subplot(position)
        draw(time_series, lags=lags, ax=axis)

    plt.show()

# Inspect ACF/PACF of the first-differenced series (leading NaN filled with 0).
time_diff1 = df.diff().fillna(0)
autocorr(time_diff1, 30)

# Search ARMA orders up to (7, 7) on the undifferenced series and report the
# orders that minimise AIC and BIC.
# NOTE(review): trend="nc" is the spelling older statsmodels releases use for
# "no constant"; newer releases appear to expect "n" -- confirm against the
# installed version.
data_eva = sm.tsa.arma_order_select_ic(
    df,
    max_ar=7,
    max_ma=7,
    ic=["aic", "bic"],
    trend="nc",
)
for label, best_order in (
    ("data_AIC", data_eva.aic_min_order),
    ("data_BIC", data_eva.bic_min_order),
):
    print(label, best_order)
# Results recorded from earlier runs:
#data_AIC (7, 7)
#data_BIC (1, 1)
#data_AIC (1, 3)
#data_BIC (1, 1)


def _fit_and_report(label, order):
    """Fit SARIMAX on ``df`` with ``order`` and print its AIC, BIC and HQIC."""
    fitted = sm.tsa.SARIMAX(df, order=order).fit()
    print(label, fitted.aic, fitted.bic, fitted.hqic)
    return fitted


# Candidate models, fitted and reported one at a time.
# NOTE: the variable names / printed labels are historical and do not match
# the orders actually fitted (e.g. "arma_77" uses order (3, 1, 1)); they are
# kept because later code refers to them.
arma_77 = _fit_and_report("arma_77", (3, 1, 1))

arma_71 = _fit_and_report("arma_71", (1, 1, 7))

arma_11 = _fit_and_report("arma_11", (1, 1, 1))

arma_35 = _fit_and_report("arma_35", (3, 1, 5))


# Built-in diagnostic figure for the selected model.
arma_77.plot_diagnostics(figsize=(12, 8))

# Durbin-Watson test on the residuals.
# Original note: DW is close to 2, P=0, so there is no autocorrelation.
resid = arma_77.resid
print(sm.stats.durbin_watson(resid.values))

# ACF (top) and PACF (bottom) of the residuals up to lag 15.
fig = plt.figure(figsize=(16, 12))
ax1 = fig.add_subplot(2, 1, 1)
sm.graphics.tsa.plot_acf(resid, ax=ax1, lags=15)  # autocorrelation

ax2 = fig.add_subplot(2, 1, 2)
sm.graphics.tsa.plot_pacf(resid, ax=ax2, lags=15)  # partial autocorrelation

# Tabulate, for lags 1..20, the autocorrelation together with the Q statistic
# and p-value returned by acf(..., qstat=True).
acf, q, p = sm.tsa.acf(np.squeeze(resid.values), nlags=20, qstat=True)
data = np.column_stack((np.arange(1, 21), acf[1:], q, p))
table = pd.DataFrame(data, columns=["lag", "AC", "Q", "P-value"])
print(table.set_index("lag"))



# Dynamic prediction for 2020-07-01 .. 2020-07-23. It is computed once and
# reused for the plot (it used to be computed a second time, with plotting
# keyword arguments that are not parameters of this predict method).
pre = arma_77.predict("2020-07-01", "2020-07-23", dynamic=True)

# Plot the observed series from 2020-04-05 onwards with the prediction
# overlaid as a red dash-dot line on the same axes.
fig, ax = plt.subplots(figsize=(12, 8))
# .loc replaces the .ix indexer, which was removed in pandas 1.0.
ax = df.loc["2020-04-05":].plot(ax=ax)
pre.plot(ax=ax, style="r-.")
# NOTE(review): the title says "next 10 days" but the requested range above
# spans 23 days -- confirm which one is intended.
plt.title("未来10天的销售额预测", fontsize=20)
plt.show()

# 你可能感兴趣的:(python基础)