import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from sklearn.metrics import mean_squared_error
from math import sqrt
# Input: a CSV file with a 'date' column plus 'temperature' and 'humidity'
# readings for each date.
file_path = 'weather_data.csv'
# Load the data, parsing 'date' as datetimes and using it as the row index.
df = pd.read_csv(file_path, index_col='date', parse_dates=['date'])
# Explore the data: first rows, then descriptive statistics.
for preview in (df.head(), df.describe()):
    print(preview)
# Visualise both series on a single figure, one labelled line per column.
plt.figure(figsize=(12, 6))
for column, line_label in (('temperature', 'Temperature'), ('humidity', 'Humidity')):
    plt.plot(df[column], label=line_label)
plt.title('Temperature and Humidity Over Time')
plt.legend()
plt.show()
# Difference the series (a common step toward making it stationary).
def difference(dataset, interval=1):
    """Return the lag-``interval`` differences of ``dataset`` as a list.

    Element ``k`` of the result is the value at position ``k + interval``
    minus the value at position ``k``.

    Args:
        dataset: Iterable of numeric values (list, tuple, generator,
            pandas Series, ...). Values are always taken by position.
        interval: Non-negative lag between the two values being subtracted.
            A lag of 0 yields a list of zeros.

    Returns:
        A list of ``max(len(dataset) - interval, 0)`` differences.

    Raises:
        ValueError: If ``interval`` is negative.
    """
    if interval < 0:
        raise ValueError('interval must be non-negative')
    # Materialise the values first so the pairing below is purely positional.
    # Integer ``[]`` lookups on a pandas Series with a non-integer index
    # (such as the date index used in this script) are label-based, and the
    # positional fallback is deprecated.
    values = list(dataset)
    return [
        current - previous
        for previous, current in zip(values, values[interval:])
    ]
# Using the temperature column as the example, take its first-order difference.
temperature_diff = difference(df['temperature'], interval=1)
# Plot the differenced series.
plt.figure(figsize=(12, 6))
plt.title('Temperature Difference')
plt.plot(temperature_diff)
plt.show()
# Draw the ACF and PACF of the differenced series (50 lags each); these are
# the plots used to choose the p, d, q orders.
for correlogram in (plot_acf, plot_pacf):
    correlogram(temperature_diff, lags=50)
plt.show()
# Orders assumed to have been chosen from the ACF/PACF plots: p=1, d=1, q=1.
p = d = q = 1
# Fit the SARIMAX model to the temperature series. The seasonal order is
# passed explicitly as all zeros.
model = SARIMAX(
    df['temperature'],
    order=(p, d, q),
    seasonal_order=(0, 0, 0, 0),
)
model_fit = model.fit()
# Print the fitted model's summary.
print(model_fit.summary())
# Forecast the next 5 steps.
# ``forecast()`` returns only the point predictions, which have no
# ``conf_int()`` method; ``get_forecast()`` returns a results object that
# exposes both the predicted mean and its confidence intervals.
forecast_result = model_fit.get_forecast(steps=5)
forecast = forecast_result.predicted_mean
confidence_intervals = forecast_result.conf_int()
# Plot the observations, the forecast, and the confidence band
# (first column = lower bound, second column = upper bound).
plt.figure(figsize=(12, 6))
plt.plot(df['temperature'], label='Observed')
plt.plot(forecast, label='Forecast', color='red')
plt.fill_between(
    confidence_intervals.index,
    confidence_intervals.iloc[:, 0],
    confidence_intervals.iloc[:, 1],
    color='pink',
)
plt.legend()
plt.title('Temperature Forecast')
plt.show()
# Evaluate the model on a hold-out split: the last 5 rows form the test set
# and everything before them is the training set.
train, test = df[:-5], df[-5:]
# Refit the model on the training portion only.
model = SARIMAX(train['temperature'], order=(p, d, q))
model_fit = model.fit()
# Predict the positions covered by the test set, i.e. the steps immediately
# following the training data.
first_step = len(train)
last_step = first_step + len(test) - 1
test_forecast = model_fit.predict(start=first_step, end=last_step, dynamic=True)
# Score the predictions: mean squared error and its square root.
mse = mean_squared_error(test['temperature'], test_forecast)
rmse = sqrt(mse)
for metric_label, metric_value in (('Test MSE: ', mse), ('Test RMSE: ', rmse)):
    print(metric_label, metric_value)