提示:文章写完后,目录可以自动生成,如何生成可参考右边的帮助文档
22亚太杯数学建模竞赛C题全球气温预测代码思路
竞赛提供的数据集共有239177条记录,包含1820年至2013年全球100个城市的气温数据。要进行温度预测,首先需要对数据进行预处理。可用于时空数据预测的机器学习方法有很多,这里以LightGBM为例。
首先导入所需的库,代码如下:
import pandas as pd
import numpy as np
import lightgbm as lgb
from datetime import datetime
from sklearn import preprocessing
from sklearn import metrics
import pickle
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn import metrics
import time
import random
import math
import matplotlib.pyplot as plt
import warnings
import xgboost as xgb
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.metrics import cohen_kappa_score
import seaborn as sns
from datetime import datetime
from sklearn import preprocessing
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
读取数据,代码如下:
# Load the competition dataset (expected in the current working directory).
df = pd.read_csv(filepath_or_buffer='2022_APMCM_C_Data.csv')
# Bare expression: shows the frame when it is the last line of a notebook
# cell; it has no effect in a plain script.
df
数据预处理(拆分日期、对经纬度编码、删除缺失值),代码如下:
# --- Date features -------------------------------------------------------
# Parse the timestamp column, expand it into numeric year / month / day
# columns, then drop the original column.
df['dt'] = pd.to_datetime(df['dt'])
df['年'] = df['dt'].dt.year
df['月'] = df['dt'].dt.month
df['日'] = df['dt'].dt.day
df.drop(['dt'], axis=1, inplace=True)
df  # notebook display only

warnings.simplefilter(action='ignore', category=FutureWarning)

# --- Coordinate encoding -------------------------------------------------
# Each coordinate column is cast to str and replaced by integer codes.
# NOTE(review): LabelEncoder numbers the sorted distinct strings, so the
# codes follow string order rather than geographic order -- confirm this is
# the intended representation of latitude / longitude.
colum = ['Latitude', 'Longitude']
data = df  # alias, not a copy: the encoding below is also visible through df
coord_encoders = {}  # column name -> fitted encoder, kept for inverse_transform
for strflag in colum:
    # A fresh encoder per column; refitting one shared encoder would discard
    # the mapping of every column except the last.
    lbl = preprocessing.LabelEncoder()
    data[strflag] = lbl.fit_transform(data[strflag].astype(str))
    coord_encoders[strflag] = lbl
final_labels = data.columns.tolist()[1:6]

# --- Missing values ------------------------------------------------------
# Drop every row that still contains a missing value. From here on `data`
# is a new frame; `df` keeps the rows with missing values.
data = data.dropna(axis=0)
data  # notebook display only
划分训练集和测试集,代码如下:
# Feature matrix: the columns at positions 1..5 of the cleaned frame.
# NOTE(review): this is a positional slice -- verify against the CSV schema
# that it selects the intended feature columns (and whether the 年/月/日
# columns appended earlier fall inside it).
train_name = data.columns[1:6]
train = data[train_name]
# Regression target.
label = data['AverageTemperature']

# 70 % / 30 % hold-out split with a fixed seed for reproducibility.
train_x, TEST_X, train_y, TEST_y = train_test_split(
    train, label, test_size=0.3, random_state=1
)

# Wrap the target Series in single-column frames named after the target.
train_y = pd.DataFrame({'AverageTemperature': train_y})
TEST_y = pd.DataFrame({'AverageTemperature': TEST_y})

# Renumber the rows of every piece from 0 so that they can later be aligned
# by position with freshly created frames.
train_x = train_x.reset_index(drop=True)
train_y = train_y.reset_index(drop=True)
TEST_X = TEST_X.reset_index(drop=True)
TEST_y = TEST_y.reset_index(drop=True)
设置模型参数、训练LightGBM回归模型并在测试集上预测,代码如下:
## Hyperparameters
# Parameter dictionary for a gradient-boosted regression model.
# NOTE(review): several entries overlap according to the LightGBM parameter
# docs -- `eta` is an alias of `learning_rate`, `subsample` of
# `bagging_fraction`, `colsample_bytree` of `feature_fraction` -- and
# `is_unbalance` is documented for classification objectives only. Confirm
# which values are meant to take effect before passing this dict to a trainer.
params = dict(
    boosting_type='gbdt',
    objective='regression',
    metric='rmse',
    bagging_freq=2,
    num_leaves=511,
    max_depth=9,
    device='cpu',
    min_data_in_leaf=220,
    learning_rate=0.01,
    feature_fraction=0.8,
    bagging_fraction=0.9,
    lambda_l1=0.3,
    lambda_l2=0.5,
    min_gain_to_split=0.2,
    is_unbalance=True,
    subsample=0.9,
    colsample_bytree=0.7,
    colsample_bylevel=0.7,
    eta=0.04,
)
## Training
from lightgbm import LGBMRegressor

# Hyperparameters are given under the scikit-learn wrapper's own argument
# names, one name per setting. The earlier version passed both a LightGBM
# primary name and its alias for two settings (feature_fraction=0.9 together
# with colsample_bytree=0.7, bagging_fraction=0.9 together with
# subsample=0.9); LightGBM keeps the primary name and ignores the alias with
# a warning, so the values below are the ones that actually took effect.
# `colsample_bylevel` is not a LightGBM parameter and has been dropped.
model1 = LGBMRegressor(
    learning_rate=0.2,
    num_leaves=255,
    max_depth=8,
    subsample=0.9,          # LightGBM: bagging_fraction
    subsample_freq=2,       # LightGBM: bagging_freq
    colsample_bytree=0.9,   # LightGBM: feature_fraction
    min_child_samples=220,  # LightGBM: min_data_in_leaf
    reg_alpha=0.3,          # LightGBM: lambda_l1
    reg_lambda=0.5,         # LightGBM: lambda_l2
    min_split_gain=0.2,     # LightGBM: min_gain_to_split
    metric='rmse',
)

# Flatten the target to 1-D: handing the estimator a single-column frame
# triggers a column-vector conversion warning.
model1.fit(train_x, np.ravel(train_y))

# Predict on the held-out features.
y_predict = model1.predict(TEST_X)
y_predict  # notebook display only

# Wrap the predictions in a one-column frame; the column name differs from
# the target column so the two can sit side by side later.
y_predict = pd.DataFrame(y_predict, columns=['AverageTemperature1'])
## Evaluation
# Put predictions and held-out targets side by side, matched on row index.
test_err = pd.concat([y_predict, TEST_y], axis='columns')
# Keep only the rows in which no value is missing.
test_err1 = test_err.dropna(axis='index')
test_err1  # notebook display only