Python：AIC准则下线性回归实现及模型检验案例分析

#coding=utf-8

#time:2019/8/11

#function:线性回归

#author:Karen

import pandas as pd

import numpy as np

import statsmodels.api as sm

import matplotlib.pyplot as plt

from sklearn import preprocessing

import statsmodels.formula.api as smf

from sklearn.model_selection import train_test_split

from sklearn.metrics import mean_squared_error

import math

#解决负号显示问题

from matplotlib import font_manager as fm

#数据预处理及数据探索

def get_data():
    """Load the Beijing hotel price CSV and run the exploratory preprocessing.

    Reads ``北京高端酒店价格.csv`` from the current working directory and then:

    * prints the distinct ``地区`` values as one comma-prefixed string and
      the ``describe()`` summary of ``位置评分``;
    * recodes ``装修时间`` into ``'新装修'`` (value > 2015) or ``'旧装修'``;
    * one-hot encodes ``房间类型``, ``地区`` and the recoded ``装修时间``;
    * moves ``房价`` to the first column;
    * drops the identifier/location columns and one dummy level per
      category (``其他城区``, ``标准间``, ``旧装修``);
    * adds ``对数房价`` as the natural log of ``房价``;
    * rounds the frame to one decimal place.

    The frame head is printed before and after the final column drop.

    NOTE(review): the visible source ends at the "standardize the data"
    step with no further code and no ``return`` statement, so this function
    currently returns ``None``. Confirm the intended remaining steps and
    return value against the original script.

    Raises:
        FileNotFoundError: if the CSV file is not in the working directory.
        KeyError: if an expected column or dummy level is absent.
    """
    # Open through a context manager so the handle is closed even when
    # parsing fails (the original left the file object open).
    with open('北京高端酒店价格.csv', encoding='utf/8') as csv_file:
        data = pd.read_csv(csv_file, encoding='utf/8')

    # Same output as the original loop: every district is prefixed with a
    # comma, so the printed string starts with ','.
    print(''.join(f',{district}' for district in data['地区'].unique()))
    print(data['位置评分'].describe())

    data['装修时间'] = data['装修时间'].apply(
        lambda year: '新装修' if year > 2015 else '旧装修'
    )

    # Dummy (one-hot) variables for the three categorical columns.
    room_dummies = pd.get_dummies(data['房间类型'])
    district_dummies = pd.get_dummies(data['地区'])
    renovation_dummies = pd.get_dummies(data['装修时间'])
    data = pd.concat(
        [data, room_dummies, district_dummies, renovation_dummies], axis=1
    )
    data.drop(['房间类型', '地区'], inplace=True, axis=1)

    # Move the response variable to the first column.
    price = data.pop('房价')
    data.insert(0, '房价', price)
    print(data.head())

    # Drop non-model columns plus one level of each dummy group
    # ('其他城区', '标准间', '旧装修'), which become the baseline levels.
    data.drop(
        ['酒店名称', '经度', '纬度', '地址', '其他城区', '标准间', '装修时间', '旧装修'],
        inplace=True,
        axis=1,
    )
    data['对数房价'] = data['房价'].apply(math.log)
    print(data.head())

    data = data.round(1)

    # TODO(review): data standardization step — not present in the source.

你可能感兴趣的:(python,aic准则)