常规操作
import warnings

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
# ConvergenceWarning must be imported before it is passed to filterwarnings
# below; sklearn.exceptions is its public location.
from sklearn.exceptions import ConvergenceWarning
from sklearn.linear_model import LinearRegression, LassoCV, RidgeCV, ElasticNetCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures  # polynomial feature expansion

# Font setup so that Chinese text in figures is not rendered as garbled boxes.
mpl.rcParams['font.sans-serif'] = [u'simHei']
mpl.rcParams['axes.unicode_minus'] = False

# Suppress convergence warnings emitted while fitting the models.
warnings.filterwarnings(action='ignore', category=ConvergenceWarning)
一、pandas
pandas.DataFrame.describe
pandas.DataFrame.dtypes
pandas.DataFrame.iloc
获取所有行的前两列: X = data.iloc[:,0:2]
二、sklearn
sklearn.preprocessing.scale
sklearn.preprocessing.StandardScaler
sklearn.preprocessing.MinMaxScaler
sklearn.preprocessing.normalize
参考:技巧 - 剖析归一化和标准化
sklearn.pipeline.Pipeline
参考:API - Sklearn三大模型
sklearn.preprocessing.PolynomialFeatures
三、numpy
numpy.arange
# np.arange(3) yields [0, 1, 2]; here it gives one index per element of X_test.
t = np.arange(len(X_test))
# Red solid line of Y_test against that index.
plt.plot(
    t,
    Y_test,
    'r-',
    label=u'真实值',
    ms=10,
    zorder=N,
)
numpy.linspace
degree = np.arange(1, N, 4)  # orders (degrees): 1, 5, 9, ... below N
# One colour per degree: evenly spaced integers from 0xFF0000 down to 0x0000FF,
# each truncated to int and formatted as a zero-padded 6-digit hex string.
colors = ['#%06x' % int(c) for c in np.linspace(0xFF0000, 0x0000FF, degree.size)]
numpy.set_printoptions
# Fix the seed so the noisy sample below is reproducible.
np.random.seed(100)
# Printing options: linewidth is the number of characters per line before a
# line break is inserted; suppress=True avoids scientific notation.
np.set_printoptions(suppress=True, linewidth=1000)
N = 10
# N points spread over [0, 6], each shifted by standard-normal noise.
x_flat = np.linspace(0, 6, N) + np.random.randn(N)
# Cubic polynomial of x plus a second, independent batch of noise.
y_flat = 1.8 * x_flat ** 3 + x_flat ** 2 - 14 * x_flat - 7 + np.random.randn(N)
# Turn both into column vectors of shape (N, 1).
x = x_flat.reshape(-1, 1)
y = y_flat.reshape(-1, 1)
参考: 确定浮点数字、数组、和numpy对象的显示形式
向上 - numpy.ceil
向下 - numpy.floor
四舍五入(恰为 .5 时取最近的偶数) - numpy.rint
截取整数部分 - numpy.trunc
四、matplotlib
matplotlib.pyplot.plot
# 'ro' = red circle markers; ms is the marker size, zorder the drawing layer.
plt.plot(
    x,
    y,
    'ro',
    ms=10,
    zorder=N,
)
ms - 标记点大小(markersize) zorder - 图像的层(数值越大越靠上层绘制)
01 matplotlib - 折线图、绘图属性、Web安全色、子图、保存画板
02 matplotlib - 柱状图、直方图、散点图 、饼图