# Multi-class classification: Iris species with a random forest
import pandas as pd
import sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# One fixed seed shared by the split and the forest, so that repeated runs
# produce the same partition, the same model and therefore the same scores.
RANDOM_STATE = 42

# The file is read with header=None, so the column names are supplied here.
data = pd.read_csv(
    './iris.data',
    header=None,
    names=[
        'sepal_length_cm',
        'sepal_width_cm',
        'petal_length_cm',
        'petal_width_cm',
        'cla',
    ],
)
print(data.head())

# Distinct class labels. A recorded run showed three of them:
# 'Iris-setosa', 'Iris-versicolor', 'Iris-virginica'.
print(data['cla'].unique())

x = data.iloc[:, :-1]  # feature matrix: every column except the last
y = data.iloc[:, -1]   # target labels: the last column ('cla')

# Hold out a test set using train_test_split's default test size.
# stratify=y keeps the class proportions the same in both parts, which matters
# for a data set this small; random_state makes the partition repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, stratify=y, random_state=RANDOM_STATE
)
# A recorded run gave shapes (112, 4) for train and (38, 4) for test.
print(x_train.shape, x_test.shape)

# The forest is randomized as well, so it gets the same seed.
model = RandomForestClassifier(random_state=RANDOM_STATE)
model.fit(x_train, y_train)

# Mean accuracy on the training rows and on the held-out rows.
# An earlier, unseeded run recorded 1.0 (train) and 0.9210526315789473 (test);
# the seeded values may differ from those but are stable between runs.
train_accuracy = model.score(x_train, y_train)
test_accuracy = model.score(x_test, y_test)
print(train_accuracy)
print(test_accuracy)

# Predicted class label for every row of the test set (an array of the class
# name strings listed above).
predictions = model.predict(x_test)
print(predictions)