36. 日月光华 Python数据分析 - 机器学习 - 自然语言处理 随机森林

# 多分类

import pandas as pd
import sklearn

# Load the iris measurements into a DataFrame.
# header=None tells pandas not to treat any row of the file as a header;
# names= then supplies the column labels explicitly.
data = pd.read_csv(
    './iris.data',
    header=None,
    names=[
        'sepal_length_cm',
        'sepal_width_cm',
        'petal_length_cm',
        'petal_width_cm',
        'cla',
    ],
)

# Preview the first rows (the value is only displayed in a notebook/REPL cell).
data.head()
# [image.png: screenshot placeholder, presumably the data.head() output shown in the original post]
data.cla.unique()   # distinct class labels; the recorded output below shows 3 classes
# array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

x = data.iloc[:, :-1]   # feature matrix: every column except the last one
y = data.iloc[:, -1]    # target labels: the last column (the class), not a feature

from sklearn.model_selection import train_test_split

# Hold out part of the data for evaluation (the default split is 75% train /
# 25% test, which matches the recorded shapes below). random_state pins the
# shuffle so the same rows land in each partition on every run.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42)
x_train.shape, x_test.shape
# ((112, 4), (38, 4))

from sklearn.ensemble import RandomForestClassifier

# Seed the forest as well: its bootstrap sampling and per-split feature
# selection are random, so without a seed the scores change from run to run.
model = RandomForestClassifier(random_state=42)
model.fit(x_train, y_train)

# NOTE: the outputs recorded below come from the original, unseeded run and are
# kept as examples; the exact values may differ with the fixed seed.
model.score(x_train, y_train)   # mean accuracy on the training data
# 1.0

model.score(x_test, y_test)     # mean accuracy on the held-out test data
# 0.9210526315789473

model.predict(x_test)           # predicted class label for each test row
# array(['Iris-setosa', 'Iris-virginica', 'Iris-versicolor', 'Iris-setosa',
#       'Iris-virginica', 'Iris-versicolor', 'Iris-versicolor',
#       'Iris-virginica', 'Iris-virginica', 'Iris-versicolor',
#       'Iris-virginica', 'Iris-setosa', 'Iris-versicolor',
#       'Iris-versicolor', 'Iris-virginica', 'Iris-virginica',
#       'Iris-versicolor', 'Iris-setosa', 'Iris-setosa', 'Iris-versicolor',
#       'Iris-setosa', 'Iris-setosa', 'Iris-virginica', 'Iris-versicolor',
#       'Iris-versicolor', 'Iris-setosa', 'Iris-virginica',
#       'Iris-virginica', 'Iris-setosa', 'Iris-versicolor', 'Iris-setosa',
#       'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
#       'Iris-versicolor', 'Iris-setosa', 'Iris-versicolor',
#       'Iris-virginica'], dtype=object)

你可能感兴趣的:(36. 日月光华 Python数据分析 - 机器学习 - 自然语言处理 随机森林)