import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from pandas.plotting import scatter_matrix
from sklearn.datasets import load_iris
# Load the iris dataset bundled with scikit-learn and wrap its four
# measurement columns in a DataFrame with readable column names.
iris = load_iris()
iris_d = pd.DataFrame(
    data=iris.data,
    columns=['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width'],
)
# Attach the class labels (iris.target) as a fifth column.
iris_d['Species'] = iris.target
# Discard any row that contains a missing value.
iris_d = iris_d.dropna()
# Bare expression: renders the frame when run as a notebook cell.
iris_d
|
Sepal_Length |
Sepal_Width |
Petal_Length |
Petal_Width |
Species |
0 |
5.1 |
3.5 |
1.4 |
0.2 |
0 |
1 |
4.9 |
3.0 |
1.4 |
0.2 |
0 |
2 |
4.7 |
3.2 |
1.3 |
0.2 |
0 |
3 |
4.6 |
3.1 |
1.5 |
0.2 |
0 |
4 |
5.0 |
3.6 |
1.4 |
0.2 |
0 |
... |
... |
... |
... |
... |
... |
145 |
6.7 |
3.0 |
5.2 |
2.3 |
2 |
146 |
6.3 |
2.5 |
5.0 |
1.9 |
2 |
147 |
6.5 |
3.0 |
5.2 |
2.0 |
2 |
148 |
6.2 |
3.4 |
5.4 |
2.3 |
2 |
149 |
5.9 |
3.0 |
5.1 |
1.8 |
2 |
150 rows × 5 columns
# Count the number of rows per Species label to check class balance.
iris_d.groupby('Species').size()
Species
0 50
1 50
2 50
dtype: int64
# Summary statistics (count, mean, std, min, quartiles, max) for every column,
# including the numeric Species label column.
iris_d.describe()
|
Sepal_Length |
Sepal_Width |
Petal_Length |
Petal_Width |
Species |
count |
150.000000 |
150.000000 |
150.000000 |
150.000000 |
150.000000 |
mean |
5.843333 |
3.057333 |
3.758000 |
1.199333 |
1.000000 |
std |
0.828066 |
0.435866 |
1.765298 |
0.762238 |
0.819232 |
min |
4.300000 |
2.000000 |
1.000000 |
0.100000 |
0.000000 |
25% |
5.100000 |
2.800000 |
1.600000 |
0.300000 |
0.000000 |
50% |
5.800000 |
3.000000 |
4.350000 |
1.300000 |
1.000000 |
75% |
6.400000 |
3.300000 |
5.100000 |
1.800000 |
2.000000 |
max |
7.900000 |
4.400000 |
6.900000 |
2.500000 |
2.000000 |
# Exploratory plots. Species is a column of iris_d, so it appears in each
# figure alongside the four measurements.

# Box-and-whisker plot of every column.
iris_d.plot.box()
plt.show()

# One histogram per column.
iris_d.hist()
plt.show()

# Pairwise scatter plots of all columns.
scatter_matrix(frame=iris_d)
plt.show()

# Separate the raw values into features (first four columns) and the label
# (fifth column, Species), then hold out 20% of the rows as a test set.
array = iris_d.values
X, Y = array[:, :4], array[:, 4]
# random_state is fixed so the same rows land in the test set on every run.
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=7,
)
# Baseline: k-nearest-neighbours classifier with default hyper-parameters.
# fit() returns the estimator itself, so construction and training are chained.
model = KNeighborsClassifier().fit(x_train, y_train)
predictions = model.predict(x_test)
# Fraction of held-out samples whose predicted label matches the true label.
print(accuracy_score(y_true=y_test, y_pred=predictions))
0.9
# Baseline: support-vector classifier with default hyper-parameters.
model = SVC().fit(x_train, y_train)
predictions = model.predict(x_test)
# Report accuracy on the held-out test split.
print(accuracy_score(y_true=y_test, y_pred=predictions))
0.8666666666666667
# Baseline: random-forest classifier.
# The forest is built with randomised sampling, so an unseeded model can print
# a different accuracy on each run. Seed it (same value as the train/test
# split) so the reported score is reproducible.
model = RandomForestClassifier(random_state=7)
model.fit(x_train, y_train)
predictions = model.predict(x_test)
# Report accuracy on the held-out test split.
print(accuracy_score(y_test, predictions))
0.8666666666666667
# Baseline: logistic-regression classifier.
# With default settings the lbfgs solver stops at its iteration limit before
# converging on this unscaled data (it emits a ConvergenceWarning), so the
# fitted coefficients are not the optimum. Raise max_iter so the solver can
# run to convergence.
model = LogisticRegression(max_iter=1000)
model.fit(x_train, y_train)
predictions = model.predict(x_test)
# Report accuracy on the held-out test split.
print(accuracy_score(y_test, predictions))
0.8666666666666667
d:\program files\python3.7\lib\site-packages\sklearn\linear_model\_logistic.py:765: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)