import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn import datasets
# Load scikit-learn's bundled iris dataset and dump the returned object to stdout.
iris=datasets.load_iris()
print(iris)
{'DESCR': '.. _iris_dataset:\n\nIris plants dataset\n--------------------\n\n**Data Set Characteristics:**\n\n :Number of Instances: 150 (50 in each of three classes)\n :Number of Attributes: 4 numeric, predictive attributes and the class\n :Attribute Information:\n - sepal length in cm\n - sepal width in cm\n - petal length in cm\n - petal width in cm\n - class:\n - Iris-Setosa\n - Iris-Versicolour\n - Iris-Virginica\n \n :Summary Statistics:\n\n ============== ==== ==== ======= ===== ====================\n Min Max Mean SD Class Correlation\n ============== ==== ==== ======= ===== ====================\n sepal length: 4.3 7.9 5.84 0.83 0.7826\n sepal width: 2.0 4.4 3.05 0.43 -0.4194\n petal length: 1.0 6.9 3.76 1.76 0.9490 (high!)\n petal width: 0.1 2.5 1.20 0.76 0.9565 (high!)\n ============== ==== ==== ======= ===== ====================\n\n :Missing Attribute Values: None\n :Class Distribution: 33.3% for each of 3 classes.\n :Creator: R.A. Fisher\n :Donor: Michael Marshall (MARSHALL%[email protected])\n :Date: July, 1988\n\nThe famous Iris database, first used by Sir R.A. Fisher. The dataset is taken\nfrom Fisher\'s paper. Note that it\'s the same as in R, but not as in the UCI\nMachine Learning Repository, which has two wrong data points.\n\nThis is perhaps the best known database to be found in the\npattern recognition literature. Fisher\'s paper is a classic in the field and\nis referenced frequently to this day. (See Duda & Hart, for example.) The\ndata set contains 3 classes of 50 instances each, where each class refers to a\ntype of iris plant. One class is linearly separable from the other 2; the\nlatter are NOT linearly separable from each other.\n\n.. topic:: References\n\n - Fisher, R.A. "The use of multiple measurements in taxonomic problems"\n Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions to\n Mathematical Statistics" (John Wiley, NY, 1950).\n - Duda, R.O., & Hart, P.E. 
(1973) Pattern Classification and Scene Analysis.\n (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218.\n - Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System\n Structure and Classification Rule for Recognition in Partially Exposed\n Environments". IEEE Transactions on Pattern Analysis and Machine\n Intelligence, Vol. PAMI-2, No. 1, 67-71.\n - Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule". IEEE Transactions\n on Information Theory, May 1972, 431-433.\n - See also: 1988 MLC Proceedings, 54-64. Cheeseman et al"s AUTOCLASS II\n conceptual clustering system finds 3 classes in the data.\n - Many, many more ...', 'filename': 'e:\\application\\python\\lib\\site-packages\\sklearn\\datasets\\data\\iris.csv', 'data': array([[5.1, 3.5, 1.4, 0.2],
[4.9, 3. , 1.4, 0.2],
[4.7, 3.2, 1.3, 0.2],
[4.6, 3.1, 1.5, 0.2],
[5. , 3.6, 1.4, 0.2],
[5.4, 3.9, 1.7, 0.4],
[4.6, 3.4, 1.4, 0.3],
[5. , 3.4, 1.5, 0.2],
[4.4, 2.9, 1.4, 0.2],
[4.9, 3.1, 1.5, 0.1],
[5.4, 3.7, 1.5, 0.2],
[4.8, 3.4, 1.6, 0.2],
[4.8, 3. , 1.4, 0.1],
[4.3, 3. , 1.1, 0.1],
[5.8, 4. , 1.2, 0.2],
[5.7, 4.4, 1.5, 0.4],
[5.4, 3.9, 1.3, 0.4],
[5.1, 3.5, 1.4, 0.3],
[5.7, 3.8, 1.7, 0.3],
[5.1, 3.8, 1.5, 0.3],
[5.4, 3.4, 1.7, 0.2],
[5.1, 3.7, 1.5, 0.4],
[4.6, 3.6, 1. , 0.2],
[5.1, 3.3, 1.7, 0.5],
[4.8, 3.4, 1.9, 0.2],
[5. , 3. , 1.6, 0.2],
[5. , 3.4, 1.6, 0.4],
[5.2, 3.5, 1.5, 0.2],
[5.2, 3.4, 1.4, 0.2],
[4.7, 3.2, 1.6, 0.2],
[4.8, 3.1, 1.6, 0.2],
[5.4, 3.4, 1.5, 0.4],
[5.2, 4.1, 1.5, 0.1],
[5.5, 4.2, 1.4, 0.2],
[4.9, 3.1, 1.5, 0.2],
[5. , 3.2, 1.2, 0.2],
[5.5, 3.5, 1.3, 0.2],
[4.9, 3.6, 1.4, 0.1],
[4.4, 3. , 1.3, 0.2],
[5.1, 3.4, 1.5, 0.2],
[5. , 3.5, 1.3, 0.3],
[4.5, 2.3, 1.3, 0.3],
[4.4, 3.2, 1.3, 0.2],
[5. , 3.5, 1.6, 0.6],
[5.1, 3.8, 1.9, 0.4],
[4.8, 3. , 1.4, 0.3],
[5.1, 3.8, 1.6, 0.2],
[4.6, 3.2, 1.4, 0.2],
[5.3, 3.7, 1.5, 0.2],
[5. , 3.3, 1.4, 0.2],
[7. , 3.2, 4.7, 1.4],
[6.4, 3.2, 4.5, 1.5],
[6.9, 3.1, 4.9, 1.5],
[5.5, 2.3, 4. , 1.3],
[6.5, 2.8, 4.6, 1.5],
[5.7, 2.8, 4.5, 1.3],
[6.3, 3.3, 4.7, 1.6],
[4.9, 2.4, 3.3, 1. ],
[6.6, 2.9, 4.6, 1.3],
[5.2, 2.7, 3.9, 1.4],
[5. , 2. , 3.5, 1. ],
[5.9, 3. , 4.2, 1.5],
[6. , 2.2, 4. , 1. ],
[6.1, 2.9, 4.7, 1.4],
[5.6, 2.9, 3.6, 1.3],
[6.7, 3.1, 4.4, 1.4],
[5.6, 3. , 4.5, 1.5],
[5.8, 2.7, 4.1, 1. ],
[6.2, 2.2, 4.5, 1.5],
[5.6, 2.5, 3.9, 1.1],
[5.9, 3.2, 4.8, 1.8],
[6.1, 2.8, 4. , 1.3],
[6.3, 2.5, 4.9, 1.5],
[6.1, 2.8, 4.7, 1.2],
[6.4, 2.9, 4.3, 1.3],
[6.6, 3. , 4.4, 1.4],
[6.8, 2.8, 4.8, 1.4],
[6.7, 3. , 5. , 1.7],
[6. , 2.9, 4.5, 1.5],
[5.7, 2.6, 3.5, 1. ],
[5.5, 2.4, 3.8, 1.1],
[5.5, 2.4, 3.7, 1. ],
[5.8, 2.7, 3.9, 1.2],
[6. , 2.7, 5.1, 1.6],
[5.4, 3. , 4.5, 1.5],
[6. , 3.4, 4.5, 1.6],
[6.7, 3.1, 4.7, 1.5],
[6.3, 2.3, 4.4, 1.3],
[5.6, 3. , 4.1, 1.3],
[5.5, 2.5, 4. , 1.3],
[5.5, 2.6, 4.4, 1.2],
[6.1, 3. , 4.6, 1.4],
[5.8, 2.6, 4. , 1.2],
[5. , 2.3, 3.3, 1. ],
[5.6, 2.7, 4.2, 1.3],
[5.7, 3. , 4.2, 1.2],
[5.7, 2.9, 4.2, 1.3],
[6.2, 2.9, 4.3, 1.3],
[5.1, 2.5, 3. , 1.1],
[5.7, 2.8, 4.1, 1.3],
[6.3, 3.3, 6. , 2.5],
[5.8, 2.7, 5.1, 1.9],
[7.1, 3. , 5.9, 2.1],
[6.3, 2.9, 5.6, 1.8],
[6.5, 3. , 5.8, 2.2],
[7.6, 3. , 6.6, 2.1],
[4.9, 2.5, 4.5, 1.7],
[7.3, 2.9, 6.3, 1.8],
[6.7, 2.5, 5.8, 1.8],
[7.2, 3.6, 6.1, 2.5],
[6.5, 3.2, 5.1, 2. ],
[6.4, 2.7, 5.3, 1.9],
[6.8, 3. , 5.5, 2.1],
[5.7, 2.5, 5. , 2. ],
[5.8, 2.8, 5.1, 2.4],
[6.4, 3.2, 5.3, 2.3],
[6.5, 3. , 5.5, 1.8],
[7.7, 3.8, 6.7, 2.2],
[7.7, 2.6, 6.9, 2.3],
[6. , 2.2, 5. , 1.5],
[6.9, 3.2, 5.7, 2.3],
[5.6, 2.8, 4.9, 2. ],
[7.7, 2.8, 6.7, 2. ],
[6.3, 2.7, 4.9, 1.8],
[6.7, 3.3, 5.7, 2.1],
[7.2, 3.2, 6. , 1.8],
[6.2, 2.8, 4.8, 1.8],
[6.1, 3. , 4.9, 1.8],
[6.4, 2.8, 5.6, 2.1],
[7.2, 3. , 5.8, 1.6],
[7.4, 2.8, 6.1, 1.9],
[7.9, 3.8, 6.4, 2. ],
[6.4, 2.8, 5.6, 2.2],
[6.3, 2.8, 5.1, 1.5],
[6.1, 2.6, 5.6, 1.4],
[7.7, 3. , 6.1, 2.3],
[6.3, 3.4, 5.6, 2.4],
[6.4, 3.1, 5.5, 1.8],
[6. , 3. , 4.8, 1.8],
[6.9, 3.1, 5.4, 2.1],
[6.7, 3.1, 5.6, 2.4],
[6.9, 3.1, 5.1, 2.3],
[5.8, 2.7, 5.1, 1.9],
[6.8, 3.2, 5.9, 2.3],
[6.7, 3.3, 5.7, 2.5],
[6.7, 3. , 5.2, 2.3],
[6.3, 2.5, 5. , 1.9],
[6.5, 3. , 5.2, 2. ],
[6.2, 3.4, 5.4, 2.3],
[5.9, 3. , 5.1, 1.8]]), 'feature_names': ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)'], 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]), 'target_names': array(['setosa', 'versicolor', 'virginica'], dtype='
# Split the iris features and targets into train and test subsets.
# random_state=12 is passed so the split is repeatable between runs.
X_train,Xtest,y_train,y_test=train_test_split(iris.data,iris.target,
random_state=12)
# Print the shape of the train and test feature arrays.
print(X_train.shape)
print(Xtest.shape)
(112, 4)
(38, 4)
# Create a Gaussian naive Bayes classifier and fit it on the training subset.
clf=GaussianNB()
clf.fit(X_train,y_train)
GaussianNB(priors=None, var_smoothing=1e-09)
# Predicted class for every row of the held-out feature array.
clf.predict(Xtest)
array([0, 2, 0, 1, 2, 2, 2, 0, 2, 0, 1, 0, 0, 0, 1, 2, 2, 1, 0, 1, 0, 1,
2, 1, 0, 2, 2, 1, 0, 0, 0, 1, 2, 0, 2, 0, 1, 1])
# Per-class probability estimates for every row of the held-out feature array.
clf.predict_proba(Xtest)
array([[1.00000000e+000, 2.32926069e-017, 1.81656357e-023],
[4.28952299e-154, 2.48576754e-002, 9.75142325e-001],
[1.00000000e+000, 7.45528845e-018, 3.79800436e-024],
[3.59748710e-076, 9.99751806e-001, 2.48194200e-004],
[2.20411871e-239, 4.45798016e-009, 9.99999996e-001],
[1.23795145e-173, 1.95814902e-003, 9.98041851e-001],
[2.45866589e-206, 2.34481513e-007, 9.99999766e-001],
[1.00000000e+000, 2.61810906e-017, 2.67446831e-023],
[3.07448595e-259, 9.07196639e-011, 1.00000000e+000],
[1.00000000e+000, 1.14549667e-010, 3.00314173e-017],
[1.64566141e-101, 9.87428016e-001, 1.25719837e-002],
[1.00000000e+000, 5.62770009e-016, 8.77233124e-022],
[1.00000000e+000, 9.78098062e-014, 4.81247272e-020],
[1.00000000e+000, 3.96616431e-015, 3.17162008e-021],
[2.58159395e-110, 7.85918892e-001, 2.14081108e-001],
[8.01004975e-208, 8.36611920e-006, 9.99991634e-001],
[2.27845999e-193, 5.52863568e-004, 9.99447136e-001],
[2.52133012e-090, 9.94597495e-001, 5.40250471e-003],
[1.00000000e+000, 4.06675976e-017, 2.53312064e-023],
[3.29537129e-123, 9.22312452e-001, 7.76875484e-002],
[1.00000000e+000, 4.66765440e-017, 1.99662820e-023],
[7.54708431e-074, 9.99690656e-001, 3.09343577e-004],
[6.27117035e-136, 1.83265786e-001, 8.16734214e-001],
[4.68960290e-103, 9.82756006e-001, 1.72439943e-002],
[1.00000000e+000, 2.15636250e-014, 2.25086772e-020],
[5.92924136e-199, 5.41122729e-007, 9.99999459e-001],
[4.07679795e-141, 7.38689632e-002, 9.26131037e-001],
[2.77929930e-083, 9.99806458e-001, 1.93541791e-004],
[1.00000000e+000, 4.48465501e-017, 4.36464333e-023],
[1.00000000e+000, 1.64440161e-014, 1.13341951e-021],
[1.00000000e+000, 8.68192867e-017, 6.71630735e-023],
[7.15007036e-050, 9.99997055e-001, 2.94492877e-006],
[1.73414331e-178, 2.06441448e-003, 9.97935586e-001],
[1.00000000e+000, 4.90168069e-019, 3.86471595e-024],
[1.35600871e-156, 2.28929843e-002, 9.77107016e-001],
[1.00000000e+000, 1.78544881e-015, 1.09390819e-020],
[1.86074590e-058, 9.99948860e-001, 5.11400371e-005],
[3.69548269e-057, 9.99992986e-001, 7.01435008e-006]])
# Score the fitted classifier's test-set predictions against the true test labels.
accuracy_score(y_test,clf.predict(Xtest))
0.9736842105263158
import numpy as np
import pandas as pd
import random
# Read the local iris file with header=None, so the first line is treated as
# data and the columns get integer labels; then preview the first 10 rows.
dataSet=pd.read_csv('iris.txt',header=None)
dataSet.head(10)
0
1
2
3
4
0
5.1
3.5
1.4
0.2
Iris-setosa
1
4.9
3.0
1.4
0.2
Iris-setosa
2
4.7
3.2
1.3
0.2
Iris-setosa
3
4.6
3.1
1.5
0.2
Iris-setosa
4
5.0
3.6
1.4
0.2
Iris-setosa
5
5.4
3.9
1.7
0.4
Iris-setosa
6
4.6
3.4
1.4
0.3
Iris-setosa
7
5.0
3.4
1.5
0.2
Iris-setosa
8
4.4
2.9
1.4
0.2
Iris-setosa
9
4.9
3.1
1.5
0.1
Iris-setosa
# Inspect the frame's dimensions and its row index (as an Index object, then
# as a plain Python list).
dataSet.shape
dataSet.index
list(dataSet.index)
[0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59,
60,
61,
62,
63,
64,
65,
66,
67,
68,
69,
70,
71,
72,
73,
74,
75,
76,
77,
78,
79,
80,
81,
82,
83,
84,
85,
86,
87,
88,
89,
90,
91,
92,
93,
94,
95,
96,
97,
98,
99,
100,
101,
102,
103,
104,
105,
106,
107,
108,
109,
110,
111,
112,
113,
114,
115,
116,
117,
118,
119,
120,
121,
122,
123,
124,
125,
126,
127,
128,
129,
130,
131,
132,
133,
134,
135,
136,
137,
138,
139,
140,
141,
142,
143,
144,
145,
146,
147,
148,
149]
import random
def randSplit(dataSet, rate):
    """Randomly partition the rows of ``dataSet`` into a train and a test frame.

    The input frame is left untouched, and rows are selected by position, so
    the frame's index may hold any labels.

    Parameters
    ----------
    dataSet : pandas.DataFrame
        Frame whose rows are to be split.
    rate : float
        Fraction of rows, within ``[0, 1]``, that goes to the training frame.
        The training size is ``int(n_rows * rate)``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train, test)``: disjoint row subsets that together contain every
        row of ``dataSet``, each re-indexed from 0.

    Raises
    ------
    ValueError
        If ``rate`` is not within ``[0, 1]``.
    """
    if not 0 <= rate <= 1:
        raise ValueError("rate must be within [0, 1], got %r" % (rate,))

    n = dataSet.shape[0]
    m = int(n * rate)

    # Shuffle the labels 0..n-1; the row at position p conceptually receives
    # label shuffled[p].  Sorting positions by that label gives the row order
    # "label 0, label 1, ...", so the first m rows form the training set.
    # Working on positions avoids overwriting the caller's index.
    shuffled = list(range(n))
    random.shuffle(shuffled)
    order = sorted(range(n), key=shuffled.__getitem__)

    train = dataSet.iloc[order[:m]].reset_index(drop=True)
    test = dataSet.iloc[order[m:]].reset_index(drop=True)
    return train, test
# Randomly split the frame with rate 0.8: the first returned frame is the
# training set, the second the test set.  Then display the training frame.
x_train,x_test=randSplit(dataSet,0.8)
x_train
0
1
2
3
4
0
5.0
3.5
1.3
0.3
Iris-setosa
1
5.6
2.7
4.2
1.3
Iris-versicolor
2
6.3
3.3
4.7
1.6
Iris-versicolor
3
4.4
3.0
1.3
0.2
Iris-setosa
4
5.5
2.6
4.4
1.2
Iris-versicolor
5
6.4
3.1
5.5
1.8
Iris-virginica
6
4.9
2.4
3.3
1.0
Iris-versicolor
7
5.4
3.9
1.3
0.4
Iris-setosa
8
6.0
3.4
4.5
1.6
Iris-versicolor
9
6.4
2.8
5.6
2.2
Iris-virginica
10
5.0
3.5
1.6
0.6
Iris-setosa
11
6.0
2.7
5.1
1.6
Iris-versicolor
12
5.0
2.0
3.5
1.0
Iris-versicolor
13
4.9
3.0
1.4
0.2
Iris-setosa
14
5.1
3.3
1.7
0.5
Iris-setosa
15
6.3
2.5
4.9
1.5
Iris-versicolor
16
5.6
2.9
3.6
1.3
Iris-versicolor
17
5.0
3.3
1.4
0.2
Iris-setosa
18
7.3
2.9
6.3
1.8
Iris-virginica
19
4.6
3.2
1.4
0.2
Iris-setosa
20
5.8
4.0
1.2
0.2
Iris-setosa
21
6.5
3.0
5.2
2.0
Iris-virginica
22
5.5
2.3
4.0
1.3
Iris-versicolor
23
5.1
3.8
1.9
0.4
Iris-setosa
24
6.3
2.9
5.6
1.8
Iris-virginica
25
5.2
2.7
3.9
1.4
Iris-versicolor
26
6.7
2.5
5.8
1.8
Iris-virginica
27
4.9
2.5
4.5
1.7
Iris-virginica
28
6.7
3.0
5.2
2.3
Iris-virginica
29
7.1
3.0
5.9
2.1
Iris-virginica
...
...
...
...
...
...
90
7.2
3.2
6.0
1.8
Iris-virginica
91
7.0
3.2
4.7
1.4
Iris-versicolor
92
5.4
3.4
1.7
0.2
Iris-setosa
93
5.8
2.7
4.1
1.0
Iris-versicolor
94
6.8
3.0
5.5
2.1
Iris-virginica
95
5.1
3.7
1.5
0.4
Iris-setosa
96
5.6
3.0
4.1
1.3
Iris-versicolor
97
5.7
2.9
4.2
1.3
Iris-versicolor
98
6.0
2.2
4.0
1.0
Iris-versicolor
99
7.6
3.0
6.6
2.1
Iris-virginica
100
6.5
3.2
5.1
2.0
Iris-virginica
101
4.5
2.3
1.3
0.3
Iris-setosa
102
5.7
2.5
5.0
2.0
Iris-virginica
103
5.4
3.4
1.5
0.4
Iris-setosa
104
7.4
2.8
6.1
1.9
Iris-virginica
105
5.8
2.7
5.1
1.9
Iris-virginica
106
6.2
3.4
5.4
2.3
Iris-virginica
107
4.6
3.6
1.0
0.2
Iris-setosa
108
6.1
3.0
4.9
1.8
Iris-virginica
109
5.0
3.6
1.4
0.2
Iris-setosa
110
5.5
2.5
4.0
1.3
Iris-versicolor
111
6.2
2.8
4.8
1.8
Iris-virginica
112
6.5
3.0
5.8
2.2
Iris-virginica
113
6.7
3.1
4.4
1.4
Iris-versicolor
114
4.8
3.0
1.4
0.1
Iris-setosa
115
6.1
2.9
4.7
1.4
Iris-versicolor
116
5.9
3.2
4.8
1.8
Iris-versicolor
117
4.9
3.1
1.5
0.1
Iris-setosa
118
6.3
3.4
5.6
2.4
Iris-virginica
119
5.8
2.7
3.9
1.2
Iris-versicolor
120 rows × 5 columns
x_test
0
1
2
3
4
0
5.7
4.4
1.5
0.4
Iris-setosa
1
6.4
2.7
5.3
1.9
Iris-virginica
2
6.0
3.0
4.8
1.8
Iris-virginica
3
5.1
3.8
1.5
0.3
Iris-setosa
4
4.8
3.4
1.6
0.2
Iris-setosa
5
4.6
3.1
1.5
0.2
Iris-setosa
6
6.5
3.0
5.5
1.8
Iris-virginica
7
4.9
3.1
1.5
0.1
Iris-setosa
8
6.3
2.5
5.0
1.9
Iris-virginica
9
5.4
3.9
1.7
0.4
Iris-setosa
10
5.1
3.4
1.5
0.2
Iris-setosa
11
5.1
3.5
1.4
0.2
Iris-setosa
12
4.8
3.0
1.4
0.3
Iris-setosa
13
6.6
2.9
4.6
1.3
Iris-versicolor
14
5.9
3.0
5.1
1.8
Iris-virginica
15
5.2
3.4
1.4
0.2
Iris-setosa
16
7.7
2.6
6.9
2.3
Iris-virginica
17
5.4
3.0
4.5
1.5
Iris-versicolor
18
5.8
2.7
5.1
1.9
Iris-virginica
19
6.7
3.0
5.0
1.7
Iris-versicolor
20
5.8
2.6
4.0
1.2
Iris-versicolor
21
4.7
3.2
1.6
0.2
Iris-setosa
22
6.3
3.3
6.0
2.5
Iris-virginica
23
5.0
2.3
3.3
1.0
Iris-versicolor
24
5.3
3.7
1.5
0.2
Iris-setosa
25
5.7
3.8
1.7
0.3
Iris-setosa
26
6.7
3.1
4.7
1.5
Iris-versicolor
27
7.9
3.8
6.4
2.0
Iris-virginica
28
5.1
2.5
3.0
1.1
Iris-versicolor
29
6.2
2.9
4.3
1.3
Iris-versicolor
# Two ways of selecting the same class-label column; the second assignment
# overwrites the first.
labels=x_train.loc[:,4]#label-based indexing
labels=x_train.iloc[:,-1]#position-based indexing
labels
0 Iris-setosa
1 Iris-versicolor
2 Iris-versicolor
3 Iris-setosa
4 Iris-versicolor
5 Iris-virginica
6 Iris-versicolor
7 Iris-setosa
8 Iris-versicolor
9 Iris-virginica
10 Iris-setosa
11 Iris-versicolor
12 Iris-versicolor
13 Iris-setosa
14 Iris-setosa
15 Iris-versicolor
16 Iris-versicolor
17 Iris-setosa
18 Iris-virginica
19 Iris-setosa
20 Iris-setosa
21 Iris-virginica
22 Iris-versicolor
23 Iris-setosa
24 Iris-virginica
25 Iris-versicolor
26 Iris-virginica
27 Iris-virginica
28 Iris-virginica
29 Iris-virginica
...
90 Iris-virginica
91 Iris-versicolor
92 Iris-setosa
93 Iris-versicolor
94 Iris-virginica
95 Iris-setosa
96 Iris-versicolor
97 Iris-versicolor
98 Iris-versicolor
99 Iris-virginica
100 Iris-virginica
101 Iris-setosa
102 Iris-virginica
103 Iris-setosa
104 Iris-virginica
105 Iris-virginica
106 Iris-virginica
107 Iris-setosa
108 Iris-virginica
109 Iris-setosa
110 Iris-versicolor
111 Iris-virginica
112 Iris-virginica
113 Iris-versicolor
114 Iris-setosa
115 Iris-versicolor
116 Iris-versicolor
117 Iris-setosa
118 Iris-virginica
119 Iris-versicolor
Name: 4, Length: 120, dtype: object
# Count how many training rows carry each value of the last column.
labels=x_train.iloc[:,-1].value_counts()
labels
Index(['Iris-versicolor', 'Iris-virginica', 'Iris-setosa'], dtype='object')
# Keep only the distinct class names (the index of the value counts).
labels=x_train.iloc[:,-1].value_counts().index
labels
Index(['Iris-versicolor', 'Iris-virginica', 'Iris-setosa'], dtype='object')
### Compute the variance and mean
# Exploratory pass: for every class label, pull out that class's training
# rows (`item`) and their feature columns (`m`, everything but the last
# column).  Nothing is accumulated yet; after the loop `item` and `m` refer
# to the last label visited.
mean = []
std = []
for i in labels:
    item = x_train[x_train.iloc[:, -1] == i]
    m = item.iloc[:, :-1]
item
0
1
2
3
4
0
6.0
2.2
5.0
1.5
Iris-virginica
2
6.0
3.0
4.8
1.8
Iris-virginica
3
5.8
2.7
5.1
1.9
Iris-virginica
6
7.2
3.6
6.1
2.5
Iris-virginica
11
6.3
3.3
6.0
2.5
Iris-virginica
13
6.7
3.0
5.2
2.3
Iris-virginica
18
6.8
3.0
5.5
2.1
Iris-virginica
22
7.4
2.8
6.1
1.9
Iris-virginica
25
6.3
3.4
5.6
2.4
Iris-virginica
26
6.4
3.1
5.5
1.8
Iris-virginica
31
6.1
3.0
4.9
1.8
Iris-virginica
36
7.2
3.0
5.8
1.6
Iris-virginica
37
6.3
2.9
5.6
1.8
Iris-virginica
40
6.1
2.6
5.6
1.4
Iris-virginica
46
7.3
2.9
6.3
1.8
Iris-virginica
52
6.3
2.5
5.0
1.9
Iris-virginica
63
7.9
3.8
6.4
2.0
Iris-virginica
64
7.7
3.8
6.7
2.2
Iris-virginica
65
6.2
3.4
5.4
2.3
Iris-virginica
69
4.9
2.5
4.5
1.7
Iris-virginica
72
6.7
3.3
5.7
2.1
Iris-virginica
73
6.5
3.0
5.2
2.0
Iris-virginica
74
6.9
3.2
5.7
2.3
Iris-virginica
81
6.3
2.7
4.9
1.8
Iris-virginica
82
6.9
3.1
5.4
2.1
Iris-virginica
83
6.4
2.7
5.3
1.9
Iris-virginica
84
7.1
3.0
5.9
2.1
Iris-virginica
86
7.7
2.6
6.9
2.3
Iris-virginica
90
7.7
3.0
6.1
2.3
Iris-virginica
93
5.6
2.8
4.9
2.0
Iris-virginica
94
6.5
3.0
5.8
2.2
Iris-virginica
97
6.4
2.8
5.6
2.1
Iris-virginica
101
6.3
2.8
5.1
1.5
Iris-virginica
102
6.4
2.8
5.6
2.2
Iris-virginica
109
6.8
3.2
5.9
2.3
Iris-virginica
111
7.2
3.2
6.0
1.8
Iris-virginica
112
6.9
3.1
5.1
2.3
Iris-virginica
m
0 4.970270
1 3.383784
2 1.443243
3 0.243243
dtype: float64
# Exploratory pass: for every class label, pull out that class's training
# rows and take the column-wise mean of the feature columns (everything but
# the last column).  After the loop `m` holds the means of the last label.
mean = []
std = []
for i in labels:
    item = x_train[x_train.iloc[:, -1] == i]
    m = item.iloc[:, :-1].mean()
m
0 4.970270
1 3.383784
2 1.443243
3 0.243243
dtype: float64
# Exploratory pass: for every class label, compute the per-feature mean `m`
# and the per-feature population variance `s` (squared deviations summed
# per column, divided by the number of rows in the class).  After the loop
# `item`, `m` and `s` refer to the last label visited.
mean = []
std = []
for i in labels:
    item = x_train.loc[x_train.iloc[:, -1] == i, :]
    m = item.iloc[:, :-1].mean()
    # Reduce explicitly along axis 0 (one value per feature column).
    # np.sum(DataFrame) relies on the deprecated axis=None behaviour, which
    # is slated to collapse the whole frame to a single scalar.
    s = ((item.iloc[:, :-1] - m) ** 2).sum(axis=0) / item.shape[0]
(item.iloc[:,:-1]-m)**2
0
1
2
3
0
0.000884
0.013506
0.020519
0.003221
3
0.325208
0.147290
0.020519
0.001870
7
0.184668
0.266479
0.020519
0.024573
10
0.000884
0.013506
0.024573
0.127275
13
0.004938
0.147290
0.001870
0.001870
14
0.016830
0.007020
0.065924
0.065924
17
0.000884
0.007020
0.001870
0.001870
19
0.137100
0.033776
0.001870
0.001870
20
0.688451
0.379722
0.059167
0.001870
23
0.016830
0.173236
0.208627
0.024573
37
0.325208
0.234047
0.001870
0.001870
40
0.052776
0.013506
0.003221
0.001870
43
0.000884
0.033776
0.059167
0.001870
44
0.137100
0.000263
0.001870
0.003221
45
0.000884
0.147290
0.024573
0.001870
49
0.184668
0.099993
0.003221
0.001870
56
0.028992
0.000263
0.208627
0.001870
59
0.449262
0.147290
0.117816
0.020519
65
0.016830
0.173236
0.024573
0.001870
71
0.016830
0.013506
0.001870
0.003221
74
0.073046
0.033776
0.020519
0.001870
75
0.280614
0.666209
0.001870
0.001870
76
0.325208
0.033776
0.020519
0.001870
77
0.004938
0.080533
0.003221
0.020519
80
0.052776
0.512966
0.003221
0.020519
81
0.000884
0.000263
0.003221
0.001870
84
0.000884
0.000263
0.024573
0.024573
86
0.280614
0.013506
0.020519
0.001870
87
0.028992
0.080533
0.024573
0.001870
92
0.184668
0.000263
0.065924
0.001870
95
0.016830
0.099993
0.003221
0.024573
101
0.221154
1.174587
0.020519
0.003221
103
0.184668
0.000263
0.003221
0.024573
107
0.137100
0.046749
0.196465
0.001870
109
0.000884
0.046749
0.001870
0.001870
114
0.028992
0.147290
0.001870
0.020519
117
0.004938
0.080533
0.003221
0.020519
s
0 0.119386
1 0.137034
2 0.034887
3 0.012725
dtype: float64
# Per-class Gaussian parameters estimated from the training frame.
# NOTE: despite their names, `std` / `stds` hold population VARIANCES
# (sum of squared deviations / class row count), not standard deviations.
# The names are kept because later cells reference them.
mean = []
std = []
for i in labels:
    # Rows of the training frame whose last column equals this class label.
    item = x_train.loc[x_train.iloc[:, -1] == i, :]
    # Column-wise mean of the feature columns (everything but the last).
    m = item.iloc[:, :-1].mean()
    # Reduce explicitly along axis 0 (one value per feature column).
    # np.sum(DataFrame) relies on the deprecated axis=None behaviour, which
    # is slated to collapse the whole frame to a single scalar.
    s = ((item.iloc[:, :-1] - m) ** 2).sum(axis=0) / item.shape[0]
    mean.append(m)
    std.append(s)

# One row per class label, one column per feature.
means = pd.DataFrame(mean, index=labels)
stds = pd.DataFrame(std, index=labels)
mean
[0 5.935714
1 2.766667
2 4.276190
3 1.326190
dtype: float64, 0 6.600000
1 2.978049
2 5.548780
3 2.034146
dtype: float64, 0 4.970270
1 3.383784
2 1.443243
3 0.243243
dtype: float64]
std
[0 0.226105
1 0.101270
2 0.174195
3 0.036695
dtype: float64, 0 0.370732
1 0.092445
2 0.264450
3 0.077858
dtype: float64, 0 0.119386
1 0.137034
2 0.034887
3 0.012725
dtype: float64]
means
0
1
2
3
Iris-versicolor
5.935714
2.766667
4.276190
1.326190
Iris-virginica
6.600000
2.978049
5.548780
2.034146
Iris-setosa
4.970270
3.383784
1.443243
0.243243
stds
0
1
2
3
Iris-versicolor
0.226105
0.101270
0.174195
0.036695
Iris-virginica
0.370732
0.092445
0.264450
0.077858
Iris-setosa
0.119386
0.137034
0.034887
0.012725
# Walk the test frame row by row, taking every column but the last as the
# sample's feature values.  After the loop `iset` is the last row's features.
for j in range(len(x_test)):
    iset = x_test.iloc[j, :-1]
iset
0 6.2
1 2.9
2 4.3
3 1.3
Name: 29, dtype: object
# Walk the test frame row by row, taking every column but the last as the
# sample's feature values, converted to a plain Python list.  After the
# loop `iset` is the last row's feature list.
for j in range(len(x_test)):
    iset = x_test.iloc[j, :-1].tolist()
iset
[6.2, 2.9, 4.3, 1.3]
# For each test row, evaluate the Gaussian density of every feature under
# every class, using the per-class means in `means` and treating `stds` as
# the variance term of the density.  `iprob` is overwritten on every
# iteration, so after the loop it holds the densities for the last test row
# only.
for j in range(len(x_test)):
    iset = x_test.iloc[j, :-1].tolist()
    iprob = np.exp(-1 * (iset - means) ** 2 / (stds * 2)) / np.sqrt(2 * np.pi * stds)
iset-means
0
1
2
3
Iris-versicolor
0.264286
0.133333
0.023810
-0.026190
Iris-virginica
-0.400000
-0.078049
-1.248780
-0.734146
Iris-setosa
1.229730
-0.483784
2.856757
1.056757
iprob
0
1
2
3
Iris-versicolor
0.718911
1.148286
9.543013e-01
2.063229e+00
Iris-virginica
0.528037
1.269579
4.066561e-02
4.488144e-02
Iris-setosa
0.002051
0.458797
3.406877e-51
3.100002e-19
iprob[0]
Iris-versicol