ID3 算法实现决策树
1
/**/
/*
2![]()
3
ID3 算法实现决策树
4![]()
5![]()
6
----问题描述:

Suppose we want ID3 to decide whether the weather is amenable to playing baseball.
Over the course of 2 weeks, data is collected to help ID3 build a decision tree (see Table 1).

The target classification is "should we play baseball?", which can be yes or no.

The weather attributes are outlook, temperature, humidity, and wind speed.
They can have the following values:

  • outlook     = { sunny, overcast, rain }
  • temperature = { hot, mild, cool }
  • humidity    = { high, normal }
  • wind        = { weak, strong }
18![]()
19
Examples of set S are:

Table 1

Day  Outlook   Temperature  Humidity  Wind    Play ball
D1   Sunny     Hot          High      Weak    No
D2   Sunny     Hot          High      Strong  No
D3   Overcast  Hot          High      Weak    Yes
D4   Rain      Mild         High      Weak    Yes
D5   Rain      Cool         Normal    Weak    Yes
D6   Rain      Cool         Normal    Strong  No
D7   Overcast  Cool         Normal    Strong  Yes
D8   Sunny     Mild         High      Weak    No
D9   Sunny     Cool         Normal    Weak    Yes
D10  Rain      Mild         Normal    Weak    Yes
D11  Sunny     Mild         Normal    Strong  Yes
D12  Overcast  Mild         High      Strong  Yes
D13  Overcast  Hot          Normal    Weak    Yes
D14  Rain      Mild         High      Strong  No
39![]()
40![]()
41
----输入:

若干行,每行 5 个字符串,依次表示

    Outlook Temperature Humidity Wind Play ball

如上表。

----输出:

决策树。
53![]()
54![]()
55
----分析:

经典 ID3 算法。

代码假设训练集相容。

非常经典的算法,第一次实现,如此仓促以至于得到如此恶心的代码!
我所写过的最恶心的代码!真想删了重新写。
63![]()
64
*/
65![]()
66![]()
67
#include
<
iostream
>
68
#include
<
cstdio
>
69
#include
<
string
>
70
#include
<
map
>
71
#include
<
iomanip
>
72
#include
<
cmath
>
73![]()
74
using
namespace
std;
75![]()
76
// Presumably an upper bound on the number of training examples; it is not
// referenced by any code visible in this file.
const int EXAMPLE_NUM = 1009;

// Number of attributes stored per example (Outlook, Temperature, Humidity, Wind).
const int PROP_NUM = 4;

// MAX_PROP[p] is the number of distinct values attribute p can take.
const int MAX_PROP[ PROP_NUM ] = { 3, 3, 2, 2 };
80![]()
81
struct
Example
82![]()
{
83
int prop[ PROP_NUM ];
84
bool ignp[ PROP_NUM ];
85
bool play;
86![]()
87
int node;
88
Example *link;
89
}
;
90![]()
91
struct Node;

// One outgoing branch of a decision node; branches of a node form a singly linked list.
struct Link
{
    int prop;    // value index of the parent's split attribute that selects this branch
    Node *node;  // subtree reached through this branch
    Link *link;  // next sibling branch, NULL at the end
};
99![]()
100
struct
Node
101![]()
{
102
int pid;
103
Link *link;
104
bool play;
105
}
;
106![]()
107
map
<
string
,
int
>
Str2Val;
108
map
<
int
,
string
>
Val2Str[ PROP_NUM ];
109![]()
string
Pro2Str[ PROP_NUM ]
=
{ "Outlook", "Temperature", "Humidity", "Wind" }
;
110![]()
111![]()
112![]()
// Entropy (computed with the natural logarithm) of the Yes/No split among the
// examples currently assigned to tree node `nd`.
//   s  - head of the example list (may be NULL)
//   nd - id of the tree node whose examples are measured
//   ts - out: number of examples assigned to `nd`
// Returns 0 when no example is assigned to `nd`.
double entropy( Example *s, int nd, int &ts )
{
    int ct = 0, cf = 0, c = 0;
    double es = 0;

    while ( NULL != s ) {
        if ( nd == s->node ) {
            ++c;
            if ( s->play ) {
                ++ct;
            }
            else {
                ++cf;
            }
        }
        s = s->link;
    }

    ts = c;
    if ( 0 == c ) {
        return 0;
    }

    // An empty class contributes nothing; skipping it also avoids log(0).
    if ( 0 != ct ) {
        es += -(((double)(ct)) / c) * log( ((double)(ct)) / c );
    }
    if ( 0 != cf ) {
        es += -(((double)(cf)) / c) * log( ((double)(cf)) / c );
    }
    return es;
}
139
//
pid 合法
140![]()
double
gain( Example
*
s,
int
nd,
int
pid )
{
141
Example *e;
142
double es, ev;
143
int ts, tv, i;
144![]()
145
es = entropy( s, nd, ts );
146![]()
147![]()
if ( 0 == ts )
{
148
return 0;
149
}
150![]()
151![]()
for ( i = 0; i < MAX_PROP[ pid ]; ++i )
{
152![]()
for ( e = s; NULL != e; e = e->link )
{
153![]()
if ( (nd == e->node) && (i == e->prop[pid]) && (! e->ignp[pid]) )
{
154
e->node = nd + 1;
155
}
156
}
157![]()
158
ev = entropy( s, nd+1, tv );
159![]()
160![]()
for ( e = s; NULL != e; e = e->link )
{
161![]()
if ( nd+1 == e->node )
{
162
e->node = nd;
163
}
164
}
165![]()
166
es -= ev * ( ((double)(tv)) / ts );
167
}
168![]()
169
return es;
170
}
171![]()
172![]()
// Picks the attribute with the largest information gain for tree node `nd`.
//   s  - head of the example list
//   nd - id of the tree node being split
// An attribute is skipped when any example on `nd` has it flagged in ignp.
// Returns the chosen attribute index, or -1 when every attribute is skipped.
int gainMaxId( Example *s, int nd )
{
    double m = -1e100, tm;
    int k = -1, i;
    bool ign[ PROP_NUM ] = { 0 };

    // Collect the attributes already consumed by the examples on this node.
    for ( Example *e = s; NULL != e; e = e->link ) {
        if ( e->node == nd ) {
            for ( i = 0; i < PROP_NUM; ++i ) {
                if ( e->ignp[ i ] ) {
                    ign[ i ] = true;
                }
            }
        }
    }

    for ( i = 0; i < PROP_NUM; ++i ) {
        if ( ign[ i ] ) {
            continue;
        }

        tm = gain( s, nd, i );
        if ( tm > m ) {   // strict comparison: the lowest index wins ties
            m = tm;
            k = i;
        }
    }

    return k;
}
201
//
s 非空
202![]()
bool
sameDecd( Example
*
s,
int
nd,
bool
&
decd )
{
203
bool set = false;
204
bool play;
205![]()
while ( NULL != s )
{
206![]()
if ( s->node == nd )
{
207
play = s->play;
208
set = true;
209
break;
210
}
211
s = s->link;
212
}
213![]()
if ( ! set )
{
214![]()
return false; /**/////////
215
}
216![]()
while ( NULL != s )
{
217![]()
if ( (s->node == nd) && (s->play != play) )
{
218
return false;
219
}
220
s = s->link;
221
}
222
decd = play;
223
return true;
224
}
225
//
用完所有属性
226![]()
bool
isLeaf( Example
*
s,
int
nd,
bool
&
decd )
{
227
int i;
228![]()
while ( NULL != s )
{
229![]()
if ( s->node == nd )
{
230![]()
for ( i = 0; (i < PROP_NUM)&&(s->ignp[i]); ++i )
{
231
}
232![]()
if ( i < PROP_NUM )
{
233
return false;
234
}
235
decd = s->play;
236
}
237
s = s->link;
238
}
239
return true;
240
}
241![]()
242
int
node;
243![]()
Node
*
createTreeSub( Example
*
example )
{
244
Node *res = new Node;
245
res->link = NULL;
246
res->pid = -1;
247![]()
if ( sameDecd( example, node, res->play ) )
{
248
return res;
249
}
250![]()
if ( isLeaf( example, node, res->play ) )
{
251
return res;
252
}
253![]()
254
res->pid = gainMaxId( example, node );
255![]()
256
int i, c, nd = node;
257
Example *e;
258![]()
for ( i = 0; i < MAX_PROP[ res->pid ]; ++i )
{
259
e = example;
260
c = 0;
261
++node;
262![]()
while ( NULL != e )
{
263![]()
if ( (nd == e->node) && (i == e->prop[ res->pid ]) )
{
264
e->ignp[ res->pid ] = true;
265
e->node = node;
266
++c;
267
}
268
e = e->link;
269
}
270![]()
if ( 0 < c )
{
271
Link *link = new Link;
272
link->node = createTreeSub( example );
273
link->prop = i;
274
link->link = res->link;
275
res->link = link;
276
}
277
}
278![]()
279
return res;
280
}
281![]()
282![]()
// Builds a decision tree from the example list.
//   example - head of the example list; every element's ignp flags are cleared
//             and its node id reset to 0 before building starts
// Returns the root of the new tree, or NULL when the list is empty.
Node *createTree( Example *example )
{
    if ( NULL == example ) {
        return NULL;
    }

    for ( Example *ptr = example; NULL != ptr; ptr = ptr->link ) {
        for ( int i = 0; i < PROP_NUM; ++i ) {
            ptr->ignp[ i ] = false;
        }
        ptr->node = 0;
    }

    node = 0;   // all examples start on the root, which has id 0
    return createTreeSub( example );
}
299![]()
300
void
outputTreeSub( Node
*
tree,
int
dep );
301![]()
302![]()
void
outputLink( Link
*
link,
int
dep,
int
pid )
{
303![]()
if ( (NULL == link) || (0 > dep) )
{
304
return;
305
}
306![]()
307![]()
for ( int i = 0; i < dep; ++i )
{
308
cout << setw(16) << " ";
309
}
310
cout << left << setw(16) << Val2Str[pid][link->prop];
311
outputTreeSub( link->node, dep+1 );
312![]()
313
outputLink( link->link, dep, pid );
314
}
315![]()
316![]()
// Prints the subtree rooted at `tree`.
//   tree - subtree root (NULL prints nothing)
//   dep  - column depth of this node; a negative value prints nothing
// A leaf (pid < 0) prints "Yes *" or "No *". An internal node prints its
// attribute name left-aligned in 16 columns, ends the line, then prints its
// branches one column deeper.
void outputTreeSub( Node *tree, int dep )
{
    if ( (NULL == tree) || (0 > dep) ) {
        return;
    }

    if ( 0 > tree->pid ) {
        cout << (tree->play ? "Yes *" : "No *") << endl;
        return;
    }

    cout << left << setw(16) << Pro2Str[ tree->pid ] << endl;
    outputLink( tree->link, dep + 1, tree->pid );
}
329![]()
330![]()
// Prints the whole decision tree to standard output, starting at depth 0.
void outputTree( Node *tree )
{
    outputTreeSub( tree, 0 );
}
333![]()
334![]()
// Frees the tree rooted at *pTree and sets *pTree to NULL.
//   pTree - address of the root pointer; a NULL address or a NULL root is a no-op
// Every child subtree and every Link in the branch list is released. The Link
// objects are allocated with `new` in createTreeSub, so they must be deleted
// here as well as the nodes.
void destroyTree( Node **pTree )
{
    if ( (NULL == pTree) || (NULL == *pTree) ) {
        return;
    }

    Link *link = (*pTree)->link;
    while ( NULL != link ) {
        Link *next = link->link;   // read the successor before this link is freed
        destroyTree( &(link->node) );
        delete link;
        link = next;
    }

    delete *pTree;
    *pTree = NULL;
}
347![]()
348![]()
// Frees every element of the example list and sets *pExample to NULL.
//   pExample - address of the list head pointer; a NULL address or an empty
//              list is a no-op
void destroyExample( Example **pExample )
{
    if ( (NULL == pExample) || (NULL == *pExample) ) {
        return;
    }

    Example *head = *pExample;
    while ( NULL != head ) {
        Example *p = head;
        head = head->link;   // advance before the current element is freed
        delete p;
    }
    *pExample = NULL;
}
360![]()
361![]()
void
init()
{
362
Val2Str[ 0 ][ 0 ] = "Sunny";
363
Val2Str[ 0 ][ 1 ] = "Overcast";
364
Val2Str[ 0 ][ 2 ] = "Rain";
365
Str2Val[ "Sunny" ] = 0;
366
Str2Val[ "Overcast" ] = 1;
367
Str2Val[ "Rain" ] = 2;
368
369
Val2Str[ 1 ][ 0 ] = "Hot";
370
Val2Str[ 1 ][ 1 ] = "Mild";
371
Val2Str[ 1 ][ 2 ] = "Cool";
372
Str2Val[ "Hot" ] = 0;
373
Str2Val[ "Mild" ] = 1;
374
Str2Val[ "Cool" ] = 2;
375![]()
376
Val2Str[ 2 ][ 0 ] = "High";
377
Val2Str[ 2 ][ 1 ] = "Normal";
378
Str2Val[ "High" ] = 0;
379
Str2Val[ "Normal" ] = 1;
380![]()
381
Val2Str[ 3 ][ 0 ] = "Weak";
382
Val2Str[ 3 ][ 1 ] = "Strong";
383
Str2Val[ "Weak" ] = 0;
384
Str2Val[ "Strong" ] = 1;
385
}
386![]()
387![]()
Example
*
inputExample()
{
388
Example *preHead = new Example;
389
Example *ptr;
390
string Outlook, Temperature, Humidity, Wind, Play;
391![]()
392
preHead->link = NULL;
393
ptr = preHead;
394![]()
395![]()
while ( cin >> Outlook >> Temperature >> Humidity >> Wind >> Play )
{
396
ptr->link = new Example;
397
ptr = ptr->link;
398
ptr->link = NULL;
399![]()
400
if ( (Str2Val.find( Outlook ) == Str2Val.end()) ||
401
(Str2Val.find( Temperature ) == Str2Val.end()) ||
402
(Str2Val.find( Humidity ) == Str2Val.end()) ||
403
(Str2Val.find( Wind ) == Str2Val.end())
404![]()
)
{
405
destroyExample( &preHead );
406
return NULL;
407
}
408
ptr->prop[ 0 ] = Str2Val[ Outlook ];
409
ptr->prop[ 1 ] = Str2Val[ Temperature ];
410
ptr->prop[ 2 ] = Str2Val[ Humidity ];
411
ptr->prop[ 3 ] = Str2Val[ Wind ];
412![]()
413![]()
if ( "Yes" == Play )
{
414
ptr->play = true;
415
}
416![]()
else if ( "No" == Play )
{
417
ptr->play = false;
418
}
419![]()
else
{
420
destroyExample( &preHead );
421
return NULL;
422
}
423
}
424![]()
425
ptr = preHead->link;
426
delete preHead;
427
return ptr;
428
}
429![]()
430![]()
int
main()
{
431
init();
432![]()
433
Example *example = inputExample();
434![]()
if ( NULL == example )
{
435
cout << "输入不合法" << endl;
436
return 0;
437
}
438![]()
439
Node *tree = createTree( example );
440![]()
441
outputTree( tree );
442![]()
443
destroyTree( &tree );
444
destroyExample( &example );
445
return 0;
446
}
447![]()
448![]()
449![]()
/**/
/*
450![]()
451
输入:

Sunny Hot High Weak No
Sunny Hot High Strong No
Overcast Hot High Weak Yes
Rain Mild High Weak Yes
Rain Cool Normal Weak Yes
Rain Cool Normal Strong No
Overcast Cool Normal Strong Yes
Sunny Mild High Weak No
Sunny Cool Normal Weak Yes
Rain Mild Normal Weak Yes
Sunny Mild Normal Strong Yes
Overcast Mild High Strong Yes
Overcast Hot Normal Weak Yes
Rain Mild High Strong No
466![]()
467
输出:

Outlook
                Rain            Wind
                                                Strong          No *
                                                Weak            Yes *
                Overcast        Yes *
                Sunny           Humidity
                                                Normal          Yes *
                                                High            No *
477![]()
478
*/
479
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79

80
81
82

83
84
85
86
87
88
89
90
91
92
93
94

95
96
97
98
99
100
101

102
103
104
105
106
107
108
109

110
111
112

113
114
115

116

117
118

119
120
121

122
123
124
125
126
127
128

129
130
131

132
133
134

135
136
137
138
139
140

141
142
143
144
145
146
147

148
149
150
151

152

153

154
155
156
157
158
159
160

161

162
163
164
165
166
167
168
169
170
171
172

173
174
175

176
177

178

179

180

181
182
183
184
185
186
187

188

189
190
191
192
193

194
195
196
197
198
199
200
201
202

203
204
205

206

207
208
209
210
211
212
213

214
215
216

217

218
219
220
221
222
223
224
225
226

227
228

229

230

231
232

233
234
235
236
237
238
239
240
241
242
243

244
245
246
247

248
249
250

251
252
253
254
255
256
257
258

259
260
261
262

263

264
265
266
267
268
269
270

271
272
273
274
275
276
277
278
279
280
281
282

283
284
285
286

287
288
289
290

291

292
293
294
295
296
297
298
299
300
301
302

303

304
305
306
307

308
309
310
311
312
313
314
315
316

317

318
319
320
321

322
323
324
325
326
327
328
329
330

331
332
333
334

335

336
337
338
339
340

341
342
343
344
345
346
347
348

349

350
351
352
353

354
355
356
357
358
359
360
361

362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387

388
389
390
391
392
393
394
395

396
397
398
399
400
401
402
403
404

405
406
407
408
409
410
411
412
413

414
415
416

417
418
419

420
421
422
423
424
425
426
427
428
429
430

431
432
433
434

435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479