人工神经网络(artificial neural network,缩写ANN),简称神经网络(neural network,缩写NN)或类神经网络,是一种模仿生物神经网络(动物的中枢神经系统,特别是大脑)的结构和功能的数学模型或计算模型。神经网络由大量的人工神经元联结进行计算。大多数情况下人工神经网络能在外界信息的基础上改变内部结构,是一种自适应系统。现代神经网络是一种非线性统计性数据建模工具,常用来对输入和输出间复杂的关系进行建模,或用来探索数据的模式。(摘自https://zh.wikipedia.org/wiki/%E4%BA%BA%E5%B7%A5%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C)
给定葡萄酒的各项属性及评分,训练一个人工神经网络,对任意给定的新葡萄酒进行打分。
实验数据如下:
样品编号 | 蛋白质mg/100g(x1) | DPPH自由基1/IC50(g/L)(x2) | 总酚(mmol/kg)(x3) | 葡萄总黄酮(mmol/kg)(x4) | PH值(x5) | 果皮质量(g)(x6) | 总分(y) |
葡萄样品1 | 555.455 | 0.4314 | 23.576 | 9.509 | 3.54 | 0.120 | 77.1 |
葡萄样品2 | 624.094 | 0.4659 | 26.026 | 13.720 | 3.88 | 0.193 | 78.2 |
葡萄样品3 | 580.273 | 0.4102 | 21.479 | 10.853 | 3.80 | 0.160 | 74.6 |
葡萄样品4 | 527.438 | 0.2660 | 10.783 | 4.394 | 3.36 | 0.173 | 75.8 |
葡萄样品5 | 590.651 | 0.3972 | 18.547 | 10.333 | 3.58 | 0.260 | 74 |
葡萄样品6 | 532.026 | 0.2755 | 10.469 | 6.867 | 3.31 | 0.213 | 74.5 |
葡萄样品7 | 489.320 | 0.1758 | 9.181 | 3.497 | 3.13 | 0.136 | 72.6 |
葡萄样品8 | 556.091 | 0.4160 | 15.343 | 8.454 | 2.90 | 0.240 | 71.5 |
葡萄样品9 | 703.300 | 0.6689 | 31.767 | 20.433 | 3.68 | 0.150 | 72.2 |
葡萄样品10 | 547.695 | 0.3263 | 9.191 | 4.603 | 3.66 | 0.210 | 71.6 |
葡萄样品11 | 545.034 | 0.2796 | 6.197 | 2.545 | 3.46 | 0.125 | 72 |
葡萄样品12 | 491.265 | 0.1975 | 11.924 | 3.926 | 3.37 | 0.253 | 72.6 |
葡萄样品13 | 603.686 | 0.4420 | 14.572 | 7.360 | 3.91 | 0.170 | 72.1 |
葡萄样品14 | 597.274 | 0.3606 | 15.661 | 7.780 | 3.46 | 0.256 | 69.9 |
葡萄样品15 | 531.431 | 0.2193 | 12.001 | 5.598 | 3.16 | 0.208 | 71.5 |
葡萄样品16 | 585.783 | 0.2371 | 10.992 | 9.185 | 3.25 | 0.138 | 68.8 |
葡萄样品17 | 546.516 | 0.3594 | 15.394 | 8.613 | 3.38 | 0.336 | 68.8 |
葡萄样品18 | 511.565 | 0.2260 | 7.979 | 5.273 | 3.33 | 0.150 | 71.2 |
葡萄样品19 | 542.201 | 0.3806 | 16.732 | 9.370 | 3.69 | 0.089 | 66.3 |
葡萄样品20 | 556.895 | 0.2825 | 11.914 | 8.069 | 3.66 | 0.247 | 66 |
葡萄样品21 | 566.188 | 0.3803 | 15.639 | 7.542 | 3.66 | 0.107 | 68.2 |
葡萄样品22 | 490.759 | 0.2842 | 16.066 | 7.759 | 3.72 | 0.126 | 65.3 |
葡萄样品23 | 547.813 | 0.5748 | 32.522 | 24.436 | 3.42 | 0.263 | 61.6 |
葡萄样品24 | 504.429 | 0.2836 | 8.192 | 8.265 | 3.65 | 0.227 | 68.1 |
葡萄样品25 | 539.502 | 0.3518 | 11.813 | 5.429 | 3.41 | 0.201 | 65.4 |
葡萄样品26 | 589.906 | 0.3179 | 7.129 | 3.413 | 3.63 | 0.210 | 65.7 |
葡萄样品27 | 523.837 | 0.2654 | 9.145 | 4.711 | 3.31 | 0.189 | 68.3 |
由于样本数量不足(仅27个样品),训练得到的模型存在过拟合等问题。
源码:
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <fstream>
#include <iostream>
#include <limits>

// Network shape, data-set layout and training hyper-parameters.
constexpr int HIDENODE = 5;         // neurons in the single hidden layer
constexpr double RATE = 0.9;        // learning rate of the gradient step
constexpr double TOLATHRE = 0.001;  // training stops once an epoch's summed squared error is <= this
constexpr int PRECISION = 10000;    // granularity of the random initial weights in [-1, 1]
constexpr int TOTALNUM = 27;        // rows read from ANN.txt for evaluation
constexpr int SAMPLENUM = 17;       // leading rows of ANN.txt used for training
constexpr int ATTRNUM = 7;          // values per row: ATTRNUM - 1 inputs followed by the score
constexpr int INPUTNUM = ATTRNUM - 1;
constexpr long MAXEPOCH = 1000000;  // hard cap so a non-converging run still terminates

double SampleData[SAMPLENUM][ATTRNUM];
double TotalData[TOTALNUM][ATTRNUM], EvalData[TOTALNUM], MaxEval, MinEval;
double hi[HIDENODE], ho[HIDENODE], yi[SAMPLENUM], yo[SAMPLENUM];
double wih[HIDENODE][ATTRNUM - 1], who[HIDENODE];
double bh[HIDENODE], bo;
double Distin = 0x7FFFFFFF;
// Per-column minimum / maximum of the training rows. The same ranges are
// reused for the evaluation rows so both are scaled identically.
double AttrMin[ATTRNUM], AttrMax[ATTRNUM];

/// Logistic sigmoid activation.
inline double f(double x)
{
    return 1.0 / (1.0 + std::exp(-x));
}

namespace {

/// Returns a pseudo-random value in [-1, 1] with a step of 1 / PRECISION.
double RandomWeight()
{
    return std::rand() % (2 * PRECISION + 1) / static_cast<double>(PRECISION) - 1.0;
}

/// Min-max scales `value` with the training range of `column`.
/// A constant column (zero range) maps to 0 instead of dividing by zero.
double Normalize(double value, int column)
{
    const double span = AttrMax[column] - AttrMin[column];
    return span != 0.0 ? (value - AttrMin[column]) / span : 0.0;
}

/// Reports a fatal input problem on stderr and terminates the program.
[[noreturn]] void Fail(const char* message)
{
    std::cerr << message << '\n';
    std::exit(EXIT_FAILURE);
}

/// Runs the input->hidden->output weighted sums for one row.
/// Writes the hidden pre-activation sums (bias excluded) to `hidden_in`,
/// the hidden activations to `hidden_out`, and returns the output neuron's
/// weighted sum (output bias excluded).
double ForwardPass(const double* input, double* hidden_in, double* hidden_out)
{
    double output_in = 0.0;
    for (int j = 0; j < HIDENODE; ++j) {
        double sum = 0.0;
        for (int k = 0; k < INPUTNUM; ++k)
            sum += input[k] * wih[j][k];
        hidden_in[j] = sum;
        hidden_out[j] = f(sum + bh[j]);
        output_in += who[j] * hidden_out[j];
    }
    return output_in;
}

}  // namespace

/// Loads the first SAMPLENUM rows of ANN.txt as the training set, records the
/// per-column ranges, min-max normalises every column (inputs and score) and
/// draws random initial weights and biases. Exits if the file is unusable.
void Init()
{
    std::ifstream fin("ANN.txt");
    if (!fin)
        Fail("cannot open ANN.txt");

    std::fill(AttrMin, AttrMin + ATTRNUM, std::numeric_limits<double>::max());
    std::fill(AttrMax, AttrMax + ATTRNUM, std::numeric_limits<double>::lowest());

    // Input data
    for (int i = 0; i < SAMPLENUM; ++i) {
        for (int j = 0; j < ATTRNUM; ++j) {
            if (!(fin >> SampleData[i][j]))
                Fail("ANN.txt: malformed or truncated training data");
            AttrMax[j] = std::max(AttrMax[j], SampleData[i][j]);
            AttrMin[j] = std::min(AttrMin[j], SampleData[i][j]);
        }
    }
    // The network predicts the normalised score, so keep the score range
    // for mapping predictions back to the original scale.
    MinEval = AttrMin[ATTRNUM - 1];
    MaxEval = AttrMax[ATTRNUM - 1];

    // Normalize data
    for (int i = 0; i < SAMPLENUM; ++i)
        for (int j = 0; j < ATTRNUM; ++j)
            SampleData[i][j] = Normalize(SampleData[i][j], j);

    // Rand weight
    for (int i = 0; i < HIDENODE; ++i) {
        who[i] = RandomWeight();
        for (int j = 0; j < INPUTNUM; ++j)
            wih[i][j] = RandomWeight();
    }
    // bh has HIDENODE entries (one bias per hidden neuron).
    for (int i = 0; i < HIDENODE; ++i)
        bh[i] = RandomWeight();
    bo = RandomWeight();
}

/// Runs one epoch of per-sample gradient descent over the training set and
/// stores the epoch's summed squared error in Distin.
void Train()
{
    Distin = 0;
    for (int i = 0; i < SAMPLENUM; ++i) {
        yi[i] = ForwardPass(SampleData[i], hi, ho);
        yo[i] = f(yi[i] + bo);

        const double error = SampleData[i][ATTRNUM - 1] - yo[i];
        const double theta2 = error * yo[i] * (1 - yo[i]);  // output delta

        for (int j = 0; j < HIDENODE; ++j) {
            // The hidden delta must be back-propagated through the weight
            // that produced this output, i.e. before who[j] is updated.
            const double theta1 = theta2 * who[j] * ho[j] * (1 - ho[j]);
            who[j] += RATE * theta2 * ho[j];
            for (int k = 0; k < INPUTNUM; ++k)
                wih[j][k] += RATE * theta1 * SampleData[i][k];
            bh[j] += RATE * theta1;
        }
        bo += RATE * theta2;

        // compute Distin
        Distin += error * error;
    }
}

/// Prints all weights and biases of the trained network.
void OutputParaments()
{
    std::cout << "weights between input layer and hidden layer:" << std::endl;
    for (int i = 0; i < HIDENODE; ++i) {
        for (int j = 0; j < INPUTNUM; ++j)
            std::cout << wih[i][j] << ' ';
        std::cout << std::endl;
    }
    std::cout << std::endl << "weights between hidden layer and output layer:" << std::endl;
    for (int i = 0; i < HIDENODE; ++i)
        std::cout << who[i] << ' ';
    std::cout << std::endl;
    std::cout << std::endl << "Thresholds in hidden layer:" << std::endl;
    for (int i = 0; i < HIDENODE; ++i)
        std::cout << bh[i] << ' ';
    std::cout << std::endl << std::endl;
    std::cout << "Threshold in output layer:" << std::endl << bo << std::endl << std::endl;
}

/// Prints the max / min / mean absolute error over the training samples,
/// measured on the normalised scale using the outputs of the last epoch.
void CalculateDifferences()
{
    double max_diff = 0;
    double min_diff = std::numeric_limits<double>::max();
    double aver_diff = 0;
    for (int i = 0; i < SAMPLENUM; ++i) {
        const double diff = std::fabs(SampleData[i][ATTRNUM - 1] - yo[i]);
        aver_diff += diff;
        max_diff = std::max(max_diff, diff);
        min_diff = std::min(min_diff, diff);
    }
    aver_diff /= SAMPLENUM;
    std::cout << "Max difference is: " << max_diff << std::endl;
    std::cout << "Min difference is: " << min_diff << std::endl;
    std::cout << "Average difference is: " << aver_diff << std::endl;
}

/// Loads all TOTALNUM rows of ANN.txt for evaluation. The raw score of each
/// row is kept in EvalData; every column of TotalData is scaled with the
/// ranges recorded by Init(), so Init() must have run first.
void GetAllGrap()
{
    std::ifstream fin("ANN.txt");
    if (!fin)
        Fail("cannot open ANN.txt");

    for (int i = 0; i < TOTALNUM; ++i) {
        for (int j = 0; j < ATTRNUM; ++j) {
            if (!(fin >> TotalData[i][j]))
                Fail("ANN.txt: malformed or truncated evaluation data");
        }
        EvalData[i] = TotalData[i][ATTRNUM - 1];
        // Scale with the training ranges: the network only ever saw inputs
        // normalised that way, so a different scaling would skew predictions.
        for (int j = 0; j < ATTRNUM; ++j)
            TotalData[i][j] = Normalize(TotalData[i][j], j);
    }
}

/// Predicts the score of row `grapnum` (0-based) of the evaluation set and
/// prints the prediction and its absolute deviation from the recorded score,
/// both on the original score scale.
void CalculateEval(int grapnum)
{
    if (grapnum < 0 || grapnum >= TOTALNUM) {
        std::cerr << "sample index out of range" << '\n';
        return;
    }
    double tmp_in[HIDENODE];
    double tmp_hidden[HIDENODE];
    // Same forward pass as in training: sigmoid activations, biases added.
    const double normalized = f(ForwardPass(TotalData[grapnum], tmp_in, tmp_hidden) + bo);
    // Undo the min-max scaling applied to the training scores.
    const double tmp_output = normalized * (MaxEval - MinEval) + MinEval;
    std::cout << "the test result is: " << tmp_output << std::endl;
    std::cout << "the distinguish is: " << std::fabs(tmp_output - EvalData[grapnum]) << std::endl;
}

/// Trains the network on ANN.txt, reports its parameters and training error,
/// then scores sample indices read interactively from standard input.
int main()
{
    std::srand(static_cast<unsigned>(std::time(nullptr)));
    Init();

    long epoch = 0;
    while (Distin > TOLATHRE && epoch < MAXEPOCH) {
        Train();
        ++epoch;
    }
    if (Distin > TOLATHRE)
        std::cerr << "warning: training stopped after " << epoch
                  << " epochs without reaching the error threshold" << '\n';

    OutputParaments();
    CalculateDifferences();
    GetAllGrap();

    int TestGrap = 0;
    std::cout << "input the test grap num:" << std::endl;
    while (std::cin >> TestGrap) {
        if (TestGrap >= 0 && TestGrap < TOTALNUM) {
            CalculateEval(TestGrap);
            std::cout << "input the test grap num:" << std::endl;
        }
        else
            std::cout << "Not legal!" << std::endl;
    }
    return 0;
}