天池平台 --- 工业AI大赛智能制造质量预测

赛题:https://tianchi.aliyun.com/competition/entrance/231633/information

基于Python的解决方案:

原始jupyter文件,网盘: 链接:https://pan.baidu.com/s/1guIwAlk9zvI3ULcZ8kkBnw 提取码:g0xs

主要关注数据处理的方法,神经网络建立的方法,逻辑次要。

相关工具包:

import tensorflow as tf
import pandas as pd
import numpy as np
import seaborn as sns#安装anacoda后,利用anacoda的指令在安装的,见收藏夹kaggle中的帖子
import pylab  
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from sklearn.decomposition import PCA
from pandas import Series,DataFrame
from tensorflow.python.framework import ops

1 数据预处理

1)利用pandas读入csv格式的文件:

# Load the labeled training set from a CSV file.
# train_data is a pandas DataFrame.
csv_path = 'C:/Users/FrankFang/Desktop/TianChiDATA/Train.csv'
train_data = pd.read_csv(csv_path)
# Summary of the parsed CSV: dtypes, non-null counts, memory usage.
train_data.info()
# The header row is not counted in the number of rows.
print('train_data({0[0]},{0[1]})'.format(train_data.shape))
# head() shows the first 5 rows; it renders as a table only when it is the last
# expression of a Jupyter cell (print(data.head()) works too, but without the table).
train_data.head()
输出:
train_data(500,8029)
  ID TOOL_ID 210X1 210X2 210X3 210X4 210X5 210X6 210X7 210X8 ... 750X1444 750X1445 750X1446 750X1447 750X1448 750X1449 750X1450 750X1451 750X1452 Y
0 ID001 N 102.05 0.465 0.27 1.430 67.45 4.620 -0.54 -1.05 ... 0.00072 0.00072 25.7 0.00072 0.00072 25.7 0.00072 0.00072 2.400000e+12 2.945079
1 ID002 M 100.95 0.805 0.22 3.477 62.08 3.412 -2.12 1.02 ... 0.00072 0.00072 25.5 0.00072 0.00072 25.5 0.00072 0.00072 2.400000e+12 2.955092
2 ID003 L 98.56 0.555 0.24 1.172 56.70 3.080 -2.25 0.88 ... 0.00064 0.00064 25.2 0.00064 0.00064 25.2 0.00064 0.00064 2.400000e+12 2.741264
3 ID004 M 100.35 0.901 0.22 3.631 62.25 3.949 -1.98 0.82 ... 0.00072 0.00072 26.4 0.00072 0.00072 26.4 0.00072 0.00072 2.400000e+12 2.799336
4 ID005 M 100.25 0.854 0.23 3.429 61.42 3.630 -1.89 1.02 ... 0.00072 0.00072 26.4 0.00072 0.00072 26.4 0.00072 0.00072 2.400000e+12 2.692093

5 rows × 8029 columns

2)使用 pandas 的 sample 方法随机重新排列各个样本(重排矩阵的各行)

# Shuffle the rows of the training set.
import random as rd  # NOTE: unused by the shuffle below (pandas does it); kept because later cells may rely on it

# BUG FIX: DataFrame.sample(frac=1) returns a *new* shuffled frame and does not
# modify the frame in place; the original discarded the result, so the data was
# never actually shuffled. Assign the result back, and reset the index so the
# later positional slicing (iloc) lines up with the new row order.
train_data = train_data.sample(frac=1).reset_index(drop=True)
train_data_tem = train_data  # kept for compatibility with the original cell

3)将非数字类型的列替换为数字

分析处理信息不完整的数据:如果某一列中有空值,则用该列的中位数来填补。非数字类型的列:Tool, Tool (#2), tool (#1), TOOL, TOOL (#1), TOOL (#2), TOOL_ID, TOOL_ID (#1), TOOL_ID (#2), TOOL_ID (#3)。

# Encode the non-numeric (categorical) tool columns as numeric values.
#
# FIX: the original used chained assignment (train_data["col"].loc[mask] = v),
# which raises pandas' SettingWithCopyWarning and is not guaranteed to write
# back into the DataFrame. Series.replace() with an explicit mapping writes the
# column back safely and, like the original element-wise assignments, leaves
# any unmapped value untouched.
tool_encodings = {
    "Tool":         {"A": 1.0, "B": -1.0},
    "Tool (#2)":    {"A": 1.0, "B": 0.0, "C": -1.0},
    "tool (#1)":    {"P": 1.2, "Q": 1.0, "R": 0.8, "S": 0.6, "T": 0.4,
                     "U": 0.2, "V": 0.0, "W": -0.2, "X": -0.4},
    "TOOL":         {"B": 1.0, "C": 0.0, "D": -1.0},
    "TOOL (#1)":    {"XY1": 1.0, "YX1": -1.0},
    "TOOL (#2)":    {"A": 1.0, "B": -1.0},
    "TOOL_ID":      {"J": 1.0, "K": 0.7, "L": 0.4, "M": 0.1, "N": -0.2, "O": -0.5},
    "TOOL_ID (#1)": {"E": 1.0, "N": -1.0},
    "TOOL_ID (#2)": {"C": 1.0, "D": 0.0, "E": -1.0},
    "TOOL_ID (#3)": {"E0": 1.0, "N0": -1.0},
}
for column, mapping in tool_encodings.items():
    train_data[column] = train_data[column].replace(mapping)

4)将空单元格用其所在列的中位数进行替换
5)若某列的值全部相同则直接去掉该列

# Replace empty cells with the median of their column.
# FIX: the original hard-coded the column count (range(1, 8028)) and relied on
# chained fillna(..., inplace=True), whose write-through is not guaranteed.
# Iterate every column except the first (ID) and the last (Y) and assign back.
for column in train_data.columns[1:-1]:
    train_data[column] = train_data[column].fillna(train_data[column].median())

# Drop any column whose values are all identical (it carries no information).
# Tool columns are kept regardless, because the "tool" headers are needed later.
# NOTE: the original deliberately keeps the *first* constant column it finds
# (the `a != 0` guard); that quirk is preserved here.
# FIX: iterate the actual columns instead of the hard-coded range(1, 8028), so
# this step keeps working when the column count changes.
Dropped_columnNames = []  # dropped column names; the same columns must be dropped from the test data
a = 0                     # number of constant columns seen so far
train_data_temp = train_data
for CloumnName in train_data.columns[1:-1]:  # skip ID (first) and Y (last)
    is_constant = (train_data[CloumnName].max() - train_data[CloumnName].min()) == 0
    if is_constant and ("tool" not in CloumnName.lower()):
        if a != 0:  # keep one constant column
            train_data_temp = train_data_temp.drop([CloumnName], axis=1)
            Dropped_columnNames.append(CloumnName)
        a = a + 1
train_data = train_data_temp
print(a)

# Row/column counts of the cleaned frame (still including the ID and Y columns).
train_data_rows = train_data.shape[0]    # shape[0] is the number of rows
train_data_colums = train_data.shape[1]  # shape[1] is the number of columns
print(train_data_rows, train_data_colums)

6)计算各个列(各个属性)与Y列(预测值)的相关系数的绝对值,并排序。取前50个或其他合适的数目。

# Keep the features whose |correlation| with Y ranks highest.

# Drop the ID column first: it is a string key, not a feature.
train_data = train_data.drop(["ID"], axis=1)

# Absolute correlation of every remaining column with Y, sorted descending.
# Rank 0 is Y itself (correlation 1 with itself), so selection starts at rank 1.
train_data_coorY = train_data.corrwith(train_data["Y"]).abs()
train_data_coorY = train_data_coorY.sort_values(ascending=False)

# Keep the Num_selectd_colums best-correlated columns, excluding tool columns
# (all tool columns are re-added below).
Column_names_high_corr = []
Num_selectd_colums = 50
for rank in range(1, Num_selectd_colums + 1):
    if "tool" not in train_data_coorY.index[rank].lower():
        Column_names_high_corr.append(train_data_coorY.index[rank])

# Re-add every tool column.
# FIX: the original swept range(1, train_data_colums-1), where train_data_colums
# was measured *before* the ID column was dropped; after the drop, the column at
# index 0 ("TOOL_ID") was skipped and silently lost. Sweeping the actual column
# labels (all but the trailing Y) includes it.
for CloumnName in train_data.columns[:-1]:
    if "tool" in CloumnName.lower():
        Column_names_high_corr.append(CloumnName)

Num_selectd_colums = len(Column_names_high_corr)

# Append "Y" so the label stays as the last column.
Column_names_high_corr.append("Y")

# Keep only the selected columns.
print(train_data.shape)
train_data = train_data[Column_names_high_corr]

print(train_data.shape)
train_data.head()

另外一种,通过PCA来进行特征压缩的方法:

基于sklearn包:(前面已经导入)

m = PCA(n_components=0.9)  # keep enough principal components to retain 90% of the variance

# array is the original matrix (a pandas DataFrame); fit_transform both fits the
# PCA model (kept inside m) and returns the compressed data array_PCA.
array_PCA = m.fit_transform(array)

# Apply the already-fitted model to new, unseen data (e.g. the test set):

NewArray_PCA = m.transform(NewArray)

 

7)按照80%和20%的比例将训练样本分成两部分,前80%用于训练模型,后20%用于测试训练出的模型的好坏,将train_data转换成numpy数组。

# Split the labeled samples 80/20: the first 80% trains the model, the last
# 20% evaluates it. The Y column (last) is excluded from the feature matrix.
# FIX: the original sliced columns with the stale count train_data_colums,
# measured before the low-correlation columns were removed; iloc clipped the
# oversized slice to *all* remaining columns, so the label Y leaked into the
# feature matrix. Slicing with :-1 always drops exactly the trailing Y column.
split_row = int(train_data.shape[0] * 0.8)
train_data_part1 = train_data.iloc[:split_row, :-1]
train_data_part2 = train_data.iloc[split_row:, :-1]

# Convert to numpy arrays so broadcasting works in the normalization step.
train_data_part1 = np.array(train_data_part1)
train_data_part2 = np.array(train_data_part2)

train_data_part1_rows = train_data_part1.shape[0]
train_data_part1_colums = train_data_part1.shape[1]

train_data_part2_rows = train_data_part2.shape[0]
train_data_part2_colums = train_data_part2.shape[1]
print(type(train_data_part1))


print(train_data_part1_rows, train_data_part1_colums)
print(train_data_part2_rows, train_data_part2_colums)

8) 计算80%部分的样本的各列max, min, 均值,标准差,并用这些值对全部的labeled样本进行归一化: (data - 均值) /  标准差

# Column-wise statistics of the 80% training part. keepdims=True keeps each
# result as shape (1, n_cols) so it broadcasts over the sample rows.
train_data_part1_mean = np.mean(train_data_part1, axis=0, keepdims=True)
train_data_part1_max = np.max(train_data_part1, axis=0, keepdims=True)
train_data_part1_min = np.min(train_data_part1, axis=0, keepdims=True)
# Per-column range, padded with a tiny epsilon so a division can never hit zero.
train_data_part1_maxMinumMin = (train_data_part1_max - train_data_part1_min) + 0.0001

# Per-column standard deviation, with the same epsilon guard.
train_data_part1_std = np.std(train_data_part1, axis=0, keepdims=True) + 0.0001

print(train_data_part1_mean.shape)

# Standardize BOTH parts with the statistics of part 1 only: x = (x - mean) / std.
# (Broadcasting requires numpy arrays, which the split step produced.)
print(train_data_part1_mean.shape)
print(type(train_data_part1))
train_data_part1 = (train_data_part1 - train_data_part1_mean) / train_data_part1_std
print(train_data_part1.shape)

train_data_part2 = (train_data_part2 - train_data_part1_mean) / train_data_part1_std
print(train_data_part2.shape)

9)读取100个unlabeled数据,并做与上面同样的预处理。


2 基于TensorFlow包搭建神经网络

大体思路是,先建立起来前向传播公式,公式中参与运算的是tensorflow的placeholder。 然后,在实际训练中用实际的数据来替换掉placeholder。

1)函数:为X,Y,dropout rate,learning rate 生成placeholder,这些placeholder参与公式运算,矩阵的话需要有确定的列数。

def create_placeholders(Feature_Num):
    """Create the TF1 feed placeholders for the network.

    Args:
        Feature_Num: number of input features (fixes the column count of x).

    Returns:
        (x, y, keep_prob, keep_learnRate): the input batch, the label column,
        the dropout keep-probability and the learning-rate placeholders.
    """
    x = tf.placeholder('float', shape=[None, Feature_Num])  # one sample per row
    y = tf.placeholder('float', shape=[None, 1])            # labels, a single column
    keep_prob = tf.placeholder(tf.float32)                  # dropout keep probability
    keep_learnRate = tf.placeholder(tf.float32)             # learning rate fed per step
    return x, y, keep_prob, keep_learnRate

2) 函数: 参数初始化,使用的是tr.variable,初始化各层之间的参数矩阵以及bias列向量

def initialize_parameters(Feature_Num,Layer1_NodeNum,Layer2_NodeNum,Layer3_NodeNum,Layer4_NodeNum,Layer5_NodeNum,Layer6_NodeNum):
    """Create the weight matrices W1..W6 and bias vectors Bias1..Bias6.

    Every variable is drawn from a truncated normal (stddev 0.1) under a fixed
    graph-level seed, so repeated runs are reproducible.

    Returns:
        dict mapping "W1".."W6" and "Bias1".."Bias6" to tf.Variable tensors.
    """
    tf.set_random_seed(1)
    layer_sizes = [Feature_Num, Layer1_NodeNum, Layer2_NodeNum, Layer3_NodeNum,
                   Layer4_NodeNum, Layer5_NodeNum, Layer6_NodeNum]

    parameters = {}
    # All weights first, then all biases — the same creation order as the
    # original unrolled version (op order fixes the per-op random seeds that
    # TF derives from the graph seed).
    for i in range(1, len(layer_sizes)):
        parameters["W%d" % i] = tf.Variable(
            tf.truncated_normal([layer_sizes[i - 1], layer_sizes[i]], stddev=0.1))
    for i in range(1, len(layer_sizes)):
        parameters["Bias%d" % i] = tf.Variable(
            tf.truncated_normal([layer_sizes[i]], stddev=0.1))

    return parameters

3)函数:前向传播函数

 入参:两个placeHolder,还有parameters; 返回预测值

def forward_propagation(x, parameters, keep_prob):
    """Forward pass of the 6-layer network.

    Each of the first five layers is a linear transform (the tanh/relu
    activations were deliberately disabled in the original) followed by
    dropout; the final layer is a plain linear output with no dropout.

    Args:
        x: input placeholder of shape (batch, Feature_Num).
        parameters: dict with "W1".."W6" and "Bias1".."Bias6"
            (shapes as built by initialize_parameters).
        keep_prob: dropout keep-probability placeholder.

    Returns:
        The (batch, 1) linear output of layer 6.
    """
    activation = x
    # Layers 1-5: linear + dropout. The op creation order (matmul, add,
    # dropout per layer) matches the original unrolled code, keeping the
    # per-op dropout seeds identical.
    for layer in range(1, 6):
        z = tf.matmul(activation, parameters['W%d' % layer]) + parameters['Bias%d' % layer]
        activation = tf.nn.dropout(z, keep_prob)

    # Layer 6: linear output, no dropout.
    return tf.matmul(activation, parameters['W6']) + parameters['Bias6']

4)函数:损失函数

入参:预测值和实际的y值,都是inputsize*1的placeholder

返回:数值

def compute_cost(predict, y):
    """Mean squared error between the predictions and the labels.

    Both arguments are (batch, 1) tensors; returns a scalar tensor.
    """
    error = y - predict
    return tf.reduce_mean(tf.square(error))

5)函数:模型

运行这个model()函数就可以得到一个模型,入参中的learning_rate, num_epochs,minibatch_size都是可以调节的参数,可以手动调节,也可以另外写一个函数,自动用不同的入参去调用model函数,寻找更好的参数。

该函数可以将训练过程中的cost打印出来,观察训练过程。

def model(X_train, Y_train, X_test, Y_test,X_NeedPredi, learning_rate = 0.8,
          num_epochs = 10, minibatch_size = 10, print_cost = True):
    """Build, train and evaluate the 6-layer regression network.

    Args:
        X_train, Y_train: training samples (one row per sample) and labels
            (a single column); row counts must match.
        X_test, Y_test: hold-out set used both to track validation cost during
            training and for the final evaluation.
        X_NeedPredi: unlabeled samples; only its column count is validated here
            (the prediction code for it was disabled in the original notebook).
        learning_rate: initial Adam learning rate, decayed by 0.99 per epoch.
        num_epochs, minibatch_size: training schedule.
        print_cost: when True, record and plot training/validation cost curves.

    Returns:
        The mean-squared-error cost of the trained model on (X_test, Y_test).
    """
    # Reset the default graph so the model can be rebuilt without tf variable clashes.
    tf.reset_default_graph()

    (X_train_rows, X_train_columns) = X_train.shape
    (Y_train_rows, Y_train_columns) = Y_train.shape
    assert(X_train_rows == Y_train_rows)
    (X_test_rows, X_test_columns) = X_test.shape
    (Y_test_rows, Y_test_columns) = Y_test.shape
    assert(X_test_rows == Y_test_rows)
    assert(X_train_columns == X_test_columns)

    (X_NeedPredi_rows, X_NeedPredi_columns) = X_NeedPredi.shape
    assert(X_train_columns == X_NeedPredi_columns)

    # NOTE: samples beyond the last full minibatch are not used for training.
    mini_batch_num = int(X_train_rows/minibatch_size)

    Feature_Num = X_train_columns
    x, y, keep_prob, keep_learnRate = create_placeholders(Feature_Num)
    parameters = initialize_parameters(Feature_Num,50,50,20,10,5,1)

    predict = forward_propagation(x, parameters, keep_prob)
    cost = compute_cost(predict, y)
    # FIX: the original also called compute_cost_v2(), which is not defined in
    # this file, and never used its result; the dead call is removed.

    # FIX: build the optimizer on the keep_learnRate placeholder. The original
    # computed a decayed rate each epoch and fed it, but constructed
    # AdamOptimizer with the constant learning_rate, so the decay silently had
    # no effect.
    optimizer = tf.train.AdamOptimizer(keep_learnRate).minimize(cost)

    cost_validations = []   # validation-set cost sampled during training
    cost_stepValues = []    # current-minibatch cost sampled during training
    x_range = []            # global step index for the plot

    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)  # initialize every variable created by the builders above
        for epoch in range(num_epochs):
            # Exponential learning-rate decay: lr * 0.99 ** epoch.
            New_learnRate = learning_rate*(0.99**epoch)

            for i in range(mini_batch_num):

                batch_xs = X_train[i*minibatch_size : (i+1)*minibatch_size , 0:Feature_Num]
                batch_ys = Y_train[i*minibatch_size : (i+1)*minibatch_size]

                # Sample the cost curves every 200 minibatches (dropout off).
                if print_cost == True and (i%200) == 0:
                    validation_cost = cost.eval( feed_dict={ x:X_test, y:Y_test,keep_prob:1.0 } )
                    cost_current = cost.eval( feed_dict={ x:batch_xs,y:batch_ys,keep_prob:1.0 } )

                    cost_validations.append(validation_cost)
                    cost_stepValues.append(cost_current)
                    x_range.append(i + epoch*mini_batch_num)

                sess.run(optimizer,feed_dict={x:batch_xs,y:batch_ys,keep_prob:0.2,keep_learnRate:New_learnRate})

        if print_cost == True :
            plt.plot(x_range, cost_validations,'-b', label='validation set')
            plt.plot(x_range, cost_stepValues,'-r', label='step cost')  # FIX: label typo 'setp'
            plt.legend(loc='lower right', frameon=False)
            plt.ylim(ymax = 0.6, ymin = 0)
            plt.ylabel('cost')
            plt.xlabel('step')
            plt.show()

        # Evaluate the trained model on the hold-out set (dropout disabled).
        predict_Y_test = np.zeros( X_test_rows ).reshape( X_test_rows,1 )

        # FIX: the original looped over int(X_test_rows / minibatch_size) full
        # batches and left any remaining rows as zeros; stepping by slice also
        # covers the final partial batch.
        minibatch_size = 10
        for start in range(0, X_test_rows, minibatch_size):
            stop = min(start + minibatch_size, X_test_rows)
            predict_Y_test[start:stop] = predict.eval(
                feed_dict={x: X_test[start:stop, 0:Feature_Num], keep_prob: 1.0})

        cost_predict_Y_test = cost.eval( feed_dict={ x:X_test,y:Y_test,keep_prob:1.0 } )
        print(cost_predict_Y_test)

        return  cost_predict_Y_test

 

 

 

 

 

 

你可能感兴趣的:(机器学习)