利用Sequential模型构造一个二层CNN+MLP的多分类任务。任务描述,有一个有向网络,现在需要对网络中的节点进行分类。这里节点的类别就是节点本身,对同一个节点进行多次随机游走得到不同的随机游走序列表示。通过构造的深度学习模型能正确分辨出属于同一节点的不同游走序列。
网络构建,读取一个邻接表文件构建有向图。
class DiGraphs(object):
    """Build a directed networkx graph from an edge-list file."""

    def __init__(self, data_path):
        # Path to a whitespace-separated edge list (one edge per line).
        self.data_path = data_path

    def getDiGraph(self):
        """Load the edge list and return the resulting nx.DiGraph."""
        edge_array = np.loadtxt(self.data_path)
        graph = nx.DiGraph()
        graph.add_edges_from(edge_array)
        return graph
随机游走序列产生,给定一个网络和种子节点,设定好需要产生序列的长度,得到一个节点组成的有序序列。
class NodeSequence(object):
    """Generate a random-walk node sequence from a seed node on a graph.

    If the walk reaches a node with no out-neighbours, it restarts from the
    source node (original behaviour, preserved).
    """

    def __init__(self, graph_net, source_node, sequence_len):
        self.graph_net = graph_net        # graph exposing .neighbors(node)
        self.source_node = source_node    # walk seed (doubles as the class label)
        self.sequence_len = sequence_len  # number of nodes per walk
        self.node_sequence = []           # last generated walk, kept for callers

    def getSequence(self):
        """Run one random walk and return it as a numpy array.

        Bug fix: the buffer is reset on every call, so calling getSequence()
        more than once no longer accumulates earlier walks into the result.
        """
        self.node_sequence = []
        current_node = self.source_node
        for _ in range(self.sequence_len):
            self.node_sequence.append(current_node)
            neighbors = list(self.graph_net.neighbors(current_node))
            if not neighbors:
                # Dead end: restart the walk from the seed node.
                current_node = self.source_node
            else:
                # Pick the next node uniformly among out-neighbours.
                current_node = neighbors[np.random.randint(len(neighbors))]
        return np.array(self.node_sequence)
构建神经网络模型双层CNN+MLP。
import GraphNet
from keras.models import Sequential
from keras.layers import Dense, Activation, Convolution2D, Flatten, MaxPooling2D, Dropout
from keras import optimizers
import RandomWalk
import numpy as np
from keras.utils.np_utils import to_categorical

# --- Training data: 100 random walks of length 50 per node; the walk's seed
# --- node index is the class label.
path = r'data/email-Eu-core.txt'
directed_graph = GraphNet.DiGraphs(path).getDiGraph()
label_num = len(directed_graph.nodes())
data = []
label = []
for i in range(label_num):
    for j in range(100):
        label.append(i)
        data.append(RandomWalk.NodeSequence(directed_graph, i, 50).getSequence())
data = np.array(data).reshape(-1, 50)
label = to_categorical(label, num_classes=label_num)
# Shape walks as (samples, 50, 1, 1) so Conv2D with (k, 1) kernels behaves
# like a 1-D convolution along the walk.
data = data.reshape(-1, 50, 1, 1)

# --- Model: two conv blocks (conv -> dropout -> activation -> pool) + softmax head.
model = Sequential()
model.add(Convolution2D(
    filters=256,
    kernel_size=(5, 1),
    input_shape=(50, 1, 1),
    # fix: 'border_mode' is the Keras 1 keyword; with Keras 2-style
    # filters/kernel_size arguments the correct keyword is 'padding'.
    padding='same',
))
model.add(Dropout(0.1))
model.add(Activation('relu'))
model.add(MaxPooling2D(
    pool_size=(2, 1)
))
model.add(Convolution2D(
    filters=256,
    kernel_size=(3, 1),
    padding='same',
))
model.add(Dropout(0.1))
model.add(Activation('tanh'))
model.add(MaxPooling2D(
    pool_size=(2, 1)
))
model.add(Flatten())
model.add(Dense(label_num))
model.add(Activation('softmax'))
print(model.summary())

sgd = optimizers.SGD(lr=0.03, decay=1e-6, momentum=0.9, nesterov=True)
# fix: pass the configured optimizer object; the string 'sgd' creates a
# default SGD and silently ignores the lr/decay/momentum settings above.
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
model.fit(data, label, epochs=200)
注:这里对本次实验过程进行简单的总结。在搭建属于自己的CNN的时候出现了一些小小的“不愉快”,过程不是那么顺利,主要是源于自己对keras中一维CNN搭建的不熟悉以及其中相关参数的不理解。首先,一维CNN相关设置,filters这个参数表示有多少种卷积核,kernel_size表示的是卷积核大小设置,input_shape表示的是输入的形状。
将训练出来的模型进行保存
# Persist the architecture (JSON) and the weights separately, so individual
# layers can be controlled/frozen when the model is reloaded later.
model_architecture = model.to_json()
with open('mode_jason50.json', 'w') as jason_file:
    jason_file.write(model_architecture)
model.save_weights('model_weight50')
注:这里保存模型的方式有两种。1.用model.save()方法进行保存,这样是将模型的结构和参数都一起保存,加载的时候也是一起加载的;2.将模型结构和参数权重分开保存,这样做的好处是能够在加载模型之后对模型的每一层进行控制,控制每一层参数的权重是否更新。这里,选择的就是第二种保存方式。
加载模型
from keras.models import model_from_json, load_model
from keras.layers import Dense, Activation, Convolution2D, Flatten, MaxPooling2D, Input
import dataProcing
import RandomWalk
import GraphNet
import numpy as np
from keras.utils.np_utils import to_categorical
from keras import optimizers

# Rebuild the pretrained model from its saved JSON architecture plus the
# separately saved weights. (Alternative one-step restore:
# model = load_model('model/ClassifyBySelfModel200'))
# fix: read via a context manager so the handle is closed even if
# model_from_json raises, instead of manual open/close.
with open('model/mode_jason50.json', 'r') as jason_file:
    load_model_jason = jason_file.read()
model = model_from_json(load_model_jason)
model.load_weights('model/model_weight50')
对加载的模型进行控制,控制加载模型的两层CNN中参数不进行新一轮训练中的参数更新,对原来的MLP层删除,加载新的MLP层进行新的分类
# Transfer learning: drop the pretrained softmax head, freeze the two conv
# layers, and train a fresh 18-way classifier (department labels).
model.pop()  # remove Activation('softmax')
model.pop()  # remove Dense(label_num)
print(model.summary())
# Freeze the two convolution layers (indices 0 and 4); the dropout,
# activation, pooling and flatten layers between them carry no weights.
model.layers[0].trainable = False
model.layers[4].trainable = False
print(model.summary())
model.add(Dense(18, name='classify'))
model.add(Activation('softmax', name='softmax'))
print(model.summary())

# --- Training walks: 10 random walks of length 50 per training node, each
# --- labelled with that node's department label.
path = r'data/email-Eu-core.txt'
directed_graph = GraphNet.DiGraphs(path).getDiGraph()
train_data = []
train_label_R = []
train_node, test_node, train_label, test_label = dataProcing.DataGeneration(
    'data/email-Eu-core-department-labels.txt').getTrainTestData()
for index, i in enumerate(train_node):  # enumerate replaces the manual counter
    for _ in range(10):
        train_data.append(RandomWalk.NodeSequence(directed_graph, i, 50).getSequence())
        train_label_R.append(train_label[index])
train_data = np.array(train_data).reshape(-1, 50)
train_label_R = to_categorical(train_label_R, num_classes=18)
train_data = train_data.reshape(-1, 50, 1, 1)

sgd = optimizers.SGD(lr=0.03, decay=1e-6, momentum=0.9, nesterov=True)
# fix: pass the configured optimizer object; the string 'sgd' creates a
# default SGD and silently ignores the lr/decay/momentum settings above.
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
model.fit(train_data, train_label_R, epochs=200)

# --- Evaluation: one walk per held-out node.
test_data = []
test_label_R = []
for index, i in enumerate(test_node):
    test_data.append(RandomWalk.NodeSequence(directed_graph, i, 50).getSequence())
    test_label_R.append(test_label[index])
test_data = np.array(test_data).reshape(-1, 50)
test_label_R = to_categorical(test_label_R, num_classes=18)
test_data = test_data.reshape(-1, 50, 1, 1)
loss, accuracy = model.evaluate(test_data, test_label_R)
print('test_loss:', loss)
print('test_accuracy:', accuracy)

# Persist the fine-tuned architecture/weights and the evaluation metrics.
with open('mode_jason_Fin50.json', 'w') as jason_file:
    jason_file.write(model.to_json())
model.save_weights('model_weight_Fin50')
with open('result_Fin50.txt', 'w') as f:
    f.write('test_loss:' + str(loss) + '\n')
    f.write('test_accuracy:' + str(accuracy) + '\n')