简单的例子:
Layer
层,主要作用是:对每层的name做了命名,还用一个参数决定是否做logclass Layer(object):
def __init__(self, **kwargs):
allowed_kwargs = {'name', 'logging'}
for kwarg in kwargs.keys():
assert kwarg in allowed_kwargs, 'Invalid keyword argument: ' + kwarg
name = kwargs.get('name')
if not name:
# return class name
layer = self.__class__.__name__.lower()
# 形成了一个新的类名 : trick
name = layer + '_' + str(get_layer_uid(layer))
self.name = name
self.vars = {}
# return False when get failed.
logging = kwargs.get('logging', False)
self.logging = logging
self.sparse_inputs = False
def _call(self, inputs):
def __call__(self, inputs):
def _log_vars(self):
__call__
的作用让 Layer
的实例成为可调用对象;
Layer
继承得到denseNet
,考虑到的参数:train.py
定义为:# Define placeholders
placeholders = {
'support': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
'features': tf.sparse_placeholder(tf.float32, shape=tf.constant(features[2], dtype=tf.int64)),
'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])),
'labels_mask': tf.placeholder(tf.int32),
'dropout': tf.placeholder_with_default(0., shape=()),
'num_features_nonzero': tf.placeholder(tf.int32) # helper variable for sparse dropout
}
class Dense(Layer):
"""Dense layer."""
def __init__(self, input_dim, output_dim, placeholders, dropout=0., sparse_inputs=False,
act=tf.nn.relu, bias=False, featureless=False, **kwargs):
super(Dense, self).__init__(**kwargs)
if dropout:
self.dropout = placeholders['dropout']
else:
self.dropout = 0.
self.act = act
self.sparse_inputs = sparse_inputs
self.featureless = featureless
self.bias = bias
# helper variable for sparse dropout
self.num_features_nonzero = placeholders['num_features_nonzero']
with tf.variable_scope(self.name + '_vars'):
self.vars['weights'] = glorot([input_dim, output_dim],
name='weights')
if self.bias:
self.vars['bias'] = zeros([output_dim], name='bias')
if self.logging:
self._log_vars()
def _call(self, inputs):
x = inputs
# dropout
if self.sparse_inputs:
x = sparse_dropout(x, 1-self.dropout, self.num_features_nonzero)
else:
x = tf.nn.dropout(x, 1-self.dropout)
# transform
output = dot(x, self.vars['weights'], sparse=self.sparse_inputs)
# bias
if self.bias:
output += self.vars['bias']
return self.act(output)
重写了_call
函数,其中对稀疏矩阵做 drop_out
:
def sparse_dropout(x, keep_prob, noise_shape):
"""Dropout for sparse tensors."""
random_tensor = keep_prob
random_tensor += tf.random_uniform(noise_shape)
dropout_mask = tf.cast(tf.floor(random_tensor), dtype=tf.bool)
pre_out = tf.sparse_retain(x, dropout_mask)
return pre_out * (1./keep_prob)
还有对__init__
中网络参数的初始化函数:
def glorot(shape, name=None):
"""Glorot & Bengio (AISTATS 2010) init."""
init_range = np.sqrt(6.0/(shape[0]+shape[1]))
initial = tf.random_uniform(shape, minval=-init_range, maxval=init_range, dtype=tf.float32)
return tf.Variable(initial, name=name)
def zeros(shape, name=None):
"""All zeros."""
initial = tf.zeros(shape, dtype=tf.float32)
return tf.Variable(initial, name=name)
接下来是从 Layer
继承下来得到图卷积网络,与denseNet
的唯一差别是_call
函数不一样,还有一个self.support = placeholders['support']
的初始化:
class GraphConvolution(Layer):
"""Graph convolution layer."""
def __init__(self, input_dim, output_dim, placeholders, dropout=0.,
sparse_inputs=False, act=tf.nn.relu, bias=False,
featureless=False, **kwargs):
....
self.support = placeholders['support']
....
其中
def _call(self, inputs):
x = inputs
# dropout
if self.sparse_inputs:
x = sparse_dropout(x, 1-self.dropout, self.num_features_nonzero)
else:
x = tf.nn.dropout(x, 1-self.dropout)
# convolve
supports = list()
for i in range(len(self.support)):
if not self.featureless:
pre_sup = dot(x, self.vars['weights_' + str(i)],
sparse=self.sparse_inputs)
else:
pre_sup = self.vars['weights_' + str(i)]
support = dot(self.support[i], pre_sup, sparse=True)
supports.append(support)
output = tf.add_n(supports)
# bias
if self.bias:
output += self.vars['bias']
return self.act(output)
其中的support
是邻接矩阵的一个变化:
if FLAGS.model == 'gcn':
support = [preprocess_adj(adj)]
num_supports = 1
model_func = GCN
self.layers
和 self.activations
建立序列模型,还有init
中的其他比如loss、accuracy、optimizer、opt_op等。class Model(object):
def __init__(self, **kwargs):
allowed_kwargs = {'name', 'logging'}
for kwarg in kwargs.keys():
assert kwarg in allowed_kwargs, 'Invalid keyword argument: ' + kwarg
name = kwargs.get('name')
if not name:
name = self.__class__.__name__.lower()
self.name = name
logging = kwargs.get('logging', False)
self.logging = logging
self.vars = {}
self.placeholders = {}
self.layers = []
self.activations = []
self.inputs = None
self.outputs = None
self.loss = 0
self.accuracy = 0
self.optimizer = None
self.opt_op = None
def _build(self):
raise NotImplementedError
def build(self):
""" Wrapper for _build() """
with tf.variable_scope(self.name):
self._build()
# Build sequential layer model
self.activations.append(self.inputs)
for layer in self.layers:
hidden = layer(self.activations[-1])
self.activations.append(hidden)
self.outputs = self.activations[-1]
# Store model variables for easy access
variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.name)
self.vars = {var.name: var for var in variables}
# Build metrics
self._loss()
self._accuracy()
self.opt_op = self.optimizer.minimize(self.loss)
def predict(self):
def _loss(self):
def _accuracy(self):
def save(self, sess=None):
def load(self, sess=None):
然后继承Model的MLP
,主要是重写了基类中没有实现的函数;计算了网络第一层的权重衰减L2损失,因为这是半监督学习,还计算的掩码交叉熵是:
def masked_softmax_cross_entropy(preds, labels, mask):
"""Softmax cross-entropy loss with masking."""
loss = tf.nn.softmax_cross_entropy_with_logits(logits=preds, labels=labels)
mask = tf.cast(mask, dtype=tf.float32)
mask /= tf.reduce_mean(mask)
loss *= mask
return tf.reduce_mean(loss)
其中 mask
是一个索引向量,值为1表示该位置的标签在训练数据中是给定的;比如100个数据中训练集已知带标签的数据有50个, 那么计算损失的时候,loss
乘以的 mask
是以前的2倍,等于 loss
在未带标签的地方都乘以0没有了,而在带标签的地方损失变成了2倍;即只对带标签的样本计算损失。
注:loss的shape与mask的shape相同,等于样本的数量:(None,),所以 loss *= mask
是向量点乘。
对于准确率也是一样的,只对在验证集或者测试集上的标签位置计算准确率,并且准确率扩大了tf.reduce_mean(mask)
的倒数倍。
def masked_accuracy(preds, labels, mask):
"""Accuracy with masking."""
correct_prediction = tf.equal(tf.argmax(preds, 1), tf.argmax(labels, 1))
accuracy_all = tf.cast(correct_prediction, tf.float32)
mask = tf.cast(mask, dtype=tf.float32)
mask /= tf.reduce_mean(mask)
accuracy_all *= mask
return tf.reduce_mean(accuracy_all)
class MLP(Model):
def __init__(self, placeholders, input_dim, **kwargs):
super(MLP, self).__init__(**kwargs)
self.inputs = placeholders['features']
self.input_dim = input_dim
# self.input_dim = self.inputs.get_shape().as_list()[1] # To be supported in future Tensorflow versions
self.output_dim = placeholders['labels'].get_shape().as_list()[1]
self.placeholders = placeholders
self.optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
self.build()
def _loss(self):
# Weight decay loss
for var in self.layers[0].vars.values():
self.loss += FLAGS.weight_decay * tf.nn.l2_loss(var)
# Cross entropy error
self.loss += masked_softmax_cross_entropy(self.outputs, self.placeholders['labels'],
self.placeholders['labels_mask'])
def _accuracy(self):
self.accuracy = masked_accuracy(self.outputs, self.placeholders['labels'],
self.placeholders['labels_mask'])
def _build(self):
self.layers.append(Dense(input_dim=self.input_dim,
output_dim=FLAGS.hidden1,
placeholders=self.placeholders,
act=tf.nn.relu,
dropout=True,
sparse_inputs=True,
logging=self.logging))
self.layers.append(Dense(input_dim=FLAGS.hidden1,
output_dim=self.output_dim,
placeholders=self.placeholders,
act=lambda x: x,
dropout=True,
logging=self.logging))
def predict(self):
return tf.nn.softmax(self.outputs)
继承Model的卷机模型 GCN
:
class GCN(Model):
def __init__(self, placeholders, input_dim, **kwargs):
super(GCN, self).__init__(**kwargs)
self.inputs = placeholders['features']
self.input_dim = input_dim
# self.input_dim = self.inputs.get_shape().as_list()[1] # To be supported in future Tensorflow versions
self.output_dim = placeholders['labels'].get_shape().as_list()[1]
self.placeholders = placeholders
self.optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
self.build()
def _loss(self):
# Weight decay loss
for var in self.layers[0].vars.values():
self.loss += FLAGS.weight_decay * tf.nn.l2_loss(var)
# Cross entropy error
self.loss += masked_softmax_cross_entropy(self.outputs, self.placeholders['labels'],
self.placeholders['labels_mask'])
def _accuracy(self):
self.accuracy = masked_accuracy(self.outputs, self.placeholders['labels'],
self.placeholders['labels_mask'])
def _build(self):
self.layers.append(GraphConvolution(input_dim=self.input_dim,
output_dim=FLAGS.hidden1,
placeholders=self.placeholders,
act=tf.nn.relu,
dropout=True,
sparse_inputs=True,
logging=self.logging))
self.layers.append(GraphConvolution(input_dim=FLAGS.hidden1,
output_dim=self.output_dim,
placeholders=self.placeholders,
act=lambda x: x,
dropout=True,
logging=self.logging))
def predict(self):
return tf.nn.softmax(self.outputs)
def load_data(dataset_str):
"""
Loads input data from gcn/data directory
ind.dataset_str.x => the feature vectors of the training instances as scipy.sparse.csr.csr_matrix object;
ind.dataset_str.tx => the feature vectors of the test instances as scipy.sparse.csr.csr_matrix object;
ind.dataset_str.allx => the feature vectors of both labeled and unlabeled training instances
(a superset of ind.dataset_str.x) as scipy.sparse.csr.csr_matrix object;
ind.dataset_str.y => the one-hot labels of the labeled training instances as numpy.ndarray object;
ind.dataset_str.ty => the one-hot labels of the test instances as numpy.ndarray object;
ind.dataset_str.ally => the labels for instances in ind.dataset_str.allx as numpy.ndarray object;
ind.dataset_str.graph => a dict in the format {index: [index_of_neighbor_nodes]} as collections.defaultdict
object;
ind.dataset_str.test.index => the indices of test instances in graph, for the inductive setting as list object.
All objects above must be saved using python pickle module.
:param dataset_str: Dataset name
:return: All data input files loaded (as well the training/test data).
"""
names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
objects = []
for i in range(len(names)):
with open("data/ind.{}.{}".format(dataset_str, names[i]), 'rb') as f:
if sys.version_info > (3, 0):
#
objects.append(pkl.load(f, encoding='latin1'))
else:
objects.append(pkl.load(f))
# x.shape:(140, 1433); y.shape:(140, 7);tx.shape:(1000, 1433);ty.shape:(1708, 1433);
# allx.shape:(1708, 1433);ally.shape:(1708, 7)
x, y, tx, ty, allx, ally, graph = tuple(objects)
test_idx_reorder = parse_index_file("data/ind.{}.test.index".format(dataset_str))
test_idx_range = np.sort(test_idx_reorder)
if dataset_str == 'citeseer':
# Fix citeseer dataset (there are some isolated nodes in the graph)
# Find isolated nodes, add them as zero-vecs into the right position
test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder)+1)
tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
tx_extended[test_idx_range-min(test_idx_range), :] = tx
tx = tx_extended
ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
ty_extended[test_idx_range-min(test_idx_range), :] = ty
ty = ty_extended
features = sp.vstack((allx, tx)).tolil()
features[test_idx_reorder, :] = features[test_idx_range, :]
adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))
# labels.shape:(2708, 7)
labels = np.vstack((ally, ty))
labels[test_idx_reorder, :] = labels[test_idx_range, :]
# len(list(idx_val)) + len(list(idx_train)) + len(idx_test) = 1640
idx_test = test_idx_range.tolist()
idx_train = range(len(y))
idx_val = range(len(y), len(y)+500)
# train_mask.shape : (2708,) , 指示坐标的函数,表明坐标为true的位置将被取出来使用
train_mask = sample_mask(idx_train, labels.shape[0])
val_mask = sample_mask(idx_val, labels.shape[0])
test_mask = sample_mask(idx_test, labels.shape[0])
# y_train y_val y_test .shape : (2708, 7)
# 替换了true位置,即这些位置的labels是已知,其他的是未知(zeros)
y_train = np.zeros(labels.shape)
y_val = np.zeros(labels.shape)
y_test = np.zeros(labels.shape)
y_train[train_mask, :] = labels[train_mask, :]
y_val[val_mask, :] = labels[val_mask, :]
y_test[test_mask, :] = labels[test_mask, :]
return adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask
还有为什么处理邻接矩阵还有属性矩阵,跟谱图卷积的理论有关,目的是要把周围节点的特征和自身节点的特征都捕捉到,同时避免不同节点间度的不均衡带来的问题,还有加入属性,以及如何半监督做考虑:
def preprocess_features(features):
"""Row-normalize feature matrix and convert to tuple representation"""
rowsum = np.array(features.sum(1))
r_inv = np.power(rowsum, -1).flatten()
r_inv[np.isinf(r_inv)] = 0.
r_mat_inv = sp.diags(r_inv)
# dot product
features = r_mat_inv.dot(features)
return sparse_to_tuple(features)
def normalize_adj(adj):
"""Symmetrically normalize adjacency matrix."""
# 图的归一化拉普拉斯矩阵
adj = sp.coo_matrix(adj)
rowsum = np.array(adj.sum(1))
d_inv_sqrt = np.power(rowsum, -0.5).flatten()
d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo()
def preprocess_adj(adj):
"""Preprocessing of adjacency matrix for simple GCN model and conversion to tuple representation."""
adj_normalized = normalize_adj(adj + sp.eye(adj.shape[0]))
return sparse_to_tuple(adj_normalized)
Set random seed
seed = 123
np.random.seed(seed)
tf.set_random_seed(seed)
Settings
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_string('dataset', 'cora', 'Dataset string.') # 'cora', 'citeseer', 'pubmed'
...
Load data
adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(FLAGS.dataset)
Some preprocessing
features = preprocess_features(features)
...
Define placeholders
placeholders = {
'support': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
'features': tf.sparse_placeholder(tf.float32, shape=tf.constant(features[2], dtype=tf.int64)),
'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])),
'labels_mask': tf.placeholder(tf.int32),
'dropout': tf.placeholder_with_default(0., shape=()),
'num_features_nonzero': tf.placeholder(tf.int32) # helper variable for sparse dropout
}
Create model
Initialize session
Define model evaluation function
def evaluate(features, support, labels, mask, placeholders):
t_test = time.time()
feed_dict_val = construct_feed_dict(features, support, labels, mask, placeholders)
outs_val = sess.run([model.loss, model.accuracy], feed_dict=feed_dict_val)
return outs_val[0], outs_val[1], (time.time() - t_test)
Init variables
Train model :1. Construct feed dictionary 2. Training step 3. Validation 4. Print results
Testing