This post goes straight to the code. My write-up of the theory behind graph neural networks is not finished yet, so this is a first-pass GCN implementation on TensorFlow 2.0. Google released TF 2.0 this year, and I recommend migrating your code sooner rather than later, because the API changes really are substantial. A PyTorch version of this GCN example will follow, so stay tuned. The code is organized into three files: graph.py defines the GraphConvolution layer, utils.py loads and preprocesses the Cora citation dataset, and train.py builds and trains the model.
graph.py
import tensorflow as tf
from tensorflow.keras import activations, initializers, constraints
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Layer
import tensorflow.keras.backend as K
class GraphConvolution(Layer):
    """Basic graph convolution layer as in https://arxiv.org/abs/1609.02907"""

    def __init__(self, units, support=1,
                 activation=None,
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 **kwargs):
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        super(GraphConvolution, self).__init__(**kwargs)
        self.units = units
        self.activation = activations.get(activation)
        self.use_bias = use_bias
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)
        self.kernel_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)
        self.supports_masking = True
        self.support = support
        assert support >= 1
    def compute_output_shape(self, input_shapes):
        features_shape = input_shapes[0]
        output_shape = (features_shape[0], self.units)
        return output_shape  # (batch_size, output_dim)
    def build(self, input_shapes):
        features_shape = input_shapes[0]
        assert len(features_shape) == 2
        input_dim = features_shape[1]
        self.kernel = self.add_weight(shape=(input_dim * self.support, self.units),
                                      initializer=self.kernel_initializer,
                                      name='kernel',
                                      regularizer=self.kernel_regularizer,
                                      constraint=self.kernel_constraint)
        if self.use_bias:
            self.bias = self.add_weight(shape=(self.units,),
                                        initializer=self.bias_initializer,
                                        name='bias',
                                        regularizer=self.bias_regularizer,
                                        constraint=self.bias_constraint)
        else:
            self.bias = None
        self.built = True
    # core code: propagate features through the pre-normalized adjacency basis
    def call(self, inputs, mask=None):
        features = inputs[0]
        basis = inputs[1:]  # list of support matrices (normalized adjacency or Chebyshev terms)
        supports = list()
        for i in range(self.support):
            # A * X
            supports.append(K.dot(basis[i], features))
        supports = K.concatenate(supports, axis=1)
        # A * X * W
        output = K.dot(supports, self.kernel)
        if self.bias is not None:
            output += self.bias
        return self.activation(output)
    def get_config(self):
        config = {'units': self.units,
                  'support': self.support,
                  'activation': activations.serialize(self.activation),
                  'use_bias': self.use_bias,
                  'kernel_initializer': initializers.serialize(self.kernel_initializer),
                  'bias_initializer': initializers.serialize(self.bias_initializer),
                  'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
                  'bias_regularizer': regularizers.serialize(self.bias_regularizer),
                  'activity_regularizer': regularizers.serialize(self.activity_regularizer),
                  'kernel_constraint': constraints.serialize(self.kernel_constraint),
                  'bias_constraint': constraints.serialize(self.bias_constraint)}
        base_config = super(GraphConvolution, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
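Before moving on to the data utilities, here is a minimal smoke test of the layer (my own sketch, not part of the original files; it assumes GraphConvolution from graph.py above is importable, and the shapes and values are made up). It shows the calling convention the layer expects: a list whose first element is the feature matrix and whose remaining elements are the pre-normalized adjacency (or Chebyshev) matrices.

import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model

# Tiny random graph: 4 nodes with 3 features each; identity adjacency, just for a shape check.
n_nodes, n_feats = 4, 3
X = np.random.rand(n_nodes, n_feats).astype(np.float32)
A = np.eye(n_nodes, dtype=np.float32)

fea_in = Input(batch_shape=(None, n_feats), name='fea')
adj_in = Input(batch_shape=(None, None), name='adj')
out = GraphConvolution(8, support=1, activation='relu')([fea_in, adj_in])
toy_model = Model(inputs=[fea_in, adj_in], outputs=out)

# Full-batch prediction: batch_size must cover the whole graph.
print(toy_model.predict([X, A], batch_size=n_nodes).shape)  # expected: (4, 8)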
utils.py
from __future__ import print_function

import numpy as np
import scipy.sparse as sp
# the old scipy.sparse.linalg.eigen.arpack path is removed in recent SciPy releases
from scipy.sparse.linalg import eigsh, ArpackNoConvergence


def encode_onehot(labels):
    classes = set(labels)
    classes_dict = {c: np.identity(len(classes))[i, :] for i, c in enumerate(classes)}
    labels_onehot = np.array(list(map(classes_dict.get, labels)), dtype=np.int32)
    return labels_onehot
def load_data(path="./data/cora/", dataset="cora"):
    """Load citation network dataset (Cora only for now)."""
    print('Loading {} dataset...'.format(dataset))
    idx_features_labels = np.genfromtxt("{}{}.content".format(path, dataset), dtype=np.dtype(str))
    features = sp.csr_matrix(idx_features_labels[:, 1:-1], dtype=np.float32)
    labels = encode_onehot(idx_features_labels[:, -1])
    # build graph: map paper ids to row indices and read the citation edge list
    idx = np.array(idx_features_labels[:, 0], dtype=np.int32)
    idx_map = {j: i for i, j in enumerate(idx)}
    edges_unordered = np.genfromtxt("{}{}.cites".format(path, dataset), dtype=np.int32)
    edges = np.array(list(map(idx_map.get, edges_unordered.flatten())),
                     dtype=np.int32).reshape(edges_unordered.shape)
    adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
                        shape=(labels.shape[0], labels.shape[0]), dtype=np.float32)
    # build symmetric adjacency matrix from the directed citation edges
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
    print('Dataset(adj) has {} nodes, {} edges, {} features.'.format(adj.shape[0], edges.shape[0], features.shape[1]))
    return features.todense(), adj, labels
def normalize_adj(adj, symmetric=True):
    """Symmetric: D^-1/2 * A * D^-1/2; otherwise left normalization D^-1 * A."""
    if symmetric:
        d = sp.diags(np.power(np.array(adj.sum(1)), -0.5).flatten(), 0)
        a_norm = adj.dot(d).transpose().dot(d).tocsr()
    else:
        d = sp.diags(np.power(np.array(adj.sum(1)), -1).flatten(), 0)
        a_norm = d.dot(adj).tocsr()
    return a_norm


def preprocess_adj(adj, symmetric=True):
    # renormalization trick: add self-loops, then normalize (A + I)
    adj = adj + sp.eye(adj.shape[0])
    adj = normalize_adj(adj, symmetric)
    return adj
def sample_mask(idx, l):
    mask = np.zeros(l)
    mask[idx] = 1
    return np.array(mask, dtype=bool)
def get_splits(y):
    idx_train = range(140)
    idx_val = range(200, 500)
    idx_test = range(500, 1500)
    y_train = np.zeros(y.shape, dtype=np.int32)
    y_val = np.zeros(y.shape, dtype=np.int32)
    y_test = np.zeros(y.shape, dtype=np.int32)
    y_train[idx_train] = y[idx_train]
    y_val[idx_val] = y[idx_val]
    y_test[idx_test] = y[idx_test]
    train_mask = sample_mask(idx_train, y.shape[0])
    return y_train, y_val, y_test, idx_train, idx_val, idx_test, train_mask
def categorical_crossentropy(preds, labels):
    return np.mean(-np.log(np.extract(labels, preds)))


def accuracy(preds, labels):
    return np.mean(np.equal(np.argmax(labels, 1), np.argmax(preds, 1)))


def evaluate_preds(preds, labels, indices):
    split_loss = list()
    split_acc = list()
    for y_split, idx_split in zip(labels, indices):
        split_loss.append(categorical_crossentropy(preds[idx_split], y_split[idx_split]))
        split_acc.append(accuracy(preds[idx_split], y_split[idx_split]))
    return split_loss, split_acc
def normalized_laplacian(adj, symmetric=True):
    adj_normalized = normalize_adj(adj, symmetric)
    laplacian = sp.eye(adj.shape[0]) - adj_normalized
    return laplacian


def rescale_laplacian(laplacian):
    # rescale L to [-1, 1] for the Chebyshev basis: 2L / lambda_max - I
    try:
        print('Calculating largest eigenvalue of normalized graph Laplacian...')
        largest_eigval = eigsh(laplacian, 1, which='LM', return_eigenvectors=False)[0]
    except ArpackNoConvergence:
        print('Eigenvalue calculation did not converge! Using largest_eigval=2 instead.')
        largest_eigval = 2
    scaled_laplacian = (2. / largest_eigval) * laplacian - sp.eye(laplacian.shape[0])
    return scaled_laplacian
def chebyshev_polynomial(X, k):
    """Calculate Chebyshev polynomials up to order k. Return a list of sparse matrices."""
    print("Calculating Chebyshev polynomials up to order {}...".format(k))
    T_k = list()
    T_k.append(sp.eye(X.shape[0]).tocsr())
    T_k.append(X)

    def chebyshev_recurrence(T_k_minus_one, T_k_minus_two, X):
        X_ = sp.csr_matrix(X, copy=True)
        # T_k(X) = 2 * X * T_{k-1}(X) - T_{k-2}(X)
        return 2 * X_.dot(T_k_minus_one) - T_k_minus_two

    for i in range(2, k + 1):
        T_k.append(chebyshev_recurrence(T_k[-1], T_k[-2], X))
    return T_k
def sparse_to_tuple(sparse_mx):
    if not sp.isspmatrix_coo(sparse_mx):
        sparse_mx = sparse_mx.tocoo()
    coords = np.vstack((sparse_mx.row, sparse_mx.col)).transpose()
    values = sparse_mx.data
    shape = sparse_mx.shape
    return coords, values, shape
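A quick aside before train.py (again a sketch of my own, using a made-up two-node graph): preprocess_adj implements the renormalization trick from the GCN paper, A_hat = D^-1/2 (A + I) D^-1/2, and on a toy graph the output can be verified by hand.

import numpy as np
import scipy.sparse as sp

# Two nodes connected by a single undirected edge (toy data).
A = sp.csr_matrix(np.array([[0., 1.],
                            [1., 0.]]))
A_hat = preprocess_adj(A, symmetric=True)

# A + I gives every node degree 2, so D^-1/2 (A + I) D^-1/2 = 0.5 * (A + I).
print(A_hat.todense())  # expected: [[0.5, 0.5], [0.5, 0.5]]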
train.py
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dropout, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2

# graph.py and utils.py above are assumed to live in a `kegra` package
# (kegra/layers/graph.py and kegra/utils.py); adjust these imports to your layout.
from kegra.utils import *
from kegra.layers.graph import GraphConvolution
class Config(object):
    dataset = 'cora'
    filter = 'localpool'    # local pooling filters (see 'renormalization trick' in Kipf & Welling, arXiv 2016)
    # filter = 'chebyshev'  # Chebyshev polynomial basis filters (Defferrard et al., NIPS 2016)
    max_degree = 2          # maximum Chebyshev polynomial degree
    sym_norm = True         # symmetric (True) vs. left-only (False) normalization
    NB_EPOCH = 20
    PATIENCE = 10           # early stopping patience
    support = 1             # number of adjacency inputs consumed by each GraphConvolution layer
    epochs = 100
def convert_sparse_matrix_to_sparse_tensor(x):
    coo = x.tocoo()
    indices = np.vstack((coo.row, coo.col)).transpose()
    return tf.SparseTensor(indices, coo.data, coo.shape)
def get_inputs(adj, x):
    if Config.filter == 'localpool':
        print('Using local pooling filters...')
        adj_ = preprocess_adj(adj, Config.sym_norm)
        adj_ = adj_.todense()
        graph = [x, adj_]
        adj_input = [Input(batch_shape=(None, None), sparse=False, name='adj_input')]
    elif Config.filter == 'chebyshev':
        print('Using Chebyshev polynomial basis filters...')
        L = normalized_laplacian(adj, Config.sym_norm)
        L_scaled = rescale_laplacian(L)
        T_k = chebyshev_polynomial(L_scaled, Config.max_degree)
        support = Config.max_degree + 1
        graph = [x] + T_k
        # each Input needs a unique name, otherwise Keras rejects the model
        adj_input = [Input(batch_shape=(None, None), sparse=False, name='adj_input_' + str(i))
                     for i in range(support)]
    else:
        raise Exception('Invalid filter type.')
    return graph, adj_input
def build_model(x, y, adj_input):
    fea_input = Input(batch_shape=(None, x.shape[1]), name='fea_input')
    net = Dropout(0.2)(fea_input)
    net = GraphConvolution(512, Config.support, activation='relu', kernel_regularizer=l2(5e-4))([net] + adj_input)
    net = Dropout(0.2)(net)
    net = GraphConvolution(256, Config.support, activation='relu', kernel_regularizer=l2(5e-4))([net] + adj_input)
    net = Dropout(0.2)(net)
    net = GraphConvolution(128, Config.support, activation='relu', kernel_regularizer=l2(5e-4))([net] + adj_input)
    net = Dropout(0.2)(net)
    net = GraphConvolution(64, Config.support, activation='relu', kernel_regularizer=l2(5e-4))([net] + adj_input)
    net = Dropout(0.2)(net)
    net = Flatten()(net)
    output = Dense(y.shape[1], activation='softmax')(net)
    # output = GraphConvolution(y.shape[1], Config.support, activation='softmax')([net] + adj_input)
    model = Model(inputs=[fea_input] + adj_input, outputs=output)
    model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.01))
    return model
def train_model(x, y, model, train_mask, y_train, y_val, idx_train, idx_val, batch_size):
    # full-batch training: batch_size equals the number of nodes, so each epoch sees the whole graph
    for i in range(Config.epochs):
        model.fit(x, y, sample_weight=train_mask, batch_size=batch_size, epochs=1, shuffle=False, verbose=1)
        y_pred = model.predict(x, batch_size=batch_size)
        train_val_loss, train_val_acc = evaluate_preds(y_pred, [y_train, y_val], [idx_train, idx_val])
        print("train_loss= {:.2f}".format(train_val_loss[0]), "train_acc= {:.2f}".format(train_val_acc[0]),
              "val_loss= {:.2f}".format(train_val_loss[1]), "val_acc= {:.2f}".format(train_val_acc[1]))
    return model
def estimate_model(model, x, y_test, idx_test, batch_size):
    y_pred = model.predict(x, batch_size=batch_size)
    test_loss, test_acc = evaluate_preds(y_pred, [y_test], [idx_test])
    print("Test set results:", "loss= {:.2f}".format(test_loss[0]), "accuracy= {:.4f}".format(test_acc[0]))
def main():
    x, adj, y = load_data(dataset=Config.dataset)
    batch_size = adj.shape[1]  # one batch = the whole graph (adj is square, N x N)
    x /= x.sum(1).reshape(-1, 1)  # row-normalize the feature matrix
    y_train, y_val, y_test, idx_train, idx_val, idx_test, train_mask = get_splits(y)
    x_graph, adj_input = get_inputs(adj, x)
    model = build_model(x, y, adj_input)
    model = train_model(x_graph, y, model, train_mask, y_train, y_val, idx_train, idx_val, batch_size)
    estimate_model(model, x_graph, y_test, idx_test, batch_size)


if __name__ == '__main__':
    main()
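One caveat from my own reading of the code (not mentioned in the original post): if you switch to the Chebyshev filter, Config.support should be raised to max_degree + 1. Otherwise get_inputs builds max_degree + 1 adjacency Inputs, while each GraphConvolution layer reads Config.support and only consumes the first basis matrix T_0. Something along these lines, as a sketch:

# Hypothetical tweak for the Chebyshev variant (sketch only, not from the original post).
Config.filter = 'chebyshev'
Config.max_degree = 2
Config.support = Config.max_degree + 1  # each layer now expects [X, T_0, T_1, T_2]
main()

Depending on your TensorFlow version, the scipy sparse matrices returned by chebyshev_polynomial may also need to be densified (or passed through convert_sparse_matrix_to_sparse_tensor) before model.fit will accept them.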