GraphAttention
from __future__ import absolute_import
from keras import activations, constraints, initializers, regularizers
from keras import backend as K
from keras.layers import Layer, Dropout, LeakyReLU
class GraphAttention(Layer):
def __init__(self,
F_,
attn_heads=1,
attn_heads_reduction='concat',
dropout_rate=0.5,
activation='relu',
use_bias=True,
kernel_initializer='glorot_uniform',
bias_initializer='zeros',
attn_kernel_initializer='glorot_uniform',
kernel_regularizer=None,
bias_regularizer=None,
attn_kernel_regularizer=None,
activity_regularizer=None,
kernel_constraint=None,
bias_constraint=None,
attn_kernel_constraint=None,
**kwargs):
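# F_ is the output dimensionality of each attention head. With 'concat' the heads are
# concatenated (output size attn_heads * F_, used for hidden layers); with 'average'
# they are averaged (output size F_, used for the final prediction layer).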
if attn_heads_reduction not in {'concat', 'average'}:
raise ValueError('Possible reduction methods: concat, average')
self.F_ = F_
self.attn_heads = attn_heads
self.attn_heads_reduction = attn_heads_reduction
self.dropout_rate = dropout_rate
self.activation = activations.get(activation)
self.use_bias = use_bias
self.kernel_initializer = initializers.get(kernel_initializer)
self.bias_initializer = initializers.get(bias_initializer)
self.attn_kernel_initializer = initializers.get(attn_kernel_initializer)
self.kernel_regularizer = regularizers.get(kernel_regularizer)
self.bias_regularizer = regularizers.get(bias_regularizer)
self.attn_kernel_regularizer = regularizers.get(attn_kernel_regularizer)
self.activity_regularizer = regularizers.get(activity_regularizer)
self.kernel_constraint = constraints.get(kernel_constraint)
self.bias_constraint = constraints.get(bias_constraint)
self.attn_kernel_constraint = constraints.get(attn_kernel_constraint)
self.supports_masking = False
self.kernels = []
self.biases = []
self.attn_kernels = []
if attn_heads_reduction == 'concat':
self.output_dim = self.F_ * self.attn_heads
else:
self.output_dim = self.F_
super(GraphAttention, self).__init__(**kwargs)
def build(self, input_shape):
assert len(input_shape) >= 2
F = input_shape[0][-1]
for head in range(self.attn_heads):
kernel = self.add_weight(shape=(F, self.F_),
initializer=self.kernel_initializer,
regularizer=self.kernel_regularizer,
constraint=self.kernel_constraint,
name='kernel_{}'.format(head))
self.kernels.append(kernel)
if self.use_bias:
bias = self.add_weight(shape=(self.F_, ),
initializer=self.bias_initializer,
regularizer=self.bias_regularizer,
constraint=self.bias_constraint,
name='bias_{}'.format(head))
self.biases.append(bias)
attn_kernel_self = self.add_weight(shape=(self.F_, 1),
initializer=self.attn_kernel_initializer,
regularizer=self.attn_kernel_regularizer,
constraint=self.attn_kernel_constraint,
name='attn_kernel_self_{}'.format(head),)
attn_kernel_neighs = self.add_weight(shape=(self.F_, 1),
initializer=self.attn_kernel_initializer,
regularizer=self.attn_kernel_regularizer,
constraint=self.attn_kernel_constraint,
name='attn_kernel_neigh_{}'.format(head))
self.attn_kernels.append([attn_kernel_self, attn_kernel_neighs])
self.built = True
def call(self, inputs):
X = inputs[0]
A = inputs[1]
outputs = []
for head in range(self.attn_heads):
kernel = self.kernels[head]
attention_kernel = self.attn_kernels[head]
features = K.dot(X, kernel)
attn_for_self = K.dot(features, attention_kernel[0])
attn_for_neighs = K.dot(features, attention_kernel[1])
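# attn_for_self and attn_for_neighs both have shape (N, 1); broadcasting
# (N, 1) + (1, N) below produces the full N x N matrix of raw attention logits e_ij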
dense = attn_for_self + K.transpose(attn_for_neighs)
dense = LeakyReLU(alpha=0.2)(dense)
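# Mask out non-neighbours: adding a large negative value where A == 0 drives their
# softmax weight to (almost) zero, so attention is restricted to the graph edges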
mask = -10e9 * (1.0 - A)
dense += mask
dense = K.softmax(dense)
dropout_attn = Dropout(self.dropout_rate)(dense)
dropout_feat = Dropout(self.dropout_rate)(features)
node_features = K.dot(dropout_attn, dropout_feat)
if self.use_bias:
node_features = K.bias_add(node_features, self.biases[head])
outputs.append(node_features)
if self.attn_heads_reduction == 'concat':
output = K.concatenate(outputs)
else:
output = K.mean(K.stack(outputs), axis=0)
output = self.activation(output)
return output
def compute_output_shape(self, input_shape):
output_shape = input_shape[0][0], self.output_dim
return output_shape
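For reference, each attention head in call() above implements the attention mechanism of the GAT paper (Veličković et al., 2018). Writing W for the head's kernel and a_self, a_neigh for the two halves of its attention kernel, the code computes, for every pair of nodes i, j connected in A:

e_{ij} = \mathrm{LeakyReLU}\left(a_{\mathrm{self}}^{\top} W h_i + a_{\mathrm{neigh}}^{\top} W h_j\right)

\alpha_{ij} = \frac{\exp(e_{ij})}{\sum_{k \in \mathcal{N}_i} \exp(e_{ik})}

h_i' = \sigma\left(\sum_{j \in \mathcal{N}_i} \alpha_{ij} W h_j\right)

where \mathcal{N}_i is the neighbourhood of node i given by the adjacency matrix (enforced by the -10e9 mask before the softmax) and \sigma is the layer's activation. With several heads, the resulting h_i' are concatenated or averaged according to attn_heads_reduction.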
utils
from __future__ import print_function
import os
import pickle as pkl
import sys
import networkx as nx
import numpy as np
import scipy.sparse as sp
def parse_index_file(filename):
"""Parse index file."""
index = []
for line in open(filename):
index.append(int(line.strip()))
return index
def sample_mask(idx, l):
"""Create mask."""
mask = np.zeros(l)
mask[idx] = 1
return np.array(mask, dtype=bool)
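# Illustrative example: sample_mask([0, 2], 5) returns array([ True, False,  True, False, False]).
# These boolean masks are later passed to Keras as sample_weight in gat.py, so the loss and the
# weighted accuracy are only computed on the corresponding train/val/test nodes.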
def load_data(dataset_str):
"""Load data."""
FILE_PATH = os.path.abspath(__file__)
DIR_PATH = os.path.dirname(FILE_PATH)
DATA_PATH = os.path.join(DIR_PATH, 'data/')
"""
加载数据,在data文件夹中可以看见共有Cora,citeseer,pubmed三种数据集,每个分为8个文件(x,y,tx,ty,allx,ally,graph,index)
.x :训练数据的特征向量。
.y :训练集的标签。
.tx :测试集的特征向量。
.ty :训练集的标签。
.allx :训练与测试集的特征向量。
.ally :训练与测试集的标签
.graph :图
.index :测试数据的ID索引
"""
names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
objects = []
for i in range(len(names)):
with open("{}ind.{}.{}".format(DATA_PATH, dataset_str, names[i]), 'rb') as f:
if sys.version_info > (3, 0):
objects.append(pkl.load(f, encoding='latin1'))
else:
objects.append(pkl.load(f))
x, y, tx, ty, allx, ally, graph = tuple(objects)
test_idx_reorder = parse_index_file("{}ind.{}.test.index".format(DATA_PATH, dataset_str))
test_idx_range = np.sort(test_idx_reorder)
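# Citeseer contains a few isolated test nodes that are missing from tx/ty; pad them with
# all-zero feature and label rows so that the indices stay aligned with the graph.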
if dataset_str == 'citeseer':
test_idx_range_full = range(min(test_idx_reorder),
max(test_idx_reorder) + 1)
tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
tx_extended[test_idx_range - min(test_idx_range), :] = tx
tx = tx_extended
ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
ty_extended[test_idx_range - min(test_idx_range), :] = ty
ty = ty_extended
features = sp.vstack((allx, tx)).tolil()
features[test_idx_reorder, :] = features[test_idx_range, :]
adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))
labels = np.vstack((ally, ty))
labels[test_idx_reorder, :] = labels[test_idx_range, :]
idx_test = test_idx_range.tolist()
idx_train = range(len(y))
idx_val = range(len(y), len(y) + 500)
train_mask = sample_mask(idx_train, labels.shape[0])
val_mask = sample_mask(idx_val, labels.shape[0])
test_mask = sample_mask(idx_test, labels.shape[0])
y_train = np.zeros(labels.shape)
y_val = np.zeros(labels.shape)
y_test = np.zeros(labels.shape)
y_train[train_mask, :] = labels[train_mask, :]
y_val[val_mask, :] = labels[val_mask, :]
y_test[test_mask, :] = labels[test_mask, :]
return adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask
def preprocess_features(features):
"""Row-normalize feature matrix and convert to tuple representation"""
rowsum = np.array(features.sum(1))
r_inv = np.power(rowsum, -1).flatten()
r_inv[np.isinf(r_inv)] = 0.
r_mat_inv = sp.diags(r_inv)
features = r_mat_inv.dot(features)
return features.todense()
if __name__ == "__main__":
A, X, Y_train, Y_val, Y_test, idx_train, idx_val, idx_test = load_data('cora')
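As a quick sanity check of preprocess_features: each row of the feature matrix is divided by its row sum (rows summing to zero are left as-is). A minimal sketch, assuming preprocess_features from the utils module above is in scope and using a made-up toy matrix X_demo:
import numpy as np
import scipy.sparse as sp

X_demo = sp.csr_matrix(np.array([[1., 0., 3.],
                                 [0., 2., 2.]]))
print(preprocess_features(X_demo))
# -> [[0.25 0.   0.75]
#     [0.   0.5  0.5 ]]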
Finally, here is gat.py, a usage example of the GAT layer:
from __future__ import division
import numpy as np
from keras.callbacks import EarlyStopping, TensorBoard, ModelCheckpoint
from keras.layers import Input, Dropout
from keras.models import Model
from keras.optimizers import Adam
from keras.regularizers import l2
from keras_gat import GraphAttention
from keras_gat.utils import load_data, preprocess_features
A, X, Y_train, Y_val, Y_test, idx_train, idx_val, idx_test = load_data('cora')
N = X.shape[0]
F = X.shape[1]
n_classes = Y_train.shape[1]
F_ = 8
n_attn_heads = 8
dropout_rate = 0.6
l2_reg = 5e-4/2
learning_rate = 5e-3
epochs = 10000
es_patience = 100
X = preprocess_features(X)
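# Add self-loops to the adjacency matrix so that every node also attends to its own features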
A = A + np.eye(A.shape[0])
X_in = Input(shape=(F,))
A_in = Input(shape=(N,))
dropout1 = Dropout(dropout_rate)(X_in)
graph_attention_1 = GraphAttention(F_,
attn_heads=n_attn_heads,
attn_heads_reduction='concat',
dropout_rate=dropout_rate,
activation='elu',
kernel_regularizer=l2(l2_reg),
attn_kernel_regularizer=l2(l2_reg))([dropout1, A_in])
dropout2 = Dropout(dropout_rate)(graph_attention_1)
graph_attention_2 = GraphAttention(n_classes,
attn_heads=1,
attn_heads_reduction='average',
dropout_rate=dropout_rate,
activation='softmax',
kernel_regularizer=l2(l2_reg),
attn_kernel_regularizer=l2(l2_reg))([dropout2, A_in])
model = Model(inputs=[X_in, A_in], outputs=graph_attention_2)
optimizer = Adam(lr=learning_rate)
model.compile(optimizer=optimizer,
loss='categorical_crossentropy',
weighted_metrics=['acc'])
model.summary()
es_callback = EarlyStopping(monitor='val_weighted_acc', patience=es_patience)
tb_callback = TensorBoard(batch_size=N)
mc_callback = ModelCheckpoint('.\\logs\\best_model.h5',
monitor='val_weighted_acc',
save_best_only=True,
save_weights_only=True)
validation_data = ([X, A], Y_val, idx_val)
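# The whole graph is processed as a single batch (batch_size=N, shuffle=False); the boolean
# masks returned by load_data are passed as sample_weight so that only the training
# (resp. validation) nodes contribute to the loss and to the weighted accuracy.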
model.fit([X, A],
Y_train,
sample_weight=idx_train,
epochs=epochs,
batch_size=N,
validation_data=validation_data,
shuffle=False,
callbacks=[es_callback, tb_callback, mc_callback])
model.load_weights('.\\logs\\best_model.h5')
eval_results = model.evaluate([X, A],
Y_test,
sample_weight=idx_test,
batch_size=N,
verbose=0)
print('Done.\n'
'Test loss: {}\n'
'Test accuracy: {}'.format(*eval_results))
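After evaluation, per-node class predictions for the whole graph can be obtained in a single forward pass. A minimal sketch, continuing the script above (probs and predicted_classes are new names introduced here):
probs = model.predict([X, A], batch_size=N)    # shape (N, n_classes), softmax scores
predicted_classes = np.argmax(probs, axis=1)   # predicted class id for every node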