- 对称邻接矩阵的归一化（symmetric normalization，注意不是"正则化/regularization"）
# [A * D^(-1/2)]^T * D^(-1/2) = D^(-1/2) * A * D^(-1/2)
def sym_adj(adj):
    """Symmetrically normalize an adjacency matrix: D^{-1/2} A D^{-1/2}.

    Isolated nodes (degree 0) get a zero scaling factor instead of inf.
    Returns a dense float32 matrix (todense -> np.matrix; toarray would
    give an ndarray instead).
    """
    sparse_adj = sp.coo_matrix(adj)
    # Row sums are the node degrees; flatten the (n, 1) result to a vector.
    degrees = np.array(sparse_adj.sum(1))
    inv_sqrt_degrees = np.power(degrees, -0.5).flatten()
    # 0-degree rows produce inf under the negative power; zero them out.
    inv_sqrt_degrees[np.isinf(inv_sqrt_degrees)] = 0.
    d_inv_sqrt = sp.diags(inv_sqrt_degrees)  # sparse diagonal D^{-1/2}
    normalized = sparse_adj.dot(d_inv_sqrt).transpose().dot(d_inv_sqrt)
    return normalized.astype(np.float32).todense()
- 非对称邻接矩阵的归一化（随机游走式 normalization：D^{-1} A）
# D^(-1/2) * A
def asym_adj(adj):
    """Row-normalize an adjacency matrix: D^{-1} A (random-walk form)."""
    mat = sp.coo_matrix(adj)
    # Degree of each node = row sum; flattened to a 1-D vector.
    deg = np.array(mat.sum(1))
    deg_inv = np.power(deg, -1).flatten()
    # Guard against division by zero for isolated nodes.
    deg_inv[np.isinf(deg_inv)] = 0.
    d_inv = sp.diags(deg_inv)
    return d_inv.dot(mat).astype(np.float32).todense()
def calculate_normalized_laplacian(adj):
    """Compute the normalized graph Laplacian.

    L = D^{-1/2} (D - A) D^{-1/2} = I - D^{-1/2} A D^{-1/2}, with D = diag(A 1).

    Bug fixes vs. the original:
    - the original called .tocoo() on the dense matrix returned by sym_adj
      (np.matrix has no tocoo); the normalization is now done in sparse form.
    - `sp.identity(np.size(adj)[0])` was invalid (np.size returns an int);
      use the matrix's first dimension instead.

    Returns a scipy sparse matrix.
    """
    adj = sp.coo_matrix(adj)
    # Node degrees; zero-degree nodes would give inf under the -1/2 power.
    rowsum = np.array(adj.sum(1))
    d_inv_sqrt = np.power(rowsum, -0.5).flatten()
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
    # D^{-1/2} A D^{-1/2}, kept sparse so the subtraction below is sparse-sparse.
    normalized_adj = adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo()
    normalized_laplacian = sp.identity(adj.shape[0]) - normalized_adj
    return normalized_laplacian
- 补充:csr_matrix的实现形式
import numpy as np

# Demo: manually reconstruct a dense matrix from CSR components.
data = np.array([1, 2, 3, 4, 5, 6])     # all non-zero values
indices = np.array([0, 2, 2, 0, 1, 2])  # column index of each value
indptr = np.array([0, 2, 3, 6])         # row i's values span data[indptr[i]:indptr[i+1]]
size_n = len(indptr) - 1                 # number of rows
matrix = np.zeros((size_n, size_n))
# (removed unused variable `leng` from the original)
for i in range(size_n):
    # Scatter row i's stored values into their columns.
    for idx in range(indptr[i], indptr[i + 1]):
        matrix[i][indices[idx]] = data[idx]
print(matrix)
[[1. 0. 2.]
[0. 0. 3.]
[4. 5. 6.]]
matrix = np.array([[1., 0., 2.], [0., 0., 3.], [4., 5., 6.]])
# NOTE(review): the variable is named csr_matrix but sp.csc_matrix is called.
# The captured output below lists entries in column-major order, which matches
# CSC — so the code and its output are consistent with each other, but not with
# the CSR heading of this section. Use sp.csr_matrix if CSR was intended
# (the output would then appear in row-major order).
csr_matrix = sp.csc_matrix(matrix)
print(csr_matrix)
(0, 0) 1.0
(2, 0) 4.0
(2, 1) 5.0
(0, 2) 2.0
(1, 2) 3.0
(2, 2) 6.0
3.计算 calculate_scaled_laplacian
def calculate_scaled_laplacian(adj_mx, lambda_max=2, undirected=True):
    """Rescale the normalized Laplacian to 2 L / lambda_max - I.

    When lambda_max is None it is estimated as the largest-magnitude
    eigenvalue of L. Returns a dense float32 matrix.
    """
    if undirected:
        # Symmetrize by taking the elementwise maximum of A and A^T.
        adj_mx = np.maximum.reduce([adj_mx, adj_mx.T])
    laplacian = calculate_normalized_laplacian(adj_mx)
    if lambda_max is None:
        # 'LM' = largest in magnitude; eigsh returns (eigenvalues, eigenvectors).
        eigenvalues, _ = linalg.eigsh(laplacian, 1, which='LM')
        lambda_max = eigenvalues[0]
    laplacian = sp.csc_matrix(laplacian)
    num_nodes = laplacian.shape[0]
    identity = sp.identity(num_nodes, format='csr', dtype=laplacian.dtype)
    scaled = (2 / lambda_max * laplacian) - identity
    return scaled.astype(np.float32).todense()
4. 读取pickle的压缩数据
def load_pickle(pickle_file):
    """Unpickle a file, retrying with latin1 encoding for Python-2 pickles.

    Any other failure is reported and re-raised.
    """
    try:
        with open(pickle_file, 'rb') as f:
            return pickle.load(f)
    except UnicodeDecodeError as e:
        # Typical for pickles written under Python 2; latin1 decodes any byte.
        with open(pickle_file, 'rb') as f:
            return pickle.load(f, encoding='latin1')
    except Exception as e:
        print('Unable to load data', pickle_file, ':', e)
        raise
5. 加载Dataset
# 返回train_loader、val_loader、test_loader及归一化所需的均值与方差参数
def load_dataset(dataset_dir, batch_size, valid_batch_size=None, test_batch_size=None):
    """Load train/val/test splits and wrap them in DataLoaders.

    Reads {train,val,test}.npz from dataset_dir, z-normalizes feature 0 of x
    using statistics from the training split only, and returns a dict with the
    x_/y_ arrays, the three loaders, and the fitted scaler.

    Fix: the optional batch sizes now fall back to `batch_size` instead of
    passing None through to DataLoader.
    """
    if valid_batch_size is None:
        valid_batch_size = batch_size
    if test_batch_size is None:
        test_batch_size = batch_size
    data = {}
    for category in ['train', 'val', 'test']:
        cat_data = np.load(os.path.join(dataset_dir, category + '.npz'))
        data['x_' + category] = cat_data['x']
        data['y_' + category] = cat_data['y']
    # Normalization statistics come from the training split only, then are
    # applied to all three splits (standard practice to avoid leakage).
    scaler = StandardScaler(mean=data['x_train'][..., 0].mean(), std=data['x_train'][..., 0].std())
    for category in ['train', 'val', 'test']:
        data['x_' + category][..., 0] = scaler.transform(data['x_' + category][..., 0])
    data['train_loader'] = DataLoader(data['x_train'], data['y_train'], batch_size)
    data['val_loader'] = DataLoader(data['x_val'], data['y_val'], valid_batch_size)
    data['test_loader'] = DataLoader(data['x_test'], data['y_test'], test_batch_size)
    data['scaler'] = scaler
    return data
6. 使用masked loss
def masked_mse(preds, labels, null_val=np.nan):
    """Mean squared error over entries whose label is not null_val.

    The mask is renormalized by its mean so the result is an average over
    valid entries only; NaNs produced along the way are zeroed out.
    """
    if np.isnan(null_val):
        valid = ~torch.isnan(labels)
    else:
        valid = labels != null_val
    valid = valid.float()
    valid = valid / torch.mean(valid)
    # If no entry is valid, mean(valid) == 0 and the division yields NaN.
    valid = torch.where(torch.isnan(valid), torch.zeros_like(valid), valid)
    sq_err = (preds - labels) ** 2 * valid
    sq_err = torch.where(torch.isnan(sq_err), torch.zeros_like(sq_err), sq_err)
    return torch.mean(sq_err)
def masked_rmse(preds, labels, null_val=np.nan):
    """Root mean squared error over entries whose label is not null_val."""
    mse = masked_mse(preds=preds, labels=labels, null_val=null_val)
    return torch.sqrt(mse)
def masked_mae(preds, labels, null_val=np.nan):
    """Mean absolute error over entries whose label is not null_val.

    Invalid entries are excluded by a mask renormalized by its own mean,
    so the final torch.mean averages over valid positions only.
    """
    if np.isnan(null_val):
        valid = ~torch.isnan(labels)
    else:
        valid = labels != null_val
    valid = valid.float()
    valid = valid / torch.mean(valid)
    # All-invalid input makes the division above produce NaN; zero it.
    valid = torch.where(torch.isnan(valid), torch.zeros_like(valid), valid)
    abs_err = torch.abs(preds - labels) * valid
    abs_err = torch.where(torch.isnan(abs_err), torch.zeros_like(abs_err), abs_err)
    return torch.mean(abs_err)
def masked_mape(preds, labels, null_val=np.nan):
    """Mean absolute percentage error over entries whose label is not null_val.

    Relative error |pred - label| / label; division by a zero label produces
    inf/NaN, and NaNs are zeroed before the final average.
    """
    if np.isnan(null_val):
        valid = ~torch.isnan(labels)
    else:
        valid = labels != null_val
    valid = valid.float()
    valid = valid / torch.mean(valid)
    # Guard against an all-invalid mask (mean == 0 -> NaN after division).
    valid = torch.where(torch.isnan(valid), torch.zeros_like(valid), valid)
    rel_err = torch.abs(preds - labels) / labels * valid
    rel_err = torch.where(torch.isnan(rel_err), torch.zeros_like(rel_err), rel_err)
    return torch.mean(rel_err)
7. 获取衡量的参数
def metric(pred, real):
    """Return (MAE, MAPE, RMSE) with zero labels in `real` masked out.

    torch.Tensor.item(): returns the value of a 1-element tensor as a
    standard Python number.

    Bug fix: MAE previously called masked_mse, so the first value was a
    (masked) squared error mislabeled as MAE.
    """
    mae = masked_mae(pred, real, 0.0).item()
    mape = masked_mape(pred, real, 0.0).item()
    rmse = masked_rmse(pred, real, 0.0).item()
    return mae, mape, rmse