import time
from itertools import product
import numpy as np
from scipy.optimize import linear_sum_assignment
# Print a timestamped message to the console.
def print_log(message):
    """
    Print a message to the console, prefixed with the current local time.

    The output has the form ``[<date> <time>] <message>``.

    :param message: str, the text to print
    :return: None
    """
    timestamp = time.strftime("%Y-%m-%d %X", time.localtime())
    line = "[{}] {}".format(timestamp, message)
    print(line)
# Build the initial label matrix.
def init_label_matrix(y):
    """
    Turn a label vector into an initial (n_nodes, n_classes) label matrix.

    Labeled rows become one-hot vectors; rows labeled -1 (unlabeled) keep a
    uniform value of 1 / n_classes in every column. Columns follow the sorted
    order of the distinct labels other than -1.

    :param y: NumPy array of labels, flattened to shape (n_nodes,);
        -1 marks unlabeled entries, any other value is a class label
    :return: NumPy array of shape (n_nodes, n_classes)
    """
    flat = y.reshape(-1)
    classes = [label for label in np.unique(flat) if label != -1]
    n_classes = len(classes)

    # Start from the uniform distribution, then overwrite the labeled rows.
    Y = np.full((flat.shape[0], n_classes), 1 / n_classes)
    for col, label in enumerate(classes):
        rows = flat == label
        Y[rows] = 0
        Y[rows, col] = 1
    return Y
# Compute classification accuracy.
def calculate_accuracy(F, y_test):
    """
    Return the fraction of rows of F whose arg-max column equals the true label.

    :param F: 2-D array of per-class scores, one row per sample
    :param y_test: true labels, one per sample; any array-like whose flattened
        length equals the number of rows of F (e.g. shape (n,) or (n, 1))
    :return: accuracy as a float in [0, 1]
    :raises ValueError: if y_test does not hold exactly one label per row of F
    """
    predict_y = np.argmax(F, axis=1).reshape(-1)
    # Flatten the labels so that an (n, 1) column vector is compared
    # element-wise instead of broadcasting into an (n, n) matrix.
    truth = np.asarray(y_test).reshape(-1)
    if truth.shape[0] != predict_y.shape[0]:
        raise ValueError(
            "y_test has {} labels but F has {} rows".format(
                truth.shape[0], predict_y.shape[0]
            )
        )
    return np.mean(predict_y == truth)
# Compute the per-class Intersection over Union (IoU).
def iou_socre(pred, target):
    """
    Compute the IoU of every non-zero class between two label arrays.

    Class 0 is skipped; classes 1 .. target.max() are evaluated in order.
    A smoothing term of 1e-6 is added to both numerator and denominator, so a
    class that appears in neither array scores 1.0.

    :param pred: array of predicted class ids
    :param target: array of ground-truth class ids
    :return: list with one IoU value per evaluated class
    """
    smooth = 1e-6

    def _class_iou(cls):
        in_pred, in_target = pred == cls, target == cls
        overlap = (in_pred & in_target).sum()
        combined = (in_pred | in_target).sum()
        return (overlap + smooth) / (combined + smooth)

    return [_class_iou(cls) for cls in range(1, target.max() + 1)]
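# Intersection over Union (IoU) measures how much two sets overlap and is
# commonly used to evaluate object-detection and image-segmentation results.
# For two sets A and B it is defined as:
#     IoU(A, B) = |A ∩ B| / |A ∪ B|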
# Min-max normalization.
def minmax_scale(array, ranges=(0., 1.)):
    """
    Linearly rescale an array to [min, max], default is [0., 1.].

    A constant array (max == min) has no spread to rescale, so every element
    is mapped to the lower bound instead of producing NaN from 0 / 0.

    :param array: ndarray
    :param ranges: tuple, (min, max)
    :return: ndarray of the same shape as `array`
    """
    _min = ranges[0]
    _max = ranges[1]
    lowest = array.min()
    span = array.max() - lowest
    if span == 0:
        return np.full(array.shape, float(_min))
    return (_max - _min) * (array - lowest) / span + _min
# Gather, for every pixel of a 3-D array, the features of its surrounding patch.
def gather_patch_ft(x, patch_size):
    """
    Collect the features of the patch around every position of `x`.

    Positions that fall outside `x` contribute all-zero feature vectors.

    :param x: M x N x C
    :param patch_size: row x column
    :return: array of shape M x N x (k*k*C), where k*k is the number of patch
        cells; patch cells are laid out row-major, each followed by its C
        features
    """
    # x must be 3-D and the patch size must have exactly two entries.
    assert len(x.shape) == 3
    assert len(patch_size) == 2

    n_rows, n_cols = x.shape[:2]
    patch_rows, patch_cols = patch_size

    # Flatten to (M*N) x C and prepend one all-zero row: index 0 then stands
    # for "outside the image", real pixels are shifted to indices 1..M*N.
    flat = x.reshape(-1, x.shape[2])
    flat = np.concatenate([np.zeros((1, flat.shape[1])), flat])

    center_row = (patch_rows + 1) // 2 - 1
    center_col = (patch_cols + 1) // 2 - 1

    # Zero-padded map of (shifted) pixel indices. It is created with an
    # integer dtype so it can be used for indexing directly; the removed
    # `np.long` alias is no longer needed.
    pixel_idx = np.arange(n_rows * n_cols).reshape(n_rows, n_cols)
    idx_pad = np.zeros(
        (n_rows + patch_rows - 1, n_cols + patch_cols - 1), dtype=np.intp
    )
    idx_pad[center_row:center_row + n_rows,
            center_col:center_col + n_cols] = pixel_idx + 1

    # One column per patch cell: the index of that cell for every pixel.
    columns = [
        idx_pad[r:r + n_rows, c:c + n_cols].reshape(-1, 1)
        for r, c in product(range(patch_rows), range(patch_cols))
    ]
    out_idx = np.concatenate(columns, axis=1)  # MN x kk

    out = flat[out_idx.reshape(-1)]  # MNkk x C
    return out.reshape(n_rows, n_cols, -1)  # M x N x kkC
# Compute clustering accuracy.
def calculate_clustering_accuracy(y_gnd, y_pred):
    """
    Return the clustering accuracy under the best cluster-to-class matching.

    A contingency table between ground-truth classes and predicted clusters is
    built, the Hungarian algorithm picks the one-to-one matching with the
    largest total overlap, and that overlap is divided by the sample count.

    Labels are re-indexed internally, so they need not be the contiguous
    integers 0..k-1, and the number of predicted clusters may differ from the
    number of ground-truth classes.

    :param y_gnd: array-like of ground-truth labels, one per sample
    :param y_pred: array-like of predicted cluster labels, one per sample
    :return: accuracy as a float in [0, 1]
    :raises ValueError: if the two inputs hold a different number of samples
    """
    y_pred = np.asarray(y_pred).reshape(-1)
    y_gnd = np.asarray(y_gnd).reshape(-1)
    n_samples = y_gnd.shape[0]
    if y_pred.shape[0] != n_samples:
        raise ValueError(
            "y_gnd has {} samples but y_pred has {}".format(
                n_samples, y_pred.shape[0]
            )
        )

    # Map arbitrary label values to row / column indices of the table.
    gnd_values, gnd_idx = np.unique(y_gnd, return_inverse=True)
    pred_values, pred_idx = np.unique(y_pred, return_inverse=True)

    contingency = np.zeros(
        (gnd_values.shape[0], pred_values.shape[0]), dtype=np.int64
    )
    # np.add.at accumulates repeated (row, col) pairs, unlike fancy `+=`.
    np.add.at(contingency, (gnd_idx.reshape(-1), pred_idx.reshape(-1)), 1)

    # linear_sum_assignment minimizes cost, so negate to maximize overlap.
    row_idx, col_idx = linear_sum_assignment(-contingency)
    return contingency[row_idx, col_idx].sum() / n_samples
from . import generation
from . import learning
from . import hyperg
from . import utils
from .version import __version__
# Names exported by a star-import of this package: the four submodules
# imported above.
__all__ = [
'generation',
'learning',
'hyperg',
'utils'
]
# For background on what an __init__.py file is, this article explains it well:
# 【python】__init__.py文件到底是什么? - 知乎 (zhihu.com)
# Package version string; the package __init__ re-exports it via
# `from .version import __version__`.
__version__ = '0.0.3'
import time
import scipy.sparse as sparse
import numpy as np
class HyperG:
    """
    Hypergraph described by a sparse incidence matrix H, optional node
    features X and a hyperedge weight vector w.

    Degree matrices, the Theta matrix and the Laplacian are computed on first
    request and cached; the update methods drop the caches that depend on the
    value being replaced.
    """

    def __init__(self, H, X=None, w=None):
        """
        Store the incidence matrix, node features and hyperedge weights.

        :param H: scipy coo_matrix of shape (n_nodes, n_edges)
        :param X: numpy array of shape (n_nodes, n_features)
        :param w: numpy array of shape (n_edges,)
        """
        # H must be a 2-D sparse matrix.
        assert sparse.issparse(H)
        assert H.ndim == 2

        self._H = H
        self._n_nodes = self._H.shape[0]
        self._n_edges = self._H.shape[1]

        # X, when given, must be a 2-D numpy array.
        if X is not None:
            assert isinstance(X, np.ndarray) and X.ndim == 2
            self._X = X
        else:
            self._X = None

        # Hyperedge weights: flatten the given vector, or default to all ones.
        if w is not None:
            self.w = w.reshape(-1)
            assert self.w.shape[0] == self._n_edges
        else:
            self.w = np.ones(self._n_edges)

        self._DE = None     # cached edge degree matrix
        self._DV = None     # cached node degree matrix
        self._INVDE = None  # cached inverse of the edge degree matrix
        self._DV2 = None    # cached inverse square root of the node degree matrix
        self._THETA = None  # cached Theta matrix
        self._L = None      # cached Laplacian

    @staticmethod
    def _zero_safe_power(values, exponent):
        """Raise non-zero entries to `exponent`; zero entries stay 0."""
        values = np.asarray(values, dtype=float)
        result = np.zeros_like(values)
        nonzero = values != 0
        result[nonzero] = np.power(values[nonzero], exponent)
        return result

    def num_edges(self):
        """Return the number of hyperedges (columns of H)."""
        return self._n_edges

    def num_nodes(self):
        """Return the number of nodes (rows of H)."""
        return self._n_nodes

    def incident_matrix(self):
        """Return the incidence matrix H."""
        return self._H

    def hyperedge_weights(self):
        """Return the hyperedge weight vector w."""
        return self.w

    def node_features(self):
        """Return the node feature matrix X (None if it was not provided)."""
        return self._X

    def node_degrees(self):
        """Return the diagonal node degree matrix DV, with diagonal H @ w."""
        if self._DV is None:
            H = self._H.tocsr()
            dv = np.asarray(H.dot(self.w.reshape(-1, 1))).reshape(-1)
            self._DV = sparse.diags(dv, shape=(self._n_nodes, self._n_nodes))
        return self._DV

    def edge_degrees(self):
        """Return the diagonal edge degree matrix DE (column sums of H)."""
        if self._DE is None:
            # np.asarray handles both the np.matrix and the ndarray that
            # sparse `sum` may return; `.A` exists only on np.matrix.
            de = np.asarray(self._H.sum(axis=0)).reshape(-1)
            self._DE = sparse.diags(de, shape=(self._n_edges, self._n_edges))
        return self._DE

    def inv_edge_degrees(self):
        """
        Return the inverse of the edge degree matrix.

        Hyperedges of degree 0 get an inverse of 0 rather than inf.
        """
        if self._INVDE is None:
            self.edge_degrees()
            inv_de = self._zero_safe_power(self._DE.data.reshape(-1), -1.)
            self._INVDE = sparse.diags(inv_de, shape=(self._n_edges, self._n_edges))
        return self._INVDE

    def inv_square_node_degrees(self):
        """
        Return the inverse square root of the node degree matrix.

        Nodes of degree 0 get a value of 0 rather than inf.
        """
        if self._DV2 is None:
            self.node_degrees()
            dv2 = self._zero_safe_power(self._DV.data.reshape(-1), -0.5)
            self._DV2 = sparse.diags(dv2, shape=(self._n_nodes, self._n_nodes))
        return self._DV2

    def theta_matrix(self):
        """Return Theta = DV^-1/2 H W DE^-1 H^T DV^-1/2, with W = diag(w)."""
        if self._THETA is None:
            dv2 = self.inv_square_node_degrees()
            inv_de = self.inv_edge_degrees()
            W = sparse.diags(self.w)
            self._THETA = (
                dv2.dot(self._H).dot(W).dot(inv_de).dot(self._H.T).dot(dv2)
            )
        return self._THETA

    def laplacian(self):
        """Return the Laplacian L = I - Theta."""
        if self._L is None:
            self._L = sparse.eye(self._n_nodes) - self.theta_matrix()
        return self._L

    def update_hyedge_weights(self, w):
        """
        Replace the hyperedge weights and drop every cache derived from them.

        :param w: numpy array or list holding n_edges weights
        """
        assert isinstance(w, (np.ndarray, list)), \
            "w must be a numpy array or a list"
        # Validate the converted vector before storing it, so a failed check
        # leaves the current weights untouched and list input is accepted.
        new_w = np.array(w).reshape(-1)
        assert new_w.shape[0] == self._n_edges
        self.w = new_w

        # DE and INVDE depend on H only, so they stay valid.
        self._DV = None
        self._DV2 = None
        self._THETA = None
        self._L = None

    def update_incident_matrix(self, H):
        """
        Replace the incidence matrix; its shape must stay (n_nodes, n_edges).

        :param H: 2-D scipy sparse matrix of shape (n_nodes, n_edges)
        """
        assert sparse.issparse(H)
        assert H.ndim == 2
        assert H.shape[0] == self._n_nodes
        assert H.shape[1] == self._n_edges

        # Every cached matrix is computed from H, so all of them are stale.
        self._H = H
        self._DE = None
        self._DV = None
        self._INVDE = None
        self._DV2 = None
        self._THETA = None
        self._L = None
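# NOTE: it may not be obvious why update_incident_matrix clears the cached
# matrices. Each of them (DE, DV, INVDE, DV2, THETA, L) is computed from H,
# so once H is replaced they are stale and must be rebuilt on next access.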
class IMHL:
    """
    Container pairing a list `M` with an array `omega` of matching length.

    NOTE(review): presumably `omega` holds one weight per element of `M`;
    confirm against the callers.
    """

    def __init__(self, M, omega):
        """
        :param M: list
        :param omega: numpy array whose first dimension equals len(M)
        """
        inputs_ok = (
            isinstance(M, list)
            and isinstance(omega, np.ndarray)
            and len(M) == omega.shape[0]
        )
        assert inputs_ok

        self.M, self.omega = M, omega