SchNet + attention code reading notes

While reading the SchNet code I kept forgetting the definitions and meanings of some variables, so below is a record and summary.

class SchNet(torch.nn.Module):
	def __init__(self, hidden_channels, num_filters, num_interactions, num_gaussians, cutoff,
				readout='add', dipole=False, mean=None, std=None, atomref=None):
		super(SchNet, self).__init__()
		...
	
	def forward(self, z, pos, batch):
		# z is a 1-D tensor (z.dim() == 1, shape [N]) holding the atomic numbers. pos is an N_atoms x 3 tensor of coordinates. batch is the batch assignment vector mapping each atom to its graph in the mini-batch (None means a single graph).
		assert z.dim()==1 and z.dtype==torch.long
		batch = torch.zeros_like(z) if batch is None else batch
		
		# h is the embedding layer applied to z: nn.Embedding(100, hidden_channels). An embedding maps a
		# discrete space whose size is the vocabulary size into a much lower-dimensional continuous space
		# (typically 128 or 256 dims); each token is mapped to a real-valued vector. An input of shape N*M
		# gives an output of shape N*M*embedding_dim; the input values are token indices, here the atomic numbers.
		h = self.embedding(z)
		
		edge_index = radius_graph(pos, r=self.cutoff, batch=batch)
		row, col = edge_index
		edge_weight = (pos[row] - pos[col]).norm(dim=-1)
		edge_attr = self.distance_expansion(edge_weight)
	
		# self.interactions is a ModuleList of length num_interactions, filled with InteractionBlock modules:
		# ModuleList().append(InteractionBlock(hidden_channels, num_gaussians, num_filters, cutoff))
		for interaction in self.interactions:
			h = h + interaction(h, edge_index, edge_weight, edge_attr)
		...
		out = scatter(h, batch, dim=0, reduce=self.readout)
		# scatter groups the rows of src by index: entries whose index value is the same are reduced with
		# the chosen operation along dimension dim (reduce='sum' gives a segment sum, reduce='mean' a
		# segment mean, and so on). Here it pools the per-atom features h into one output per molecule.
		# def scatter(src: torch.Tensor, index: torch.Tensor, dim: int = -1,
		#             out: Optional[torch.Tensor] = None, dim_size: Optional[int] = None,
		#             reduce: str = "sum") -> torch.Tensor:
		...
		return out
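
To make the readout step concrete, here is a minimal toy example of scatter (assuming the torch_scatter package, which provides the signature quoted above):

import torch
from torch_scatter import scatter

h = torch.arange(20, dtype=torch.float).view(5, 4)   # per-atom features: 5 atoms, hidden size 4
batch = torch.tensor([0, 0, 0, 1, 1])                 # atom -> molecule assignment (2 molecules)

out = scatter(h, batch, dim=0, reduce='sum')          # shape [2, 4]: one row per molecule
# rows 0-2 of h are summed into out[0], rows 3 and 4 into out[1]
print(out.shape)  # torch.Size([2, 4])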
		

The definition of the interaction module is where the novelty of this work lies. The CFConv module inherits from the MessagePassing class and uses its propagate method.

class InteractionBlock(torch.nn.Module):
	def __init__(self, hidden_channels, num_gaussians, num_filters, cutoff):
		super(InteractionBlock, self).__init__()
		self.mlp = Sequential(Linear(num_gaussians, num_filters), ShiftedSoftplus(), Linear(num_filters, num_filters))
		self.conv = CFConv(hidden_channels, hidden_channels, num_filters, self.mlp, cutoff)
		self.act = ShiftedSoftplus()
		self.lin = Linear(hidden_channels, hidden_channels)
		...
	def forward(self, x, edge_index, edge_weight, edge_attr):
		x = self.conv(x, edge_index, edge_weight, edge_attr)
		x = self.act(x)
		x = self.lin(x)
		return x
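
The CFConv itself does not appear in my notes above, so here is a condensed sketch in the spirit of the torch_geometric implementation (weight initialization and other details omitted; a sketch, not the exact source):

import math
import torch
import torch.nn.functional as F
from torch.nn import Linear
from torch_geometric.nn import MessagePassing

class ShiftedSoftplus(torch.nn.Module):
    """softplus(x) - log(2), so that the activation is zero at x = 0."""
    def __init__(self):
        super().__init__()
        self.shift = math.log(2.0)

    def forward(self, x):
        return F.softplus(x) - self.shift

class CFConv(MessagePassing):
    """Continuous-filter convolution: neighbor features modulated by filters generated from distances."""
    def __init__(self, in_channels, out_channels, num_filters, nn, cutoff):
        super().__init__(aggr='add')
        self.lin1 = Linear(in_channels, num_filters, bias=False)
        self.lin2 = Linear(num_filters, out_channels)
        self.nn = nn              # the filter-generating MLP (self.mlp in InteractionBlock)
        self.cutoff = cutoff

    def forward(self, x, edge_index, edge_weight, edge_attr):
        # cosine cutoff: filter values decay smoothly to zero as the distance approaches self.cutoff
        C = 0.5 * (torch.cos(edge_weight * math.pi / self.cutoff) + 1.0)
        W = self.nn(edge_attr) * C.view(-1, 1)        # [num_edges, num_filters]
        x = self.lin1(x)
        x = self.propagate(edge_index, x=x, W=W)      # aggregate messages from neighbors (sum)
        x = self.lin2(x)
        return x

    def message(self, x_j, W):
        # x_j: features of the source node of each edge, modulated element-wise by the learned filter
        return x_j * W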

In addition, I also read the Perceiver code; here is a summary of some of the issues I ran into. It uses the einops library, which is dedicated to tensor rearrangement operations.
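
To make the rearrange / repeat patterns used below concrete, a few toy einops calls (shapes invented for illustration):

import torch
from einops import rearrange, repeat

x = torch.randn(2, 6, 32)                          # [b, n, h*d] with h = 4 heads, d = 8
y = rearrange(x, 'b n (h d) -> (b h) n d', h=4)    # split heads and fold them into the batch: [8, 6, 8]
z = rearrange(y, '(b h) n d -> b n (h d)', h=4)    # inverse operation, back to [2, 6, 32]
mask = torch.ones(2, 6).bool()
m = repeat(mask, 'b j -> (b h) () j', h=4)         # broadcast the mask across heads: [8, 1, 6]
print(y.shape, z.shape, m.shape)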


class Attention(nn.Module):
    def __init__(self, query_dim, context_dim = None, heads = 8, dim_head = 64):
        super().__init__()
        inner_dim = dim_head * heads
        context_dim = default(context_dim, query_dim)
        self.scale = dim_head ** -0.5
        self.heads = heads

        self.to_q = nn.Linear(query_dim, inner_dim, bias = False)
        self.to_kv = nn.Linear(context_dim, inner_dim * 2, bias = False)
        self.to_out = nn.Linear(inner_dim, query_dim)



    def forward(self, x, context = None, mask = None):
        h = self.heads

        q = self.to_q(x)
        context = default(context, x)
        # split the joint projection into the k and v tensors used in attention
        k, v = self.to_kv(context).chunk(2, dim = -1)

        # einops: a pattern such as 'h w c -> c h w' permutes dimensions in one call, making tensor
        # manipulation explicit ("write what you think"); that is the main appeal of the library.
        # Reshape q, k, v from [b, n, h*d] to [(b*h), n, d]; their last dim is inner_dim = heads * dim_head.
        q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h = h), (q, k, v))

        # Einstein summation: batched query-key dot products, scaled by dim_head ** -0.5
        sim = einsum('b i d, b j d -> b i j', q, k) * self.scale
        if exists(mask):
            mask = rearrange(mask, 'b ... -> b (...)')
            max_neg_value = -torch.finfo(sim.dtype).max
            mask = repeat(mask, 'b j -> (b h) () j', h = h)
            sim.masked_fill_(~mask, max_neg_value)

        # attention, what we cannot get enough of
        attn = sim.softmax(dim = -1)

        out = einsum('b i j, b j d -> b i d', attn, v)
        out = rearrange(out, '(b h) n d -> b n (h d)', h = h)
        return self.to_out(out)
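
A quick smoke test of the module above with made-up shapes (this assumes the repo's exists / default helpers are in scope, as in the original file): cross-attending from 3 latent vectors to a context of length 10.

import torch

attn = Attention(query_dim=64, context_dim=32, heads=8, dim_head=16)
latents = torch.randn(2, 3, 64)        # [batch, num_latents, query_dim]
context = torch.randn(2, 10, 32)       # [batch, seq_len, context_dim]
out = attn(latents, context=context)   # cross-attention: queries from latents, keys/values from context
print(out.shape)                       # torch.Size([2, 3, 64])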

Graph attention network (GAT) code reading: https://github.com/Diego999/pyGAT/blob/master/layers.py

class GraphAttentionLayer(nn.Module):
    """
    Simple GAT layer, similar to https://arxiv.org/abs/1710.10903
    """
    def __init__(self, in_features, out_features, dropout, alpha, concat=True):
        super(GraphAttentionLayer, self).__init__()
        self.dropout = dropout
        self.in_features = in_features
        self.out_features = out_features
        self.alpha = alpha
        self.concat = concat
		
        # nn.Parameter can be thought of as a type conversion: it turns a plain (non-trainable) Tensor
        # into a trainable Parameter and registers it on this module, so self.W becomes part of the model
        # and is updated during training. The point of using it is to let these weights be optimized.

        self.W = nn.Parameter(torch.empty(size=(in_features, out_features)))
        # Initialization: Xavier uniform, i.e. samples from U(-a, a). Xavier initialization helps avoid
        # vanishing gradients and loss of nonlinearity. Note that it works well with tanh but poorly with
        # ReLU, which is why Kaiming He proposed a ReLU-specific (Kaiming) initialization.

        nn.init.xavier_uniform_(self.W.data, gain=1.414)
        self.a = nn.Parameter(torch.empty(size=(2*out_features, 1)))
        nn.init.xavier_uniform_(self.a.data, gain=1.414)

        self.leakyrelu = nn.LeakyReLU(self.alpha)

    def forward(self, h, adj):
        Wh = torch.mm(h, self.W) # h.shape: (N, in_features), Wh.shape: (N, out_features)
        e = self._prepare_attentional_mechanism_input(Wh)

        zero_vec = -9e15*torch.ones_like(e)
        attention = torch.where(adj > 0, e, zero_vec)
        attention = F.softmax(attention, dim=1)
        attention = F.dropout(attention, self.dropout, training=self.training)
        h_prime = torch.matmul(attention, Wh)

        if self.concat:
            return F.elu(h_prime)
        else:
            return h_prime

    def _prepare_attentional_mechanism_input(self, Wh):
        # Wh.shape (N, out_feature)
        # self.a.shape (2 * out_feature, 1)
        # Wh1&2.shape (N, 1)
        # e.shape (N, N)
        Wh1 = torch.matmul(Wh, self.a[:self.out_features, :])
        Wh2 = torch.matmul(Wh, self.a[self.out_features:, :])
        # broadcast add
        e = Wh1 + Wh2.T
        return self.leakyrelu(e)
    # To display an instance of a class as a str, implement the special method __str__().
    # Both __repr__ and __str__ are for display: __str__ is aimed at users, __repr__ at developers.

    def __repr__(self):
        return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')'
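
A minimal usage sketch of this layer on a toy graph (random features and a hand-written adjacency matrix; the dropout and alpha values are arbitrary):

import torch

layer = GraphAttentionLayer(in_features=5, out_features=8, dropout=0.1, alpha=0.2, concat=True)
h = torch.randn(4, 5)                              # 4 nodes, 5 input features each
adj = torch.tensor([[1., 1., 0., 0.],
                    [1., 1., 1., 0.],
                    [0., 1., 1., 1.],
                    [0., 0., 1., 1.]])              # 1 where an edge (or self-loop) exists
h_prime = layer(h, adj)                            # attention is restricted to entries where adj > 0
print(layer)                                       # GraphAttentionLayer (5 -> 8)
print(h_prime.shape)                               # torch.Size([4, 8])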
