A Simple Python Implementation of Metapath2vec

The full code for this post is on GitHub: https://github.com/Mrxiahelong/Unsupervised-Author-Disambiguation/

Here we use three graph structures: paper-coauthor-paper, paper-cotitle-paper, and paper-covenue-paper. In other words, there is one node type and three edge types. Our metapath scheme is coauthor-covenue-coauthor-cotitle (see the metapath_type list in the code below).

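The three graphs (coauthor_graph, cotitle_graph, covenue_graph) used below are built from the dataset in the repository. As a minimal sketch of what such a construction might look like with DGL (the edge lists and node count here are placeholders; the real preprocessing lives in the repo):

import dgl
import torch

# Hypothetical edge lists: papers i and j are connected when they share an
# author / a title word / a venue. The actual extraction is in the repo.
src = torch.tensor([0, 1, 2])   # placeholder source paper ids
dst = torch.tensor([1, 0, 3])   # placeholder destination paper ids
num_papers = 4                  # placeholder; sum_papers in the real code

coauthor_graph = dgl.graph((src, dst), num_nodes=num_papers)
# cotitle_graph and covenue_graph are built the same way from their own edge
# lists; the per-edge weights (weights_coauthor, ...) also come from the data.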

import numpy as np
from tqdm import tqdm

def positive_sampler(path):
    '''
    Slide a window of size `window` over one walk and emit (center, context) pairs.
    E.g. for the path 0 1 2 3 4 with window = 2, this returns
    pos_u = [0, 0, 1, 1, 1, ...], pos_v = [1, 2, 0, 2, 3, ...].
    '''
    pos_u, pos_v = [], []
    if len(path) < 2:   # a single-node walk yields no context pairs
        return pos_u, pos_v
    for i in range(len(path)):
        u = path[i]
        # context = up to `window` nodes on each side of position i
        v = list(path[max(i - window, 0):i]) + list(path[i + 1:i + window + 1])
        pos_u.extend([u] * len(v))
        pos_v.extend(v)
    return pos_u, pos_v
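A quick sanity check of the sliding window (assuming window = 2, as set further below):

# expected behaviour for a walk of length 5 with window = 2
pos_u, pos_v = positive_sampler([0, 1, 2, 3, 4])
# pos_u == [0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4]
# pos_v == [1, 2, 0, 2, 3, 0, 1, 3, 4, 1, 2, 4, 2, 3]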
def get_negative_ratio(metapath):
    '''
    Build a negative-sampling distribution over all nodes from their frequency
    in the walks: the more often a node appears, the more likely it is to be
    drawn as a negative sample. Returns the sampling probability of each node.
    '''
    node_frequency = dict()
    for path in metapath:
        for node in path:
            node_frequency[node] = node_frequency.get(node, 0) + 1
    # frequency ** 0.75, as in word2vec; entries are ordered by node id
    pow_frequency = np.array([freq for _, freq in sorted(node_frequency.items())]) ** 0.75
    ratio = pow_frequency / np.sum(pow_frequency)
    return ratio
def negative_sampler(path, ratio, nodes):
    '''
    Draw negative samples according to the probability table `ratio` produced
    by get_negative_ratio, rejecting any node that occurs in `path`.
    '''
    negatives_size = 5
    negatives = []
    while len(negatives) < negatives_size:
        temp = np.random.choice(nodes, size=negatives_size - len(negatives), replace=False, p=ratio)
        negatives.extend([node for node in temp if node not in path])
    return negatives
def create_node2node_dict(graph):
    '''
    Takes a graph built with DGL and returns a dict mapping each node id to
    the list of node ids reachable from it in one hop in that graph.
    '''
    src_dst = {}
    for src, dst in zip(graph.edges()[0], graph.edges()[1]):
        src, dst = src.item(), dst.item()
        if src not in src_dst:
            src_dst[src] = []
        src_dst[src].append(dst)
    return src_dst
window = 2          # sliding-window size used when extracting pairs from a walk
metapaths = []      # all generated walks
num_walks = 10      # how many walks to start from each node
walk_len = 100      # length of each walk
metapath_type = ['coauthor', 'covenue', 'coauthor', 'cotitle']  # following the paper, the authors use AVAT

# One dict per graph: key = node id, value = the node ids reachable from key in that graph
edge_per_graph = {}
edge_per_graph['coauthor'] = create_node2node_dict(coauthor_graph)
edge_per_graph['cotitle'] = create_node2node_dict(cotitle_graph)
edge_per_graph['covenue'] = create_node2node_dict(covenue_graph)
weights_all_graph = {'coauthor': weights_coauthor, 'cotitle': weights_cotitle, 'covenue': weights_covenue}

def Is_isolate(node):
    # a node is isolated if it has no outgoing edge in any of the three graphs
    for rel in metapath_type:
        if node in edge_per_graph[rel]:
            return 0
    return 1
for walk in tqdm(range(num_walks)):
    for cur_node in list(range(len(labels))):   # start num_walks walks from every node
        if Is_isolate(cur_node):
            continue
        path = [cur_node]
        # Reconstructed walk loop (the original post was truncated here): cycle
        # through metapath_type, uniformly sampling a neighbor in the matching
        # graph; the repo version may also use the edge weights in weights_all_graph.
        while len(path) < walk_len:
            rel = metapath_type[(len(path) - 1) % len(metapath_type)]
            if path[-1] not in edge_per_graph[rel]:
                break   # current node has no outgoing edge of this relation
            path.append(np.random.choice(edge_per_graph[rel][path[-1]]))
        metapaths.append(path)

The metapaths obtained here are all the walks. The elements of pos_us and pos_vs pair up one-to-one as positive pairs, and neg_vs holds the negative samples; these correspond to the u_i, u_c, u_j of the skip-gram model, as follows.
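A minimal sketch of how metapaths can be turned into pos_us, pos_vs, and neg_vs with the samplers above (this step is an assumption; it presumes node ids run from 0 to N-1 and every node appears in at least one walk, so that `ratio` and `nodes` line up):

nodes = np.arange(len(labels))      # assumption: node ids are 0..N-1
ratio = get_negative_ratio(metapaths)

pos_us, pos_vs, neg_vs = [], [], []
for path in metapaths:
    pos_u, pos_v = positive_sampler(path)
    pos_us.extend(pos_u)
    pos_vs.extend(pos_v)
    # one group of 5 negatives per positive pair, giving neg_vs shape (num_pairs, 5)
    for _ in pos_u:
        neg_vs.append(negative_sampler(path, ratio, nodes))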


# plain metapath2vec (skip-gram with negative sampling)
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import init

"""
    u_embedding: Embedding for center word.
    v_embedding: Embedding for neighbor words.
"""


class SkipGramModel(nn.Module):

    def __init__(self, emb_size, emb_dimension):
        super(SkipGramModel, self).__init__()
        self.emb_size = emb_size
        self.emb_dimension = emb_dimension
        self.u_embeddings = nn.Embedding(emb_size, emb_dimension)
        self.v_embeddings = nn.Embedding(emb_size, emb_dimension)

        initrange = 1.0 / self.emb_dimension
        init.uniform_(self.u_embeddings.weight.data, -initrange, initrange)
        init.constant_(self.v_embeddings.weight.data, 0)

    def forward(self, pos_u, pos_v, neg_v):
        emb_u = self.u_embeddings(pos_u)        # (batch, dim) center nodes
        emb_v = self.v_embeddings(pos_v)        # (batch, dim) context nodes
        emb_neg_v = self.v_embeddings(neg_v)    # (batch, num_neg, dim) negatives

        # positive-pair term: -log sigmoid(u . v), clamped for numerical stability
        score = torch.sum(torch.mul(emb_u, emb_v), dim=1)
        score = torch.clamp(score, max=10, min=-10)
        score = -F.logsigmoid(score)

        # negative-pair term: -sum_j log sigmoid(-u . v_j)
        neg_score = torch.bmm(emb_neg_v, emb_u.unsqueeze(2)).squeeze()
        neg_score = torch.clamp(neg_score, max=10, min=-10)
        neg_score = -torch.sum(F.logsigmoid(-neg_score), dim=1)

        return torch.mean(score + neg_score)
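For reference, the forward pass above is the standard skip-gram negative-sampling loss, with the dot products clamped to [-10, 10] for numerical stability:

$$\mathcal{L} = -\log \sigma(u_i \cdot v_c) \;-\; \sum_{j=1}^{K} \log \sigma(-u_i \cdot v_j), \qquad K = 5$$

where $u_i$ is the center-node embedding (from u_embeddings), $v_c$ the context-node embedding, and $v_j$ the embeddings of the $K$ negative samples (both from v_embeddings).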
skip_model = SkipGramModel(sum_papers, 64)
optimizer = torch.optim.Adam(skip_model.parameters(), lr=0.001)
losses = []
# build the index tensors once instead of re-creating them every epoch
pos_us_t = torch.tensor(pos_us)
pos_vs_t = torch.tensor(pos_vs)
neg_vs_t = torch.tensor(neg_vs)
for epoch in range(500):
    optimizer.zero_grad()
    loss = skip_model(pos_us_t, pos_vs_t, neg_vs_t)
    loss.backward()
    optimizer.step()
    losses.append(loss.item())
    if epoch % 100 == 0:
        print('epoch {0}  loss {1}'.format(epoch, loss.item()))
embedding = skip_model.u_embeddings.weight.cpu().data.numpy()

The embedding here is the learned vector for each node, which can be used for downstream tasks.
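For example, for the author-disambiguation task this repo targets, one might cluster the paper embeddings. A hypothetical sketch using scikit-learn (the cluster count is a placeholder, not a value from the original post):

from sklearn.cluster import KMeans

kmeans = KMeans(n_clusters=10, random_state=0).fit(embedding)  # 10 is a placeholder
cluster_ids = kmeans.labels_   # one cluster id per paper node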
