数据_常用命令_Python

打印计数器

def flushPrint(variable):
    """Redraw the current console line with the text form of ``variable``.

    Writes a carriage return followed by the ``%s`` rendering of
    ``variable`` to ``sys.stdout`` and flushes immediately. No newline is
    written, so successive calls overwrite each other on a terminal
    (handy as a loop counter).

    Args:
        variable: Any object; it is formatted with ``%s``.
    """
    # Wrap the operand in a 1-tuple: a bare tuple on the right of % is
    # treated as the argument list, so flushPrint((1, 2)) used to raise
    # TypeError and flushPrint((3,)) printed "3" instead of "(3,)".
    sys.stdout.write('\r%s' % (variable,))
    sys.stdout.flush()

单变量最小二乘(OLS)回归拟合

def OLSRegressFit(x,y):
    """Fit ``y = constant + beta * x`` by ordinary least squares.

    ``sm`` is presumably ``statsmodels.api`` (imported outside this block).

    Args:
        x: Observations of the single explanatory variable.
        y: Observations of the response variable.

    Returns:
        The list ``[constant, beta, r2]``: the two fitted parameters in the
        order the model reports them (constant column first, because it is
        prepended to the design matrix), followed by the fit's R-squared.
    """
    # Prepend the constant column so the parameters unpack as
    # (intercept, slope).
    design = sm.add_constant(x, prepend=True)
    fit = sm.OLS(y, design).fit()
    intercept, slope = fit.params
    return [intercept, slope, fit.rsquared]

双对数坐标系下线性回归拟合画图(幂律函数)

def alloPlot(x,y,col,lab):
    """Plot ``y`` against ``x`` as points and overlay a power-law fit.

    The exponent comes from an OLS regression of ``log(y + 1)`` on
    ``log(x + 1)``. Both the points and the fitted curve are drawn with
    ``plt.plot`` in colour ``col``; nothing is returned. The function does
    not switch the axes to log scale and does not create a legend.

    Args:
        x: Numeric array of x values (must support ``x + 1`` and
            ``x ** beta`` element-wise, e.g. a NumPy array).
        y: Numeric array of y values (must support ``y + 1``).
        col: Colour passed to ``plt.plot`` for both artists.
        lab: Label prefix for the fitted curve; a space and the fitted
            exponent rounded to 2 decimals are appended.
    """
    shifted_log_x = np.log(x + 1)
    shifted_log_y = np.log(y + 1)
    design = sm.add_constant(shifted_log_x, prepend=True)
    intercept, exponent = sm.OLS(shifted_log_y, design).fit().params

    plt.plot(x, y, "o", color=col)

    # NOTE(review): the regression is run on the +1-shifted values, but the
    # curve below is evaluated on the unshifted x (no +1, no -1 on the
    # result). Confirm whether that approximation is intended.
    fitted_curve = np.exp(intercept) * x**exponent
    curve_label = lab + ' ' + str(np.round(exponent, 2))
    plt.plot(x, fitted_curve, color=col, label=curve_label)

纵轴对数坐标下线性回归拟合(指数函数)

def semilogPlot(x,y,col,lab):
    """Plot ``y`` against ``x`` as points and overlay an exponential fit.

    The growth rate comes from an OLS regression of ``log(y)`` on ``x``;
    the overlaid curve is ``exp(constant + beta * x)``. Both artists are
    drawn with ``plt.plot`` in colour ``col``; nothing is returned. The
    function does not change the axis scale and does not create a legend.

    Args:
        x: Numeric array of x values (must support ``x * beta``
            element-wise, e.g. a NumPy array).
        y: Numeric array of y values; they are passed to ``np.log``
            unshifted.
        col: Colour passed to ``plt.plot`` for both artists.
        lab: Label prefix for the fitted curve; a space and the fitted
            rate rounded to 2 decimals are appended.
    """
    log_y = np.log(y)
    design = sm.add_constant(x, prepend=True)
    intercept, rate = sm.OLS(log_y, design).fit().params

    plt.plot(x, y, "o", color=col)

    fitted_curve = np.exp(intercept + x * rate)
    curve_label = lab + ' ' + str(np.round(rate, 2))
    plt.plot(x, fitted_curve, color=col, label=curve_label)

计算一个平衡过的流网络从源到各节点的流距离

def flowDistanceFromSource(G): #input a balanced nx graph
    """Compute a flow distance from the ``'source'`` node to every node.

    The graph is reversed and its ``'source'`` / ``'sink'`` labels are
    swapped. On that graph every node's value is then repeatedly replaced
    by ``1 + sum(value[n] * w(m, n) / T[m])`` over its outgoing edges,
    where ``T`` is the weighted out-degree taken from the *original*
    graph. Values are updated in place, so later nodes in a sweep already
    see the new values of earlier ones. Sweeping stops once the summed
    absolute change of a sweep is at most ``0.001 * number_of_nodes``, or
    after 20 sweeps.

    Args:
        G: Directed graph with nodes labelled ``'source'`` and ``'sink'``
            (both labels are required by the clean-up step). Per the
            original author's note it should be a balanced networkx graph
            — presumably weighted in-flow equals weighted out-flow at
            every intermediate node; this is not checked here.

    Returns:
        dict mapping each node of ``G`` to its distance value, with
        ``'source'`` fixed at 0.

    Raises:
        KeyError: if ``G`` has no ``'source'`` or no ``'sink'`` node.
    """
    reversed_graph = G.reverse()
    H = nx.relabel_nodes(reversed_graph, {'source': 'sink', 'sink': 'source'})
    #---------initialize flow distance dict------
    L = dict((node, 1) for node in G.nodes())
    #---------prepare weighted out-degree dict------
    T = G.out_degree(weight='weight')
    #---------iterate until converge------------
    tolerance = len(L) * 0.001
    # list(...) is required on Python 3: np.array() over a dict view yields
    # a 0-d object array and the subtraction below would raise TypeError.
    previous = np.array(list(L.values()))
    for _ in range(20):  # hard cap on the number of sweeps
        for i in L:
            l = 1
            for m, n in H.edges(i):
                attrs = H[m][n]
                # Prefer the 'weight' attribute (the one T is built from);
                # fall back to the first attribute value, which is what the
                # original code used, for graphs that name it differently.
                if 'weight' in attrs:
                    w = attrs['weight']
                else:
                    w = list(attrs.values())[0]
                l += L[n] * w / float(T[m])
            L[i] = l
        current = np.array(list(L.values()))
        delta = np.abs(current - previous).sum()
        previous = current
        if not delta > tolerance:
            break
    #---------clean the result-------
    # In H the label 'sink' denotes G's source and 'source' denotes G's
    # sink, so drop the former, remove the +1 offset from every remaining
    # value, and move the value stored under 'source' back to 'sink'.
    del L['sink']
    for i in L:
        L[i] -= 1
    L['sink'] = L.pop('source')
    L['source'] = 0
    return L

计算一个平衡过的流网络从各节点到汇的流距离

def flowDistanceToSink(G): #input a balanced nx graph
    """Compute a flow distance from every node of ``G`` to the sink.

    Every node starts at 1. Each node's value is then repeatedly replaced
    by ``1 + sum(value[n] * w(m, n) / T[m])`` over its outgoing edges
    ``(m, n)``, where ``T`` is the weighted out-degree. Values are updated
    in place, so later nodes in a sweep already see the new values of
    earlier ones. Sweeping stops once the summed absolute change of a
    sweep is at most ``0.001 * number_of_nodes``, or after 20 sweeps.
    Finally 1 is subtracted from every value, so a node without outgoing
    edges ends at 0.

    Args:
        G: Directed graph providing ``nodes()``, ``edges(node)``,
            ``out_degree(weight='weight')`` and ``G[m][n]`` edge-attribute
            access. Per the original author's note it should be a balanced
            networkx graph — presumably weighted in-flow equals weighted
            out-flow at every intermediate node; this is not checked here.

    Returns:
        dict mapping each node of ``G`` to its distance value.
    """
    #---------initialize flow distance dict------
    L = dict((node, 1) for node in G.nodes())
    #---------prepare weighted out-degree dict------
    T = G.out_degree(weight='weight')
    #---------iterate until converge------------
    tolerance = len(L) * 0.001
    # list(...) is required on Python 3: np.array() over a dict view yields
    # a 0-d object array and the subtraction below would raise TypeError.
    previous = np.array(list(L.values()))
    for _ in range(20):  # hard cap on the number of sweeps
        for i in L:
            l = 1
            for m, n in G.edges(i):
                attrs = G[m][n]
                # Prefer the 'weight' attribute (the one T is built from);
                # fall back to the first attribute value, which is what the
                # original code used, for graphs that name it differently.
                if 'weight' in attrs:
                    w = attrs['weight']
                else:
                    w = list(attrs.values())[0]
                l += L[n] * w / float(T[m])
            L[i] = l
        current = np.array(list(L.values()))
        delta = np.abs(current - previous).sum()
        previous = current
        if not delta > tolerance:
            break
    # Remove the +1 offset every value was initialised with.
    for i in L:
        L[i] -= 1
    return L

你可能感兴趣的:(数据_常用命令_Python)