# Python correlation analysis

# -*- coding: utf-8 -*-
"""
Created on Thu Aug  2 11:17:06 2018

@author: 102155
"""

def mean(x):
    """Return the arithmetic mean of the values in x.

    x must support both ``sum()`` and ``len()``. An empty x raises
    ZeroDivisionError because the element count is zero.
    """
    total = sum(x)
    count = len(x)
    return total / count

# Deviation of every data point from the mean
def de_mean(x):
    """Return a new list holding each element of x minus the mean of x."""
    center = mean(x)
    deviations = [value - center for value in x]
    return deviations

# Helper function
# dot(v, w) is the dot product: v = [1, 2, 3], w = [7, 8, 9]
# gives dot(v, w) = 1*7 + 2*8 + 3*9
def dot(v, w):
    """Return the sum of the element-wise products of v and w.

    Elements are paired with ``zip``, so when the inputs differ in length
    the extra elements of the longer one are ignored.
    """
    pairwise_products = [left * right for left, right in zip(v, w)]
    return sum(pairwise_products)
# Sum of the squared elements of v
def sum_of_squares(v):
    """Return the dot product of v with itself (v_1*v_1 + ... + v_n*v_n)."""
    squared_total = dot(v, v)
    return squared_total
# Sample variance of x
def variance(x):
    """Return the sample variance of x.

    The sum of squared deviations from the mean is divided by ``len(x) - 1``,
    so a single-element x raises ZeroDivisionError.
    """
    degrees_of_freedom = len(x) - 1
    squared_deviation_total = sum_of_squares(de_mean(x))
    return squared_deviation_total / degrees_of_freedom

# Sample standard deviation
import math
def standard_deviation(x):
    """Return the square root of the sample variance of x."""
    sample_variance = variance(x)
    return math.sqrt(sample_variance)


# Sample covariance
def covariance(x, y):
    """Return the sample covariance of the paired sequences x and y.

    The dot product of the two deviation-from-mean vectors is divided by
    ``n - 1``, where n is the number of pairs.

    Raises:
        ValueError: if x and y differ in length.
        ZeroDivisionError: if the inputs hold fewer than two elements.
    """
    n = len(x)
    # dot() pairs elements with zip and would silently drop the tail of the
    # longer input while n still counted all of x, giving a meaningless value.
    if len(y) != n:
        raise ValueError("covariance requires x and y to have the same length")
    return dot(de_mean(x), de_mean(y)) / (n - 1)

# Pearson correlation coefficient
def correlation(x, y):
    """Return the sample Pearson correlation coefficient of x and y.

    The covariance of x and y is divided by the product of their sample
    standard deviations. When either standard deviation is not positive
    (for example, a constant input) the correlation is undefined and 0.0 is
    returned instead.

    Raises:
        ValueError: if x and y differ in length.
        ZeroDivisionError: if the inputs hold fewer than two elements.
    """
    # Unequal lengths cannot be paired; fail loudly instead of letting the
    # zip-based helpers truncate the longer input.
    if len(x) != len(y):
        raise ValueError("correlation requires x and y to have the same length")
    std_x = standard_deviation(x)
    std_y = standard_deviation(y)
    if not (std_x > 0 and std_y > 0):
        return 0.0
    r = covariance(x, y) / (std_x * std_y)
    # Floating-point rounding can push the ratio marginally outside [-1, 1];
    # clamp it. Plain comparisons are used so a NaN result passes through.
    if r > 1.0:
        return 1.0
    if r < -1.0:
        return -1.0
    return r

import random

# Two lists of 20 random integers each, drawn from 0..100 inclusive
a = [random.randint(0, 100) for _ in range(20)]
b = [random.randint(0, 100) for _ in range(20)]

# Print the correlation coefficient of the two random samples
print(correlation(a, b))

 

# You may also be interested in: Python correlation analysis