《商务与经济统计》Python实现笔记(二)

总体方差的置信区间

from scipy.stats import chi2
def CI(n,sigma2,alpha):
    """
    Return the confidence interval for a population variance.

    The interval is built from the chi-square distribution with n-1
    degrees of freedom:
        (n-1)*s^2 / chi2_(upper quantile)  <=  sigma^2  <=  (n-1)*s^2 / chi2_(lower quantile)

    n: sample size
    sigma2: sample variance
    alpha: confidence level, e.g. 0.95 for a 95% interval

    Returns a tuple (lower, upper) with lower <= upper.
    """
    tail = (1-alpha)/2
    scaled = (n-1)*sigma2
    # Dividing by the LARGER quantile (1-tail) yields the SMALLER bound,
    # so the upper quantile belongs to the lower limit and vice versa.
    lower = scaled/chi2.ppf(1-tail,df=n-1)
    upper = scaled/chi2.ppf(tail,df=n-1)
    return (lower,upper)
    

方差假设检验(卡方检验)

from scipy.stats import chi2
def chi2_test(n,s,sigma):
    """
    Upper-tail p-value of the chi-square test for a single variance.

    n: sample size (the test uses n-1 degrees of freedom)
    s: sample statistic in the numerator; it is used as-is (not squared),
       so presumably the sample variance should be passed
    sigma: hypothesized population value in the denominator; also used
           unsquared, so presumably the hypothesized variance

    Returns P(X >= (n-1)*s/sigma) where X follows chi-square(n-1).
    """
    dof = n-1
    statistic = dof*s/sigma
    return chi2.sf(statistic,df=dof)
    

两个总体方差齐性检验(F检验)

用两个样本的方差推断两个总体方差的大小关系

from scipy.stats import f
def f_test(n1,n2,sig_squre1,sig_square2):
    """
    One-tailed p-value of the F test comparing two population variances.

    n1: size of the sample whose variance is the numerator
    n2: size of the sample whose variance is the denominator
    sig_squre1: variance of the first sample; should be the larger of the
                two so that the statistic is >= 1 (parameter spelling is
                kept unchanged so existing callers keep working)
    sig_square2: variance of the second sample

    Returns the upper-tail probability P(F >= sig_squre1/sig_square2) for
    an F distribution with (n1-1, n2-1) degrees of freedom. Double it for
    a two-tailed test.
    """
    # Use a distinct local name: assigning to `f` here would shadow the
    # imported scipy.stats.f distribution and break the call below.
    f_stat = sig_squre1/sig_square2

    # Sample variances carry n-1 degrees of freedom each.
    return f.sf(f_stat,n1-1,n2-1)  # one-tailed p-value

多个总体比率相等性检验(观察频率与期望频率的卡方检验)

import pandas as pd
import numpy as np
from scipy import stats

# Load the observed counts from a local Excel workbook (machine-specific path).
# The "是"/"否" columns of this frame are selected by the test call further down.
data = pd.read_excel(r"C:\Users\liuhao\Desktop\a.xls")

品牌 是 否
雪佛兰 69 56
福特 120 80
本田 123 52
# Chi-square test on the contingency table formed by the "是"/"否" count columns.
# The result unpacks as (statistic, p-value, degrees of freedom, expected frequencies).
kf = stats.chi2_contingency(data[["是","否"]])
(7.891045125087675,   0.019341106790498865,   2,  array([[ 78. ,  47. ],[124.8,  75.2],[109.2,  65.8]]))
返回值依次为:(卡方统计量, 单尾p值, 自由度, 各观察项的期望频数)

多重比较方法(Marascuilo procedure)

import pandas as pd
import numpy as np
from scipy import stats

# Load the contingency table (first column becomes the row index), then add
# the per-row total ("sum") and the sample proportion of "是" ("fre").
data = pd.read_excel(r"C:\Users\liuhao\Desktop\a.xls",index_col=0)
data["sum"] = data.sum(axis="columns")
data["fre"] = data["是"]/data["sum"]

# Marascuilo procedure: pairwise comparison of k population proportions.
# The critical chi-square value uses k-1 degrees of freedom at the 0.05 level.
n_groups = len(data.index)
chi_square = stats.chi2.ppf(0.95,df=n_groups-1)
a = []
for i in range(n_groups):
    for j in range(i+1,n_groups):
        p_i, n_i = data["fre"].iloc[i], data["sum"].iloc[i]
        p_j, n_j = data["fre"].iloc[j], data["sum"].iloc[j]
        # Absolute difference between the proportions of rows i and j
        # (must use row j, not row i+1, or pair (i, j) is never compared).
        p = abs(p_i - p_j)
        # Critical value for this pair, using the computed chi-square
        # quantile rather than a rounded table constant.
        cv = np.sqrt(chi_square)*np.sqrt(p_i*(1-p_i)/n_i + p_j*(1-p_j)/n_j)
        a.append([p,cv])
        # If p > cv, the difference between the two proportions is significant.


输出:

[[0.04799999999999993, 0.13799131381358756],
    [0.04799999999999993, 0.13784846145853047],
    [0.10285714285714287, 0.11974554538054417]]

两分类变量的独立性检验(卡方检验)

import pandas as pd
import numpy as np
from scipy import stats

# Load the two-way table of observed counts from a local Excel workbook
# (machine-specific path); the first column is used as the row index.
data = pd.read_excel(r"C:\Users\liuhao\Desktop\s.xls",index_col=0)
# Chi-square test of independence on the whole table; the result unpacks as
# (statistic, p-value, degrees of freedom, expected frequencies).
stats.chi2_contingency(data)

啤酒 男 女
淡啤 51 39
普通 56 21
黑啤 25 8
(6.446821152703504,
0.03981902059909915,
2,
array([[59.4 , 30.6 ],[50.82, 26.18],[21.78, 11.22]]))

拟合优度检验(卡方检验)

多项概率分布(检验样本分布是否符合历史分布)

import pandas as pd
import numpy as np
from scipy import stats

# Chi-square goodness-of-fit test: compare observed category counts with the
# counts expected under the hypothesized (historical) distribution.
f_obs = [48,98,54] # observed frequencies
f_exp = [60,100,40] # expected frequencies
stats.chisquare(f_obs,f_exp)

#Power_divergenceResult(statistic=7.34, pvalue=0.02547646994668102)

正态分布检验

import numpy as np
from scipy import stats
# Sample of 50 observations to be tested for normality.
data=[71,66,61,65,54,93,60,86,70,70,73,73,55,63,56,62,76,54,82,79,76,68,53,58,85,80,56,61,61,64,65,62,90,69,76,79,77,54,64,74,65,65,61,56,63,80,56,71,79,84]

# D'Agostino-Pearson omnibus test (combines skewness and kurtosis).
stats.normaltest(data, axis=0)
#NormaltestResult(statistic=3.220501948981951, pvalue=0.19983745367770303)

# Kolmogorov-Smirnov test against a normal distribution whose mean and
# standard deviation are estimated from the sample itself (np.std uses ddof=0).
# NOTE: estimating the parameters from the same data makes the KS p-value conservative.
stats.kstest(data, 'norm', (np.mean(data),np.std(data)))
#KstestResult(statistic=..., pvalue=...)

# Shapiro-Wilk test; the result is (W statistic, p-value).
stats.shapiro(data)
#(0.9567651152610779, 0.06508207321166992)

# If the p-value exceeds the significance level, the null hypothesis cannot
# be rejected, i.e. the data are consistent with a normal distribution.

你可能感兴趣的:(数据分析,python,统计学,数据分析)