Python 统计分析

1. K-S test

# Check whether a sample is consistent with the standard normal distribution
# N(0, 1) using the one-sample Kolmogorov-Smirnov test.
from scipy import stats
import numpy as np
import matplotlib.pyplot as plt

# Fix the global NumPy seed so the sample (and the test result) is reproducible.
np.random.seed(19)
x = stats.norm.rvs(size=100)  # 100 draws; loc/scale are left at their defaults

# Histogram of the sample for a quick visual check of its shape.
plt.figure()
plt.hist(x, bins=20, color='red')
plt.xlabel('x')
plt.ylabel('frequency')
plt.title('hist plot')

print(x.dtype)
# With 'norm' and no extra args, kstest compares against N(0, 1) specifically,
# not "any" normal distribution. A large p-value means the hypothesis that the
# sample comes from N(0, 1) is not rejected.
print(stats.kstest(x, 'norm'))
plt.show()
image.png

2. 两独立样本t检验

import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

# Fix the global NumPy seed so both samples are reproducible.
np.random.seed(125)

# Two independent normal samples with the same spread (scale=10) but
# different means (5 vs. 1).
x1 = stats.norm.rvs(loc=5, scale=10, size=500)
x2 = stats.norm.rvs(loc=1, scale=10, size=500)

# Overlay the two histograms; alpha=0.5 keeps the overlapping region visible.
plt.hist(x1, bins=20, color='red', alpha=0.5)
plt.hist(x2, bins=20, color='green', alpha=0.5)
plt.xlabel('x')
plt.ylabel('frequency')
plt.title('hist plot')

# Independent two-sample t-test. ttest_ind assumes equal variances by default,
# which matches how the samples were generated above.
print(stats.ttest_ind(x1, x2))
plt.show()
image.png

3. 单因素方差分析

import pandas as pd
from scipy import stats
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import matplotlib.pyplot as plt

# read_csv already returns a DataFrame, so no extra pd.DataFrame(...) wrap.
Iris = pd.read_csv('../11.seanborn/seaborn-data/iris.csv')
print(Iris.head())
print(Iris['species'].value_counts())

# Sepal width of each species, selected in a single .loc step.
setosa = Iris.loc[Iris['species'] == 'setosa', 'sepal_width']
versicolor = Iris.loc[Iris['species'] == 'versicolor', 'sepal_width']
virginica = Iris.loc[Iris['species'] == 'virginica', 'sepal_width']

# Levene's test checks the equal-variance assumption before running the
# one-way ANOVA on the three groups.
print(stats.levene(setosa, versicolor, virginica))
print(stats.f_oneway(setosa, versicolor, virginica))

# Tukey HSD post-hoc comparison to see which pairs of species differ.
tukey = pairwise_tukeyhsd(endog=Iris['sepal_width'], groups=Iris['species'], alpha=0.05)
print(tukey)
tukey.plot_simultaneous()
plt.show()
image.png

4. 相关性检验

import pandas as pd
from scipy import stats

# read_csv already returns a DataFrame, so no extra pd.DataFrame(...) wrap.
Iris = pd.read_csv('../11.seanborn/seaborn-data/iris.csv')
print(Iris.columns)

# Pearson correlation between sepal length and petal length; pearsonr returns
# the correlation coefficient and the p-value of the test for no correlation.
r, pval = stats.pearsonr(Iris['sepal_length'], Iris['petal_length'])
print('相关系数:',r)
print('相关系数显著性检验p-value:', pval)

你可能感兴趣的:(Python 统计分析)