Sim_Jackson | 2023
# Import the required libraries (pandas is third-party; os and warnings are standard library)
import pandas as pd
import os
import warnings
warnings.filterwarnings('ignore')
# Folder that holds the input data; it becomes the working directory so the
# data files can be opened by bare file name.
dir_ = r'D:\科研论文\Python\数据分析'
os.chdir(dir_)
# List every entry in that folder (it is now the current directory).
files = os.listdir()
files  # finaldata is the data set after the missing values were filled in
['data.xlsx', 'finaldata.csv', 'google.csv', 'reaseach_data.xlsx']
# Load the data.
# Open the file by name rather than by position: os.listdir() returns entries
# in arbitrary order, so files[1] is not guaranteed to be 'finaldata.csv'.
df = pd.read_csv('finaldata.csv')
# Labels of every column except 'time' (raises KeyError if 'time' is absent).
collst = df.columns.drop('time')
collst
Index(['X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9', 'X10'], dtype='object')
# Show all rows of the column whose label sits at position 2 of collst.
df.loc[:, collst[2]]
0 1527.100000
1 1548.750000
2 1573.100000
3 1567.850000
4 1571.950000
...
752 1789.400408
753 1789.400408
754 1803.350000
755 1813.750000
756 1789.400408
Name: X3, Length: 757, dtype: float64
import scipy.stats as stats
# The Jarque-Bera test is a goodness-of-fit test that checks whether the sample
# data have skewness and kurtosis matching those of a normal distribution.
# If the p-value is below the chosen level of 0.05, reject the null hypothesis:
# at the 5% significance level the sample is not considered normally distributed.
# Each result is (JB statistic, p-value), e.g.
# (0.28220016508625245, 0.86840239542814834).
for col in collst:
    # Print the column label followed by its test result.
    print(col, stats.jarque_bera(df[col]))
X1 Jarque_beraResult(statistic=59.454720948523104, pvalue=1.2290168882600483e-13)
X2 Jarque_beraResult(statistic=36.29592246751836, pvalue=1.3135317611379094e-08)
X3 Jarque_beraResult(statistic=17.54796785231033, pvalue=0.00015470601396028183)
X4 Jarque_beraResult(statistic=96.21929966917415, pvalue=0.0)
X5 Jarque_beraResult(statistic=18.26592700292738, pvalue=0.00010804491891125423)
X6 Jarque_beraResult(statistic=4263.5528015676455, pvalue=0.0)
X7 Jarque_beraResult(statistic=81.06892326459135, pvalue=0.0)
X8 Jarque_beraResult(statistic=71.48073636057188, pvalue=3.3306690738754696e-16)
X9 Jarque_beraResult(statistic=29.700531080527117, pvalue=3.553134283462711e-07)
X10 Jarque_beraResult(statistic=85.37705482352717, pvalue=0.0)
输出结果为: