描述性统计分析是统计学中的一种分析方法,它对原始数据进行概括和观察,并加以描述,一般分为集中趋势分析和离散程度分析两类。集中趋势分析主要研究数据的集中程度,常用的反映指标有众数、中位数和均值;离散程度分析主要度量数据的分散程度,常用的反映指标有四分位数和标准差。值得注意的是,数据集的集中趋势越强,其离散程度就越弱,反之亦然。
#获得原始数据
def getdata():
    """Prompt on stdin until the user enters a non-empty sequence of integers.

    The user types values separated by commas (e.g. ``1,2,3``). A single
    number such as ``5`` is accepted and returned as a one-element tuple.
    Anything else (malformed text, empty input, non-``int`` items) prints
    the retry message and prompts again.

    Returns:
        The parsed tuple (or list, if the user typed ``[...]``) of ``int``.
    """
    # Local import: the file has no import block to extend.
    import ast

    while True:
        raw = input('请输入数据型数据(数据间用\',\'作为分隔符:)')
        # SECURITY: the original called eval() on this raw text, which lets
        # the user execute arbitrary code. literal_eval only accepts Python
        # literals, so expressions such as "1+2" are now rejected as well.
        try:
            data = ast.literal_eval(raw.strip())
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            data = None

        # A lone number parses to an int, not a sequence; wrap it so that
        # callers always receive something iterable.
        if type(data) is int:
            data = (data,)

        # type(...) is int (not isinstance) keeps the original's rejection
        # of bool and float items. Empty sequences are rejected because no
        # statistic can be computed from them.
        if (
            isinstance(data, (tuple, list))
            and data
            and all(type(item) is int for item in data)
        ):
            return data

        print('输入非数据型数据,请重新输入')
        print('-' * 20)
#对原始数据求众数
def Mode(data):
    """Return the most frequent value(s) in ``data`` as a comma-joined string.

    Args:
        data: iterable of hashable values (the caller passes a tuple).

    Returns:
        The value(s) sharing the highest occurrence count, converted to
        ``str`` and joined with ``','`` in order of first appearance.
        Returns ``''`` when ``data`` is empty.
    """
    counts = {}
    for num in data:
        counts[num] = counts.get(num, 0) + 1

    if not counts:
        return ''

    # Compute the top frequency once instead of on every loop iteration.
    highest = max(counts.values())
    return ','.join(str(value) for value, freq in counts.items() if freq == highest)
def Median(data):
    """Return the median of ``data``.

    The values are sorted first. For an odd number of values the middle
    element itself is returned; for an even number, the arithmetic mean of
    the two middle elements (always a float because of ``/``).
    """
    ordered = sorted(data)
    size = len(ordered)
    half = size // 2
    if size % 2:
        return ordered[half]
    return (ordered[half - 1] + ordered[half]) / 2
def Average(data):
    """Return the arithmetic mean of ``data``.

    Args:
        data: sized collection of numbers.

    Returns:
        ``sum(data) / len(data)`` (a float for integer input).

    Raises:
        ZeroDivisionError: if ``data`` is empty.
    """
    # Use the builtin sum() rather than shadowing it with a local
    # accumulator of the same name.
    return sum(data) / len(data)
def Quartile(data):
    """Return the lower and upper quartiles of ``data`` as ``(Q1, Q3)``.

    Quartile ranks are the 1-based positions ``(n + 1) / 4`` and
    ``3 * (n + 1) / 4`` in the sorted data. A whole-number rank selects
    that element directly; a fractional rank interpolates linearly between
    the two neighbouring elements.

    Ranks that fall outside ``[1, n]`` (which happens for n < 3) are
    clamped to the smallest / largest value. The original indexed past the
    end of the list in that case and raised ``IndexError``.

    Raises:
        IndexError: if ``data`` is empty (same exception type as before,
            now with an explicit message).
    """
    ordered = sorted(data)
    size = len(ordered)
    if size == 0:
        raise IndexError('Quartile() requires at least one value')

    def value_at(quarters):
        # rank = quarters * (size + 1) / 4, split into its integer part and
        # a remainder in quarters so no float comparison is needed.
        whole, rem = divmod(quarters * (size + 1), 4)
        if whole < 1:
            return ordered[0]
        if whole >= size:
            return ordered[-1]
        if rem == 0:
            return ordered[whole - 1]
        frac = rem / 4
        return (1 - frac) * ordered[whole - 1] + frac * ordered[whole]

    return value_at(1), value_at(3)
def Stdev(avg, data):
    """Return the population standard deviation of ``data`` around ``avg``.

    Sums the squared deviation of every value from ``avg``, divides by the
    number of values (not n - 1) and takes the square root.
    """
    squared_total = 0
    for value in data:
        squared_total += (value - avg) ** 2
    return (squared_total / len(data)) ** 0.5
def _parse_int_sequence(text):
    """Parse ``text`` into a non-empty tuple/list of ints, or return None."""
    # Local import: the file has no import block to extend.
    import ast

    # SECURITY: the original passed this user-typed text to eval(), which
    # allows arbitrary code execution. literal_eval accepts literals only,
    # so arithmetic expressions such as "1+2" are no longer evaluated.
    try:
        parsed = ast.literal_eval(text.strip())
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        return None

    # A single number parses to a bare int; present it as a 1-tuple.
    if type(parsed) is int:
        return (parsed,)
    if not isinstance(parsed, (tuple, list)) or not parsed:
        return None
    # Exact type check keeps the original's rejection of bool and float.
    for item in parsed:
        if type(item) is not int:
            return None
    return parsed


def getdata():
    """Keep prompting until the user enters comma-separated integers.

    Input such as ``1,2,3`` yields ``(1, 2, 3)``; a lone ``5`` yields
    ``(5,)``. Malformed, empty, or non-integer input prints the retry
    message and asks again instead of raising.

    Returns:
        A non-empty tuple (or list, if typed as ``[...]``) of ``int``.
    """
    while True:
        data = _parse_int_sequence(input('请输入数据型数据(数据间用\',\'作为分隔符):'))
        if data is not None:
            return data
        print('输入非数据型数据,请重新输入')
        print('-' * 20)
def Mode(data):
    """Return the mode(s) of ``data`` as a comma-joined string, or ``None``.

    Args:
        data: iterable of hashable values.

    Returns:
        ``None`` when there is no mode, i.e. when no value occurs more than
        once. This includes empty input, which previously returned ``''``.
        Otherwise the value(s) with the highest occurrence count, converted
        to ``str`` and joined with ``','`` in order of first appearance.
    """
    frequency = {}
    for num in data:
        frequency[num] = frequency.get(num, 0) + 1

    # Computed once up front; default=0 covers empty input.
    top = max(frequency.values(), default=0)
    if top <= 1:
        return None

    winners = [str(value) for value, seen in frequency.items() if seen == top]
    return ','.join(winners)
def Median(data):
    """Return the middle value of ``data`` after sorting.

    With an odd count the central element is returned unchanged; with an
    even count the result is the mean of the two central elements, computed
    with true division and therefore a float.
    """
    ranked = sorted(data)
    middle, is_odd = divmod(len(ranked), 2)
    if is_odd:
        return ranked[middle]
    below, above = ranked[middle - 1], ranked[middle]
    return (below + above) / 2
def Average(data):
    """Compute the arithmetic mean of the values in ``data``.

    Args:
        data: sized collection of numbers.

    Returns:
        The total of the values divided by how many there are.

    Raises:
        ZeroDivisionError: if ``data`` contains no values.
    """
    # The original accumulated into a local named ``sum``, hiding the
    # builtin it was re-implementing; call the builtin directly instead.
    total = sum(data)
    return total / len(data)
def Quartile(data):
    """Compute the lower and upper quartiles of ``data``.

    The quartiles sit at 1-based ranks ``(n + 1) / 4`` and
    ``3 * (n + 1) / 4`` of the sorted values. An integral rank picks that
    element; a fractional rank is linearly interpolated between its two
    neighbours.

    For fewer than three values a rank can fall below 1 or above n. The
    original then indexed outside the list and raised ``IndexError``; such
    ranks are now clamped to the minimum / maximum value.

    Returns:
        ``(lower_quartile, upper_quartile)``.

    Raises:
        IndexError: if ``data`` is empty (exception type unchanged from the
            original, message made explicit).
    """
    ordered = sorted(data)
    count = len(ordered)
    if not ordered:
        raise IndexError('Quartile() requires at least one value')

    results = []
    # Work in quarter units so the rank splits exactly into an integer part
    # and a remainder, avoiding float equality tests.
    for numerator in (count + 1, 3 * (count + 1)):
        rank, quarter_rem = divmod(numerator, 4)
        if rank < 1:
            results.append(ordered[0])
        elif rank >= count:
            results.append(ordered[-1])
        elif quarter_rem == 0:
            results.append(ordered[rank - 1])
        else:
            weight = quarter_rem / 4
            results.append((1 - weight) * ordered[rank - 1] + weight * ordered[rank])

    lower, upper = results
    return lower, upper
def Stdev(avg, data):
    """Compute the population standard deviation of ``data`` given its mean.

    ``avg`` is taken as supplied by the caller. The squared distances of
    the values from ``avg`` are summed, divided by ``len(data)`` and
    square-rooted.
    """
    squared_gaps = [(value - avg) ** 2 for value in data]
    running = 0
    for gap in squared_gaps:
        running += gap
    variance = running / len(data)
    return variance ** 0.5
# Script entry: read the data interactively, compute each descriptive
# statistic, and print a summary report.
data = getdata()
mode = Mode(data)
med = Median(data)
avg = Average(data)
# Quartile returns (lower, upper): Ml is the lower quartile, Mu the upper.
Ml, Mu = Quartile(data)
std = Stdev(avg, data)
# The report lists the upper quartile before the lower one, so Mu must be
# passed before Ml; the original passed them in the opposite order and so
# printed each quartile under the other's label.
print('众数为{},\n中位数为{},\n上分位数为{},\n下分位数为{},\n均值为{:.2f},\n标准差为{:.2f},\n'.format(mode, med, Mu, Ml, avg, std))