#-*-encoding:utf-8-*-
'''
Created by zwg on 2016-12-03
'''
import pandas
import numpy
from pylab import mpl
from matplotlib import pyplot
# Global matplotlib settings, applied once at import time:
#   * 'font.sans-serif' -> SimHei, presumably so Chinese text in plot labels
#     renders instead of falling back to placeholder boxes.
#   * 'axes.unicode_minus' -> False, so negative tick labels use the ASCII
#     hyphen rather than the Unicode minus sign.
mpl.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
def practise_one(seed=None):
    """Review basic pandas slicing and filtering, printing every result.

    Builds a 5x3 DataFrame of standard-normal samples with columns
    ``A``, ``B`` and ``C`` and prints a series of positional slices,
    label slices and boolean-mask selections taken from it.

    Args:
        seed: Optional value forwarded to ``numpy.random.seed``. With the
            default ``None`` the generator is reseeded unpredictably, so
            each run prints different numbers; pass an int to get
            reproducible output.

    Returns:
        None. Everything is written to standard output.
    """
    # Single-argument print(...) is used throughout because it behaves the
    # same under Python 2 and Python 3.
    numpy.random.seed(seed)
    Data = pandas.DataFrame(data=numpy.random.randn(5, 3), columns=list('ABC'))
    # Other things worth inspecting on a frame:
    # print(Data)
    # print(Data.values)
    # print(Data.columns)
    # print(Data.index)
    # print(Data.tail(5))
    # print(Data.head(5))

    # Slicing with iloc / loc / iat.
    # ``.ix`` was deprecated in pandas 0.20 and removed in 1.0, so the
    # label-based lookups below use ``.loc``. Note that iloc[0:2] is
    # end-exclusive (rows 0-1) while loc[0:2] is end-inclusive (rows 0-2).
    print(Data.iloc[0:2, [0, 2]])
    # Row labels 0..2, columns picked by position (what .ix did for ints
    # on a string column index).
    print(Data.loc[0:2, Data.columns[[0, 2]]])
    print(Data.loc[0:2, ['A', 'C']])
    print(Data.iat[0, 0])

    # Filtering rows by column conditions and selecting columns.
    print(Data[Data.A > 0.5])
    print(Data[(Data.A > 0.5) & (Data.C < 0.7)])
    print(Data[['B']])
    print(Data[Data.A > 0.5].loc[:, 'B'])
    print(Data[Data.A > 0.5][['B']])
    print(Data[['B']][Data.A > 0.5])

    # Reading csv / xlsx files (reference signatures):
    # pandas.read_csv(filepath_or_buffer=file_name, header=0/None, index_col=False/row_name, encoding='utf-8')
    # pandas.read_table(filepath_or_buffer=file_name, header=0/None, index_col=False/row_name, encoding='utf-8')
    # pandas.read_excel(io=file_name, sheet_name=0/sheet_name, header=0/None, index_col=None/row_name)
    #   (`sheet_name` was spelled `sheetname` before pandas 0.21)
def practise_two(path='test.xlsx'):
    """Load an Excel sheet and plot a few summaries of it.

    The workbook's first row is used as the header. The active code needs
    the columns ``class``, ``grade`` and ``age``; the commented-out
    examples additionally reference ``sex`` and ``name``.

    Args:
        path: Location of the workbook to read. Defaults to ``'test.xlsx'``
            in the current working directory.

    Returns:
        None. The plots are shown through ``pyplot.show()``.
    """
    # No ``encoding`` argument here: an .xlsx workbook is not a plain-text
    # file, and current pandas rejects that keyword for read_excel.
    Data = pandas.read_excel(path, header=0)

    # Adding and dropping rows
    # Data1 = Data.drop([1, 2, 3], axis=0)
    # Data2 = Data1.copy()
    # Data1 = pandas.concat([Data1, Data2], ignore_index=True)
    # print(Data1)

    # Adding and dropping columns
    # Data1 = Data.drop(['name', 'class'], axis=1)
    # print(Data1.columns)
    # Data1 = Data.reindex(columns=['class', 'name', 'grade', 'add'])
    # print(Data1.columns)

    # Sorting
    # print(Data.sort_values(by=['grade'], ascending=False))
    # print(Data.sort_index(axis=0, ascending=False))

    # Describing grouped data
    # Data1 = Data.groupby('sex')
    # print(Data1['sex'].count())
    # print(Data1['grade'].mean())
    # print(Data1['sex'].unique())

    # Grouping by several keys
    # Data2 = Data.groupby(['class', 'sex'])
    # print(Data2['grade'].describe())

    # Descriptive statistics
    # print(Data.describe(include='all'))
    # Data3 = Data[['grade', 'sex']]
    # figure = pyplot.figure()
    # Data3.plot(kind='box', by='sex')
    # pyplot.show()

    # Counting per category
    # print(Data['sex'].value_counts())

    # Plotting
    # Data.boxplot(column='grade', by=['class', 'sex'])
    # Data.hist(column='grade')
    # Data['sex'].value_counts().plot(kind='bar')

    # Mean grade per class as a horizontal bar chart.
    Data.groupby(['class'])['grade'].mean().plot(kind='barh', colormap='cool')
    # Grade against age on log-log axes.
    Data.plot(x='grade', y='age', kind='scatter', title='grade-age change', logx=True, logy=True)
    # Kernel density estimate of the frame's columns.
    Data.plot(kind='kde')
    # Display everything drawn above.
    pyplot.show()
def practise_three():
    """Demonstrate combining two DataFrames with ``merge``.

    Creates two 5x3 frames of uniform random numbers (columns ``A``, ``B``,
    ``C``), gives each an integer key column ``D`` and prints both frames
    followed by their inner merge on ``D``. Because the second frame's keys
    are ``[1, 2, 2, 4, 5]``, the merged result has no row for key 3 and
    pairs the first frame's key-2 row with both key-2 rows of the second.

    Returns:
        None. The three frames are written to standard output.
    """
    # Single-argument print(...) behaves the same under Python 2 and 3.
    Data1 = pandas.DataFrame(data=numpy.random.rand(5, 3), columns=list('ABC'))
    Data1['D'] = [1, 2, 3, 4, 5]
    print(Data1)
    Data2 = pandas.DataFrame(data=numpy.random.rand(5, 3), columns=list('ABC'))
    Data2['D'] = [1, 2, 2, 4, 5]
    print(Data2)

    # Index-based alternative using join instead of merge:
    # Data1 = Data1.set_index('D')
    # Data2 = Data2.set_index('D')
    # Data3 = Data1.join(Data2, lsuffix='_left', rsuffix='_right', how='left')
    # print(Data3)

    # Overlapping column names get the suffixes '_1' (left) and '_2' (right).
    Data4 = Data1.merge(Data2, on='D', how='inner', suffixes=('_1', '_2'))
    print(Data4)
if __name__ == '__main__':
    # Script entry point: run one exercise at a time by toggling the
    # comments below. Only practise_two() is currently enabled.
    # practise_one()
    practise_two()
    # practise_three()