# numpy部分 (NumPy section)
# import numpy as np
#
# #a = np.array([1, 2, 3, 4, 5], ndmin=1)
# #a = np.array([[1,1,2],[1,2,3]])
# #a = np.arange(12).reshape((3,4))
#a = np.linspace(0,10,5) # arguments: start, stop, number of evenly spaced sample points (5 points = 4 intervals)
#a = np.linspace(0,10,10).reshape(2,5)
#a = np.array([1,2,3,4]).reshape(2,2)
# d = np.array([[1,1],[2,3]])
# b = np.arange(4)
#c = 10*np.sin(a)
# c = a*d
# c_dot = np.dot(a,d)#矩阵 ,c_cot_2 = a.dot(b)
#print (a)
#print (c)
# #print(a+b)
# #print(b<3) #b = np.arange(4) [ True True True False]
# print (c,c_dot)
# #random
# import numpy as np
# a = np.random.random((2,4))
#
# print(a)
# print(np.sum(a,axis =1)) #还有min,max,axis 是维度 ,
# print(np.sum(a,axis =0))# axis=1 sums along each row (one result per row); axis=0 sums down each column (one result per column)
# # [[0.10429317 0.15675549 0.31883109 0.42421789]
# # [0.76801578 0.33735384 0.97900856 0.00229303]]
# # [1.00409764 2.08667122]
# # [0.87230895 0.49410933 1.29783965 0.42651092]
#基础运算
# a = np.arange(12).reshape((3,4))
# print(a)
# print(np.argmax(a))
# print(np.mean(a)) #平均
# print(np.median(a)) #中位数
# print(np.cumsum(a)) #累加
# print(np.diff(a)) # difference between adjacent elements (along the last axis)
# # sort, transpose,
# print(np.clip(a,3,5)) #截断
# print(np.mean(a,axis=1 ))
#
#5
#
# a = np.arange(12).reshape((3,4))
# # print(a)
# # print(a[2])
# # print(a[:,2])# third column (column index 2)
# # print(a[1,1:3])
# #for循环
# for row in a:#输出行
# print (row)
# for col in a.T: # a.T is the transpose of a, so this loop yields the columns of a
# print (col)
#
# for item in a.flat:
# print(item) #矩阵拉成一行
#
#6
#a = np.array([[1,1,2],[1,2,1]])
# b = np.array([[2,1,2],[3,2,3]])
#
# c = np.vstack((a,b))#上下合并
# c = np.hstack((a,b))#左右
# print (c.shape,a.shape) #
# print (c)
# #改变维度
# a = np.array([1,1,1])[:,np.newaxis]
# b = np.array([2,2,2])[:,np.newaxis]
# # print(a.shape)
# # print(a+b)
# c = np.concatenate((a,a,b),axis = 0) #多个合并
# print(a)
# print(c)
#7
import pylab as p
# Section 7 demo array: the integers 0..11 arranged as 3 rows x 4 columns.
# numpy is imported here because every earlier `import numpy as np` in the
# visible part of this file is commented out, so `np` would otherwise be
# undefined and this line would raise NameError.
import numpy as np

a = np.arange(12).reshape(3, 4)
#print(np.split(a,3,axis=0))#均等
#print(np.array_split(a,4,axis=0)) # unequal split allowed: arguments are the number of pieces and the axis
#8
# a = np.arange(4)
# b = a
# c = a
# d = b
# a[0] = 11 #类似引用
# print(a == b)
# print( a == d)
# b = a.copy() #deep copy
# pandas部分 (pandas section)
#pandas
import pandas as pd
#1
# s = pd.Series([1,3,6,np.nan,44,1])
# # print(s)
# dates = pd.date_range('20220101',periods = 6)
# # print(dates)
#
# p = np.random.randn(6,4)
# #print(p)
# df = pd.DataFrame(np.random.randn(6,4 ),index = dates,columns=['a','b','c','d'])
# #创造一个类似二维数组的东西,填充数组大小,列名(或者内容),行名
# #print(df)
#
# df2 = pd.DataFrame({'a':['1','2','3','4'],
# 'b':pd.Timestamp('20220101'),
# 'c':pd.Series(1,index = list(range(4)),dtype='float32'),
# 'd':np.array([3]*4,dtype='int32'),
# 'f':'end',
# 'g':range(3,11,2)})
# # print(df2)
# # print(df2.columns)
# # print(df2.values)
# # print(df2.describe())
# print(df2.T)
#2
# Section 2 fixture: a 6x4 integer DataFrame (values 0..23) indexed by a
# six-period date range starting at 2022-01-01, with columns a-d.
# `np` is imported locally because no live `import numpy as np` precedes this
# point in the visible file; `pd` comes from the `import pandas as pd` above.
import numpy as np

dates = pd.date_range('20220101', periods=6)
# Random-valued alternative:
# df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=['a', 'b', 'c', 'd'])
df = pd.DataFrame(
    np.arange(24).reshape(6, 4),
    index=dates,
    columns=['a', 'b', 'c', 'd'],
)
#print(df.b) #print(df[b])
#print(df[0:3],df['20220103':'20220106'])
#select by label:loc loc的意思是基于标签(label-based selection),
# 输入为标签,也就是,行是(0,1,2,3,4)中的值,列是('a', 'b' ,'c')中的值。
# print(df.loc[:,['a','b']])
# print(df.loc[['20220103'],['a','b']])
#select by position:iloc iloc的意思是基于索引(index-based selection),
# 输入为索引,也就是,行是(0,1,2,3,4)中的值,列是(0,1,2)中的值。
#print(df.iloc[3:5,1:3])
#print(df.iloc[[1,3,5],1:4])
#select by mixed label/position: ix (combined the two above; .ix was removed in pandas 1.0 — use loc/iloc instead)
#print(df.ix[:3,['a','c']])
#boolean indexing
# print(df)
# print(df[df.a<8])
#3
# dates = pd.date_range('20220101',periods = 6)
# #df = pd.DataFrame(np.random.randn(6,4 ),index = dates,columns=['a','b','c','d'])
# df = pd.DataFrame(np.arange(24).reshape(6,4),index = dates,columns=['a','b','c','d'])
# pd.Series([1,2,3,4,5,6],index=pd.date_range('20220101',periods = 6)) #序列,一一对应
# # df.iloc[2,2]= 1111
# # print(df)
# # df.loc['20220104','b'] = 2222
# print(df)
# df[df.a>4] = 33
# print(df)
# df.a[df.a>4] = 99
# print(df)
# df['f'] = np.arange(1,7)
# print(df)
#4
# dates = pd.date_range('20220101',periods = 6)
# #df = pd.DataFrame(np.random.randn(6,4 ),index = dates,columns=['a','b','c','d'])
# df = pd.DataFrame(np.arange(24).reshape(6,4),index = dates,columns=['a','b','c','d'])
# #pd.Series([1,2,3,4,5,6],index=pd.date_range('20220101',periods = 6)) #序列,一一对应
# df.iloc[0,1] = np.nan
# df.iloc[1,2] = np.nan
# print(df.dropna(axis = 1 ,how ='any'))#how = {'any', 'all'}; the default is 'any'
# print(df.dropna(axis = 1 ,how ='all'))
# print(df.fillna(value = 0)) #填入
# print(df.isnull()) #检查是否缺失数据
# print(np.any(df.isnull()) == True) #检查
#5 读取录入目录
#C:\\Users\\txjoe\\Desktop\\2022年综合测评优秀学生干部评选原始分数.xlsx
# data = pd.read_excel('C:\\Users\\txjoe\\Desktop\\2022年综合测评优秀学生干部评选原始分数.xlsx')
# print(data)
# data.to_excel()
# pd.read_pickle('student.excel')
# #print(p)
#6 concatenating
# df1 = pd.DataFrame(np.arange(24).reshape(6,4),index = dates,columns=['a','b','c','d'])
# df2 = pd.DataFrame(np.arange(24,48).reshape(6,4),index = dates,columns=['a','b','c','d'])
# df3 = pd.DataFrame(np.arange(48,72).reshape(6,4),index = dates,columns=['a','b','c','d'])
# # print(df1)
# # print(df2)
# # print(df3)
# res = pd.concat([df1,df2,df3],axis=0,ignore_index=True)#合并
# print(res)
# #join,['inner','outer']
# df1 = pd.DataFrame(np.ones((3,4))*0,columns=['a','b','c','d'],index = [1,2,3])
# df2 = pd.DataFrame(np.ones((3,4))*2,columns=['b','c','d','e'],index = [2,3,4])
# df3 = pd.DataFrame(np.ones((3,4))*1,columns=['b','c','d','e'],index = [2,3,4])
#res = pd.concat([df1,df2])#默认outer,还有inner
# res = pd.concat([df1,df2],join='inner',ignore_index=True)
# print(res)
#append (note: DataFrame.append was removed in pandas 2.0; use pd.concat instead)
# res = df1.append(df2,ignore_index=True)
# res = df1.append([df3,df2],ignore_index=True)
# print(res)
# s1 = pd.Series([1,2,3,4],index=['a','b','c','d'])
# res = df1.append(s1,ignore_index = True)
# print(res)
#7 merge
# left = pd.DataFrame({'key':['L','M','N','K'],
# 'key2':['1','2','3','4'],
# 'A':['a','b','c','d']})
# right = pd.DataFrame({'key':['L','M','N','K'],
# 'key2':['1','1','1','1'],
# 'C ':['a','b','c','d']})
# # res = pd.merge(left,right,on = 'key')
# res = pd.merge(left,right,on = ['key','key2'],how='right')
# # res = pd.merge(left,right,on = ['key','key2'],how='inner')
# # res = pd.merge(left,right,on = ['key','key2'],how='outer')
#
# # print(left)
# # print((right))
#
# print(res)
#8
# import matplotlib.pyplot as plt
#
# data = pd.Series(np.random.randn(1000),index = np.arange(1000))
#
#
# data = pd.DataFrame(np.random.randn(1000,4),
# index=np.arange(1000),
# columns=list('ABCD'))
# data = data.cumsum()
# #print(data)
# ax = data.plot.scatter(x= 'A',y = 'B',color = 'r',label = 'Class 1')
#
# data.plot.scatter(x= 'A',y = 'C',color = 'DarkGreen',label = 'Class 2',ax = ax)
# plt.show()